diff --git a/src/main/scala/io/archivesunleashed/ArchiveRecord.scala b/src/main/scala/io/archivesunleashed/ArchiveRecord.scala index 241c4dee..2b04f40e 100644 --- a/src/main/scala/io/archivesunleashed/ArchiveRecord.scala +++ b/src/main/scala/io/archivesunleashed/ArchiveRecord.scala @@ -59,8 +59,11 @@ trait ArchiveRecord extends Serializable { * @param r the serialized record */ class ArchiveRecordImpl(r: SerializableWritable[ArchiveRecordWritable]) extends ArchiveRecord { + // Option would require refactor of methods. Ignore. + // scalastyle:off null var arcRecord: ARCRecord = null var warcRecord: WARCRecord = null + // scalastyle:on null if (r.t.getFormat == ArchiveRecordWritable.ArchiveFormat.ARC) { arcRecord = r.t.getRecord.asInstanceOf[ARCRecord] diff --git a/src/main/scala/io/archivesunleashed/app/CommandLineApp.scala b/src/main/scala/io/archivesunleashed/app/CommandLineApp.scala index 7210ff2b..52115629 100644 --- a/src/main/scala/io/archivesunleashed/app/CommandLineApp.scala +++ b/src/main/scala/io/archivesunleashed/app/CommandLineApp.scala @@ -70,14 +70,14 @@ class CmdAppConf(args: Seq[String]) extends ScallopConf(args) { * * @param e exception that Scallop throws */ + // scalastyle:off regex override def onError(e: Throwable): Unit = e match { case ScallopException(message) => - // scalastyle:off println(message) - // scalastyle:on throw new IllegalArgumentException() case other: Any => throw other } + // scalastyle:on regex mainOptions = Seq(input, output) var extractor = opt[String](descr = "extractor", required = true) diff --git a/src/main/scala/io/archivesunleashed/app/NERCombinedJson.scala b/src/main/scala/io/archivesunleashed/app/NERCombinedJson.scala index 4e29e86b..ca716d03 100644 --- a/src/main/scala/io/archivesunleashed/app/NERCombinedJson.scala +++ b/src/main/scala/io/archivesunleashed/app/NERCombinedJson.scala @@ -61,7 +61,7 @@ class NERCombinedJson extends Serializable { val tmpPath = new Path(tmpFile) // Merge part-files into single file. 
- FileUtil.copyMerge(hdfs, srcPath, hdfs, tmpPath, false, hadoopConfig, null) + FileUtil.copyMerge(hdfs, srcPath, hdfs, tmpPath, false, hadoopConfig, "") // Read file of JSON arrays, write into single JSON array of arrays. val fsInStream = hdfs.open(tmpPath) @@ -71,9 +71,14 @@ class NERCombinedJson extends Serializable { // now is a file of JSON val outFile = new BufferedWriter(new OutputStreamWriter(fsOutStream)) outFile.write("[") - val line = inFile.readLine() - if (line != null) outFile.write(line) - Iterator.continually(inFile.readLine()).takeWhile(_ != null).foreach(s => {outFile.write(", " + s)}) + val line: Option[String] = Option(inFile.readLine()) + line match { + case Some(line) => + outFile.write(line) + case None => + } + Iterator.continually(inFile.readLine()).takeWhile(Option(_) != None) + .foreach(s => {outFile.write(", " + s)}) outFile.write("]") outFile.close() diff --git a/src/main/scala/io/archivesunleashed/matchbox/ComputeImageSize.scala b/src/main/scala/io/archivesunleashed/matchbox/ComputeImageSize.scala index 1f8b5cdb..5a157d95 100644 --- a/src/main/scala/io/archivesunleashed/matchbox/ComputeImageSize.scala +++ b/src/main/scala/io/archivesunleashed/matchbox/ComputeImageSize.scala @@ -35,9 +35,11 @@ object ComputeImageSize { try { val in = new ByteArrayInputStream(bytes) val image = ImageIO.read(in) + // scalastyle:off null if (image == null) { nullImage } + // scalastyle:on null (image.getWidth(), image.getHeight()) } catch { case e: Throwable => { diff --git a/src/main/scala/io/archivesunleashed/matchbox/ComputeMD5.scala b/src/main/scala/io/archivesunleashed/matchbox/ComputeMD5.scala index d556c53d..92bf85ca 100644 --- a/src/main/scala/io/archivesunleashed/matchbox/ComputeMD5.scala +++ b/src/main/scala/io/archivesunleashed/matchbox/ComputeMD5.scala @@ -19,9 +19,9 @@ package io.archivesunleashed.matchbox import java.security.MessageDigest /** Compute MD5 checksum. 
*/ -// scalastyle: off +// scalastyle:off object.name object ComputeMD5 { -// scalastyle: on +// scalastyle:on object.name /** Computes the MD5 checksum of a byte array (eg. an image). * * For string data, it is better to use `StringUtils.computeHash()`. diff --git a/src/main/scala/io/archivesunleashed/matchbox/ExtractBoilerpipeText.scala b/src/main/scala/io/archivesunleashed/matchbox/ExtractBoilerpipeText.scala index 7206b140..e827c0ae 100644 --- a/src/main/scala/io/archivesunleashed/matchbox/ExtractBoilerpipeText.scala +++ b/src/main/scala/io/archivesunleashed/matchbox/ExtractBoilerpipeText.scala @@ -29,29 +29,13 @@ object ExtractBoilerpipeText { * @return text with boilerplate removed or Nil if the text is empty. */ def apply(input: String): String = { - try { - if (input.isEmpty) { - null - } else { - extract(input) - } - } catch { - case e: Exception => - throw new IOException("Caught exception processing input row " + e) - } - } - - /** Extracts boilerplate. - * - * @param input an html string possibly containing boilerpipe text - * @return filtered text or Nil if the text is empty. 
- */ - def extract (input: String): String = { - val text = DefaultExtractor.INSTANCE.getText(input).replaceAll("[\\r\\n]+", " ").trim() - if (text.isEmpty) { - null - } else { - text + val maybeInput = Option(input) + maybeInput match { + case Some(text) => + DefaultExtractor.INSTANCE + .getText(input).replaceAll("[\\r\\n]+", " ").trim() + case None => + "" } } } diff --git a/src/main/scala/io/archivesunleashed/matchbox/ExtractDate.scala b/src/main/scala/io/archivesunleashed/matchbox/ExtractDate.scala index 60a31717..f62e59a1 100644 --- a/src/main/scala/io/archivesunleashed/matchbox/ExtractDate.scala +++ b/src/main/scala/io/archivesunleashed/matchbox/ExtractDate.scala @@ -37,15 +37,18 @@ object ExtractDate { val yearSS = 4 val monthSS = 6 val daySS = 8 - if (fullDate == null) { - fullDate - } else {dateFormat match { - case YYYY => fullDate.substring(startSS, yearSS) - case MM => fullDate.substring(yearSS, monthSS) - case DD => fullDate.substring(monthSS, daySS) - case YYYYMM => fullDate.substring(startSS, monthSS) - case _ => fullDate.substring(startSS, daySS) - } + val maybeFullDate: Option[String] = Option(fullDate) + maybeFullDate match { + case Some(fulldate) => + dateFormat match { + case YYYY => fullDate.substring(startSS, yearSS) + case MM => fullDate.substring(yearSS, monthSS) + case DD => fullDate.substring(monthSS, daySS) + case YYYYMM => fullDate.substring(startSS, monthSS) + case _ => fullDate.substring(startSS, daySS) + } + case None => + "" } } } diff --git a/src/main/scala/io/archivesunleashed/matchbox/ExtractDomain.scala b/src/main/scala/io/archivesunleashed/matchbox/ExtractDomain.scala index 7362a856..63f11b21 100644 --- a/src/main/scala/io/archivesunleashed/matchbox/ExtractDomain.scala +++ b/src/main/scala/io/archivesunleashed/matchbox/ExtractDomain.scala @@ -27,18 +27,28 @@ object ExtractDomain { * @return domain host, source or null if url is null. 
*/ def apply(url: String, source: String = ""): String = { - if (url == null) return null - var host: String = null - try { - host = new URL(url).getHost - } catch { - case e: Exception => // it's okay + val maybeHost: Option[URL] = checkUrl(url) + val maybeSource: Option[URL] = checkUrl(source) + maybeHost match { + case Some(host) => + host.getHost + // url has no valid host: fall back to source + case None => + maybeSource match { + case Some(source) => + source.getHost + case None => + "" + } } - if (host != null || source == null) return host + } + + def checkUrl(url: String): Option[URL] = { try { - new URL(source).getHost + Some(new URL(url)) } catch { - case e: Exception => null + case e: Exception => + None } } } diff --git a/src/main/scala/io/archivesunleashed/matchbox/ExtractLinks.scala b/src/main/scala/io/archivesunleashed/matchbox/ExtractLinks.scala index 02a5a1ec..1bdb68a2 100644 --- a/src/main/scala/io/archivesunleashed/matchbox/ExtractLinks.scala +++ b/src/main/scala/io/archivesunleashed/matchbox/ExtractLinks.scala @@ -20,6 +20,7 @@ import java.io.IOException import org.jsoup.Jsoup import org.jsoup.select.Elements import scala.collection.mutable +import scala.Option /** Extracts links from a webpage given the HTML content (using Jsoup). */ object ExtractLinks { @@ -32,28 +33,30 @@ object ExtractLinks { * @return a sequence of (source, target, anchortext). */ def apply(src: String, html: String, base: String = ""): Seq[(String, String, String)] = { - try { - val output = mutable.MutableList[(String, String, String)]() - - // Basic input checking, return empty list if we fail. 
- if (src == null) return output - if (html.isEmpty) return output - - val doc = Jsoup.parse(html) - val links: Elements = doc.select("a[href]") - val it = links.iterator() - while (it.hasNext) { - val link = it.next() - if (base.nonEmpty) link.setBaseUri(base) - val target = link.attr("abs:href") - if (target.nonEmpty) { - output += ((src, target, link.text)) - } + val srcMaybe: Option[String] = Option(src) + val htmlMaybe: Option[String] = Option(html) + val output = mutable.MutableList[(String, String, String)]() + srcMaybe match { + case Some(valid_src) => + htmlMaybe match { + case Some (valid_html) => + val doc = Jsoup.parse(valid_html) + val links: Elements = doc.select("a[href]") + val it = links.iterator() + while (it.hasNext) { + val link = it.next() + if (base.nonEmpty) link.setBaseUri(base) + val target = link.attr("abs:href") + if (target.nonEmpty) { + output += ((valid_src, target, link.text)) + } + } + case None => + // do nothing + } + case None => + // do nothing } - output - } catch { - case e: Exception => - throw new IOException("Caught exception processing input ", e); - } + output } } diff --git a/src/main/scala/io/archivesunleashed/matchbox/NERClassifier.scala b/src/main/scala/io/archivesunleashed/matchbox/NERClassifier.scala index 410e0063..f61ce5e9 100644 --- a/src/main/scala/io/archivesunleashed/matchbox/NERClassifier.scala +++ b/src/main/scala/io/archivesunleashed/matchbox/NERClassifier.scala @@ -25,6 +25,7 @@ import edu.stanford.nlp.ling.{CoreAnnotations, CoreLabel} import java.util import scala.collection.mutable +// scalastyle:off /** Reads in a text string, and returns entities identified by the configured Stanford NER classifier. 
*/ object NERClassifier { @@ -114,3 +115,4 @@ object NERClassifier { } } } +// scalastyle:on diff --git a/src/main/scala/io/archivesunleashed/matchbox/RemoveHTML.scala b/src/main/scala/io/archivesunleashed/matchbox/RemoveHTML.scala index fa93b3cf..647ba323 100644 --- a/src/main/scala/io/archivesunleashed/matchbox/RemoveHTML.scala +++ b/src/main/scala/io/archivesunleashed/matchbox/RemoveHTML.scala @@ -28,11 +28,12 @@ object RemoveHTML { * @return content without html markup. */ def apply(content: String): String = { - try { - Jsoup.parse(content).text().replaceAll("[\\r\\n]+", " ") - } - catch { - case e: Exception => throw new IOException("Caught exception processing input row ", e) + val maybeContent: Option[String] = Option(content) + maybeContent match { + case Some(content) => + Jsoup.parse(content).text().replaceAll("[\\r\\n]+", " ") + case None => + "" } } } diff --git a/src/main/scala/io/archivesunleashed/matchbox/RemoveHttpHeader.scala b/src/main/scala/io/archivesunleashed/matchbox/RemoveHttpHeader.scala index 32652f88..f1006727 100644 --- a/src/main/scala/io/archivesunleashed/matchbox/RemoveHttpHeader.scala +++ b/src/main/scala/io/archivesunleashed/matchbox/RemoveHttpHeader.scala @@ -26,16 +26,16 @@ object RemoveHttpHeader { * @return string with HTTP headers removed. 
*/ def apply(content: String): String = { - try { - if (content.startsWith("HTTP/")){ - content.substring(content.indexOf(headerEnd) + headerEnd.length) - } else { - content - } - } catch { - case e: Exception => { - null - } + val maybeContent: Option[String] = Option(content) + maybeContent match { + case Some(content) => + if (content.startsWith("HTTP/")){ + content.substring(content.indexOf(headerEnd) + headerEnd.length) + } else { + content + } + case None => + "" } } } diff --git a/src/main/scala/io/archivesunleashed/matchbox/TupleFormatter.scala b/src/main/scala/io/archivesunleashed/matchbox/TupleFormatter.scala index f98ede92..ed680b19 100644 --- a/src/main/scala/io/archivesunleashed/matchbox/TupleFormatter.scala +++ b/src/main/scala/io/archivesunleashed/matchbox/TupleFormatter.scala @@ -25,23 +25,25 @@ import ops.tuple.ToList object TupleFormatter { /** Borrowed from shapeless' flatten.scala example. */ trait LowPriorityFlatten extends Poly1 { + // scalastyle:off public.methods.have.type implicit def default[T] = at[T](Tuple1(_)) } /** Flattens nested tuples, taking an argument a tuple of any size. */ - // scalastyle:off + // scalastyle:off object.name object flatten extends LowPriorityFlatten { - // scalastyle:on + // scalastyle:on object.name implicit def caseTuple[T <: Product](implicit fm: FlatMapper[T, flatten.type]) = at[T](_.flatMap(flatten)) } /** Transforms a tuple into a tab-delimited string, flattening any nesting, * taking an argument a tuple of any size. 
*/ - // scalastyle:off + // scalastyle:off object.name object tabDelimit extends Poly1 { + // scalastyle:on object.name implicit def caseTuple[T <: Product, Lub](implicit tl: ToList[T, Lub], fm: FlatMapper[T, flatten.type]) = at[T](flatten(_).asInstanceOf[Product].productIterator.mkString("\t")) } - // scalastyle:on + // scalastyle:on public.methods.have.type } diff --git a/src/main/scala/io/archivesunleashed/matchbox/package.scala b/src/main/scala/io/archivesunleashed/matchbox/package.scala index 4bdd629e..8d6a21b1 100644 --- a/src/main/scala/io/archivesunleashed/matchbox/package.scala +++ b/src/main/scala/io/archivesunleashed/matchbox/package.scala @@ -28,10 +28,10 @@ import scala.xml.Utility._ package object matchbox { implicit class WWWLink(s: String) { def removePrefixWWW(): String = { - if (s == null) { - null - } else { - s.replaceAll("^\\s*www\\.", "") + val maybeString: Option[String] = Option(s) + maybeString match { + case Some(s) => s.replaceAll("^\\s*www\\.", "") + case None => "" } } diff --git a/src/main/scala/io/archivesunleashed/package.scala b/src/main/scala/io/archivesunleashed/package.scala index f851f916..0b6c7526 100644 --- a/src/main/scala/io/archivesunleashed/package.scala +++ b/src/main/scala/io/archivesunleashed/package.scala @@ -60,8 +60,10 @@ package object archivesunleashed { * @return an RDD of JValue (json objects) for mapping. */ def loadTweets(path: String, sc: SparkContext): RDD[JValue] = + // scalastyle:off null sc.textFile(path).filter(line => !line.startsWith("{\"delete\":")) .map(line => try { parse(line) } catch { case e: Exception => null }).filter(x => x != null) + // scalastyle:on null } /** A Wrapper class around RDD to simplify counting. 
*/ diff --git a/src/main/scala/io/archivesunleashed/util/TweetUtils.scala b/src/main/scala/io/archivesunleashed/util/TweetUtils.scala index c12e3e9c..6e631b15 100644 --- a/src/main/scala/io/archivesunleashed/util/TweetUtils.scala +++ b/src/main/scala/io/archivesunleashed/util/TweetUtils.scala @@ -31,15 +31,15 @@ object TweetUtils { val user = "user" implicit lazy val formats = org.json4s.DefaultFormats /** Get Twitter status id. */ - def id(): String = try { (tweet \ "id_str").extract[String] } catch { case e: Exception => null} + def id(): String = try { (tweet \ "id_str").extract[String] } catch { case e: Exception => ""} /** Get the date a status was created. */ - def createdAt(): String = try { (tweet \ "created_at").extract[String] } catch { case e: Exception => null} + def createdAt(): String = try { (tweet \ "created_at").extract[String] } catch { case e: Exception => ""} /** Get the status text. */ - def text(): String = try { (tweet \ "text").extract[String] } catch { case e: Exception => null} + def text(): String = try { (tweet \ "text").extract[String] } catch { case e: Exception => ""} /** Get the language code (ISO 639-1). */ - def lang: String = try { (tweet \ "lang").extract[String] } catch { case e: Exception => null} + def lang: String = try { (tweet \ "lang").extract[String] } catch { case e: Exception => ""} /** Get the username of the user who wrote the status. */ - def username(): String = try { (tweet \ user \ "screen_name").extract[String] } catch { case e: Exception => null} + def username(): String = try { (tweet \ user \ "screen_name").extract[String] } catch { case e: Exception => ""} /** Check if user of status is "verified" (true or false). */ def isVerifiedUser(): Boolean = try { (tweet \ user \ "verified").extract[Boolean] } catch { case e: Exception => false} /** Get the number of followers the user has. 
*/ diff --git a/src/test/scala/io/archivesunleashed/app/CommandLineAppTest.scala b/src/test/scala/io/archivesunleashed/app/CommandLineAppTest.scala index bb0e45a0..ddefd9e5 100644 --- a/src/test/scala/io/archivesunleashed/app/CommandLineAppTest.scala +++ b/src/test/scala/io/archivesunleashed/app/CommandLineAppTest.scala @@ -72,13 +72,13 @@ class CommandLineAppTest extends FunSuite with BeforeAndAfter { } test("command line app tests") { - for (a <- testSuccessCmds) { + for {a <- testSuccessCmds} { app.CommandLineAppRunner.test(a, sc) assert(Files.exists(Paths.get(outputDir))) FileUtils.deleteDirectory(new File(outputDir)) } - for (a <- testFailCmds) { + for {a <- testFailCmds} { try { app.CommandLineAppRunner.test(a, sc) assert(false) diff --git a/src/test/scala/io/archivesunleashed/df/SaveImageTest.scala b/src/test/scala/io/archivesunleashed/df/SaveImageTest.scala index 2614a4ad..0205cae2 100644 --- a/src/test/scala/io/archivesunleashed/df/SaveImageTest.scala +++ b/src/test/scala/io/archivesunleashed/df/SaveImageTest.scala @@ -77,7 +77,7 @@ class SaveImageTest extends FunSuite with BeforeAndAfter { assert(originalImage.getHeight() == savedImage.getHeight()) assert(originalImage.getWidth() == savedImage.getWidth()) - for (y <- 0 until originalImage.getHeight()) { + for {y <- 0 until originalImage.getHeight()} { for (x <- 0 until originalImage.getWidth()) { assert(originalImage.getRGB(x, y) == savedImage.getRGB(x, y)) } diff --git a/src/test/scala/io/archivesunleashed/matchbox/ComputeImageSizeTest.scala b/src/test/scala/io/archivesunleashed/matchbox/ComputeImageSizeTest.scala index c1488fae..50e52614 100644 --- a/src/test/scala/io/archivesunleashed/matchbox/ComputeImageSizeTest.scala +++ b/src/test/scala/io/archivesunleashed/matchbox/ComputeImageSizeTest.scala @@ -42,6 +42,8 @@ class ComputeImageSizeTest extends FunSuite { val emptyImageSize = (0, 0) assert(ComputeImageSize(image) == imageSize) assert(ComputeImageSize(Array[Byte](0,0,0)) == emptyImageSize) + // 
scalastyle:off null assert(ComputeImageSize(null) == emptyImageSize) + // scalastyle:on null } } diff --git a/src/test/scala/io/archivesunleashed/matchbox/ExtractBoilerPipeTextTest.scala b/src/test/scala/io/archivesunleashed/matchbox/ExtractBoilerPipeTextTest.scala index 454999c0..418aee30 100644 --- a/src/test/scala/io/archivesunleashed/matchbox/ExtractBoilerPipeTextTest.scala +++ b/src/test/scala/io/archivesunleashed/matchbox/ExtractBoilerPipeTextTest.scala @@ -31,11 +31,9 @@ class ExtractBoilerPipeTextTest extends FunSuite { test("Collects boilerpipe") { assert(ExtractBoilerpipeText(text) == boiler) - assert(ExtractBoilerpipeText("") == null) - assert(ExtractBoilerpipeText("All Rights Reserved.") == null) - val caught = intercept[IOException] { - ExtractBoilerpipeText(null) - } - assert(caught.getMessage == "Caught exception processing input row java.lang.NullPointerException") + // scalastyle:off null + assert(ExtractBoilerpipeText(null) == "") + // scalastyle:on null + assert(ExtractBoilerpipeText("All Rights Reserved.") == "") } } diff --git a/src/test/scala/io/archivesunleashed/matchbox/ExtractDateTest.scala b/src/test/scala/io/archivesunleashed/matchbox/ExtractDateTest.scala index fdcfe2b9..afafa5fd 100644 --- a/src/test/scala/io/archivesunleashed/matchbox/ExtractDateTest.scala +++ b/src/test/scala/io/archivesunleashed/matchbox/ExtractDateTest.scala @@ -34,7 +34,9 @@ class ExtractDateTest extends FunSuite { assert(ExtractDate(date, DD) == "04") assert(ExtractDate(date, YYYYMM) == "201512") assert(ExtractDate(date, YYYYMMDD) == date) - assert(ExtractDate(null, YYYYMMDD) == null) + // scalastyle:off null + assert(ExtractDate(null, YYYYMMDD) == "") + // scalastyle:on null } test("more perfect") { diff --git a/src/test/scala/io/archivesunleashed/matchbox/ExtractDomainTest.scala b/src/test/scala/io/archivesunleashed/matchbox/ExtractDomainTest.scala index 0e85cca7..67382dda 100644 --- a/src/test/scala/io/archivesunleashed/matchbox/ExtractDomainTest.scala +++ 
b/src/test/scala/io/archivesunleashed/matchbox/ExtractDomainTest.scala @@ -30,11 +30,11 @@ class ExtractDomainTest extends FunSuite { ("http://www.umiacs.umd.edu/~jimmylin/", umiacs), ("https://github.com/lintool", "github.com"), ("http://ianmilligan.ca/2015/05/04/iipc-2015-slides-for-warcs-wats-and-wgets-presentation/", "ianmilligan.ca"), - (index, null)).result() + (index, "")).result() private val data2 = Seq.newBuilder.+=( (index, "http://www.umiacs.umd.edu/~jimmylin/", umiacs), - (index, "lintool/", null)).result() + (index, "lintool/", "")).result() test("simple") { data1.foreach { @@ -49,7 +49,9 @@ class ExtractDomainTest extends FunSuite { } test("error") { - assert(ExtractDomain(null) == null) - assert(ExtractDomain(index, null) == null) + // scalastyle:off null + assert(ExtractDomain(null) == "") + assert(ExtractDomain(index, null) == "") + // scalastyle:on null } } diff --git a/src/test/scala/io/archivesunleashed/matchbox/ExtractImageLinksTest.scala b/src/test/scala/io/archivesunleashed/matchbox/ExtractImageLinksTest.scala index ef197302..6f429fec 100644 --- a/src/test/scala/io/archivesunleashed/matchbox/ExtractImageLinksTest.scala +++ b/src/test/scala/io/archivesunleashed/matchbox/ExtractImageLinksTest.scala @@ -49,7 +49,9 @@ class ExtractImageLinksTest extends FunSuite { """Image here: and another and """ assert(ExtractImageLinks("", "") == Nil) // Need way of creating an exception here + // scalastyle:off null val invalid = null + // scalastyle:on null intercept[IOException] { ExtractImageLinks(invalid, fragment) } diff --git a/src/test/scala/io/archivesunleashed/matchbox/ExtractLinksTest.scala b/src/test/scala/io/archivesunleashed/matchbox/ExtractLinksTest.scala index 32413ba4..e89300f4 100644 --- a/src/test/scala/io/archivesunleashed/matchbox/ExtractLinksTest.scala +++ b/src/test/scala/io/archivesunleashed/matchbox/ExtractLinksTest.scala @@ -24,6 +24,7 @@ import org.scalatest.FunSuite import org.scalatest.junit.JUnitRunner import 
scala.collection.mutable +import scala.Byte @RunWith(classOf[JUnitRunner]) class ExtractLinksTest extends FunSuite { @@ -57,10 +58,11 @@ class ExtractLinksTest extends FunSuite { } test("errors") { + val bytes: Array[Byte] = "wronglyTyped".getBytes() val invalid: String = "Here is a fake url bogus search engine." + // scalastyle:off null assert(ExtractLinks(null, fragment, fooFragment) == mutable.MutableList[(String, String, String)]()) + // scalastyle:on null assert(ExtractLinks("", "", fooFragment) == mutable.MutableList[(String, String, String)]()) - // invalid url should throw exception - need more information here - intercept[IOException] { ExtractLinks("", null, "FROTSTEDwww.foobar.org/index.html") } } } diff --git a/src/test/scala/io/archivesunleashed/matchbox/RemoveHTMLTest.scala b/src/test/scala/io/archivesunleashed/matchbox/RemoveHTMLTest.scala index fe437013..610c1758 100644 --- a/src/test/scala/io/archivesunleashed/matchbox/RemoveHTMLTest.scala +++ b/src/test/scala/io/archivesunleashed/matchbox/RemoveHTMLTest.scala @@ -38,9 +38,9 @@ class RemoveHTMLTest extends FunSuite { val removed = RemoveHTML(html) assert(removed == "Here is some... 
HTML") - val caught = intercept[IOException] { - RemoveHTML(null) - } - assert(caught.getMessage == "Caught exception processing input row ") + // scalastyle:off null + val empty = RemoveHTML(null) + // scalastyle:on null + assert(empty == "") } } diff --git a/src/test/scala/io/archivesunleashed/matchbox/RemoveHttpHeaderTest.scala b/src/test/scala/io/archivesunleashed/matchbox/RemoveHttpHeaderTest.scala index 3751967c..eeae111e 100644 --- a/src/test/scala/io/archivesunleashed/matchbox/RemoveHttpHeaderTest.scala +++ b/src/test/scala/io/archivesunleashed/matchbox/RemoveHttpHeaderTest.scala @@ -28,10 +28,11 @@ class RemoveHttpHeaderTest extends FunSuite { val nohttp = "This has no Http" val removed = RemoveHttpHeader(header) val unchanged = RemoveHttpHeader(nohttp) + // scalastyle:off null val error = RemoveHttpHeader(null) - + // scalastyle:on null assert(removed == "Hello content") assert(unchanged == nohttp) - assert( error == null ) + assert( error == "" ) } } diff --git a/src/test/scala/io/archivesunleashed/matchbox/StringUtilsTest.scala b/src/test/scala/io/archivesunleashed/matchbox/StringUtilsTest.scala index 66b94c8a..24cda009 100644 --- a/src/test/scala/io/archivesunleashed/matchbox/StringUtilsTest.scala +++ b/src/test/scala/io/archivesunleashed/matchbox/StringUtilsTest.scala @@ -27,14 +27,18 @@ import org.scalatest.junit.JUnitRunner class StringUtilsTest extends FunSuite { test("remove prefix") { val s: String = "www.example.com" + // scalastyle:off null val n: String = null + // scalastyle:on null assert(s.removePrefixWWW() == "example.com") - assert(n.removePrefixWWW() == null) + assert(n.removePrefixWWW() == "") } test("create hash") { val invalid: String = "AC&D\""; + // scalastyle:off null val except: String = null; + // scalastyle:on null assert(invalid.escapeInvalidXML() == "A<B>C&D""); val caught = intercept[IOException] {except.escapeInvalidXML()} assert (caught.getMessage == "Caught exception processing input row ");