Skip to content

Commit

Permalink
Merge pull request #4 from gaurav/clean-up-code
Browse files Browse the repository at this point in the history
Reorganized code so it's easier to read.
  • Loading branch information
gaurav committed Jun 13, 2022
2 parents f591be1 + 532e277 commit 52443b1
Show file tree
Hide file tree
Showing 6 changed files with 275 additions and 177 deletions.
3 changes: 3 additions & 0 deletions .scalafmt.conf
Original file line number Diff line number Diff line change
@@ -1,2 +1,5 @@
version = 3.5.8
runner.dialect = scala213

# Don't keep the params lined up with each other.
align.openParenDefnSite = false
83 changes: 83 additions & 0 deletions src/main/scala/org/renci/babel/validator/Comparer.scala
Original file line number Diff line number Diff line change
@@ -0,0 +1,83 @@
package org.renci.babel.validator

import com.typesafe.scalalogging.LazyLogging
import org.renci.babel.validator.model.Compendium
import zio.blocking.Blocking
import zio.{Chunk, ZIO}

/** Methods in this class can be used to compare results between two compendia.
*/
object Comparer extends LazyLogging {

/** Formats the change between two counts as a signed difference and a signed
  * percentage.
  *
  * @param count
  *   The current count.
  * @param countPrev
  *   The previous count; the percentage is computed relative to this value.
  * @return
  *   The signed difference, a tab, and the signed percentage change with two
  *   decimal places followed by `%`.
  */
def relativePercentChange(count: Long, countPrev: Long): String = {
  val delta = count - countPrev
  val percent = delta.toDouble / countPrev * 100
  "%+d\t%+2.2f%%".format(delta, percent)
}

/** The counts obtained for one file in the current and the previous output.
  *
  * @param filename
  *   The name of the file being compared.
  * @param count
  *   The count in the current output.
  * @param prevCount
  *   The count in the previous output.
  */
case class LengthComparison(filename: String, count: Long, prevCount: Long) {

  /** The change from `prevCount` to `count`, as formatted by
    * `Comparer.relativePercentChange`.
    */
  val relativePercentChange: String =
    Comparer.relativePercentChange(count, prevCount)

  /** Tab-separated report line: filename, count, previous count and change. */
  override val toString: String =
    Seq(filename, count.toString, prevCount.toString, relativePercentChange)
      .mkString("\t")
}

/** Retrieves the counts of two compendia and pairs them in a
  * [[LengthComparison]].
  *
  * The count of `summary` is evaluated first, then the count of `prevSummary`.
  *
  * @param filename
  *   The filename to record in the comparison.
  * @param summary
  *   The current compendium.
  * @param prevSummary
  *   The previous compendium.
  * @return
  *   An effect producing the comparison, failing if either count fails.
  */
def compareLengths(
  filename: String,
  summary: Compendium,
  prevSummary: Compendium
): ZIO[Blocking, Throwable, LengthComparison] =
  summary.count.flatMap { current =>
    prevSummary.count.map { previous =>
      LengthComparison(filename, current, previous)
    }
  }

/** The types found in one file in the current and the previous output.
  *
  * @param filename
  *   The name of the file being compared.
  * @param types
  *   Every type value collected from the current output (may contain
  *   duplicates).
  * @param prevTypes
  *   Every type value collected from the previous output (may contain
  *   duplicates).
  */
case class TypeComparison(
  filename: String,
  types: Chunk[String],
  prevTypes: Chunk[String]
) {

  /** The distinct types in the current output. */
  val typesSet: Set[String] = types.toSet

  /** The distinct types in the previous output. */
  val prevTypesSet: Set[String] = prevTypes.toSet

  /** Types present in the current output but not in the previous one. */
  val added: Set[String] = typesSet -- prevTypesSet

  /** Types present in the previous output but not in the current one. */
  val deleted: Set[String] = prevTypesSet -- typesSet

  /** A human-readable summary of the added and deleted types. */
  val changeString: String = (added.toSeq, deleted.toSeq) match {
    case (Seq(), Seq())      => "No change"
    case (addedTypes, Seq()) => s"Added: ${addedTypes}"
    // Only deletions: report the deleted types (not the empty added list).
    case (Seq(), deletedTypes) => s"Deleted: ${deletedTypes}"
    case (addedTypes, deletedTypes) =>
      s"Added: ${addedTypes}, Deleted: ${deletedTypes}"
  }

  /** Tab-separated report line: filename, current distinct types with the
    * total number of collected values, the same for the previous output, and
    * the change summary.
    */
  override val toString: String =
    s"${filename}\t${typesSet.mkString(", ")} (${types.length})\t${prevTypesSet
        .mkString(", ")} (${prevTypes.length})\t${changeString}"
}

/** Collects the `type` of every record in two compendia and pairs the results
  * in a [[TypeComparison]].
  *
  * After each collection the compendium's `types` effect is also run; its
  * result is discarded, but a failure is logged and then propagated.
  *
  * @param filename
  *   The filename to record in the comparison.
  * @param summary
  *   The current compendium.
  * @param prevSummary
  *   The previous compendium.
  * @return
  *   An effect producing the comparison, failing if any step fails.
  */
def compareTypes(
  filename: String,
  summary: Compendium,
  prevSummary: Compendium
): ZIO[Blocking, Throwable, TypeComparison] = {
  for {
    currentTypes <- summary.records.map(record => record.`type`).runCollect
    _ <- summary.types.catchAll { failure =>
      logger.error(s"Types error: ${failure}")
      ZIO.fail(failure)
    }
    previousTypes <- prevSummary.records
      .map(record => record.`type`)
      .runCollect
    _ <- prevSummary.types.catchAll { failure =>
      logger.error(s"prevTypes error: ${failure}")
      ZIO.fail(failure)
    }
  } yield TypeComparison(filename, currentTypes, previousTypes)
}
}
116 changes: 116 additions & 0 deletions src/main/scala/org/renci/babel/validator/Reporter.scala
Original file line number Diff line number Diff line change
@@ -0,0 +1,116 @@
package org.renci.babel.validator

import com.typesafe.scalalogging.LazyLogging
import org.renci.babel.validator.Validator.Conf
import org.renci.babel.validator.model.{BabelOutput, Compendium}
import zio.ZIO
import zio.blocking.Blocking
import zio.console.Console
import zio.stream.ZStream

import java.io.{FileOutputStream, PrintStream}

/** Functions for reporting on the differences between two input files.
*/
object Reporter extends LazyLogging {

/** Formats the change between two counts for display.
  *
  * @param count
  *   The current count.
  * @param countPrev
  *   The previous count, used as the baseline of the percentage.
  * @return
  *   The signed difference and the signed percentage change (two decimal
  *   places, followed by `%`), separated by a tab.
  */
def relativePercentChange(count: Long, countPrev: Long): String = {
  val difference = count - countPrev
  val percentage = difference.toDouble / countPrev * 100
  "%+d\t%+2.2f%%".format(difference, percentage)
}

/** Decides whether a filename should be included in the results.
  *
  * The rules are:
  *   1. If any filter-in prefixes are configured (`conf.filterIn`), a
  *      filename is included only if it starts with one of them.
  *   1. Otherwise, a filename is included unless it starts with one of the
  *      filter-out prefixes (`conf.filterOut`).
  *
  * Prefix matching is case-sensitive.
  *
  * @param conf
  *   The configuration holding the filter-in and filter-out prefixes.
  * @param filename
  *   The filename to test.
  * @return
  *   `true` if the filename should be processed, `false` if it should be
  *   skipped.
  */
def filterFilename(conf: Conf, filename: String): Boolean = {
  val includePrefixes = conf.filterIn.getOrElse(List())
  val excludePrefixes = conf.filterOut.getOrElse(List())

  if (includePrefixes.nonEmpty)
    includePrefixes.exists(prefix => filename.startsWith(prefix))
  else
    !excludePrefixes.exists(prefix => filename.startsWith(prefix))
}

/** Pairs up the compendia of two BabelOutputs by filename.
  *
  * Only compendia whose filename occurs in BOTH outputs are returned; the
  * result follows the order of `babelOutput.compendia`.
  *
  * TODO: modify this so we return every compendium found in EITHER
  * BabelOutput.
  *
  * @param babelOutput
  *   The current Babel output.
  * @param babelPrevOutput
  *   The previous Babel output.
  * @return
  *   One `(filename, current compendium, previous compendium)` triple per
  *   matching pair.
  */
def retrievePairedCompendiaSummaries(
  babelOutput: BabelOutput,
  babelPrevOutput: BabelOutput
): Seq[(String, Compendium, Compendium)] =
  babelOutput.compendia.flatMap { current =>
    babelPrevOutput.compendia
      .filter(previous => previous.filename == current.filename)
      .map(previous => (current.filename, current, previous))
  }

/** Compares every compendium found in both the current and the previous Babel
  * output and writes a tab-separated report.
  *
  * The report goes to the file given by `conf.output` if one is set, and to
  * standard output otherwise. Compendia rejected by [[filterFilename]] are
  * logged and skipped. Up to `conf.nCores()` compendia are compared in
  * parallel, so the order of the report lines is not deterministic.
  *
  * @param conf
  *   The configuration naming the two Babel outputs, the optional report file,
  *   the filename filters and the parallelism.
  * @return
  *   An effect that writes the report, failing if the report cannot be opened
  *   or any comparison fails.
  */
def diffResults(conf: Conf): ZIO[Blocking with Console, Throwable, Unit] = {
  val babelOutput = new BabelOutput(conf.babelOutput())
  val babelPrevOutput = new BabelOutput(conf.babelPrevOutput())
  val pairedSummaries =
    retrievePairedCompendiaSummaries(babelOutput, babelPrevOutput)

  // Open the report inside the effect so that it is only created when the
  // effect actually runs, and so that it can be paired with a release step.
  val openReport: ZIO[Any, Throwable, PrintStream] = ZIO.effect {
    conf.output.toOption match {
      case Some(file) => new PrintStream(new FileOutputStream(file))
      case _          => System.out
    }
  }

  // A file-backed report is closed; standard output is only flushed because
  // it does not belong to us.
  val closeReport: PrintStream => ZIO[Any, Nothing, Unit] = report =>
    ZIO.effectTotal {
      if (report eq System.out) report.flush() else report.close()
    }

  openReport.bracket(closeReport) { output =>
    output.println("Filename\tCount\tPrevCount\tDiff\tPercentageChange")
    ZStream
      .fromIterable(pairedSummaries)
      .mapMParUnordered(conf.nCores()) {
        case (filename, summary, prevSummary) =>
          if (filterFilename(conf, filename)) {
            for {
              lengthComparison <- Comparer.compareLengths(
                filename,
                summary,
                prevSummary
              )
              typeComparison <- Comparer.compareTypes(
                filename,
                summary,
                prevSummary
              )
            } yield {
              // Comparisons run in parallel: keep the two lines that belong
              // to one file next to each other in the report.
              output.synchronized {
                output.println(lengthComparison.toString)
                output.println(typeComparison.toString)
              }
            }
          } else {
            logger.info(s"Skipping ${filename}")
            ZIO.succeed(())
          }
      }
      .runDrain
  }
}
}
127 changes: 2 additions & 125 deletions src/main/scala/org/renci/babel/validator/Validator.scala
Original file line number Diff line number Diff line change
@@ -1,14 +1,12 @@
package org.renci.babel.validator

import com.typesafe.scalalogging._
import org.renci.babel.validator.model.{BabelOutput, Compendium}
import org.rogach.scallop._
import zio._
import zio.blocking.Blocking
import zio.console._
import zio.stream.ZStream

import java.io.{File, FileOutputStream, PrintStream}
import java.io.File

object Validator extends zio.App with LazyLogging {
class Conf(args: Seq[String]) extends ScallopConf(args) {
Expand All @@ -35,127 +33,6 @@ object Validator extends zio.App with LazyLogging {
verify()
}

/** Decides whether a filename should be included in the results.
  *
  * If any filter-in prefixes are configured (`conf.filterIn`), only filenames
  * starting with one of them are included. Otherwise every filename is
  * included except those starting with one of the filter-out prefixes
  * (`conf.filterOut`). Prefix matching is case-sensitive.
  *
  * @param conf
  *   The configuration holding the filter-in and filter-out prefixes.
  * @param filename
  *   The filename to test.
  * @return
  *   `true` if the filename should be processed, `false` otherwise.
  */
def filterFilename(conf: Conf, filename: String): Boolean = {
  val allowed = conf.filterIn.getOrElse(List())
  val blocked = conf.filterOut.getOrElse(List())
  val matchesPrefix: String => Boolean = prefix => filename.startsWith(prefix)

  if (allowed.nonEmpty) allowed.exists(matchesPrefix)
  else !blocked.exists(matchesPrefix)
}

/** Pairs up the compendium summaries of two BabelOutputs by filename.
  *
  * Only summaries whose filename occurs in both outputs are returned; the
  * result follows the order of `babelOutput.compendiaSummary`.
  *
  * @param babelOutput
  *   The current Babel output.
  * @param babelPrevOutput
  *   The previous Babel output.
  * @return
  *   One `(filename, current summary, previous summary)` triple per matching
  *   pair.
  */
def retrievePairedCompendiaSummaries(
  babelOutput: BabelOutput,
  babelPrevOutput: BabelOutput
): Seq[(String, Compendium#Summary, Compendium#Summary)] =
  babelOutput.compendiaSummary.flatMap { current =>
    babelPrevOutput.compendiaSummary
      .filter(previous => previous.filename == current.filename)
      .map(previous => (current.filename, current, previous))
  }

/** Formats the change between two counts for display.
  *
  * @param count
  *   The current count.
  * @param countPrev
  *   The previous count, used as the baseline of the percentage.
  * @return
  *   The signed difference and the signed percentage change (four decimal
  *   places, followed by `%`), separated by a tab.
  */
def relativePercentChange(count: Long, countPrev: Long): String = {
  val delta = count - countPrev
  val percent = delta.toDouble / countPrev * 100
  "%+d\t%+2.4f%%".format(delta, percent)
}

/** Compares every compendium summary found in both the current and the
  * previous Babel output and writes a tab-separated report.
  *
  * The report goes to the file given by `conf.output` if one is set, and to
  * standard output otherwise. For each file accepted by [[filterFilename]] a
  * line with the counts is written; a second line with the types and their
  * changes is written unless reading the current types produced errors, in
  * which case the errors are logged instead. Files rejected by the filter are
  * logged and skipped.
  *
  * @param conf
  *   The configuration naming the two Babel outputs, the optional report file,
  *   the filename filters and the parallelism (`conf.nCores()`).
  * @return
  *   An effect that writes the report.
  */
def diffResults(conf: Conf): ZIO[Blocking with Console, Throwable, Unit] = {
  val babelOutput = new BabelOutput(conf.babelOutput())
  val babelPrevOutput = new BabelOutput(conf.babelPrevOutput())
  val output = conf.output.toOption match {
    case Some(file) => new PrintStream(new FileOutputStream(file))
    case _          => System.out
  }

  val pairedSummaries =
    retrievePairedCompendiaSummaries(babelOutput, babelPrevOutput)
  output.println("Filename\tCount\tPrevCount\tDiff\tPercentageChange")
  ZStream
    .fromIterable(pairedSummaries)
    .mapMParUnordered(conf.nCores()) {
      case (filename, summary, prevSummary) =>
        if (filterFilename(conf, filename)) {
          for {
            count <- summary.countZIO
            prevCount <- prevSummary.countZIO
            typesChunk <- (for {
              row: Compendium#CompendiumRecord <-
                summary.typesZStream.collectRight
            } yield (row.`type`)).runCollect
            typesErrors <- summary.typesZStream.collectLeft.runCollect
            prevTypesChunk <- (for {
              row: Compendium#CompendiumRecord <-
                prevSummary.typesZStream.collectRight
            } yield (row.`type`)).runCollect
          } yield {
            output.println(
              s"${filename}\t${count}\t${prevCount}\t${relativePercentChange(count, prevCount)}"
            )

            if (typesErrors.nonEmpty) {
              logger.error(s"Types errors: ${typesErrors}")
            } else {
              val types = typesChunk.toSet
              val prevTypes = prevTypesChunk.toSet

              val added = types -- prevTypes
              val deleted = prevTypes -- types
              val changeString = (added.toSeq, deleted.toSeq) match {
                case (Seq(), Seq())      => "No change"
                case (addedTypes, Seq()) => s"Added: ${addedTypes}"
                // Only deletions: report the deleted types, not the empty
                // added list.
                case (Seq(), deletedTypes) => s"Deleted: ${deletedTypes}"
                case (addedTypes, deletedTypes) =>
                  s"Added: ${addedTypes}, Deleted: ${deletedTypes}"
              }

              output.println(
                s"${filename}\t${types.mkString(", ")} (${typesChunk.length})\t${prevTypes
                    .mkString(", ")} (${prevTypesChunk.length})\t${changeString}"
              )
            }
          }
        } else {
          logger.info(s"Skipping ${filename}")
          ZIO.succeed(())
        }
    }
    .runDrain
}

// TODO:
// - Add processing time, preferably broken down by compendium or something (maybe just emit logs?)
// - Some stats on memory usage would be great too
Expand All @@ -168,6 +45,6 @@ object Validator extends zio.App with LazyLogging {
/** Entry point: parses the command-line arguments into a `Conf`, runs
  * `Reporter.diffResults` on it and converts the outcome into an exit code.
  *
  * @param args
  *   The command-line arguments.
  * @return
  *   An effect producing the exit code of the program.
  */
def run(
  args: List[String]
): URIO[Blocking with Console with Console, ExitCode] = {
  // Build the report effect exactly once; a second, discarded call would
  // parse the arguments and open the report output a second time.
  Reporter.diffResults(new Conf(args)).exitCode
}
}
25 changes: 18 additions & 7 deletions src/main/scala/org/renci/babel/validator/model/BabelOutput.scala
Original file line number Diff line number Diff line change
Expand Up @@ -5,22 +5,33 @@ import java.io.File
/** A BabelOutput is a directory containing Babel output results.
  *
  * @param root
  *   The root directory of the Babel output.
  */
class BabelOutput(root: File) {

  /** A description of this BabelOutput. */
  override def toString: String = {
    s"BabelOutput(${root}) containing ${compendia.length} compendia"
  }

  /** Return a list of all the files in a subdirectory of this BabelOutput.
    * @param dirName
    *   The subdirectory name.
    * @return
    *   The list of files in the {BabelOutput root}/{subdirectory}, or an empty
    *   list if that subdirectory cannot be listed.
    */
  def getFilesInDir(dirName: String): Seq[String] = {
    val dir = new File(root, dirName)
    // File.list() returns null (rather than throwing) when the path is not a
    // directory or cannot be read; treat that as "no files".
    // TODO: this would be a good place to look for out-of-place files.
    Option(dir.list()).fold(Seq.empty[String])(_.toSeq)
  }

  /** The compendia directory in this BabelOutput.
    */
  val compendiaDir: File = new File(root, "compendia")

  /** A list of all the compendia in this BabelOutput.
    */
  lazy val compendia: Seq[Compendium] =
    getFilesInDir("compendia").map(filename =>
      new Compendium(new File(compendiaDir, filename))
    )

  /** The summary of every compendium in this BabelOutput. */
  def compendiaSummary: Seq[Compendium#Summary] = compendia.map(_.summary)
}
Loading

0 comments on commit 52443b1

Please sign in to comment.