Skip to content

Commit

Permalink
Simpleton and Metadata for CWL Files / Directories (#3139)
Browse files Browse the repository at this point in the history
* Simpletonization of Files / Directories
  • Loading branch information
Horneth committed Jan 18, 2018
1 parent 1a42976 commit 6fd20e7
Show file tree
Hide file tree
Showing 7 changed files with 363 additions and 72 deletions.
Original file line number Diff line number Diff line change
@@ -1,11 +1,8 @@
package centaur.cwl

import io.circe.Json
import spray.json.{JsArray, JsNumber, JsObject, JsString, JsValue}
import spray.json.{JsArray, JsNumber, JsObject, JsString}
import cwl.{MyriadOutputType, File => CwlFile}
import shapeless.{Inl, Poly1}
import cwl.{CwlType, MyriadOutputType, File => CwlFile}
import io.circe.Json
import io.circe.{Json, JsonObject}
import io.circe.generic.auto._
import io.circe.literal._
import io.circe.refined._
Expand All @@ -15,6 +12,7 @@ import shapeless.{Inl, Poly1}
import _root_.cwl._
import cromwell.core.path.PathBuilder
import cwl.command.ParentName
import spray.json.JsValue

//Take cromwell's outputs and format them as expected by the spec
object OutputManipulator extends Poly1 {
Expand All @@ -23,6 +21,46 @@ object OutputManipulator extends Poly1 {
/**
  * Formats a single Cromwell output value as JSON, dispatching on the declared output type.
  *
  * @param jsValue     the output value as reported by Cromwell
  * @param pathBuilder used by the resolvers to build paths from file locations
  * @param mot         the declared type of the output
  * @return the output as circe Json
  */
def resolveOutput(jsValue: JsValue, pathBuilder: PathBuilder, mot: MyriadOutputType): Json = {
  // Folding the output type with this Poly1 yields the resolver for whichever alternative is present.
  // Binding it first keeps the argument list below from being read as fold's implicit parameters.
  val resolver = mot.fold(this)
  resolver(jsValue, pathBuilder)
}

/**
  * Completes a CWL File / Directory JSON object so that it matches what the cwl test runner expects:
  *  - "location" is reduced to the name of the path (not the full path)
  *  - regular files get their "checksum" (sha1) and "size" computed from the actual file
  *  - "secondaryFiles" (files) / "listing" (directories) are removed when empty, and otherwise
  *    rebuilt with every nested file / directory object populated the same way
  *
  * @param pathBuilder used to build a path from the "location" field
  * @param obj         the File / Directory JSON object to complete
  * @return the completed JSON object
  * @throws NoSuchElementException if `obj` has no string "location" field
  * @throws RuntimeException if the location is neither a regular file nor a directory
  */
private def populateFileFields(pathBuilder: PathBuilder)(obj: JsonObject): JsonObject = {
  val objAsMap = obj.toMap

  // Without a location there is nothing to inspect: fail with a message naming what was actually found
  val location = objAsMap
    .get("location")
    .flatMap(_.asString)
    .getOrElse(throw new NoSuchElementException(
      s"Cannot populate file fields: no string 'location' field found among: ${objAsMap.keys.mkString(", ")}"
    ))
  val path = pathBuilder.build(location).get

  // "secondaryFiles" and "listing" are both expected to be arrays.
  // Returns None when there is nothing to keep, i.e. the value is an empty array or is not an array
  // at all (e.g. null): the cwl test runner doesn't expect the field at all in that case.
  def populateInnerFiles(json: Json): Option[Json] =
    json.asArray
      .filter(_.nonEmpty)
      .map(innerFiles => Json.arr(innerFiles.map(_.mapObject(populateFileFields(pathBuilder))): _*))

  def updateFileOrDirectoryWithNestedFiles(current: JsonObject, fieldName: String): JsonObject = {
    // Cromwell metadata has a field for all values even if their content is empty
    // remove it as the cwl test runner expects nothing instead
    val withoutField = current.remove(fieldName)

    // If the field was present and not empty, add it back with each inner file / directory updated as well
    objAsMap
      .get(fieldName)
      .flatMap(populateInnerFiles)
      .map(withoutField.add(fieldName, _))
      .getOrElse(withoutField)
  }

  // The cwl test runner expects only the name, not the full path
  val updatedLocation = obj.add("location", Json.fromString(path.name))

  if (path.isRegularFile) {
    val checksum = Json.fromString("sha1$" + path.sha1.toLowerCase)
    val size = Json.fromLong(path.size)
    val withChecksumAndSize = updatedLocation
      .add("checksum", checksum)
      .add("size", size)

    updateFileOrDirectoryWithNestedFiles(withChecksumAndSize, "secondaryFiles")
  } else if (path.isDirectory) {
    updateFileOrDirectoryWithNestedFiles(updatedLocation, "listing")
  } else throw new RuntimeException(s"${path.pathAsString} is neither a valid file or a directory")
}

private def resolveOutputViaInnerType(mot: MyriadOutputInnerType)(jsValue: JsValue, pathBuilder: PathBuilder): Json = {
(jsValue, mot) match {
Expand All @@ -36,6 +74,12 @@ object OutputManipulator extends Poly1 {
checksum = Option("sha1$" + path.sha1.toLowerCase),
size = Option(path.size)
).asJson
// If it's a JsObject it means it's already in the right format, we just want to fill in some values that might not
// have been populated like "checksum" and "size"
case (obj: JsObject, Inl(CwlType.File)) =>
import io.circe.parser._
val json = parse(obj.compactPrint).right.getOrElse(throw new Exception("Failed to parse Json output as Json... something is very wrong"))
json.mapObject(populateFileFields(pathBuilder))
case (JsNumber(metadata), Inl(CwlType.Long)) => metadata.longValue.asJson
case (JsNumber(metadata), Inl(CwlType.Float)) => metadata.floatValue.asJson
case (JsNumber(metadata), Inl(CwlType.Double)) => metadata.doubleValue.asJson
Expand Down
121 changes: 92 additions & 29 deletions core/src/main/scala/cromwell/core/simpleton/WomValueBuilder.scala
Original file line number Diff line number Diff line change
Expand Up @@ -55,23 +55,37 @@ object WomValueBuilder {
// Within the noncapturing `?:` group, this looks for an escaped metacharacter OR a non-metacharacter.
private val MapElementPattern = raw"^:((?:\\[]\[:]|[^]\[:])+)(.*)".r

// Collects the components of all tuples sharing the same key, keyed by that key (of type `K`).
private def group[K](tuples: Traversable[(K, SimpletonComponent)]): Map[K, Traversable[SimpletonComponent]] =
  tuples.groupBy(_._1).map { case (key, pairs) => key -> pairs.map(_._2) }

// Strips the outermost array index off a component's path.
// Returns that index paired with a copy of the component whose path is the remainder, e.g.
// SimpletonComponent("[0][1]", v) becomes (0 -> SimpletonComponent("[1]", v)).
// A path that does not start with an array index fails with a MatchError.
private def descendIntoArray(component: SimpletonComponent): (Int, SimpletonComponent) = {
  val ArrayElementPattern(index, more) = component.path
  (index.toInt, component.copy(path = more))
}

// Strips the outermost map key off a component's path.
// Returns that key (unescaped) paired with a copy of the component whose path is the remainder, e.g.
// SimpletonComponent(":bar:baz", v) becomes ("bar" -> SimpletonComponent(":baz", v)).
// Keys are always handed back as Strings: coercing them to the actual map key type is up to the caller.
// A path that does not start with a map key fails with a MatchError.
private def descendIntoMap(component: SimpletonComponent): (String, SimpletonComponent) = {
  val MapElementPattern(key, more) = component.path
  (key.unescapeMeta, component.copy(path = more))
}

// Views over a collection of simpleton components, interpreting their paths as array elements,
// map entries or a single primitive.
private implicit class EnhancedSimpletonComponents(val components: Traversable[SimpletonComponent]) extends AnyVal {
  // Components grouped by their outermost array index, in ascending index order
  def asArray: List[Traversable[SimpletonComponent]] = {
    val byIndex = group(components.map(descendIntoArray))
    byIndex.toList.sortBy { case (index, _) => index }.map { case (_, elements) => elements }
  }

  // Components grouped by their outermost (unescaped) map key
  def asMap: Map[String, Traversable[SimpletonComponent]] = {
    val keyed = components.map(descendIntoMap)
    group(keyed)
  }

  // Value of the first component; fails if there is no component at all
  def asPrimitive: WomValue = {
    val first = components.head
    first.value
  }

  // String form of the primitive value
  def asString: String = asPrimitive.valueString
}

private def toWomValue(outputType: WomType, components: Traversable[SimpletonComponent]): WomValue = {

// Returns a tuple of the index into the outermost array and a `SimpletonComponent` whose path reflects the "descent"
// into the array. e.g. for a component
// SimpletonComponent("[0][1]", v) this would return (0 -> SimpletonComponent("[1]", v)).
def descendIntoArray(component: SimpletonComponent): (Int, SimpletonComponent) = {
component.path match { case ArrayElementPattern(index, more) => index.toInt -> component.copy(path = more)}
}

// Returns a tuple of the key into the outermost map and a `SimpletonComponent` whose path reflects the "descent"
// into the map. e.g. for a component
// SimpletonComponent(":bar:baz", v) this would return ("bar" -> SimpletonComponent(":baz", v)).
// Map keys are treated as Strings by this method, the caller must ultimately do the appropriate coercion to the
// actual map key type.
def descendIntoMap(component: SimpletonComponent): (String, SimpletonComponent) = {
component.path match { case MapElementPattern(key, more) => key.unescapeMeta -> component.copy(path = more)}
}

// Returns a tuple of the key into the pair (i.e. left or right) and a `SimpletonComponent` whose path reflects the "descent"
// into the pair. e.g. for a component
Expand All @@ -85,46 +99,95 @@ object WomValueBuilder {
case MapElementPattern("right", more) => PairRight -> component.copy(path = more)
}
}

// Group tuples by key using a Map with key type `K`.
def group[K](tuples: Traversable[(K, SimpletonComponent)]): Map[K, Traversable[SimpletonComponent]] = {
tuples groupBy { case (i, _) => i } mapValues { _ map { case (_, s) => s} }

// Builds the value of one file / directory entry from its simpletons.
// A single simpleton is taken as a primitive; several simpletons are rebuilt as a directory or a
// populated file depending on their class field.
def toWomFile(components: Traversable[SimpletonComponent]): WomValue = {
  if (components.size == 1) {
    // Just one simpleton: it's a primitive (file or directory)
    components.asPrimitive
  } else {
    // Several simpletons: key them by field name and read the type of file from the class field
    val groupedListing = components.asMap

    /* If the class field is in an array it will be prefixed with a ':', so look for that as well.
     * e.g: secondaryFiles[0]:class -> "File"
     *      secondaryFiles[0]:value -> "file/path"
     * would produce a Map(
     *  ":class" -> List(Simpleton("File")),
     *  ":value" -> List(Simpleton("file/path"))
     * )
     */
    val declaredClass: Option[String] = groupedListing
      .get(ClassKey)
      .orElse(groupedListing.get(s":$ClassKey"))
      .map(_.asString)

    declaredClass match {
      case Some(WomValueSimpleton.DirectoryClass) => toWomValue(WomMaybeListedDirectoryType, components)
      case Some(WomValueSimpleton.FileClass) => toWomValue(WomMaybePopulatedFileType, components)
      case _ =>
        throw new IllegalArgumentException(s"There is no WomFile that can be built from simpletons: ${groupedListing.toList.mkString(", ")}")
    }
  }
}

outputType match {
case _: WomPrimitiveType =>
components collectFirst { case SimpletonComponent(_, v) => v } get
components.asPrimitive
case opt: WomOptionalType =>
if (components.isEmpty) {
WomOptionalValue(opt.memberType, None)
} else {
WomOptionalValue(toWomValue(opt.memberType, components))
}
case arrayType: WomArrayType =>
val groupedByArrayIndex: Map[Int, Traversable[SimpletonComponent]] = group(components map descendIntoArray)
WomArray(arrayType, groupedByArrayIndex.toList.sortBy(_._1) map { case (_, s) => toWomValue(arrayType.memberType, s) })
WomArray(arrayType, components.asArray map { toWomValue(arrayType.memberType, _) })
case mapType: WomMapType =>
val groupedByMapKey: Map[String, Traversable[SimpletonComponent]] = group(components map descendIntoMap)
// map keys are guaranteed by the WDL spec to be primitives, so the "coerceRawValue(..).get" is safe.
WomMap(mapType, groupedByMapKey map { case (k, ss) => mapType.keyType.coerceRawValue(k).get -> toWomValue(mapType.valueType, ss) })
WomMap(mapType, components.asMap map { case (k, ss) => mapType.keyType.coerceRawValue(k).get -> toWomValue(mapType.valueType, ss) })
case pairType: WomPairType =>
val groupedByLeftOrRight: Map[PairLeftOrRight, Traversable[SimpletonComponent]] = group(components map descendIntoPair)
WomPair(toWomValue(pairType.leftType, groupedByLeftOrRight(PairLeft)), toWomValue(pairType.rightType, groupedByLeftOrRight(PairRight)))
case WomObjectType =>
val groupedByMapKey: Map[String, Traversable[SimpletonComponent]] = group(components map descendIntoMap)
// map keys are guaranteed by the WDL spec to be primitives, so the "coerceRawValue(..).get" is safe.
val map: Map[String, WomValue] = groupedByMapKey map { case (k, ss) => k -> toWomValue(WomAnyType, ss) }
val map: Map[String, WomValue] = components.asMap map { case (k, ss) => k -> toWomValue(WomAnyType, ss) }
WomObject(map)
case composite: WomCompositeType =>
val groupedByMapKey: Map[String, Traversable[SimpletonComponent]] = group(components map descendIntoMap)
// map keys are guaranteed by the WDL spec to be primitives, so the "coerceRawValue(..).get" is safe.
val map: Map[String, WomValue] = groupedByMapKey map { case (k, ss) =>
val map: Map[String, WomValue] = components.asMap map { case (k, ss) =>
val valueType = composite
.typeMap
.getOrElse(k, throw new RuntimeException(s"Field $k is not a declared field of composite type $composite. Cannot build a WomValue from the simpletons."))
k -> toWomValue(valueType, ss)
}
WomObject.withType(map, composite)
case WomMaybeListedDirectoryType =>
val directoryValues = components.asMap

val value = directoryValues.get("value").map(_.asString)
val listing = directoryValues.get("listing")
.map({ _.asArray.map(toWomFile).collect({ case womFile: WomFile => womFile }) })

WomMaybeListedDirectory(value, listing)
case WomMaybePopulatedFileType =>
val populatedValues = components.asMap

val value = populatedValues.get("value").map(_.asString)
val checksum = populatedValues.get("checksum").map(_.asString)
val size = populatedValues.get("size").map(_.asString.toLong)
val format = populatedValues.get("format").map(_.asString)
val contents = populatedValues.get("contents").map(_.asString)
val secondaryFiles = populatedValues.get("secondaryFiles").toList.flatMap({
_.asArray.map(toWomFile).collect({ case womFile: WomFile => womFile })
})

WomMaybePopulatedFile(
valueOption = value,
checksumOption = checksum,
sizeOption = size,
formatOption = format,
contentsOption = contents,
secondaryFiles = secondaryFiles
)
case WomAnyType =>
// Ok, we're going to have to guess, but the keys should give us some clues:
if (components forall { component => MapElementPattern.findFirstMatchIn(component.path).isDefined }) {
Expand Down
63 changes: 43 additions & 20 deletions core/src/main/scala/cromwell/core/simpleton/WomValueSimpleton.scala
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,10 @@ case class WomValueSimpleton(simpletonKey: String, simpletonValue: WomPrimitive)
* `WomValueSimpleton`s are transformed back to `WomValue`s.
*/
object WomValueSimpleton {

// Key suffix of the extra simpleton emitted for directories and populated files to record which
// kind of value the simpletons were produced from
val ClassKey = "class"
// Value stored under ClassKey when simplifying a WomMaybeListedDirectory
val DirectoryClass = "Directory"
// Value stored under ClassKey when simplifying a WomMaybePopulatedFile
val FileClass = "File"

implicit class KeyMetacharacterEscaper(val key: String) extends AnyVal {
// The escapes are necessary on the first arguments to `replaceAll` since they're treated like regular expressions
Expand All @@ -25,27 +29,46 @@ object WomValueSimpleton {
}

implicit class WomValueSimplifier(womValue: WomValue) {
def simplify(name: String): Iterable[WomValueSimpleton] = womValue match {
case prim: WomPrimitive => List(WomValueSimpleton(name, prim))
case opt: WomOptionalValue => opt.value.map(_.simplify(name)).getOrElse(Seq.empty)
case WomArray(_, arrayValue) => arrayValue.zipWithIndex flatMap { case (arrayItem, index) => arrayItem.simplify(s"$name[$index]") }
case WomMap(_, mapValue) => mapValue flatMap { case (key, value) => value.simplify(s"$name:${key.valueString.escapeMeta}") }
case WomPair(left, right) => left.simplify(s"$name:left") ++ right.simplify(s"$name:right")
case womObjectLike: WomObjectLike => womObjectLike.values flatMap {
case (key, value) => value.simplify(s"$name:${key.escapeMeta}")
// Wraps a plain String into a simpleton stored under `key`
private def toStringSimpleton(key: String)(value: String): WomValueSimpleton =
  WomValueSimpleton(simpletonKey = key, simpletonValue = WomString(value))
// Wraps a Long (e.g. a file size) into a simpleton stored under `key`.
// The value is kept as its decimal string rather than narrowed with `toInt`: narrowing silently wraps
// anything above Int.MaxValue (a file over ~2GB) into a wrong, possibly negative, number.
// WomValueBuilder reads "size" back through its string form (`asString.toLong`), so the full Long survives.
// NOTE(review): the simpleton now carries a WomString instead of a WomInteger - confirm nothing else
// relies on the primitive type of this simpleton.
private def toNumberSimpleton(key: String)(value: Long): WomValueSimpleton =
  WomValueSimpleton(key, WomString(value.toString))

def simplify(name: String): Iterable[WomValueSimpleton] = {
def suffix(suffix: String) = s"$name:$suffix"

womValue match {
case prim: WomPrimitive => List(WomValueSimpleton(name, prim))
case opt: WomOptionalValue => opt.value.map(_.simplify(name)).getOrElse(Seq.empty)
case WomArray(_, arrayValue) => arrayValue.zipWithIndex flatMap { case (arrayItem, index) => arrayItem.simplify(s"$name[$index]") }
case WomMap(_, mapValue) => mapValue flatMap { case (key, value) => value.simplify(s"$name:${key.valueString.escapeMeta}") }
case WomPair(left, right) => left.simplify(s"$name:left") ++ right.simplify(s"$name:right")
case womObjectLike: WomObjectLike => womObjectLike.values flatMap {
case (key, value) => value.simplify(s"$name:${key.escapeMeta}")
}
case WomMaybeListedDirectory(valueOption, listingOption) =>
// This simpleton is not strictly part of the WomFile but is used to record the type of this WomValue so it can
// be re-built appropriately in the WomValueBuilder
val classSimpleton = Option(toStringSimpleton(suffix(ClassKey))(DirectoryClass))
val valueSimpleton = valueOption.map(toStringSimpleton(suffix("value")))
val listingSimpletons = listingOption.toList.flatMap(files =>
files.zipWithIndex flatMap { case (arrayItem, index) => arrayItem.simplify(suffix(s"listing[$index]")) }
)
classSimpleton ++ listingSimpletons ++ valueSimpleton
case womMaybePopulatedFile: WomMaybePopulatedFile =>
// This simpleton is not strictly part of the WomFile but is used to record the type of this WomValue so it can
// be re-built appropriately in the WomValueBuilder
val classSimpleton = Option(toStringSimpleton(suffix(ClassKey))(FileClass))
val valueSimpleton = womMaybePopulatedFile.valueOption.map(toStringSimpleton(suffix("value")))
val checksumSimpleton = womMaybePopulatedFile.checksumOption.map(toStringSimpleton(suffix("checksum")))
val contentsSimpleton = womMaybePopulatedFile.contentsOption.map(toStringSimpleton(suffix("contents")))
val sizeSimpleton = womMaybePopulatedFile.sizeOption.map(toNumberSimpleton(suffix("size")))
val formatSimpleton = womMaybePopulatedFile.formatOption.map(toStringSimpleton(suffix("format")))
val secondaryFilesSimpletons = womMaybePopulatedFile.secondaryFiles.toList.zipWithIndex flatMap {
case (arrayItem, index) => arrayItem.simplify(suffix(s"secondaryFiles[$index]"))
}

classSimpleton ++ valueSimpleton ++ checksumSimpleton ++ contentsSimpleton ++ sizeSimpleton ++ formatSimpleton ++ secondaryFilesSimpletons
case other => throw new Exception(s"Cannot simplify wdl value $other of type ${other.womType}")
}
// TODO: WOM: WOMFILE: Better simplification of listed dirs / populated files
case womMaybeListedDirectory: WomMaybeListedDirectory =>
womMaybeListedDirectory
.valueOption
.map(value => WomUnlistedDirectory(value).simplify(name))
.getOrElse(Seq.empty)
case womMaybePopulatedFile: WomMaybePopulatedFile =>
womMaybePopulatedFile
.valueOption
.map(value => WomSingleFile(value).simplify(name))
.getOrElse(Seq.empty)
case other => throw new Exception(s"Cannot simplify wdl value $other of type ${other.womType}")
}
}

Expand Down

0 comments on commit 6fd20e7

Please sign in to comment.