Skip to content

Commit

Permalink
Support inputs of various types (#3097)
Browse files Browse the repository at this point in the history
  • Loading branch information
danbills committed Jan 9, 2018
1 parent e2a7743 commit be2ea6a
Show file tree
Hide file tree
Showing 13 changed files with 346 additions and 302 deletions.
76 changes: 1 addition & 75 deletions cwl/src/main/scala/cwl/CommandOutputBinding.scala
Original file line number Diff line number Diff line change
Expand Up @@ -2,86 +2,12 @@ package cwl

import cwl.CommandOutputBinding.Glob
import shapeless.{:+:, CNil}
import wom.expression.IoFunctionSet
import wom.types._
import wom.values._

import scala.language.postfixOps
import scala.concurrent.Await
import scala.concurrent.duration._

/** @see <a href="http://www.commonwl.org/v1.0/Workflow.html#CommandOutputBinding">CommandOutputBinding</a> */
case class CommandOutputBinding(glob: Option[Glob] = None,
                                loadContents: Option[Boolean] = None,
                                outputEval: Option[StringOrExpression] = None) {

  /**
    * Turns this output binding into a WOM value.
    *
    * From the CWL specification of CommandOutputBinding.glob
    * (http://www.commonwl.org/v1.0/CommandLineTool.html#CommandOutputBinding):
    * find files relative to the output directory, using POSIX glob(3) pathname matching. If an array is provided,
    * find files that match any pattern in the array. If an expression is provided, the expression must return a
    * string or an array of strings, which will then be evaluated as one or more glob patterns. Must only match and
    * return files which actually exist.
    *
    * Every path produced by the glob becomes a string-to-string WOM map with a "location" entry and, when
    * `loadContents` is set to true, a "contents" entry holding at most the first 64 KiB of the file. The array of
    * those maps is installed as `self` in the parameter context before `outputEval` is applied.
    *
    * @param parameterContext context used to evaluate the glob and, with `self` replaced, the `outputEval` expression
    * @param ioFunctionSet    IO functions used for glob evaluation and for reading file contents
    * @return the literal `outputEval` string, the result of the `outputEval` expression, or, when there is no
    *         `outputEval`, the array of file maps itself
    */
  def commandOutputBindingToWomValue(parameterContext: ParameterContext,
                                     ioFunctionSet: IoFunctionSet): WomValue = {

    val matchedPaths: Seq[String] = glob match {
      case Some(globValue) => GlobEvaluator.globPaths(globValue, parameterContext, ioFunctionSet)
      case None => Vector.empty
    }

    val shouldLoadContents: Boolean = loadContents.getOrElse(false)
    val fileMapType = WomMapType(WomStringType, WomStringType)

    // Builds the CWL "File"-like map for one matched path.
    def toFileMap(path: String) = {
      // TODO: WOM: basename/dirname/size/checksum/etc.
      val pathInGlobDir = GlobFunctions.prefixWithGlobDir(path)

      val locationEntry: Map[WomValue, WomValue] = Map(WomString("location") -> WomString(path))

      val entries: Map[WomValue, WomValue] =
        if (shouldLoadContents) locationEntry + (WomString("contents") -> WomString(load64KiB(pathInGlobDir, ioFunctionSet)))
        else locationEntry

      WomMap(fileMapType, entries)
    }

    val fileMaps = WomArray(WomArrayType(fileMapType), matchedPaths map toFileMap)
    val contextWithSelf = parameterContext.copy(self = fileMaps)

    outputEval match {
      case None =>
        // Return the WomArray of file paths, three_step.ps needs this for stdout output.
        // There will be conversion required between this Array[File] output type and the requested File.
        fileMaps
      case Some(StringOrExpression.String(literal)) =>
        WomString(literal)
      case Some(StringOrExpression.Expression(expression)) =>
        // For now throw if the expression evaluation fails
        expression.fold(EvaluateExpression).apply(contextWithSelf).get
    }
  }

  /**
    * Reads the file at `path` and returns at most its first 64 * 1024 characters.
    *
    * Blocks for up to 5 seconds waiting for the read to complete.
    */
  private def load64KiB(path: String, ioFunctionSet: IoFunctionSet): String = {
    // This suggests the IoFunctionSet should have a length-limited read API as both CWL and WDL support this concept.
    // ChrisL: But remember that they are different (WDL => failure, CWL => truncate)
    // TODO: propagate IO, Try, or Future or something all the way out via "commandOutputBindingtoWomValue" signature
    // TODO: Stream only the first 64 KiB, this "read everything then ignore most of it" method is terrible
    val characterLimit = 64 * 1024
    val wholeFile = Await.result(ioFunctionSet.readFile(path), 5.seconds)
    wholeFile.take(characterLimit)
  }
}
outputEval: Option[StringOrExpression] = None)

object CommandOutputBinding {
type Glob = Expression :+: String :+: Array[String] :+: CNil
Expand Down
130 changes: 130 additions & 0 deletions cwl/src/main/scala/cwl/CommandOutputExpression.scala
Original file line number Diff line number Diff line change
@@ -0,0 +1,130 @@
package cwl

import cats.data.NonEmptyList
import cats.syntax.either._
import cats.syntax.validated._
import common.Checked
import common.validation.ErrorOr.ErrorOr
import common.validation.Validation._
import mouse.all._
import wom.expression.IoFunctionSet
import wom.types._
import wom.values.{GlobFunctions, WomArray, WomFile, WomGlobFile, WomMap, WomString, WomValue}

import scala.concurrent.Await
import scala.concurrent.duration._
import scala.language.postfixOps

/**
  * WOM expression that evaluates a CWL [[CommandOutputBinding]] and coerces the result to the declared output type.
  *
  * @param outputBinding     the glob / loadContents / outputEval binding to evaluate
  * @param cwlExpressionType the WOM type the evaluated value is coerced to
  * @param inputs            the `inputs` member of [[CwlWomExpression]]
  */
case class CommandOutputExpression(outputBinding: CommandOutputBinding,
                                   override val cwlExpressionType: WomType,
                                   override val inputs: Set[String]) extends CwlWomExpression {

  // TODO WOM: outputBinding.toString is probably not the best representation of the expression source
  override def sourceString = outputBinding.toString

  /**
    * Evaluates the output binding and coerces the outcome to `cwlExpressionType`.
    *
    * Steps:
    *  1. Evaluate the binding: a literal `outputEval` string is returned as is; a glob without `outputEval` yields
    *     the single matched path or an array of matched paths; an `outputEval` expression is evaluated with `self`
    *     set to one map per globbed path ("location", plus "contents" when `loadContents` is true).
    *  2. A one-element array holding a string-to-string map is unwrapped to that map's "location" value.
    *  3. If the value is a string and a single file is expected, the string is expanded as a glob that must match
    *     exactly one file.
    *  4. The value is coerced to `cwlExpressionType`.
    *
    * @param inputValues   input values used to build the parameter context
    * @param ioFunctionSet IO functions used for globbing and reading file contents
    * @return the coerced value, or the accumulated error messages
    */
  override def evaluateValue(inputValues: Map[String, WomValue], ioFunctionSet: IoFunctionSet): ErrorOr[WomValue] = {

    val parameterContext = ParameterContext(inputValues)

    /*
    CommandOutputBinding.glob:
    Find files relative to the output directory, using POSIX glob(3) pathname matching. If an array is provided, find
    files that match any pattern in the array. If an expression is provided, the expression must return a string or an
    array of strings, which will then be evaluated as one or more glob patterns. Must only match and return files which
    actually exist.
    http://www.commonwl.org/v1.0/CommandLineTool.html#CommandOutputBinding
    */
    // NOTE(review): a binding with neither glob nor outputEval is not covered by the cases below - confirm that
    // such bindings never reach this expression.
    def outputBindingEvaluationResult: Checked[WomValue] = {
      import StringOrExpression._
      outputBinding match {
        case CommandOutputBinding(_, _, Some(String(value))) => WomString(value).asRight
        case CommandOutputBinding(Some(glob), _, None) =>
          GlobEvaluator.globPaths(glob, parameterContext, ioFunctionSet) match {
            case Seq(fileName) => WomString(fileName).asRight
            case array => WomArray(WomArrayType(WomStringType), array.map(WomString.apply)).asRight
          }


        case CommandOutputBinding(glob, loadContents, Some(Expression(expression))) =>

          val paths: Seq[String] = glob.toSeq flatMap { globValue =>
            GlobEvaluator.globPaths(globValue, parameterContext, ioFunctionSet)
          }

          val _loadContents: Boolean = loadContents getOrElse false

          val womMaps: Array[Map[String, String]] =
            paths.toArray map {
              (path: String) =>
                // TODO: WOM: basename/dirname/size/checksum/etc.
                val globPathWithDirectory = GlobFunctions.prefixWithGlobDir(path)

                val contents =
                  Map("contents" -> load64KiB(globPathWithDirectory, ioFunctionSet)).
                    filter(_ => _loadContents)

                val location = Map("location" -> path)

                location ++ contents
            }

          val outputEvalParameterContext: ParameterContext = parameterContext.setSelf(womMaps)

          expression.
            fold(EvaluateExpression).
            apply(outputEvalParameterContext).
            cata(Right(_),Left(_)). // this is because toEither is not a thing in scala 2.11.
            leftMap(e => NonEmptyList(e.getMessage, e.getStackTrace.map(_.toString).toList))
      }
    }
    //To facilitate ECMAScript evaluation, filenames are stored in a map under the key "location"
    val womValue =
      outputBindingEvaluationResult map {
        case WomArray(_, Seq(WomMap(WomMapType(WomStringType, WomStringType), results))) => results(WomString("location"))
        case other => other
      }

    //If the value is a string but the output is expecting a file, we consider that string a POSIX "glob" and apply
    //it accordingly to retrieve the file list to which it expands.
    val globbedIfFile:ErrorOr[WomValue] =
      (womValue, cwlExpressionType) match {

        //In the case of a single file being expected, we must enforce that the glob only represents a single file
        case (Right(WomString(glob)), WomSingleFileType) =>
          Await.result(ioFunctionSet.glob(glob), Duration.Inf) match {
            // Match on Seq rather than `head :: Nil`: the cons pattern only matches a List, so a single-element
            // Vector (or any other Seq) would wrongly be reported as not being a single file.
            case Seq(head) => WomString(head).validNel
            case list => s"expecting a single File glob but instead got ${list.mkString(", ")}".invalidNel
          }

        case (other, _) => other.toValidated
      }

    //CWL tells us the type this output is expected to be. Attempt to coerce the actual output into this type.
    globbedIfFile.toTry.flatMap(cwlExpressionType.coerceRawValue).toErrorOr
  }

  /*
  TODO:
  DB: It doesn't make sense to me that this function returns type WomFile but accepts a type to which it coerces.
  Wouldn't coerceTo always == WomFileType, and if not then what?
  */
  /**
    * Lists the files this expression refers to: one [[WomGlobFile]] per path produced by evaluating the binding's glob.
    *
    * @param inputs        input values used to build the parameter context
    * @param ioFunctionSet IO functions used for glob evaluation
    * @param coerceTo      not used by this implementation
    * @return the set of glob files; empty when the binding has no glob
    */
  override def evaluateFiles(inputs: Map[String, WomValue], ioFunctionSet: IoFunctionSet, coerceTo: WomType): ErrorOr[Set[WomFile]] ={
    val pc = ParameterContext(inputs)
    (for {
      globValue <- outputBinding.glob.toList
      path <- GlobEvaluator.globPaths(globValue, pc, ioFunctionSet)
    } yield WomGlobFile(path): WomFile).toSet.validNel
  }

  /**
    * Reads the file at `path` and returns at most its first 64 * 1024 characters.
    *
    * Blocks for up to 60 seconds waiting for the read to complete.
    */
  private def load64KiB(path: String, ioFunctionSet: IoFunctionSet): String = {
    // This suggests the IoFunctionSet should have a length-limited read API as both CWL and WDL support this concept.
    // ChrisL: But remember that they are different (WDL => failure, CWL => truncate)
    val content = ioFunctionSet.readFile(path)

    // TODO: propagate IO, Try, or Future or something all the way out via the "evaluateValue" signature
    // TODO: Stream only the first 64 KiB, this "read everything then ignore most of it" method will be very inefficient
    val initialResult = Await.result(content, 60 seconds)
    initialResult.substring(0, Math.min(initialResult.length, 64 * 1024))
  }
}
36 changes: 18 additions & 18 deletions cwl/src/main/scala/cwl/CwlExpressionCommandPart.scala
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
package cwl

import cats.syntax.validated._
import cats.data.NonEmptyList
import cats.syntax.either._
import common.validation.ErrorOr.ErrorOr
import common.validation.Validation._
import cwl.CommandLineTool.CommandInputParameter
Expand All @@ -12,22 +13,22 @@ import wom.graph.LocalName
import wom.values._
import wom.{CommandPart, InstantiatedCommand}

import scala.util.Try
import scala.language.postfixOps
import scala.util.Try

case class CwlExpressionCommandPart(expr: Expression) extends CommandPart {
override def instantiate(inputsMap: Map[LocalName, WomValue],
functions: IoFunctionSet,
valueMapper: (WomValue) => WomValue,
runtimeEnvironment: RuntimeEnvironment): ErrorOr[InstantiatedCommand] = {
val stringKeyMap = inputsMap.map { case (LocalName(localName), value) => localName -> value }

val pc =
ParameterContext(
runtime = runtimeEnvironment.cwlMap
).withInputs(stringKeyMap, functions)
val pc = ParameterContext().
addLocalInputs(inputsMap).
setRuntime(runtimeEnvironment)

expr.fold(EvaluateExpression).apply(pc).toErrorOr.map(v => InstantiatedCommand(v.valueString))
val evaluatedExpression = expr.fold(EvaluateExpression).apply(pc).toEither.leftMap(e => NonEmptyList.one(e.getMessage))

evaluatedExpression.map(_.valueString).map(InstantiatedCommand.apply(_)). toValidated
}
}

Expand All @@ -37,21 +38,20 @@ case class CommandLineBindingCommandPart(argument: CommandLineBinding) extends C
functions: IoFunctionSet,
valueMapper: (WomValue) => WomValue,
runtimeEnvironment: RuntimeEnvironment): ErrorOr[InstantiatedCommand] = {
val pc = ParameterContext(runtime = runtimeEnvironment.cwlMap).withInputs(inputsMap.map({
case (LocalName(localName), sf: WomSingleFile) => localName -> valueMapper(sf)
case (LocalName(localName), value) => localName -> value
}), functions)
val pc = ParameterContext().addLocalInputs(inputsMap)

val womValue: ErrorOr[WomValue] = argument match {
val expressionEvaluationResult: Either[NonEmptyList[WorkflowStepInputId], WomValue] = (argument match {
case CommandLineBinding(_, _, _, _, _, Some(StringOrExpression.Expression(expression)), Some(false)) =>
expression.fold(EvaluateExpression).apply(pc).map(valueMapper).toErrorOr
expression.fold(EvaluateExpression).apply(pc).map(valueMapper).toEither.leftMap(e => NonEmptyList.one(e.getMessage))
case CommandLineBinding(_, _, _, _, _, Some(StringOrExpression.String(string)), Some(false)) =>
WomString(string).validNel
Right(WomString(string))
// There's a fair few other cases to add, but until then...
case other => s"As-yet-unsupported command line binding: $other".invalidNel
}
case other => throw new NotImplementedError(s"As-yet-unsupported command line binding: $other")
})

val commandString = expressionEvaluationResult.map(_.valueString)

womValue.map(v => InstantiatedCommand(v.valueString))
commandString.map(InstantiatedCommand.apply(_)).toValidated
}
}

Expand Down

0 comments on commit be2ea6a

Please sign in to comment.