diff --git a/.gitignore b/.gitignore index fa01827..387acc5 100644 --- a/.gitignore +++ b/.gitignore @@ -1,5 +1,12 @@ -tags -.idea/ -target -.artifactory +# common scala config *~ +.DS_Store +.artifactory +.idea/* +!/.idea/inspectionProfiles/ +.idea/inspectionProfiles/* +!/.idea/inspectionProfiles/Project_Default.xml +target + +# custom config +tags diff --git a/.idea/inspectionProfiles/Project_Default.xml b/.idea/inspectionProfiles/Project_Default.xml new file mode 100644 index 0000000..b15d7f2 --- /dev/null +++ b/.idea/inspectionProfiles/Project_Default.xml @@ -0,0 +1,6 @@ + + + + \ No newline at end of file diff --git a/.pullapprove.yml b/.pullapprove.yml index 7eee40d..3ab4b6b 100644 --- a/.pullapprove.yml +++ b/.pullapprove.yml @@ -5,9 +5,12 @@ author_approval: ignored reviewers: required: 2 members: - - cjllanwarne - Horneth - - scottfrazer - - mcovarr + - cjllanwarne - geoffjentry + - jsotobroad + - katevoss + - kcibul - kshakir + - mcovarr + - ruchim diff --git a/README.md b/README.md index 8803dd5..6ff656f 100644 --- a/README.md +++ b/README.md @@ -165,6 +165,109 @@ $ java -jar wdltool.jar highlight test.wdl html } ``` +## graph + +The syntax of the graph command is: +``` +wdltool graph [--all] wdlFile.wdl +``` + +Given a WDL file input, command generates the data-flow graph through the system in `.dot` format. + +For example the fork-join WDL: +``` +task mkFile { + command { + for i in `seq 1 1000` + do + echo $i + done + } + output { + File numbers = stdout() + } + runtime {docker: "ubuntu:latest"} +} + +task grep { + String pattern + File in_file + command { + grep '${pattern}' ${in_file} | wc -l + } + output { + Int count = read_int(stdout()) + } + runtime {docker: "ubuntu:latest"} +} + +task wc { + File in_file + command { + cat ${in_file} | wc -l + } + output { + Int count = read_int(stdout()) + } + runtime {docker: "ubuntu:latest"} +} + +task join { + Int grepCount + Int wcCount + command { + expr ${wcCount} / ${grepCount} + } + output { + Int proportion = read_int(stdout()) + } + runtime {docker: "ubuntu:latest"} +} + +workflow forkjoin { + call mkFile + call grep { input: in_file = mkFile.numbers } + call wc { input: in_file=mkFile.numbers } + call join { input: wcCount = wc.count, grepCount = grep.count } + output { + join.proportion + } +} +``` + +Produces the DAG: +``` +digraph forkjoin { + "call forkjoin.mkFile" -> "call forkjoin.wc" + "call forkjoin.mkFile" -> "call forkjoin.grep" + "call forkjoin.wc" -> "call forkjoin.join" + "call forkjoin.grep" -> "call forkjoin.join" +} +``` + +### The --all flag + +If this flag is set, all WDL graph nodes become nodes in the generated DAG, even if they are not "executed". Typically this will mean task declarations and call outputs. +For example in the above example, with `--all` you would get: + +``` +digraph forkjoin { + "call forkjoin.grep" -> "String forkjoin.grep.pattern" + "call forkjoin.grep" -> "output { forkjoin.grep.count = read_int(stdout()) }" + "call forkjoin.grep" -> "File forkjoin.grep.in_file" + "call forkjoin.wc" -> "output { forkjoin.wc.count = read_int(stdout()) }" + "call forkjoin.grep" -> "call forkjoin.join" + "call forkjoin.wc" -> "File forkjoin.wc.in_file" + "call forkjoin.mkFile" -> "call forkjoin.grep" + "call forkjoin.join" -> "output { forkjoin.join.proportion = read_int(stdout()) }" + "call forkjoin.join" -> "Int forkjoin.join.wcCount" + "call forkjoin.wc" -> "call forkjoin.join" + "call forkjoin.mkFile" -> "output { forkjoin.mkFile.numbers = stdout() }" + "call forkjoin.mkFile" -> "call forkjoin.wc" + "call forkjoin.join" -> "Int forkjoin.join.grepCount" +} +``` + # Getting Started with WDL -For many examples on how to use WDL see [the WDL site](https://github.com/broadinstitute/wdl/tree/develop#getting-started-with-wdl) +For documentation and many examples on how to use WDL see [the WDL website](https://software.broadinstitute.org/wdl/). diff --git a/build.sbt b/build.sbt index 8edff10..66eaed0 100644 --- a/build.sbt +++ b/build.sbt @@ -1,39 +1,61 @@ import com.typesafe.sbt.GitPlugin.autoImport._ import sbt.Keys._ import sbtassembly.MergeStrategy -import com.typesafe.sbt.SbtGit.GitCommand name := "wdltool" organization := "org.broadinstitute" -scalaVersion := "2.11.7" +scalaVersion := "2.12.1" -// Upcoming release, or current if we're on the master branch -git.baseVersion := "0.4" +val wdl4sV = "0.14-7c693a3-SNAP" -// Shorten the git commit hash -git.gitHeadCommit := git.gitHeadCommit.value map { _.take(7) } +lazy val versionSettings = Seq( + // Upcoming release, or current if we're on the master branch + git.baseVersion := "0.14", -// Travis will deploy tagged releases, add -SNAPSHOT for all local builds -git.gitUncommittedChanges := true + // Shorten the git commit hash + git.gitHeadCommit := git.gitHeadCommit.value map { _.take(7) }, -versionWithGit + // Travis will deploy tagged releases, add -SNAPSHOT for all local builds + git.gitUncommittedChanges := true, + + // For now, obfuscate SNAPSHOTs from sbt's developers: https://github.com/sbt/sbt/issues/2687#issuecomment-236586241 + git.uncommittedSignifier := Option("SNAP") +) + +versionWithGit ++ versionSettings assemblyJarName in assembly := "wdltool-" + git.baseVersion.value + ".jar" logLevel in assembly := Level.Info resolvers ++= Seq( - "Broad Artifactory Releases" at "https://artifactory.broadinstitute.org/artifactory/libs-release/", - "Broad Artifactory Snapshots" at "https://artifactory.broadinstitute.org/artifactory/libs-snapshot/" + "Broad Artifactory Releases" at "https://broadinstitute.jfrog.io/broadinstitute/libs-release/", + "Broad Artifactory Snapshots" at "https://broadinstitute.jfrog.io/broadinstitute/libs-snapshot/" ) +lazy val catsDependencies = List( + "org.typelevel" %% "cats" % "0.9.0", + "org.typelevel" %% "kittens" % "1.0.0-M10", + "com.github.benhutchison" %% "mouse" % "0.9" +) map (_ + /* + Exclude test framework cats-laws and its transitive dependency scalacheck. + If sbt detects scalacheck, it tries to run it. + Explicitly excluding the two problematic artifacts instead of including the three (or four?). + https://github.com/typelevel/cats/tree/v0.7.2#getting-started + Re "_2.12", see also: https://github.com/sbt/sbt/issues/1518 + */ + exclude("org.typelevel", "cats-laws_2.12") + exclude("org.typelevel", "cats-kernel-laws_2.12") + ) + libraryDependencies ++= Seq( - "org.broadinstitute" %% "wdl4s" % "0.4", + "org.broadinstitute" %% "wdl4s" % wdl4sV, //---------- Test libraries -------------------// - "org.scalatest" %% "scalatest" % "2.2.5" % Test -) + "org.scalatest" %% "scalatest" % "3.0.1" % Test +) ++ catsDependencies val customMergeStrategy: String => MergeStrategy = { case x if Assembly.isConfigFile(x) => diff --git a/project/build.properties b/project/build.properties new file mode 100644 index 0000000..35c88ba --- /dev/null +++ b/project/build.properties @@ -0,0 +1 @@ +sbt.version=0.13.12 diff --git a/project/plugins.sbt b/project/plugins.sbt index 20755cc..cadbd87 100644 --- a/project/plugins.sbt +++ b/project/plugins.sbt @@ -2,7 +2,7 @@ addSbtPlugin("com.typesafe.sbt" % "sbt-git" % "0.8.5") addSbtPlugin("com.github.gseitz" % "sbt-release" % "1.0.0") -addSbtPlugin("org.scoverage" % "sbt-scoverage" % "1.0.4") +addSbtPlugin("org.scoverage" % "sbt-scoverage" % "1.5.0") addSbtPlugin("org.scoverage" % "sbt-coveralls" % "1.0.0") diff --git a/src/main/scala/wdltool/GraphPrint.scala b/src/main/scala/wdltool/GraphPrint.scala new file mode 100644 index 0000000..2125c8a --- /dev/null +++ b/src/main/scala/wdltool/GraphPrint.scala @@ -0,0 +1,96 @@ +package wdltool + +import java.nio.file.{Files, Paths} +import java.util.concurrent.atomic.AtomicInteger + +import wdl4s.wdl.{CallOutput, Declaration, If, Scatter, WdlCall, _} +import wdl4s.wdl.WdlGraphNode + +import scala.collection.JavaConverters._ + +import cats.implicits._ +import cats.derived.monoid._, legacy._ + +object GraphPrint { + + final case class WorkflowDigraph(workflowName: String, digraph: NodesAndLinks) + final case class NodesAndLinks(nodes: Set[String], links: Set[String]) + + def generateWorkflowDigraph(file: String, allNodesMode: Boolean): WorkflowDigraph = { + // It's ok to use .get here, we're happy to throw an exception and crash the program! + val namespace = WdlNamespaceWithWorkflow.load(Files.readAllLines(Paths.get(file)).asScala.mkString(System.lineSeparator()), Seq(WdlNamespace.fileResolver _)).get + + val digraph = listAllGraphNodes(namespace.workflow) + + WorkflowDigraph(namespace.workflow.unqualifiedName, digraph) + } + + private val clusterCount: AtomicInteger = new AtomicInteger(0) + + + private def listAllGraphNodes(scope: Scope): NodesAndLinks = { + + val callsAndDeclarations: Set[WdlGraphNode] = (scope.children collect { + case w: WdlGraphNode if isCallOrDeclaration(w) => w + }).toSet + + val subGraphs: Set[WdlGraphNode] = (scope.children collect { + case s: Scatter => s + case i: If => i + }).toSet + + def upstreamLinks(wdlGraphNode: WdlGraphNode, graphNodeName: String, suffix: String = ""): Set[String] = wdlGraphNode.upstream collect { + case upstream: WdlGraphNode if isCallOrDeclaration(upstream) => + val upstreamName = graphName(upstream) + s""""$upstreamName" -> "$graphNodeName" $suffix""" + } + + val thisLevelNodesAndLinks: NodesAndLinks = callsAndDeclarations foldMap { graphNode => + val name = graphName(graphNode) + val initialSet: Set[String] = graphNode match { + case w: WdlGraphNode if isCallOrDeclaration(w) => Set(s""""$name"""") + case _ => Set.empty + } + + val fromStart = if (graphNode.upstream.isEmpty) Set(s""""start" -> "$name"""") else Set.empty + + NodesAndLinks(initialSet, upstreamLinks(graphNode, name) ++ fromStart) + } + + val subGraphNodesAndLinks: NodesAndLinks = subGraphs foldMap { wdlGraphNode => + val clusterName = "cluster_" + clusterCount.getAndIncrement() + val subGraphName = graphName(wdlGraphNode) + val subNodes = listAllGraphNodes(wdlGraphNode) + val scope = s""" + |subgraph $clusterName { + | ${subNodes.nodes.mkString(sep="\n ")} + | "$subGraphName" [shape=plaintext] + |} + """.stripMargin + + NodesAndLinks(Set(scope), subNodes.links ++ upstreamLinks(wdlGraphNode, subGraphName, s"[lhead=$clusterName]")) + } + + thisLevelNodesAndLinks |+| subGraphNodesAndLinks + } + + private def isCallOrDeclaration(w: WdlGraphNode): Boolean = w.isInstanceOf[WdlCall] || w.isInstanceOf[Declaration] + + private def dotSafe(s: String) = s.replaceAllLiterally("\"", "\\\"") + + private def graphName(g: WdlGraphNode): String = dotSafe(g match { + case d: Declaration => + val exprString = d.expression.map(e => " = " + e.toWdlString).getOrElse("") + s"${d.wdlType.toWdlString} ${d.unqualifiedName}$exprString" + case c: WdlCall => + s"call ${c.unqualifiedName}" + case i: If => + s"if (${i.condition.toWdlString})" + case s: Scatter => + s"scatter (${s.item} in ${s.collection.toWdlString})" + case c: CallOutput => + val exprString = c.expression.map(e => " = " + e.toWdlString).getOrElse("") + s"output { ${c.fullyQualifiedName}$exprString }" + case other => s"${other.getClass.getSimpleName}: ${other.fullyQualifiedName}" + }) +} diff --git a/src/main/scala/wdltool/Main.scala b/src/main/scala/wdltool/Main.scala index 59c9943..d2d0c9d 100644 --- a/src/main/scala/wdltool/Main.scala +++ b/src/main/scala/wdltool/Main.scala @@ -1,12 +1,12 @@ package wdltool -import java.io.{File => JFile} +import java.nio.file.Paths -import wdl4s.formatter.{AnsiSyntaxHighlighter, HtmlSyntaxHighlighter, SyntaxFormatter} -import wdl4s.{WdlNamespace, NamespaceWithWorkflow, AstTools} +import wdl4s.wdl.formatter.{AnsiSyntaxHighlighter, HtmlSyntaxHighlighter, SyntaxFormatter} +import wdl4s.wdl._ import spray.json._ -import scala.util.{Failure, Success, Try} +import scala.util.{Failure, Success} object Main extends App { sealed trait Termination { @@ -33,6 +33,7 @@ object Main extends App { case Some(x) if x == Actions.Highlight => highlight(args.tail) case Some(x) if x == Actions.Inputs => inputs(args.tail) case Some(x) if x == Actions.Parse => parse(args.tail) + case Some(x) if x == Actions.Graph => graph(args.tail) case _ => BadUsageTermination } } @@ -55,9 +56,9 @@ object Main extends App { def inputs(args: Seq[String]): Termination = { continueIf(args.length == 1) { loadWdl(args.head) { namespace => - import wdl4s.types.WdlTypeJsonFormatter._ + import wdl4s.wdl.types.WdlTypeJsonFormatter._ val msg = namespace match { - case x: NamespaceWithWorkflow => x.workflow.inputs.toJson.prettyPrint + case x: WdlNamespaceWithWorkflow => x.workflow.inputs.toJson.prettyPrint case _ => "WDL does not have a local workflow" } @@ -68,14 +69,33 @@ object Main extends App { def parse(args: Seq[String]): Termination = { continueIf(args.length == 1) { - SuccessfulTermination(AstTools.getAst(new JFile(args.head)).toPrettyString) + SuccessfulTermination(AstTools.getAst(Paths.get(args.head)).toPrettyString) + } + } + + def graph(args: Seq[String]): Termination = { + continueIf(args.length == 1 || (args.length == 2 && args.head.equals("--all"))) { + + val (file, allNodesMode) = + if (args.size == 1) (args.head, false) + else (args(1), true) + + val workflowDigraph = GraphPrint.generateWorkflowDigraph(file, allNodesMode) + + val result = s"""|digraph ${workflowDigraph.workflowName} { + | compound=true; + | ${workflowDigraph.digraph.links.mkString(System.lineSeparator + " ")} + | ${workflowDigraph.digraph.nodes.mkString(System.lineSeparator + " ")} + |} + |""" + SuccessfulTermination(result.stripMargin) } } private[this] def continueIf(valid: => Boolean)(block: => Termination): Termination = if (valid) block else BadUsageTermination private[this] def loadWdl(path: String)(f: WdlNamespace => Termination): Termination = { - Try(WdlNamespace.load(new JFile(path))) match { + WdlNamespace.loadUsingPath(Paths.get(path), None, None) match { case Success(namespace) => f(namespace) case Failure(t) => UnsuccessfulTermination(t.getMessage) } @@ -88,7 +108,7 @@ object Main extends App { } yield action object Actions extends Enumeration { - val Parse, Validate, Highlight, Inputs = Value + val Parse, Validate, Highlight, Inputs, Graph = Value } val UsageMessage = """ @@ -119,7 +139,14 @@ object Main extends App { | abstract syntax tree if it is valid, and a syntax error | otherwise. Note that higher-level AST checks are not done | via this sub-command and the 'validate' subcommand should - | be used for full validation + | be used for full validation. + |graph [--all] + | + | Reads a WDL file against the grammar and prints out a + | .dot of the DAG if it is valid, and a syntax error + | otherwise. + | Use [--all] to show all graph nodes in the WDL spec, + | even the non-executable nodes. """.stripMargin val termination = dispatchCommand(args) @@ -130,5 +157,5 @@ object Main extends App { case BadUsageTermination => Console.err.println(UsageMessage) } - termination.returnCode + sys.exit(termination.returnCode) } diff --git a/src/test/scala/wdltool/MainSpec.scala b/src/test/scala/wdltool/MainSpec.scala index 01c69c9..d20317b 100644 --- a/src/test/scala/wdltool/MainSpec.scala +++ b/src/test/scala/wdltool/MainSpec.scala @@ -1,10 +1,9 @@ package wdltool -import java.nio.file.{Paths, Path} import better.files._ import org.scalatest.{BeforeAndAfterAll, FlatSpec, Matchers} -import wdltool.SampleWdl.{EmptyWorkflow, EmptyTask, EmptyInvalid, ThreeStep} -import MainSpec._ +import wdltool.MainSpec._ +import wdltool.SampleWdl.{EmptyInvalid, EmptyTask, EmptyWorkflow, ThreeStep} class MainSpec extends FlatSpec with Matchers with BeforeAndAfterAll { @@ -36,7 +35,7 @@ class MainSpec extends FlatSpec with Matchers with BeforeAndAfterAll { testWdl(ThreeStep) { wdlAndInputs => val res = Main.dispatchCommand(Seq("parse", wdlAndInputs.wdl)) assert(res.isInstanceOf[SuccessfulTermination]) - res.output should include("(Document:") + res.output should include("(Namespace:") } } @@ -137,48 +136,47 @@ object MainSpec { */ case class WdlAndInputs(sampleWdl: SampleWdl, optionsJson: String = "{}") { // Track all the temporary files we create, and delete them after the test. - private var tempFiles = Vector.empty[Path] + private var tempFiles = Vector.empty[File] - lazy val wdlPath: Path = { - val path = File.newTemp(s"${sampleWdl.name}.", ".wdl").path + lazy val wdlFile = { + val path = File.newTemporaryFile(s"${sampleWdl.name}.", ".wdl") tempFiles :+= path path write sampleWdl.wdlSource("") path } - lazy val wdl = wdlPath.fullPath + lazy val wdl = wdlFile.pathAsString - lazy val inputsPath = { - val path = swapExt(wdlPath, ".wdl", ".inputs") + lazy val inputsFile = { + val path = swapExt(wdlFile, ".wdl", ".inputs") tempFiles :+= path path write sampleWdl.wdlJson path } - lazy val inputs = inputsPath.fullPath + lazy val inputs = inputsFile.pathAsString - lazy val optionsPath = { - val path = swapExt(wdlPath, ".wdl", ".options") + lazy val optionsFile = { + val path = swapExt(wdlFile, ".wdl", ".options") tempFiles :+= path path write optionsJson path } - lazy val options = optionsPath.fullPath + lazy val options = optionsFile.pathAsString - lazy val metadataPath = { - val path = swapExt(wdlPath, ".wdl", ".metadata.json") + lazy val metadataFile = { + val path = swapExt(wdlFile, ".wdl", ".metadata.json") tempFiles :+= path - path.toAbsolutePath + path } - lazy val metadata = metadataPath.fullPath + lazy val metadata = metadataFile.pathAsString - def deleteTempFiles() = tempFiles.foreach(_.delete(ignoreIOExceptions = true)) + def deleteTempFiles() = tempFiles.foreach(_.delete(swallowIOExceptions = true)) } - def swapExt(filePath: Path, oldExt: String, newExt: String): Path = { - Paths.get(filePath.toString.stripSuffix(oldExt) + newExt) + def swapExt(filePath: File, oldExt: String, newExt: String): File = { + File(filePath.toString.stripSuffix(oldExt) + newExt) } } - diff --git a/src/test/scala/wdltool/SampleWdl.scala b/src/test/scala/wdltool/SampleWdl.scala index d2179b9..4326449 100644 --- a/src/test/scala/wdltool/SampleWdl.scala +++ b/src/test/scala/wdltool/SampleWdl.scala @@ -4,13 +4,13 @@ import java.io.{FileWriter, File} import java.nio.file.{Files, Path} import spray.json._ -import wdl4s._ -import wdl4s.values._ +import wdl4s.wdl._ +import wdl4s.wdl.values._ import scala.language.postfixOps trait SampleWdl { - def wdlSource(runtime: String = ""): WdlSource + def wdlSource(runtime: String = ""): WorkflowSource def rawInputs: WorkflowRawInputs def name = getClass.getSimpleName.stripSuffix("$") @@ -35,7 +35,7 @@ trait SampleWdl { def read(value: JsValue) = throw new NotImplementedError(s"Reading JSON not implemented: $value") } - def wdlJson: WdlJson = rawInputs.toJson.prettyPrint + def wdlJson: WorkflowJson = rawInputs.toJson.prettyPrint def createFileArray(base: Path): Unit = { createFile("f1", base, "line1\nline2\n")