diff --git a/cli/src/main/scala/org/bdgenomics/cannoli/cli/Bcftools.scala b/cli/src/main/scala/org/bdgenomics/cannoli/cli/Bcftools.scala index 7a6196ff..61d4d956 100644 --- a/cli/src/main/scala/org/bdgenomics/cannoli/cli/Bcftools.scala +++ b/cli/src/main/scala/org/bdgenomics/cannoli/cli/Bcftools.scala @@ -18,7 +18,9 @@ package org.bdgenomics.cannoli.cli import htsjdk.samtools.ValidationStringency +import org.apache.hadoop.fs.{ FileSystem, Path } import org.apache.spark.SparkContext +import org.bdgenomics.adam.models.VariantContext import org.bdgenomics.adam.rdd.ADAMContext._ import org.bdgenomics.adam.rdd.ADAMSaveAnyArgs import org.bdgenomics.adam.rdd.variant.{ @@ -26,25 +28,37 @@ import org.bdgenomics.adam.rdd.variant.{ VCFInFormatter, VCFOutFormatter } +import org.bdgenomics.adam.sql.{ VariantContext => VariantContextProduct } +import org.bdgenomics.cannoli.builder.CommandBuilders import org.bdgenomics.utils.cli._ import org.bdgenomics.utils.misc.Logging import org.kohsuke.args4j.{ Argument, Option => Args4jOption } +import scala.collection.JavaConversions._ /** * Bcftools function arguments. */ class BcftoolsFnArgs extends Args4jBase { - @Args4jOption(required = false, name = "-bcftools_path", usage = "Path to the BCFtools executable. Defaults to bcftools.") - var bcftoolsPath: String = "bcftools" + @Args4jOption(required = false, name = "-executable", usage = "Path to the BCFtools executable. Defaults to bcftools.") + var executable: String = "bcftools" - @Args4jOption(required = true, name = "-bcftools_reference", usage = "Reference sequence for analysis. An index file (.fai) will be created if none exists.") - var referencePath: String = null + @Args4jOption(required = false, name = "-image", usage = "Container image to use. 
Defaults to quay.io/biocontainers/bcftools:1.6--0.") + var image: String = "quay.io/biocontainers/bcftools:1.6--0" + + @Args4jOption(required = false, name = "-sudo", usage = "Run via sudo.") + var sudo: Boolean = false - @Args4jOption(required = false, name = "-docker_image", usage = "Docker image to use. Defaults to quay.io/biocontainers/bcftools:1.6--0.") - var dockerImage: String = "quay.io/biocontainers/bcftools:1.6--0" + @Args4jOption(required = false, name = "-add_files", usage = "If true, use the SparkFiles mechanism to distribute files to executors.") + var addFiles: Boolean = false - @Args4jOption(required = false, name = "-use_docker", usage = "If true, uses Docker to launch BCFtools. If false, uses the BCFtools executable path.") + @Args4jOption(required = false, name = "-use_docker", usage = "If true, uses Docker to launch BCFtools.") var useDocker: Boolean = false + + @Args4jOption(required = false, name = "-use_singularity", usage = "If true, uses Singularity to launch BCFtools.") + var useSingularity: Boolean = false + + @Args4jOption(required = true, name = "-reference", usage = "Reference sequence for analysis. An index file (.fai) will be created if none exists.") + var referencePath: String = null } /** @@ -52,56 +66,42 @@ class BcftoolsFnArgs extends Args4jBase { * for use in cannoli-shell or notebooks. * * @param args Bcftools function arguments. - * @param files Files to make locally available to the commands being run. - * @param environment A map containing environment variable/value pairs to set - * in the environment for the newly created process. * @param sc Spark context. */ class BcftoolsFn( val args: BcftoolsFnArgs, - val files: Seq[String], - val environment: Map[String, String], - val sc: SparkContext) extends Function1[VariantContextRDD, VariantContextRDD] with Logging { - - /** - * @param args Bcftools function arguments. - * @param sc Spark context. 
- */ - def this(args: BcftoolsFnArgs, sc: SparkContext) = this(args, Seq.empty, Map.empty, sc) - - /** - * @param args Bcftools function arguments. - * @param files Files to make locally available to the commands being run. - * @param sc Spark context. - */ - def this(args: BcftoolsFnArgs, files: Seq[String], sc: SparkContext) = this(args, files, Map.empty, sc) + sc: SparkContext) extends CannoliFn[VariantContextRDD, VariantContextRDD](sc) with Logging { override def apply(variantContexts: VariantContextRDD): VariantContextRDD = { - val bcftoolsCommand = if (args.useDocker) { - Seq("docker", - "run", - "--interactive", - "--rm", - args.dockerImage, - "bcftools", - "norm", - "--fasta-ref", - args.referencePath) - } else { - Seq(args.bcftoolsPath, - "norm", - "--fasta-ref", - args.referencePath) + val builder = CommandBuilders.create(args.useDocker, args.useSingularity) + .setExecutable(args.executable) + .add("norm") + .add("--fasta-ref") + .add(if (args.addFiles) "$0" else absolute(args.referencePath)) + + if (args.addFiles) { + builder.addFile(args.referencePath) + builder.addFile(args.referencePath + ".fai") + } + + if (args.useDocker || args.useSingularity) { + builder + .setImage(args.image) + .setSudo(args.sudo) + .addMount(if (args.addFiles) "$root" else root(args.referencePath)) } - log.info("Piping {} to bcftools with command: {} files: {} environment: {}", - Array(variantContexts, bcftoolsCommand, files, environment)) + log.info("Piping {} to bcftools with command: {} files: {}", + variantContexts, builder.build(), builder.getFiles()) implicit val tFormatter = VCFInFormatter implicit val uFormatter = new VCFOutFormatter(sc.hadoopConfiguration) - variantContexts.pipe(bcftoolsCommand, files, environment) + variantContexts.pipe[VariantContext, VariantContextProduct, VariantContextRDD, VCFInFormatter]( + cmd = builder.build(), + files = builder.getFiles() + ) } } diff --git a/cli/src/main/scala/org/bdgenomics/cannoli/cli/Bedtools.scala 
b/cli/src/main/scala/org/bdgenomics/cannoli/cli/Bedtools.scala index 5b1165f3..1e58ff1d 100644 --- a/cli/src/main/scala/org/bdgenomics/cannoli/cli/Bedtools.scala +++ b/cli/src/main/scala/org/bdgenomics/cannoli/cli/Bedtools.scala @@ -17,7 +17,9 @@ */ package org.bdgenomics.cannoli.cli +import org.apache.hadoop.fs.{ FileSystem, Path } import org.apache.spark.SparkContext +import org.bdgenomics.adam.projections.{ FeatureField, Projection } import org.bdgenomics.adam.rdd.ADAMContext._ import org.bdgenomics.adam.rdd.ADAMSaveAnyArgs import org.bdgenomics.adam.rdd.feature.{ @@ -25,9 +27,13 @@ import org.bdgenomics.adam.rdd.feature.{ BEDInFormatter, BEDOutFormatter } +import org.bdgenomics.adam.sql.{ Feature => FeatureProduct } +import org.bdgenomics.cannoli.builder.CommandBuilders +import org.bdgenomics.formats.avro.Feature; import org.bdgenomics.utils.cli._ import org.bdgenomics.utils.misc.Logging import org.kohsuke.args4j.{ Argument, Option => Args4jOption } +import scala.collection.JavaConversions._ /** * Bedtools function arguments. @@ -42,14 +48,23 @@ class BedtoolsFnArgs extends Args4jBase { @Args4jOption(required = false, name = "-sorted", usage = "Bedtools intersect -sorted option. Inputs must be sorted by chromosome and then by start position.") var sorted: Boolean = false - @Args4jOption(required = false, name = "-bedtools_path", usage = "Path to the Bedtools executable. Defaults to bedtools.") - var bedtoolsPath: String = "bedtools" + @Args4jOption(required = false, name = "-executable", usage = "Path to the Bedtools executable. Defaults to bedtools.") + var executable: String = "bedtools" - @Args4jOption(required = false, name = "-docker_image", usage = "Docker image to use. Defaults to quay.io/biocontainers/bedtools:2.27.1--0.") - var dockerImage: String = "quay.io/biocontainers/bedtools:2.27.1--0" + @Args4jOption(required = false, name = "-image", usage = "Container image to use. 
Defaults to quay.io/biocontainers/bedtools:2.27.1--0.") + var image: String = "quay.io/biocontainers/bedtools:2.27.1--0" - @Args4jOption(required = false, name = "-use_docker", usage = "If true, uses Docker to launch Bedtools. If false, uses the Bedtools executable path.") + @Args4jOption(required = false, name = "-sudo", usage = "Run via sudo.") + var sudo: Boolean = false + + @Args4jOption(required = false, name = "-add_files", usage = "If true, use the SparkFiles mechanism to distribute files to executors.") + var addFiles: Boolean = false + + @Args4jOption(required = false, name = "-use_docker", usage = "If true, uses Docker to launch Bedtools.") var useDocker: Boolean = false + + @Args4jOption(required = false, name = "-use_singularity", usage = "If true, uses Singularity to launch Bedtools.") + var useSingularity: Boolean = false } /** @@ -61,29 +76,15 @@ class BedtoolsFnArgs extends Args4jBase { * args.b = "foo.bed" * args.useDocker = true * val features = ... - * val pipedFeatures = new BedtoolsFn(args).apply(features) + * val pipedFeatures = new BedtoolsFn(args, sc).apply(features) * * * @param args Bedtools function arguments. - * @param files Files to make locally available to the commands being run. - * @param environment A map containing environment variable/value pairs to set - * in the environment for the newly created process. + * @param sc Spark context. */ class BedtoolsFn( val args: BedtoolsFnArgs, - val files: Seq[String], - val environment: Map[String, String]) extends Function1[FeatureRDD, FeatureRDD] with Logging { - - /** - * @param args Bedtools function arguments. - */ - def this(args: BedtoolsFnArgs) = this(args, Seq.empty, Map.empty) - - /** - * @param args Bedtools function arguments. - * @param files Files to make locally available to the commands being run. 
- */ - def this(args: BedtoolsFnArgs, files: Seq[String]) = this(args, files, Map.empty) + sc: SparkContext) extends CannoliFn[FeatureRDD, FeatureRDD](sc) with Logging { override def apply(features: FeatureRDD): FeatureRDD = { val optA = Option(args.a) @@ -91,36 +92,36 @@ class BedtoolsFn( require(optA.size + optB.size == 1, "Strictly one of {-a,-b} should be left unspecified to accept piped input.") - val bedtoolsCommand = if (args.useDocker) { - Seq("docker", - "run", - "--rm", - args.dockerImage, - "bedtools", - "intersect", - "-a", - optA.getOrElse("stdin"), - "-b", - optB.getOrElse("stdin"), - if (args.sorted) "-sorted" else "" - ) - } else { - Seq(args.bedtoolsPath, - "intersect", - "-a", - optA.getOrElse("stdin"), - "-b", - optB.getOrElse("stdin"), - if (args.sorted) "-sorted" else "" - ) + val file = List(optA, optB).flatten.get(0) + + var builder = CommandBuilders.create(args.useDocker, args.useSingularity) + .setExecutable(args.executable) + .add("intersect") + .add("-a") + .add(optA.fold("stdin")(f => if (args.addFiles) "$0" else absolute(f))) + .add("-b") + .add(optB.fold("stdin")(f => if (args.addFiles) "$0" else absolute(f))) + + if (args.sorted) builder.add("-sorted") + if (args.addFiles) builder.addFile(file) + + if (args.useDocker || args.useSingularity) { + builder + .setImage(args.image) + .setSudo(args.sudo) + .addMount(if (args.addFiles) "$root" else root(file)) } - log.info("Piping {} to bedtools with command: {} files: {} environment: {}", - Array(features, bedtoolsCommand, files, environment)) + log.info("Piping {} to bedtools with command: {} files: {}", + features, builder.build(), builder.getFiles()) implicit val tFormatter = BEDInFormatter implicit val uFormatter = new BEDOutFormatter - features.pipe(bedtoolsCommand, files, environment) + + features.pipe[Feature, FeatureProduct, FeatureRDD, BEDInFormatter]( + cmd = builder.build(), + files = builder.getFiles() + ) } } @@ -137,12 +138,18 @@ object Bedtools extends BDGCommandCompanion { * 
Bedtools command line arguments. */ class BedtoolsArgs extends BedtoolsFnArgs with ADAMSaveAnyArgs with ParquetArgs { - @Argument(required = true, metaVar = "INPUT", usage = "Location to pipe from.", index = 0) + @Argument(required = true, metaVar = "INPUT", usage = "Location to pipe features from (e.g., .bed, .gff/.gtf, .gff3, .interval_list, .narrowPeak). If extension is not detected, Parquet is assumed.", index = 0) var inputPath: String = null - @Argument(required = true, metaVar = "OUTPUT", usage = "Location to pipe to.", index = 1) + @Argument(required = true, metaVar = "OUTPUT", usage = "Location to pipe features to. If extension is not detected, Parquet is assumed.", index = 1) var outputPath: String = null + @Args4jOption(required = false, name = "-limit_projection", usage = "If input is Parquet, limit to BED format-only fields by projection.") + var limitProjection: Boolean = false + + @Args4jOption(required = false, name = "-partitions", usage = "Number of partitions to use when loading a text file.") + var partitions: Int = _ + @Args4jOption(required = false, name = "-single", usage = "Saves OUTPUT as single file.") var asSingleFile: Boolean = false @@ -163,8 +170,21 @@ class Bedtools(protected val args: BedtoolsArgs) extends BDGSparkCommand[Bedtool val companion = Bedtools override def run(sc: SparkContext) { - val features = sc.loadFeatures(args.inputPath) - val pipedFeatures = new BedtoolsFn(args).apply(features) + val projection = Projection( + FeatureField.contigName, + FeatureField.start, + FeatureField.end, + FeatureField.name, + FeatureField.score, + FeatureField.strand + ) + + val features = sc.loadFeatures( + args.inputPath, + optMinPartitions = Option(args.partitions), + optProjection = if (args.limitProjection) Some(projection) else None + ) + val pipedFeatures = new BedtoolsFn(args, sc).apply(features) pipedFeatures.save(args.outputPath, asSingleFile = args.asSingleFile, diff --git 
a/cli/src/main/scala/org/bdgenomics/cannoli/cli/Bowtie.scala b/cli/src/main/scala/org/bdgenomics/cannoli/cli/Bowtie.scala index 62c73f5b..bb894d32 100644 --- a/cli/src/main/scala/org/bdgenomics/cannoli/cli/Bowtie.scala +++ b/cli/src/main/scala/org/bdgenomics/cannoli/cli/Bowtie.scala @@ -18,29 +18,43 @@ package org.bdgenomics.cannoli.cli import htsjdk.samtools.ValidationStringency +import org.apache.hadoop.fs.{ FileSystem, Path, PathFilter } import org.apache.spark.SparkContext import org.bdgenomics.adam.rdd.ADAMContext._ import org.bdgenomics.adam.rdd.ADAMSaveAnyArgs import org.bdgenomics.adam.rdd.fragment.{ FragmentRDD, InterleavedFASTQInFormatter } import org.bdgenomics.adam.rdd.read.{ AlignmentRecordRDD, AnySAMOutFormatter } +import org.bdgenomics.adam.sql.{ AlignmentRecord => AlignmentRecordProduct } +import org.bdgenomics.cannoli.builder.CommandBuilders +import org.bdgenomics.formats.avro.AlignmentRecord import org.bdgenomics.utils.cli._ import org.bdgenomics.utils.misc.Logging import org.kohsuke.args4j.{ Argument, Option => Args4jOption } +import scala.collection.JavaConversions._ /** * Bowtie function arguments. */ class BowtieFnArgs extends Args4jBase { - @Args4jOption(required = false, name = "-bowtie_path", usage = "Path to the Bowtie executable. Defaults to bowtie.") - var bowtiePath: String = "bowtie" + @Args4jOption(required = false, name = "-executable", usage = "Path to the Bowtie executable. Defaults to bowtie.") + var executable: String = "bowtie" - @Args4jOption(required = false, name = "-docker_image", usage = "Docker image to use. Defaults to quay.io/biocontainers/bowtie:1.2.1.1--py27pl5.22.0_0.") - var dockerImage: String = "quay.io/biocontainers/bowtie:1.2.1.1--py27pl5.22.0_0" + @Args4jOption(required = false, name = "-image", usage = "Container image to use. 
Defaults to quay.io/biocontainers/bowtie:1.2.1.1--py27pl5.22.0_0.") + var image: String = "quay.io/biocontainers/bowtie:1.2.1.1--py27pl5.22.0_0" - @Args4jOption(required = false, name = "-use_docker", usage = "If true, uses Docker to launch Bowtie. If false, uses the Bowtie executable path.") + @Args4jOption(required = false, name = "-sudo", usage = "Run via sudo.") + var sudo: Boolean = false + + @Args4jOption(required = false, name = "-add_files", usage = "If true, use the SparkFiles mechanism to distribute files to executors.") + var addFiles: Boolean = false + + @Args4jOption(required = false, name = "-use_docker", usage = "If true, uses Docker to launch Bowtie.") var useDocker: Boolean = false - @Args4jOption(required = true, name = "-bowtie_index", usage = "Basename of the bowtie index to be searched, e.g. in bowtie [options]* ...") + @Args4jOption(required = false, name = "-use_singularity", usage = "If true, uses Singularity to launch Bowtie.") + var useSingularity: Boolean = false + + @Args4jOption(required = true, name = "-index", usage = "Basename of the bowtie index to be searched, e.g. in bowtie [options]* .") var indexPath: String = null } @@ -49,56 +63,45 @@ class BowtieFnArgs extends Args4jBase { * for use in cannoli-shell or notebooks. * * @param args Bowtie function arguments. - * @param files Files to make locally available to the commands being run. - * @param environment A map containing environment variable/value pairs to set - * in the environment for the newly created process. + * @param sc Spark context. */ class BowtieFn( val args: BowtieFnArgs, - val files: Seq[String], - val environment: Map[String, String]) extends Function1[FragmentRDD, AlignmentRecordRDD] with Logging { - - /** - * @param args Bowtie function arguments. - */ - def this(args: BowtieFnArgs) = this(args, Seq.empty, Map.empty) - - /** - * @param args Bowtie function arguments. - * @param files Files to make locally available to the commands being run. 
- */ - def this(args: BowtieFnArgs, files: Seq[String]) = this(args, files, Map.empty) + sc: SparkContext) extends CannoliFn[FragmentRDD, AlignmentRecordRDD](sc) with Logging { override def apply(fragments: FragmentRDD): AlignmentRecordRDD = { - val bowtieCommand = if (args.useDocker) { - Seq("docker", - "run", - "--interactive", - "--rm", - args.dockerImage, - "bowtie", - "-S", - args.indexPath, - "--interleaved", - "-" - ) - } else { - Seq(args.bowtiePath, - "-S", - args.indexPath, - "--interleaved", - "-" - ) + val builder = CommandBuilders.create(args.useDocker, args.useSingularity) + .setExecutable(args.executable) + .add("-S") + .add(if (args.addFiles) "$0" else absolute(args.indexPath)) + .add("--interleaved") + .add("-") + + if (args.addFiles) { + // add args.indexPath for "$0" + builder.addFile(args.indexPath) + // add bowtie indexes via globbed index path + builder.addFiles(files(args.indexPath + "*.ebwt")) + } + + if (args.useDocker || args.useSingularity) { + builder + .setImage(args.image) + .setSudo(args.sudo) + .addMount(if (args.addFiles) "$root" else root(args.indexPath)) } - log.info("Piping {} to bowtie with command: {} files: {} environment: {}", - Array(fragments, bowtieCommand, files, environment)) + log.info("Piping {} to bowtie with command: {} files: {}", + fragments, builder.build(), builder.getFiles()) implicit val tFormatter = InterleavedFASTQInFormatter implicit val uFormatter = new AnySAMOutFormatter - fragments.pipe(bowtieCommand, files, environment) + fragments.pipe[AlignmentRecord, AlignmentRecordProduct, AlignmentRecordRDD, InterleavedFASTQInFormatter]( + cmd = builder.build(), + files = builder.getFiles() + ) } } @@ -115,10 +118,10 @@ object Bowtie extends BDGCommandCompanion { * Bowtie command line arguments. 
*/ class BowtieArgs extends BowtieFnArgs with ADAMSaveAnyArgs with ParquetArgs { - @Argument(required = true, metaVar = "INPUT", usage = "Location to pipe from, in interleaved FASTQ format.", index = 0) + @Argument(required = true, metaVar = "INPUT", usage = "Location to pipe fragments from (e.g. interleaved FASTQ format, .ifq). If extension is not detected, Parquet is assumed.", index = 0) var inputPath: String = null - @Argument(required = true, metaVar = "OUTPUT", usage = "Location to pipe to.", index = 1) + @Argument(required = true, metaVar = "OUTPUT", usage = "Location to pipe alignments to (e.g. .bam, .cram, .sam). If extension is not detected, Parquet is assumed.", index = 1) var outputPath: String = null @Args4jOption(required = false, name = "-single", usage = "Saves OUTPUT as single file.") @@ -146,7 +149,7 @@ class Bowtie(protected val args: BowtieArgs) extends BDGSparkCommand[BowtieArgs] def run(sc: SparkContext) { val fragments = sc.loadFragments(args.inputPath, stringency = stringency) - val alignments = new BowtieFn(args).apply(fragments) + val alignments = new BowtieFn(args, sc).apply(fragments) alignments.save(args) } } diff --git a/cli/src/main/scala/org/bdgenomics/cannoli/cli/Bowtie2.scala b/cli/src/main/scala/org/bdgenomics/cannoli/cli/Bowtie2.scala index a6313637..5079f11f 100644 --- a/cli/src/main/scala/org/bdgenomics/cannoli/cli/Bowtie2.scala +++ b/cli/src/main/scala/org/bdgenomics/cannoli/cli/Bowtie2.scala @@ -18,29 +18,43 @@ package org.bdgenomics.cannoli.cli import htsjdk.samtools.ValidationStringency +import org.apache.hadoop.fs.{ FileSystem, Path } import org.apache.spark.SparkContext import org.bdgenomics.adam.rdd.ADAMContext._ import org.bdgenomics.adam.rdd.ADAMSaveAnyArgs import org.bdgenomics.adam.rdd.fragment.{ FragmentRDD, InterleavedFASTQInFormatter } import org.bdgenomics.adam.rdd.read.{ AlignmentRecordRDD, AnySAMOutFormatter } +import org.bdgenomics.adam.sql.{ AlignmentRecord => AlignmentRecordProduct } +import 
org.bdgenomics.cannoli.builder.CommandBuilders +import org.bdgenomics.formats.avro.AlignmentRecord import org.bdgenomics.utils.cli._ import org.bdgenomics.utils.misc.Logging import org.kohsuke.args4j.{ Argument, Option => Args4jOption } +import scala.collection.JavaConversions._ /** * Bowtie 2 function arguments. */ class Bowtie2FnArgs extends Args4jBase { - @Args4jOption(required = false, name = "-bowtie2_path", usage = "Path to the Bowtie 2 executable. Defaults to bowtie2.") - var bowtie2Path: String = "bowtie2" + @Args4jOption(required = false, name = "-executable", usage = "Path to the Bowtie 2 executable. Defaults to bowtie2.") + var executable: String = "bowtie2" - @Args4jOption(required = false, name = "-docker_image", usage = "Docker image to use. Defaults to quay.io/biocontainers/bowtie2:2.3.4--py27pl5.22.0_0.") - var dockerImage: String = "quay.io/biocontainers/bowtie2:2.3.4--py27pl5.22.0_0" + @Args4jOption(required = false, name = "-image", usage = "Container image to use. Defaults to quay.io/biocontainers/bowtie2:2.3.4--py27pl5.22.0_0.") + var image: String = "quay.io/biocontainers/bowtie2:2.3.4--py27pl5.22.0_0" - @Args4jOption(required = false, name = "-use_docker", usage = "If true, uses Docker to launch Bowtie 2. If false, uses the Bowtie 2 executable path.") + @Args4jOption(required = false, name = "-sudo", usage = "Run via sudo.") + var sudo: Boolean = false + + @Args4jOption(required = false, name = "-add_files", usage = "If true, use the SparkFiles mechanism to distribute files to executors.") + var addFiles: Boolean = false + + @Args4jOption(required = false, name = "-use_docker", usage = "If true, uses Docker to launch Bowtie 2.") var useDocker: Boolean = false - @Args4jOption(required = true, name = "-bowtie2_index", usage = "Basename of the index for the reference genome, e.g. 
in bowtie2 [options]* -x .") + @Args4jOption(required = false, name = "-use_singularity", usage = "If true, uses Singularity to launch Bowtie 2.") + var useSingularity: Boolean = false + + @Args4jOption(required = true, name = "-index", usage = "Basename of the index for the reference genome, e.g. in bowtie2 [options]* -x .") var indexPath: String = null } @@ -49,56 +63,45 @@ class Bowtie2FnArgs extends Args4jBase { * for use in cannoli-shell or notebooks. * * @param args Bowtie 2 function arguments. - * @param files Files to make locally available to the commands being run. - * @param environment A map containing environment variable/value pairs to set - * in the environment for the newly created process. + * @param sc Spark context. */ class Bowtie2Fn( val args: Bowtie2FnArgs, - val files: Seq[String], - val environment: Map[String, String]) extends Function1[FragmentRDD, AlignmentRecordRDD] with Logging { - - /** - * @param args Bowtie 2 function arguments. - */ - def this(args: Bowtie2FnArgs) = this(args, Seq.empty, Map.empty) - - /** - * @param args Bowtie 2 function arguments. - * @param files Files to make locally available to the commands being run. 
- */ - def this(args: Bowtie2FnArgs, files: Seq[String]) = this(args, files, Map.empty) + sc: SparkContext) extends CannoliFn[FragmentRDD, AlignmentRecordRDD](sc) with Logging { override def apply(fragments: FragmentRDD): AlignmentRecordRDD = { - val bowtie2Command = if (args.useDocker) { - Seq("docker", - "run", - "--interactive", - "--rm", - args.dockerImage, - "bowtie2", - "-x", - args.indexPath, - "--interleaved", - "-" - ) - } else { - Seq(args.bowtie2Path, - "-x", - args.indexPath, - "--interleaved", - "-" - ) + val builder = CommandBuilders.create(args.useDocker, args.useSingularity) + .setExecutable(args.executable) + .add("-x") + .add(if (args.addFiles) "$0" else absolute(args.indexPath)) + .add("--interleaved") + .add("-") + + if (args.addFiles) { + // add args.indexPath for "$0" + builder.addFile(args.indexPath) + // add bowtie2 indexes via globbed index path + builder.addFiles(files(args.indexPath + "*.bt2")) + } + + if (args.useDocker || args.useSingularity) { + builder + .setImage(args.image) + .setSudo(args.sudo) + .addMount(if (args.addFiles) "$root" else root(args.indexPath)) } - log.info("Piping {} to bowtie2 with command: {} files: {} environment: {}", - Array(fragments, bowtie2Command, files, environment)) + log.info("Piping {} to bowtie2 with command: {} files: {}", + fragments, builder.build(), builder.getFiles()) implicit val tFormatter = InterleavedFASTQInFormatter implicit val uFormatter = new AnySAMOutFormatter - fragments.pipe(bowtie2Command, files, environment) + fragments.pipe[AlignmentRecord, AlignmentRecordProduct, AlignmentRecordRDD, InterleavedFASTQInFormatter]( + cmd = builder.build(), + files = builder.getFiles() + ) } } @@ -115,10 +118,10 @@ object Bowtie2 extends BDGCommandCompanion { * Bowtie 2 command line arguments. 
*/ class Bowtie2Args extends Bowtie2FnArgs with ADAMSaveAnyArgs with ParquetArgs { - @Argument(required = true, metaVar = "INPUT", usage = "Location to pipe from, in interleaved FASTQ format.", index = 0) + @Argument(required = true, metaVar = "INPUT", usage = "Location to pipe fragments from (e.g. interleaved FASTQ format, .ifq). If extension is not detected, Parquet is assumed.", index = 0) var inputPath: String = null - @Argument(required = true, metaVar = "OUTPUT", usage = "Location to pipe to.", index = 1) + @Argument(required = true, metaVar = "OUTPUT", usage = "Location to pipe alignments to (e.g. .bam, .cram, .sam). If extension is not detected, Parquet is assumed.", index = 1) var outputPath: String = null @Args4jOption(required = false, name = "-single", usage = "Saves OUTPUT as single file.") @@ -146,7 +149,7 @@ class Bowtie2(protected val args: Bowtie2Args) extends BDGSparkCommand[Bowtie2Ar def run(sc: SparkContext) { val fragments = sc.loadFragments(args.inputPath, stringency = stringency) - val alignments = new Bowtie2Fn(args).apply(fragments) + val alignments = new Bowtie2Fn(args, sc).apply(fragments) alignments.save(args) } } diff --git a/cli/src/main/scala/org/bdgenomics/cannoli/cli/Bwa.scala b/cli/src/main/scala/org/bdgenomics/cannoli/cli/Bwa.scala index 734e9a3e..ef2bc525 100644 --- a/cli/src/main/scala/org/bdgenomics/cannoli/cli/Bwa.scala +++ b/cli/src/main/scala/org/bdgenomics/cannoli/cli/Bwa.scala @@ -25,10 +25,14 @@ import org.bdgenomics.adam.rdd.ADAMContext._ import org.bdgenomics.adam.rdd.ADAMSaveAnyArgs import org.bdgenomics.adam.rdd.fragment.{ FragmentRDD, InterleavedFASTQInFormatter } import org.bdgenomics.adam.rdd.read.{ AlignmentRecordRDD, AnySAMOutFormatter } +import org.bdgenomics.adam.sql.{ AlignmentRecord => AlignmentRecordProduct } +import org.bdgenomics.cannoli.builder.CommandBuilders import org.bdgenomics.cannoli.util.QuerynameGrouper +import org.bdgenomics.formats.avro.AlignmentRecord import org.bdgenomics.utils.cli._ import 
org.bdgenomics.utils.misc.Logging import org.kohsuke.args4j.{ Argument, Option => Args4jOption } +import scala.collection.JavaConversions._ /** * Bwa function arguments. @@ -37,23 +41,26 @@ class BwaFnArgs extends Args4jBase { @Argument(required = true, metaVar = "SAMPLE", usage = "Sample ID.", index = 2) var sample: String = null - @Args4jOption(required = true, name = "-index", usage = "Path to the bwa index to be searched, e.g. in bwa [options]* ...") + @Args4jOption(required = true, name = "-index", usage = "Path to the BWA index to be searched, e.g. in bwa [options]* .") var indexPath: String = null - @Args4jOption(required = false, name = "-bwa_path", usage = "Path to the BWA executable. Defaults to bwa.") - var bwaPath: String = "bwa" + @Args4jOption(required = false, name = "-executable", usage = "Path to the BWA executable. Defaults to bwa.") + var executable: String = "bwa" - @Args4jOption(required = false, name = "-docker_image", usage = "Docker image to use. Defaults to quay.io/biocontainers/bwa:0.7.17--pl5.22.0_0.") - var dockerImage: String = "quay.io/biocontainers/bwa:0.7.17--pl5.22.0_0" + @Args4jOption(required = false, name = "-image", usage = "Container image to use. Defaults to quay.io/biocontainers/bwa:0.7.17--pl5.22.0_0.") + var image: String = "quay.io/biocontainers/bwa:0.7.17--pl5.22.0_0" - @Args4jOption(required = false, name = "-use_docker", usage = "If true, uses Docker to launch BWA. If false, uses the BWA executable path.") - var useDocker: Boolean = false + @Args4jOption(required = false, name = "-sudo", usage = "Run via sudo.") + var sudo: Boolean = false + + @Args4jOption(required = false, name = "-add_files", usage = "If true, use the SparkFiles mechanism to distribute files to executors.") + var addFiles: Boolean = false - @Args4jOption(required = false, name = "-docker_cmd", usage = "The docker command to run. 
Defaults to 'docker'.") - var dockerCmd: String = "docker" + @Args4jOption(required = false, name = "-use_docker", usage = "If true, uses Docker to launch BWA.") + var useDocker: Boolean = false - @Args4jOption(required = false, name = "-add_indices", usage = "Adds index files via SparkFiles mechanism.") - var addIndices: Boolean = false + @Args4jOption(required = false, name = "-use_singularity", usage = "If true, uses Singularity to launch BWA.") + var useSingularity: Boolean = false } /** @@ -61,29 +68,11 @@ class BwaFnArgs extends Args4jBase { * for use in cannoli-shell or notebooks. * * @param args Bwa function arguments. - * @param files Files to make locally available to the commands being run. - * @param environment A map containing environment variable/value pairs to set - * in the environment for the newly created process. * @param sc Spark context. */ class BwaFn( val args: BwaFnArgs, - val files: Seq[String], - val environment: Map[String, String], - val sc: SparkContext) extends Function1[FragmentRDD, AlignmentRecordRDD] with Logging { - - /** - * @param args Bwa function arguments. - * @param sc Spark context. - */ - def this(args: BwaFnArgs, sc: SparkContext) = this(args, Seq.empty, Map.empty, sc) - - /** - * @param args Bwa function arguments. - * @param files Files to make locally available to the commands being run. - * @param sc Spark context. 
- */ - def this(args: BwaFnArgs, files: Seq[String], sc: SparkContext) = this(args, files, Map.empty, sc) + sc: SparkContext) extends CannoliFn[FragmentRDD, AlignmentRecordRDD](sc) with Logging { override def apply(fragments: FragmentRDD): AlignmentRecordRDD = { val sample = args.sample @@ -105,7 +94,7 @@ class BwaFn( } def optionalPath(ext: String): Option[String] = { - val path = new Path(fastaPath, ext) + val path = new Path(fastaPath + ext) val fs = path.getFileSystem(sc.hadoopConfiguration) if (fs.exists(path)) { Some(canonicalizePath(fs, path)) @@ -126,49 +115,38 @@ class BwaFn( pathsWithScheme ++ optionalPathsWithScheme } - val (filesToAdd, bwaCommand) = if (args.useDocker) { - val (mountpoint, indexPath, filesToMount) = if (args.addIndices) { - ("$root", "$0", getIndexPaths(args.indexPath)) - } else { - (Path.getPathWithoutSchemeAndAuthority(new Path(args.indexPath).getParent()).toString, - args.indexPath, - Seq.empty) - } - - (filesToMount, Seq(args.dockerCmd, - "-v", "%s:%s".format(mountpoint, mountpoint), - "run", - "--rm", - args.dockerImage, - "mem", - "-t", "1", - "-R", s"@RG\\tID:${sample}\\tLB:${sample}\\tPL:ILLUMINA\\tPU:0\\tSM:${sample}", - "-p", - indexPath, - "-")) - } else { - val (indexPath, filesToMount) = if (args.addIndices) { - ("$0", getIndexPaths(args.indexPath)) - } else { - (args.indexPath, Seq.empty) - } + var builder = CommandBuilders.create(args.useDocker, args.useSingularity) + .setExecutable(args.executable) + .add("mem") + .add("-t") + .add("1") + .add("-R") + .add(s"@RG\\tID:${sample}\\tLB:${sample}\\tPL:ILLUMINA\\tPU:0\\tSM:${sample}") + .add("-p") + .add(if (args.addFiles) "$0" else args.indexPath) + .add("-") + + if (args.addFiles) { + getIndexPaths(args.indexPath).foreach(builder.addFile(_)) + } - (filesToMount, Seq(args.bwaPath, - "mem", - "-t", "1", - "-R", s"@RG\\tID:${sample}\\tLB:${sample}\\tPL:ILLUMINA\\tPU:0\\tSM:${sample}", - "-p", - args.indexPath, - "-")) + if (args.useDocker || args.useSingularity) { + builder + 
.setImage(args.image) + .setSudo(args.sudo) + .addMount(if (args.addFiles) "$root" else root(args.indexPath)) } - log.info("Piping {} to bwa with command: {} files: {} environment: {}", - Array(fragments, bwaCommand, files, environment)) + log.info("Piping {} to bwa with command: {} files: {}", + fragments, builder.build(), builder.getFiles()) implicit val tFormatter = InterleavedFASTQInFormatter implicit val uFormatter = new AnySAMOutFormatter - fragments.pipe(bwaCommand, files, environment) + fragments.pipe[AlignmentRecord, AlignmentRecordProduct, AlignmentRecordRDD, InterleavedFASTQInFormatter]( + cmd = builder.build(), + files = builder.getFiles() + ) } } diff --git a/cli/src/main/scala/org/bdgenomics/cannoli/cli/CannoliFn.scala b/cli/src/main/scala/org/bdgenomics/cannoli/cli/CannoliFn.scala new file mode 100644 index 00000000..0892a5da --- /dev/null +++ b/cli/src/main/scala/org/bdgenomics/cannoli/cli/CannoliFn.scala @@ -0,0 +1,103 @@ +/** + * Licensed to Big Data Genomics (BDG) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The BDG licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.bdgenomics.cannoli.cli + +import java.io.FileNotFoundException +import org.apache.hadoop.fs.{ Path, PathFilter } +import org.apache.spark.SparkContext + +/** + * Cannoli function. + * + * @param sc Spark context. 
+ * @tparam X Cannoli function argument type parameter. + * @tparam Y Cannoli function result type parameter. + */ +abstract class CannoliFn[X, Y](val sc: SparkContext) extends Function1[X, Y] { + + def absolute(pathName: String): String = { + val path = new Path(pathName) + + // get the underlying fs for the file + val fs = Option(path.getFileSystem(sc.hadoopConfiguration)).getOrElse( + throw new FileNotFoundException( + s"Couldn't find filesystem for ${path.toUri} with Hadoop configuration ${sc.hadoopConfiguration}" + )) + + Path.getPathWithoutSchemeAndAuthority(fs.resolvePath(path)).toString + } + + def root(pathName: String): String = { + val path = new Path(pathName) + + // get the underlying fs for the file + val fs = Option(path.getFileSystem(sc.hadoopConfiguration)).getOrElse( + throw new FileNotFoundException( + s"Couldn't find filesystem for ${path.toUri} with Hadoop configuration ${sc.hadoopConfiguration}" + )) + + Path.getPathWithoutSchemeAndAuthority(fs.resolvePath(path).getParent()).toString + } + + def files(pathName: String): Seq[String] = { + files(pathName, new PathFilter() { + def accept(path: Path): Boolean = { + return true + } + }) + } + + def files(pathName: String, filter: PathFilter): Seq[String] = { + val path = new Path(pathName) + + // get the underlying fs for the file + val fs = Option(path.getFileSystem(sc.hadoopConfiguration)).getOrElse( + throw new FileNotFoundException( + s"Couldn't find filesystem for ${path.toUri} with Hadoop configuration ${sc.hadoopConfiguration}" + )) + + // elaborate out the path; this returns FileStatuses + val paths = if (fs.isDirectory(path)) { + val paths = fs.listStatus(path) + if (paths.isEmpty) { + throw new FileNotFoundException( + s"Couldn't find any files matching ${path.toUri}, directory is empty" + ) + } + fs.listStatus(path, filter) + } else { + val paths = fs.globStatus(path) + if (paths == null || paths.isEmpty) { + throw new FileNotFoundException( + s"Couldn't find any files matching 
${path.toUri}" + ) + } + fs.globStatus(path, filter) + } + + // the path must match PathFilter + if (paths == null || paths.isEmpty) { + throw new FileNotFoundException( + s"Couldn't find any files matching ${path.toUri} for the requested PathFilter" + ) + } + + // map the paths returned to their paths + paths.map(_.getPath.toString) + } +} diff --git a/cli/src/main/scala/org/bdgenomics/cannoli/cli/Freebayes.scala b/cli/src/main/scala/org/bdgenomics/cannoli/cli/Freebayes.scala index 2e55c8d5..c1e16dfe 100644 --- a/cli/src/main/scala/org/bdgenomics/cannoli/cli/Freebayes.scala +++ b/cli/src/main/scala/org/bdgenomics/cannoli/cli/Freebayes.scala @@ -19,6 +19,7 @@ package org.bdgenomics.cannoli.cli import htsjdk.samtools.ValidationStringency import htsjdk.variant.vcf.VCFHeaderLine +import org.apache.hadoop.fs.{ FileSystem, Path } import org.apache.spark.SparkContext import org.apache.spark.util.CollectionAccumulator import org.bdgenomics.adam.models.VariantContext @@ -26,6 +27,9 @@ import org.bdgenomics.adam.rdd.ADAMContext._ import org.bdgenomics.adam.rdd.ADAMSaveAnyArgs import org.bdgenomics.adam.rdd.read.{ AlignmentRecordRDD, BAMInFormatter } import org.bdgenomics.adam.rdd.variant.{ VariantContextRDD, VCFOutFormatter } +import org.bdgenomics.adam.sql.{ VariantContext => VariantContextProduct } +import org.bdgenomics.adam.util.FileExtensions._ +import org.bdgenomics.cannoli.builder.CommandBuilders import org.bdgenomics.utils.cli._ import org.bdgenomics.utils.misc.Logging import org.kohsuke.args4j.{ Argument, Option => Args4jOption } @@ -35,17 +39,32 @@ import scala.collection.JavaConversions._ * Freebayes function arguments. */ class FreebayesFnArgs extends Args4jBase { - @Args4jOption(required = false, name = "-freebayes_path", usage = "Path to the Freebayes executable. Defaults to freebayes.") - var freebayesPath: String = "freebayes" + @Args4jOption(required = false, name = "-executable", usage = "Path to the Freebayes executable. 
Defaults to freebayes.") + var executable: String = "freebayes" - @Args4jOption(required = true, name = "-freebayes_reference", usage = "Reference sequence for analysis. An index file (.fai) will be created if none exists.") - var referencePath: String = null + @Args4jOption(required = false, name = "-image", usage = "Container image to use. Defaults to quay.io/biocontainers/freebayes:1.1.0.46--htslib1.6_2.") + var image: String = "quay.io/biocontainers/freebayes:1.1.0.46--htslib1.6_2" + + @Args4jOption(required = false, name = "-sudo", usage = "Run via sudo.") + var sudo: Boolean = false - @Args4jOption(required = false, name = "-docker_image", usage = "Docker image to use. Defaults to quay.io/biocontainers/freebayes:1.1.0.46--htslib1.6_2.") - var dockerImage: String = "quay.io/biocontainers/freebayes:1.1.0.46--htslib1.6_2" + @Args4jOption(required = false, name = "-add_files", usage = "If true, use the SparkFiles mechanism to distribute files to executors.") + var addFiles: Boolean = false - @Args4jOption(required = false, name = "-use_docker", usage = "If true, uses Docker to launch Freebayes. If false, uses the Freebayes executable path.") + @Args4jOption(required = false, name = "-use_docker", usage = "If true, uses Docker to launch Freebayes.") var useDocker: Boolean = false + + @Args4jOption(required = false, name = "-use_singularity", usage = "If true, uses Singularity to launch Freebayes.") + var useSingularity: Boolean = false + + @Args4jOption(required = true, name = "-fasta_reference", usage = "Reference sequence for analysis. 
An index file (.fai) will be created if none exists.") + var referencePath: String = null + + @Args4jOption(required = false, name = "-gvcf", usage = "Write gVCF output or equivalent genotypes which indicate coverage in uncalled regions.") + var gvcf: Boolean = false + + @Args4jOption(required = false, name = "-gvcf_chunk", usage = "When writing gVCF output or equivalent genotypes emit a record for every N bases.") + var gvcfChunk: Integer = _ } /** @@ -53,57 +72,50 @@ class FreebayesFnArgs extends Args4jBase { * for use in cannoli-shell or notebooks. * * @param args Freebayes function arguments. - * @param files Files to make locally available to the commands being run. - * @param environment A map containing environment variable/value pairs to set - * in the environment for the newly created process. * @param sc Spark context. */ class FreebayesFn( val args: FreebayesFnArgs, - val files: Seq[String], - val environment: Map[String, String], - val sc: SparkContext) extends Function1[AlignmentRecordRDD, VariantContextRDD] with Logging { - - /** - * @param args Freebayes function arguments. - * @param sc Spark context. - */ - def this(args: FreebayesFnArgs, sc: SparkContext) = this(args, Seq.empty, Map.empty, sc) - - /** - * @param args Freebayes function arguments. - * @param files Files to make locally available to the commands being run. - * @param sc Spark context. 
- */ - def this(args: FreebayesFnArgs, files: Seq[String], sc: SparkContext) = this(args, files, Map.empty, sc) + sc: SparkContext) extends CannoliFn[AlignmentRecordRDD, VariantContextRDD](sc) with Logging { override def apply(alignments: AlignmentRecordRDD): VariantContextRDD = { - val freebayesCommand = if (args.useDocker) { - Seq("docker", - "run", - "--rm", - args.dockerImage, - "freebayes", - "--fasta-reference", - args.referencePath, - "--stdin") - } else { - Seq(args.freebayesPath, - "--fasta-reference", - args.referencePath, - "--stdin") + var builder = CommandBuilders.create(args.useDocker, args.useSingularity) + .setExecutable(args.executable) + .add("--fasta-reference") + .add(if (args.addFiles) "$0" else absolute(args.referencePath)) + .add("--stdin") + .add("--strict-vcf") + + if (args.gvcf) { + builder.add("--gvcf") + Option(args.gvcfChunk).foreach(i => builder.add("--gvcf-chunk").add(i.toString)) + } + + if (args.addFiles) { + builder.addFile(args.referencePath) + builder.addFile(args.referencePath + ".fai") + } + + if (args.useDocker || args.useSingularity) { + builder + .setImage(args.image) + .setSudo(args.sudo) + .addMount(if (args.addFiles) "$root" else root(args.referencePath)) } - log.info("Piping {} to freebayes with command: {} files: {} environment: {}", - Array(alignments, freebayesCommand, files, environment)) + log.info("Piping {} to freebayes with command: {} files: {}", + alignments, builder.build(), builder.getFiles()) val accumulator: CollectionAccumulator[VCFHeaderLine] = sc.collectionAccumulator("headerLines") implicit val tFormatter = BAMInFormatter implicit val uFormatter = new VCFOutFormatter(sc.hadoopConfiguration, Some(accumulator)) - val variantContexts = alignments.pipe[VariantContext, VariantContextRDD, BAMInFormatter](freebayesCommand, files, environment) + val variantContexts = alignments.pipe[VariantContext, VariantContextProduct, VariantContextRDD, BAMInFormatter]( + cmd = builder.build(), + files = builder.getFiles() + 
) val headerLines = accumulator.value.distinct variantContexts.replaceHeaderLines(headerLines) @@ -123,10 +135,10 @@ object Freebayes extends BDGCommandCompanion { * Freebayes command line arguments. */ class FreebayesArgs extends FreebayesFnArgs with ADAMSaveAnyArgs with ParquetArgs { - @Argument(required = true, metaVar = "INPUT", usage = "Location to pipe from.", index = 0) + @Argument(required = true, metaVar = "INPUT", usage = "Location to pipe alignment records from (e.g. .bam, .cram, .sam). If extension is not detected, Parquet is assumed.", index = 0) var inputPath: String = null - @Argument(required = true, metaVar = "OUTPUT", usage = "Location to pipe to, in VCF format.", index = 1) + @Argument(required = true, metaVar = "OUTPUT", usage = "Location to pipe genotypes to (e.g. .vcf, .vcf.gz, .vcf.bgz). If extension is not detected, Parquet is assumed.", index = 1) var outputPath: String = null @Args4jOption(required = false, name = "-single", usage = "Saves OUTPUT as single file.") @@ -155,6 +167,17 @@ class Freebayes(protected val args: FreebayesArgs) extends BDGSparkCommand[Freeb def run(sc: SparkContext) { val alignments = sc.loadAlignments(args.inputPath, stringency = stringency) val variantContexts = new FreebayesFn(args, sc).apply(alignments) - variantContexts.saveAsVcf(args, stringency) + + if (isVcfExt(args.outputPath)) { + variantContexts.saveAsVcf( + args.outputPath, + asSingleFile = args.asSingleFile, + deferMerging = args.deferMerging, + disableFastConcat = args.disableFastConcat, + stringency + ) + } else { + variantContexts.toGenotypes.saveAsParquet(args) + } } } diff --git a/cli/src/main/scala/org/bdgenomics/cannoli/cli/Samtools.scala b/cli/src/main/scala/org/bdgenomics/cannoli/cli/Samtools.scala index 9152e584..60f23f6b 100644 --- a/cli/src/main/scala/org/bdgenomics/cannoli/cli/Samtools.scala +++ b/cli/src/main/scala/org/bdgenomics/cannoli/cli/Samtools.scala @@ -18,31 +18,45 @@ package org.bdgenomics.cannoli.cli import 
htsjdk.samtools.ValidationStringency +import org.apache.hadoop.fs.{ FileSystem, Path } import org.apache.spark.SparkContext import org.bdgenomics.adam.models.VariantContext import org.bdgenomics.adam.rdd.ADAMContext._ import org.bdgenomics.adam.rdd.ADAMSaveAnyArgs import org.bdgenomics.adam.rdd.read.{ AlignmentRecordRDD, BAMInFormatter } import org.bdgenomics.adam.rdd.variant.{ VariantContextRDD, VCFOutFormatter } +import org.bdgenomics.adam.sql.{ VariantContext => VariantContextProduct } +import org.bdgenomics.adam.util.FileExtensions._ +import org.bdgenomics.cannoli.builder.CommandBuilders import org.bdgenomics.utils.cli._ import org.bdgenomics.utils.misc.Logging import org.kohsuke.args4j.{ Argument, Option => Args4jOption } +import scala.collection.JavaConversions._ /** * Samtools function arguments. */ class SamtoolsFnArgs extends Args4jBase { - @Args4jOption(required = false, name = "-samtools_path", usage = "Path to the samtools executable. Defaults to samtools.") - var samtoolsPath: String = "samtools" + @Args4jOption(required = false, name = "-executable", usage = "Path to the samtools executable. Defaults to samtools.") + var executable: String = "samtools" - @Args4jOption(required = true, name = "-samtools_reference", usage = "Reference sequence for analysis. An index file (.fai) will be created if none exists.") - var referencePath: String = null + @Args4jOption(required = false, name = "-image", usage = "Container image to use. Defaults to quay.io/biocontainers/samtools:1.6--0.") + var image: String = "quay.io/biocontainers/samtools:1.6--0" + + @Args4jOption(required = false, name = "-sudo", usage = "Run via sudo.") + var sudo: Boolean = false - @Args4jOption(required = false, name = "-docker_image", usage = "Docker image to use. 
Defaults to quay.io/biocontainers/samtools:1.6--0.") - var dockerImage: String = "quay.io/biocontainers/samtools:1.6--0" + @Args4jOption(required = false, name = "-add_files", usage = "If true, use the SparkFiles mechanism to distribute files to executors.") + var addFiles: Boolean = false - @Args4jOption(required = false, name = "-use_docker", usage = "If true, uses Docker to launch samtools. If false, uses the samtools executable path.") + @Args4jOption(required = false, name = "-use_docker", usage = "If true, uses Docker to launch samtools.") var useDocker: Boolean = false + + @Args4jOption(required = false, name = "-use_singularity", usage = "If true, uses Singularity to launch samtools.") + var useSingularity: Boolean = false + + @Args4jOption(required = true, name = "-reference", usage = "Reference sequence for analysis. An index file (.fai) will be created if none exists.") + var referencePath: String = null } /** @@ -50,62 +64,45 @@ class SamtoolsFnArgs extends Args4jBase { * for use in cannoli-shell or notebooks. * * @param args Samtools function arguments. - * @param files Files to make locally available to the commands being run. - * @param environment A map containing environment variable/value pairs to set - * in the environment for the newly created process. * @param sc Spark context. */ class SamtoolsFn( val args: SamtoolsFnArgs, - val files: Seq[String], - val environment: Map[String, String], - val sc: SparkContext) extends Function1[AlignmentRecordRDD, VariantContextRDD] with Logging { - - /** - * @param args Samtools function arguments. - * @param sc Spark context. - */ - def this(args: SamtoolsFnArgs, sc: SparkContext) = this(args, Seq.empty, Map.empty, sc) - - /** - * @param args Samtools function arguments. - * @param files Files to make locally available to the commands being run. - * @param sc Spark context. 
- */ - def this(args: SamtoolsFnArgs, files: Seq[String], sc: SparkContext) = this(args, files, Map.empty, sc) + sc: SparkContext) extends CannoliFn[AlignmentRecordRDD, VariantContextRDD](sc) with Logging { override def apply(alignments: AlignmentRecordRDD): VariantContextRDD = { - val samtoolsCommand = if (args.useDocker) { - Seq("docker", - "run", - "--interactive", - "--rm", - args.dockerImage, - "samtools", - "mpileup", - "-", - "--reference", - args.referencePath, - "-v", - "-u") - } else { - Seq(args.samtoolsPath, - "mpileup", - "-", - "--reference", - args.referencePath, - "-v", - "-u") + val builder = CommandBuilders.create(args.useDocker, args.useSingularity) + .setExecutable(args.executable) + .add("mpileup") + .add("-") + .add("--reference") + .add(if (args.addFiles) "$0" else absolute(args.referencePath)) + .add("-v") + .add("-u") + + if (args.addFiles) { + builder.addFile(args.referencePath) + builder.addFile(args.referencePath + ".fai") } - log.info("Piping {} to samtools with command: {} files: {} environment: {}", - Array(alignments, samtoolsCommand, files, environment)) + if (args.useDocker || args.useSingularity) { + builder + .setImage(args.image) + .setSudo(args.sudo) + .addMount(if (args.addFiles) "$root" else root(args.referencePath)) + } + + log.info("Piping {} to samtools with command: {} files: {}", + alignments, builder.build(), builder.getFiles()) implicit val tFormatter = BAMInFormatter implicit val uFormatter = new VCFOutFormatter(sc.hadoopConfiguration) - alignments.pipe[VariantContext, VariantContextRDD, BAMInFormatter](samtoolsCommand, files, environment) + alignments.pipe[VariantContext, VariantContextProduct, VariantContextRDD, BAMInFormatter]( + cmd = builder.build(), + files = builder.getFiles() + ) } } @@ -122,10 +119,10 @@ object Samtools extends BDGCommandCompanion { * Samtools command line arguments. 
*/ class SamtoolsArgs extends SamtoolsFnArgs with ADAMSaveAnyArgs with ParquetArgs { - @Argument(required = true, metaVar = "INPUT", usage = "Location to pipe from.", index = 0) + @Argument(required = true, metaVar = "INPUT", usage = "Location to pipe alignment records from (e.g. .bam, .cram, .sam). If extension is not detected, Parquet is assumed.", index = 0) var inputPath: String = null - @Argument(required = true, metaVar = "OUTPUT", usage = "Location to pipe to, in VCF format.", index = 1) + @Argument(required = true, metaVar = "OUTPUT", usage = "Location to pipe genotypes to (e.g. .vcf, .vcf.gz, .vcf.bgz). If extension is not detected, Parquet is assumed.", index = 1) var outputPath: String = null @Args4jOption(required = false, name = "-single", usage = "Saves OUTPUT as single file.") @@ -154,6 +151,17 @@ class Samtools(protected val args: SamtoolsArgs) extends BDGSparkCommand[Samtool def run(sc: SparkContext) { val alignments = sc.loadAlignments(args.inputPath, stringency = stringency) val variantContexts = new SamtoolsFn(args, sc).apply(alignments) - variantContexts.saveAsVcf(args, stringency) + + if (isVcfExt(args.outputPath)) { + variantContexts.saveAsVcf( + args.outputPath, + asSingleFile = args.asSingleFile, + deferMerging = args.deferMerging, + disableFastConcat = args.disableFastConcat, + stringency + ) + } else { + variantContexts.toGenotypes.saveAsParquet(args) + } } } diff --git a/cli/src/main/scala/org/bdgenomics/cannoli/cli/SnpEff.scala b/cli/src/main/scala/org/bdgenomics/cannoli/cli/SnpEff.scala index 1875d726..ee5763bc 100644 --- a/cli/src/main/scala/org/bdgenomics/cannoli/cli/SnpEff.scala +++ b/cli/src/main/scala/org/bdgenomics/cannoli/cli/SnpEff.scala @@ -19,6 +19,7 @@ package org.bdgenomics.cannoli.cli import htsjdk.samtools.ValidationStringency import org.apache.spark.SparkContext +import org.bdgenomics.adam.models.VariantContext import org.bdgenomics.adam.rdd.ADAMContext._ import org.bdgenomics.adam.rdd.ADAMSaveAnyArgs import 
org.bdgenomics.adam.rdd.variant.{ @@ -26,25 +27,34 @@ import org.bdgenomics.adam.rdd.variant.{ VCFInFormatter, VCFOutFormatter } +import org.bdgenomics.adam.sql.{ VariantContext => VariantContextProduct } +import org.bdgenomics.cannoli.builder.CommandBuilders import org.bdgenomics.utils.cli._ import org.bdgenomics.utils.misc.Logging import org.kohsuke.args4j.{ Argument, Option => Args4jOption } +import scala.collection.JavaConversions._ /** * SnpEff function arguments. */ class SnpEffFnArgs extends Args4jBase { - @Args4jOption(required = false, name = "-database", usage = "SnpEff database name. Defaults to GRCh38.86.") - var snpEffDatabase: String = "GRCh38.86" + @Args4jOption(required = false, name = "-executable", usage = "Path to the SnpEff executable. Defaults to snpEff.") + var executable: String = "snpEff" - @Args4jOption(required = false, name = "-snpeff_path", usage = "Path to the SnpEff executable. Defaults to snpEff.") - var snpEffPath: String = "snpEff" + @Args4jOption(required = false, name = "-image", usage = "Container image to use. Defaults to quay.io/biocontainers/snpeff:4.3.1t--0.") + var image: String = "quay.io/biocontainers/snpeff:4.3.1t--0" - @Args4jOption(required = false, name = "-docker_image", usage = "Docker image to use. Defaults to quay.io/biocontainers/snpeff:4.3.1t--0.") - var dockerImage: String = "quay.io/biocontainers/snpeff:4.3.1t--0" + @Args4jOption(required = false, name = "-sudo", usage = "Run via sudo.") + var sudo: Boolean = false - @Args4jOption(required = false, name = "-use_docker", usage = "If true, uses Docker to launch SnpEff. 
If false, uses the SnpEff executable path.") + @Args4jOption(required = false, name = "-use_docker", usage = "If true, uses Docker to launch SnpEff.") var useDocker: Boolean = false + + @Args4jOption(required = false, name = "-use_singularity", usage = "If true, uses Singularity to launch SnpEff.") + var useSingularity: Boolean = false + + @Args4jOption(required = false, name = "-database", usage = "SnpEff database name. Defaults to GRCh38.86.") + var database: String = "GRCh38.86" } /** @@ -52,53 +62,35 @@ class SnpEffFnArgs extends Args4jBase { * for use in cannoli-shell or notebooks. * * @param args SnpEff function arguments. - * @param files Files to make locally available to the commands being run. - * @param environment A map containing environment variable/value pairs to set - * in the environment for the newly created process. * @param sc Spark context. */ class SnpEffFn( val args: SnpEffFnArgs, - val files: Seq[String], - val environment: Map[String, String], - val sc: SparkContext) extends Function1[VariantContextRDD, VariantContextRDD] with Logging { - - /** - * @param args SnpEff function arguments. - * @param sc Spark context. - */ - def this(args: SnpEffFnArgs, sc: SparkContext) = this(args, Seq.empty, Map.empty, sc) - - /** - * @param args SnpEff function arguments. - * @param files Files to make locally available to the commands being run. - * @param sc Spark context. 
- */ - def this(args: SnpEffFnArgs, files: Seq[String], sc: SparkContext) = this(args, files, Map.empty, sc) + sc: SparkContext) extends CannoliFn[VariantContextRDD, VariantContextRDD](sc) with Logging { override def apply(variantContexts: VariantContextRDD): VariantContextRDD = { - val snpEffCommand = if (args.useDocker) { - Seq("docker", - "run", - "--rm", - args.dockerImage, - "snpEff", - "-download", - args.snpEffDatabase) - } else { - Seq(args.snpEffPath, - "-download", - args.snpEffDatabase) + var builder = CommandBuilders.create(args.useDocker, args.useSingularity) + .setExecutable(args.executable) + .add("-download") + .add(args.database) + + if (args.useDocker || args.useSingularity) { + builder + .setImage(args.image) + .setSudo(args.sudo) } - log.info("Piping {} to snpEff with command: {} files: {} environment: {}", - Array(variantContexts, snpEffCommand, files, environment)) + log.info("Piping {} to snpEff with command: {} files: {}", + variantContexts, builder.build(), builder.getFiles()) implicit val tFormatter = VCFInFormatter implicit val uFormatter = new VCFOutFormatter(sc.hadoopConfiguration) - variantContexts.pipe(snpEffCommand, files, environment) + variantContexts.pipe[VariantContext, VariantContextProduct, VariantContextRDD, VCFInFormatter]( + cmd = builder.build(), + files = builder.getFiles() + ) } } diff --git a/cli/src/main/scala/org/bdgenomics/cannoli/cli/Vt.scala b/cli/src/main/scala/org/bdgenomics/cannoli/cli/Vt.scala index cafb0571..5daa3837 100644 --- a/cli/src/main/scala/org/bdgenomics/cannoli/cli/Vt.scala +++ b/cli/src/main/scala/org/bdgenomics/cannoli/cli/Vt.scala @@ -18,7 +18,9 @@ package org.bdgenomics.cannoli.cli import htsjdk.samtools.ValidationStringency +import org.apache.hadoop.fs.{ FileSystem, Path } import org.apache.spark.SparkContext +import org.bdgenomics.adam.models.VariantContext import org.bdgenomics.adam.rdd.ADAMContext._ import org.bdgenomics.adam.rdd.ADAMSaveAnyArgs import org.bdgenomics.adam.rdd.variant.{ @@ 
-26,25 +28,40 @@ import org.bdgenomics.adam.rdd.variant.{ VCFInFormatter, VCFOutFormatter } +import org.bdgenomics.adam.sql.{ VariantContext => VariantContextProduct } +import org.bdgenomics.cannoli.builder.CommandBuilders import org.bdgenomics.utils.cli._ import org.bdgenomics.utils.misc.Logging import org.kohsuke.args4j.{ Argument, Option => Args4jOption } +import scala.collection.JavaConversions._ /** * Vt function arguments. */ class VtFnArgs extends Args4jBase { - @Args4jOption(required = false, name = "-vt_path", usage = "Path to the vt executable. Defaults to vt.") - var vtPath: String = "vt" + @Args4jOption(required = false, name = "-executable", usage = "Path to the vt executable. Defaults to vt.") + var executable: String = "vt" - @Args4jOption(required = true, name = "-vt_reference", usage = "Reference sequence for analysis.") - var referencePath: String = null + @Args4jOption(required = false, name = "-image", usage = "Container image to use. Defaults to heuermh/vt.") + var image: String = "heuermh/vt" + + @Args4jOption(required = false, name = "-sudo", usage = "Run via sudo.") + var sudo: Boolean = false - @Args4jOption(required = false, name = "-docker_image", usage = "Docker image to use. Defaults to heuermh/vt.") - var dockerImage: String = "heuermh/vt" + @Args4jOption(required = false, name = "-add_files", usage = "If true, use the SparkFiles mechanism to distribute files to executors.") + var addFiles: Boolean = false - @Args4jOption(required = false, name = "-use_docker", usage = "If true, uses Docker to launch vt. 
If false, uses the vt executable path.") + @Args4jOption(required = false, name = "-use_docker", usage = "If true, uses Docker to launch vt.") var useDocker: Boolean = false + + @Args4jOption(required = false, name = "-use_singularity", usage = "If true, uses Singularity to launch vt.") + var useSingularity: Boolean = false + + @Args4jOption(required = true, name = "-reference", usage = "Reference sequence for analysis.") + var referencePath: String = null + + @Args4jOption(required = false, name = "-window", usage = "Window size for local sorting of variants. Defaults to 10000.") + var window: Integer = _ } /** @@ -52,58 +69,42 @@ class VtFnArgs extends Args4jBase { * for use in cannoli-shell or notebooks. * * @param args Vt function arguments. - * @param files Files to make locally available to the commands being run. - * @param environment A map containing environment variable/value pairs to set - * in the environment for the newly created process. * @param sc Spark context. */ class VtFn( val args: VtFnArgs, - val files: Seq[String], - val environment: Map[String, String], - val sc: SparkContext) extends Function1[VariantContextRDD, VariantContextRDD] with Logging { - - /** - * @param args Vt function arguments. - * @param sc Spark context. - */ - def this(args: VtFnArgs, sc: SparkContext) = this(args, Seq.empty, Map.empty, sc) - - /** - * @param args Vt function arguments. - * @param files Files to make locally available to the commands being run. - * @param sc Spark context. 
- */ - def this(args: VtFnArgs, files: Seq[String], sc: SparkContext) = this(args, files, Map.empty, sc) + sc: SparkContext) extends CannoliFn[VariantContextRDD, VariantContextRDD](sc) with Logging { override def apply(variantContexts: VariantContextRDD): VariantContextRDD = { - val vtCommand = if (args.useDocker) { - Seq("docker", - "run", - "--interactive", - "--rm", - args.dockerImage, - "vt", - "normalize", - "-", - "-r", - args.referencePath) - } else { - Seq(args.vtPath, - "normalize", - "-", - "-r", - args.referencePath) + var builder = CommandBuilders.create(args.useDocker, args.useSingularity) + .setExecutable(args.executable) + .add("normalize") + .add("-") + .add("-r") + .add(if (args.addFiles) "$0" else absolute(args.referencePath)) + + Option(args.window).foreach(i => builder.add("-w").add(i.toString)) + + if (args.addFiles) builder.addFile(args.referencePath) + + if (args.useDocker || args.useSingularity) { + builder + .setImage(args.image) + .setSudo(args.sudo) + .addMount(if (args.addFiles) "$root" else root(args.referencePath)) } - log.info("Piping {} to vt with command: {} files: {} environment: {}", - Array(variantContexts, vtCommand, files, environment)) + log.info("Piping {} to vt with command: {} files: {}", + variantContexts, builder.build(), builder.getFiles()) implicit val tFormatter = VCFInFormatter implicit val uFormatter = new VCFOutFormatter(sc.hadoopConfiguration) - variantContexts.pipe(vtCommand, files, environment) + variantContexts.pipe[VariantContext, VariantContextProduct, VariantContextRDD, VCFInFormatter]( + cmd = builder.build(), + files = builder.getFiles() + ) } } diff --git a/core/pom.xml b/core/pom.xml index 2acb76f9..1d771298 100644 --- a/core/pom.xml +++ b/core/pom.xml @@ -37,6 +37,11 @@ + + junit + junit + test + org.apache.spark spark-core_${scala.version.prefix} diff --git a/core/src/main/java/org/bdgenomics/cannoli/builder/CommandBuilder.java b/core/src/main/java/org/bdgenomics/cannoli/builder/CommandBuilder.java new 
file mode 100644 index 00000000..353fd26c --- /dev/null +++ b/core/src/main/java/org/bdgenomics/cannoli/builder/CommandBuilder.java @@ -0,0 +1,456 @@ +/** + * Licensed to Big Data Genomics (BDG) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The BDG licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.bdgenomics.cannoli.builder; + +import static com.google.common.base.Preconditions.checkNotNull; + +import java.io.Serializable; + +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.Optional; + +import java.util.concurrent.TimeUnit; + +import javax.annotation.Nullable; + +import com.google.common.collect.ImmutableList; +import com.google.common.collect.ImmutableMap; + +/** + * Abstract command builder. + */ +public abstract class CommandBuilder implements Serializable { + /** Command to run. */ + private String executable; + + /** Name of container image. */ + private String image; + + /** Number of bases to flank each command invocation by. */ + private Integer flankSize; + + /** True to run via sudo. */ + private boolean sudo = false; + + /** How long to let a single partition run for, in seconds. */ + private Long timeout; + + /** List of command arguments. */ + private final List arguments = new ArrayList(); + + /** Map of environment variables. 
*/ + private final Map environment = new HashMap(); + + /** List of files to make available locally. */ + private final List files = new ArrayList(); + + /** Map of mount points. */ + private Map mounts = new HashMap(); + + + /** + * Create a new command builder. + */ + protected CommandBuilder() { + // empty + } + + /** + * Create a new command builder with the specified executable. + * + * @param executable executable, must not be null + */ + protected CommandBuilder(final String executable) { + this(); + setExecutable(executable); + } + + + /** + * Set the executable for this command builder. + * + * @param executable executable, must not be null + * @return this command builder + */ + public final CommandBuilder setExecutable(final String executable) { + checkNotNull(executable); + this.executable = executable; + return this; + } + + /** + * Set the number of bases to flank each command invocation by for this builder. + * + * @param flankSize number of bases to flank each command invocation by + * @return this command builder + */ + public final CommandBuilder setFlankSize(@Nullable final Integer flankSize) { + this.flankSize = flankSize; + return this; + } + + /** + * Set the image for this command builder. + * + * @param image image, must not be null + * @return this command builder + */ + public final CommandBuilder setImage(final String image) { + checkNotNull(image); + this.image = image; + return this; + } + + /** + * Set to true to run via sudo for this command builder. + * + * @param sudo true to run via sudo + * @return this command builder + */ + public final CommandBuilder setSudo(final boolean sudo) { + this.sudo = sudo; + return this; + } + + /** + * Set how long to let a single partition run for, in seconds, for this builder. 
+ * + * @param timeout how long to let a single partition run for, in seconds + * @return this command builder + */ + public final CommandBuilder setTimeout(@Nullable final Long timeout) { + this.timeout = timeout; + return this; + } + + /** + * Set how long to let a single partition run for, in the specified time unit, for this builder. + * + * @param duration duration + * @param timeUnit time unit, must not be null + * @return this command builder + */ + public final CommandBuilder setTimeout(final long duration, final TimeUnit timeUnit) { + checkNotNull(timeUnit); + this.timeout = timeUnit.toSeconds(duration); + return this; + } + + + /** + * Add one or more arguments to the list of command arguments for this command builder. + * + * @param arguments variable number of arguments to add, must not be null + * @return this command builder + */ + public final CommandBuilder add(final String... arguments) { + return addArguments(arguments); + } + + /** + * Add one or more arguments to the list of command arguments for this command builder. + * + * @param arguments arguments to add, must not be null + * @return this command builder + */ + public final CommandBuilder add(final Iterable arguments) { + return addArguments(arguments); + } + + /** + * Add an argument to the list of command arguments for this command builder. + * + * @param argument argument to add, must not be null + * @return this command builder + */ + public final CommandBuilder addArgument(final String argument) { + checkNotNull(argument); + arguments.add(argument); + return this; + } + + /** + * Add one or more arguments to the list of command arguments for this command builder. + * + * @param arguments variable number of arguments to add, must not be null + * @return this command builder + */ + public final CommandBuilder addArguments(final String... 
arguments) { + checkNotNull(arguments); + for (String argument : arguments) { + this.arguments.add(argument); + } + return this; + } + + /** + * Add one or more arguments to the list of command arguments for this command builder. + * + * @param arguments arguments to add, must not be null + * @return this command builder + */ + public final CommandBuilder addArguments(final Iterable arguments) { + checkNotNull(arguments); + for (String argument : arguments) { + this.arguments.add(argument); + } + return this; + } + + /** + * Add an environment variable to the map of environment variables for this command builder. + * + * @param variable environment variable to add, must not be null + * @param value environment variable value to add, must not be null + * @return this command builder + */ + public final CommandBuilder addEnvironment(final String variable, final String value) { + checkNotNull(variable); + checkNotNull(value); + environment.put(variable, value); + return this; + } + + /** + * Add environment variables to the map of environment variables for this command builder. + * + * @param environment environment variables to add, must not be null + * @return this command builder + */ + public final CommandBuilder addEnvironment(final Map environment) { + checkNotNull(environment); + this.environment.putAll(environment); + return this; + } + + /** + * Add a file to the list of files to make available locally for this command builder. + * + * @param file file to add, must not be null + * @return this command builder + */ + public final CommandBuilder addFile(final String file) { + checkNotNull(file); + files.add(file); + return this; + } + + /** + * Add zero or more files to the list of files to make available locally for this command builder. + * + * @param files variable number of files to add, must not be null + * @return this command builder + */ + public final CommandBuilder addFiles(final String... 
files) { + checkNotNull(files); + for (String file : files) { + this.files.add(file); + } + return this; + } + + /** + * Add files to the list of files to make available locally for this command builder. + * + * @param files files to add, must not be null + * @return this command builder + */ + public final CommandBuilder addFiles(final Iterable files) { + checkNotNull(files); + for (String file : files) { + this.files.add(file); + } + return this; + } + + /** + * Add the specified mount point to the map of mount points for this command builder. + * + * @param mount mount point source and target, must not be null + * @return this command builder + */ + public final CommandBuilder addMount(final String mount) { + return addMount(mount, mount); + } + + /** + * Add the specified mount point to the map of mount points for this command builder. + * + * @param source mount point source, must not be null + * @param target mount point target, must not be null + * @return this command builder + */ + public final CommandBuilder addMount(final String source, final String target) { + checkNotNull(source); + checkNotNull(target); + mounts.put(source, target); + return this; + } + + /** + * Add the specified mount points to the map of mount points for this command builder. + * + * @param mounts mount points to add, must not be null + */ + public final CommandBuilder addMounts(final Map mounts) { + checkNotNull(mounts); + this.mounts.putAll(mounts); + return this; + } + + + /** + * Return the executable for this command builder. + * + * @return the executable for this command builder + */ + public final String getExecutable() { + return executable; + } + + /** + * Return the number of bases to flank each command invocation by for this builder. May be null. 
+ * + * @return the number of bases to flank each command invocation by for this builder + */ + public final Integer getFlankSize() { + return flankSize; + } + + /** + * Return the number of bases to flank each command invocation by for this builder, as an optional. + * + * @return the number of bases to flank each command invocation by for this builder, as an optional + */ + public final Optional getOptFlankSize() { + return Optional.ofNullable(flankSize); + } + + /** + * Return the image for this command builder. + * + * @return the image for this command builder. + */ + public final String getImage() { + return image; + } + + /** + * Return how long to let a single partition run for, in seconds, for this builder. May be null. + * + * @return how long to let a single partition run for, in seconds, for this builder + */ + public final Long getTimeout() { + return timeout; + } + + /** + * Return how long to let a single partition run for, in seconds, for this builder, as an optional. + * + * @return how long to let a single partition run for, in seconds, for this builder, as an optional + */ + public final Optional getOptTimeout() { + return Optional.ofNullable(timeout); + } + + /** + * Return true to run via sudo for this command builder. + * + * @return true to run via sudo for this command builder + */ + public final boolean getSudo() { + return isSudo(); + } + + /** + * Return true to run via sudo for this command builder. + * + * @return true to run via sudo for this command builder + */ + public final boolean isSudo() { + return sudo; + } + + /** + * Return an immutable list of command arguments for this command builder. + * + * @return an immutable list of command arguments for this command builder + */ + public final List getArguments() { + return ImmutableList.copyOf(arguments); + } + + /** + * Return an immutable map of environment variables for this command builder. 
+ * + * @return an immutable map of environment variables for this command builder + */ + public final Map getEnvironment() { + return ImmutableMap.copyOf(environment); + } + + /** + * Return an immutable list of files to make available locally for this command builder. + * + * @return an immutable list of files to make available locally for this command builder + */ + public final List getFiles() { + return ImmutableList.copyOf(files); + } + + /** + * Return the map of mount points for this command builder. + * + * @return the map of mount points for this command builder + */ + public final Map getMounts() { + return ImmutableMap.copyOf(mounts); + } + + /** + * Reset this command builder. + * + * @return this command builder + */ + public final CommandBuilder reset() { + executable = null; + flankSize = null; + image = null; + sudo = false; + timeout = null; + + arguments.clear(); + environment.clear(); + files.clear(); + mounts.clear(); + + return this; + } + + /** + * Build and return the command for this command builder as a list of strings. + * + * @return the command for this command builder as a list of strings. + * @throws IllegalStateException if this builder is in an illegal state, e.g. + * if required values are not set + */ + public abstract List build(); +} diff --git a/core/src/main/java/org/bdgenomics/cannoli/builder/CommandBuilders.java b/core/src/main/java/org/bdgenomics/cannoli/builder/CommandBuilders.java new file mode 100644 index 00000000..b8284207 --- /dev/null +++ b/core/src/main/java/org/bdgenomics/cannoli/builder/CommandBuilders.java @@ -0,0 +1,48 @@ +/** + * Licensed to Big Data Genomics (BDG) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The BDG licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.bdgenomics.cannoli.builder; + +import java.io.Serializable; + +/** + * Cannoli pipe command builders. + */ +public final class CommandBuilders implements Serializable { + + /** + * Create a new Cannoli pipe command builder. + * + * @param useDocker true to use Docker, + * useDocker and useSingularity must not both be true + * @param useSingularity true to use Singularity, + * useDocker and useSingularity must not both be true + * @return a new Cannoli pipe command builder + */ + public static CommandBuilder create(final boolean useDocker, final boolean useSingularity) { + if (useDocker && useSingularity) { + throw new IllegalArgumentException("useDocker and useSingularity must not both be true"); + } + if (useDocker) { + return new DockerBuilder(); + } + else if (useSingularity) { + return new SingularityBuilder(); + } + return new LocalBuilder(); + } +} diff --git a/core/src/main/java/org/bdgenomics/cannoli/builder/ContainerBuilder.java b/core/src/main/java/org/bdgenomics/cannoli/builder/ContainerBuilder.java new file mode 100644 index 00000000..862ec83c --- /dev/null +++ b/core/src/main/java/org/bdgenomics/cannoli/builder/ContainerBuilder.java @@ -0,0 +1,160 @@ +/** + * Licensed to Big Data Genomics (BDG) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The BDG licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.bdgenomics.cannoli.builder; + +import static com.google.common.base.Preconditions.checkNotNull; + +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.Map; + +import com.google.common.collect.ImmutableList; +import com.google.common.collect.ImmutableMap; + +/** + * Abstract container builder. + */ +abstract class ContainerBuilder extends CommandBuilder { + + /** + * Create a new container builder. + */ + protected ContainerBuilder() { + super(); + } + + /** + * Create a new container builder with the specified executable. + * + * @param executable executable, must not be null + */ + protected ContainerBuilder(final String executable) { + super(executable); + } + + /** + * Create a new container builder with the specified executable and image. + * + * @param executable executable, must not be null + * @param image image, must not be null + */ + protected ContainerBuilder(final String executable, final String image) { + super(executable); + setImage(image); + } + + + /** + * Return the container commands for this container builder. + * + * @return the container commands for this container builder + */ + protected abstract List getContainerCommands(); + + /** + * Return the remove argument for this container builder. + * + * @return the remove argument for this container builder + */ + protected abstract List getRemoveArgument(); + + /** + * Format the specified environment variable into a list of strings. 
+ * + * @param variable variable + * @param value value + * @return the specified environment variable formatted into a list of strings. + */ + protected abstract List formatEnvironmentVariable(String variable, String value); + + /** + * Format the specified environment variable into a list of string arguments. + * + * @param variable variable + * @param value value + * @return the specified environment variable formatted into a list of string arguments. + */ + protected abstract List formatEnvironmentArgument(String variable, String value); + + /** + * Format the specified image into a list of string arguments. + * + * @param image image + * @return the specified image formatted into a list of string arguments. + */ + protected abstract List formatImage(String image); + + /** + * Format the specified mount point into a list of string arguments. + * + * @param source source + * @param target target + * @return the specified mount point formatted into a list of string arguments + */ + protected abstract List formatMount(String source, String target); + + @Override + public final List build() { + if (getExecutable() == null) { + throw new IllegalStateException("executable must not be null"); + } + if (getImage() == null) { + throw new IllegalStateException("image must not be null"); + } + + List command = new ArrayList(); + + // add environment variables + for (Map.Entry e : getEnvironment().entrySet()) { + command.addAll(formatEnvironmentVariable(e.getKey(), e.getValue())); + } + + // add sudo if necessary + if (isSudo()) { + command.add("sudo"); + } + + // e.g. docker run, etc. + command.addAll(getContainerCommands()); + + // add environment arguments + for (Map.Entry e : getEnvironment().entrySet()) { + command.addAll(formatEnvironmentArgument(e.getKey(), e.getValue())); + } + + // add mount arguments + for (Map.Entry e : getMounts().entrySet()) { + command.addAll(formatMount(e.getKey(), e.getValue())); + } + + // e.g. 
--rm + command.addAll(getRemoveArgument()); + + // container image name + command.addAll(formatImage(getImage())); + + // add command + command.add(getExecutable()); + + // add command arguments + command.addAll(getArguments()); + + return command; + } +} diff --git a/core/src/main/java/org/bdgenomics/cannoli/builder/DockerBuilder.java b/core/src/main/java/org/bdgenomics/cannoli/builder/DockerBuilder.java new file mode 100644 index 00000000..9bfa7d1e --- /dev/null +++ b/core/src/main/java/org/bdgenomics/cannoli/builder/DockerBuilder.java @@ -0,0 +1,86 @@ +/** + * Licensed to Big Data Genomics (BDG) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The BDG licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.bdgenomics.cannoli.builder; + +import java.util.Collections; +import java.util.List; + +import com.google.common.collect.ImmutableList; + +/** + * Docker container builder. + */ +public final class DockerBuilder extends ContainerBuilder { + + /** + * Create a new Docker command builder. + */ + public DockerBuilder() { + super(); + } + + /** + * Create a new Docker command builder with the specified executable. 
+ * + * @param executable executable, must not be null + */ + public DockerBuilder(final String executable) { + super(executable); + } + + /** + * Create a new Docker command builder with the specified executable and image. + * + * @param executable executable, must not be null + * @param image image, must not be null + */ + public DockerBuilder(final String executable, final String image) { + super(executable, image); + } + + + @Override + protected List getContainerCommands() { + return ImmutableList.of("docker", "run", "-i"); + } + + @Override + protected List getRemoveArgument() { + return ImmutableList.of("--rm"); + } + + @Override + protected List formatImage(final String image) { + return ImmutableList.of(image); + } + + @Override + protected List formatEnvironmentVariable(final String variable, final String value) { + return Collections.emptyList(); + } + + @Override + protected List formatEnvironmentArgument(final String variable, final String value) { + return ImmutableList.of("--env", variable + "=" + value); + } + + @Override + protected List formatMount(final String source, final String target) { + return ImmutableList.of("-v", source + ":" + target); + } +} diff --git a/core/src/main/java/org/bdgenomics/cannoli/builder/LocalBuilder.java b/core/src/main/java/org/bdgenomics/cannoli/builder/LocalBuilder.java new file mode 100644 index 00000000..08a367f3 --- /dev/null +++ b/core/src/main/java/org/bdgenomics/cannoli/builder/LocalBuilder.java @@ -0,0 +1,60 @@ +/** + * Licensed to Big Data Genomics (BDG) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The BDG licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.bdgenomics.cannoli.builder; + +import java.util.ArrayList; +import java.util.List; + +/** + * Local command builder. + */ +public final class LocalBuilder extends CommandBuilder { + + /** + * Create a new local command builder. + */ + public LocalBuilder() { + super(); + } + + /** + * Create a new local command builder with the specified executable. + * + * @param executable executable, must not be null + */ + public LocalBuilder(final String executable) { + super(executable); + } + + + @Override + public final List build() { + if (getExecutable() == null) { + throw new IllegalStateException("executable must not be null"); + } + List command = new ArrayList(); + + // add command + command.add(getExecutable()); + + // add command arguments + command.addAll(getArguments()); + + return command; + } +} diff --git a/core/src/main/java/org/bdgenomics/cannoli/builder/SingularityBuilder.java b/core/src/main/java/org/bdgenomics/cannoli/builder/SingularityBuilder.java new file mode 100644 index 00000000..b44ceb01 --- /dev/null +++ b/core/src/main/java/org/bdgenomics/cannoli/builder/SingularityBuilder.java @@ -0,0 +1,98 @@ +/** + * Licensed to Big Data Genomics (BDG) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The BDG licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.bdgenomics.cannoli.builder; + +import java.util.Collections; +import java.util.List; + +import java.util.regex.Matcher; +import java.util.regex.Pattern; + +import com.google.common.collect.ImmutableList; + +/** + * Singularity container builder. + */ +public final class SingularityBuilder extends ContainerBuilder { + private static final Pattern SCHEME = Pattern.compile("^[^/:\\. ]+://(.*)"); + + /** + * Create a new Singularity command builder. + */ + public SingularityBuilder() { + super(); + } + + /** + * Create a new Singularity command builder with the specified executable. + * + * @param executable executable, must not be null + */ + public SingularityBuilder(final String executable) { + super(executable); + } + + /** + * Create a new Singularity command builder with the specified executable and image. + * + * @param executable executable, must not be null + * @param image image, must not be null + */ + public SingularityBuilder(final String executable, final String image) { + super(executable); + setImage(image); + } + + + @Override + protected List getContainerCommands() { + return ImmutableList.of("singularity", "exec"); // todo: -q ? 
+ } + + @Override + protected List getRemoveArgument() { + return Collections.emptyList(); + } + + @Override + protected List formatEnvironmentVariable(final String variable, final String value) { + return ImmutableList.of("SINGULARITYENV_" + variable + "=" + value); + } + + @Override + protected List formatEnvironmentArgument(final String variable, final String value) { + return Collections.emptyList(); + } + + @Override + protected List formatImage(final String image) { + if (image.startsWith("/")) { + return ImmutableList.of(image); + } + Matcher m = SCHEME.matcher(image); + if (m.matches()) { + return ImmutableList.of(image); + } + return ImmutableList.of("docker://" + image); + } + + @Override + protected List formatMount(final String source, final String target) { + return ImmutableList.of("--bind", source.equals(target) ? source : source + ":" + target); // todo: /mnt issue + } +} diff --git a/core/src/main/java/org/bdgenomics/cannoli/builder/package-info.java b/core/src/main/java/org/bdgenomics/cannoli/builder/package-info.java new file mode 100644 index 00000000..178fdc13 --- /dev/null +++ b/core/src/main/java/org/bdgenomics/cannoli/builder/package-info.java @@ -0,0 +1,22 @@ +/** + * Licensed to Big Data Genomics (BDG) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The BDG licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * Cannoli pipe command builder. + */ +package org.bdgenomics.cannoli.builder; diff --git a/core/src/test/java/org/bdgenomics/cannoli/builder/CommandBuildersTest.java b/core/src/test/java/org/bdgenomics/cannoli/builder/CommandBuildersTest.java new file mode 100644 index 00000000..a6a4ef08 --- /dev/null +++ b/core/src/test/java/org/bdgenomics/cannoli/builder/CommandBuildersTest.java @@ -0,0 +1,49 @@ +/** + * Licensed to Big Data Genomics (BDG) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The BDG licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.bdgenomics.cannoli.builder; + +import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertTrue; + +import org.junit.Test; + +/** + * Unit test for CommandBuilders. 
+ */ +public final class CommandBuildersTest { + + @Test + public void testCreateLocalBuilder() { + assertFalse(CommandBuilders.create(false, false).setExecutable("foo").setSudo(true).build().contains("sudo")); + } + + @Test + public void testCreateDockerBuilder() { + assertTrue(CommandBuilders.create(true, false).setExecutable("foo").setImage("image").build().contains("docker")); + } + + @Test + public void testCreateSingularityBuilder() { + assertTrue(CommandBuilders.create(false, true).setExecutable("foo").setImage("image").build().contains("singularity")); + } + + @Test(expected=IllegalArgumentException.class) + public void testCreateBothTrue() { + CommandBuilders.create(true, true); + } +} diff --git a/core/src/test/java/org/bdgenomics/cannoli/builder/DockerBuilderTest.java b/core/src/test/java/org/bdgenomics/cannoli/builder/DockerBuilderTest.java new file mode 100644 index 00000000..3012679b --- /dev/null +++ b/core/src/test/java/org/bdgenomics/cannoli/builder/DockerBuilderTest.java @@ -0,0 +1,175 @@ +/** + * Licensed to Big Data Genomics (BDG) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The BDG licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.bdgenomics.cannoli.builder; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertNotNull; +import static org.junit.Assert.assertNull; +import static org.junit.Assert.assertTrue; + +import java.util.List; + +import org.junit.Before; +import org.junit.Test; + +/** + * Unit test for DockerBuilder. + */ +public final class DockerBuilderTest { + private DockerBuilder builder; + + @Before + public void setUp() { + builder = new DockerBuilder(); + } + + @Test + public void testCtr() { + assertNotNull(builder); + } + + @Test(expected=NullPointerException.class) + public void setImageNull() { + builder.setImage(null); + } + + @Test(expected=NullPointerException.class) + public void addMountNull() { + builder.addMount(null); + } + + @Test(expected=NullPointerException.class) + public void addMountNullSource() { + builder.addMount(null, "/target"); + } + + @Test(expected=NullPointerException.class) + public void addMountNullTarget() { + builder.addMount("/source", null); + } + + @Test(expected=NullPointerException.class) + public void addMountsNull() { + builder.addMounts(null); + } + + @Test + public void testResetEmpty() { + builder.reset(); + assertNull(builder.getExecutable()); + assertNull(builder.getTimeout()); + assertNull(builder.getFlankSize()); + assertTrue(builder.getEnvironment().isEmpty()); + assertTrue(builder.getFiles().isEmpty()); + assertTrue(builder.getArguments().isEmpty()); + assertFalse(builder.getSudo()); + assertNull(builder.getImage()); + assertTrue(builder.getMounts().isEmpty()); + } + + @Test + public void testResetFull() { + builder + .setExecutable("foo") + .setTimeout(1000L) + .setFlankSize(100) + .addEnvironment("VARIABLE", "value") + .addFile("file") + .addArgument("--help") + .setSudo(true) + .setImage("image") + .addMount("/source", "/target"); + + builder.reset(); + assertNull(builder.getExecutable()); + assertNull(builder.getTimeout()); + 
assertNull(builder.getFlankSize()); + assertTrue(builder.getEnvironment().isEmpty()); + assertTrue(builder.getFiles().isEmpty()); + assertTrue(builder.getArguments().isEmpty()); + assertFalse(builder.getSudo()); + assertNull(builder.getImage()); + assertTrue(builder.getMounts().isEmpty()); + } + + @Test(expected=IllegalStateException.class) + public void testBuildNullExecutable() { + builder.build(); + } + + @Test(expected=IllegalStateException.class) + public void testBuildNullImage() { + builder.setExecutable("foo").build(); + } + + @Test + public void testBuild() { + builder + .setExecutable("foo") + .setTimeout(1000L) + .setFlankSize(100) + .addEnvironment("VARIABLE", "value") + .addFile("file") + .addArgument("--help") + .setSudo(true) + .setImage("image") + .addMount("/source", "/target"); + + assertEquals("foo", builder.getExecutable()); + assertEquals(Long.valueOf(1000L), builder.getTimeout()); + assertEquals(Long.valueOf(1000L), builder.getOptTimeout().get()); + assertEquals(Integer.valueOf(100), builder.getFlankSize()); + assertEquals(Integer.valueOf(100), builder.getOptFlankSize().get()); + assertEquals(1, builder.getEnvironment().size()); + assertEquals("value", builder.getEnvironment().get("VARIABLE")); + assertEquals(1, builder.getFiles().size()); + assertEquals("file", builder.getFiles().get(0)); + assertEquals(1, builder.getArguments().size()); + assertEquals("--help", builder.getArguments().get(0)); + assertTrue(builder.getSudo()); + assertEquals("image", builder.getImage()); + assertEquals(1, builder.getMounts().size()); + assertEquals("/target", builder.getMounts().get("/source")); + + List command = builder.build(); + assertEquals(12, command.size()); + assertEquals("sudo", command.get(0)); + assertEquals("docker", command.get(1)); + assertEquals("run", command.get(2)); + assertEquals("-i", command.get(3)); + assertEquals("--env", command.get(4)); + assertEquals("VARIABLE=value", command.get(5)); + assertEquals("-v", command.get(6)); + 
assertEquals("/source:/target", command.get(7)); + assertEquals("--rm", command.get(8)); + assertEquals("image", command.get(9)); + assertEquals("foo", command.get(10)); + assertEquals("--help", command.get(11)); + } + + @Test + public void testBuildMount() { + builder + .setExecutable("foo") + .setImage("image") + .addMount("/mount"); + + assertTrue(builder.build().contains("/mount:/mount")); + } +} diff --git a/core/src/test/java/org/bdgenomics/cannoli/builder/LocalBuilderTest.java b/core/src/test/java/org/bdgenomics/cannoli/builder/LocalBuilderTest.java new file mode 100644 index 00000000..3f30fd6b --- /dev/null +++ b/core/src/test/java/org/bdgenomics/cannoli/builder/LocalBuilderTest.java @@ -0,0 +1,174 @@ +/** + * Licensed to Big Data Genomics (BDG) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The BDG licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.bdgenomics.cannoli.builder; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertNotNull; +import static org.junit.Assert.assertNull; +import static org.junit.Assert.assertTrue; + +import java.util.List; +import java.util.Map; + +import org.junit.Before; +import org.junit.Test; + +/** + * Unit test for LocalBuilder. 
+ */ +public final class LocalBuilderTest { + private LocalBuilder builder; + + @Before + public void setUp() { + builder = new LocalBuilder(); + } + + @Test + public void testCtr() { + assertNotNull(builder); + } + + @Test(expected=NullPointerException.class) + public void testAddNullVarargArguments() { + builder.add((String[]) null); + } + + @Test(expected=NullPointerException.class) + public void testAddNullIterableArguments() { + builder.add((Iterable) null); + } + + @Test(expected=NullPointerException.class) + public void testAddArgumentNull() { + builder.addArgument(null); + } + + @Test(expected=NullPointerException.class) + public void testAddArgumentsNullVarargArguments() { + builder.addArguments((String[]) null); + } + + @Test(expected=NullPointerException.class) + public void testAddArgumentsNullIterableArguments() { + builder.addArguments((Iterable) null); + } + + @Test(expected=NullPointerException.class) + public void testAddEnvironmentNullVariable() { + builder.addEnvironment(null, "value"); + } + + @Test(expected=NullPointerException.class) + public void testAddEnvironmentNullValue() { + builder.addEnvironment("VARIABLE", null); + } + + @Test(expected=NullPointerException.class) + public void testAddEnvironmentNullEnvironment() { + builder.addEnvironment((Map) null); + } + + @Test(expected=NullPointerException.class) + public void testAddFileNull() { + builder.addFile(null); + } + + @Test(expected=NullPointerException.class) + public void testAddFilesNullVarargFiles() { + builder.addFiles((String[]) null); + } + + @Test(expected=NullPointerException.class) + public void testAddFilesNullIterableFiles() { + builder.addFiles((Iterable) null); + } + + @Test(expected=NullPointerException.class) + public void testSetExecutableNull() { + builder.setExecutable(null); + } + + @Test(expected=NullPointerException.class) + public void testSetTimeoutNullTimeUnit() { + builder.setTimeout(1000L, null); + } + + @Test + public void testResetEmpty() { + 
builder.reset(); + assertNull(builder.getExecutable()); + assertNull(builder.getTimeout()); + assertNull(builder.getFlankSize()); + assertTrue(builder.getEnvironment().isEmpty()); + assertTrue(builder.getFiles().isEmpty()); + assertTrue(builder.getArguments().isEmpty()); + } + + @Test + public void testResetFull() { + builder + .setExecutable("foo") + .setTimeout(1000L) + .setFlankSize(100) + .addEnvironment("VARIABLE", "value") + .addFile("file") + .addArgument("--help"); + + builder.reset(); + assertNull(builder.getExecutable()); + assertNull(builder.getTimeout()); + assertNull(builder.getFlankSize()); + assertTrue(builder.getEnvironment().isEmpty()); + assertTrue(builder.getFiles().isEmpty()); + assertTrue(builder.getArguments().isEmpty()); + } + + @Test(expected=IllegalStateException.class) + public void testBuildNullExecutable() { + builder.build(); + } + + @Test + public void testBuild() { + builder + .setExecutable("foo") + .setTimeout(1000L) + .setFlankSize(100) + .addEnvironment("VARIABLE", "value") + .addFile("file") + .addArgument("--help"); + + assertEquals("foo", builder.getExecutable()); + assertEquals(Long.valueOf(1000L), builder.getTimeout()); + assertEquals(Long.valueOf(1000L), builder.getOptTimeout().get()); + assertEquals(Integer.valueOf(100), builder.getFlankSize()); + assertEquals(Integer.valueOf(100), builder.getOptFlankSize().get()); + assertEquals(1, builder.getEnvironment().size()); + assertEquals("value", builder.getEnvironment().get("VARIABLE")); + assertEquals(1, builder.getFiles().size()); + assertEquals("file", builder.getFiles().get(0)); + assertEquals(1, builder.getArguments().size()); + assertEquals("--help", builder.getArguments().get(0)); + + List command = builder.build(); + assertEquals(2, command.size()); + assertEquals("foo", command.get(0)); + assertEquals("--help", command.get(1)); + } +} diff --git a/core/src/test/java/org/bdgenomics/cannoli/builder/SingularityBuilderTest.java 
b/core/src/test/java/org/bdgenomics/cannoli/builder/SingularityBuilderTest.java new file mode 100644 index 00000000..7b0ca9bb --- /dev/null +++ b/core/src/test/java/org/bdgenomics/cannoli/builder/SingularityBuilderTest.java @@ -0,0 +1,114 @@ +/** + * Licensed to Big Data Genomics (BDG) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The BDG licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.bdgenomics.cannoli.builder; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertNotNull; +import static org.junit.Assert.assertNull; +import static org.junit.Assert.assertTrue; + +import java.util.List; + +import org.junit.Before; +import org.junit.Test; + +/** + * Unit test for SingularityBuilder. 
+ */ +public final class SingularityBuilderTest { + private SingularityBuilder builder; + + @Before + public void setUp() { + builder = new SingularityBuilder(); + } + + @Test + public void testCtr() { + assertNotNull(builder); + } + + @Test + public void testBuild() { + builder + .setExecutable("foo") + .setTimeout(1000L) + .setFlankSize(100) + .addEnvironment("VARIABLE", "value") + .addFile("file") + .addArgument("--help") + .setSudo(true) + .setImage("image") + .addMount("/source", "/target"); + + assertEquals("foo", builder.getExecutable()); + assertEquals(Long.valueOf(1000L), builder.getTimeout()); + assertEquals(Long.valueOf(1000L), builder.getOptTimeout().get()); + assertEquals(Integer.valueOf(100), builder.getFlankSize()); + assertEquals(Integer.valueOf(100), builder.getOptFlankSize().get()); + assertEquals(1, builder.getEnvironment().size()); + assertEquals("value", builder.getEnvironment().get("VARIABLE")); + assertEquals(1, builder.getFiles().size()); + assertEquals("file", builder.getFiles().get(0)); + assertEquals(1, builder.getArguments().size()); + assertEquals("--help", builder.getArguments().get(0)); + assertTrue(builder.getSudo()); + assertEquals("image", builder.getImage()); + assertEquals(1, builder.getMounts().size()); + assertEquals("/target", builder.getMounts().get("/source")); + + List command = builder.build(); + + assertEquals(9, command.size()); + assertEquals("SINGULARITYENV_VARIABLE=value", command.get(0)); + assertEquals("sudo", command.get(1)); + assertEquals("singularity", command.get(2)); + assertEquals("exec", command.get(3)); + assertEquals("--bind", command.get(4)); + assertEquals("/source:/target", command.get(5)); + assertEquals("docker://image", command.get(6)); + assertEquals("foo", command.get(7)); + assertEquals("--help", command.get(8)); + } + + @Test + public void testImage() { + assertTrue(new SingularityBuilder("foo", "/image").build().contains("/image")); + } + + @Test + public void testShubImage() { + assertTrue(new 
SingularityBuilder("foo", "shub://image").build().contains("shub://image")); + } + + @Test + public void testDockerImage() { + assertTrue(new SingularityBuilder("foo", "image").build().contains("docker://image")); + } + + @Test + public void testBuildMount() { + builder + .setExecutable("foo") + .setImage("image") + .addMount("/mount"); + + assertTrue(builder.build().contains("/mount")); + } +} diff --git a/pom.xml b/pom.xml index 3fbfee67..6ae9623d 100644 --- a/pom.xml +++ b/pom.xml @@ -28,6 +28,7 @@ 1.8 1.8 2.7.3 + 4.12 1.1.1 0.2.13 @@ -289,13 +290,6 @@ - - org.apache.maven.plugins - maven-surefire-plugin - - true - - org.scalariform scalariform-maven-plugin @@ -348,6 +342,11 @@ + + junit + junit + ${junit.version} + org.apache.hadoop hadoop-client