Skip to content

Commit

Permalink
Merge 9c7b3ab into 4ea7617
Browse files Browse the repository at this point in the history
  • Loading branch information
heuermh committed Apr 19, 2018
2 parents 4ea7617 + 9c7b3ab commit e19aeb8
Show file tree
Hide file tree
Showing 2 changed files with 174 additions and 0 deletions.
Expand Up @@ -36,6 +36,7 @@ object Cannoli {
Freebayes,
SamtoolsMpileup,
SnpEff,
Vep,
VtNormalize)),
CommandGroup("CANNOLI TOOLS", List(InterleaveFastq,
SampleReads)))
Expand Down
173 changes: 173 additions & 0 deletions cli/src/main/scala/org/bdgenomics/cannoli/cli/Vep.scala
@@ -0,0 +1,173 @@
/**
* Licensed to Big Data Genomics (BDG) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The BDG licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.bdgenomics.cannoli.cli

import htsjdk.samtools.ValidationStringency
import org.apache.spark.SparkContext
import org.bdgenomics.adam.models.VariantContext
import org.bdgenomics.adam.rdd.ADAMContext._
import org.bdgenomics.adam.rdd.ADAMSaveAnyArgs
import org.bdgenomics.adam.rdd.variant.{
VariantContextRDD,
VCFInFormatter,
VCFOutFormatter
}
import org.bdgenomics.adam.sql.{ VariantContext => VariantContextProduct }
import org.bdgenomics.cannoli.builder.CommandBuilders
import org.bdgenomics.utils.cli._
import org.bdgenomics.utils.misc.Logging
import org.kohsuke.args4j.{ Argument, Option => Args4jOption }
import scala.collection.JavaConversions._

/**
 * Options accepted by the Ensembl VEP pipe function.
 *
 * Every field is bound through its args4j annotation; fields left at their
 * initial value fall back to the default described in the usage text.
 */
class VepFnArgs extends Args4jBase {

  // --- how Ensembl VEP is launched ---

  @Args4jOption(
    name = "-executable",
    usage = "Path to the Ensembl VEP executable. Defaults to vep.",
    required = false)
  var executable: String = "vep"

  @Args4jOption(
    name = "-image",
    usage = "Container image to use. Defaults to quay.io/biocontainers/ensembl-vep:91.3--htslib1.7_1.",
    required = false)
  var image: String = "quay.io/biocontainers/ensembl-vep:91.3--htslib1.7_1"

  @Args4jOption(
    name = "-sudo",
    usage = "Run via sudo.",
    required = false)
  var sudo: Boolean = false

  @Args4jOption(
    name = "-add_files",
    usage = "If true, use the SparkFiles mechanism to distribute files to executors.",
    required = false)
  var addFiles: Boolean = false

  @Args4jOption(
    name = "-use_docker",
    usage = "If true, uses Docker to launch Ensembl VEP.",
    required = false)
  var useDocker: Boolean = false

  @Args4jOption(
    name = "-use_singularity",
    usage = "If true, uses Singularity to launch Ensembl VEP.",
    required = false)
  var useSingularity: Boolean = false

  // --- what Ensembl VEP annotates against ---

  // null means "not specified"; no --species flag is emitted in that case.
  @Args4jOption(
    name = "-species",
    usage = "Species, this can be the latin name e.g. \"homo_sapiens\" or any Ensembl alias e.g. \"mouse\".",
    required = false)
  var species: String = null

  // null means "not specified"; no --assembly flag is emitted in that case.
  @Args4jOption(
    name = "-assembly",
    usage = "Assembly version to use if more than one are available.",
    required = false)
  var assembly: String = null

  // The only mandatory option of this class.
  @Args4jOption(
    name = "-cache",
    usage = "Ensembl VEP cache directory to use.",
    required = true)
  var cachePath: String = null
}

/**
 * Vep wrapper as a function VariantContextRDD → VariantContextRDD,
 * for use in cannoli-shell or notebooks.
 *
 * The function assembles an Ensembl VEP command line from the supplied
 * arguments, optionally wraps it for Docker or Singularity, and pipes the
 * variant contexts through it as VCF.
 *
 * @param args Vep function arguments.
 * @param stringency Validation stringency. Defaults to ValidationStringency.LENIENT.
 * @param sc Spark context.
 */
class VepFn(
    val args: VepFnArgs,
    val stringency: ValidationStringency = ValidationStringency.LENIENT,
    sc: SparkContext) extends CannoliFn[VariantContextRDD, VariantContextRDD](sc) with Logging {

  /**
   * Annotate the specified variant contexts by piping them through Ensembl VEP.
   *
   * @param variantContexts Variant contexts to pipe to Ensembl VEP.
   * @return The variant contexts read back from the Ensembl VEP VCF output.
   */
  override def apply(variantContexts: VariantContextRDD): VariantContextRDD = {

    // Where the command should look for the VEP cache. With -add_files the
    // cache is registered via addFile below and referred to as "$0"
    // (presumably the pipe API's placeholder for the first distributed file —
    // TODO confirm against the ADAM pipe documentation); otherwise the
    // absolute form of the configured path is used. The same value is used
    // for --dir_cache and for the container mount.
    val cacheDir = if (args.addFiles) "$0" else absolute(args.cachePath)

    // The builder is mutated in place by the calls below, so the reference
    // itself never needs to be reassigned.
    val builder = CommandBuilders.create(args.useDocker, args.useSingularity)
      .setExecutable(args.executable)
      .add("--format")
      .add("vcf")
      .add("--output_file")
      .add("STDOUT")
      .add("--vcf_info_field")
      .add("ANN")
      .add("--terms")
      .add("so")
      .add("--no_stats")
      .add("--offline")
      .add("--dir_cache")
      .add(cacheDir)

    // Optional flags are only emitted when a value was provided (non-null).
    Option(args.species).foreach(species => builder.add("--species").add(species))
    Option(args.assembly).foreach(assembly => builder.add("--assembly").add(assembly))

    if (args.addFiles) {
      builder.addFile(args.cachePath)
    }

    // Container settings only apply when a container runtime was requested.
    if (args.useDocker || args.useSingularity) {
      builder
        .setImage(args.image)
        .setSudo(args.sudo)
        .addMount(cacheDir)
    }

    log.info("Piping {} to vep with command: {} files: {}",
      variantContexts, builder.build(), builder.getFiles())

    // Formatters picked up implicitly by pipe: VCF in, VCF out.
    implicit val tFormatter = VCFInFormatter
    implicit val uFormatter = new VCFOutFormatter(sc.hadoopConfiguration, stringency)

    variantContexts.pipe[VariantContext, VariantContextProduct, VariantContextRDD, VCFInFormatter](
      cmd = builder.build(),
      files = builder.getFiles()
    )
  }
}

/**
 * Companion describing the "vep" command and creating it from raw command line arguments.
 */
object Vep extends BDGCommandCompanion {
  val commandName: String = "vep"
  val commandDescription: String = "ADAM Pipe API wrapper for Ensembl VEP."

  /**
   * Parse the command line into VepArgs and wrap the result in a Vep command.
   *
   * @param cmdLine Raw command line arguments.
   * @return A Vep command configured from the parsed arguments.
   */
  def apply(cmdLine: Array[String]): Vep =
    new Vep(Args4j[VepArgs](cmdLine))
}

/**
 * Command line arguments for the vep command: the Vep function arguments
 * plus input and output locations and VCF save options.
 */
class VepArgs extends VepFnArgs with ADAMSaveAnyArgs with ParquetArgs {

  // --- positional arguments ---

  @Argument(
    index = 0,
    metaVar = "INPUT",
    usage = "Location to pipe from, in VCF format.",
    required = true)
  var inputPath: String = null

  @Argument(
    index = 1,
    metaVar = "OUTPUT",
    usage = "Location to pipe to, in VCF format.",
    required = true)
  var outputPath: String = null

  // --- output options ---

  @Args4jOption(
    name = "-single",
    usage = "Saves OUTPUT as single file.",
    required = false)
  var asSingleFile: Boolean = false

  @Args4jOption(
    name = "-defer_merging",
    usage = "Defers merging single file output.",
    required = false)
  var deferMerging: Boolean = false

  @Args4jOption(
    name = "-disable_fast_concat",
    usage = "Disables the parallel file concatenation engine.",
    required = false)
  var disableFastConcat: Boolean = false

  // Kept as a string here; the Vep command converts it to a ValidationStringency.
  @Args4jOption(
    name = "-stringency",
    usage = "Stringency level for various checks; can be SILENT, LENIENT, or STRICT. Defaults to STRICT.",
    required = false)
  var stringency: String = "STRICT"

  // Required by ADAMSaveAnyArgs; this command does not use it.
  var sortFastqOutput: Boolean = false
}

/**
 * Vep command line wrapper.
 *
 * Loads variant contexts from the INPUT VCF, pipes them through Ensembl VEP
 * via VepFn, and saves the result as VCF to OUTPUT.
 *
 * @param args Vep command line arguments.
 */
class Vep(protected val args: VepArgs) extends BDGSparkCommand[VepArgs] with Logging {
  val companion = Vep

  // Converted eagerly, so a stringency name that ValidationStringency.valueOf
  // does not accept is reported when the command is constructed rather than
  // part-way through the run.
  val stringency: ValidationStringency = ValidationStringency.valueOf(args.stringency)

  /**
   * Run the command: load VCF, annotate with Ensembl VEP, save VCF.
   *
   * @param sc Spark context.
   */
  def run(sc: SparkContext): Unit = {
    val variantContexts = sc.loadVcf(args.inputPath, stringency = stringency)
    val pipedVariantContexts = new VepFn(args, stringency, sc).apply(variantContexts)
    pipedVariantContexts.saveAsVcf(args, stringency)
  }
}

0 comments on commit e19aeb8

Please sign in to comment.