Skip to content

Commit

Permalink
Merge ef7b858 into dccc4a1
Browse files Browse the repository at this point in the history
  • Loading branch information
heuermh committed Jul 5, 2017
2 parents dccc4a1 + ef7b858 commit bf683aa
Show file tree
Hide file tree
Showing 2 changed files with 192 additions and 0 deletions.
96 changes: 96 additions & 0 deletions src/main/scala/org/bdgenomics/cannoli/Gmap.scala
@@ -0,0 +1,96 @@
/**
* Licensed to Big Data Genomics (BDG) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The BDG licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.bdgenomics.cannoli

import htsjdk.samtools.ValidationStringency
import org.apache.spark.SparkContext
import org.apache.spark.SparkContext._
import org.apache.spark.rdd.RDD
import org.bdgenomics.adam.rdd.ADAMContext._
import org.bdgenomics.adam.rdd.ADAMSaveAnyArgs
import org.bdgenomics.adam.rdd.fragment.{ FragmentRDD, InterleavedFASTQInFormatter }
import org.bdgenomics.adam.rdd.read.{ AlignmentRecordRDD, AnySAMOutFormatter }
import org.bdgenomics.formats.avro.AlignmentRecord
import org.bdgenomics.utils.cli._
import org.bdgenomics.utils.misc.Logging
import org.kohsuke.args4j.{ Argument, Option => Args4jOption }

/**
 * Companion for the [[Gmap]] command.
 *
 * Provides the command name and description, and creates a command
 * instance from raw command line arguments.
 */
object Gmap extends BDGCommandCompanion {
  val commandName = "gmap"
  val commandDescription = "ADAM Pipe API wrapper for GMAP."

  /**
   * Creates a Gmap command from the given command line.
   *
   * @param cmdLine Command line arguments, passed through Args4j to populate a GmapArgs.
   * @return A new Gmap command wrapping the populated arguments.
   */
  def apply(cmdLine: Array[String]) = {
    val parsedArgs = Args4j[GmapArgs](cmdLine)
    new Gmap(parsedArgs)
  }
}

/**
 * Command line arguments for the gmap command.
 *
 * Each field is a mutable var carrying an args4j annotation; the two positional
 * arguments (INPUT, OUTPUT) and the genome options are marked as required.
 */
class GmapArgs extends Args4jBase with ADAMSaveAnyArgs with ParquetArgs {

  // --- positional arguments ---

  @Argument(
    index = 0,
    required = true,
    metaVar = "INPUT",
    usage = "Location to pipe from, in interleaved FASTQ format.")
  var inputPath: String = null

  @Argument(
    index = 1,
    required = true,
    metaVar = "OUTPUT",
    usage = "Location to pipe to.")
  var outputPath: String = null

  // --- GMAP genome database options ---

  @Args4jOption(
    name = "-genome_path",
    required = true,
    usage = "Genome database path. gmap -D, --dir argument.")
  var genomePath: String = null

  @Args4jOption(
    name = "-genome_name",
    required = true,
    usage = "Genome database name. gmap -d, --db argument.")
  var genomeName: String = null

  // --- output options ---

  @Args4jOption(
    name = "-single",
    required = false,
    usage = "Saves OUTPUT as single file.")
  var asSingleFile: Boolean = false

  @Args4jOption(
    name = "-defer_merging",
    required = false,
    usage = "Defers merging single file output.")
  var deferMerging: Boolean = false

  @Args4jOption(
    name = "-disable_fast_concat",
    required = false,
    usage = "Disables the parallel file concatenation engine.")
  var disableFastConcat: Boolean = false

  // --- validation and executable location ---

  @Args4jOption(
    name = "-stringency",
    required = false,
    usage = "Stringency level for various checks; can be SILENT, LENIENT, or STRICT. Defaults to STRICT.")
  var stringency: String = "STRICT"

  @Args4jOption(
    name = "-gmap_path",
    required = false,
    usage = "Path to the GMAP executable. Defaults to gmap.")
  var gmapPath: String = "gmap"

  // must be defined due to ADAMSaveAnyArgs, but unused here
  var sortFastqOutput: Boolean = false
}

/**
 * Gmap command.
 *
 * Loads fragments from the INPUT path, pipes them as interleaved FASTQ through
 * an external GMAP process, reads the piped output back as alignment records,
 * and saves them using the supplied arguments.
 *
 * @param args Command line arguments for this command.
 */
class Gmap(protected val args: GmapArgs) extends BDGSparkCommand[GmapArgs] with Logging {
  val companion = Gmap

  // Evaluated when the command is constructed; ValidationStringency.valueOf
  // rejects names that are not constants of the enum.
  // NOTE(review): this value is not passed to loadFragments or save below;
  // confirm whether it is meant to be applied there.
  val stringency = ValidationStringency.valueOf(args.stringency)

  /**
   * Runs the command: load fragments, pipe them through GMAP, save the output.
   *
   * @param sc Spark context used to load the input fragments.
   */
  def run(sc: SparkContext): Unit = {
    val input: FragmentRDD = sc.loadFragments(args.inputPath)

    // formatters picked up implicitly by pipe: FASTQ in, SAM out
    implicit val tFormatter = InterleavedFASTQInFormatter
    implicit val uFormatter = new AnySAMOutFormatter

    // The command is handed to pipe as one space-separated string.
    val gmapCommand = Seq(
      args.gmapPath,
      s"--dir=${args.genomePath}",
      s"--db=${args.genomeName}",
      "--format=sampe"
    ).mkString(" ")

    val output: AlignmentRecordRDD = input.pipe[AlignmentRecord, AlignmentRecordRDD, InterleavedFASTQInFormatter](gmapCommand)

    output.save(args)
  }
}
96 changes: 96 additions & 0 deletions src/main/scala/org/bdgenomics/cannoli/Gsnap.scala
@@ -0,0 +1,96 @@
/**
* Licensed to Big Data Genomics (BDG) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The BDG licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.bdgenomics.cannoli

import htsjdk.samtools.ValidationStringency
import org.apache.spark.SparkContext
import org.apache.spark.SparkContext._
import org.apache.spark.rdd.RDD
import org.bdgenomics.adam.rdd.ADAMContext._
import org.bdgenomics.adam.rdd.ADAMSaveAnyArgs
import org.bdgenomics.adam.rdd.fragment.{ FragmentRDD, InterleavedFASTQInFormatter }
import org.bdgenomics.adam.rdd.read.{ AlignmentRecordRDD, AnySAMOutFormatter }
import org.bdgenomics.formats.avro.AlignmentRecord
import org.bdgenomics.utils.cli._
import org.bdgenomics.utils.misc.Logging
import org.kohsuke.args4j.{ Argument, Option => Args4jOption }

/**
 * Companion for the [[Gsnap]] command.
 *
 * Provides the command name and description, and creates a command
 * instance from raw command line arguments.
 */
object Gsnap extends BDGCommandCompanion {
  val commandName = "gsnap"
  val commandDescription = "ADAM Pipe API wrapper for GSNAP."

  /**
   * Creates a Gsnap command from the given command line.
   *
   * @param cmdLine Command line arguments, passed through Args4j to populate a GsnapArgs.
   * @return A new Gsnap command wrapping the populated arguments.
   */
  def apply(cmdLine: Array[String]) = {
    val parsedArgs = Args4j[GsnapArgs](cmdLine)
    new Gsnap(parsedArgs)
  }
}

/**
 * Command line arguments for the gsnap command.
 *
 * Each field is a mutable var carrying an args4j annotation; the two positional
 * arguments (INPUT, OUTPUT) and the genome options are marked as required.
 */
class GsnapArgs extends Args4jBase with ADAMSaveAnyArgs with ParquetArgs {

  // --- positional arguments ---

  @Argument(
    index = 0,
    required = true,
    metaVar = "INPUT",
    usage = "Location to pipe from, in interleaved FASTQ format.")
  var inputPath: String = null

  @Argument(
    index = 1,
    required = true,
    metaVar = "OUTPUT",
    usage = "Location to pipe to.")
  var outputPath: String = null

  // --- GSNAP genome database options ---

  @Args4jOption(
    name = "-genome_path",
    required = true,
    usage = "Genome database path. gsnap -D, --dir argument.")
  var genomePath: String = null

  @Args4jOption(
    name = "-genome_name",
    required = true,
    usage = "Genome database name. gsnap -d, --db argument.")
  var genomeName: String = null

  // --- output options ---

  @Args4jOption(
    name = "-single",
    required = false,
    usage = "Saves OUTPUT as single file.")
  var asSingleFile: Boolean = false

  @Args4jOption(
    name = "-defer_merging",
    required = false,
    usage = "Defers merging single file output.")
  var deferMerging: Boolean = false

  @Args4jOption(
    name = "-disable_fast_concat",
    required = false,
    usage = "Disables the parallel file concatenation engine.")
  var disableFastConcat: Boolean = false

  // --- validation and executable location ---

  @Args4jOption(
    name = "-stringency",
    required = false,
    usage = "Stringency level for various checks; can be SILENT, LENIENT, or STRICT. Defaults to STRICT.")
  var stringency: String = "STRICT"

  @Args4jOption(
    name = "-gsnap_path",
    required = false,
    usage = "Path to the GSNAP executable. Defaults to gsnap.")
  var gsnapPath: String = "gsnap"

  // must be defined due to ADAMSaveAnyArgs, but unused here
  var sortFastqOutput: Boolean = false
}

/**
 * Gsnap command.
 *
 * Loads fragments from the INPUT path, pipes them as interleaved FASTQ through
 * an external GSNAP process, reads the piped output back as alignment records,
 * and saves them using the supplied arguments.
 *
 * @param args Command line arguments for this command.
 */
class Gsnap(protected val args: GsnapArgs) extends BDGSparkCommand[GsnapArgs] with Logging {
  val companion = Gsnap

  // Evaluated when the command is constructed; ValidationStringency.valueOf
  // rejects names that are not constants of the enum.
  // NOTE(review): this value is not passed to loadFragments or save below;
  // confirm whether it is meant to be applied there.
  val stringency = ValidationStringency.valueOf(args.stringency)

  /**
   * Runs the command: load fragments, pipe them through GSNAP, save the output.
   *
   * @param sc Spark context used to load the input fragments.
   */
  def run(sc: SparkContext): Unit = {
    val input: FragmentRDD = sc.loadFragments(args.inputPath)

    // formatters picked up implicitly by pipe: FASTQ in, SAM out
    implicit val tFormatter = InterleavedFASTQInFormatter
    implicit val uFormatter = new AnySAMOutFormatter

    // The command is handed to pipe as one space-separated string.
    val gsnapCommand = Seq(
      args.gsnapPath,
      s"--dir=${args.genomePath}",
      s"--db=${args.genomeName}",
      "--format=sam"
    ).mkString(" ")

    val output: AlignmentRecordRDD = input.pipe[AlignmentRecord, AlignmentRecordRDD, InterleavedFASTQInFormatter](gsnapCommand)

    output.save(args)
  }
}

0 comments on commit bf683aa

Please sign in to comment.