Skip to content

Commit

Permalink
Removing nucleotide contig fragment context.
Browse files Browse the repository at this point in the history
  • Loading branch information
fnothaft authored and massie committed Feb 12, 2015
1 parent 6de9f90 commit 32f51ce
Show file tree
Hide file tree
Showing 6 changed files with 44 additions and 63 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -20,8 +20,6 @@ package org.bdgenomics.adam.cli
import org.apache.hadoop.mapreduce.Job
import org.apache.spark.{ Logging, SparkContext }
import org.bdgenomics.adam.rdd.ADAMContext._
import org.bdgenomics.adam.rdd.contig.NucleotideContigFragmentContext._
import org.bdgenomics.adam.rdd.contig.NucleotideContigFragmentContext
import org.kohsuke.args4j.{ Argument, Option => Args4jOption }

object Fasta2ADAM extends ADAMCommandCompanion {
Expand Down Expand Up @@ -51,7 +49,7 @@ class Fasta2ADAM(protected val args: Fasta2ADAMArgs) extends ADAMSparkCommand[Fa

def run(sc: SparkContext, job: Job) {
log.info("Loading FASTA data from disk.")
val adamFasta = new NucleotideContigFragmentContext(sc).adamSequenceLoad(args.fastaFile, args.fragmentLength)
val adamFasta = sc.loadSequence(args.fastaFile, fragmentLength = args.fragmentLength)
if (args.verbose) {
println("FASTA contains:")
println(adamFasta.adamGetSequenceDictionary())
Expand Down
42 changes: 40 additions & 2 deletions adam-core/src/main/scala/org/bdgenomics/adam/rdd/ADAMContext.scala
Original file line number Diff line number Diff line change
Expand Up @@ -26,14 +26,16 @@ import htsjdk.samtools.SAMFileHeader
import org.apache.avro.Schema
import org.apache.avro.specific.SpecificRecord
import org.apache.hadoop.fs.{ FileSystem, Path }
import org.apache.hadoop.io.LongWritable
import org.apache.hadoop.io.{ LongWritable, Text }
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat
import org.apache.spark.rdd.MetricsContext._
import org.apache.spark.{ Logging, SparkConf, SparkContext }
import org.bdgenomics.adam.converters.SAMRecordConverter
import org.bdgenomics.adam.converters.{ FastaConverter, SAMRecordConverter }
import org.bdgenomics.adam.instrumentation.Timers._
import org.bdgenomics.adam.models._
import org.bdgenomics.adam.predicates.ADAMPredicate
import org.bdgenomics.adam.projections.{ AlignmentRecordField, NucleotideContigFragmentField, Projection }
import org.bdgenomics.adam.rdd.contig.NucleotideContigFragmentRDDFunctions
import org.bdgenomics.adam.rdd.pileup.{ PileupRDDFunctions, RodRDDFunctions }
import org.bdgenomics.adam.rdd.read.{ AlignmentRecordContext, AlignmentRecordRDDFunctions }
import org.bdgenomics.adam.rdd.variation.VariationContext._
Expand Down Expand Up @@ -64,6 +66,9 @@ object ADAMContext {
// Add methods specific to the Rod RDDs: this implicit conversion wraps an
// RDD[Rod] in a RodRDDFunctions instance.
implicit def rddToRodRDD(rdd: RDD[Rod]) = new RodRDDFunctions(rdd)

// Add methods specific to the NucleotideContigFragment RDDs: this implicit
// conversion wraps an RDD[NucleotideContigFragment] in a
// NucleotideContigFragmentRDDFunctions instance.
implicit def rddToContigFragmentRDD(rdd: RDD[NucleotideContigFragment]) = new NucleotideContigFragmentRDDFunctions(rdd)

// Add implicits for the rich adam objects: this implicit conversion wraps an
// AlignmentRecord in a RichAlignmentRecord.
implicit def recordToRichRecord(record: AlignmentRecord): RichAlignmentRecord = new RichAlignmentRecord(record)

Expand Down Expand Up @@ -305,6 +310,39 @@ class ADAMContext(val sc: SparkContext) extends Serializable with Logging {
None
}

/**
 * Attempts to load a file as FASTA and convert it to nucleotide contig fragments.
 *
 * A path is treated as FASTA only when it ends in ".fasta" or ".fa" (the check is
 * case sensitive). The file is read through Hadoop's TextInputFormat as
 * (LongWritable, Text) records, which are unwrapped to (Long, String) pairs and
 * handed to FastaConverter.
 *
 * Neither the predicate nor the projection is applied on the FASTA path; a warning
 * is logged if the caller supplied one, so that the omission is not silent.
 *
 * @param filePath Path of the file to load.
 * @param predicate Optional predicate class. Ignored when the file is FASTA.
 * @param projection Optional projection schema. Ignored when the file is FASTA.
 * @param fragmentLength Fragment length passed through to FastaConverter.
 * @tparam U The predicate type.
 * @return Some(fragments) if the path has a FASTA extension, None otherwise.
 */
private def maybeLoadFasta[U <: ADAMPredicate[NucleotideContigFragment]](
  filePath: String,
  predicate: Option[Class[U]] = None,
  projection: Option[Schema] = None,
  fragmentLength: Long): Option[RDD[NucleotideContigFragment]] = {
  if (filePath.endsWith(".fasta") || filePath.endsWith(".fa")) {
    // The FASTA path performs no filtering or projection, so tell the caller
    // instead of quietly returning more data than they asked for.
    if (predicate.isDefined) {
      log.warn("Predicate is ignored when loading a FASTA file.")
    }
    if (projection.isDefined) {
      log.warn("Projection is ignored when loading a FASTA file.")
    }

    val fastaData: RDD[(LongWritable, Text)] = sc.newAPIHadoopFile(filePath,
      classOf[TextInputFormat],
      classOf[LongWritable],
      classOf[Text])

    // Unwrap the Hadoop writables into plain Scala values before conversion.
    val remapData = fastaData.map(kv => (kv._1.get, kv._2.toString))

    log.info("Converting FASTA to ADAM.")
    Some(FastaConverter(remapData, fragmentLength))
  } else {
    None
  }
}

/**
 * Loads nucleotide contig fragments from a file.
 *
 * The path is first offered to maybeLoadFasta; when that returns None, the file is
 * loaded through adamLoad with the given predicate and projection.
 *
 * @param filePath Path of the file to load.
 * @param predicate Optional predicate class, forwarded to both loaders.
 * @param projection Optional projection schema, forwarded to both loaders.
 * @param fragmentLength Fragment length forwarded to maybeLoadFasta. Defaults to 10000.
 * @tparam U The predicate type.
 * @return An RDD of nucleotide contig fragments.
 */
def loadSequence[U <: ADAMPredicate[NucleotideContigFragment]](
  filePath: String,
  predicate: Option[Class[U]] = None,
  projection: Option[Schema] = None,
  fragmentLength: Long = 10000): RDD[NucleotideContigFragment] = {
  val fromFasta = maybeLoadFasta(filePath, predicate, projection, fragmentLength)

  fromFasta match {
    case Some(fragments) => fragments
    // Only fall back to adamLoad when the FASTA attempt produced nothing.
    case None            => adamLoad[NucleotideContigFragment, U](filePath, predicate, projection)
  }
}

def loadGenotypes[U <: ADAMPredicate[Genotype]](
filePath: String,
predicate: Option[Class[U]] = None,
Expand Down

This file was deleted.

Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,6 @@
package org.bdgenomics.adam.converters

import org.bdgenomics.adam.rdd.ADAMContext._
import org.bdgenomics.adam.rdd.contig.NucleotideContigFragmentContext
import org.bdgenomics.adam.util.ADAMFunSuite
import java.io.File

Expand Down Expand Up @@ -211,7 +210,7 @@ class FastaConverterSuite extends ADAMFunSuite {

sparkTest("convert reference fasta file") {
//Loading "human_g1k_v37_chr1_59kb.fasta"
val referenceSequences = new NucleotideContigFragmentContext(sc).adamSequenceLoad(chr1File, 10).collect()
val referenceSequences = sc.loadSequence(chr1File, fragmentLength = 10).collect()
assert(referenceSequences.forall(_.getContig.getContigName.toString == "1"))
assert(referenceSequences.slice(0, referenceSequences.length - 2).forall(_.getFragmentSequence.length == 10))

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,11 +20,10 @@ package org.bdgenomics.adam.rdd.contig
import org.apache.spark.rdd.RDD
import org.bdgenomics.adam.models._
import org.bdgenomics.adam.rdd.ADAMContext._
import org.bdgenomics.adam.rdd.contig.NucleotideContigFragmentContext._
import org.bdgenomics.adam.util.ADAMFunSuite
import org.bdgenomics.formats.avro._

class ADAMRDDFunctionsSuite extends ADAMFunSuite {
class NucleotideContigFragmentRDDFunctionsSuite extends ADAMFunSuite {

sparkTest("generate sequence dict from fasta") {
val contig0 = Contig.newBuilder
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
package org.bdgenomics.adam.rdd.pileup

import org.apache.spark.rdd.RDD
import org.bdgenomics.adam.rdd.pileup.PileupContext._
import org.bdgenomics.adam.rdd.ADAMContext._
import org.bdgenomics.adam.util.ADAMFunSuite
import org.bdgenomics.formats.avro._

Expand Down

0 comments on commit 32f51ce

Please sign in to comment.