Skip to content

Commit

Permalink
Adding ADAMContig back to ADAMVariant.
Browse files Browse the repository at this point in the history
  • Loading branch information
fnothaft committed May 21, 2014
1 parent 0b2cf5d commit dde2625
Show file tree
Hide file tree
Showing 15 changed files with 41 additions and 31 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -173,10 +173,18 @@ class VariantContextConverter(dict: Option[SequenceDictionary] = None) extends S
new ADAMDatabaseVariantAnnotation()
}

/**
 * Builds the ADAMContig record for the contig a variant context lies on.
 *
 * The VCF chromosome name is looked up in contigToRefSeq; when no mapping
 * exists for it, the chromosome name itself is used as the contig name.
 *
 * @param vc Variant context whose chromosome names the contig.
 * @return A contig record with only the contig name set.
 */
private def createContig(vc: VariantContext): ADAMContig = {
  val chromosome = vc.getChr
  val contigBuilder = ADAMContig.newBuilder()
  contigBuilder.setContigName(contigToRefSeq.getOrElse(chromosome, chromosome))
  contigBuilder.build()
}

private def createADAMVariant(vc: VariantContext, alt: Option[String]): ADAMVariant = {
// VCF CHROM, POS, REF and ALT
val builder = ADAMVariant.newBuilder
.setContig(contigToRefSeq.getOrElse(vc.getChr, vc.getChr))
.setContig(createContig(vc))
.setPosition(vc.getStart - 1 /* ADAM is 0-indexed */ )
.setExclusiveEnd(vc.getEnd /* ADAM is 0-indexed, so the 1-indexed inclusive end becomes exclusive */ )
.setReferenceAllele(vc.getReference.getBaseString)
Expand Down Expand Up @@ -275,7 +283,8 @@ class VariantContextConverter(dict: Option[SequenceDictionary] = None) extends S
def convert(vc: ADAMVariantContext): VariantContext = {
val variant: ADAMVariant = vc.variant
val vcb = new VariantContextBuilder()
.chr(refSeqToContig.getOrElse(variant.getContig.toString, variant.getContig.toString))
.chr(refSeqToContig.getOrElse(variant.getContig.getContigName.toString,
variant.getContig.getContigName.toString))
.start(variant.getPosition + 1 /* Recall ADAM is 0-indexed */ )
.stop(variant.getPosition + variant.getReferenceAllele.length)
.alleles(VariantContextConverter.convertAlleles(variant))
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -113,7 +113,7 @@ object ReferencePosition {
* @return The reference position of this variant.
*/
def apply(variant: ADAMVariant): ReferencePosition = {
new ReferencePosition(variant.getContig, variant.getPosition)
new ReferencePosition(variant.getContig.getContigName, variant.getPosition)
}

/**
Expand All @@ -127,7 +127,7 @@ object ReferencePosition {
*/
def apply(genotype: ADAMGenotype): ReferencePosition = {
val variant = genotype.getVariant()
new ReferencePosition(variant.getContig, variant.getPosition)
new ReferencePosition(variant.getContig.getContigName, variant.getPosition)
}

/**
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -72,7 +72,7 @@ object SnpTable {
}

def apply(variants: RDD[RichADAMVariant]): SnpTable = {
val positions = variants.map(variant => (variant.getContig.toString, variant.getPosition)).collect()
val positions = variants.map(variant => (variant.getContig.getContigName.toString, variant.getPosition)).collect()
val table = new mutable.HashMap[String, mutable.HashSet[Long]]
positions.foreach(tup => table.getOrElseUpdate(tup._1, { new mutable.HashSet[Long] }) += tup._2)
new SnpTable(table.mapValues(_.toSet).toMap)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@ class VariantContextConverterSuite extends FunSuite {
.chr("1")

def adamSNVBuilder(contig: String = "1"): ADAMVariant.Builder = ADAMVariant.newBuilder()
.setContig(contig)
.setContig(ADAMContig.newBuilder().setContigName(contig).build())
.setPosition(0L)
.setReferenceAllele("A")
.setVariantAllele("T")
Expand All @@ -65,7 +65,7 @@ class VariantContextConverterSuite extends FunSuite {
assert(adamVC.genotypes.length === 0)

val variant = adamVC.variant
assert(variant.getContig === "1")
assert(variant.getContig.getContigName === "1")

assert(variant.getReferenceAllele === "A")
assert(variant.getPosition === 0L)
Expand All @@ -79,7 +79,7 @@ class VariantContextConverterSuite extends FunSuite {

val adamVC = adamVCs.head
val variant = adamVC.variant
assert(variant.getContig === "NC_000001.10")
assert(variant.getContig.getContigName === "NC_000001.10")
}

test("Convert GATK SNV w/ genotypes w/ phase information to ADAM") {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -112,7 +112,7 @@ class ReferencePositionSuite extends FunSuite {

test("create reference position from variant") {
val variant = ADAMVariant.newBuilder()
.setContig("chr10")
.setContig(ADAMContig.newBuilder.setContigName("chr10").build())
.setReferenceAllele("A")
.setVariantAllele("T")
.setPosition(10L)
Expand All @@ -127,7 +127,7 @@ class ReferencePositionSuite extends FunSuite {
test("create reference position from genotype") {
val variant = ADAMVariant.newBuilder()
.setPosition(100L)
.setContig("chr10")
.setContig(ADAMContig.newBuilder.setContigName("chr10").build())
.setReferenceAllele("A")
.setVariantAllele("T")
.build()
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ import java.io.File
import org.bdgenomics.adam.avro.{
ADAMVariant,
ADAMGenotype,
ADAMContig,
VariantCallingAnnotations
}
import org.apache.spark.rdd.RDD
Expand All @@ -35,7 +36,7 @@ class GenotypePredicatesSuite extends SparkFunSuite {
ParquetLogger.hadoopLoggerLevel(Level.SEVERE)

val v0 = ADAMVariant.newBuilder
.setContig("chr11")
.setContig(ADAMContig.newBuilder.setContigName("chr11").build)
.setPosition(17409571)
.setReferenceAllele("T")
.setVariantAllele("C")
Expand Down Expand Up @@ -73,7 +74,7 @@ class GenotypePredicatesSuite extends SparkFunSuite {
ParquetLogger.hadoopLoggerLevel(Level.SEVERE)

val v0 = ADAMVariant.newBuilder
.setContig("11")
.setContig(ADAMContig.newBuilder.setContigName("11").build)
.setPosition(17409571)
.setReferenceAllele("T")
.setVariantAllele("C")
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ class RecordConditionSuite extends FunSuite {

test("create record condition from nested field condition") {
val v0 = ADAMVariant.newBuilder
.setContig("11")
.setContig(ADAMContig.newBuilder.setContigName("11").build)
.setPosition(17409571)
.setReferenceAllele("T")
.setVariantAllele("C")
Expand Down Expand Up @@ -168,7 +168,7 @@ class RecordConditionSuite extends FunSuite {

test("passing genotype record condition") {
val v0 = ADAMVariant.newBuilder
.setContig("11")
.setContig(ADAMContig.newBuilder.setContigName("11").build)
.setPosition(17409571)
.setReferenceAllele("T")
.setVariantAllele("C")
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -428,13 +428,13 @@ class ADAMRDDFunctionsSuite extends SparkFunSuite {
.setPosition(0L)
.setVariantAllele("A")
.setReferenceAllele("T")
.setContig("chr0")
.setContig(ADAMContig.newBuilder.setContigName("chr0").build)
.build()
val variant1 = ADAMVariant.newBuilder()
.setPosition(0L)
.setVariantAllele("C")
.setReferenceAllele("T")
.setContig("chr0")
.setContig(ADAMContig.newBuilder.setContigName("chr0").build)
.build()
val genotype0 = ADAMGenotype.newBuilder()
.setVariant(variant0)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ class GenotypesSummarySuite extends SparkFunSuite {

private def variant(reference: String, alternate: String, position: Int): ADAMVariant = {
ADAMVariant.newBuilder()
.setContig("chr1")
.setContig(ADAMContig.newBuilder.setContigName("chr1").build)
.setPosition(position)
.setReferenceAllele(reference)
.setVariantAllele(alternate)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -63,8 +63,8 @@ class BaseQualityRecalibrationSuite extends SparkFunSuite {
assert(bqsr.result.count == reads.count)

// Compare the ObservationTables
val referenceObs: Set[String] = scala.io.Source.fromFile(new File(obsFilepath)).getLines.filter(_.length > 0).toSet
val testObs: Set[String] = bqsr.observed.toCSV.split('\n').filter(_.length > 0).toSet
assert(testObs == referenceObs)
// Load the expected observations, closing the file handle once the lines are read.
val referenceSource = scala.io.Source.fromFile(new File(obsFilepath))
val referenceObs: Seq[String] = try {
  referenceSource.getLines.filter(_.length > 0).toList.sorted
} finally {
  referenceSource.close()
}
val testObs: Seq[String] = bqsr.observed.toCSV.split('\n').filter(_.length > 0).toSeq.sorted
// zip stops at the shorter sequence, so compare the lengths first; otherwise
// missing or extra observations would go unnoticed.
assert(testObs.length === referenceObs.length)
referenceObs.zip(testObs).foreach(p => assert(p._1 === p._2))
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ import org.apache.spark.rdd.RDD

class ADAMGenotypeRDDFunctionsSuite extends SparkFunSuite {
def v0 = ADAMVariant.newBuilder
.setContig("11")
.setContig(ADAMContig.newBuilder.setContigName("11").build)
.setPosition(17409572)
.setReferenceAllele("T")
.setVariantAllele("C")
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ class ADAMVariantContextRDDFunctionsSuite extends SparkFunSuite {

sparkTest("joins SNV database annotation") {
val v0 = ADAMVariant.newBuilder
.setContig("11")
.setContig(ADAMContig.newBuilder.setContigName("11").build)
.setPosition(17409572)
.setReferenceAllele("T")
.setVariantAllele("C")
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -22,15 +22,15 @@ import org.bdgenomics.adam.models.ADAMVariantContext
import org.bdgenomics.adam.rdd.variation.ADAMVariationContext._
import com.google.common.io.Files
import java.io.File
import org.bdgenomics.adam.avro.{ ADAMGenotypeAllele, ADAMGenotype, ADAMVariant }
import org.bdgenomics.adam.avro.{ ADAMGenotypeAllele, ADAMGenotype, ADAMVariant, ADAMContig }
import scala.collection.JavaConversions._

class ADAMVariationContextSuite extends SparkFunSuite {
val tempDir = Files.createTempDir()

def variants: RDD[ADAMVariantContext] = {
val v0 = ADAMVariant.newBuilder
.setContig("chr11")
.setContig(ADAMContig.newBuilder.setContigName("chr11").build)
.setPosition(17409572)
.setReferenceAllele("T")
.setVariantAllele("C")
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ import scala.collection.JavaConversions._
class RichADAMGenotypeSuite extends FunSuite {

def v0 = ADAMVariant.newBuilder
.setContig("chr1")
.setContig(ADAMContig.newBuilder.setContigName("chr1").build)
.setPosition(0).setReferenceAllele("A").setVariantAllele("T")
.build

Expand Down
12 changes: 6 additions & 6 deletions adam-format/src/main/resources/avro/adam.avdl
Original file line number Diff line number Diff line change
Expand Up @@ -138,10 +138,10 @@ record ADAMNestedPileup {


record ADAMVariant {
string contig;
long position;
union { null, long } exclusiveEnd = null;
string referenceAllele;
union { null, ADAMContig } contig = null;
union { null, long } position = null;
union { null, long } exclusiveEnd = null;
union { null, string } referenceAllele = null;
union { null, string } variantAllele = null;
}

Expand Down Expand Up @@ -169,7 +169,7 @@ record VariantCallingAnnotations {
// FILTER: True or false implies that filters were applied and this variant PASSed or not.
// While 'null' implies no filters were applied.
union { null, boolean } variantIsPassing = null;
union { null, array <string> } variantFilters = null;
array <string> variantFilters = [];

union { null, int } readDepth = null;
union { null, boolean } downsampled = null;
Expand Down Expand Up @@ -226,7 +226,7 @@ record ADAMGenotype {
// In ADAM we split multi-allelic VCF lines into multiple
// single-alternate records. This bit is set if that happened for this
// record.
boolean splitFromMultiAllelic = false;
union { boolean, null } splitFromMultiAllelic = false;

// Whether this is a phased genotype, and if so the phase set and quality
union { null, boolean } isPhased = null;
Expand Down

0 comments on commit dde2625

Please sign in to comment.