Skip to content

Commit

Permalink
Merge pull request #245 from fnothaft/variant-avdl
Browse files Browse the repository at this point in the history
Adding ADAMContig back to ADAMVariant.
  • Loading branch information
massie committed May 21, 2014
2 parents 0b2cf5d + dde2625 commit a7e71dc
Show file tree
Hide file tree
Showing 15 changed files with 41 additions and 31 deletions.
Expand Up @@ -173,10 +173,18 @@ class VariantContextConverter(dict: Option[SequenceDictionary] = None) extends S
new ADAMDatabaseVariantAnnotation()
}

/**
 * Builds an ADAMContig for the contig that a VariantContext sits on.
 *
 * The VCF chromosome name is translated through the contig-to-RefSeq map when a
 * mapping exists; otherwise the VCF name is used unchanged.
 *
 * @param vc The GATK variant context to extract the contig name from.
 * @return A new ADAMContig carrying the (possibly remapped) contig name.
 */
private def createContig(vc: VariantContext): ADAMContig = {
  // Fall back to the raw VCF chromosome name when no RefSeq mapping is known.
  val name = contigToRefSeq.getOrElse(vc.getChr, vc.getChr)
  ADAMContig.newBuilder()
    .setContigName(name)
    .build()
}

private def createADAMVariant(vc: VariantContext, alt: Option[String]): ADAMVariant = {
// VCF CHROM, POS, REF and ALT
val builder = ADAMVariant.newBuilder
.setContig(contigToRefSeq.getOrElse(vc.getChr, vc.getChr))
.setContig(createContig(vc))
.setPosition(vc.getStart - 1 /* ADAM is 0-indexed */ )
.setExclusiveEnd(vc.getEnd /* ADAM is 0-indexed, so the 1-indexed inclusive end becomes exclusive */ )
.setReferenceAllele(vc.getReference.getBaseString)
Expand Down Expand Up @@ -275,7 +283,8 @@ class VariantContextConverter(dict: Option[SequenceDictionary] = None) extends S
def convert(vc: ADAMVariantContext): VariantContext = {
val variant: ADAMVariant = vc.variant
val vcb = new VariantContextBuilder()
.chr(refSeqToContig.getOrElse(variant.getContig.toString, variant.getContig.toString))
.chr(refSeqToContig.getOrElse(variant.getContig.getContigName.toString,
variant.getContig.getContigName.toString))
.start(variant.getPosition + 1 /* Recall ADAM is 0-indexed */ )
.stop(variant.getPosition + variant.getReferenceAllele.length)
.alleles(VariantContextConverter.convertAlleles(variant))
Expand Down
Expand Up @@ -113,7 +113,7 @@ object ReferencePosition {
* @return The reference position of this variant.
*/
def apply(variant: ADAMVariant): ReferencePosition = {
  // The contig field is now a nested ADAMContig record, so the position is
  // keyed by the contig's name rather than the contig object itself.
  // NOTE(review): the scraped diff showed both the pre- and post-change
  // constructor calls back to back; only the updated call is kept here.
  new ReferencePosition(variant.getContig.getContigName, variant.getPosition)
}

/**
Expand All @@ -127,7 +127,7 @@ object ReferencePosition {
*/
def apply(genotype: ADAMGenotype): ReferencePosition = {
  // Delegate to the genotype's underlying variant for contig and position.
  val variant = genotype.getVariant()
  // Key by the nested contig's name (contig is an ADAMContig record, not a
  // string). The stale pre-change constructor call from the diff is dropped.
  new ReferencePosition(variant.getContig.getContigName, variant.getPosition)
}

/**
Expand Down
Expand Up @@ -72,7 +72,7 @@ object SnpTable {
}

def apply(variants: RDD[RichADAMVariant]): SnpTable = {
val positions = variants.map(variant => (variant.getContig.toString, variant.getPosition)).collect()
val positions = variants.map(variant => (variant.getContig.getContigName.toString, variant.getPosition)).collect()
val table = new mutable.HashMap[String, mutable.HashSet[Long]]
positions.foreach(tup => table.getOrElseUpdate(tup._1, { new mutable.HashSet[Long] }) += tup._2)
new SnpTable(table.mapValues(_.toSet).toMap)
Expand Down
Expand Up @@ -50,7 +50,7 @@ class VariantContextConverterSuite extends FunSuite {
.chr("1")

def adamSNVBuilder(contig: String = "1"): ADAMVariant.Builder = ADAMVariant.newBuilder()
.setContig(contig)
.setContig(ADAMContig.newBuilder().setContigName(contig).build())
.setPosition(0L)
.setReferenceAllele("A")
.setVariantAllele("T")
Expand All @@ -65,7 +65,7 @@ class VariantContextConverterSuite extends FunSuite {
assert(adamVC.genotypes.length === 0)

val variant = adamVC.variant
assert(variant.getContig === "1")
assert(variant.getContig.getContigName === "1")

assert(variant.getReferenceAllele === "A")
assert(variant.getPosition === 0L)
Expand All @@ -79,7 +79,7 @@ class VariantContextConverterSuite extends FunSuite {

val adamVC = adamVCs.head
val variant = adamVC.variant
assert(variant.getContig === "NC_000001.10")
assert(variant.getContig.getContigName === "NC_000001.10")
}

test("Convert GATK SNV w/ genotypes w/ phase information to ADAM") {
Expand Down
Expand Up @@ -112,7 +112,7 @@ class ReferencePositionSuite extends FunSuite {

test("create reference position from variant") {
val variant = ADAMVariant.newBuilder()
.setContig("chr10")
.setContig(ADAMContig.newBuilder.setContigName("chr10").build())
.setReferenceAllele("A")
.setVariantAllele("T")
.setPosition(10L)
Expand All @@ -127,7 +127,7 @@ class ReferencePositionSuite extends FunSuite {
test("create reference position from genotype") {
val variant = ADAMVariant.newBuilder()
.setPosition(100L)
.setContig("chr10")
.setContig(ADAMContig.newBuilder.setContigName("chr10").build())
.setReferenceAllele("A")
.setVariantAllele("T")
.build()
Expand Down
Expand Up @@ -22,6 +22,7 @@ import java.io.File
import org.bdgenomics.adam.avro.{
ADAMVariant,
ADAMGenotype,
ADAMContig,
VariantCallingAnnotations
}
import org.apache.spark.rdd.RDD
Expand All @@ -35,7 +36,7 @@ class GenotypePredicatesSuite extends SparkFunSuite {
ParquetLogger.hadoopLoggerLevel(Level.SEVERE)

val v0 = ADAMVariant.newBuilder
.setContig("chr11")
.setContig(ADAMContig.newBuilder.setContigName("chr11").build)
.setPosition(17409571)
.setReferenceAllele("T")
.setVariantAllele("C")
Expand Down Expand Up @@ -73,7 +74,7 @@ class GenotypePredicatesSuite extends SparkFunSuite {
ParquetLogger.hadoopLoggerLevel(Level.SEVERE)

val v0 = ADAMVariant.newBuilder
.setContig("11")
.setContig(ADAMContig.newBuilder.setContigName("11").build)
.setPosition(17409571)
.setReferenceAllele("T")
.setVariantAllele("C")
Expand Down
Expand Up @@ -31,7 +31,7 @@ class RecordConditionSuite extends FunSuite {

test("create record condition from nested field condition") {
val v0 = ADAMVariant.newBuilder
.setContig("11")
.setContig(ADAMContig.newBuilder.setContigName("11").build)
.setPosition(17409571)
.setReferenceAllele("T")
.setVariantAllele("C")
Expand Down Expand Up @@ -168,7 +168,7 @@ class RecordConditionSuite extends FunSuite {

test("passing genotype record condition") {
val v0 = ADAMVariant.newBuilder
.setContig("11")
.setContig(ADAMContig.newBuilder.setContigName("11").build)
.setPosition(17409571)
.setReferenceAllele("T")
.setVariantAllele("C")
Expand Down
Expand Up @@ -428,13 +428,13 @@ class ADAMRDDFunctionsSuite extends SparkFunSuite {
.setPosition(0L)
.setVariantAllele("A")
.setReferenceAllele("T")
.setContig("chr0")
.setContig(ADAMContig.newBuilder.setContigName("chr0").build)
.build()
val variant1 = ADAMVariant.newBuilder()
.setPosition(0L)
.setVariantAllele("C")
.setReferenceAllele("T")
.setContig("chr0")
.setContig(ADAMContig.newBuilder.setContigName("chr0").build)
.build()
val genotype0 = ADAMGenotype.newBuilder()
.setVariant(variant0)
Expand Down
Expand Up @@ -29,7 +29,7 @@ class GenotypesSummarySuite extends SparkFunSuite {

private def variant(reference: String, alternate: String, position: Int): ADAMVariant = {
ADAMVariant.newBuilder()
.setContig("chr1")
.setContig(ADAMContig.newBuilder.setContigName("chr1").build)
.setPosition(position)
.setReferenceAllele(reference)
.setVariantAllele(alternate)
Expand Down
Expand Up @@ -63,8 +63,8 @@ class BaseQualityRecalibrationSuite extends SparkFunSuite {
assert(bqsr.result.count == reads.count)

// Compare the ObservationTables
val referenceObs: Set[String] = scala.io.Source.fromFile(new File(obsFilepath)).getLines.filter(_.length > 0).toSet
val testObs: Set[String] = bqsr.observed.toCSV.split('\n').filter(_.length > 0).toSet
assert(testObs == referenceObs)
val referenceObs: Seq[String] = scala.io.Source.fromFile(new File(obsFilepath)).getLines.filter(_.length > 0).toSeq.sortWith((kv1, kv2) => kv1.compare(kv2) < 0)
val testObs: Seq[String] = bqsr.observed.toCSV.split('\n').filter(_.length > 0).toSeq.sortWith((kv1, kv2) => kv1.compare(kv2) < 0)
referenceObs.zip(testObs).foreach(p => assert(p._1 === p._2))
}
}
Expand Up @@ -25,7 +25,7 @@ import org.apache.spark.rdd.RDD

class ADAMGenotypeRDDFunctionsSuite extends SparkFunSuite {
def v0 = ADAMVariant.newBuilder
.setContig("11")
.setContig(ADAMContig.newBuilder.setContigName("11").build)
.setPosition(17409572)
.setReferenceAllele("T")
.setVariantAllele("C")
Expand Down
Expand Up @@ -26,7 +26,7 @@ class ADAMVariantContextRDDFunctionsSuite extends SparkFunSuite {

sparkTest("joins SNV database annotation") {
val v0 = ADAMVariant.newBuilder
.setContig("11")
.setContig(ADAMContig.newBuilder.setContigName("11").build)
.setPosition(17409572)
.setReferenceAllele("T")
.setVariantAllele("C")
Expand Down
Expand Up @@ -22,15 +22,15 @@ import org.bdgenomics.adam.models.ADAMVariantContext
import org.bdgenomics.adam.rdd.variation.ADAMVariationContext._
import com.google.common.io.Files
import java.io.File
import org.bdgenomics.adam.avro.{ ADAMGenotypeAllele, ADAMGenotype, ADAMVariant }
import org.bdgenomics.adam.avro.{ ADAMGenotypeAllele, ADAMGenotype, ADAMVariant, ADAMContig }
import scala.collection.JavaConversions._

class ADAMVariationContextSuite extends SparkFunSuite {
val tempDir = Files.createTempDir()

def variants: RDD[ADAMVariantContext] = {
val v0 = ADAMVariant.newBuilder
.setContig("chr11")
.setContig(ADAMContig.newBuilder.setContigName("chr11").build)
.setPosition(17409572)
.setReferenceAllele("T")
.setVariantAllele("C")
Expand Down
Expand Up @@ -24,7 +24,7 @@ import scala.collection.JavaConversions._
class RichADAMGenotypeSuite extends FunSuite {

// Fixture: a simple A->T SNV at position 0 on chr1.
def v0 = {
  val contig = ADAMContig.newBuilder.setContigName("chr1").build
  ADAMVariant.newBuilder
    .setContig(contig)
    .setPosition(0)
    .setReferenceAllele("A")
    .setVariantAllele("T")
    .build
}

Expand Down
12 changes: 6 additions & 6 deletions adam-format/src/main/resources/avro/adam.avdl
Expand Up @@ -138,10 +138,10 @@ record ADAMNestedPileup {


record ADAMVariant {
  // Contig this variant sits on, as a nested ADAMContig record
  // (re-added by this commit; was previously a bare string).
  union { null, ADAMContig } contig = null;
  // 0-based start position on the contig.
  union { null, long } position = null;
  // 0-based exclusive end position.
  union { null, long } exclusiveEnd = null;
  union { null, string } referenceAllele = null;
  union { null, string } variantAllele = null;
}
// NOTE(review): the scraped diff interleaved the old non-nullable fields
// (string contig; long position; string referenceAllele;) with the new
// union-typed fields; only the post-change schema is kept here.

Expand Down Expand Up @@ -169,7 +169,7 @@ record VariantCallingAnnotations {
// FILTER: True or false implies that filters were applied and this variant PASSed or not.
// While 'null' implies not filters were applied.
union { null, boolean } variantIsPassing = null;
union { null, array <string> } variantFilters = null;
array <string> variantFilters = [];

union { null, int } readDepth = null;
union { null, boolean } downsampled = null;
Expand Down Expand Up @@ -226,7 +226,7 @@ record ADAMGenotype {
// In ADAM we split multi-allelic VCF lines into multiple
// single-alternate records. This bit is set if that happened for this
// record.
boolean splitFromMultiAllelic = false;
union { boolean, null } splitFromMultiAllelic = false;

// Whether this is a phased genotype, and if so the phase set and quality
union { null, boolean } isPhased = null;
Expand Down

0 comments on commit a7e71dc

Please sign in to comment.