Skip to content

Commit

Permalink
Enrich CSV output
Browse files Browse the repository at this point in the history
  • Loading branch information
jey committed Mar 11, 2014
1 parent 43ecc0a commit 2677c97
Show file tree
Hide file tree
Showing 4 changed files with 25 additions and 8 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -60,14 +60,16 @@ extends Serializable with Logging {
read.alignmentQuality.exists(_ > QualityScore.zero) &&
read.passedQualityChecks

// first phase
// First phase
val observed: ObservationTable = reads.
filter(shouldIncludeRead).map(observe).
aggregate(ObservationAccumulator(covariates))(_ ++= _, _ += _).result

println("ObservationTable:\n%s".format(observed.toCSV))
// Log the ObservationTable
// TODO: delete once unneeded; temporarily added for creating plots
println(observed.toCSV)

// second phase
// Second phase
val recalibrator = Recalibrator(observed, minAcceptableQuality)
reads.map(recalibrator)
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,9 @@ trait Covariate {
case None => "(none)"
case Some(value) => value.toString
}

// A short name for this covariate, used in CSV output header
def csvFieldName: String
}

abstract class AbstractCovariate[ValueT] extends Covariate with Serializable {
Expand All @@ -47,6 +50,8 @@ class CovariateKey(

def parts: Seq[Any] = Seq(readGroup, quality) ++ extras

// True when at least one element of `extras` reports isEmpty,
// i.e. not every extra value is present.
def containsNone: Boolean = {
  val allPresent = extras.forall(extra => !extra.isEmpty)
  !allPresent
}

override def toString: String = "[" + parts.mkString(", ") + "]"

override def equals(other: Any) = other match {
Expand Down Expand Up @@ -77,14 +82,15 @@ class CovariateSpace(val extras: IndexedSeq[Covariate]) extends Serializable {
}

// Format the provided key to be compatible with GATK's CSV output
def toCSV(key: CovariateKey): String = {
def toCSV(key: CovariateKey): Seq[String] = {
val extraFields: Seq[String] = extras.zip(key.extras).map{
case (cov, value) => cov.toCSV(value.asInstanceOf[Option[cov.Value]])
}
val allFields: Seq[String] = Seq(key.readGroup, key.quality.phred.toString) ++ extraFields
allFields.mkString(",")
Seq(key.readGroup, key.quality.phred.toString) ++ extraFields
}

// Header names for the key part of a CSV row: the two fixed columns first,
// then the csvFieldName of each covariate in `extras`, in order.
def csvHeader: Seq[String] = {
  val fixedColumns = Seq("ReadGroup", "ReportedQ")
  val extraColumns = extras.map(covariate => covariate.csvFieldName)
  fixedColumns ++ extraColumns
}

override def equals(other: Any): Boolean = other match {
case that: CovariateSpace => this.extras == that.extras
case _ => false
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,8 @@ class DinucCovariate extends AbstractCovariate[(Char, Char)] {
case Some(value) => "%s%s".format(value._1, value._2)
}

override def csvFieldName: String = "Dinuc"

override def equals(other: Any) = other match {
case that: DinucCovariate => true
case _ => false
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,7 @@ class Observation(val total: Long, val mismatches: Long) extends Serializable {
}

// Format as string compatible with GATK's CSV output
def toCSV: String = "%s,%s,%s".format(total, mismatches, empiricalQualityForCSV.phred)
// CSV fields for this observation, in order: total, mismatches, and the
// phred value of empiricalQualityForCSV, each rendered with toString.
def toCSV: Seq[String] = {
  val countFields = Seq(total, mismatches).map(count => count.toString)
  countFields :+ empiricalQualityForCSV.phred.toString
}
def empiricalQualityForCSV: QualityScore = QualityScore.fromErrorProbability(gatkErrorProbability(0))

override def toString: String =
Expand Down Expand Up @@ -115,7 +115,14 @@ class ObservationTable(
override def toString = entries.map{ case (k, v) => "%s\t%s".format(k, v) }.mkString("\n")

// Format as CSV compatible with GATK's output
def toCSV = entries.map{ case (k, v) => "%s,%s".format(space.toCSV(k), v.toCSV) }.mkString("\n")
// Renders the table as CSV text: the csvHeader line first, then one line per
// entry made of the key fields (space.toCSV), the observation fields
// (obs.toCSV) and the IsSkipped marker. Fields are joined with "," and lines
// with "\n"; no quoting or escaping is applied to field values.
def toCSV: String = {
  val rows = entries.map { case (key, obs) =>
    // Always emit the IsSkipped field ("**" when the key contains a None,
    // empty otherwise) so every row has as many fields as the header.
    val skippedMarker = if (key.containsNone) "**" else ""
    (space.toCSV(key) ++ obs.toCSV) :+ skippedMarker
  }
  (Seq(csvHeader) ++ rows).map(_.mkString(",")).mkString("\n")
}

// Full CSV header: the key columns supplied by `space`, followed by the
// per-observation columns and the trailing IsSkipped column.
def csvHeader: Seq[String] = {
  val observationColumns = Seq("TotalCount", "MismatchCount", "EmpiricalQ", "IsSkipped")
  space.csvHeader ++ observationColumns
}

// `func' computes the aggregation key
def aggregate[K](func: (CovariateKey, Observation) => K): Map[K, Aggregate] = {
Expand Down

0 comments on commit 2677c97

Please sign in to comment.