Browse files

Write GATK recalibration filter results separately in output summary.…

… Provide table of metrics used for filtering
  • Loading branch information...
1 parent 0ab3216 commit e79aec509c5b68e0fa6dd7acc03c3cf9e0c86bf8 @chapmanb committed Mar 27, 2012
Showing with 30 additions and 6 deletions.
  1. +7 −5 src/bcbio/variation/compare.clj
  2. +1 −0 src/bcbio/variation/filter.clj
  3. +22 −1 src/bcbio/variation/report.clj
View
12 src/bcbio/variation/compare.clj
@@ -14,7 +14,8 @@
[bcbio.variation.report :only [concordance-report-metrics
write-concordance-metrics
write-scoring-table
- top-level-metrics]]
+ top-level-metrics
+ write-classification-metrics]]
[bcbio.variation.combine :only [combine-variants create-merged
gatk-normalize gatk-cl-intersect-intervals]]
[bcbio.variation.annotation :only [add-variant-annotations]]
@@ -67,9 +68,8 @@
[call1 call2 "discordance"]
[call2 call1 "discordance"]]]
(let [file-info {:out-vcf (str (fs/file base-dir
- (format "%s-%s%s-%s%s-%s.vcf"
- sample (:name c1) (get c1 :mod "")
- (:name c2) (get c2 :mod "") cmp-type)))}
+ (format "%s-%s-%s-%s.vcf"
+ sample (:name c1) (:name c2) cmp-type)))}
args (concat
["-R" ref
"--sample_name" sample
@@ -209,7 +209,7 @@
(get cmps-by-name (get finalizer :support (:target finalizer)))
(:params finalizer)
(:ref exp))]
- (assoc cur-cmps (:target finalizer)
+ (assoc cur-cmps (map #(get-in updated-cmp [% :name]) [:c1 :c2])
(compare-two-vcf (:c1 updated-cmp) (:c2 updated-cmp) exp config))))]
(map add-summary (vals (reduce update-w-finalizer cmps-by-name (:finalize exp)))))))
@@ -260,6 +260,8 @@
(-> x :c1 :name) (-> x :c2 :name)))
(write-scoring-table (:metrics x) w)
(write-concordance-metrics (:summary x) w)
+ (when (get-in x [:c1 :mod])
+ (write-classification-metrics x w))
(doseq [f (:c-files x)]
(.write w (format "** %s\n" (fs/base-name f)))
(write-summary-table (vcf-stats f (get-in x [:exp :ref])) :wrtr w))))
View
1 src/bcbio/variation/filter.clj
@@ -102,4 +102,5 @@
(assoc-in [:c1 :file] (-> in-vcf
(#(if-not anns % (variant-recal-filter % train-info anns ref)))
(#(if-not hard-filters % (variant-filter % hard-filters ref)))))
+ (#(assoc-in % [:c1 :name] (format "%s-%s" (get-in % [:c1 :name]) "recal")))
(assoc-in [:c1 :mod] "recal"))))
View
23 src/bcbio/variation/report.clj
@@ -68,6 +68,7 @@
(letfn [(level-from-string [x]
(case (when-not (nil? x) (string/lower-case x))
"quick" :quick
+ "full" :full
:standard))
(get-string-level [config to-try]
(loop [cur-try to-try]
@@ -118,7 +119,7 @@
:discordant2 (all-vrn-counts (nth (:c-files compared) 2) :c1 compared)
:discordant_both (apply discordance-metrics (conj (vec (rest (:c-files compared)))
ref-file)))]
- (if (= sum-level :quick) base
+ (if-not (= sum-level :full) base
(assoc base
:ml_metrics (ml-on-vcf-metrics ref-file (take 2 (:c-files compared)))))))))
@@ -194,3 +195,23 @@
{:metric metric :value val}))]
(.write wrtr (str (doric/table [:metric :value] (map get-value to-write))
"\n")))))
+
+;; ## Classification metrics
+
+(defn write-classification-metrics
+ "Summary table of classification metrics from GATK variant recalibration."
+ [cmp-info wrtr]
+ (letfn [(get-metric-counts [in-vcf]
+ (with-open [vcf-source (get-vcf-source in-vcf (get-in cmp-info [:exp :ref]))]
+ (reduce (fn [coll vc]
+ (let [culprit (get-in vc [:attributes "culprit"])]
+ (if (or (nil? culprit) (= (count (:filters vc)) 0)) coll
+ (assoc coll culprit (inc (get coll culprit 0))))))
+ {} (parse-vcf vcf-source))))
+ (get-recal-metrics [in-vcf]
+ (sort-by :count >
+ (map (fn [[m c]] {:metric m :count c}) (get-metric-counts in-vcf))))]
+ (.write wrtr "** GATK recalibration filter metrics\n")
+ (.write wrtr (str (doric/table [:metric :count]
+ (get-recal-metrics (get-in cmp-info [:c1 :file])))
+ "\n"))))

0 comments on commit e79aec5

Please sign in to comment.