Skip to content

Commit

Permalink
Browse files Browse the repository at this point in the history
Write GATK recalibration filter results separately in output summary.…
… Provide table of metrics used for filtering
  • Loading branch information
chapmanb committed Mar 27, 2012
1 parent 0ab3216 commit e79aec5
Show file tree
Hide file tree
Showing 3 changed files with 30 additions and 6 deletions.
12 changes: 7 additions & 5 deletions src/bcbio/variation/compare.clj
Expand Up @@ -14,7 +14,8 @@
[bcbio.variation.report :only [concordance-report-metrics
write-concordance-metrics
write-scoring-table
top-level-metrics]]
top-level-metrics
write-classification-metrics]]
[bcbio.variation.combine :only [combine-variants create-merged
gatk-normalize gatk-cl-intersect-intervals]]
[bcbio.variation.annotation :only [add-variant-annotations]]
Expand Down Expand Up @@ -67,9 +68,8 @@
[call1 call2 "discordance"]
[call2 call1 "discordance"]]]
(let [file-info {:out-vcf (str (fs/file base-dir
(format "%s-%s%s-%s%s-%s.vcf"
sample (:name c1) (get c1 :mod "")
(:name c2) (get c2 :mod "") cmp-type)))}
(format "%s-%s-%s-%s.vcf"
sample (:name c1) (:name c2) cmp-type)))}
args (concat
["-R" ref
"--sample_name" sample
Expand Down Expand Up @@ -209,7 +209,7 @@
(get cmps-by-name (get finalizer :support (:target finalizer)))
(:params finalizer)
(:ref exp))]
(assoc cur-cmps (:target finalizer)
(assoc cur-cmps (map #(get-in updated-cmp [% :name]) [:c1 :c2])
(compare-two-vcf (:c1 updated-cmp) (:c2 updated-cmp) exp config))))]
(map add-summary (vals (reduce update-w-finalizer cmps-by-name (:finalize exp)))))))

Expand Down Expand Up @@ -260,6 +260,8 @@
(-> x :c1 :name) (-> x :c2 :name)))
(write-scoring-table (:metrics x) w)
(write-concordance-metrics (:summary x) w)
(when (get-in x [:c1 :mod])
(write-classification-metrics x w))
(doseq [f (:c-files x)]
(.write w (format "** %s\n" (fs/base-name f)))
(write-summary-table (vcf-stats f (get-in x [:exp :ref])) :wrtr w))))
Expand Down
1 change: 1 addition & 0 deletions src/bcbio/variation/filter.clj
Expand Up @@ -102,4 +102,5 @@
(assoc-in [:c1 :file] (-> in-vcf
(#(if-not anns % (variant-recal-filter % train-info anns ref)))
(#(if-not hard-filters % (variant-filter % hard-filters ref)))))
(#(assoc-in % [:c1 :name] (format "%s-%s" (get-in % [:c1 :name]) "recal")))
(assoc-in [:c1 :mod] "recal"))))
23 changes: 22 additions & 1 deletion src/bcbio/variation/report.clj
Expand Up @@ -68,6 +68,7 @@
(letfn [(level-from-string [x]
(case (when-not (nil? x) (string/lower-case x))
"quick" :quick
"full" :full
:standard))
(get-string-level [config to-try]
(loop [cur-try to-try]
Expand Down Expand Up @@ -118,7 +119,7 @@
:discordant2 (all-vrn-counts (nth (:c-files compared) 2) :c1 compared)
:discordant_both (apply discordance-metrics (conj (vec (rest (:c-files compared)))
ref-file)))]
(if (= sum-level :quick) base
(if-not (= sum-level :full) base
(assoc base
:ml_metrics (ml-on-vcf-metrics ref-file (take 2 (:c-files compared)))))))))

Expand Down Expand Up @@ -194,3 +195,23 @@
{:metric metric :value val}))]
(.write wrtr (str (doric/table [:metric :value] (map get-value to-write))
"\n")))))

;; ## Classification metrics

(defn write-classification-metrics
"Summary table of classification metrics from GATK variant recalibration."
[cmp-info wrtr]
(letfn [(get-metric-counts [in-vcf]
(with-open [vcf-source (get-vcf-source in-vcf (get-in cmp-info [:exp :ref]))]
(reduce (fn [coll vc]
(let [culprit (get-in vc [:attributes "culprit"])]
(if (or (nil? culprit) (= (count (:filters vc)) 0)) coll
(assoc coll culprit (inc (get coll culprit 0))))))
{} (parse-vcf vcf-source))))
(get-recal-metrics [in-vcf]
(sort-by :count >
(map (fn [[m c]] {:metric m :count c}) (get-metric-counts in-vcf))))]
(.write wrtr "** GATK recalibration filter metrics\n")
(.write wrtr (str (doric/table [:metric :count]
(get-recal-metrics (get-in cmp-info [:c1 :file])))
"\n"))))

0 comments on commit e79aec5

Please sign in to comment.