Permalink
Browse files

Handle allele depth with FreeBayes calls using assigned AO and DP values

  • Loading branch information...
1 parent 1d0f662 commit 9ace36b4d327b576955b8c8e6bb8334fede791d4 @chapmanb committed May 19, 2012
Showing with 25 additions and 8 deletions.
  1. +22 −8 src/bcbio/variation/filter/classify.clj
  2. +3 −0 test/bcbio/variation/test/validate.clj
@@ -24,14 +24,28 @@
(defmethod get-vc-attr "AD"
[vc attr]
"AD: Allelic depth for ref and alt alleles. Converted to percent
- deviation from expected for haploid/diploid calls."
- (let [g (-> vc :genotypes first)
- ads (map #(Integer/parseInt %) (string/split (get-in g [:attributes attr]) #","))
- alleles (cons (:ref-allele vc) (:alt-alleles vc))
- ref-count (first ads)
- allele-count (apply + (map #(nth ads (.indexOf alleles %)) (set (:alleles g))))]
- (when-let [e-pct (get {"HOM_VAR" 1.0 "HET" 0.5 "HOM_REF" 0.0} (:type g))]
- (Math/abs (- e-pct (/ allele-count (+ allele-count ref-count)))))))
+ deviation from expected for haploid/diploid calls.
+ Also calculates allele depth from AO and DP used by FreeBayes.
+ AO is the count of the alternative allele."
+ (letfn [(calc-expected [g ref-count allele-count]
+ {:pre [(not (neg? ref-count))]}
+ (when-let [e-pct (get {"HOM_VAR" 1.0 "HET" 0.5 "HOM_REF" 0.0} (:type g))]
+ (Math/abs (- e-pct (/ allele-count (+ allele-count ref-count))))))
+ (from-ad [g]
+ (let [ads (map #(Float/parseFloat %) (string/split (get-in g [:attributes attr]) #","))
+ alleles (cons (:ref-allele vc) (:alt-alleles vc))
+ ref-count (first ads)
+ allele-count (apply + (map #(nth ads (.indexOf alleles %)) (set (:alleles g))))]
+ (calc-expected g ref-count allele-count)))
+ (from-ao [g]
+ (let [alt-count (Float/parseFloat (get-in g [:attributes "AO"]))
+ total-count (Float/parseFloat (get-in g [:attributes "DP"]))]
+ (calc-expected g (- total-count alt-count) alt-count)))]
+ (let [g (-> vc :genotypes first)]
+ (cond
+ (get-in g [:attributes attr]) (from-ad g)
+ (get-in g [:attributes "AO"]) (from-ao g)
+ :else (Exception. (str "AD not found in attributes" (:attributes g)))))))
(defmethod get-vc-attr "QUAL"
[vc attr]
@@ -13,6 +13,7 @@
(let [data-dir (str (fs/file "." "test" "data"))
ref (str (fs/file data-dir "GRCh37.fa"))
top-vcf (str (fs/file data-dir "gatk-calls.vcf"))
+ fb-vcf (str (fs/file data-dir "freebayes-calls.vcf"))
c-neg-vcf (str (fs/file data-dir "sv-indels-gatk.vcf"))
top-out (itx/add-file-part top-vcf "topsubset")
dip-vcf (str (fs/file data-dir "phasing-input-diploid.vcf"))
@@ -40,6 +41,8 @@
(first (#'bcbio.variation.filter.classify/get-train-inputs
1 top-vcf attrs normalizer ref)) => (just [0.0 (roughly 0.2943) 1.0 1])
(-> (get-vc-attr-ranges attrs top-vcf ref) (get "DP")) => [241.5 250.0]
+ (-> (get-vc-attr-ranges attrs fb-vcf ref) (get "AD")) => (just [(roughly 6.045E-4)
+ (roughly 0.024326)])
(get-vc-attrs (first vcf-iter) attrs) => {"AD" 0.0 "QUAL" 5826.09 "DP" 250.0}
(-> (first vcf-iter) normalizer (get "QUAL")) => (roughly 0.2943))))

0 comments on commit 9ace36b

Please sign in to comment.