Permalink
Browse files

Supporting code and configuration for BioStar NGS challenge from Pierre

  • Loading branch information...
1 parent 0031cba commit 74328f275ae035fe90e6bec637a23aab4f818bf9 @chapmanb committed Dec 19, 2011
View
8 biostar/biostar.ngschallenge/.gitignore
@@ -0,0 +1,8 @@
+/pom.xml
+*jar
+/lib
+/classes
+/native
+/.lein-failures
+/checkouts
+/.lein-deps-sum
View
12 biostar/biostar.ngschallenge/README.md
@@ -0,0 +1,12 @@
+# biostar.ngschallenge
+
+Work for NGS challenge on BioStar:
+
+http://biostar.stackexchange.com/questions/15581/ngs-challenge-can-you-give-an-insight-about-this-imaginary-genetic-disease
+
+## Usage
+
+    $ lein deps
+    $ lein run :find data/father_111217_biostar-variants.vcf \
+                     data/mother_111217_biostar-variants.vcf \
+                     data/child_111217_biostar-variants.vcf
View
22 biostar/biostar.ngschallenge/config/run_info.yaml
@@ -0,0 +1,22 @@
+---
+fc_date: 111217
+fc_name: biostar
+details:
+  - files: [child_1-ready.fq, child_2-ready.fq]
+    description: child
+    analysis: SNP calling
+    genome_build: hg19
+    algorithm:
+      quality_format: Standard
+  - files: [father_1-ready.fq, father_2-ready.fq]
+    description: father
+    analysis: SNP calling
+    genome_build: hg19
+    algorithm:
+      quality_format: Standard
+  - files: [mother_1-ready.fq, mother_2-ready.fq]
+    description: mother
+    analysis: SNP calling
+    genome_build: hg19
+    algorithm:
+      quality_format: Standard
View
5 biostar/biostar.ngschallenge/project.clj
@@ -0,0 +1,5 @@
+(defproject biostar.ngschallenge "0.0.1-SNAPSHOT"
+ ;; Leiningen project definition for the BioStar NGS challenge analysis code.
+ :description "Custom code for BioStar NGS challenge"
+ ;; bcbio.variation provides parse-vcf, which biostar.ngschallenge.core uses.
+ :dependencies [[org.clojure/clojure "1.3.0"]
+ [bcbio.variation "0.0.1-SNAPSHOT"]]
+ ;; Lets `lein run :find <father.vcf> <mother.vcf> <child.vcf>` invoke
+ ;; the -main function of biostar.ngschallenge.core.
+ :run-aliases {:find biostar.ngschallenge.core})
View
39 biostar/biostar.ngschallenge/src/biostar/ngschallenge/core.clj
@@ -0,0 +1,39 @@
+(ns biostar.ngschallenge.core
+ (:use [bcbio.variation.variantcontext :only [parse-vcf]]))
+
+(defn find-interest-vrns
+  "Return the variant calls read from VCF file `fname` that are of interest.
+
+  A call is kept only when all of the following hold:
+    - it is not filtered (its :filters collection is empty)
+    - the :type of its first genotype equals `wanted-genotype`
+      (callers in this file pass \"HET\" or \"HOM_VAR\")
+    - its \"SNPEFF_IMPACT\" attribute is \"HIGH\" or \"MODERATE\"
+
+  Returns a lazy sequence of the matching items produced by `parse-vcf`."
+  [fname wanted-genotype]
+  (letfn [(is-problem? [vc]
+            (and (empty? (:filters vc))
+                 ;; Only the first genotype is inspected; this presumably
+                 ;; relies on single-sample VCF input -- TODO confirm.
+                 (= (-> vc :genotypes first :type) wanted-genotype)
+                 (contains? #{"HIGH" "MODERATE"}
+                            (get (:attributes vc) "SNPEFF_IMPACT"))))]
+    (filter is-problem? (parse-vcf fname))))
+
+(defn combine-interest-vrns
+  "Combine variations of interest from family variant calls.
+
+  Selects heterozygous (\"HET\") calls of interest from the father and mother
+  VCF files and homozygous variant (\"HOM_VAR\") calls of interest from the
+  child VCF file, then counts how many of the three files contain each
+  position.
+
+  Returns a map of [chr start] -> number of family members (1 to 3) with a
+  call of interest at that position."
+  [father-fname mother-fname child-fname]
+  (letfn [(unique-positions [vrns]
+            ;; De-duplicate within one individual so that each family
+            ;; member contributes at most one count per position.
+            (set (map (juxt :chr :start) vrns)))]
+    (frequencies
+     (mapcat unique-positions
+             [(find-interest-vrns father-fname "HET")
+              (find-interest-vrns mother-fname "HET")
+              (find-interest-vrns child-fname "HOM_VAR")]))))
+
+(defn filter-interest-vrns
+  "Print positions of interest present more than once in father/mother/child.
+
+  `pos-map` maps a position to the number of family members in which it was
+  found. The entries whose count is greater than 1 are printed to *out* as a
+  single sequence of [position count] pairs. Returns nil."
+  [pos-map]
+  ;; The count is bound to `n` so the local does not shadow clojure.core/count.
+  (println (filter (fn [[_ n]] (> n 1)) pos-map)))
+
+(defn -main
+  "Command-line entry point: takes the father, mother and child VCF file
+  names, combines their variations of interest and prints the positions
+  found in more than one family member."
+  [father-fname mother-fname child-fname]
+  (filter-interest-vrns
+   (combine-interest-vrns father-fname mother-fname child-fname)))

0 comments on commit 74328f2

Please sign in to comment.