Skip to content
Browse files

Expose additional preparation parameters to top level config and generalize.

Allows prep of haploid calls without conversion to diploid.
  • Loading branch information...
1 parent 69f88a9 commit 83f940fabf5fc1993dbd989bdbb99a303ec5af77 @chapmanb committed Jun 3, 2012
View
2 README.md
@@ -94,6 +94,8 @@ provide example starting points and details on available options are below:
not coordinate sorted within chromosomes. (boolean; default false).
prep-sv-genotype: Normalize structural variant genotypes to a single
ref call (boolean; default false).
+ prep-allele-count: Number of alleles each genotype call is converted
+ to during prep work (integer; default 2).
preclean: Remove problematic characters from input VCFs
(boolean; default false).
remove-refcalls: Remove reference, non-variant calls.
View
1 config/single-process.yaml
@@ -9,5 +9,6 @@ experiments:
calls:
- name: gatk
file: test/data/freebayes-calls-indels.vcf
+ prep-sv-genotype: true
prep: true
normalize: true
View
3 src/bcbio/variation/combine.clj
@@ -181,8 +181,7 @@
prep-file (if (true? (:prep call))
(prep-vcf sample-file (:ref exp) (:sample exp) :out-dir out-dir
:out-fname out-fname :orig-ref-file (:ref call)
- :sort-pos (get call :prep-sort-pos false)
- :sv-genotype (get call :prep-sv-genotype false))
+ :config call)
sample-file)
hap-file (if (true? (:make-haploid call))
(diploid-calls-to-haploid prep-file (:ref exp) :out-dir out-dir)
View
22 src/bcbio/variation/normalize.clj
@@ -91,7 +91,7 @@
(defn- fix-vc
"Build a new variant context with updated sample name and normalized alleles.
- Based on :allele-count in the configuration updates haploid allele calls. This
+ Based on :prep-allele-count in the configuration updates haploid allele calls. This
normalizes the representation in Mitochondrial and Y chromosomes which are
haploid but are often represented as diploid with a single call."
[sample config orig]
@@ -101,9 +101,9 @@
[(Genotype/modifyName g sample)])
(.getGenotypes vc)))
(normalize-allele-calls [g]
- {:pre [(contains? #{1 (:allele-count config)} (count (.getAlleles g)))]}
- (if (= (count (.getAlleles g)) (:allele-count config)) g
- (Genotype/modifyAlleles g (repeat (:allele-count config)
+ {:pre [(contains? #{1 (:prep-allele-count config)} (count (.getAlleles g)))]}
+ (if (= (count (.getAlleles g)) (:prep-allele-count config)) g
+ (Genotype/modifyAlleles g (repeat (:prep-allele-count config)
(first (.getAlleles g))))))]
(-> orig
(assoc :vc
@@ -148,7 +148,7 @@
0 [(Genotype. sample [alt-allele])]
1 [(maybe-fix-vc (first gs) alt-allele)]
(map :genotype gs)))]
- (if (:sv-genotype config)
+ (if (:prep-sv-genotype config)
(let [new-gs (ref-vc-genotype (:genotypes orig)
(first (:alt-alleles orig)))]
(-> orig
@@ -164,7 +164,7 @@
[rdr vcf-decoder sample config]
(->> rdr
line-seq
- (#(if (:sort-pos config) (sort-by-position %) %))
+ (#(if (:prep-sort-pos config) (sort-by-position %) %))
(remove nochange-alt?)
(map vcf-decoder)
(map (partial normalize-sv-genotype config sample))
@@ -190,7 +190,7 @@
(assoc xs 0 new))]
(let [parts (string/split line #"\t")
cur-chrom (first (vals
- (chr-name-remap (:org config) ref-info [(first parts)])))]
+ (chr-name-remap (:prep-org config) ref-info [(first parts)])))]
{:chrom cur-chrom
:line (->> parts
(fix-chrom cur-chrom)
@@ -258,9 +258,11 @@
Assumes by position sorting of variants in the input VCF. Chromosomes do
not require a specific order, but positions internal to a chromosome do.
Currently configured for human preparation."
- [in-vcf-file ref-file sample & {:keys [out-dir out-fname sort-pos sv-genotype]
- :or {sort-pos false}}]
- (let [config {:org :GRCh37 :allele-count 2 :sort-pos sort-pos :sv-genotype sv-genotype}
+ [in-vcf-file ref-file sample & {:keys [out-dir out-fname config]
+ :or {config {}}}]
+ (let [config (merge-with #(or %1 %2) config
+ {:prep-org :GRCh37 :prep-allele-count 2
+ :prep-sort-pos false :prep-sv-genotype false})
base-name (if (nil? out-fname) (itx/remove-zip-ext in-vcf-file) out-fname)
out-file (itx/add-file-part base-name "prep" out-dir)]
(when (itx/needs-run? out-file)
View
2 test/bcbio/variation/test/compare.clj
@@ -170,7 +170,7 @@
(facts "Check for multiple samples in a VCF file"
(multiple-samples? vcf) => false)
(facts "Normalize variant representation of chromosomes, order, genotypes and samples."
- (prep-vcf vcf ref "Test1" :sort-pos true) => out-vcf)
+ (prep-vcf vcf ref "Test1" :config {:prep-sort-pos true}) => out-vcf)
(facts "Pre-cleaning of problematic VCF input files"
(clean-problem-vcf prevcf) => out-prevcf)))
View
2 test/bcbio/variation/test/utils.clj
@@ -17,5 +17,5 @@
?form)))
(facts "Add Complete Genomics metrics to VCF file."
- (let [ready-vcf (prep-vcf cg-vcf ref "NA12939" :sort-pos true)]
+ (let [ready-vcf (prep-vcf cg-vcf ref "NA12939" :config {:prep-sort-pos true})]
(add-cgmetrics ready-vcf cg-var ref) => out-cg-var))

0 comments on commit 83f940f

Please sign in to comment.
Something went wrong with that request. Please try again.