-
Notifications
You must be signed in to change notification settings - Fork 2
/
vcf_lift.clj
69 lines (65 loc) · 3.39 KB
/
vcf_lift.clj
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
(ns varity.vcf-lift
(:require [cljam.io.vcf.util.normalize :as norm]
[cljam.io.vcf.util :as vcf-util]
[cljam.io.vcf.util.check :as check]
[cljam.util.sequence :as util-seq]
[cljam.util.chromosome :as chr]
[varity.chain :as ch]))
(defn- calc-new-interval [chr beg end indexed-chains]
(some #(when-let [overlap-blocks
(seq (ch/search-overlap-blocks beg end (:data %)))]
(let [start-offset (max 0 (- beg (:t-start (first overlap-blocks))))
end-offset (max 0 (- (:t-end (last overlap-blocks)) end))
to-start (+ (:q-start (first overlap-blocks)) start-offset)
to-end (- (:q-end (last overlap-blocks)) end-offset)
strand (get-in % [:header :q-strand])]
(if (= strand :reverse)
{:chr (get-in % [:header :q-name])
:start (inc (- (get-in % [:header :q-size]) to-end))
:end (inc (- (get-in % [:header :q-size]) to-start))
:strand strand}
{:chr (get-in % [:header :q-name])
:start to-start :end to-end :strand strand})))
(ch/search-containing-chains chr beg end indexed-chains)))
(defn liftover-variant*
"`liftover-variant` without checking reference sequence and realignment."
[indexed-chain {:keys [chr pos ref info] :as variant}]
(when-let [{:keys [chr start strand]} (calc-new-interval
chr pos (dec (+ pos (count ref)))
indexed-chain)]
(cond-> (assoc variant :chr chr :pos start)
(:END info) (assoc-in [:info :END] (dec (+ start (count ref))))
(= strand :reverse) (-> (update :ref util-seq/revcomp)
(update :alt #(map util-seq/revcomp %))))))
(defn liftover-variant
"Lifts over a `variant` to another coordinate. Returns a converted and
realigned variant if succeeded to lift over, otherwise nil.
`indexed-chain` supplies conversions between sequences and must be indexed by
`varity.chain/index`. `target-seq-reader` is a reader instance of the target
sequence that the variant coordinate is converted to."
[target-seq-reader indexed-chain variant]
(when-let [x (liftover-variant* indexed-chain variant)]
(when (check/same-ref? target-seq-reader x)
(norm/realign target-seq-reader x))))
(defn liftover-variants
"Lifts over variants data from chains applied chain/index and seq-reader.
Variants that succeed in liftover will be put in :success,
and those that failed will be put in :failure."
[seq-reader indexed-chains variants]
(let [separated-variants
(group-by
(fn [variant]
(->> (:alt variant)
(map #(:type (vcf-util/inspect-allele (:ref variant) %)))
(every? #{:mnv :ref :complex :insertion :deletion :snv})))
variants)]
(->> (get separated-variants true)
(map #(if-let [new-variant (liftover-variant seq-reader
indexed-chains %)]
{:success (list new-variant)}
{:failure (list %)}))
(cons {:failure (get separated-variants false)})
(apply merge-with
(comp #(sort-by (juxt (comp chr/chromosome-order-key :chr)
:pos :ref (comp vec :alt)) %)
concat)))))