-
Notifications
You must be signed in to change notification settings - Fork 12
/
chunk.clj
61 lines (55 loc) · 2.37 KB
/
chunk.clj
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
(ns cljam.io.bam-index.chunk
(:refer-clojure :exclude [compare])
(:require [cljam.io.util.bgzf :as bgzf]))
;;; A chunk expresses a range in a BAM index.
;;; It consists of a start position and a end position.
;;; A chunk is a map like `{:beg 10, :end 20}`.
(defrecord Chunk [^long beg ^long end])
(defn compare
"Returns a negative if chunk1 is earlier than chunk2, a positive if it is
later, 0 if it is equal."
[^Chunk chunk1 ^Chunk chunk2]
(let [ret (Long/signum (- (.beg chunk1) (.beg chunk2)))]
(if (zero? ret)
(Long/signum (- (.end chunk1) (.end chunk2)))
ret)))
(defn overlap?
"Returns true if the two chunks overlap."
[^Chunk chunk1 ^Chunk chunk2]
(let [comparison (compare chunk1 chunk2)]
(or (zero? comparison)
(let [left (if (neg? comparison) chunk1 chunk2)
right (if (pos? comparison) chunk1 chunk2)
left-fp (bgzf/get-block-address (.end left))
right-fp (bgzf/get-block-address (.beg right))]
(or (> left-fp right-fp)
(and (= left-fp right-fp)
(let [left-offset (bgzf/get-block-offset (.end left))
right-offset (bgzf/get-block-offset (.beg right))]
(> left-offset right-offset))))))))
(defn adjacent?
"Returns true if the two chunks are adjacent."
[^Chunk chunk1 ^Chunk chunk2]
(or (and (= (bgzf/get-block-address (.end chunk1))
(bgzf/get-block-address (.beg chunk2)))
(= (bgzf/get-block-offset (.end chunk1))
(bgzf/get-block-offset (.beg chunk2))))
(and (= (bgzf/get-block-address (.beg chunk1))
(bgzf/get-block-address (.end chunk2)))
(= (bgzf/get-block-offset (.beg chunk1))
(bgzf/get-block-offset (.end chunk2))))))
(defn optimize-chunks
[chunks ^long min-offset]
(let [chunks (sort compare chunks)]
(loop [[^Chunk f & r] chunks
^Chunk last-chunk nil
ret (transient [])]
(if f
(cond
(<= (.end f) min-offset) (recur r last-chunk ret)
(nil? last-chunk) (recur r f (conj! ret f))
(and (not (overlap? last-chunk f))
(not (adjacent? last-chunk f))) (recur r f (conj! ret f))
(> (.end f) (.end last-chunk)) (let [l (assoc last-chunk :end (.end f))] (recur r l (conj! (pop! ret) l)))
:else (recur r last-chunk ret))
(persistent! ret)))))