-
Notifications
You must be signed in to change notification settings - Fork 12
/
header.clj
124 lines (107 loc) · 3.62 KB
/
header.clj
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
(ns cljam.io.sam.util.header
"Utility functions for SAM header."
(:refer-clojure :exclude [sorted?])
(:require [clojure.string :as cstr]
[cljam.io.sam.common :as sam-common])
(:import clojure.lang.IEditableCollection))
;;; parse
(defn- parse-header-keyvalues
  "Builds a map from a sequence of \"KEY:value\" strings.
  Each string is split at its first colon only; the key becomes a keyword.
  Values of the LN and PI keys are parsed as integers, every other value is
  kept as a string.
  e.g. [\"LN:45\" \"SN:ref\"] -> {:LN 45, :SN \"ref\"}"
  [keyvalues]
  (letfn [(coerce [tag raw]
            (case tag
              ("LN" "PI") (Integer/parseInt raw)
              raw))]
    (reduce (fn [acc field]
              ;; Limit of 2 keeps any further colons inside the value.
              (let [[tag raw] (cstr/split field #":" 2)]
                (assoc acc (keyword tag) (coerce tag raw))))
            {}
            keyvalues)))
(defn parse-header-line
  "Parses one tab-separated header line into a [type key-values] pair.
  The first field, minus its leading character, becomes the type keyword;
  the remaining fields are handed to `parse-header-keyvalues`.
  e.g. \"@SQ\\tSN:ref\\tLN:45\" => [:SQ {:SN \"ref\" :LN 45}]"
  [line]
  (let [fields (cstr/split line #"\t")
        record-type (-> fields first (subs 1) keyword)]
    [record-type (parse-header-keyvalues (next fields))]))
(defn- finalize-rf
  "Returns a reducing function that behaves like `rf`, except that the result
  of `rf`'s completion arity is additionally passed through `f`."
  [rf f]
  (let [complete (comp f rf)]
    (fn finalize-rf-inner
      ([] (rf))
      ([acc] (complete acc))
      ([acc item] (rf acc item)))))
(defn- into-rf
  "Returns a reducing function that conjoins every input onto `to`.
  When `to` is an editable collection the accumulation runs on a transient,
  and the completion step makes it persistent again and restores the metadata
  of `to`. Otherwise plain `conj` is used, starting from `to` itself."
  [to]
  (if-not (instance? IEditableCollection to)
    (fn into-rf-non-editable
      ([] to)
      ([acc] acc)
      ([acc item] (conj acc item)))
    (fn into-rf-editable
      ([] (transient to))
      ([acc] (-> acc persistent! (with-meta (meta to))))
      ([acc item] (conj! acc item)))))
(defn- group-by-rf
  "Returns a reducing function that acts like `group-by`: each input is keyed
  by `(keyfn input)` and folded into that key's own accumulator using `rf`.
  The second argument `rf` is the reducing function applied to each group;
  it defaults to `(into-rf [])`.
  The completion arity calls `rf`'s completion arity on every group and
  returns a persistent map of key -> completed group."
  ([keyfn] (group-by-rf keyfn (into-rf [])))
  ([keyfn rf]
   ;; Unique sentinel: distinguishes \"no group for this key yet\" from a group
   ;; whose accumulator happens to be nil or false, so that such accumulators
   ;; are not re-initialised with (rf) on the next input.
   (let [absent (Object.)]
     (fn group-by-rf-inner
       ([] (transient {}))
       ([groups]
        (into {}
              (map (fn group-by-rf-finalize [[k acc]] [k (rf acc)]))
              (persistent! groups)))
       ([groups item]
        (let [k (keyfn item)
              acc (get groups k absent)]
          (assoc! groups k (rf (if (identical? acc absent) (rf) acc) item))))))))
(def into-header
  "Reducing function that folds [type key-values] pairs into a header map.
  Pairs are grouped by their first element and the second elements of each
  group are collected into a vector. On completion, a non-nil :HD entry is
  replaced by its first element."
  (finalize-rf
   (group-by-rf first ((map second) (into-rf [])))
   (fn [m] (cond-> m (some? (:HD m)) (update :HD first)))))
(defn parse-header
  "Parses a header string into a header map: the string is split into lines,
  each line goes through `parse-header-line`, and the results are reduced
  with `into-header`."
  [s]
  (let [lines (cstr/split-lines s)]
    (transduce (map parse-header-line) into-header lines)))
;;; stringify
(defn- stringify-header-keyvalues
  "Renders the entries of `kv-map` as \"KEY:value\" fields joined by tabs,
  in the map's iteration order."
  [kv-map]
  (cstr/join \tab
             (for [[k v] kv-map]
               (str (name k) \: v))))
(defn stringify-header
  "Renders a header map as newline-separated header lines.
  The :HD entry holds a single key-value map and yields one \"@HD\" line;
  every other entry holds a collection of key-value maps and yields one
  \"@<type>\" line per map."
  [hdr]
  (letfn [(record-line [typ kvs]
            (str \@ (name typ) \tab (stringify-header-keyvalues kvs)))
          (section [[typ kvs]]
            (if (= typ :HD)
              (str "@HD" \tab (stringify-header-keyvalues kvs))
              (->> kvs
                   (map (partial record-line typ))
                   (cstr/join \newline))))]
    (->> hdr
         (map section)
         (cstr/join \newline))))
;;; @HD
(defn update-version
  "Sets the :VN field under :HD of `header` to `sam-common/sam-version`,
  replacing any existing value."
  [header]
  (update header :HD assoc :VN sam-common/sam-version))
;; Sort-order keywords. `sort-order` falls back to `order-unknown` when the
;; header carries no usable SO value, and `sorted?` treats only the names of
;; `order-coordinate` and `order-queryname` as sorted.
(def ^:const order-unknown :unknown)
(def ^:const order-unsorted :unsorted)
(def ^:const order-coordinate :coordinate)
(def ^:const order-queryname :queryname)
(defn sorted-by
  "Returns `header` with the :SO field under :HD set to the name of `order`,
  replacing any existing value."
  [order header]
  (let [so (name order)]
    (update header :HD assoc :SO so)))
(defn sort-order
  "Returns the value of the :SO field under :HD of `header` as a keyword,
  or `order-unknown` when that field is missing or cannot be made into a
  keyword."
  [header]
  (if-let [so (some-> header :HD :SO keyword)]
    so
    order-unknown))
(defn sorted?
  "Returns true when the :SO field under :HD of `header` equals the name of
  `order-queryname` or `order-coordinate`, false otherwise."
  [header]
  (let [so (-> header :HD :SO)]
    (boolean
     (some #(= so (name %))
           [order-queryname order-coordinate]))))