Skip to content

Commit

Permalink
Merge pull request #38 from chrovis/feature/bam-header-so
Browse files Browse the repository at this point in the history
Fix StackOverflowError in parse-header of SAM/BAM files.
  • Loading branch information
totakke committed Apr 5, 2017
2 parents 33a550f + d08c91f commit 2b915b9
Show file tree
Hide file tree
Showing 3 changed files with 71 additions and 23 deletions.
12 changes: 7 additions & 5 deletions src/cljam/sam/reader.clj
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,7 @@
(:require [clojure.java.io :refer [file]]
[clojure.tools.logging :as logging]
[cljam.util.sam-util :refer [make-refs
parse-alignment
parse-header-line]])
parse-alignment] :as sam-util])
(:import [java.io BufferedReader Closeable]))

;;; reader
Expand Down Expand Up @@ -48,9 +47,12 @@
(read-blocks* this))))

(defn- read-header* [^BufferedReader rdr]
(when-let [line (.readLine rdr)]
(if (= (first line) \@)
(merge-with #(vec (concat %1 %2)) (parse-header-line line) (read-header* rdr)))))
(->> (line-seq rdr)
(transduce
(comp
(take-while (fn [line] (= (first line) \@)))
(map sam-util/parse-header-line))
sam-util/into-header)))

(defn reader [f]
(let [header (with-open [r (clojure.java.io/reader f)]
Expand Down
75 changes: 57 additions & 18 deletions src/cljam/util/sam_util.clj
Original file line number Diff line number Diff line change
Expand Up @@ -11,29 +11,68 @@
(defn- parse-header-keyvalues
  "Parses the TAG:VALUE fields of a single header line into a map.
  `keyvalues` is a sequence of \"TAG:VALUE\" strings. Tags become keywords;
  the values of LN and PI are parsed as integers, every other value is kept
  as a string.
  e.g. [\"LN:45\" \"SN:ref\"] -> {:LN 45, :SN \"ref\"}"
  [keyvalues]
  (into
   {}
   (map (fn [kv]
          ;; Split on the FIRST colon only (limit 2). Values may themselves
          ;; contain colons, e.g. UR:file:///ref.fa or a command line in CL;
          ;; an unlimited split would silently truncate them.
          (let [[k v] (cstr/split kv #":" 2)]
            [(keyword k)
             (case k
               "LN" (Integer/parseInt v)
               "PI" (Integer/parseInt v)
               v)])))
   keyvalues))

(defn parse-header-line
"e.g. \"@SQ SN:ref LN:45\" => [:SQ {:SN \"ref\" :LN 45}]"
[line]
(let [[typ & kvs] (cstr/split line #"\t")]
{(keyword (subs typ 1)) (if (= typ "@HD")
(parse-header-keyvalues kvs)
(vector (parse-header-keyvalues kvs)))}))

(defn- parse-header* [col]
(when (seq col)
(merge-with #(vec (concat %1 %2)) (parse-header-line (first col)) (parse-header* (rest col)))))
[(keyword (subs typ 1)) (parse-header-keyvalues kvs)]))

(defn- finalize-rf
  "Wraps the reducing function `rf` so that `f` is applied to its completed
  result. The init (0-arity) and step (2-arity) calls are delegated to `rf`
  unchanged; the completion (1-arity) call returns `(f (rf result))`."
  [rf f]
  (let [complete (comp f rf)]
    (fn finalize-rf-inner
      ([] (rf))
      ([result] (complete result))
      ([result input] (rf result input)))))

(defn- into-rf
  "Returns a reducing function that conjoins every input onto the collection
  `to`. When `to` is an editable collection the accumulation runs on a
  transient and the completion step makes it persistent again, restoring the
  metadata of `to`; otherwise plain `conj` is used on `to` itself."
  [to]
  (if-not (instance? clojure.lang.IEditableCollection to)
    (fn into-rf-non-editable
      ([] to)
      ([acc] acc)
      ([acc item] (conj acc item)))
    (let [to-meta (meta to)]
      (fn into-rf-editable
        ([] (transient to))
        ([acc] (with-meta (persistent! acc) to-meta))
        ([acc item] (conj! acc item))))))

(defn- group-by-rf
  "Returns a reducing function that acts like `group-by`.
  Every input is routed to the group selected by `(keyfn input)`. The optional
  second argument `rf` is the reducing function applied within each group; it
  defaults to `(into-rf [])`. On completion each group is finished with the
  one-argument arity of `rf` and a persistent map of key -> group is returned."
  ([keyfn]
   (group-by-rf keyfn (into-rf [])))
  ([keyfn rf]
   (letfn [(finish-group [[k group]]
             [k (rf group)])]
     (fn group-by-rf-inner
       ;; groups are accumulated in a transient map
       ([] (transient {}))
       ([groups]
        (into {} (map finish-group) (persistent! groups)))
       ([groups input]
        (let [k     (keyfn input)
              ;; start a fresh group with (rf) the first time a key is seen
              group (or (get groups k) (rf))]
          (assoc! groups k (rf group input))))))))

(def into-header
  "Reducing function that builds a header map from [type keyvalues] pairs.
  Pairs are grouped by their first element and the second elements of each
  group are collected into a vector. On completion, if the map has a truthy
  :HD entry, that entry is replaced by its first element."
  (let [collect-seconds ((map second) (into-rf []))
        group-by-type   (group-by-rf first collect-seconds)
        unwrap-hd       (fn [header]
                          (cond-> header
                            (:HD header) (update :HD first)))]
    (finalize-rf group-by-type unwrap-hd)))

(defn parse-header
"Parse a header string, returning a map of the header."
[s]
(parse-header* (cstr/split s #"\n")))
(->> (cstr/split-lines s)
(transduce
(map parse-header-line)
into-header)))

(defn- parse-tag-single [val-type val]
(case val-type
Expand Down
7 changes: 7 additions & 0 deletions test/cljam/util/t_sam_util.clj
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,13 @@
[cljam.util.sam-util :as sam-util]
[clojure.string :as cstr]))

(fact
  "about parse-header"
  ;; Header fields are TAB-separated (parse-header-line splits on \t), so the
  ;; separators are written as explicit \t escapes. The spaces inside the CL
  ;; value are real spaces and must stay part of that value.
  (sam-util/parse-header
   (str "@HD\tVN:1.3\tSO:coordinate\n"
        "@SQ\tSN:ref\tLN:10\n"
        "@SQ\tSN:ref2\tLN:20\n"
        "@PG\tID:cljam\tPN:cljam\tVN:1.0\tCL:java -jar cljam.jar"))
  => {:HD {:VN "1.3" :SO "coordinate"}
      :SQ [{:SN "ref" :LN 10} {:SN "ref2" :LN 20}]
      :PG [{:ID "cljam" :PN "cljam" :VN "1.0" :CL "java -jar cljam.jar"}]})

(def nibble-table "=ACMGRSVTWYHKDBN")

(defn encode [^String s]
Expand Down

0 comments on commit 2b915b9

Please sign in to comment.