Skip to content

Commit

Permalink
Extract region utils to cljam.util.region and add some functions.
Browse files Browse the repository at this point in the history
  • Loading branch information
alumi committed Sep 11, 2017
1 parent 2e1a4d1 commit cbe4dd7
Show file tree
Hide file tree
Showing 6 changed files with 434 additions and 208 deletions.
6 changes: 3 additions & 3 deletions src/cljam/algo/depth.clj
Expand Up @@ -3,7 +3,7 @@
(:require [com.climate.claypoole :as cp]
[com.climate.claypoole.lazy :as lazy]
[cljam.common :as common]
[cljam.util :as util]
[cljam.util.region :as region]
[cljam.io.sam :as sam]
[cljam.io.sam.util :as sam-util])
(:import [cljam.io.protocols SAMRegionBlock]))
Expand Down Expand Up @@ -39,7 +39,7 @@
(fn [[start end]]
(with-open [r (sam/clone-bam-reader rdr)]
(count-for-positions (read-fn r start end) start end))) xs)))]
(->> (util/divide-region start end step)
(->> (region/divide-region start end step)
count-fn
(apply concat))))

Expand Down Expand Up @@ -108,7 +108,7 @@
(f (sam/read-blocks rdr region {:mode :region}) start end 0 pile)
(cp/pdoseq
n-threads
[[s e] (util/divide-region start end step)]
[[s e] (region/divide-region start end step)]
(with-open [r (sam/clone-bam-reader rdr)]
(-> (sam/read-blocks r {:chr chr, :start s, :end e} {:mode :region})
(f s e (- s start) pile)))))
Expand Down
4 changes: 2 additions & 2 deletions src/cljam/tools/cli.clj
Expand Up @@ -16,7 +16,7 @@
[cljam.algo.pileup :as plp]
[cljam.algo.convert :as convert]
[cljam.algo.level :as level]
[cljam.util :as util])
[cljam.util.region :as region])
(:import [java.io Closeable BufferedWriter OutputStreamWriter]))

;; CLI functions
Expand Down Expand Up @@ -267,7 +267,7 @@
(when-not (sorter/sorted-by? r)
(exit 1 "Not sorted"))
(if (:region options)
(if-let [region (util/parse-region (:region options))]
(if-let [region (region/parse-region (:region options))]
(cond
(:simple options) (pileup-simple r (:thread options) region)
(:ref options) (pileup-with-ref r (:ref options) region)
Expand Down
65 changes: 0 additions & 65 deletions src/cljam/util.clj
Expand Up @@ -87,68 +87,3 @@
(-> (CompressorStreamFactory.)
(.createCompressorOutputStream s os))
os))))

;; region utils
;; ---------

(defn divide-region
"Divides a region [start end] into several chunks with maximum length 'step'.
Returns a lazy sequence of vector."
[start end step]
(->> [(inc end)]
(concat (range start (inc end) step))
(partition 2 1)
(map (fn [[s e]] [s (dec e)]))))

(defn divide-refs
"Divides refs into several chunks with maximum length 'step'.
Returns a lazy sequence of map containing {:chr :start :end}."
[refs step]
(mapcat
(fn [{:keys [name len]}]
(map (fn [[s e]] {:chr name :start s :end e})
(divide-region 1 len step)))
refs))

(defn valid-rname?
"Checks if the given rname conforms to the spec of sam."
[rname]
(and rname (string? rname) (re-matches #"[!-)+-<>-~][!-~]*" rname)))

(defn valid-region?
"Checks if the given region map is a valid 1-based closed range."
[{:keys [chr start end]}]
(and start end
(valid-rname? chr)
(number? start) (pos? start)
(number? end) (pos? end)
(<= start end)))

(defn parse-region
"Parse a region string into a map."
[region-str]
(when region-str
(let [[_ chr _ start _ end] (re-matches #"([!-)+-<>-~][!-~]*?)(:(\d+)?(-(\d+))?)?" region-str)
start' (proton/as-long start)
end' (proton/as-long end)]
(when chr
(cond-> {:chr chr}
start' (assoc :start start')
end' (assoc :end end'))))))

(defn parse-region-strict
"Parse a region string into a map strictly."
[region-str]
(let [region-map (parse-region region-str)]
(when (valid-region? region-map) region-map)))

(defn format-region
"Format a region map into a string."
[{:keys [chr start end]}]
(let [result (apply str (interleave [nil \: \-] (take-while some? [chr start end])))]
(when-not (cstr/blank? result) result)))

(defn format-region-strict
"Format a region map into a string strictly."
[region-map]
(when (valid-region? region-map) (format-region region-map)))
157 changes: 157 additions & 0 deletions src/cljam/util/region.clj
@@ -0,0 +1,157 @@
(ns cljam.util.region
"Utility functions for manipulating chromosomal regions."
(:require [clojure.string :as cstr]
[proton.core :as proton]))

;;; region conversion
;;; ----------

(defn merge-regions-with
"Returns a lazy sequence of merged regions. Input regions must be sorted.
Neighboring regions apart less than or equal to 'max-gap' will be merged
with 'merge-fn'. Returns a stateful transducer when no input 'regions' is
provided."
([merge-fn]
(merge-regions-with merge-fn 0))
([merge-fn ^long max-gap]
(fn merge-regions-transducer [rf]
(let [last-reg (volatile! nil)]
(fn merge-regions-rf
([] (rf))
([r] (rf (if-let [l @last-reg] (rf r l) r)))
([r x] (if-let [l @last-reg]
(if (and (= (:chr l) (:chr x))
(<= (- (dec (:start x)) (:end l)) max-gap))
(do (vswap! last-reg merge-fn x) r)
(do (vreset! last-reg x) (rf r l)))
(do (vreset! last-reg x) r)))))))
([merge-fn ^long max-gap regs]
(if-let [f (first regs)]
(if-let [s (second regs)]
(if (and (= (:chr f) (:chr s))
(<= (- (dec (:start s)) (:end f)) max-gap))
(let [next-regs (cons (merge-fn f s) (nnext regs))]
(lazy-seq (merge-regions-with merge-fn max-gap next-regs)))
(cons f (lazy-seq (merge-regions-with merge-fn max-gap (next regs)))))
[f])
[])))

(defn- merge-two-regions
"Default function to merge two regions."
[x y]
(update x :end max (:end y)))

(def
^{:doc "Same as 'merge-regions-with' except for 'merge-two-regions' is
partially applied as merge-fn."
:arglists '([] [max-gap] [max-gap regions])}
merge-regions
(partial merge-regions-with merge-two-regions))

(defn subtract-region
"Subtract a region from another one.
Returns a vector of regions."
[lhs-reg rhs-reg]
(if (= (:chr lhs-reg) (:chr rhs-reg))
(filterv
#(<= (:start %) (:end %))
[(update lhs-reg :end min (dec (:start rhs-reg)))
(update lhs-reg :start max (inc (:end rhs-reg)))])
[lhs-reg]))

(defn complement-regions
"Returns a sequence of regions complement to in-regions.
in-regions must be sorted.
Returns a stateful transducer when no regions provided."
([base-region]
(fn [rf]
(let [last-reg (volatile! base-region)]
(fn
([] (rf))
([r] (rf (if-let [l @last-reg] (rf r l) r)))
([r x]
(if-let [l @last-reg]
(let [[a b] (subtract-region l x)]
(vreset! last-reg (or b a))
(if b (rf r a) r))
r))))))
([base-region in-regions]
(if-let [reg (first in-regions)]
(if-let [[a b] (seq (subtract-region base-region reg))]
(if b
(cons a (lazy-seq (complement-regions b (next in-regions))))
(lazy-seq (complement-regions a (next in-regions))))
[])
[base-region])))

(defn divide-region
"Divides a region [start end] into several chunks with maximum length 'step'.
Returns a lazy sequence of vector."
[start end step]
(->> [(inc end)]
(concat (range start (inc end) step))
(partition 2 1)
(map (fn [[s e]] [s (dec e)]))))

(defn divide-refs
"Divides refs into several chunks with maximum length 'step'.
Returns a lazy sequence of map containing {:chr :start :end}."
[refs step]
(mapcat
(fn [{:keys [name len]}]
(map (fn [[s e]] {:chr name :start s :end e})
(divide-region 1 len step)))
refs))

;;; validation
;;; ----------

(defn valid-rname?
"Checks if the given rname conforms to the spec of sam."
[rname]
(and rname (string? rname) (re-matches #"[!-)+-<>-~][!-~]*" rname)))

(defn valid-region?
"Checks if the given region map is a valid 1-based closed range."
[{:keys [chr start end]}]
(and start end
(valid-rname? chr)
(number? start) (pos? start)
(number? end) (pos? end)
(<= start end)))

;;; region <=> string
;;; ----------

(defn parse-region
"Parse a region string into a map."
[region-str]
(when region-str
(let [pattern #"([!-)+-<>-~][!-~]*?)(:(\d+)?(-(\d+))?)?"
[_ chr _ start _ end] (re-matches pattern region-str)
start' (proton/as-long start)
end' (proton/as-long end)]
(when chr
(cond-> {:chr chr}
start' (assoc :start start')
end' (assoc :end end'))))))

(defn parse-region-strict
"Parse a region string into a map strictly."
[region-str]
(let [region-map (parse-region region-str)]
(when (valid-region? region-map) region-map)))

(defn format-region
"Format a region map into a string."
[{:keys [chr start end]}]
(let [result (->> [chr start end]
(take-while some?)
(interleave [nil \: \-])
(apply str))]
(when-not (cstr/blank? result) result)))

(defn format-region-strict
"Format a region map into a string strictly."
[region-map]
(when (valid-region? region-map) (format-region region-map)))

0 comments on commit cbe4dd7

Please sign in to comment.