diff --git a/src/cljam/algo/depth.clj b/src/cljam/algo/depth.clj index dc79a6b2..3db5159a 100644 --- a/src/cljam/algo/depth.clj +++ b/src/cljam/algo/depth.clj @@ -3,7 +3,7 @@ (:require [com.climate.claypoole :as cp] [com.climate.claypoole.lazy :as lazy] [cljam.common :as common] - [cljam.util :as util] + [cljam.util.region :as region] [cljam.io.sam :as sam] [cljam.io.sam.util :as sam-util]) (:import [cljam.io.protocols SAMRegionBlock])) @@ -39,7 +39,7 @@ (fn [[start end]] (with-open [r (sam/clone-bam-reader rdr)] (count-for-positions (read-fn r start end) start end))) xs)))] - (->> (util/divide-region start end step) + (->> (region/divide-region start end step) count-fn (apply concat)))) @@ -108,7 +108,7 @@ (f (sam/read-blocks rdr region {:mode :region}) start end 0 pile) (cp/pdoseq n-threads - [[s e] (util/divide-region start end step)] + [[s e] (region/divide-region start end step)] (with-open [r (sam/clone-bam-reader rdr)] (-> (sam/read-blocks r {:chr chr, :start s, :end e} {:mode :region}) (f s e (- s start) pile))))) diff --git a/src/cljam/tools/cli.clj b/src/cljam/tools/cli.clj index c2b1c133..4ce1ee7c 100644 --- a/src/cljam/tools/cli.clj +++ b/src/cljam/tools/cli.clj @@ -16,7 +16,7 @@ [cljam.algo.pileup :as plp] [cljam.algo.convert :as convert] [cljam.algo.level :as level] - [cljam.util :as util]) + [cljam.util.region :as region]) (:import [java.io Closeable BufferedWriter OutputStreamWriter])) ;; CLI functions @@ -267,7 +267,7 @@ (when-not (sorter/sorted-by? r) (exit 1 "Not sorted")) (if (:region options) - (if-let [region (util/parse-region (:region options))] + (if-let [region (region/parse-region (:region options))] (cond (:simple options) (pileup-simple r (:thread options) region) (:ref options) (pileup-with-ref r (:ref options) region) diff --git a/src/cljam/util.clj b/src/cljam/util.clj index 04700012..a188e364 100644 --- a/src/cljam/util.clj +++ b/src/cljam/util.clj @@ -87,68 +87,3 @@ (-> (CompressorStreamFactory.) (.createCompressorOutputStream s os)) os)))) - -;; region utils -;; --------- - -(defn divide-region - "Divides a region [start end] into several chunks with maximum length 'step'. - Returns a lazy sequence of vector." - [start end step] - (->> [(inc end)] - (concat (range start (inc end) step)) - (partition 2 1) - (map (fn [[s e]] [s (dec e)])))) - -(defn divide-refs - "Divides refs into several chunks with maximum length 'step'. - Returns a lazy sequence of map containing {:chr :start :end}." - [refs step] - (mapcat - (fn [{:keys [name len]}] - (map (fn [[s e]] {:chr name :start s :end e}) - (divide-region 1 len step))) - refs)) - -(defn valid-rname? - "Checks if the given rname conforms to the spec of sam." - [rname] - (and rname (string? rname) (re-matches #"[!-)+-<>-~][!-~]*" rname))) - -(defn valid-region? - "Checks if the given region map is a valid 1-based closed range." - [{:keys [chr start end]}] - (and start end - (valid-rname? chr) - (number? start) (pos? start) - (number? end) (pos? end) - (<= start end))) - -(defn parse-region - "Parse a region string into a map." - [region-str] - (when region-str - (let [[_ chr _ start _ end] (re-matches #"([!-)+-<>-~][!-~]*?)(:(\d+)?(-(\d+))?)?" region-str) - start' (proton/as-long start) - end' (proton/as-long end)] - (when chr - (cond-> {:chr chr} - start' (assoc :start start') - end' (assoc :end end')))))) - -(defn parse-region-strict - "Parse a region string into a map strictly." - [region-str] - (let [region-map (parse-region region-str)] - (when (valid-region? region-map) region-map))) - -(defn format-region - "Format a region map into a string." - [{:keys [chr start end]}] - (let [result (apply str (interleave [nil \: \-] (take-while some? [chr start end])))] - (when-not (cstr/blank? result) result))) - -(defn format-region-strict - "Format a region map into a string strictly." - [region-map] - (when (valid-region? region-map) (format-region region-map))) diff --git a/src/cljam/util/region.clj b/src/cljam/util/region.clj new file mode 100644 index 00000000..66c6d230 --- /dev/null +++ b/src/cljam/util/region.clj @@ -0,0 +1,157 @@ +(ns cljam.util.region + "Utility functions for manipulating chromosomal regions." + (:require [clojure.string :as cstr] + [proton.core :as proton])) + +;;; region conversion +;;; ---------- + +(defn merge-regions-with + "Returns a lazy sequence of merged regions. Input regions must be sorted. + Neighboring regions apart less than or equal to 'max-gap' will be merged + with 'merge-fn'. Returns a stateful transducer when no input 'regions' is + provided." + ([merge-fn] + (merge-regions-with merge-fn 0)) + ([merge-fn ^long max-gap] + (fn merge-regions-transducer [rf] + (let [last-reg (volatile! nil)] + (fn merge-regions-rf + ([] (rf)) + ([r] (rf (if-let [l @last-reg] (rf r l) r))) + ([r x] (if-let [l @last-reg] + (if (and (= (:chr l) (:chr x)) + (<= (- (dec (:start x)) (:end l)) max-gap)) + (do (vswap! last-reg merge-fn x) r) + (do (vreset! last-reg x) (rf r l))) + (do (vreset! last-reg x) r))))))) + ([merge-fn ^long max-gap regs] + (if-let [f (first regs)] + (if-let [s (second regs)] + (if (and (= (:chr f) (:chr s)) + (<= (- (dec (:start s)) (:end f)) max-gap)) + (let [next-regs (cons (merge-fn f s) (nnext regs))] + (lazy-seq (merge-regions-with merge-fn max-gap next-regs))) + (cons f (lazy-seq (merge-regions-with merge-fn max-gap (next regs))))) + [f]) + []))) + +(defn- merge-two-regions + "Default function to merge two regions." + [x y] + (update x :end max (:end y))) + +(def + ^{:doc "Same as 'merge-regions-with' except for 'merge-two-regions' is + partially applied as merge-fn." + :arglists '([] [max-gap] [max-gap regions])} + merge-regions + (partial merge-regions-with merge-two-regions)) + +(defn subtract-region + "Subtract a region from another one. + Returns a vector of regions." + [lhs-reg rhs-reg] + (if (= (:chr lhs-reg) (:chr rhs-reg)) + (filterv + #(<= (:start %) (:end %)) + [(update lhs-reg :end min (dec (:start rhs-reg))) + (update lhs-reg :start max (inc (:end rhs-reg)))]) + [lhs-reg])) + +(defn complement-regions + "Returns a sequence of regions complement to in-regions. + in-regions must be sorted. + Returns a stateful transducer when no regions provided." + ([base-region] + (fn [rf] + (let [last-reg (volatile! base-region)] + (fn + ([] (rf)) + ([r] (rf (if-let [l @last-reg] (rf r l) r))) + ([r x] + (if-let [l @last-reg] + (let [[a b] (subtract-region l x)] + (vreset! last-reg (or b a)) + (if b (rf r a) r)) + r)))))) + ([base-region in-regions] + (if-let [reg (first in-regions)] + (if-let [[a b] (seq (subtract-region base-region reg))] + (if b + (cons a (lazy-seq (complement-regions b (next in-regions)))) + (lazy-seq (complement-regions a (next in-regions)))) + []) + [base-region]))) + +(defn divide-region + "Divides a region [start end] into several chunks with maximum length 'step'. + Returns a lazy sequence of vector." + [start end step] + (->> [(inc end)] + (concat (range start (inc end) step)) + (partition 2 1) + (map (fn [[s e]] [s (dec e)])))) + +(defn divide-refs + "Divides refs into several chunks with maximum length 'step'. + Returns a lazy sequence of map containing {:chr :start :end}." + [refs step] + (mapcat + (fn [{:keys [name len]}] + (map (fn [[s e]] {:chr name :start s :end e}) + (divide-region 1 len step))) + refs)) + +;;; validation +;;; ---------- + +(defn valid-rname? + "Checks if the given rname conforms to the spec of sam." + [rname] + (and rname (string? rname) (re-matches #"[!-)+-<>-~][!-~]*" rname))) + +(defn valid-region? + "Checks if the given region map is a valid 1-based closed range." + [{:keys [chr start end]}] + (and start end + (valid-rname? chr) + (number? start) (pos? start) + (number? end) (pos? end) + (<= start end))) + +;;; region <=> string +;;; ---------- + +(defn parse-region + "Parse a region string into a map." + [region-str] + (when region-str + (let [pattern #"([!-)+-<>-~][!-~]*?)(:(\d+)?(-(\d+))?)?" + [_ chr _ start _ end] (re-matches pattern region-str) + start' (proton/as-long start) + end' (proton/as-long end)] + (when chr + (cond-> {:chr chr} + start' (assoc :start start') + end' (assoc :end end')))))) + +(defn parse-region-strict + "Parse a region string into a map strictly." + [region-str] + (let [region-map (parse-region region-str)] + (when (valid-region? region-map) region-map))) + +(defn format-region + "Format a region map into a string." + [{:keys [chr start end]}] + (let [result (->> [chr start end] + (take-while some?) + (interleave [nil \: \-]) + (apply str))] + (when-not (cstr/blank? result) result))) + +(defn format-region-strict + "Format a region map into a string strictly." + [region-map] + (when (valid-region? region-map) (format-region region-map))) diff --git a/test/cljam/util/region_test.clj b/test/cljam/util/region_test.clj new file mode 100644 index 00000000..b6274141 --- /dev/null +++ b/test/cljam/util/region_test.clj @@ -0,0 +1,272 @@ +(ns cljam.util.region-test + (:require [clojure.test :refer :all] + [cljam.util.region :as region])) + +(defn into* ([] []) ([r] r) ([r x] (conj r x))) + +(deftest merge-regions + (testing "(merge-regions)" + (are [?regs ?expected] + (= (sequence (region/merge-regions) ?regs) + (transduce identity ((region/merge-regions) into*) ?regs) + ?expected) + + nil + [] + + [] + [] + + [{:chr "1", :start 1, :end 10}] + [{:chr "1", :start 1, :end 10}] + + [{:chr "1", :start 1, :end 10} {:chr "1", :start 1, :end 14}] + [{:chr "1", :start 1, :end 14}] + + [{:chr "1", :start 1, :end 10} {:chr "1", :start 10, :end 20}] + [{:chr "1", :start 1, :end 20}] + + [{:chr "1", :start 1, :end 10} {:chr "1", :start 11, :end 20}] + [{:chr "1", :start 1, :end 20}] + + [{:chr "1", :start 1, :end 10} {:chr "1", :start 12, :end 20}] + [{:chr "1", :start 1, :end 10} {:chr "1", :start 12, :end 20}] + + [{:chr "1", :start 1, :end 10} {:chr "1", :start 11, :end 20} + {:chr "1", :start 21, :end 30}] + [{:chr "1", :start 1, :end 30}] + + [{:chr "1", :start 1, :end 10} {:chr "1", :start 11, :end 20} + {:chr "1", :start 22, :end 30}] + [{:chr "1", :start 1, :end 20} {:chr "1", :start 22, :end 30}])) + + (testing "(merge-regions gap) and (merge-regions gap regs)" + (are [?gap ?regs ?expected] + (= (sequence (region/merge-regions ?gap) ?regs) + (region/merge-regions ?gap ?regs) + ?expected) + + 3 nil + [] + + 3 [] + [] + + 3 [{:chr "1", :start 1, :end 10}] + [{:chr "1", :start 1, :end 10}] + + 3 [{:chr "1", :start 1, :end 10} {:chr "1", :start 1, :end 8}] + [{:chr "1", :start 1, :end 10}] + + 3 [{:chr "1", :start 1, :end 10} {:chr "1", :start 5, :end 14}] + [{:chr "1", :start 1, :end 14}] + + 3 [{:chr "1", :start 1, :end 10} {:chr "1", :start 13, :end 20}] + [{:chr "1", :start 1, :end 20}] + + 3 [{:chr "1", :start 1, :end 10} {:chr "1", :start 14, :end 20}] + [{:chr "1", :start 1, :end 20}] + + 2 [{:chr "1", :start 1, :end 10} {:chr "1", :start 14, :end 20}] + [{:chr "1", :start 1, :end 10} {:chr "1", :start 14, :end 20}] + + 3 [{:chr "1", :start 1, :end 10} {:chr "1", :start 15, :end 20}] + [{:chr "1", :start 1, :end 10} {:chr "1", :start 15, :end 20}] + + 3 [{:chr "1", :start 1, :end 10} {:chr "1", :start 14, :end 20} + {:chr "1", :start 24, :end 30}] + [{:chr "1", :start 1, :end 30}] + + 3 [{:chr "1", :start 1, :end 10} {:chr "1", :start 14, :end 20} + {:chr "1", :start 25, :end 30}] + [{:chr "1", :start 1, :end 20} {:chr "1", :start 25, :end 30}] + + 3 [{:chr "1", :start 1, :end 10} {:chr "1", :start 14, :end 20} {:chr "2", :start 24, :end 30}] + [{:chr "1", :start 1, :end 20} {:chr "2", :start 24, :end 30}]))) + +(deftest subtract-region + (are [?lhs ?rhs ?expected] + (= (region/subtract-region ?lhs ?rhs) ?expected) + {:chr "1" :start 10 :end 20} nil + [{:chr "1" :start 10 :end 20}] + + {:chr "1" :start 10 :end 20} {:chr "1" :start 1 :end 9} + [{:chr "1" :start 10 :end 20}] + + {:chr "1" :start 10 :end 20} {:chr "1" :start 1 :end 10} + [{:chr "1" :start 11 :end 20}] + + {:chr "1" :start 10 :end 20} {:chr "1" :start 10 :end 19} + [{:chr "1" :start 20 :end 20}] + + {:chr "1" :start 10 :end 20} {:chr "1" :start 11 :end 19} + [{:chr "1" :start 10 :end 10} {:chr "1" :start 20 :end 20}] + + {:chr "1" :start 10 :end 20} {:chr "1" :start 10 :end 20} + [] + + {:chr "1" :start 10 :end 20} {:chr "1" :start 20 :end 30} + [{:chr "1" :start 10 :end 19}] + + {:chr "1" :start 10 :end 20} {:chr "1" :start 21 :end 30} + [{:chr "1" :start 10 :end 20}] + + {:chr "1" :start 10 :end 20} {:chr "2" :start 10 :end 20} + [{:chr "1" :start 10 :end 20}])) + +(deftest complement-regions + (are [?base-region ?in-region ?expected] + (= (sequence (region/complement-regions ?base-region) ?in-region) + (transduce identity ((region/complement-regions ?base-region) into*) ?in-region) + (region/complement-regions ?base-region ?in-region) + ?expected) + {:chr "1" :start 100 :end 200} [] [{:chr "1" :start 100 :end 200}] + {:chr "1" :start 100 :end 200} [{:chr "1" :start 1 :end 99}] [{:chr "1" :start 100 :end 200}] + {:chr "1" :start 100 :end 200} [{:chr "1" :start 201 :end 300}] [{:chr "1" :start 100 :end 200}] + {:chr "1" :start 100 :end 200} [{:chr "1" :start 99 :end 100}] [{:chr "1" :start 101 :end 200}] + {:chr "1" :start 100 :end 200} [{:chr "1" :start 100 :end 150}] [{:chr "1" :start 151 :end 200}] + {:chr "1" :start 100 :end 200} [{:chr "1" :start 100 :end 200}] [] + {:chr "1" :start 100 :end 200} [{:chr "1" :start 100 :end 200} {:chr "1" :start 210 :end 230}] [] + {:chr "1" :start 100 :end 200} [{:chr "1" :start 150 :end 200}] [{:chr "1" :start 100 :end 149}] + {:chr "1" :start 100 :end 200} [{:chr "1" :start 110 :end 120} {:chr "1" :start 130 :end 140}] + [{:chr "1" :start 100 :end 109} {:chr "1" :start 121 :end 129} {:chr "1" :start 141 :end 200}])) + +(deftest divide-region + (are [?start ?end ?step ?expected] + (= (region/divide-region ?start ?end ?step) ?expected) + 1 10 1 [[1 1] [2 2] [3 3] [4 4] [5 5] [6 6] [7 7] [8 8] [9 9] [10 10]] + 1 10 2 [[1 2] [3 4] [5 6] [7 8] [9 10]] + 1 10 3 [[1 3] [4 6] [7 9] [10 10]] + 1 10 4 [[1 4] [5 8] [9 10]] + 1 10 5 [[1 5] [6 10]] + 1 10 6 [[1 6] [7 10]] + 1 10 7 [[1 7] [8 10]] + 1 10 8 [[1 8] [9 10]] + 1 10 9 [[1 9] [10 10]] + 1 10 10 [[1 10]] + 1 10 11 [[1 10]])) + +(deftest divide-refs + (are [?refs ?step ?expected] + (= (region/divide-refs ?refs ?step) ?expected) + [{:name "chr1" :len 10}] 4 [{:chr "chr1" :start 1 :end 4} + {:chr "chr1" :start 5 :end 8} + {:chr "chr1" :start 9 :end 10}] + [{:name "chr1" :len 10}] 5 [{:chr "chr1" :start 1 :end 5} + {:chr "chr1" :start 6 :end 10}] + [{:name "chr1" :len 10}] 10 [{:chr "chr1" :start 1 :end 10}] + [{:name "chr1" :len 10} + {:name "chr2" :len 5}] 6 [{:chr "chr1" :start 1 :end 6} + {:chr "chr1" :start 7 :end 10} + {:chr "chr2" :start 1 :end 5}])) + +(deftest valid-rname? + (are [?rname ?expected] + (= ?expected (boolean (region/valid-rname? ?rname))) + nil false + [\c \h \r] false + "" false + "c" true + "chr1" true + "*" false + "=" false + ":" true + "chr:1" true + "chr1:1-10" true)) + +(deftest valid-region? + (are [?region-map ?expected] + (= ?expected (boolean (region/valid-region? ?region-map))) + nil false + {} false + {:chr "chr1"} false + {:chr "chr1", :start 1} false + {:chr "chr1", :start 1, :end 10} true + {:chr "chr1", :start 100, :end 10} false + {:chr "chr1", :start "1", :end 10} false + {:chr "chr1", :start 1, :end "10"} false + {:chr "chr1", :start 0, :end 10} false + {:chr "chr1", :start 1, :end 0} false + {:chr "", :start 100, :end 10} false + {:chr " ", :start 100, :end 10} false)) + +(deftest parse-region + (are [?region-str ?expected] + (= ?expected (region/parse-region ?region-str)) + nil nil + "*" nil + "=" nil + "c" {:chr "c"} + "chr1" {:chr "chr1"} + "chrUn" {:chr "chrUn"} + "*chr1" nil + "=chr1" nil + "chr1*" {:chr "chr1*"} + "chr1=" {:chr "chr1="} + "chr1-" {:chr "chr1-"} + "chr1:" {:chr "chr1"} + "chr1:-" {:chr "chr1:-"} + "!\"#$%&'()*+,-./0123456789;<=>?@[\\]^_`{|}~" {:chr "!\"#$%&'()*+,-./0123456789;<=>?@[\\]^_`{|}~"} + "chr1:1" {:chr "chr1", :start 1} + "chr1:1-" {:chr "chr1:1-"} + "chr1:1-2" {:chr "chr1", :start 1, :end 2} + "chr1:-2" {:chr "chr1", :end 2} + "chr1:001-200" {:chr "chr1", :start 1, :end 200} + "chr1:100-2" {:chr "chr1", :start 100, :end 2} + "chr1:2:3-4" {:chr "chr1:2", :start 3, :end 4} + "chr1:2-3:4-5" {:chr "chr1:2-3", :start 4, :end 5} + "chr1:2-3:4-5:6-7" {:chr "chr1:2-3:4-5", :start 6, :end 7})) + +(deftest parse-region-strict + (are [?region-str ?expected] + (= ?expected (region/parse-region-strict ?region-str)) + nil nil + "*" nil + "=" nil + "c" nil + "chr1" nil + "chrUn" nil + "*chr1" nil + "=chr1" nil + "chr1*" nil + "chr1=" nil + "chr1-" nil + "chr1:" nil + "chr1:-" nil + "!\"#$%&'()*+,-./0123456789;<=>?@[\\]^_`{|}~" nil + "chr1:1" nil + "chr1:1-" nil + "chr1:1-2" {:chr "chr1", :start 1, :end 2} + "chr1:-2" nil + "chr1:001-200" {:chr "chr1", :start 1, :end 200} + "chr1:100-2" nil + "chr1:2:3-4" {:chr "chr1:2", :start 3, :end 4} + "chr1:2-3:4-5" {:chr "chr1:2-3", :start 4, :end 5} + "chr1:2-3:4-5:6-7" {:chr "chr1:2-3:4-5", :start 6, :end 7})) + +(deftest format-region + (are [?region-map ?expected] + (= ?expected (region/format-region ?region-map)) + nil nil + {} nil + {:chr "chr1"} "chr1" + {:chr "chr1", :start 1} "chr1:1" + {:chr "chr1", :start 1, :end 10} "chr1:1-10" + {:chr "chr1", :start 10, :end 1} "chr1:10-1" + {:chr "chr1", :end 10} "chr1" + {:start 1} nil + {:end 10} nil)) + +(deftest format-region-strict + (are [?region-map ?expected] + (= ?expected (region/format-region-strict ?region-map)) + nil nil + {} nil + {:chr "chr1"} nil + {:chr "chr1", :start 1} nil + {:chr "chr1", :start 1, :end 10} "chr1:1-10" + {:chr "chr1", :start 10, :end 1} nil + {:chr "chr1", :end 10} nil + {:start 1} nil + {:end 10} nil)) + diff --git a/test/cljam/util_test.clj b/test/cljam/util_test.clj index 47db5449..f316aa27 100644 --- a/test/cljam/util_test.clj +++ b/test/cljam/util_test.clj @@ -12,141 +12,3 @@ (are [?n] (thrown? AssertionError (util/ubyte ?n)) -1 256)) - -(deftest divide-region - (are [?start ?end ?step ?expected] - (= (util/divide-region ?start ?end ?step) ?expected) - 1 10 1 [[1 1] [2 2] [3 3] [4 4] [5 5] [6 6] [7 7] [8 8] [9 9] [10 10]] - 1 10 2 [[1 2] [3 4] [5 6] [7 8] [9 10]] - 1 10 3 [[1 3] [4 6] [7 9] [10 10]] - 1 10 4 [[1 4] [5 8] [9 10]] - 1 10 5 [[1 5] [6 10]] - 1 10 6 [[1 6] [7 10]] - 1 10 7 [[1 7] [8 10]] - 1 10 8 [[1 8] [9 10]] - 1 10 9 [[1 9] [10 10]] - 1 10 10 [[1 10]] - 1 10 11 [[1 10]])) - -(deftest divide-refs - (are [?refs ?step ?expected] - (= (util/divide-refs ?refs ?step) ?expected) - [{:name "chr1" :len 10}] 4 [{:chr "chr1" :start 1 :end 4} - {:chr "chr1" :start 5 :end 8} - {:chr "chr1" :start 9 :end 10}] - [{:name "chr1" :len 10}] 5 [{:chr "chr1" :start 1 :end 5} - {:chr "chr1" :start 6 :end 10}] - [{:name "chr1" :len 10}] 10 [{:chr "chr1" :start 1 :end 10}] - [{:name "chr1" :len 10} - {:name "chr2" :len 5}] 6 [{:chr "chr1" :start 1 :end 6} - {:chr "chr1" :start 7 :end 10} - {:chr "chr2" :start 1 :end 5}])) - -(deftest valid-rname? - (are [?rname ?expected] - (= ?expected (boolean (util/valid-rname? ?rname))) - nil false - "" false - "c" true - "chr1" true - "*" false - "=" false - ":" true - "chr:1" true - "chr1:1-10" true)) - -(deftest valid-region? - (are [?region-map ?expected] - (= ?expected (boolean (util/valid-region? ?region-map))) - nil false - {} false - {:chr "chr1"} false - {:chr "chr1", :start 1} false - {:chr "chr1", :start 1, :end 10} true - {:chr "chr1", :start 100, :end 10} false - {:chr "chr1", :start "1", :end 10} false - {:chr "chr1", :start 1, :end "10"} false - {:chr "chr1", :start 0, :end 10} false - {:chr "chr1", :start 1, :end 0} false - {:chr "", :start 100, :end 10} false - {:chr " ", :start 100, :end 10} false)) - -(deftest parse-region - (are [?region-str ?expected] - (= ?expected (util/parse-region ?region-str)) - nil nil - "*" nil - "=" nil - "c" {:chr "c"} - "chr1" {:chr "chr1"} - "chrUn" {:chr "chrUn"} - "*chr1" nil - "=chr1" nil - "chr1*" {:chr "chr1*"} - "chr1=" {:chr "chr1="} - "chr1-" {:chr "chr1-"} - "chr1:" {:chr "chr1"} - "chr1:-" {:chr "chr1:-"} - "!\"#$%&'()*+,-./0123456789;<=>?@[\\]^_`{|}~" {:chr "!\"#$%&'()*+,-./0123456789;<=>?@[\\]^_`{|}~"} - "chr1:1" {:chr "chr1", :start 1} - "chr1:1-" {:chr "chr1:1-"} - "chr1:1-2" {:chr "chr1", :start 1, :end 2} - "chr1:-2" {:chr "chr1", :end 2} - "chr1:001-200" {:chr "chr1", :start 1, :end 200} - "chr1:100-2" {:chr "chr1", :start 100, :end 2} - "chr1:2:3-4" {:chr "chr1:2", :start 3, :end 4} - "chr1:2-3:4-5" {:chr "chr1:2-3", :start 4, :end 5} - "chr1:2-3:4-5:6-7" {:chr "chr1:2-3:4-5", :start 6, :end 7})) - -(deftest parse-region-strict - (are [?region-str ?expected] - (= ?expected (util/parse-region-strict ?region-str)) - nil nil - "*" nil - "=" nil - "c" nil - "chr1" nil - "chrUn" nil - "*chr1" nil - "=chr1" nil - "chr1*" nil - "chr1=" nil - "chr1-" nil - "chr1:" nil - "chr1:-" nil - "!\"#$%&'()*+,-./0123456789;<=>?@[\\]^_`{|}~" nil - "chr1:1" nil - "chr1:1-" nil - "chr1:1-2" {:chr "chr1", :start 1, :end 2} - "chr1:-2" nil - "chr1:001-200" {:chr "chr1", :start 1, :end 200} - "chr1:100-2" nil - "chr1:2:3-4" {:chr "chr1:2", :start 3, :end 4} - "chr1:2-3:4-5" {:chr "chr1:2-3", :start 4, :end 5} - "chr1:2-3:4-5:6-7" {:chr "chr1:2-3:4-5", :start 6, :end 7})) - -(deftest format-region - (are [?region-map ?expected] - (= ?expected (util/format-region ?region-map)) - nil nil - {} nil - {:chr "chr1"} "chr1" - {:chr "chr1", :start 1} "chr1:1" - {:chr "chr1", :start 1, :end 10} "chr1:1-10" - {:chr "chr1", :start 10, :end 1} "chr1:10-1" - {:chr "chr1", :end 10} "chr1" - {:start 1} nil - {:end 10} nil)) - -(deftest format-region-strict - (are [?region-map ?expected] - (= ?expected (util/format-region-strict ?region-map)) - nil nil - {} nil - {:chr "chr1"} nil - {:chr "chr1", :start 1} nil - {:chr "chr1", :start 1, :end 10} "chr1:1-10" - {:chr "chr1", :start 10, :end 1} nil - {:chr "chr1", :end 10} nil - {:start 1} nil - {:end 10} nil))