New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We鈥檒l occasionally send you account related emails.
Already on GitHub? Sign in to your account
Add utilities for whole-genome coordinate. #103
Changes from 1 commit
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,53 @@ | ||
(ns cljam.util.whole-genome | ||
"Utilities for conversions between chromosomal positions and whole-genome positions.") | ||
|
||
(defn create-chr-to-whole-genome-index | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. In cljam, |
||
"Creates a map of [chromosome-name offset&length] from refs." | ||
[refs] | ||
(->> refs | ||
(map :len) | ||
(reductions + 0) | ||
(map (fn [r offset] [(:name r) {:offset offset, :len (:len r)}]) refs) | ||
(into {}))) | ||
|
||
(defn to-whole-genome-coord | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
|
||
"Transforms a position in a chromosome into a whole-genome position." | ||
[chr->offset chr ^long pos] | ||
(when-let [{:keys [^long offset ^long len]} (chr->offset chr)] | ||
(when (<= 1 pos len) | ||
(inc (+ offset (dec pos)))))) | ||
|
||
(defn create-whole-genome-to-chr-index | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Same as the comment on |
||
"Creates a sorted-map of [offset chromosome-name&length] from refs." | ||
[refs] | ||
(into (sorted-map) | ||
(map vector | ||
(->> refs | ||
(map :len) | ||
(reductions + 0)) | ||
refs))) | ||
|
||
(defn to-chr-and-pos | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I prefer |
||
"Transforms a whole-genome position to a vector of a chromosome name and a position in the chromosome." | ||
[offset->ref ^long wg-pos] | ||
(when-let [[^long offset {:keys [name len]}] (first (rsubseq offset->ref <= (dec wg-pos)))] | ||
(when (<= 1 (- wg-pos offset) len) | ||
[name (- wg-pos offset)]))) | ||
|
||
(defn to-regions | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I prefer |
||
"Transforms a region in whole-genome coordinate into a sequence of chromosomal regions." | ||
[offset->ref ^long start ^long end] | ||
(let [wg-len (let [[o r] (first (rsubseq offset->ref > 0))] (+ o (:len r)))] | ||
(when (and (<= start end) (or (pos? start) (pos? end)) (or (<= start wg-len) (<= end wg-len))) | ||
(let [start' (min (max 1 start) wg-len) | ||
end' (min (max 1 end) wg-len) | ||
[s-offset {s-name :name s-len :len}] (first (rsubseq offset->ref <= (dec start'))) | ||
[e-offset {e-name :name e-len :len}] (first (rsubseq offset->ref <= (dec end')))] | ||
(if (= s-offset e-offset) | ||
[{:chr s-name, :start (- start' s-offset), :end (min (- end' s-offset) s-len)}] | ||
(concat | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
|
||
[{:chr s-name, :start (- start' s-offset), :end s-len}] | ||
(map | ||
(fn [[_ r]] {:chr (:name r), :start 1, :end (:len r)}) | ||
(subseq offset->ref > s-offset < e-offset)) | ||
[{:chr e-name, :start 1, :end (min (- end' e-offset) e-len)}])))))) |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,82 @@ | ||
(ns cljam.util.t-whole-genome | ||
(:require [clojure.test :refer :all] | ||
[cljam.util.whole-genome :as wg])) | ||
|
||
(def simple-refs [{:name "1", :len 100} {:name "2", :len 200} {:name "3", :len 300}]) | ||
(def refs | ||
[{:name "chr1", :len 248956422} {:name "chr2", :len 242193529} {:name "chr3", :len 198295559} | ||
{:name "chr4", :len 190214555} {:name "chr5", :len 181538259} {:name "chr6", :len 170805979} | ||
{:name "chr7", :len 159345973} {:name "chr8", :len 145138636} {:name "chr9", :len 138394717} | ||
{:name "chr10", :len 133797422} {:name "chr11", :len 135086622} {:name "chr12", :len 133275309} | ||
{:name "chr13", :len 114364328} {:name "chr14", :len 107043718} {:name "chr15", :len 101991189} | ||
{:name "chr16", :len 90338345} {:name "chr17", :len 83257441} {:name "chr18", :len 80373285} | ||
{:name "chr19", :len 58617616} {:name "chr20", :len 64444167} {:name "chr21", :len 46709983} | ||
{:name "chr22", :len 50818468} {:name "chrX", :len 156040895} {:name "chrY", :len 57227415} | ||
{:name "chrM", :len 16569}]) | ||
|
||
(deftest chr->wg | ||
(let [idx (wg/create-chr-to-whole-genome-index simple-refs)] | ||
(testing "chr -> whole-genome index" | ||
(is (= idx {"1" {:offset 0, :len 100}, "2" {:offset 100, :len 200}, "3" {:offset 300, :len 300}}))) | ||
|
||
(testing "chr -> whole-genome (simple)" | ||
(are [?chr ?pos ?wg-pos] | ||
(= (wg/to-whole-genome-coord idx ?chr ?pos) ?wg-pos) | ||
"0" 1 nil | ||
"1" 0 nil | ||
"1" 1 1 | ||
"1" 100 100 | ||
"2" 0 nil | ||
"2" 1 101 | ||
"2" 200 300 | ||
"2" 201 nil | ||
"3" 1 301 | ||
"3" 300 600 | ||
"3" 301 nil))) | ||
|
||
(let [idx (wg/create-chr-to-whole-genome-index refs)] | ||
(testing "chr -> whole-genome" | ||
(are [?chr ?pos ?wg-pos] | ||
(= (wg/to-whole-genome-coord idx ?chr ?pos) ?wg-pos) | ||
"chr1" 1 1 | ||
"chr1" 248956422 248956422 | ||
"chr2" 1 (+ 248956422 1) | ||
"chr2" 242193529 (+ 248956422 242193529) | ||
"chr3" 1 (+ 248956422 242193529 1))))) | ||
|
||
(deftest wg->chr | ||
(let [idx (wg/create-whole-genome-to-chr-index simple-refs)] | ||
(testing "whole-genome -> chr index" | ||
(is (= idx {0 {:name "1", :len 100}, 100 {:name "2", :len 200}, 300 {:name "3", :len 300}}))) | ||
|
||
(testing "whole-genome -> chr" | ||
(are [?wg-pos ?chr-and-pos] | ||
(= (wg/to-chr-and-pos idx ?wg-pos) ?chr-and-pos) | ||
0 nil | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Incorrect indentations |
||
1 ["1" 1] | ||
2 ["1" 2] | ||
99 ["1" 99] | ||
100 ["1" 100] | ||
101 ["2" 1] | ||
300 ["2" 200] | ||
301 ["3" 1] | ||
600 ["3" 300] | ||
601 nil)) | ||
|
||
(testing "whole-genome [start, end] -> regions" | ||
(are [?wg-start ?wg-end ?regions] | ||
(= (wg/to-regions idx ?wg-start ?wg-end) ?regions) | ||
0 0 nil | ||
0 1 [{:chr "1", :start 1, :end 1}] | ||
1 0 nil | ||
1 1 [{:chr "1", :start 1, :end 1}] | ||
1 2 [{:chr "1", :start 1, :end 2}] | ||
2 1 nil | ||
50 100 [{:chr "1", :start 50, :end 100}] | ||
50 150 [{:chr "1", :start 50, :end 100} {:chr "2", :start 1, :end 50}] | ||
50 350 [{:chr "1", :start 50, :end 100} {:chr "2", :start 1, :end 200} {:chr "3", :start 1, :end 50}] | ||
150 550 [{:chr "2", :start 50, :end 200} {:chr "3", :start 1, :end 250}] | ||
301 600 [{:chr "3", :start 1, :end 300}] | ||
301 601 [{:chr "3", :start 1, :end 300}] | ||
600 601 [{:chr "3", :start 300, :end 300}] | ||
601 601 nil)))) |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Please wrap a line at 80 characters if possible. cf. clojure-style-guide#80-character-limits