Skip to content

Commit

Permalink
Merge pull request #101 from chrovis/feature/chr-order
Browse files Browse the repository at this point in the history
Add an ordering function for chromosome name.
  • Loading branch information
totakke committed Aug 31, 2017
2 parents 0999dfa + eae1c2f commit 7df1f95
Show file tree
Hide file tree
Showing 3 changed files with 35 additions and 6 deletions.
8 changes: 3 additions & 5 deletions src/cljam/io/bed.clj
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
[proton.core :refer [as-int as-long]]
[cljam.io.protocols :as protocols]
[cljam.util :as util]
[cljam.util.chromosome :as chr-util]
[cljam.util.chromosome :as chr]
[clojure.tools.logging :as logging])
(:import [java.io BufferedReader BufferedWriter Closeable]))

Expand Down Expand Up @@ -154,7 +154,7 @@
This function converts the coordinate into cljam style: 1-origin and inclusive-start / inclusive-end."
[m]
(-> m
(update :chr chr-util/normalize-chromosome-key)
(update :chr chr/normalize-chromosome-key)
(update :start inc)
(update-some :thick-start inc)))

Expand Down Expand Up @@ -190,9 +190,7 @@
[xs]
(sort-by
(fn [m]
[(or (as-int (last (re-find #"(chr)?(\d+)" (:chr m)))) Integer/MAX_VALUE)
(or ({"X" 23 "Y" 24 "M" 25} (last (re-find #"(chr)?([X|Y|M])" (:chr m)))) Integer/MAX_VALUE)
(:chr m)
[(chr/chromosome-order-key (:chr m))
(:start m)
(:end m)])
xs))
Expand Down
10 changes: 9 additions & 1 deletion src/cljam/util/chromosome.clj
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
(ns cljam.util.chromosome
"Utilities for handling chromosome name."
(:require [clojure.string :as cstr]))
(:require [clojure.string :as cstr]
[proton.core :as proton]))

(defn normalize-name
[s]
Expand Down Expand Up @@ -48,3 +49,10 @@
[s]
(some? (re-matches #"^chr([0-9]{1,2}|X|Y|M|MT)"
(normalize-chromosome-key s))))

(defn chromosome-order-key
  "Returns a two-element vector to be used as a `sort-by` key for the
  chromosome name `s`.

  The name is matched against an optional `chr` prefix followed by either a
  positive decimal number or one of X, Y, M, MT (case-insensitively), and
  then an arbitrary non-whitespace suffix.

  - Numbered chromosomes yield `[number suffix]`.
  - X, Y, M and MT yield `[rank suffix]` where rank is slightly below
    `Integer/MAX_VALUE`, ordered X < Y < M < MT.
  - Anything else yields `[Integer/MAX_VALUE s]`, i.e. unrecognized names go
    last and are ordered among themselves by their full name."
  [s]
  (if-let [[_ _ chr suffix] (re-find #"(?i)^(chr)?([1-9][0-9]*|X|Y|MT|M)(\S*)" s)]
    (if-let [num (proton/as-int chr)]
      [num suffix]
      ;; The pattern is case-insensitive, so `chr` may be e.g. "x" or "mt";
      ;; normalize the case before looking up its rank. A lookup map with a
      ;; fallback is used instead of `case` without a default, which would
      ;; throw IllegalArgumentException for any token it does not list.
      (if-let [offset ({"X" 4 "Y" 3 "M" 2 "MT" 1} (cstr/upper-case chr))]
        [(- Integer/MAX_VALUE offset) suffix]
        ;; NOTE(review): reached only when `chr` is a digit run that
        ;; `proton/as-int` returned nil for (presumably out-of-range values);
        ;; treat it like an unrecognized name rather than failing the sort.
        [Integer/MAX_VALUE s]))
    [Integer/MAX_VALUE s]))
23 changes: 23 additions & 0 deletions test/cljam/util/t_chromosome.clj
Original file line number Diff line number Diff line change
Expand Up @@ -107,3 +107,26 @@

"chr4_GL000257v2_alt" false
"14_KI270723V1_random" false))

;; Table-driven test: each row is an input sequence of chromosome names and
;; the order expected after sorting it with `chr/chromosome-order-key`.
(deftest chromosome-order-key
(are [?in ?out] (= (sort-by chr/chromosome-order-key ?in) ?out)
;; Numbered chromosomes sort numerically (chr2 before chr11), with or
;; without the "chr" prefix.
["chr2" "chr11" "chr3" "chr1" "chr21"] ["chr1" "chr2" "chr3" "chr11" "chr21"]
["2" "11" "3" "1" "21"] ["1" "2" "3" "11" "21"]
;; M sorts before MT even when both carry the same suffix.
["chrM_foo" "chrMT_foo"] ["chrM_foo" "chrMT_foo"]

;; Numbers come first, then X, Y, M, MT; unrecognized names (chrUn) go last.
;; Within one chromosome the bare name precedes its suffixed variants.
["chrY" "chrM_foo" "chrMT" "chrM" "chr22" "chrUn" "chrMT_foo" "chrX"]
["chr22" "chrX" "chrY" "chrM" "chrM_foo" "chrMT" "chrMT_foo" "chrUn"]

;; Same as above without the "chr" prefix; multiple unrecognized names
;; ("Un", "Z") are ordered by name among themselves.
["Y" "M_foo" "MT" "M" "22" "Z" "Un" "MT_foo" "X"]
["22" "X" "Y" "M" "M_foo" "MT" "MT_foo" "Un" "Z"]

;; Longer mixed list with `_alt` / `_random` suffixed names and `chrUn_*`
;; names: ordered by chromosome first, then by suffix.
["chr19_KI270930v1_alt" "chr22_KI270879v1_alt" "chrUn_KI270425v1" "chrX" "chrUn_KI270423v1" "chrUn_KI270590v1"
"chr1" "chr9_GL383541v1_alt" "chr10_GL383545v1_alt" "chr6_GL000253v2_alt" "chrY" "chr9_KI270717v1_random"
"chr22_KI270734v1_random" "chr2" "chr18_GL383572v1_alt" "chr6_GL000251v2_alt" "chr9_KI270823v1_alt"
"chr19_KI270916v1_alt" "chr22_KI270875v1_alt" "chrUn_KI270330v1" "chr19_GL949753v2_alt" "chr11"
"chrUn_KI270312v1" "chr4_GL000008v2_random" "chr19_KI270929v1_alt"]
["chr1" "chr2" "chr4_GL000008v2_random" "chr6_GL000251v2_alt" "chr6_GL000253v2_alt"
"chr9_GL383541v1_alt" "chr9_KI270717v1_random" "chr9_KI270823v1_alt" "chr10_GL383545v1_alt" "chr11"
"chr18_GL383572v1_alt" "chr19_GL949753v2_alt" "chr19_KI270916v1_alt" "chr19_KI270929v1_alt"
"chr19_KI270930v1_alt" "chr22_KI270734v1_random" "chr22_KI270875v1_alt" "chr22_KI270879v1_alt" "chrX" "chrY"
"chrUn_KI270312v1" "chrUn_KI270330v1" "chrUn_KI270423v1" "chrUn_KI270425v1" "chrUn_KI270590v1"]))

0 comments on commit 7df1f95

Please sign in to comment.