-
Notifications
You must be signed in to change notification settings - Fork 12
/
core.clj
108 lines (96 loc) · 3.05 KB
/
core.clj
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
(ns cljam.io.fasta.core
(:refer-clojure :exclude [read])
(:require [clojure.java.io :as cio]
[clojure.string :as cstr]
[cljam.io.protocols :as protocols]
[cljam.util :as util]
[cljam.io.fasta-index.core :as fai]
[cljam.io.fasta.reader :as reader])
(:import [java.io FileNotFoundException RandomAccessFile]
[cljam.io.fasta.reader FASTAReader]))
;; Reading
;; -------
(defn- fasta-index
  "Locates and opens the FASTA index (.fai) that accompanies `fasta-url`.

  Candidate index locations are tried in order and the first one that
  `fai/reader` can open is returned:

    1. the FASTA name with `.fai` appended     (ref.fa -> ref.fa.fai)
    2. the FASTA extension replaced by `.fai`  (ref.fa -> ref.fai)
    3. as 1, with an upper-case `.FAI` suffix
    4. as 2, with an upper-case `.FAI` suffix

  When the name does not match the known FASTA extensions, only the
  appended forms (`<name>.fai`, then `<name>.FAI`) are tried.

  Throws FileNotFoundException when no candidate can be opened."
  [fasta-url]
  (let [fasta-exts #"(?i)(\.(fa|fasta|fas|fsa|seq|fna|faa|ffn|frn|mpfa)?)$"
        fasta-name (str fasta-url)
        candidates (if (re-find fasta-exts fasta-name)
                     (mapv #(cstr/replace fasta-name fasta-exts %)
                           ["$1.fai" ".fai" "$1.FAI" ".FAI"])
                     ;; Without a match `cstr/replace` returns the name
                     ;; unchanged, which would make the FASTA file itself
                     ;; the index candidate; append the suffix instead.
                     (mapv #(str fasta-name %) [".fai" ".FAI"]))
        open-index (fn [candidate]
                     ;; URL coercion stays outside the `try` so that only a
                     ;; FileNotFoundException from `fai/reader` is treated
                     ;; as a missing candidate.
                     (let [url (util/as-url candidate)]
                       (try
                         (fai/reader url)
                         (catch FileNotFoundException _ nil))))]
    ;; `some` stops at the first candidate that opens, so the remaining
    ;; candidates are never touched once an index has been found.
    (or (some open-index candidates)
        (throw (FileNotFoundException.
                (str "Could not find FASTA Index file for " fasta-url))))))
(defn ^FASTAReader reader
  "Opens the FASTA file `f` and returns a FASTAReader for it.

  `f` is coerced with `clojure.java.io/file` and resolved to an absolute
  path. The returned reader holds a read-only RandomAccessFile, the result
  of `util/compressor-input-stream` for the same path, the file's URL, and
  a delay that looks up the FASTA index. Because the index lookup is
  delayed, a missing index file is only reported (as
  FileNotFoundException) when the delay is first dereferenced.

  If building the reader fails after the RandomAccessFile has been opened,
  that file is closed before the error is rethrown."
  [f]
  (let [path (.getAbsolutePath (cio/file f))
        raf (RandomAccessFile. path "r")]
    (try
      (FASTAReader. raf
                    (util/compressor-input-stream path)
                    (util/as-url path)
                    (delay (fasta-index (util/as-url path))))
      (catch Throwable e
        ;; Do not leak the open file handle; keep the original error as
        ;; the one the caller sees.
        (try
          (.close raf)
          (catch Exception suppressed
            (.addSuppressed e suppressed)))
        (throw e)))))
(defn ^FASTAReader clone-reader
  "Clones fasta reader sharing persistent objects.

  The clone gets its own read-only RandomAccessFile and its own compressor
  input stream, both opened from the URL of `rdr`; the URL and the index
  delay are shared with `rdr`.

  If the stream cannot be opened, the freshly opened RandomAccessFile is
  closed before the error is rethrown."
  [^FASTAReader rdr]
  (let [url (.url rdr)
        raf (RandomAccessFile. (cio/as-file url) "r")]
    (try
      (FASTAReader. raf
                    (util/compressor-input-stream url)
                    url
                    (.index-delay rdr))
      (catch Throwable e
        ;; Do not leak the open file handle; keep the original error as
        ;; the one the caller sees.
        (try
          (.close raf)
          (catch Exception suppressed
            (.addSuppressed e suppressed)))
        (throw e)))))
(defn read-headers
  "Returns the headers of the FASTA file behind `rdr`.

  The headers are taken from the FASTA index via `fai/get-headers`. When
  that raises FileNotFoundException, they are loaded from the reader's
  underlying file with `reader/load-headers` instead."
  [^FASTAReader rdr]
  (try
    (-> rdr .index-delay deref fai/get-headers)
    (catch FileNotFoundException _
      ;; No usable index: fall back to the FASTA file itself.
      (-> rdr .reader reader/load-headers))))
(defn read-indices
  "Returns the result of `fai/get-indices` for the FASTA index of `rdr`.

  Dereferences the reader's index delay. There is no fallback here: any
  exception raised while resolving the index propagates to the caller."
  [^FASTAReader rdr]
  (fai/get-indices @(.index-delay rdr)))
(defn read-sequences
  "Reads sequences by line, returning the line-separated sequences
  as a lazy sequence. Delegates to `reader/read-sequences`."
  [rdr]
  (reader/read-sequences rdr))
(defn read-sequence
  "Reads the sequence of one region from `rdr`.

  The region is a map whose `:chr`, `:start` and `:end` entries are passed,
  together with `opts`, to `reader/read-sequence`. The two-argument arity
  uses an empty option map."
  ([rdr region]
   (read-sequence rdr region {}))
  ([rdr {:keys [chr start end]} opts]
   (reader/read-sequence rdr chr start end opts)))
(defn read
  "Delegates to `reader/read` with `rdr` and returns its result.

  This namespace excludes `clojure.core/read`, so `read` here always means
  this function."
  [rdr]
  (reader/read rdr))
(defn reset
  "Delegates to `reader/reset` with `rdr` and returns its result.

  NOTE(review): presumably rewinds the reader to the start of the FASTA
  data - confirm against cljam.io.fasta.reader."
  [rdr]
  (reader/reset rdr))
(defn sequential-read
  "Reads the FASTA data sequentially from the stream of `rdr` by calling
  `reader/sequential-read-string`. `opts` is passed through unchanged and
  defaults to an empty map."
  ([rdr]
   (sequential-read rdr {}))
  ([^FASTAReader rdr opts]
   ;; The two sizes are the second and third arguments of
   ;; `reader/sequential-read-string`; presumably buffer sizes - confirm
   ;; against cljam.io.fasta.reader.
   (let [ten-mib (* 10 1024 1024)
         half-gib (* 512 1024 1024)]
     (reader/sequential-read-string (.stream rdr) ten-mib half-gib opts))))
(extend-type FASTAReader
  protocols/IReader
  (reader-url [this]
    (.url this))
  (read
    ;; Reading without options uses an empty option map.
    ([this]
     (protocols/read this {}))
    ([this opts]
     (protocols/read-all-sequences this opts)))
  (indexed? [this]
    ;; Forcing the delay triggers the index lookup; a missing index file
    ;; surfaces as FileNotFoundException and is reported as `false`.
    (try
      (deref (.index-delay this))
      true
      (catch FileNotFoundException _
        false)))

  protocols/IRegionReader
  (read-in-region
    ([this region]
     (protocols/read-in-region this region {}))
    ([this region opts]
     (protocols/read-sequence this region opts)))

  protocols/ISequenceReader
  (read-indices [this]
    (read-indices this))
  (read-all-sequences
    ([this]
     (protocols/read-all-sequences this {}))
    ([this opts]
     (sequential-read this opts)))
  (read-sequence
    ([this region]
     (protocols/read-sequence this region {}))
    ([this region opts]
     (read-sequence this region opts))))