-
Notifications
You must be signed in to change notification settings - Fork 12
/
sequence.clj
109 lines (95 loc) · 3.8 KB
/
sequence.clj
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
(ns cljam.io.sequence
"Functions to read and write formats representing sequences such as FASTA and
TwoBit."
(:refer-clojure :exclude [indexed?])
(:require [cljam.io.fasta.core :as fa-core]
[cljam.io.fasta.writer :as fa-writer]
[cljam.io.protocols :as protocols]
[cljam.io.twobit.reader :as tb-reader]
[cljam.io.twobit.writer :as tb-writer]
[cljam.io.util :as io-util])
(:import java.io.Closeable
cljam.io.fasta.reader.FASTAReader
cljam.io.fasta.writer.FASTAWriter
cljam.io.twobit.reader.TwoBitReader
cljam.io.twobit.writer.TwoBitWriter))
;; Reading
;; -------
(defn ^FASTAReader fasta-reader
  "Returns an open cljam.io.fasta.reader.FASTAReader of f, obtained by calling
  cljam.io.fasta.core/reader on f. Should be used inside with-open to ensure
  the reader is properly closed."
  [f]
  (fa-core/reader f))
(defn ^TwoBitReader twobit-reader
  "Returns an open cljam.io.twobit.reader.TwoBitReader of f, obtained by
  calling cljam.io.twobit.reader/reader on f. Should be used inside with-open
  to ensure the reader is properly closed."
  [f]
  (tb-reader/reader f))
(defn ^Closeable reader
  "Returns an open sequence reader for f. FASTA and TwoBit formats are
  supported.

  If f is already a FASTA or TwoBit reader, a clone of that reader is
  returned. Otherwise f is expected to represent a file (such as a String
  path, java.io.File, or java.net.URL) and a new reader is opened according to
  the file type that cljam.io.util/file-type reports for it: `:fasta` opens a
  FASTA reader and `:2bit` opens a TwoBit reader. Any other file type throws
  IllegalArgumentException."
  [f]
  (if (io-util/fasta-reader? f)
    ;; Existing FASTA reader: hand back a clone rather than reopening.
    (fa-core/clone-reader f)
    (if (io-util/twobit-reader? f)
      ;; Existing TwoBit reader: same treatment.
      (tb-reader/clone-reader f)
      ;; Not a known reader, so pick a reader from the detected file type.
      (let [file-type (io-util/file-type f)]
        (condp = file-type
          :fasta (fasta-reader f)
          :2bit (twobit-reader f)
          (throw (IllegalArgumentException. "Invalid file type")))))))
(defn read-sequence
  "Reads sequence in region of FASTA/TwoBit file.

  Both arities delegate to cljam.io.protocols/read-sequence on rdr; region
  and, when supplied, option are passed through unchanged."
  ([rdr region] (protocols/read-sequence rdr region))
  ([rdr region option] (protocols/read-sequence rdr region option)))
(defn read-all-sequences
  "Reads all sequences of FASTA/TwoBit file.

  Both arities delegate to cljam.io.protocols/read-all-sequences on rdr;
  option, when supplied, is passed through unchanged."
  ([rdr] (protocols/read-all-sequences rdr))
  ([rdr option] (protocols/read-all-sequences rdr option)))
(defn read-seq-summaries
  "Returns summaries of sequences in FASTA/TwoBit file. Returns a vector of maps
  containing `:name` and `:len`. Delegates to
  cljam.io.protocols/read-seq-summaries on rdr."
  [rdr]
  (protocols/read-seq-summaries rdr))
(defn read-indices
  "Reads metadata of indexed sequences. Returns a vector of maps containing
  `:name`, `:len` and other format-specific keys. Forces loading all indices.
  Delegates to cljam.io.protocols/read-indices on rdr."
  [rdr]
  (protocols/read-indices rdr))
(defn indexed?
  "Returns true if the reader can be randomly accessed, false if not. Note this
  function immediately realizes a delayed index. Delegates to
  cljam.io.protocols/indexed? on rdr. This name shadows clojure.core/indexed?,
  which the namespace declaration excludes."
  [rdr]
  (protocols/indexed? rdr))
;; Writing
;; -------
(defn ^FASTAWriter fasta-writer
  "Opens a cljam.io.fasta.writer.FASTAWriter on f and returns it. Wrap the
  call in with-open so the writer is always closed.

  An optional options map may be supplied (an empty map is used when it is
  omitted):
    :cols          - Maximum number of characters written in one row.
    :create-index? - If true, .fai will be created simultaneously."
  ([f]
   ;; No options given: open with an empty options map.
   (fa-writer/writer f {}))
  ([f options]
   (fa-writer/writer f options)))
(defn ^TwoBitWriter twobit-writer
  "Opens a cljam.io.twobit.writer.TwoBitWriter on f and returns it. Wrap the
  call in with-open so the writer is always closed.

  An optional options map may be supplied (an empty map is used when it is
  omitted):
    :index - metadata of indexed sequences. The amount of memory usage can be
             reduced if index is supplied."
  ([f]
   ;; No options given: open with an empty options map.
   (tb-writer/writer f {}))
  ([f options]
   (tb-writer/writer f options)))
(defn ^Closeable writer
  "Returns an open sequence writer for f, chosen from the file type that
  cljam.io.util/file-type reports for it: `:fasta` opens a FASTA writer and
  `:2bit` opens a TwoBit writer. Any other file type throws
  IllegalArgumentException.

  Any additional arguments are forwarded unchanged to fasta-writer or
  twobit-writer, so callers may pass the options map those functions accept."
  [f & options]
  (let [open-writer (condp = (io-util/file-type f)
                      :fasta fasta-writer
                      :2bit twobit-writer
                      (throw (IllegalArgumentException. "Invalid file type")))]
    ;; Forward the trailing arguments exactly as they were received.
    (apply open-writer f options)))
(defn write-sequences
  "Writes all sequences to FASTA/TwoBit file. Delegates to
  cljam.io.protocols/write-sequences, passing wtr and seqs through unchanged."
  [wtr seqs]
  (protocols/write-sequences wtr seqs))