/
io.clj
151 lines (125 loc) · 5.69 KB
/
io.clj
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
;;; io.clj -- Data I/O library for Clojure built on CSVReader
;; by David Edgar Liebke http://incanter.org
;; March 11, 2009
;; Copyright (c) David Edgar Liebke, 2009. All rights reserved. The use
;; and distribution terms for this software are covered by the Eclipse
;; Public License 1.0 (http://opensource.org/licenses/eclipse-1.0.php)
;; which can be found in the file epl-v10.html at the root of this
;; distribution. By using this software in any fashion, you are
;; agreeing to be bound by the terms of this license. You must not
;; remove this notice, or any other, from this software.
;; CHANGE LOG
;; March 11, 2009: First version
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; DATA IO FUNCTIONS
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
(ns
^{:doc
"Library for reading and writing Incanter datasets and matrices."}
incanter.io
(:import (java.io FileReader FileWriter File)
(au.com.bytecode.opencsv CSVReader))
(:use [incanter.core :only (dataset save)])
(:require [clojure.java.io :as io]))
(defn- parse-string
  "Converts the string `value` to a number when it looks like one.

  A string made only of digits, with an optional leading sign, is returned
  as a Long; if it does not fit in a Long it is parsed as a Double instead.
  Any other string is handed to Double/parseDouble. A string that cannot be
  parsed as a number is returned unchanged."
  [value]
  (letfn [(parse-double [s]
            (try (Double/parseDouble s)
                 (catch NumberFormatException _ s)))]
    (if (re-matches #"[-+]?\d+" value)
      ;; Integer-looking text: prefer an exact Long, and fall back to a
      ;; Double on overflow rather than leaving a numeric field as a string.
      (try (Long/parseLong value)
           (catch NumberFormatException _ (parse-double value)))
      (parse-double value))))
(defn read-dataset
  "
  Returns a dataset read from a file or a URL.

  Arguments:
    filename -- passed directly to clojure.java.io/reader

  Options:
    :delim (default \\,), other options (\\tab \\space \\|  etc)
    :quote (default \\\") character used for quoting strings
    :skip (default 0) the number of lines to skip at the top of the file.
    :header (default false) indicates the file has a header line
    :keyword-headers (default true) convert the column names to keywords
    :compress-delim (default true if delim = \\space, false otherwise) means
                    compress multiple adjacent delimiters into a single
                    delimiter; an explicit true or false always wins over
                    the default

  Lines whose fields are all empty are dropped. Every data field is converted
  with parse-string, so numeric-looking text becomes a number. When there is
  no header line the columns are named col0, col1, ... using the width of the
  first data row.
  "
  ([filename & {:keys [delim keyword-headers quote skip header compress-delim]
                :or {delim \,
                     quote \"
                     skip 0
                     header false
                     keyword-headers true}}]
     (with-open [reader ^CSVReader (CSVReader.
                                     (io/reader filename)
                                     delim
                                     quote
                                     skip)]
       (let [;; Only fall back to the delimiter-based default when the caller
             ;; did not pass :compress-delim at all, so an explicit false is
             ;; honoured for space-delimited files.
             compress-delim? (if (nil? compress-delim)
                               (= delim \space)
                               compress-delim)
             ;; Returns the line's fields as a vector of strings, or nil when
             ;; every field is empty.
             clean-line (fn [line]
                          (let [fields (if compress-delim?
                                         (remove #(= % "") line)
                                         (seq line))]
                            (when (some #(not= % "") fields)
                              (vec fields))))
             ;; Read everything eagerly: the reader is closed as soon as
             ;; with-open exits.
             raw-rows (loop [rows []]
                        (if-let [line (.readNext reader)]
                          (recur (if-let [fields (clean-line line)]
                                   (conj rows fields)
                                   rows))
                          rows))
             parse-row (fn [fields] (seq (vec (map parse-string fields))))]
         (if header
           ;; have header row
           (let [raw-header (first raw-rows)
                 ;; Keyword names are built from the raw header text; running
                 ;; them through parse-string first would turn a header such
                 ;; as "2009" into a number, which `keyword` cannot convert.
                 col-names (if keyword-headers
                             (map keyword raw-header)
                             (parse-row raw-header))]
             (dataset col-names (vec (map parse-row (rest raw-rows)))))
           ;; no header row so build a default one
           (let [parsed-rows (vec (map parse-row raw-rows))
                 col-count (count (first parsed-rows))
                 col-names (vec (map #(str "col" %) (range col-count)))]
             (dataset (if keyword-headers
                        (map keyword col-names)
                        col-names)
                      parsed-rows)))))))
;; Writes the matrix `mat` to `filename` as delimited text, one matrix row
;; per line.
;;
;; Options:
;;   :delim  (default \,) separator placed between fields
;;   :header (default nil) sequence of column names; written as the first
;;           line only when not appending
;;   :append (default false) passed to the FileWriter constructor as its
;;           append flag
;;
;; Returns nil.
(defmethod save incanter.Matrix
  [mat filename & {:keys [delim header append]
                   :or {append false delim \,}}]
  ;; with-open closes the writer even when a write throws, so the file handle
  ;; is never leaked; closing also flushes any buffered output.
  (with-open [file-writer (java.io.FileWriter. filename append)]
    (letfn [(write-line [fields]
              (.write file-writer
                      (str (apply str (interpose delim fields)) \newline)))]
      (when (and header (not append))
        (write-line header))
      (doseq [row mat]
        ;; Iterating the matrix can yield plain numbers instead of rows;
        ;; each such number goes on a line of its own.
        (if (number? row)
          (.write file-writer (str row \newline))
          (write-line row))))))
;; Writes `dataset` to `filename` as delimited text, one dataset row per
;; line, with the fields in the order given by the dataset's :column-names.
;;
;; Options:
;;   :delim  (default \,) separator placed between fields
;;   :header (default: the dataset's column names, with keywords converted
;;           via `name`) sequence of column names; written as the first line
;;           only when not appending
;;   :append (default false) passed to the FileWriter constructor as its
;;           append flag
;;
;; Returns nil.
(defmethod save :incanter.core/dataset
  [dataset filename & {:keys [delim header append]
                       :or {append false delim \,}}]
  (let [columns (:column-names dataset)
        header (or header
                   (map #(if (keyword? %) (name %) %) columns))]
    ;; with-open closes the writer even when a write throws, so the file
    ;; handle is never leaked; closing also flushes any buffered output.
    (with-open [file-writer (java.io.FileWriter. filename append)]
      (letfn [(write-line [fields]
                (.write file-writer
                        (str (apply str (interpose delim fields)) \newline)))]
        (when (and header (not append))
          (write-line header))
        (doseq [row (:rows dataset)]
          ;; Each row is called as a function of the column name to look up
          ;; that column's value.
          (write-line (map row columns)))))))
;; Writes the image `img` to `filename` through javax.imageio.ImageIO/write.
;;
;; Options:
;;   :format (default "png") format name handed to ImageIO/write
;;
;; Returns whatever ImageIO/write returns.
(defmethod save java.awt.image.BufferedImage
  [img filename & {:keys [format] :or {format "png"}}]
  (let [target (.getAbsoluteFile (java.io.File. filename))]
    (javax.imageio.ImageIO/write img format target)))