-
Notifications
You must be signed in to change notification settings - Fork 15
/
utils.clj
233 lines (182 loc) · 5.47 KB
/
utils.clj
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
(ns panthera.pandas.utils
(:require
[libpython-clj.python :as py]
[libpython-clj.require :refer [require-python]]
[camel-snake-kebab.core :as csk]
[camel-snake-kebab.extras :as cske]
[clojure.core.memoize :as m]))
;; Load-time setup: `py/initialize!` (libpython-clj) is invoked before any
;; Python module is required or imported by the forms below.
(py/initialize!)
;; Exposes Python's `builtins` module under the `bt` alias (used as `bt/slice`).
(require-python '[builtins :as bt])
;; NOTE(review): commented-out alternative way of reaching `builtins`;
;; presumably superseded by the `bt` alias above -- confirm before deleting.
;(defonce builtins (py/import-module "builtins"))
;; `pd` holds the imported `pandas` module. `defonce` keeps an already bound
;; value, so reloading this namespace does not re-run the import.
(defonce pd (py/import-module "pandas"))
(defn slice
  "Returns a Python slice. This is what you'd get by doing something like
  `1:10` and it is similar to `(range 1 10)`, but works with everything
  not only numbers, so `(slice \"a\" \"f\")` would mean
  [\"a\" \"b\" \"c\" \"d\" \"e\" \"f\"]. Use this for subsetting arrays,
  serieses and data-frames.

  Arguments mirror Python's `slice`: `(slice stop)`, `(slice start stop)`
  or `(slice start stop step)`. Called with no arguments it returns the
  empty slice (Python's `slice(None)`).

  Example:
  ```
  (slice) ; the empty slice, it means every index
  (slice 5) ; every index up to 5
  (slice 3 5) ; every index from 3 to 5
  (slice \"2019-10-11\" \"2019-12-3\") ; works with dates as well
  (slice \"a\" \"d\") ; works with strings
  (slice 1 10 2) ; every 2 values between 1 and 10
  ```"
  [& args]
  ;; The slice must be the last form of the body so that it is actually
  ;; returned. A trailing `(comment ...)` form evaluates to nil and would
  ;; make the whole function return nil.
  (if (seq args)
    (apply bt/slice args)
    ;; Python's `slice()` rejects a call with zero arguments, so the empty
    ;; slice is spelled `slice(None)`.
    (bt/slice nil)))
(defn pytype
  "Looks up the Python type of one or more objects.

  - no arguments: returns `nil`
  - one argument: returns whatever `py/python-type` reports for it
  - several arguments: returns a lazy sequence holding the type of each
    argument, in the order given

  Examples:
  ```
  (pytype obj)
  (pytype my-df my-srs this)
  ```"
  ([] nil)
  ([obj]
   (py/python-type obj))
  ([obj & objs]
   ;; Put the first object back in front of the rest and type each one.
   (map pytype (cons obj objs))))
(def memo-key-converter
  "Memoized `csk/->snake_case_string`: turns Clojure kebab-case keys into
  Python-style snake_case strings. Results are kept in a FIFO cache with a
  threshold of 512 entries.

  Example:
  ```
  (memo-key-converter :a-key) ; \"a_key\"
  ```"
  (m/fifo csk/->snake_case_string
          {}
          :fifo/threshold 512))
(def memo-columns-converter
  "Memoized conversion of Python column labels to idiomatic Clojure values.
  Results are kept in a FIFO cache with a threshold of 512 entries.

  - numbers and `nil` are returned untouched
  - strings become kebab-case keywords
  - anything else is treated as a collection whose elements are each
    converted to a kebab-case keyword, returned as a vector

  Examples:
  ```
  (memo-columns-converter \"a_name\") ; :a-name
  (memo-columns-converter \"ALL_CAPS\") ; :all-caps
  ```"
  (m/fifo
   (fn [label]
     (cond
       ;; Pass-through cases: numeric labels and missing labels.
       (or (number? label) (nil? label)) label
       (string? label) (csk/->kebab-case-keyword label)
       :else (mapv csk/->kebab-case-keyword label)))
   {}
   :fifo/threshold 512))
(defn vec->pylist
  "Turns the iterable Clojure data structure `v` into a Python list by
  delegating to `py/->py-list`.

  Example:
  ```
  (vec->pylist my-df)
  ```"
  [v]
  (py/->py-list v))
(defn nested-vector?
  "Tells whether `v` holds at least one vector among its elements.
  Returns `true` as soon as a vector element is found and `nil` when there
  is none (including when `v` is empty or `nil`).

  Example:
  ```
  (nested-vector? [[1 2] [3 4]])
  ```"
  [v]
  (loop [remaining (seq v)]
    (when remaining
      (if (vector? (first remaining))
        true
        (recur (next remaining))))))
(defn nested-slice?
  "Tells whether at least one element of `v` has the Python type `:slice`
  according to `pytype`. Returns `true` for the first such element and
  `nil` when none is found.

  Example:
  ```
  (nested-slice? [(slice 3 5) (slice)])
  ```"
  [v]
  (some (fn [item]
          (identical? :slice (pytype item)))
        v))
(defn vals->pylist
  "Picks the conversion to a Python-friendly data structure that fits `obj`:

  - non-collections and maps are returned as they are
  - collections containing at least one vector become a 2d array
    (`to-array-2d`)
  - vectors with no slice inside become a Python list
  - everything else (including vectors that contain slices) is returned
    as it is

  Examples:
  ```
  (vals->pylist [1 2 3])
  (vals->pylist [[1 2] [3 4]])
  (vals->pylist [(slice 1 5) (slice)])
  ```"
  [obj]
  (cond
    ;; Scalars and maps need no conversion.
    (or (not (coll? obj)) (map? obj)) obj
    (nested-vector? obj) (to-array-2d obj)
    ;; Only slice-free vectors are turned into Python lists.
    (and (vector? obj) (not (nested-slice? obj))) (py/->py-list obj)
    :else obj))
(defn keys->pyargs
  "Prepares the map `m` for use as Python keyword arguments: every value
  goes through `vals->pylist` and every key is then converted with
  `memo-key-converter` (via `cske/transform-keys`).

  Examples:
  ```
  (keys->pyargs {:a 1 :a-key [1 2 3] \"c\" (slice)})
  ```"
  [m]
  (->> m
       ;; First pass: convert the values, keeping the original keys.
       (reduce-kv (fn [acc k v]
                    (assoc acc k (vals->pylist v)))
                  {})
       ;; Second pass: rename the keys.
       (cske/transform-keys memo-key-converter)))
(defn series?
  "Returns true when `pytype` reports `:series` for `obj`, false otherwise."
  [obj]
  (let [kind (pytype obj)]
    (identical? kind :series)))
(defn data-frame?
  "Returns true when `pytype` reports `:data-frame` for `obj`, false
  otherwise."
  [obj]
  (-> obj
      pytype
      (identical? :data-frame)))
;; Dispatch value is the result of `series?` on the argument:
;; `true` for a series, `false` for anything else.
(defmulti to-clj
  #(series? %))
;; Non-series case: builds a map from the object's `index`, `columns` and
;; `values` attributes. The values are wrapped in a lazy seq, so that
;; attribute is only read when `:data` is first realized.
(defmethod to-clj false
  [obj]
  (let [index   (py/get-attr obj "index")
        columns (py/get-attr obj "columns")]
    {:id   index
     :cols columns
     :data (lazy-seq (py/get-attr obj "values"))}))
;; Series case: builds a map from the object's `index`, `name` and `values`
;; attributes. A missing name falls back to "unnamed". The values are
;; wrapped in a lazy seq, so that attribute is only read when `:data` is
;; first realized.
(defmethod to-clj true
  [obj]
  (let [index       (py/get-attr obj "index")
        series-name (or (py/get-attr obj "name") "unnamed")]
    {:id   index
     :cols series-name
     :data (lazy-seq (py/get-attr obj "values"))}))
(defn ->clj
  "Converts the given panthera data-frame or series to Clojure data.
  The idea is to have a common, simple and fast access point to conversion
  of the main data structures between languages.

  The optional second argument `clj?` selects the output shape:

  - falsey or omitted: the result of `to-clj`, a map with the keys `:id`,
    `:cols` and `:data`
  - truthy: a vector of maps
    - `series`: every map has a single key and a single value. The key is
      the series name run through `memo-columns-converter`; a series
      without a name falls back to the `:unnamed` key.
    - data-frame: every map has the converted column names as keys and the
      corresponding row values as values.

  Examples:
  ```
  (->clj my-srs)
  (->clj my-df)
  (->clj my-df true)
  ```"
  [df-or-srs & [clj?]]
  (cond
    (not clj?)
    (to-clj df-or-srs)

    (series? df-or-srs)
    (let [label    (memo-columns-converter
                    (or (py/get-attr df-or-srs "name")
                        "unnamed"))
          wrap-val (fn [value] {label value})]
      (into [] (map wrap-val) (vec df-or-srs)))

    :else
    (let [col-keys (map memo-columns-converter
                        (py/get-attr df-or-srs "columns"))
          row->map (fn [row] (zipmap col-keys row))]
      (into [] (map row->map) (py/get-attr df-or-srs "values")))))
(defn simple-kw-call
  "Calls the attribute `kw` of `df` through `py/call-attr-kw` with no
  positional arguments. The optional map `attrs` is converted with
  `keys->pyargs` and passed as the keyword arguments."
  [df kw & [attrs]]
  (let [kwargs (keys->pyargs attrs)]
    (py/call-attr-kw df kw [] kwargs)))
(defn kw-call
  "Calls the attribute `kw` of `df` through `py/call-attr-kw` with exactly
  one positional argument: `pos`, converted with `vals->pylist`. The
  optional map `attrs` is converted with `keys->pyargs` and passed as the
  keyword arguments."
  [df kw pos & [attrs]]
  (let [positional [(vals->pylist pos)]
        kwargs     (keys->pyargs attrs)]
    (py/call-attr-kw df kw positional kwargs)))