Skip to content

Commit

Permalink
Working packing deeper into the system.
Browse files Browse the repository at this point in the history
  • Loading branch information
cnuernber committed Oct 2, 2020
1 parent 09d1a12 commit f8230db
Show file tree
Hide file tree
Showing 7 changed files with 183 additions and 117 deletions.
2 changes: 2 additions & 0 deletions src/tech/v3/datatype.clj
Original file line number Diff line number Diff line change
Expand Up @@ -164,8 +164,10 @@ user> (dtype/make-reader :float32 5 (* idx 2))
make-container
copy!
->array-buffer
->array
->byte-array
->short-array
->char-array
->int-array
->long-array
->float-array
Expand Down
23 changes: 18 additions & 5 deletions src/tech/v3/datatype/array_buffer.clj
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
(:require [tech.v3.datatype.protocols :as dtype-proto]
[tech.v3.datatype.typecast :as typecast]
[tech.v3.datatype.casting :as casting]
[tech.v3.datatype.packing :as packing]
[tech.v3.datatype.pprint :as dtype-pp]
[primitive-math :as pmath])
(:import [clojure.lang IObj Counted Indexed IFn]
Expand All @@ -14,7 +15,10 @@

(defmacro java-array-buffer->io
[datatype cast-dtype advertised-datatype buffer java-ary offset n-elems]
`(let [~'java-ary (typecast/datatype->array ~datatype ~java-ary)]
`(let [~'java-ary (typecast/datatype->array ~datatype ~java-ary)
{~'unpacking-read :unpacking-read
~'packing-write :packing-write} (packing/buffer-packing-pair
~advertised-datatype)]
(reify
dtype-proto/PToArrayBuffer
(convertible-to-array-buffer? [this#] true)
Expand Down Expand Up @@ -73,9 +77,14 @@
(aget ~'java-ary (pmath/+ ~offset ~'idx)))))
:else (throw (Exception. (format "Macro expansion error-%s"
cast-dtype))))])
(when (= :char cast-dtype)
(if (= :char cast-dtype)
[`(readObject [rdr# ~'idx]
(.readChar rdr# ~'idx))]))
(.readChar rdr# ~'idx))]
;;Integer types may be representing packed objects
[`(readObject [~'rdr ~'idx]
(if ~'unpacking-read
(~'unpacking-read ~'rdr ~'idx)
(.readLong ~'rdr ~'idx)))]))
(casting/float-type? cast-dtype)
[`(readDouble [rdr# ~'idx]
(casting/datatype->unchecked-cast-fn
Expand Down Expand Up @@ -134,9 +143,13 @@
:else (throw (Exception. (format "Macro expansion error-%s"
cast-dtype))))])
;;Overload the writeObject pathway to use writeChar instead of writeLong
(when (= :char cast-dtype)
(if (= :char cast-dtype)
[`(writeObject [rdr# ~'idx ~'value]
(.writeChar rdr# ~'idx (char ~'value)))]))
(.writeChar rdr# ~'idx (char ~'value)))]
[`(writeObject [~'rdr ~'idx ~'value]
(if ~'packing-write
(~'packing-write ~'rdr ~'idx ~'value)
(.writeLong ~'rdr ~'idx (long ~'value))))]))
(casting/float-type? cast-dtype)
[`(writeDouble [rdr# ~'idx ~'value]
(ArrayHelpers/aset ~'java-ary (pmath/+ ~offset ~'idx)
Expand Down
17 changes: 9 additions & 8 deletions src/tech/v3/datatype/casting.clj
Original file line number Diff line number Diff line change
Expand Up @@ -145,14 +145,15 @@

(defn numeric-byte-width
^long [dtype]
(long (cond
(int-types dtype)
(quot (int-width dtype) 8)
(float-types dtype)
(quot (float-width dtype) 8)
:else
(throw (ex-info (format "datatype is not numeric: %s" dtype)
{:datatype dtype})))))
(let [dtype (un-alias-datatype dtype)]
(long (cond
(int-types dtype)
(quot (int-width dtype) 8)
(float-types dtype)
(quot (float-width dtype) 8)
:else
(throw (ex-info (format "datatype is not numeric: %s" dtype)
{:datatype dtype}))))))


(defn numeric-type?
Expand Down
27 changes: 19 additions & 8 deletions src/tech/v3/datatype/copy_make_container.clj
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
(ns tech.v3.datatype.copy-make-container
(:require [tech.v3.datatype.base :as dtype-base]
[tech.v3.datatype.copy :as dtype-copy]
[tech.v3.datatype.packing :as packing]
[tech.v3.datatype.protocols :as dtype-proto]
[tech.v3.datatype.array-buffer :as array-buffer]
[tech.v3.datatype.native-buffer :as native-buffer]
Expand Down Expand Up @@ -115,8 +116,10 @@
(defn ->array-buffer
"Perform a NaN-aware conversion into an array buffer. Default
nan-strategy is :remove which forces a pass over float datatypes
in order to remove nan data. Nan strategies can be:
[:keep :remove :exception]"
in order to remove nan data.
Nan strategies can be: [:keep :remove :exception]"
(^ArrayBuffer [datatype {:keys [nan-strategy]} item]
(let [nan-strategy (if (or (= datatype :float32)
(= datatype :float64))
Expand All @@ -132,7 +135,8 @@
(^ArrayBuffer [datatype item]
(->array-buffer datatype nil item))
(^ArrayBuffer [item]
(->array-buffer (dtype-base/elemwise-datatype item) nil item)))
(->array-buffer (packing/unpack-datatype (dtype-base/elemwise-datatype item))
nil item)))


(defn ->array
Expand All @@ -152,25 +156,32 @@
([datatype item]
(->array datatype nil item))
(^ArrayBuffer [item]
(->array (dtype-base/elemwise-datatype item) nil item)))
(->array (packing/unpack-datatype (dtype-base/elemwise-datatype item))
nil item)))

(defn ->byte-array
"Efficiently convert nearly anyting into a byte array."
"Efficiently convert nearly anything into a byte array."
^bytes [data]
(->array :int8 nil data))

(defn ->short-array
"Efficiently convert nearly anyting into a short array."
"Efficiently convert nearly anything into a short array."
^shorts [data]
(->array :int16 nil data))

(defn ->char-array
"Efficiently convert nearly anything into a char array."
;; Thin wrapper: calls `->array` with the :char datatype and nil as the
;; middle argument (the same nil the 2-arity `->array` supplies itself).
;; The ^chars hint on the arg vector tags the return value as char[].
^chars [data]
(->array :char nil data))


(defn ->int-array
"Efficiently convert nearly anyting into a int array."
"Efficiently convert nearly anything into a int array."
^ints [data]
(->array :int32 nil data))

(defn ->long-array
"Efficiently convert nearly anyting into a long array."
"Efficiently convert nearly anything into a long array."
^longs [data]
(->array :int64 nil data))

Expand Down
9 changes: 5 additions & 4 deletions src/tech/v3/datatype/datetime/packing.clj
Original file line number Diff line number Diff line change
@@ -1,9 +1,7 @@
(ns tech.v3.datatype.datetime.packing
(:require [tech.v3.datatype.packing :as packing]
[tech.v3.datatype.datetime.base :as dt-base]
[tech.v3.datatype.protocols :as dtype-proto]
[primitive-math :as pmath]
[tech.v3.datatype.base :as dtype-base])
[primitive-math :as pmath])
(:import [tech.v3.datatype PackedLocalDate]
[java.time Instant LocalDate Duration]))

Expand All @@ -24,7 +22,10 @@
(PackedLocalDate/pack %)
0)
(fn [^long value]
(if-not (== 0 value)
;;The missing value indicator for integers is int/MIN_VALUE
;;and arrays are initialized to be zero
(if-not (or (== 0 value)
(== Integer/MIN_VALUE value))
(PackedLocalDate/asLocalDate (pmath/int value))
nil)))

Expand Down
202 changes: 110 additions & 92 deletions src/tech/v3/datatype/native_buffer.clj
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
(:require [tech.resource :as resource]
[tech.v3.datatype.protocols :as dtype-proto]
[tech.v3.datatype.casting :as casting]
[tech.v3.datatype.packing :as packing]
[tech.v3.datatype.typecast :as typecast]
[tech.v3.datatype.errors :as errors]
[tech.v3.datatype.pprint :as dtype-pp]
Expand Down Expand Up @@ -167,98 +168,115 @@
(defmacro native-buffer->buffer-macro
[datatype advertised-datatype buffer address n-elems swap?]
(let [byte-width (casting/numeric-byte-width datatype)]
`(reify
dtype-proto/PToNativeBuffer
(convertible-to-native-buffer? [this#] true)
(->native-buffer [this#] ~buffer)
dtype-proto/PEndianness
(endianness [item] (dtype-proto/endianness ~buffer))
;;Forward protocol methods that are efficiently implemented by the buffer
dtype-proto/PSubBuffer
(sub-buffer [this# offset# length#]
(-> (dtype-proto/sub-buffer ~buffer offset# length#)
(dtype-proto/->reader)))
~(typecast/datatype->io-type (casting/safe-flatten datatype))
(elemwiseDatatype [rdr#] ~advertised-datatype)
(lsize [rdr#] ~n-elems)
(allowsRead [rdr#] true)
(allowsWrite [rdr#] true)
~@(cond
(= datatype :boolean)
[`(readBoolean [rdr# ~'idx]
(read-value ~address ~swap? ~datatype ~byte-width ~n-elems))]
;;For integer types, everything implements readlong.
;;They also implement readX where X maps to exactly the datatype.
;;For example byte arrays implement readLong and readByte.
(casting/integer-type? datatype)
(concat
[`(readLong [rdr# ~'idx]
(casting/datatype->unchecked-cast-fn
~datatype :int64
(read-value ~address ~swap? ~datatype ~byte-width ~n-elems)))]
(when-not (= :int64 (casting/safe-flatten datatype))
;;Exact reader fns for the exact datatype
[(cond
(= datatype :int8)
`(readByte [rdr# ~'idx]
(read-value ~address ~swap? ~datatype ~byte-width ~n-elems))
(= (casting/safe-flatten datatype) :int16)
`(readShort [rdr# ~'idx]
(read-value ~address ~swap? ~datatype ~byte-width ~n-elems))
(= datatype :char)
`(readChar [rdr# ~'idx]
(read-value ~address ~swap? ~datatype ~byte-width ~n-elems))
(= (casting/safe-flatten datatype) :int32)
`(readInt [rdr# ~'idx]
(read-value ~address ~swap? ~datatype ~byte-width ~n-elems))
:else (throw (Exception. (format "Macro expansion error-%s"
datatype))))]))
(casting/float-type? datatype)
[`(readDouble [rdr# ~'idx]
(casting/datatype->unchecked-cast-fn
~datatype :float64
(read-value ~address ~swap? ~datatype ~byte-width ~n-elems)))
`(readFloat [rdr# ~'idx]
(casting/datatype->unchecked-cast-fn
~datatype :float32
(read-value ~address ~swap? ~datatype ~byte-width ~n-elems)))]
:else
[`(readObject [rdr# ~'idx]
(read-value ~address ~swap? ~datatype ~byte-width ~n-elems))])
~@(cond
(= :boolean datatype)
[`(writeBoolean [wtr# idx# ~'value]
(write-value ~address ~swap? ~datatype ~byte-width ~n-elems))]
(casting/integer-type? datatype)
(concat
[`(writeLong [rdr# ~'idx ~'value]
(write-value ~address ~swap? ~datatype ~byte-width ~n-elems))]
(when-not (= :int64 (casting/safe-flatten datatype))
;;Exact writer fns for the exact datatype
[(cond
(= datatype :int8)
`(writeByte [rdr# ~'idx ~'value]
(write-value ~address ~swap? ~datatype ~byte-width ~n-elems))
(= (casting/safe-flatten datatype) :int16)
`(writeShort [rdr# ~'idx ~'value]
(write-value ~address ~swap? ~datatype ~byte-width ~n-elems))
(= datatype :char)
`(writeChar [rdr# ~'idx ~'value]
(write-value ~address ~swap? ~datatype ~byte-width ~n-elems))
(= (casting/safe-flatten datatype) :int32)
`(writeInt [rdr# ~'idx ~'value]
(write-value ~address ~swap? ~datatype ~byte-width ~n-elems))
:else (throw (Exception. (format "Macro expansion error-%s"
datatype))))]))
(casting/float-type? datatype)
[`(writeDouble [rdr# ~'idx ~'value]
(write-value ~address ~swap? ~datatype ~byte-width ~n-elems))
`(writeFloat [rdr# ~'idx ~'value]
(write-value ~address ~swap? ~datatype ~byte-width ~n-elems))]
:else
[`(writeObject [wtr# idx# val#]
;;Writing values is always checked, no options.
(write-value ~address ~swap? ~datatype ~byte-width ~n-elems))]))))
`(let [{~'unpacking-read :unpacking-read
~'packing-write :packing-write} (packing/buffer-packing-pair ~advertised-datatype)]
(reify
dtype-proto/PToNativeBuffer
(convertible-to-native-buffer? [this#] true)
(->native-buffer [this#] ~buffer)
dtype-proto/PEndianness
(endianness [item] (dtype-proto/endianness ~buffer))
;;Forward protocol methods that are efficiently implemented by the buffer
dtype-proto/PSubBuffer
(sub-buffer [this# offset# length#]
(-> (dtype-proto/sub-buffer ~buffer offset# length#)
(dtype-proto/->reader)))
~(typecast/datatype->io-type (casting/safe-flatten datatype))
(elemwiseDatatype [rdr#] ~advertised-datatype)
(lsize [rdr#] ~n-elems)
(allowsRead [rdr#] true)
(allowsWrite [rdr#] true)
~@(cond
(= datatype :boolean)
[`(readBoolean [rdr# ~'idx]
(read-value ~address ~swap? ~datatype ~byte-width ~n-elems))]
;;For integer types, everything implements readlong.
;;They also implement readX where X maps to exactly the datatype.
;;For example byte arrays implement readLong and readByte.
(casting/integer-type? datatype)
(concat
[`(readLong [rdr# ~'idx]
(casting/datatype->unchecked-cast-fn
~datatype :int64
(read-value ~address ~swap? ~datatype ~byte-width ~n-elems)))]
(when-not (= :int64 (casting/safe-flatten datatype))
;;Exact reader fns for the exact datatype
[(cond
(= datatype :int8)
`(readByte [rdr# ~'idx]
(read-value ~address ~swap? ~datatype ~byte-width ~n-elems))
(= (casting/safe-flatten datatype) :int16)
`(readShort [rdr# ~'idx]
(read-value ~address ~swap? ~datatype ~byte-width ~n-elems))
(= datatype :char)
`(readChar [rdr# ~'idx]
(read-value ~address ~swap? ~datatype ~byte-width ~n-elems))
(= (casting/safe-flatten datatype) :int32)
`(readInt [rdr# ~'idx]
(read-value ~address ~swap? ~datatype ~byte-width ~n-elems))
:else (throw (Exception. (format "Macro expansion error-%s"
datatype))))])
(if (= :char datatype)
[`(readObject [rdr# ~'idx]
(.readChar rdr# ~'idx))]
;;Integer types may be representing packed objects
[`(readObject [~'rdr ~'idx]
(if ~'unpacking-read
(~'unpacking-read ~'rdr ~'idx)
(.readLong ~'rdr ~'idx)))]))
(casting/float-type? datatype)
[`(readDouble [rdr# ~'idx]
(casting/datatype->unchecked-cast-fn
~datatype :float64
(read-value ~address ~swap? ~datatype ~byte-width ~n-elems)))
`(readFloat [rdr# ~'idx]
(casting/datatype->unchecked-cast-fn
~datatype :float32
(read-value ~address ~swap? ~datatype ~byte-width ~n-elems)))]
:else
[`(readObject [rdr# ~'idx]
(read-value ~address ~swap? ~datatype ~byte-width ~n-elems))])
~@(cond
(= :boolean datatype)
[`(writeBoolean [wtr# idx# ~'value]
(write-value ~address ~swap? ~datatype ~byte-width ~n-elems))]
(casting/integer-type? datatype)
(concat
[`(writeLong [rdr# ~'idx ~'value]
(write-value ~address ~swap? ~datatype ~byte-width ~n-elems))]
(when-not (= :int64 (casting/safe-flatten datatype))
;;Exact writer fns for the exact datatype
[(cond
(= datatype :int8)
`(writeByte [rdr# ~'idx ~'value]
(write-value ~address ~swap? ~datatype ~byte-width ~n-elems))
(= (casting/safe-flatten datatype) :int16)
`(writeShort [rdr# ~'idx ~'value]
(write-value ~address ~swap? ~datatype ~byte-width ~n-elems))
(= datatype :char)
`(writeChar [rdr# ~'idx ~'value]
(write-value ~address ~swap? ~datatype ~byte-width ~n-elems))
(= (casting/safe-flatten datatype) :int32)
`(writeInt [rdr# ~'idx ~'value]
(write-value ~address ~swap? ~datatype ~byte-width ~n-elems))
:else (throw (Exception. (format "Macro expansion error-%s"
datatype))))])
(if (= :char datatype)
[`(writeObject [rdr# ~'idx ~'value]
(.writeChar rdr# ~'idx (char ~'value)))]
[`(writeObject [~'rdr ~'idx ~'value]
(if ~'packing-write
(~'packing-write ~'rdr ~'idx ~'value)
(.writeLong ~'rdr ~'idx (long ~'value))))]))
(casting/float-type? datatype)
[`(writeDouble [rdr# ~'idx ~'value]
(write-value ~address ~swap? ~datatype ~byte-width ~n-elems))
`(writeFloat [rdr# ~'idx ~'value]
(write-value ~address ~swap? ~datatype ~byte-width ~n-elems))]
:else
[`(writeObject [wtr# idx# val#]
;;Writing values is always checked, no options.
(write-value ~address ~swap? ~datatype ~byte-width ~n-elems))])))))


(declare native-buffer->buffer)
Expand Down

0 comments on commit f8230db

Please sign in to comment.