Skip to content

Commit

Permalink
Browse files Browse the repository at this point in the history
  • Loading branch information
cnuernber committed Nov 6, 2020
1 parent 3d6acc6 commit 129d935
Show file tree
Hide file tree
Showing 3 changed files with 33 additions and 2 deletions.
3 changes: 2 additions & 1 deletion java/tech/v3/datatype/Buffer.java
Original file line number Diff line number Diff line change
Expand Up @@ -46,8 +46,9 @@ public interface Buffer extends DatatypeBase, Iterable, IFn,
/** Element count as an int: the value of lsize() passed through RT.intCast. */
default int size() {
  return RT.intCast(lsize());
}
/** Indexed read; delegates to readObject for the element at idx. */
default Object get(int idx) {
  return readObject(idx);
}
/**
 * Stores val at idx and returns the value that was at idx before the write.
 *
 * Returning the previous element (rather than null) follows the
 * java.util.List.set convention; callers that swap elements through set,
 * such as java.util.Collections.shuffle, depend on the returned value.
 *
 * @param idx index of the element to replace
 * @param val new value, written via writeObject
 * @return the element read from idx before it was overwritten
 */
default Object set(int idx, Object val) {
  // Read first: after writeObject the old value is no longer retrievable.
  Object current = get(idx);
  writeObject(idx, val);
  return current;
}
/** True when lsize() reports zero elements. */
default boolean isEmpty() {
  return 0 == lsize();
}
default Object[] toArray() {
Expand Down
23 changes: 22 additions & 1 deletion src/tech/v3/datatype/argops.clj
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@
Buffer
UnaryOperator BinaryOperator
UnaryPredicate BinaryPredicate]
[java.util Comparator Arrays List Map Iterator]
[java.util Comparator Arrays List Map Iterator Collections Random]
[org.roaringbitmap RoaringBitmap]))


Expand Down Expand Up @@ -504,3 +504,24 @@
(->binary-predicate partition-pred)))))
(^Iterable [unary-op item-iterable]
(argpartition-by unary-op :tech.numerics/eq item-iterable)))


(defn argshuffle
  "Serially shuffle N indexes into an array of data.
  Returns an array of indexes.

  Arguments:

  * `n-indexes` - Number of indexes; the container initially holds
    `(range n-indexes)`.  An `:int32` container is used when `n-indexes` is
    below `Integer/MAX_VALUE`, otherwise `:int64`.

  Options:

  * `:seed` - Either a number, used to seed a new `java.util.Random`, or an
    existing `java.util.Random` instance.  When absent the single-argument
    `Collections/shuffle` is used.
  * `:container-type` - Container type passed to `make-container`.  Defaults
    to `:jvm-heap`."
  ([^long n-indexes {:keys [seed container-type]
                     :or {container-type :jvm-heap}}]
   (let [data (if (< n-indexes (long Integer/MAX_VALUE))
                (dtype-cmc/make-container container-type :int32 (range n-indexes))
                (dtype-cmc/make-container container-type :int64 (range n-indexes)))
         ^Random rgen (when seed
                        (if (number? seed)
                          ;; Random's constructor takes a long.  Coercing with
                          ;; `int` threw for seeds outside the int range (e.g.
                          ;; millisecond timestamps); `long` yields the same
                          ;; generator for every seed that previously worked.
                          (java.util.Random. (long seed))
                          seed))
         data-buf (dtype-base/->buffer data)]
     ;; The shuffle mutates the buffer view; `data` is what gets returned.
     (if rgen
       (Collections/shuffle data-buf rgen)
       (Collections/shuffle data-buf))
     data))
  ([n-indexes]
   (argshuffle n-indexes nil)))
9 changes: 9 additions & 0 deletions test/tech/v3/datatype_test.clj
Original file line number Diff line number Diff line change
Expand Up @@ -557,6 +557,15 @@
{:keys [result missing]} (dfn/fill-range long-test-data 3)])))


(deftest argshuffle-test
  ;; Same seed must reproduce the same permutation.
  (is (= (vec (argops/argshuffle 20 {:seed 20}))
         (vec (argops/argshuffle 20 {:seed 20}))))
  ;; Unseeded shuffles are expected to differ.  Two independent shuffles of 20
  ;; indexes can coincide only with probability 1/20!, so this is effectively
  ;; deterministic.
  (is (not= (vec (argops/argshuffle 20))
            (vec (argops/argshuffle 20)))))


(deftest ctime-min-max
(is (dtype-proto/has-constant-time-min-max? 4))
(is (= 4 (dtype-proto/constant-time-min 4)))
Expand Down

2 comments on commit 129d935

@cnuernber
Copy link
Owner Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@daslu - This changelist I think is relevant to your interests.

I first tried using java.util.Collections/shuffle on a buffer and it failed because my generalized implementation of set wasn't correct. Fixing that at the Java interface definition level transitively fixed it on every single container that supports read/write.

I then implemented an arg method that does the repeatable shuffle in index space, which enables all readers (like dataset columns) to be repeatedly shuffled in-place via the indexed-reader pathway.

@daslu
Copy link

@daslu daslu commented on 129d935 Nov 7, 2020

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@cnuernber many thanks, that is great.

Much better than the usual habit of actually shuffling the data in all columns!

Please sign in to comment.