Skip to content

Commit

Permalink
Updated convolve, added gradient1d, and updated rolling to include va…
Browse files Browse the repository at this point in the history
…riable windows.
  • Loading branch information
cnuernber committed May 27, 2021
1 parent 0c590a6 commit 84f51fa
Show file tree
Hide file tree
Showing 11 changed files with 372 additions and 81 deletions.
71 changes: 48 additions & 23 deletions java/tech/v3/datatype/Convolve1D.java
Original file line number Diff line number Diff line change
Expand Up @@ -13,24 +13,34 @@ public static enum Mode
Same,
Valid,
};
public static enum EdgeMode {
Clamp,
Zero,
}
//Precondition is that ndata is <= nwindow
public static double[] convolve
//The difference between convolve and correlate is convolve reverses
//the window. Callers can reverse the window before passing it in.
//The reason this works with double-arrays is because it is an inherently
//n^2 operation so we want to minimize indexing costs.
public static double[] correlate
(BiFunction<Long,BiFunction<Long,Long,Object>,Object> parallelizer,
Buffer data,
Buffer rev_window,
double[] data,
double[] window,
int stepsize,
DoubleUnaryOperator in_finalize,
Mode mode) {
Mode mode,
EdgeMode edgeMode) {

final int nData = data.size();
final int nWin = rev_window.size();
final int nData = data.length;
final int nWin = window.length;
if (nData < nWin)
throw new RuntimeException("Data size must be <= window size");
final int nWinDec = nWin - 1;
final int nResult;
switch (mode) {
case Full: nResult = nData + nWinDec; break;
case Same: nResult = nData; break;
case Valid: nResult = nData - nWin + 1; break;
case Full: nResult = (nData + nWinDec)/stepsize; break;
case Same: nResult = nData/stepsize; break;
case Valid: nResult = (nData - nWin + 1)/stepsize; break;
default: throw new RuntimeException("Unrecognized mode.");
}

Expand All @@ -40,31 +50,46 @@ public static enum Mode
};
}
final DoubleUnaryOperator finalize = in_finalize;
double[] window = new double[nWin];
for (int idx = 0; idx < nWin; ++idx) {
window[idx] = rev_window.readDouble(nWinDec - idx);


final int windowOffset;
switch(mode) {
case Full: windowOffset = nWinDec; break;
case Same: windowOffset = (nWinDec / 2); break;
case Valid: windowOffset = 0; break;
default: throw new RuntimeException("Unrecognized mode.");
}


final double left_edge;
final double right_edge;
switch(edgeMode) {
case Clamp: left_edge=data[0]; right_edge=data[nData-1]; break;
case Zero: left_edge=0.0; right_edge=0.0; break;
default: throw new RuntimeException("Unrecognized edge mode");
}


double[] retval = new double[nResult];
parallelizer.apply( new Long(nResult), new BiFunction<Long,Long,Object> () {
public Object apply(Long sidx, Long glen) {
int start_idx = RT.intCast(sidx);
int group_len = RT.intCast(glen);
int end_idx = start_idx + group_len;
final int windowOffset;
switch(mode) {
case Full: windowOffset = nWinDec; break;
case Same: windowOffset = (nWinDec / 2); break;
case Valid: windowOffset = 0; break;
default: throw new RuntimeException("Unrecognized mode.");
}

//This loop is an interesting case for a blog post on the vectorization API.
for(int idx = start_idx; idx < end_idx; ++idx ) {
int data_start_idx = idx - windowOffset;
int data_start_idx = (idx*stepsize) - windowOffset;
double sum = 0.0;
for (int win_idx = 0; win_idx < nWin; ++win_idx) {
int data_idx = data_start_idx + win_idx;
double data_val = 0.0;
if (data_idx < nData && data_idx >= 0)
data_val = data.readDouble(data_idx);
double data_val;
if (data_idx < 0)
data_val = left_edge;
else if (data_idx >= nData)
data_val = right_edge;
else
data_val = data[data_idx];
sum += data_val * window[win_idx];
}
retval[idx] = finalize.applyAsDouble(sum);
Expand Down
1 change: 1 addition & 0 deletions project.clj
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,7 @@
tech.v3.datatype.datetime
tech.v3.datatype.mmap
tech.v3.datatype.convolve
tech.v3.datatype.gradient
tech.v3.datatype.mmap-writer
tech.v3.datatype.native-buffer
tech.v3.datatype.sampling
Expand Down
110 changes: 75 additions & 35 deletions src/tech/v3/datatype/convolve.clj
Original file line number Diff line number Diff line change
Expand Up @@ -3,82 +3,121 @@
convolutions are supported."
(:require [tech.v3.datatype.base :as dt-base]
[tech.v3.datatype.array-buffer :as array-buffer]
[tech.v3.datatype.copy-make-container :as dt-cmc]
[tech.v3.parallel.for :as pfor])
(:import [tech.v3.datatype Convolve1D Convolve1D$Mode ArrayHelpers]
(:import [tech.v3.datatype Convolve1D Convolve1D$Mode ArrayHelpers DoubleReader
Convolve1D$EdgeMode]
[java.util.function BiFunction]
[org.apache.commons.math3.distribution NormalDistribution]))


(defn convolve1d
(defn correlate1d
"Convolve window over data. Window must be smaller than data.
Returns result in `:float64` space. Convolutions outside the valid
space of data are supplied zeros to match numpy conventions.
Options:
* `:mode` - One of :full, :same, or :valid.
* `:edge-mode` - One of :zero or :clamp. Clamp clamps the edge value to the start
or end respectively. Defaults to :zero.
* `:finalizer` - A java.util.function.DoubleUnaryOperator that performs
one more operation on the data after summation but before assigned to
result.
* `:force-serial` - For serial execution of the convolution. Unnecessary except
for profiling and comparison purposes.
* `:stepsize` - Defaults to 1, this steps across the input data in stepsize
increments.
```"
([data win {:keys [mode finalizer force-serial edge-mode stepsize]
:or {mode :full
edge-mode :zero
stepsize 1}}]
(let [data (dt-cmc/->double-array data)
win (dt-cmc/->double-array win)]
(-> (Convolve1D/correlate
(reify BiFunction
(apply [this n-elems applier]
(if force-serial
(.apply ^BiFunction applier 0 n-elems)
(pfor/indexed-map-reduce
n-elems
(fn [start-idx group-len]
(.apply ^BiFunction applier start-idx group-len))
dorun))))
data
win
(int stepsize)
finalizer
(case mode
:full Convolve1D$Mode/Full
:same Convolve1D$Mode/Same
:valid Convolve1D$Mode/Valid)
(case edge-mode
:zero Convolve1D$EdgeMode/Zero
:clamp Convolve1D$EdgeMode/Clamp))
(array-buffer/array-buffer))))
([data win]
(correlate1d data win nil)))

Example:

(defn convolve1d
"Convolve a window across a signal. The only difference from correlate is the
window is reversed and then correlate is called. See options for correlate, this
has the same defaults.
Examples:
```clojure
user> (require '[tech.v3.datatype.convolve :as conv])
user> (require '[tech.v3.datatype.convolve :as dt-conv])
nil
user> (conv/convolve1d [1, 2, 3], [0, 1, 0.5])
user> (dt-conv/convolve1d [1, 2, 3], [0, 1, 0.5])
#array-buffer<float64>[5]
[0.000, 1.000, 2.500, 4.000, 1.500]
user> (conv/convolve1d [1, 2, 3], [0, 1, 0.5] {:mode :same})
user> (dt-conv/convolve1d [1, 2, 3], [0, 1, 0.5] {:mode :same})
#array-buffer<float64>[3]
[1.000, 2.500, 4.000]
user> (conv/convolve1d [1, 2, 3], [0, 1, 0.5] {:mode :valid})
user> (dt-conv/convolve1d [1, 2, 3], [0, 1, 0.5] {:mode :valid})
#array-buffer<float64>[1]
[2.500]
```"
([data win {:keys [mode finalizer force-serial]
:or {mode :full}}]
(-> (Convolve1D/convolve
(reify BiFunction
(apply [this n-elems applier]
(if force-serial
(.apply ^BiFunction applier 0 n-elems)
(pfor/indexed-map-reduce
n-elems
(fn [start-idx group-len]
(.apply ^BiFunction applier start-idx group-len))
dorun))))
(dt-base/->buffer data)
(dt-base/->buffer win)
finalizer
(case mode
:full Convolve1D$Mode/Full
:same Convolve1D$Mode/Same
:valid Convolve1D$Mode/Valid))
(array-buffer/array-buffer)))
([data win options]
(let [win (dt-base/->buffer win)
n-win (.lsize win)
n-win-dec (dec n-win)]
(correlate1d data
(reify DoubleReader
(lsize [this] n-win)
(readDouble [this idx]
(.readDouble win (- n-win-dec idx))))
options)))
([data win]
(convolve1d data win nil)))


(defn gaussian1d
"Perform a 1d gaussian convolution. Options are passed to convolve1d - but the
default mode is `:same` as opposed to `:full`. Values are drawn from a 0 centered
normal distribution with stddev of 1 on the range of -range,range and with
normal distribution with stddev of 1 on the range of -range*sigma,range*sigma at
window-len increments. Then the window vector is normalized to length 1 and convolved
over the data using convolve1d. This means with a range of '2' you are drawing
over the data using correlate1d. This means with a range of '2' you are drawing
from a 0-centered normal distribution 2 standard distributions so the edges will have
a pre-normalized value of around 0.05.
Options:
* `:mode` - default so `:same`, see options for convolve1d.
* `:mode` - defaults to `:same`, see options for correlate1d.
* `:edge-mode` - defaults to `:clamp`, see options for correlate1d.
* `:range` - Defaults to 2, values are drawn from (-range, range) with
window-len increments."
([data window-len {:keys [range mode]
window-len increments.
* `:stepsize` - Defaults to 1. Increasing this decreases the size of the
result by a factor of stepsize. So you can efficiently downsample your data
by using a gaussian window and increasing stepsize to the desired downsample
amount."
([data window-len {:keys [range mode edge-mode]
:or {range 2
mode :same}
mode :same
edge-mode :clamp}
:as options}]
(let [dist (NormalDistribution.)
range (double range)
Expand All @@ -104,7 +143,7 @@ user> (conv/convolve1d [1, 2, 3], [0, 1, 0.5] {:mode :valid})
;;Normalize the vector
(dotimes [idx window-len]
(ArrayHelpers/accumMul window idx inv-len))
(convolve1d data window (assoc options :mode mode))))
(correlate1d data window (assoc options :mode mode :edge-mode edge-mode))))
([data window-len]
(gaussian1d data window-len nil)))

Expand All @@ -114,5 +153,6 @@ user> (conv/convolve1d [1, 2, 3], [0, 1, 0.5] {:mode :valid})
(convolve1d [1, 2, 3], [0, 1, 0.5])
(convolve1d [1, 2, 3], [0, 1, 0.5] {:mode :same})
(convolve1d [1, 2, 3], [0, 1, 0.5] {:mode :valid})

(convolve1d (range 100), [0, 1, 0.5] {:edge-mode :zero
:stepsize 2})
)
2 changes: 1 addition & 1 deletion src/tech/v3/datatype/datetime.clj
Original file line number Diff line number Diff line change
Expand Up @@ -106,4 +106,4 @@ A general outline is:
datetime->epoch
epoch->datetime
between
variable-rolling-window)
variable-rolling-window-indexes)
21 changes: 10 additions & 11 deletions src/tech/v3/datatype/datetime/operations.clj
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
[tech.v3.datatype.packing :as packing]
[tech.v3.datatype.datetime.packing :as dt-packing]
[tech.v3.datatype.datetime.base :as dt-base]
[tech.v3.datatype.rolling :as dt-rolling]
[tech.v3.datatype.functional :as dfn]
[tech.v3.datatype.binary-op :as bin-op]
[tech.v3.datatype.argops :as argops]
Expand Down Expand Up @@ -571,20 +572,18 @@
retval)))


(defn variable-rolling-window
(defn variable-rolling-window-indexes
"Given a reader of monotonically increasing source datetime data, return an
iterable of ranges that describe the windows in index space. There will be one
window per source input and windows are applied in int64 microsecond space. Be
aware that windows near the end cannot possibly satisfy the windowing requirement."
aware that windows near the end cannot possibly satisfy the windowing requirement.
See options for tech.v3.datatype.rolling/variable-rolling-window-indexes for further
options, of which stepsize may be of interest."
([src-data window-length units options]
(let [^BinaryOperator tweener (between-op (dtype-base/elemwise-datatype src-data)
units)
src-data (dtype-base/->buffer src-data)
n-windows (long (:n-windows options (.lsize src-data)))]
(reify Iterable
(iterator [this]
(ObjectRollingIterator. 0 1 (long window-length)
n-windows (.lsize src-data)
tweener src-data)))))
units)]
(dt-rolling/variable-rolling-window-indexes
src-data window-length (merge {:comp-fn tweener} options))))
([src-data window-length units]
(variable-rolling-window src-data window-length units nil)))
(variable-rolling-window-indexes src-data window-length units nil)))

0 comments on commit 84f51fa

Please sign in to comment.