eddelbuettel · lsilvest · May 9, 2023 · May 3, 2023 · May 8, 2023 · May 9, 2023
diff --git a/ChangeLog b/ChangeLog
@@ -1,3 +1,18 @@
+2023-05-08  Leonardo Silvestri  <lsilvestri@ztsdb.org>
+
+	* DESCRIPTION (Version, Date): Roll minor version and date
+
+	* NAMESPACE: Added function 'ops'
+	* R/dtts.R: Ditto
+	* inst/tinytest/test_dtts.R: Ditto
+	* man/ops.Rd: Ditto
+	* src/align.cpp: Ditto
+	* man/align.Rd: Trivial change on regeneration
+	* man/align.idx.Rd: Ditto
+	* man/frequency-data.table-method.Rd: Ditto
+	* man/grid.align.Rd: Ditto
+	* README.md: Added example for 'ops'
+
 2023-04-26  Leonardo Silvestri  <lsilvestri@ztsdb.org>
 
 	* DESCRIPTION (Version, Date): Roll minor version and date

diff --git a/DESCRIPTION b/DESCRIPTION
@@ -1,8 +1,8 @@
 Package: dtts
 Type: Package
 Title: 'data.table' Time-Series
-Version: 0.1.0.2
-Date: 2023-04-26
+Version: 0.1.0.3
+Date: 2023-05-08
 Author: Dirk Eddelbuettel and Leonardo Silvestri
 Maintainer: Dirk Eddelbuettel <edd@debian.org>
 Description: High-frequency time-series support via 'nanotime' and 'data.table'.
@@ -11,5 +11,5 @@ Imports: nanotime, data.table, methods, bit64, Rcpp (>= 0.11.5), RcppCCTZ (>= 0.
 Suggests: tinytest
 LinkingTo: Rcpp, RcppCCTZ, RcppDate, nanotime
 BugReports: https://github.com/eddelbuettel/dtts/issues
-RoxygenNote: 7.1.2
+RoxygenNote: 7.2.2
 Encoding: UTF-8
diff --git a/NAMESPACE b/NAMESPACE
@@ -9,3 +9,4 @@ importFrom("utils", "tail")
 exportMethods("align.idx")
 exportMethods("align")
 exportMethods("grid.align")
+exportMethods("ops")
diff --git a/R/RcppExports.R b/R/RcppExports.R
@@ -17,3 +17,7 @@
     .Call('_dtts_align_idx_period', PACKAGE = 'dtts', x, y, start, end, sopen, eopen, tz)
 }
 
+.ops <- function(xdata, ydata, op_string) {
+    .Call('_dtts_ops', PACKAGE = 'dtts', xdata, ydata, op_string)
+}
+
diff --git a/R/dtts.R b/R/dtts.R
@@ -543,3 +543,71 @@ setMethod("frequency",
                   stop("argument 'by' must be either 'nanoduration' or 'nanotime'")
               }
           })
+
+
+##' @rdname ops
+setGeneric("ops", function(x, y, op_string) standardGeneric("ops"))
+
+
+##' Arithmetic operations on two \code{data.table} time-series
+##'
+##' \code{ops} returns the \code{y} time-series on which the \code{x}
+##' time-series values are applied using the specified operator
+##' \code{op}.
+##'
+##' @section Details:
+##'
+##' The n elements of the \code{x} time-series operand define a set of
+##' n-1 intervals, and the value associated with each interval is
+##' applied to all the observations in the \code{y} time-series
+##' operand that fall in the interval. Note that the interval is
+##' closed at the beginning and open at the end. The supported values
+##' for \code{op} are "*", "/", "+", "-".
+##'
+##' There has to be one numeric column in \code{x} and \code{y}; there
+##' has to be either a one to one correspondance between the number of
+##' numeric columns in \code{x} and \code{y}, or there must be only
+##' one numeric column in \code{x} that will be applied to all numeric
+##' columns in \code{y}. Non-numeric columns must not appear in
+##' \code{x}, whereas they will be skipped of they appear in \code{y}.
+##' 
+##' @param x the \code{data.table} time-series that determines the
+##'     left operand
+##' @param y the \code{data.table} time-series that determines the
+##'     right operand \code{nanoperiod}.
+##' @param op_string string defining the operation to apply; the
+##'     supported values for \code{op} are "*", "/", "+", "-".
+##'
+##' @rdname ops
+##' 
+##' @examples
+##' \dontrun{
+##' one_second_duration  <- as.nanoduration("00:00:01")
+##' t1 <- nanotime(1:2 * one_second_duration * 3)
+##' t2 <- nanotime(1:4 * one_second_duration)
+##' dt1 <- data.table(index=t1, data1 = 1:length(t1))
+##' setkey(dt1, index)
+##' dt2 <- data.table(index=t2, data1 = 1:length(t2))
+##' setkey(dt2, index)
+##' ops(dt1, dt2, "+")
+##' }
+setMethod("ops",
+          signature("data.table", "data.table", "character"),
+          function(x, y, op_string)
+          {
+              if (!inherits(x[[1]], "nanotime")) {
+                  stop("first column of 'x' must be of type 'nanotime'")
+              }
+              if (!inherits(y[[1]], "nanotime")) {
+                  stop("first column of 'y' must be of type 'nanotime'")
+              }
+              if (is.null(key(x)) || names(x)[1] != key(x)[1]) {
+                  stop("first column of 'x' must be the first key")
+              }
+              if (is.null(key(y)) || names(y)[1] != key(y)[1]) {
+                  stop("first column of 'y' must be the first key")
+              }
+
+
+              .ops(x, y, op_string)
+          })
diff --git a/README.md b/README.md
@@ -257,6 +257,53 @@ Which produces:
 10: 1970-01-01T00:01:40+00:00 10
 ~~~
 
+#### ops
+
+`ops` performs arithmetic operations between two time-series and has
+the following signature, where `x` and `y` are time-series and `op` is
+a string denoting an arithmetic operator.
+
+~~~ R
+ops(x, y, op_string)
+~~~
+
+Each entry in the left time-series operand defines an interval from
+the previous entry, and the value associated with this interval will
+be applied to all the observations in the right time-series operand
+that fall in the interval. Note that the interval is closed at the
+beginning and open and the end. The available values for op are "*",
+"/", "+", "-".
+
+This function is particulary useful to apply a multiplier or to add a
+constant that changes over time; one example would be the adjustment
+of stock prices for splits.
+
+Here is a visualization of `ops`:
+
+<img src="./inst/images/ops.svg">
+
+
+Here is an example:
+
+~~~ R
+one_second_duration  <- as.nanoduration("00:00:01")
+t1 <- nanotime(1:2 * one_second_duration * 3)
+t2 <- nanotime(1:4 * one_second_duration)
+dt1 <- data.table(index=t1, data1 = 1:length(t1))
+setkey(dt1, index)
+dt2 <- data.table(index=t2, data1 = 1:length(t2))
+setkey(dt2, index)
+ops(dt1, dt2, "+")
+~~~
+
+Which produces:
+```
+                       index data1
+1: 1970-01-01T00:00:01+00:00     2
+2: 1970-01-01T00:00:02+00:00     3
+3: 1970-01-01T00:00:03+00:00     3
+4: 1970-01-01T00:00:04+00:00     4
+```
 
 ### Time-series subsetting
 

diff --git a/inst/tinytest/test_dtts.R b/inst/tinytest/test_dtts.R
@@ -644,6 +644,176 @@ y <- as.nanotime(4:2)
 expect_error(align.idx(x, y, start=-as.nanoperiod("00:00:01"), tz="UTC"), "'y' must be sorted in ascending order")
 
 
+## tests for 'ops' function:
+## ------------------------
+## 1 'x' col, 1 'y' col:
+t1 <- nanotime(1:2 * one_second_duration * 2)
+t2 <- nanotime(1:4 * one_second_duration)
+dt1 <- data.table(index=t1, data1 = 1:2)
+setkey(dt1, index)
+dt2 <- data.table(index=t2, data1 = 1)
+setkey(dt2, index)
+expected_dt = copy(dt2)
+expected_dt[, data1 := c(2, 3, 3, 1)]
+expect_equal(expected_dt, ops(dt1, dt2, "+"))
+## 1 'x' col, 1 'y' col, ops=='-':
+t1 <- nanotime(1:2 * one_second_duration * 2)
+t2 <- nanotime(1:4 * one_second_duration)
+dt1 <- data.table(index=t1, data1 = 1:2)
+setkey(dt1, index)
+dt2 <- data.table(index=t2, data1 = 1)
+setkey(dt2, index)
+expected_dt = copy(dt2)
+expected_dt[, data1 := c(0, 1, 1, 1)]
+expect_equal(expected_dt, ops(dt1, dt2, "-"))
+## 1 'x' col, 1 'y' col, ops=='*':
+t1 <- nanotime(1:2 * one_second_duration * 2)
+t2 <- nanotime(1:4 * one_second_duration)
+dt1 <- data.table(index=t1, data1 = 1:2)
+setkey(dt1, index)
+dt2 <- data.table(index=t2, data1 = 1)
+setkey(dt2, index)
+expected_dt = copy(dt2)
+expected_dt[, data1 := c(1, 2, 2, 1)]
+expect_equal(expected_dt, ops(dt1, dt2, "*"))
+## 1 'x' col, 1 'y' col, ops=='/':
+t1 <- nanotime(1:2 * one_second_duration * 2)
+t2 <- nanotime(1:4 * one_second_duration)
+dt1 <- data.table(index=t1, data1 = 1:2)
+setkey(dt1, index)
+dt2 <- data.table(index=t2, data1 = 1)
+setkey(dt2, index)
+expected_dt = copy(dt2)
+expected_dt[, data1 := c(1, 2, 2, 1)]
+expect_equal(expected_dt, ops(dt1, dt2, "/"))
+## 1 'x' col, 3 'y' cols:
+t1 <- nanotime(1:2 * one_second_duration * 2)
+t2 <- nanotime(1:4 * one_second_duration)
+dt1 <- data.table(index=t1, data1 = 1:2)
+setkey(dt1, index)
+dt2 <- data.table(index=t2, data1=1, data2=2, data3=3)
+setkey(dt2, index)
+ops(dt1, dt2, "+")
+expected_dt = copy(dt2)
+expected_dt[, data1 := c(2, 3, 3, 1)]
+expected_dt[, data2 := data1 + 1]
+expected_dt[, data3 := data2 + 1]
+expect_equal(expected_dt, ops(dt1, dt2, "+"))
+## 3 'x' col, 3 'y' cols:
+t1 <- nanotime(1:2 * one_second_duration * 2)
+t2 <- nanotime(1:4 * one_second_duration)
+dt1 <- data.table(index=t1, c1=1:2, c2=3:4, c3=5:6)
+setkey(dt1, index)
+dt2 <- data.table(index=t2, data1=1, data2=2, data3=3)
+setkey(dt2, index)
+expected_dt = copy(dt2)
+expected_dt[, data1 := c(2, 3, 3, 1)]
+expected_dt[, data2 := c(5, 6, 6, 2)]
+expected_dt[, data3 := c(8, 9, 9, 3)]
+expect_equal(expected_dt, ops(dt1, dt2, "+"))
+## no overlap -> no change
+t1 <- nanotime(1:2 * one_second_duration * 10)
+t2 <- nanotime(1:4 * one_second_duration)
+dt1 <- data.table(index=t1, data1 = 1:2)
+setkey(dt1, index)
+dt2 <- data.table(index=t2, data1 = 1)
+setkey(dt2, index)
+expect_equal(dt2, ops(dt1, dt2, "+"))
+## 3 'x' col, 3 'y' cols:, skip extra string cols in 'y':
+t1 <- nanotime(1:2 * one_second_duration * 2)
+t2 <- nanotime(1:4 * one_second_duration)
+dt1 <- data.table(index=t1, c1=1:2, c2=3:4, c3=5:6)
+setkey(dt1, index)
+dt2 <- data.table(index=t2, d1=1, c1="a", d2=2, c2="b", d3=3, c3="c")
+setkey(dt2, index)
+expected_dt = copy(dt2)
+expected_dt[, d1 := c(2, 3, 3, 1)]
+expected_dt[, d2 := c(5, 6, 6, 2)]
+expected_dt[, d3 := c(8, 9, 9, 3)]
+expect_equal(expected_dt, ops(dt1, dt2, "+"))
+## same, but mix of int and double:
+t1 <- nanotime(1:2 * one_second_duration * 2)
+t2 <- nanotime(1:4 * one_second_duration)
+dt1 <- data.table(index=t1, d1=1:2, d2=3:4, d3=5:6)
+setkey(dt1, index)
+dt2 <- data.table(index=t2, d1=as.integer(1), c1="a", d2=2, c2="b", d3=3, c3="c")
+setkey(dt2, index)
+expected_dt = copy(dt2)
+expected_dt[, d1 := c(2, 3, 3, 1)]
+expected_dt[, d2 := c(5, 6, 6, 2)]
+expected_dt[, d3 := c(8, 9, 9, 3)]
+expect_equal(expected_dt, ops(dt1, dt2, "+"))
+## error, non-numeric column in x:
+t1 <- nanotime(1:2 * one_second_duration * 2)
+t2 <- nanotime(1:4 * one_second_duration)
+dt1 <- data.table(index=t1, d1=1:2, c1="a", d2=3:4, d3=5:6)
+setkey(dt1, index)
+dt2 <- data.table(index=t2, d1=as.integer(1), c1="a", d2=2, c2="b", d3=3, c3="c")
+setkey(dt2, index)
+expect_error(ops(dt1, dt2, "+"), "all data columns of 'x' must be numeric")
+## error, 2 cols in 'x', 3 cols in 'y'
+t1 <- nanotime(1:2 * one_second_duration * 2)
+t2 <- nanotime(1:4 * one_second_duration)
+dt1 <- data.table(index=t1, c1=1:2, c2=3:4)
+setkey(dt1, index)
+dt2 <- data.table(index=t2, d1=1, c1="a", d2=2, c2="b", d3=3, c3="c")
+setkey(dt2, index)
+expect_error(ops(dt1, dt2, "+"), "'x' must have one numeric column or the same number as 'y'")
+## error, no numerical columns in x:
+t1 <- nanotime(1:2 * one_second_duration * 2)
+t2 <- nanotime(1:4 * one_second_duration)
+dt1 <- data.table(index=t1, c1="a", c2="b")
+setkey(dt1, index)
+dt2 <- data.table(index=t2, d1=1, c1="a", d2=2, c2="b", d3=3, c3="c")
+setkey(dt2, index)
+expect_error(ops(dt1, dt2, "+"), "'x' must have at least one numeric column")
+## error, 3 cols in 'x', 2 cols in 'y'
+t1 <- nanotime(1:2 * one_second_duration * 2)
+t2 <- nanotime(1:4 * one_second_duration)
+dt1 <- data.table(index=t1, c1=1:2, c2=3:4, c3=5:6)
+setkey(dt1, index)
+dt2 <- data.table(index=t2, d1=1, c1="a", d2=2, c2="b", c3="c")
+setkey(dt2, index)
+expect_error(ops(dt1, dt2, "+"), "'x' must have one numeric column or the same number as 'y'")
+## error key check dt1:
+t1 <- nanotime(1:2 * one_second_duration * 2)
+t2 <- nanotime(1:4 * one_second_duration)
+dt1 <- data.table(index=t1, c1=1:2, c2=3:4, c3=5:6)
+dt2 <- data.table(index=t2, d1=1, c1="a", d2=2, c2="b", d3=3, c3="c")
+setkey(dt2, index)
+expect_error(ops(dt1, dt2, "+"), "first column of 'x' must be the first key")
+## error key check dt2:
+t1 <- nanotime(1:2 * one_second_duration * 2)
+t2 <- nanotime(1:4 * one_second_duration)
+dt1 <- data.table(index=t1, c1=1:2, c2=3:4, c3=5:6)
+setkey(dt1, index)
+dt2 <- data.table(index=t2, d1=1, c1="a", d2=2, c2="b", d3=3, c3="c")
+expect_error(ops(dt1, dt2, "+"), "first column of 'y' must be the first key")
+## error key check first col of 'x' not nanotime:
+t1 <- 1:2
+t2 <- nanotime(1:4 * one_second_duration)
+dt1 <- data.table(index=t1, c1=1:2, c2=3:4, c3=5:6)
+setkey(dt1, index)
+dt2 <- data.table(index=t2, d1=1, c1="a", d2=2, c2="b", d3=3, c3="c")
+setkey(dt2, index)
+expect_error(ops(dt1, dt2, "+"), "first column of 'x' must be of type 'nanotime'")
+## error key check first col of 'y' not nanotime:
+t1 <- nanotime(1:2 * one_second_duration * 2)
+t2 <- 1:4
+dt1 <- data.table(index=t1, c1=1:2, c2=3:4, c3=5:6)
+setkey(dt1, index)
+dt2 <- data.table(index=t2, d1=1, c1="a", d2=2, c2="b", d3=3, c3="c")
+setkey(dt2, index)
+expect_error(ops(dt1, dt2, "+"), "first column of 'y' must be of type 'nanotime'")
+## check unknown 'op':
+t1 <- nanotime(1:2 * one_second_duration * 2)
+t2 <- nanotime(1:4 * one_second_duration)
+dt1 <- data.table(index=t1, c1=1:2, c2=3:4, c3=5:6)
+setkey(dt1, index)
+dt2 <- data.table(index=t2, data1=1, data2=2, data3=3)
+setkey(dt2, index)
+expect_error(ops(dt1, dt2, "sdf"), "unsupported operator 'sdf'")
+
 
 if (FALSE) {
     ## don't do this; must appear in vignette!

diff --git a/man/align.Rd b/man/align.Rd
diff --git a/man/align.idx.Rd b/man/align.idx.Rd
diff --git a/man/frequency-data.table-method.Rd b/man/frequency-data.table-method.Rd
diff --git a/man/grid.align.Rd b/man/grid.align.Rd