version 1.0.0

cran · Dec 21, 2021 · ad4f1c2 · ad4f1c2
commit ad4f1c2
Show file tree

Hide file tree

Showing 33 changed files with 2,229 additions and 0 deletions.
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -0,0 +1,28 @@
+Package: nsp
+Title: Inference for Multiple Change-Points in Linear Models
+Version: 1.0.0
+Authors@R: 
+    person(given = "Piotr",
+           family = "Fryzlewicz",
+           role = c("aut", "cre"),
+           email = "p.fryzlewicz@lse.ac.uk",
+           comment = c(ORCID = "0000-0002-9676-902X"))
+Description: Implementation of Narrowest Significance Pursuit, a general and
+    flexible methodology for automatically detecting localised regions in data sequences
+    which each must contain a change-point (understood as an abrupt change in the
+    parameters of an underlying linear model), at a prescribed global significance level.
+    Narrowest Significance Pursuit works with a wide range of distributional assumptions
+    on the errors, and yields exact desired finite-sample coverage probabilities,
+    regardless of the form or number of the covariates. For details, see P. Fryzlewicz
+    (2021) <https://stats.lse.ac.uk/fryzlewicz/nsp/nsp.pdf>.
+License: GPL (>= 3)
+Encoding: UTF-8
+RoxygenNote: 7.1.2
+Depends: R (>= 3.0.0)
+Imports: lpSolve
+NeedsCompilation: no
+Packaged: 2021-12-20 18:01:02 UTC; piotr
+Author: Piotr Fryzlewicz [aut, cre] (<https://orcid.org/0000-0002-9676-902X>)
+Maintainer: Piotr Fryzlewicz <p.fryzlewicz@lse.ac.uk>
+Repository: CRAN
+Date/Publication: 2021-12-21 07:10:06 UTC
diff --git a/MD5 b/MD5
@@ -0,0 +1,32 @@
+32e6e98615a3993a661f654362ffdeff *DESCRIPTION
+86af9173641a743387c62f795aee6cf4 *NAMESPACE
+c3c21fdbac789b208650c26bf7b44623 *R/cov_dep_multi_norm.R
+872026c7b1fa364056fc7de872db50f3 *R/cov_dep_multi_norm_poly.R
+bf5d56acaaa66f4101e6eb67e5ba1fa2 *R/cpt_importance.R
+cec2e87fd72f7382047a757e9d2a8176 *R/draw_rects.R
+c30683981f3bb48e520b17f0dbb5e97c *R/draw_rects_advanced.R
+fb263eaa59ca89bc71ff663ae31c4d4d *R/nsp-package.R
+9711ffb7e6465b71e8e7b98ffb948f4f *R/nsp.R
+6609780656f3f00112a3d831058e2d96 *R/nsp_poly.R
+bbd74d3fdf9ef809c9aedaf714414727 *R/nsp_poly_ar.R
+4f6c4cfc9b678bb5bfd529a9230ec435 *R/nsp_poly_selfnorm.R
+37c926a50f6b0a8711bfac2192bafbc9 *R/nsp_selfnorm.R
+4e45ad019e7b6de40367052ce6e01ecc *R/nsp_tvreg.R
+d1adf2a441e66dc4dcf3ef71a9f5e0e4 *R/sim_max_holder.R
+cdfab7ef02c72de2db3e132130033f08 *R/sysdata.rda
+4cb7d46f41a25aae1e6b99bf28300233 *R/thresh_kab.R
+5b0b175efc2866cc470276157774fa81 *R/utils.R
+25cd545609f401fd272bab374b36f94e *man/cov_dep_multi_norm.Rd
+3a65aee0302a2b58d7123bede7e00f8e *man/cov_dep_multi_norm_poly.Rd
+c8e00274924bdbc915f37df05815a195 *man/cpt_importance.Rd
+e689a11d95e501be0855a5b4b48bfdab *man/draw_rects.Rd
+04aedb723d6760d0def565b1bd4461f8 *man/draw_rects_advanced.Rd
+e69a11d65123f0668b52a1e1cefd5ca7 *man/nsp-package.Rd
+8ce1fa5682ed11ec5e91292a65966d23 *man/nsp.Rd
+dcf05ff43dc4c4774b24bc4769ba987b *man/nsp_poly.Rd
+6410959b4722720b5a37afe89de63dce *man/nsp_poly_ar.Rd
+48b009f3342d5b9837fad8c04034a8a5 *man/nsp_poly_selfnorm.Rd
+33ac803c18659565e5efb12ab9ef8a7a *man/nsp_selfnorm.Rd
+afdb817b92dc9340cca9382fc0445ae6 *man/nsp_tvreg.Rd
+751d41974ac00f66e0f895e67ec6443d *man/sim_max_holder.Rd
+a9e926a95af068cf1c193ee190a1e201 *man/thresh_kab.Rd
diff --git a/NAMESPACE b/NAMESPACE
@@ -0,0 +1,22 @@
+# Generated by roxygen2: do not edit by hand
+
+export(cov_dep_multi_norm)
+export(cov_dep_multi_norm_poly)
+export(cpt_importance)
+export(draw_rects)
+export(draw_rects_advanced)
+export(nsp)
+export(nsp_poly)
+export(nsp_poly_ar)
+export(nsp_poly_selfnorm)
+export(nsp_selfnorm)
+export(nsp_tvreg)
+export(sim_max_holder)
+export(thresh_kab)
+importFrom(graphics,barplot)
+importFrom(graphics,rect)
+importFrom(stats,filter)
+importFrom(stats,mad)
+importFrom(stats,quantile)
+importFrom(stats,rnorm)
+importFrom(stats,ts.plot)
diff --git a/R/cov_dep_multi_norm.R b/R/cov_dep_multi_norm.R
@@ -0,0 +1,48 @@
+#' Simulate covariate-dependent multiscale sup-norm for use in NSP
+#'
+#' This function simulates the multiscale sup-norm adjusted for the form of the covariates, as described in Section 5.3
+#' of the paper. This is done for i.i.d. N(0,1) innovations.
+#'
+#' The NSP algorithm is described in P. Fryzlewicz (2021) "Narrowest Significance Pursuit: inference for multiple change-points in linear
+#' models", preprint.
+#' 
+#' @param x The design matrix with the regressors (covariates) as columns.
+#' @param N Desired number of simulated values of the norm.
+#' @return Sample of size \code{N} containing the simulated norms.
+#' @author Piotr Fryzlewicz, \email{p.fryzlewicz@@lse.ac.uk}
+#' @seealso \code{\link{cov_dep_multi_norm_poly}}, \code{\link{sim_max_holder}}
+#' @examples
+#' set.seed(1)
+#' g <- c(rep(0, 100), rep(2, 100))
+#' x.g <- g + stats::rnorm(200)
+#' mscale.norm.200 <- cov_dep_multi_norm(matrix(1, 200, 1), 100)
+#' nsp_poly(x.g, 100, thresh.val = stats::quantile(mscale.norm.200, .95))
+#' @importFrom stats rnorm quantile
+#' @export
+
+
+
+
+cov_dep_multi_norm <- function(x, N = 1000) {
+
+	# Covariate-dependent multiresolution norm simulation
+
+	d <- dim(x)
+
+	res <- rep(0, N)
+
+	for (i in 1:N) {
+
+		z <- stats::rnorm(d[1])
+
+		x.c <- cbind(z, x)
+
+		x.c.ads <- all_dyadic_scans_array(x.c)
+
+		res[i] <- check_interval_array(c(1, d[1]), x.c.ads, 0)
+
+	}
+
+	res
+
+}
diff --git a/R/cov_dep_multi_norm_poly.R b/R/cov_dep_multi_norm_poly.R
@@ -0,0 +1,39 @@
+#' Simulate covariate-dependent multiscale sup-norm for use in NSP, for piecewise-polynomial models
+#'
+#' This function simulates the multiscale sup-norm adjusted for the form of the covariates, as described in Section 5.3
+#' of the paper, for piecewise-polynomial models of degree \code{deg}. This is done for i.i.d. N(0,1) innovations.
+#'
+#' The NSP algorithm is described in P. Fryzlewicz (2021) "Narrowest Significance Pursuit: inference for multiple change-points in linear
+#' models", preprint.
+#' 
+#' @param n The data length (for which the multiscale norm is to be simulated)
+#' @param deg The degree of the polynomial model (0 for the piecewise-constant model; 1 for piecewise-linearity, etc.).
+#' @param N Desired number of simulated values of the norm.
+#' @return Sample of size \code{N} containing the simulated norms.
+#' @author Piotr Fryzlewicz, \email{p.fryzlewicz@@lse.ac.uk}
+#' @seealso \code{\link{cov_dep_multi_norm}}, \code{\link{sim_max_holder}}
+#' @examples
+#' set.seed(1)
+#' g <- c(rep(0, 100), rep(2, 100))
+#' x.g <- g + stats::rnorm(200)
+#' mscale.norm.200 <- cov_dep_multi_norm_poly(200, 0, 100)
+#' nsp_poly(x.g, 100, thresh.val = stats::quantile(mscale.norm.200, .95))
+#' @importFrom stats rnorm quantile
+#' @export
+
+
+
+
+cov_dep_multi_norm_poly <- function(n, deg, N = 10000) {
+
+	x.c <- matrix(0, n, deg+1)
+
+	for (i in 1:(deg+1)) {
+
+		x.c[,i] <- seq(from=0, to=1, length=n)^(i-1)
+
+	}
+
+	cov_dep_multi_norm(x.c, N)	
+
+}
diff --git a/R/cpt_importance.R b/R/cpt_importance.R
@@ -0,0 +1,41 @@
+#' Change-point importance (prominence) plot
+#'
+#' This function produces a change-point prominence plot based on the NSP object provided. The heights of the bars are arranged in non-decreasing
+#' order and correspond directly to the lengths of the NSP intervals of significance. Each bar is labelled as s-e where s (e) is the start (end) of the
+#' corresponding NSP interval of significance, respectively. The change-points corresponding to the narrower intervals can be seen as more prominent.
+#'
+#' The NSP algorithm is described in P. Fryzlewicz (2021) "Narrowest Significance Pursuit: inference for multiple change-points in linear
+#' models", preprint.
+#' 
+#' @param nsp.obj Object returned by one of the \code{nsp*} functions.
+#' @return The function does not return a value.
+#' @author Piotr Fryzlewicz, \email{p.fryzlewicz@@lse.ac.uk}
+#' @seealso \code{\link{draw_rects}}, \code{\link{draw_rects_advanced}}
+#' @examples
+#' set.seed(1)
+#' f <- c(rep(0, 100), 1:100, rep(101, 100))
+#' x.f <- f + 15 * stats::rnorm(300)
+#' x.f.n <- nsp_poly(x.f, 100, "sim", deg=1)
+#' cpt_importance(x.f.n)
+#' @importFrom graphics barplot
+#' @importFrom stats rnorm
+#' @export
+
+
+
+
+cpt_importance <- function(nsp.obj) {
+
+	# Change-point prominence plot as described in Section 4 of the paper.
+	# nsp.obj - quantity returned by one of the nsp_* functions.
+
+	d <- dim(nsp.obj$intervals)
+	if (d[1]) {
+		heights <- nsp.obj$intervals[,2] - nsp.obj$intervals[,1]
+		h.ord <- order(heights)
+		labels <- paste(as.character(round(nsp.obj$intervals[h.ord,1])), "-", as.character(round(nsp.obj$intervals[h.ord,2])), sep = "")
+		graphics::barplot(heights[h.ord], names.arg=labels)
+	}
+	else warning("No change-points to arrange in order of importance.")
+
+}
diff --git a/R/draw_rects.R b/R/draw_rects.R
@@ -0,0 +1,45 @@
+#' Draw NSP intervals of significance as shaded rectangular areas on the current plot
+#'
+#' This function draws intervals of significance returned by one of the \code{nsp*} functions on the current plot. It shows them as shaded
+#' rectangular areas (hence the name of the function).
+#'
+#' The NSP algorithm is described in P. Fryzlewicz (2021) "Narrowest Significance Pursuit: inference for multiple change-points in linear
+#' models", preprint.
+#' 
+#' @param nsp.obj Object returned by one of the \code{nsp*} functions.
+#' @param yrange Vector of length two specifying the (lower, upper) vertical limit of the rectangles.
+#' @param density Density of the shading.
+#' @param col Colour of the shading.
+#' @param x.axis.start Time index the x axis starts from. The NSP intervals of significance get shifted by \code{x.axis.start-1} prior to plotting.
+#' @return The function does not return a value.
+#' @author Piotr Fryzlewicz, \email{p.fryzlewicz@@lse.ac.uk}
+#' @seealso \code{\link{draw_rects_advanced}}, \code{\link{nsp}}
+#' @examples
+#' set.seed(1)
+#' h <- c(rep(0, 150), 1:150)
+#' x.h <- h + stats::rnorm(300) * 50
+#' x.h.n <- nsp_poly(x.h, 1000, "sim", deg=1)
+#' draw_rects(x.h.n, c(-100, 100))
+#' @importFrom graphics rect
+#' @importFrom stats rnorm
+#' @export
+
+
+
+draw_rects <- function(nsp.obj, yrange, density = 10, col = "red", x.axis.start = 1) {
+
+	# Draw intervals of significance, as shaded rectangular areas, on the current plot.
+	# nsp.obj - quantity returned by one of the nsp_* functions.
+	# yrange - vector of length two specifying the (lower, upper) vertical limit of the rectangles.
+	# density - density of the shading; try using 10 or 20.
+	# col - colour of the shading.
+	# x.axis.start - time index the x axis stars from.
+
+	d <- dim(nsp.obj$intervals)
+	if (d[1]) for (i in 1:d[1]) {
+
+		graphics::rect(nsp.obj$intervals[i,1]+x.axis.start-1, yrange[1], nsp.obj$intervals[i,2]+x.axis.start-1, yrange[2], density=density, col=col)		
+
+	}
+
+}
diff --git a/R/draw_rects_advanced.R b/R/draw_rects_advanced.R
@@ -0,0 +1,53 @@
+#' Plot NSP intervals of significance at appropriate places along the graph of data
+#'
+#' This function plots the intervals of significance returned by one of the \code{nsp*} functions, at appropriate places along the graph of data.
+#' It shows them as shaded rectangular areas (hence the name of the function) "attached" to the graph of the data. Note: the data sequence \code{y} 
+#' needs to have been plotted beforehand.
+#'
+#' The NSP algorithm is described in P. Fryzlewicz (2021) "Narrowest Significance Pursuit: inference for multiple change-points in linear
+#' models", preprint.
+#' 
+#' @param y The data.
+#' @param nsp.obj Object returned by one of the \code{nsp*} functions with \code{y} on input.
+#' @param half.height Half-height of each rectangle; if \code{NULL} then set to twice the estimated standard deviation of the data.
+#' @param show.middles Whether to display lines corresponding to the midpoints of the rectanlges (rough change-point location estimates).
+#' @param col.middles Colour of the midpoint lines.
+#' @param lwd Line width for the midpoint lines.
+#' @param density Density of the shading.
+#' @param col.rects Colour of the shading.
+#' @param x.axis.start Time index the x axis starts from. The NSP intervals of significance get shifted by \code{x.axis.start-1} prior to plotting.
+#' @return The function does not return a value.
+#' @author Piotr Fryzlewicz, \email{p.fryzlewicz@@lse.ac.uk}
+#' @seealso \code{\link{draw_rects}}, \code{\link{nsp}}
+#' @examples
+#' set.seed(1)
+#' f <- c(rep(0, 100), 1:100, rep(101, 100))
+#' x.f <- f + 15 * stats::rnorm(300)
+#' x.f.n <- nsp_poly(x.f, 100, "sim", deg=1)
+#' stats::ts.plot(x.f)
+#' draw_rects_advanced(x.f, x.f.n, density = 3)
+#' @importFrom graphics rect
+#' @importFrom stats mad ts.plot rnorm
+#' @export
+
+
+
+draw_rects_advanced <- function(y, nsp.obj, half.height = NULL, show.middles = TRUE, col.middles = "blue", lwd = 3, density = 10, col.rects = "red", x.axis.start = 1) {
+
+
+	loc.est <- round((nsp.obj$intervals[,1] + nsp.obj$intervals[,2])/2)
+
+	if (is.null(half.height)) half.height <- 2 * stats::mad(diff(y)/sqrt(2))
+
+	centres.y <- y[loc.est]
+
+	d <- dim(nsp.obj$intervals)
+	if (d[1]) for (i in 1:d[1]) {
+
+		graphics::rect(nsp.obj$intervals[i,1]+x.axis.start-1, centres.y[i]-half.height, nsp.obj$intervals[i,2]+x.axis.start-1, centres.y[i]+half.height, density=density, col=col.rects)
+
+		if (show.middles) graphics::lines(rep(loc.est[i]+x.axis.start-1, 2), c(centres.y[i]-half.height, centres.y[i]+half.height), col=col.middles, lwd = lwd)
+
+	}
+
+}
diff --git a/R/nsp-package.R b/R/nsp-package.R
@@ -0,0 +1,17 @@
+#' nsp: Narrowest Significance Pursuit: Inference for Multiple Change-points in Linear Models
+#'
+#' Implementation of Narrowest Significance Pursuit (NSP), a general and
+#' flexible methodology for automatically detecting localised regions in data sequences
+#' which each must contain a change-point (understood as an abrupt change in the
+#' parameters of an underlying linear model), at a prescribed global significance level.
+#' NSP works with a wide range of distributional assumptions on the errors, and yields
+#' exact desired finite-sample coverage probabilities, regardless of the form or number
+#' of the regressors. A good place to start exploring the package are the \code{nsp*} functions.
+#' @author Piotr Fryzlewicz, \email{p.fryzlewicz@@lse.ac.uk}
+#' @references P. Fryzlewicz (2021) "Narrowest Significance Pursuit: inference for multiple change-points in linear
+#' models", preprint.
+#' @seealso \code{\link{nsp}}, \code{\link{nsp_poly}}, \code{\link{nsp_poly_ar}}, \code{\link{nsp_tvreg}}, \code{\link{nsp_selfnorm}},
+#' \code{\link{nsp_poly_selfnorm}}
+#' @aliases nsp-package
+#' @keywords internal
+"_PACKAGE"
diff --git a/R/nsp.R b/R/nsp.R
@@ -0,0 +1,62 @@
+#' Narrowest Significance Pursuit algorithm with general covariates and user-specified threshold
+#'
+#' This function runs the bare-bones Narrowest Significance Pursuit (NSP) algorithm on data sequence \code{y} and design matrix \code{x}
+#' to obtain localised regions (intervals) of the domain in which the parameters of the linear regression model y_t = beta(t) x_t + z_t significantly
+#' depart from constancy (e.g. by containing change-points). For any interval considered by the algorithm, 
+#' significance is achieved if the multiscale supremum-type
+#' deviation measure (see Details for the literature reference) exceeds \code{lambda}. This function is
+#' mainly to be used by the higher-level functions \code{\link{nsp_poly}}, \code{\link{nsp_poly_ar}} and \code{\link{nsp_tvreg}}
+#' (which estimate a suitable \code{lambda} so that a given global significance level is guaranteed), and human users may prefer to use those functions
+#' instead; however, \code{nsp} can also be run directly, if desired.
+#' The function works best when the errors z_t in the linear regression formulation y_t = beta(t) x_t + z_t are independent and
+#' identically distributed Gaussians.
+#' 
+#' The NSP algorithm is described in P. Fryzlewicz (2021) "Narrowest Significance Pursuit: inference for multiple change-points in linear
+#' models", preprint.
+#' 
+#' @param y A vector containing the data sequence being the response in the linear model y_t = beta(t) x_t + z_t.
+#' @param x The design matrix in the regression model above, with the regressors as columns.
+#' @param M The minimum number of intervals considered at each recursive stage, unless the number of all intervals is smaller, in which case all intervals
+#' are used.
+#' @param lambda The threshold parameter for measuring the significance of non-constancy (of the linear regression parameters), for use with the multiscale
+#' supremum-type deviation measure described in the paper.
+#' @param overlap If \code{FALSE}, then on discovering a significant interval, the search continues recursively to the left and to the right of that 
+#' interval. If \code{TRUE}, then the search continues to the left and to the right of the midpoint of that interval.
+#' @param buffer A non-negative integer specifying how many observations to leave out immediately to the left and to the right of a detected interval of
+#' significance before recursively continuing the search
+#' for the next interval.
+#' @return A list with the following components:
+#' \item{intervals}{A data frame containing the estimated intervals of significance: \code{starts} and \code{ends} is where the intervals start and end, 
+#'	respectively; \code{values} are the values of the deviation measure on each given interval; \code{midpoints} are the midpoints of the intervals.}
+#' \item{threshold.used}{The threshold \code{lambda}.}
+#' @author Piotr Fryzlewicz, \email{p.fryzlewicz@@lse.ac.uk}
+#' @seealso \code{\link{nsp_poly}}, \code{\link{nsp_poly_ar}}, \code{\link{nsp_tvreg}}, \code{\link{nsp_selfnorm}}, \code{\link{nsp_poly_selfnorm}}
+#' @examples
+#' set.seed(1)
+#' f <- c(1:100, 100:1, 1:100)
+#' y <- f + stats::rnorm(300) * 15
+#' x <- matrix(0, 300, 2)
+#' x[,1] <- 1
+#' x[,2] <- seq(from = 0, to = 1, length = 300)
+#' nsp(y, x, 100, 15 * thresh_kab(300, .1))
+#' @importFrom stats rnorm
+#' @export
+
+
+
+nsp <- function(y, x, M, lambda, overlap = FALSE, buffer = 0) {
+
+	d <- dim(x)
+
+	x.c <- cbind(y, x)
+
+	x.c.ads <- all_dyadic_scans_array(x.c)
+
+	res <- iter_random_checks_scan_array(c(1, d[1]), x.c.ads, M, lambda, overlap, buffer)
+
+	intervals <- data.frame(t(order_chron(res)))
+	colnames(intervals) <- c("starts", "ends", "values")
+	intervals$midpoints <- floor((intervals$starts+intervals$ends)/2)
+
+	list(intervals=intervals, threshold.used=lambda)
+}