Skip to content

Commit

Permalink
version 1.0.0
Browse files Browse the repository at this point in the history
  • Loading branch information
Piotr Fryzlewicz authored and cran-robot committed Dec 21, 2021
0 parents commit ad4f1c2
Show file tree
Hide file tree
Showing 33 changed files with 2,229 additions and 0 deletions.
28 changes: 28 additions & 0 deletions DESCRIPTION
@@ -0,0 +1,28 @@
Package: nsp
Title: Inference for Multiple Change-Points in Linear Models
Version: 1.0.0
Authors@R:
person(given = "Piotr",
family = "Fryzlewicz",
role = c("aut", "cre"),
email = "p.fryzlewicz@lse.ac.uk",
comment = c(ORCID = "0000-0002-9676-902X"))
Description: Implementation of Narrowest Significance Pursuit, a general and
flexible methodology for automatically detecting localised regions in data sequences
which each must contain a change-point (understood as an abrupt change in the
parameters of an underlying linear model), at a prescribed global significance level.
Narrowest Significance Pursuit works with a wide range of distributional assumptions
on the errors, and yields exact desired finite-sample coverage probabilities,
regardless of the form or number of the covariates. For details, see P. Fryzlewicz
(2021) <https://stats.lse.ac.uk/fryzlewicz/nsp/nsp.pdf>.
License: GPL (>= 3)
Encoding: UTF-8
RoxygenNote: 7.1.2
Depends: R (>= 3.0.0)
Imports: lpSolve
NeedsCompilation: no
Packaged: 2021-12-20 18:01:02 UTC; piotr
Author: Piotr Fryzlewicz [aut, cre] (<https://orcid.org/0000-0002-9676-902X>)
Maintainer: Piotr Fryzlewicz <p.fryzlewicz@lse.ac.uk>
Repository: CRAN
Date/Publication: 2021-12-21 07:10:06 UTC
32 changes: 32 additions & 0 deletions MD5
@@ -0,0 +1,32 @@
32e6e98615a3993a661f654362ffdeff *DESCRIPTION
86af9173641a743387c62f795aee6cf4 *NAMESPACE
c3c21fdbac789b208650c26bf7b44623 *R/cov_dep_multi_norm.R
872026c7b1fa364056fc7de872db50f3 *R/cov_dep_multi_norm_poly.R
bf5d56acaaa66f4101e6eb67e5ba1fa2 *R/cpt_importance.R
cec2e87fd72f7382047a757e9d2a8176 *R/draw_rects.R
c30683981f3bb48e520b17f0dbb5e97c *R/draw_rects_advanced.R
fb263eaa59ca89bc71ff663ae31c4d4d *R/nsp-package.R
9711ffb7e6465b71e8e7b98ffb948f4f *R/nsp.R
6609780656f3f00112a3d831058e2d96 *R/nsp_poly.R
bbd74d3fdf9ef809c9aedaf714414727 *R/nsp_poly_ar.R
4f6c4cfc9b678bb5bfd529a9230ec435 *R/nsp_poly_selfnorm.R
37c926a50f6b0a8711bfac2192bafbc9 *R/nsp_selfnorm.R
4e45ad019e7b6de40367052ce6e01ecc *R/nsp_tvreg.R
d1adf2a441e66dc4dcf3ef71a9f5e0e4 *R/sim_max_holder.R
cdfab7ef02c72de2db3e132130033f08 *R/sysdata.rda
4cb7d46f41a25aae1e6b99bf28300233 *R/thresh_kab.R
5b0b175efc2866cc470276157774fa81 *R/utils.R
25cd545609f401fd272bab374b36f94e *man/cov_dep_multi_norm.Rd
3a65aee0302a2b58d7123bede7e00f8e *man/cov_dep_multi_norm_poly.Rd
c8e00274924bdbc915f37df05815a195 *man/cpt_importance.Rd
e689a11d95e501be0855a5b4b48bfdab *man/draw_rects.Rd
04aedb723d6760d0def565b1bd4461f8 *man/draw_rects_advanced.Rd
e69a11d65123f0668b52a1e1cefd5ca7 *man/nsp-package.Rd
8ce1fa5682ed11ec5e91292a65966d23 *man/nsp.Rd
dcf05ff43dc4c4774b24bc4769ba987b *man/nsp_poly.Rd
6410959b4722720b5a37afe89de63dce *man/nsp_poly_ar.Rd
48b009f3342d5b9837fad8c04034a8a5 *man/nsp_poly_selfnorm.Rd
33ac803c18659565e5efb12ab9ef8a7a *man/nsp_selfnorm.Rd
afdb817b92dc9340cca9382fc0445ae6 *man/nsp_tvreg.Rd
751d41974ac00f66e0f895e67ec6443d *man/sim_max_holder.Rd
a9e926a95af068cf1c193ee190a1e201 *man/thresh_kab.Rd
22 changes: 22 additions & 0 deletions NAMESPACE
@@ -0,0 +1,22 @@
# Generated by roxygen2: do not edit by hand

export(cov_dep_multi_norm)
export(cov_dep_multi_norm_poly)
export(cpt_importance)
export(draw_rects)
export(draw_rects_advanced)
export(nsp)
export(nsp_poly)
export(nsp_poly_ar)
export(nsp_poly_selfnorm)
export(nsp_selfnorm)
export(nsp_tvreg)
export(sim_max_holder)
export(thresh_kab)
importFrom(graphics,barplot)
importFrom(graphics,rect)
importFrom(stats,filter)
importFrom(stats,mad)
importFrom(stats,quantile)
importFrom(stats,rnorm)
importFrom(stats,ts.plot)
48 changes: 48 additions & 0 deletions R/cov_dep_multi_norm.R
@@ -0,0 +1,48 @@
#' Simulate covariate-dependent multiscale sup-norm for use in NSP
#'
#' This function simulates the multiscale sup-norm adjusted for the form of the covariates, as described in Section 5.3
#' of the paper. This is done for i.i.d. N(0,1) innovations.
#'
#' The NSP algorithm is described in P. Fryzlewicz (2021) "Narrowest Significance Pursuit: inference for multiple change-points in linear
#' models", preprint.
#'
#' @param x The design matrix with the regressors (covariates) as columns.
#' @param N Desired number of simulated values of the norm.
#' @return Sample of size \code{N} containing the simulated norms.
#' @author Piotr Fryzlewicz, \email{p.fryzlewicz@@lse.ac.uk}
#' @seealso \code{\link{cov_dep_multi_norm_poly}}, \code{\link{sim_max_holder}}
#' @examples
#' set.seed(1)
#' g <- c(rep(0, 100), rep(2, 100))
#' x.g <- g + stats::rnorm(200)
#' mscale.norm.200 <- cov_dep_multi_norm(matrix(1, 200, 1), 100)
#' nsp_poly(x.g, 100, thresh.val = stats::quantile(mscale.norm.200, .95))
#' @importFrom stats rnorm quantile
#' @export




cov_dep_multi_norm <- function(x, N = 1000) {

# Covariate-dependent multiresolution norm simulation

d <- dim(x)

res <- rep(0, N)

for (i in 1:N) {

z <- stats::rnorm(d[1])

x.c <- cbind(z, x)

x.c.ads <- all_dyadic_scans_array(x.c)

res[i] <- check_interval_array(c(1, d[1]), x.c.ads, 0)

}

res

}
39 changes: 39 additions & 0 deletions R/cov_dep_multi_norm_poly.R
@@ -0,0 +1,39 @@
#' Simulate covariate-dependent multiscale sup-norm for use in NSP, for piecewise-polynomial models
#'
#' This function simulates the multiscale sup-norm adjusted for the form of the covariates, as described in Section 5.3
#' of the paper, for piecewise-polynomial models of degree \code{deg}. This is done for i.i.d. N(0,1) innovations.
#'
#' The NSP algorithm is described in P. Fryzlewicz (2021) "Narrowest Significance Pursuit: inference for multiple change-points in linear
#' models", preprint.
#'
#' @param n The data length (for which the multiscale norm is to be simulated)
#' @param deg The degree of the polynomial model (0 for the piecewise-constant model; 1 for piecewise-linearity, etc.).
#' @param N Desired number of simulated values of the norm.
#' @return Sample of size \code{N} containing the simulated norms.
#' @author Piotr Fryzlewicz, \email{p.fryzlewicz@@lse.ac.uk}
#' @seealso \code{\link{cov_dep_multi_norm}}, \code{\link{sim_max_holder}}
#' @examples
#' set.seed(1)
#' g <- c(rep(0, 100), rep(2, 100))
#' x.g <- g + stats::rnorm(200)
#' mscale.norm.200 <- cov_dep_multi_norm_poly(200, 0, 100)
#' nsp_poly(x.g, 100, thresh.val = stats::quantile(mscale.norm.200, .95))
#' @importFrom stats rnorm quantile
#' @export




cov_dep_multi_norm_poly <- function(n, deg, N = 10000) {

x.c <- matrix(0, n, deg+1)

for (i in 1:(deg+1)) {

x.c[,i] <- seq(from=0, to=1, length=n)^(i-1)

}

cov_dep_multi_norm(x.c, N)

}
41 changes: 41 additions & 0 deletions R/cpt_importance.R
@@ -0,0 +1,41 @@
#' Change-point importance (prominence) plot
#'
#' This function produces a change-point prominence plot based on the NSP object provided. The heights of the bars are arranged in non-decreasing
#' order and correspond directly to the lengths of the NSP intervals of significance. Each bar is labelled as s-e where s (e) is the start (end) of the
#' corresponding NSP interval of significance, respectively. The change-points corresponding to the narrower intervals can be seen as more prominent.
#'
#' The NSP algorithm is described in P. Fryzlewicz (2021) "Narrowest Significance Pursuit: inference for multiple change-points in linear
#' models", preprint.
#'
#' @param nsp.obj Object returned by one of the \code{nsp*} functions.
#' @return The function does not return a value.
#' @author Piotr Fryzlewicz, \email{p.fryzlewicz@@lse.ac.uk}
#' @seealso \code{\link{draw_rects}}, \code{\link{draw_rects_advanced}}
#' @examples
#' set.seed(1)
#' f <- c(rep(0, 100), 1:100, rep(101, 100))
#' x.f <- f + 15 * stats::rnorm(300)
#' x.f.n <- nsp_poly(x.f, 100, "sim", deg=1)
#' cpt_importance(x.f.n)
#' @importFrom graphics barplot
#' @importFrom stats rnorm
#' @export




cpt_importance <- function(nsp.obj) {

# Change-point prominence plot as described in Section 4 of the paper.
# nsp.obj - quantity returned by one of the nsp_* functions.

d <- dim(nsp.obj$intervals)
if (d[1]) {
heights <- nsp.obj$intervals[,2] - nsp.obj$intervals[,1]
h.ord <- order(heights)
labels <- paste(as.character(round(nsp.obj$intervals[h.ord,1])), "-", as.character(round(nsp.obj$intervals[h.ord,2])), sep = "")
graphics::barplot(heights[h.ord], names.arg=labels)
}
else warning("No change-points to arrange in order of importance.")

}
45 changes: 45 additions & 0 deletions R/draw_rects.R
@@ -0,0 +1,45 @@
#' Draw NSP intervals of significance as shaded rectangular areas on the current plot
#'
#' This function draws intervals of significance returned by one of the \code{nsp*} functions on the current plot. It shows them as shaded
#' rectangular areas (hence the name of the function).
#'
#' The NSP algorithm is described in P. Fryzlewicz (2021) "Narrowest Significance Pursuit: inference for multiple change-points in linear
#' models", preprint.
#'
#' @param nsp.obj Object returned by one of the \code{nsp*} functions.
#' @param yrange Vector of length two specifying the (lower, upper) vertical limit of the rectangles.
#' @param density Density of the shading.
#' @param col Colour of the shading.
#' @param x.axis.start Time index the x axis starts from. The NSP intervals of significance get shifted by \code{x.axis.start-1} prior to plotting.
#' @return The function does not return a value.
#' @author Piotr Fryzlewicz, \email{p.fryzlewicz@@lse.ac.uk}
#' @seealso \code{\link{draw_rects_advanced}}, \code{\link{nsp}}
#' @examples
#' set.seed(1)
#' h <- c(rep(0, 150), 1:150)
#' x.h <- h + stats::rnorm(300) * 50
#' x.h.n <- nsp_poly(x.h, 1000, "sim", deg=1)
#' draw_rects(x.h.n, c(-100, 100))
#' @importFrom graphics rect
#' @importFrom stats rnorm
#' @export



draw_rects <- function(nsp.obj, yrange, density = 10, col = "red", x.axis.start = 1) {

# Draw intervals of significance, as shaded rectangular areas, on the current plot.
# nsp.obj - quantity returned by one of the nsp_* functions.
# yrange - vector of length two specifying the (lower, upper) vertical limit of the rectangles.
# density - density of the shading; try using 10 or 20.
# col - colour of the shading.
# x.axis.start - time index the x axis stars from.

d <- dim(nsp.obj$intervals)
if (d[1]) for (i in 1:d[1]) {

graphics::rect(nsp.obj$intervals[i,1]+x.axis.start-1, yrange[1], nsp.obj$intervals[i,2]+x.axis.start-1, yrange[2], density=density, col=col)

}

}
53 changes: 53 additions & 0 deletions R/draw_rects_advanced.R
@@ -0,0 +1,53 @@
#' Plot NSP intervals of significance at appropriate places along the graph of data
#'
#' This function plots the intervals of significance returned by one of the \code{nsp*} functions, at appropriate places along the graph of data.
#' It shows them as shaded rectangular areas (hence the name of the function) "attached" to the graph of the data. Note: the data sequence \code{y}
#' needs to have been plotted beforehand.
#'
#' The NSP algorithm is described in P. Fryzlewicz (2021) "Narrowest Significance Pursuit: inference for multiple change-points in linear
#' models", preprint.
#'
#' @param y The data.
#' @param nsp.obj Object returned by one of the \code{nsp*} functions with \code{y} on input.
#' @param half.height Half-height of each rectangle; if \code{NULL} then set to twice the estimated standard deviation of the data.
#' @param show.middles Whether to display lines corresponding to the midpoints of the rectanlges (rough change-point location estimates).
#' @param col.middles Colour of the midpoint lines.
#' @param lwd Line width for the midpoint lines.
#' @param density Density of the shading.
#' @param col.rects Colour of the shading.
#' @param x.axis.start Time index the x axis starts from. The NSP intervals of significance get shifted by \code{x.axis.start-1} prior to plotting.
#' @return The function does not return a value.
#' @author Piotr Fryzlewicz, \email{p.fryzlewicz@@lse.ac.uk}
#' @seealso \code{\link{draw_rects}}, \code{\link{nsp}}
#' @examples
#' set.seed(1)
#' f <- c(rep(0, 100), 1:100, rep(101, 100))
#' x.f <- f + 15 * stats::rnorm(300)
#' x.f.n <- nsp_poly(x.f, 100, "sim", deg=1)
#' stats::ts.plot(x.f)
#' draw_rects_advanced(x.f, x.f.n, density = 3)
#' @importFrom graphics rect
#' @importFrom stats mad ts.plot rnorm
#' @export



draw_rects_advanced <- function(y, nsp.obj, half.height = NULL, show.middles = TRUE, col.middles = "blue", lwd = 3, density = 10, col.rects = "red", x.axis.start = 1) {


loc.est <- round((nsp.obj$intervals[,1] + nsp.obj$intervals[,2])/2)

if (is.null(half.height)) half.height <- 2 * stats::mad(diff(y)/sqrt(2))

centres.y <- y[loc.est]

d <- dim(nsp.obj$intervals)
if (d[1]) for (i in 1:d[1]) {

graphics::rect(nsp.obj$intervals[i,1]+x.axis.start-1, centres.y[i]-half.height, nsp.obj$intervals[i,2]+x.axis.start-1, centres.y[i]+half.height, density=density, col=col.rects)

if (show.middles) graphics::lines(rep(loc.est[i]+x.axis.start-1, 2), c(centres.y[i]-half.height, centres.y[i]+half.height), col=col.middles, lwd = lwd)

}

}
17 changes: 17 additions & 0 deletions R/nsp-package.R
@@ -0,0 +1,17 @@
#' nsp: Narrowest Significance Pursuit: Inference for Multiple Change-points in Linear Models
#'
#' Implementation of Narrowest Significance Pursuit (NSP), a general and
#' flexible methodology for automatically detecting localised regions in data sequences
#' which each must contain a change-point (understood as an abrupt change in the
#' parameters of an underlying linear model), at a prescribed global significance level.
#' NSP works with a wide range of distributional assumptions on the errors, and yields
#' exact desired finite-sample coverage probabilities, regardless of the form or number
#' of the regressors. A good place to start exploring the package are the \code{nsp*} functions.
#' @author Piotr Fryzlewicz, \email{p.fryzlewicz@@lse.ac.uk}
#' @references P. Fryzlewicz (2021) "Narrowest Significance Pursuit: inference for multiple change-points in linear
#' models", preprint.
#' @seealso \code{\link{nsp}}, \code{\link{nsp_poly}}, \code{\link{nsp_poly_ar}}, \code{\link{nsp_tvreg}}, \code{\link{nsp_selfnorm}},
#' \code{\link{nsp_poly_selfnorm}}
#' @aliases nsp-package
#' @keywords internal
"_PACKAGE"
62 changes: 62 additions & 0 deletions R/nsp.R
@@ -0,0 +1,62 @@
#' Narrowest Significance Pursuit algorithm with general covariates and user-specified threshold
#'
#' This function runs the bare-bones Narrowest Significance Pursuit (NSP) algorithm on data sequence \code{y} and design matrix \code{x}
#' to obtain localised regions (intervals) of the domain in which the parameters of the linear regression model y_t = beta(t) x_t + z_t significantly
#' depart from constancy (e.g. by containing change-points). For any interval considered by the algorithm,
#' significance is achieved if the multiscale supremum-type
#' deviation measure (see Details for the literature reference) exceeds \code{lambda}. This function is
#' mainly to be used by the higher-level functions \code{\link{nsp_poly}}, \code{\link{nsp_poly_ar}} and \code{\link{nsp_tvreg}}
#' (which estimate a suitable \code{lambda} so that a given global significance level is guaranteed), and human users may prefer to use those functions
#' instead; however, \code{nsp} can also be run directly, if desired.
#' The function works best when the errors z_t in the linear regression formulation y_t = beta(t) x_t + z_t are independent and
#' identically distributed Gaussians.
#'
#' The NSP algorithm is described in P. Fryzlewicz (2021) "Narrowest Significance Pursuit: inference for multiple change-points in linear
#' models", preprint.
#'
#' @param y A vector containing the data sequence being the response in the linear model y_t = beta(t) x_t + z_t.
#' @param x The design matrix in the regression model above, with the regressors as columns.
#' @param M The minimum number of intervals considered at each recursive stage, unless the number of all intervals is smaller, in which case all intervals
#' are used.
#' @param lambda The threshold parameter for measuring the significance of non-constancy (of the linear regression parameters), for use with the multiscale
#' supremum-type deviation measure described in the paper.
#' @param overlap If \code{FALSE}, then on discovering a significant interval, the search continues recursively to the left and to the right of that
#' interval. If \code{TRUE}, then the search continues to the left and to the right of the midpoint of that interval.
#' @param buffer A non-negative integer specifying how many observations to leave out immediately to the left and to the right of a detected interval of
#' significance before recursively continuing the search
#' for the next interval.
#' @return A list with the following components:
#' \item{intervals}{A data frame containing the estimated intervals of significance: \code{starts} and \code{ends} is where the intervals start and end,
#' respectively; \code{values} are the values of the deviation measure on each given interval; \code{midpoints} are the midpoints of the intervals.}
#' \item{threshold.used}{The threshold \code{lambda}.}
#' @author Piotr Fryzlewicz, \email{p.fryzlewicz@@lse.ac.uk}
#' @seealso \code{\link{nsp_poly}}, \code{\link{nsp_poly_ar}}, \code{\link{nsp_tvreg}}, \code{\link{nsp_selfnorm}}, \code{\link{nsp_poly_selfnorm}}
#' @examples
#' set.seed(1)
#' f <- c(1:100, 100:1, 1:100)
#' y <- f + stats::rnorm(300) * 15
#' x <- matrix(0, 300, 2)
#' x[,1] <- 1
#' x[,2] <- seq(from = 0, to = 1, length = 300)
#' nsp(y, x, 100, 15 * thresh_kab(300, .1))
#' @importFrom stats rnorm
#' @export



nsp <- function(y, x, M, lambda, overlap = FALSE, buffer = 0) {

d <- dim(x)

x.c <- cbind(y, x)

x.c.ads <- all_dyadic_scans_array(x.c)

res <- iter_random_checks_scan_array(c(1, d[1]), x.c.ads, M, lambda, overlap, buffer)

intervals <- data.frame(t(order_chron(res)))
colnames(intervals) <- c("starts", "ends", "values")
intervals$midpoints <- floor((intervals$starts+intervals$ends)/2)

list(intervals=intervals, threshold.used=lambda)
}

0 comments on commit ad4f1c2

Please sign in to comment.