Commit 24a457f: version 0.22
Rong Peng authored and cran-robot committed Feb 20, 2024
0 parents
Showing 11 changed files with 525 additions and 0 deletions.
17 changes: 17 additions & 0 deletions DESCRIPTION
@@ -0,0 +1,17 @@
Package: EncompassTest
Title: Direct Multi-Step Forecast Based Comparison of Nested Models via
an Encompassing Test
Version: 0.22
Authors@R:
person("Rong", "Peng", , "r.peng@soton.ac.uk", role = c("aut", "cre"))
Description: The encompassing test is based on the multi-step-ahead predictions of two nested models, as in Pitarakis, J. (2023) <doi:10.48550/arXiv.2312.16099>. The statistics are standardised to a normal distribution, and the null hypothesis is that the larger model contains no additional useful information. P-values are provided in the output.
Encoding: UTF-8
RoxygenNote: 7.3.1
License: GPL (>= 3)
Imports: pracma, stats
NeedsCompilation: no
Packaged: 2024-02-17 15:07:44 UTC; rp1e23
Author: Rong Peng [aut, cre]
Maintainer: Rong Peng <r.peng@soton.ac.uk>
Repository: CRAN
Date/Publication: 2024-02-19 17:10:22 UTC
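
As a quick orientation, here is a minimal end-to-end sketch of the intended workflow using the two exported functions below (a sketch only; the data y, x1, x2 and the chosen fractions are illustrative, not from the package documentation):

# Hypothetical workflow: encompassing test for two nested models
library(EncompassTest)
set.seed(1)
n  <- 200
x1 <- rnorm(n)                     # predictor in the smaller model
x2 <- rnorm(n)                     # extra predictor in the larger model
y  <- 0.5 * x1 + rnorm(n)          # x2 is irrelevant, so the null holds

# Recursive h-step-ahead forecast errors from each nested model
e1 <- recursive_hstep_fast(y, x1,            pi0 = 0.5, h = 1)  # smaller
e2 <- recursive_hstep_fast(y, cbind(x1, x2), pi0 = 0.5, h = 1)  # larger

res <- pred_encompass_dnorm(e1, e2, mu0 = 0.2)
res$Pval_T1_d_alrv   # large p-value: no evidence x2 adds information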
10 changes: 10 additions & 0 deletions MD5
@@ -0,0 +1,10 @@
8acdb62d428c0dbd110a80270044f5e6 *DESCRIPTION
4a3ba44bb94093ad80866ed16888d033 *NAMESPACE
d33de4671a5b58049b34f65c34ea868d *R/NW_lrv.R
d7e1c40cbaa9d652db46b8d09db0bf27 *R/andrews_lrv.R
c5e7818ce16dc1fc2b2e5a8289b5ee7a *R/pred_encompass_dnorm.R
09cb77eacd66257a2f19dd5886029d30 *R/recursivehstep.R
31e014d265fbc70db0755bb700abf4a4 *man/NW_lrv.Rd
680fbaa1dbaf7ba622be42a351b738fb *man/andrews_lrv.Rd
3478b4b1933c053910d9b6851ece01d7 *man/pred_encompass_dnorm.Rd
053d1a482257a66ce546f027ecbedd5d *man/recursive_hstep_fast.Rd
7 changes: 7 additions & 0 deletions NAMESPACE
@@ -0,0 +1,7 @@
# Generated by roxygen2: do not edit by hand

export(NW_lrv)
export(andrews_lrv)
export(pred_encompass_dnorm)
export(recursive_hstep_fast)
import(pracma)
83 changes: 83 additions & 0 deletions R/NW_lrv.R
@@ -0,0 +1,83 @@
#' Long-run covariance estimation using Newey-West (Bartlett) weights
#'
#' Given a vector of residuals, it computes the heteroskedasticity- and autocorrelation-consistent (HAC) long-run variance.
#'
#' @param u a vector of residual series, for which we recommend using the recursive residuals from the larger model.
#' @param nlag Non-negative integer containing the lag length to use. If empty or not included,
#' nlag = min(floor(1.2*T^(1/3)),T) will be used.
#' @param demean Logical true or false (or 1/0) indicating whether the mean should be subtracted before computing.
#' @return K by K matrix of the long-run variance using Newey-West (Bartlett) weights.
#' @examples
#' x<- rnorm(15);
#' #Newey-West covariance with automatic BW selection
#' lrcov = NW_lrv(x)
#' #Newey-West covariance with 10 lags
#' lrcov = NW_lrv(x, 10)
#' #Newey-West covariance with 10 lags and no demeaning
#' lrcov = NW_lrv(x, 10, 0)
#' @export


NW_lrv = function(u,nlag=NULL,demean=TRUE){

#Stopping criteria
if(is.null(u)){
stop("u must be a numeric series")
}else if(!is.numeric(u)){
stop("u must be a numeric series")
}

if(length(dim(u))>2){
stop('u must be a P by K matrix of data.')
}

P = dim(as.matrix(u))[1]
K = dim(as.matrix(u))[2]

if(is.null(nlag)){
nlag = min(floor(1.2*P^(1/3)),P)
}

if(is.null(demean)){
demean = TRUE
}

if(demean == 1){
demean = TRUE
}else if(demean == 0){
demean = FALSE
}else if(is.logical(demean)==FALSE){
stop('DEMEAN must be either logical true or false')
}

if(nlag != floor(nlag) || nlag < 0){
stop('NLAG must be a non-negative integer')
}


u = as.matrix(u)

#Initialisation of parameters
if(demean){
u = u - kronecker(matrix(1,P,K),mean(u))  #subtract the overall sample mean
}



#Bartlett (Newey-West) weights: w_i = (L+1-i)/(L+1) for i = 0,...,L
w=(nlag+1-(0:nlag))/(nlag+1)

#Lag-0 term of the covariance
V=t(u)%*%u/P

#Add Bartlett-weighted autocovariances at lags 1,...,nlag
for (i in seq_len(nlag)){  #seq_len() skips the loop entirely when nlag = 0
Gammai=(t(u[(i+1):P,])%*%u[1:(P-i),])/P
GplusGprime=Gammai+t(Gammai)
V=V+w[i+1]*GplusGprime
}

return(V)

}
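
A quick sanity check of the Bartlett weighting (a sketch only, relying on the zero-lag guard above): with nlag = 0 the estimator reduces to the plain demeaned sample variance, and the weights decay linearly in the lag:

# Illustrative check, not package code
set.seed(123)
u <- rnorm(50)
# with zero lags, the long-run variance is just the sample variance (divisor P)
all.equal(c(NW_lrv(u, nlag = 0)), mean((u - mean(u))^2))  # TRUE
# the Bartlett weights themselves: (L+1-i)/(L+1) for i = 0,...,L
L <- 4
(L + 1 - (0:L)) / (L + 1)   # 1.0 0.8 0.6 0.4 0.2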
50 changes: 50 additions & 0 deletions R/andrews_lrv.R
@@ -0,0 +1,50 @@
#' Long-run covariance estimation using Andrews quadratic spectral kernel.
#'
#' Given a vector of residuals, it computes the heteroskedasticity- and autocorrelation-consistent (HAC) long-run variance.
#'
#' @param e a vector of residual series, for which we recommend using the recursive residuals from the larger model.
#' @return a vector of the long-run variance using the Andrews quadratic spectral kernel HAC.
#' @references Andrews, D. W. (1991). Heteroskedasticity and autocorrelation consistent covariance matrix estimation. Econometrica: Journal of the Econometric Society, 817-858.
#' @import pracma
#' @examples
#' set.seed(2014)
#' x<- rnorm(15);
#' #Andrews kernel HAC
#' andrews_lrcov = andrews_lrv(x)
#' @export



andrews_lrv=function(e){
e=as.vector(e)
t=length(e)  #sample size (the transpose function t() below is unaffected)

#Andrews (1991) AR(1) plug-in bandwidth, capped by the fixed-coefficient rule k = 0.8
a = pracma::mldivide(e[2:t],e[1:(t-1)])  #least-squares estimate of the AR(1) coefficient
k = 0.8
c_left = 1.1447*(((4*(a^2)*t)/(((1+a)^2)*((1-a)^2)))^(1/3))
c_right = 1.1447*(((4*(k^2)*t)/(((1+k)^2)*((1-k)^2)))^(1/3))
l = floor(min(c_left,c_right))
lrv = t(e)%*%e/t  #lag-0 term

#cap the bandwidth at the sample size (avoids an empty-element error)
if (l >= t){
l = t

for (i in 1:(l-1)){
w = (1-(i/(1+l)))
lrv = lrv + 2*t(e[1:(t-i)])%*%e[(1+i):t]*w/t
}

}else if (l > 0){
for (i in 1:l){
w = (1-(i/(1+l)))
lrv = lrv + 2*t(e[1:(t-i)])%*%e[(1+i):t]*w/t
}
}



return(lrv)
}
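
For reference, the bandwidth above follows Andrews' (1991) AR(1) plug-in rule; a small standalone illustration of the computation (a sketch with hypothetical simulated data, not package code):

# Reproduce the plug-in bandwidth by hand (illustrative)
set.seed(2014)
e  <- as.vector(arima.sim(list(ar = 0.5), n = 200))
t_ <- length(e)
# least-squares fit of e[1:(t-1)] on e[2:t], as mldivide() does above
a  <- sum(e[2:t_] * e[1:(t_ - 1)]) / sum(e[2:t_]^2)
bw <- floor(min(
  1.1447 * ((4 * a^2 * t_) / ((1 + a)^2 * (1 - a)^2))^(1/3),
  1.1447 * ((4 * 0.8^2 * t_) / ((1 + 0.8)^2 * (1 - 0.8)^2))^(1/3)
))
bw  # number of weighted autocovariance terms entering the estimate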
106 changes: 106 additions & 0 deletions R/pred_encompass_dnorm.R
@@ -0,0 +1,106 @@
#' Direct Multi-Step Forecast Based Comparison of Nested Models via an Encompassing Test
#'
#' It calculates the dbar statistics for nested models, with the null hypothesis being that the larger
#' model fails to add any useful information to the smaller model, following Pitarakis (2023).
#' There are in total six versions of dbar, based on the assumptions on the variance
#' (homoskedastic or heteroskedastic) and on the residuals (original, adjusted via Newey-West, or adjusted via Andrews).
#' All dbar statistics are standardised to a standard N(0,1) normal distribution,
#' and the corresponding p-values are provided.
#'
#'
#' @param e1hat a vector of out-of-sample forecast errors from model 1 (the smaller model)
#' @param e2hat a vector of out-of-sample forecast errors from model 2 (the larger model)
#' @param mu0 Fraction of the sample, which should be strictly between 0 and 1 (0.5 should be avoided)
#' @return A list of the normalised d statistics and their corresponding p-values.
#' @references Pitarakis, J. Y. (2023). Direct Multi-Step Forecast based Comparison of Nested Models via an Encompassing Test. arXiv preprint arXiv:2312.16099.
#' @examples
#' e1<- rnorm(15);
#' e2<- rnorm(15);
#' temp1 <- pred_encompass_dnorm(e1,e2,mu0=0.2)
#' temp1$T1_d_alrv #normalised d statistics with Andrews quadratic kernel long-run variance
#' temp1$Pval_T1_d_alrv #P value of it
#' @export

pred_encompass_dnorm=function(e1hat,e2hat,mu0){

#Stopping criteria
if(is.null(e1hat)){
stop("e1hat must be a numeric series")
}else if(!is.numeric(e1hat)){
stop("e1hat must be a numeric series")
}

if(is.null(e2hat)){
stop("e2hat must be a numeric series")
}else if(!is.numeric(e2hat)){
stop("e2hat must be a numeric series")
}

e1hat <- as.matrix(e1hat)
e2hat <- as.matrix(e2hat)

if(dim(e1hat)[1] != dim(e2hat)[1]){
stop("e1hat and e2hat must have same length")
}else if(dim(e1hat)[2]>1 | dim(e2hat)[2]>1){
stop("e1hat and e2hat must be one dimension")
}

if(is.null(mu0)){
stop("mu0 must be between 0 and 1")
}else if(!is.numeric(mu0)){
stop("mu0 must be between 0 and 1")
}else if(mu0 <= 0 || mu0 >= 1){
stop("mu0 must be between 0 and 1")
}

if(mu0 == 0.5 ){
stop("mu0 cannot equal 0.5")
}


n=dim(e1hat)[1]

time_vec = c(1:n)
m0 = round(n*mu0)
w = (time_vec<=m0)

e1sq = e1hat^2
e2sq = e2hat^2
e12 = e1hat*e2hat
sigsq2 = mean(e2sq)
e2sq_demean = e2sq-sigsq2

fm = ((1-2*mu0)^2)/(4*mu0*(1-mu0))
nw_lags = min(floor(1.2*n^(1/3)),n)

#d= e1sq - 0.5*((1/mu0)*e12*w+(1/(1-mu0))*e12*(1-w))
d = (e1sq-sigsq2) - 0.5*((1/mu0)*(e12-sigsq2)*w+(1/(1-mu0))*(e12-sigsq2)*(1-w))

sigsq_d = t(d-mean(d))%*%(d-mean(d))/n
sigsq_d_nw = NW_lrv(d,nw_lags,1)
sigsq_d_alrv = andrews_lrv(d)

phihatsq = t(e2sq_demean)%*%(e2sq_demean)/n
phihatsq_nw = NW_lrv(e2sq_demean,nw_lags,1)
phihatsq_alrv = andrews_lrv(e2sq_demean)

T1 = sqrt(n)*mean(d)/sqrt(fm*phihatsq)
T1_nw = sqrt(n)*mean(d)/sqrt(fm*phihatsq_nw)
T1_alrv = sqrt(n)*mean(d)/sqrt(fm*phihatsq_alrv)

T1_d = sqrt(n)*mean(d)/sqrt(sigsq_d)
T1_d_nw = sqrt(n)*mean(d)/sqrt(sigsq_d_nw)
T1_d_alrv = sqrt(n)*mean(d)/sqrt(sigsq_d_alrv)

Pval_T1 = stats::pnorm(T1, lower.tail = FALSE)
Pval_T1_nw = stats::pnorm(T1_nw, lower.tail = FALSE)
Pval_T1_alrv = stats::pnorm(T1_alrv, lower.tail = FALSE)

Pval_T1_d = stats::pnorm(T1_d, lower.tail = FALSE)
Pval_T1_d_nw = stats::pnorm(T1_d_nw, lower.tail = FALSE)
Pval_T1_d_alrv = stats::pnorm(T1_d_alrv, lower.tail = FALSE)

return(list(T1=T1,T1_nw=T1_nw,T1_alrv=T1_alrv,T1_d=T1_d,T1_d_nw=T1_d_nw,T1_d_alrv=T1_d_alrv,
Pval_T1=Pval_T1,Pval_T1_nw=Pval_T1_nw,Pval_T1_alrv=Pval_T1_alrv,
Pval_T1_d=Pval_T1_d,Pval_T1_d_nw=Pval_T1_d_nw,Pval_T1_d_alrv=Pval_T1_d_alrv) )

}
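
Since all six statistics are asymptotically standard normal under the null, their p-values can be inspected side by side; a small illustration (hypothetical data):

# Compare the six variants at once (illustrative)
set.seed(42)
e1  <- rnorm(100)
e2  <- rnorm(100)
out <- pred_encompass_dnorm(e1, e2, mu0 = 0.2)
unlist(out[grep("^Pval", names(out))])  # the six p-values, named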
119 changes: 119 additions & 0 deletions R/recursivehstep.R
@@ -0,0 +1,119 @@
#' Forecasting h-steps ahead using Recursive Least Squares Fast
#'
#' Consider the following LS-fitted model with intercept:
#' y_(t+h) = beta_0 + x_t * beta + u_(t+h)
#' which is used to generate out-of-sample forecasts of y, h-steps ahead (h = 1, 2, 3, ...).
#' Notes: (1) first estimation window is (1,...,k0) and last window is
#' (1,...,n-h) for k0 = round(n*pi0). First forecast is yhat(k0+h|k0)
#' and last forecast is yhat(n|n-h). There are a total of (n-h-k0+1)
#' forecasts and corresponding forecast errors. (2) this fast version of the
#' recursive least squares algorithm uses the Sherman-Morrison matrix
#' formula to avoid matrix inversions at each recursion.
#'
#' recursive_hstep_fast is the fast version, which avoids recursively recomputing the matrix inverse by using the Sherman-Morrison formula.
#' recursive_hstep_slow is the slow version, which recalculates the standard OLS estimates recursively.
#'
#' @param y an outcome series, which should be numeric and one dimensional.
#' @param x a predictor matrix (an intercept is added automatically).
#' @param pi0 Fraction of the sample, which should be strictly between 0 and 1.
#' @param h Number of steps ahead to predict, which should be a positive integer.
#' @return Series of estimated out-of-sample forecast errors.
#' @examples
#' x<- rnorm(15);
#' y<- x+rnorm(15);
#' temp1 <- recursive_hstep_fast(y,x,pi0=0.5,h=1);
#' @export



recursive_hstep_fast=function(y,x,pi0,h)
{
#Stopping criteria
if(is.null(y)){
stop("y must be a numeric series")
}

y <- as.matrix(y)
ny=dim(y)[1]
py=dim(y)[2]

if(py > 1){
stop("y must be one dimension")
}

if(anyNA(as.numeric(y))){
stop("y must not contain NA")
}

if(is.null(x)){
stop("x must be a numeric series")
}

x <- as.matrix(x)
n=dim(x)[1]
p=dim(x)[2] #[n,p] = size(X);

if(anyNA(as.numeric(x))){
stop("x must not contain NA")
}

if(ny != n){
stop("y and x must have same length of rows")
}

if(is.null(pi0)){
stop("pi0 must be between 0 and 1")
}else if(!is.numeric(pi0)){
stop("pi0 must be between 0 and 1")
}

if(pi0 >= 1 || pi0 <= 0){
stop("pi0 must be between 0 and 1")
}

if(is.null(h)){
stop("h must be a positive integer")
}else if(!is.numeric(h)){
stop("h must be a positive integer")
}

h <- as.integer(h)

if(h <= 0 || h > (n-1)){
stop("h must be a positive integer")
}


#Initialisation of parameters
x=as.matrix(x)
x= cbind(rep(1,n),x)
nc=dim(x)[2]
k0 = round(n * pi0)

M = array(NA, dim = c(nc,nc,n-k0+1))
beta = matrix(NA,nc,n-k0+1) #
ehat = matrix(NA,n-k0-h+1,1) #


#Calculate the first M and Beta at k0
iXmatk0 = solve(t(x[1:(k0-h),])%*%x[1:(k0-h),]) #M_k0
bhat_k0 = iXmatk0%*%(t(x[1:(k0-h),])%*%y[(1+h):(k0)]) #Beta_k0

M[,,1] = iXmatk0
beta[,1] = bhat_k0


#iteratively update M and beta via Sherman-Morrison as the estimation window grows from k0 to n-h
for (t in 1:(n-k0)){
M[,,t+1] = M[,,t]-((M[,,t]%*%x[t+k0-h,]%*%t(x[t+k0-h,])%*%M[,,t])/c(1+t(x[t+k0-h,])%*%M[,,t]%*%x[t+k0-h,]))
beta[,t+1] = beta[,t]+(M[,,t]%*%x[t+k0-h,]%*%(y[t+k0]-x[t+k0-h,]%*%beta[,t]))/c(1+t(x[t+k0-h,])%*%M[,,t]%*%x[t+k0-h,])
}

#compute forecast errors ehat for s = k0+h,...,n
for (s in (k0+h):n){
ehat[s-k0-h+1] = y[s]-x[s-h,]%*%beta[,s-k0-h+1]
}

return(ehat)
}
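
As a check on the Sherman-Morrison recursion, the fast forecast errors can be verified against a direct OLS refit at every window (a sketch under the same model and window conventions as the code above; variable names are illustrative):

# Verify the fast recursion against direct OLS refitting (illustrative)
set.seed(7)
n   <- 40; h <- 1; pi0 <- 0.5
x   <- rnorm(n)
y   <- x + rnorm(n)
k0  <- round(n * pi0)

ehat_fast <- recursive_hstep_fast(y, x, pi0 = pi0, h = h)
ehat_slow <- sapply((k0 + h):n, function(s) {
  m   <- s - h                             # estimation window ends at m
  fit <- lm(y[(1 + h):m] ~ x[1:(m - h)])   # regress y_(t+h) on x_t
  y[s] - sum(c(1, x[s - h]) * coef(fit))   # forecast error yhat(s|s-h)
})
all.equal(c(ehat_fast), ehat_slow)  # TRUE up to numerical tolerance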
