version 0.1.1

cran · Mar 24, 2024 · ccb3655 · ccb3655
commit ccb3655
Show file tree

Hide file tree

Showing 42 changed files with 5,018 additions and 0 deletions.
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -0,0 +1,21 @@
+Package: tensorMiss
+Type: Package
+Title: Handle Missing Tensor Data with C++ Integration
+Version: 0.1.1
+Date: 2024-03-21
+Author: Zetai Cen [aut, cre]
+Authors@R: c(person("Zetai", "Cen", email = "z.cen@lse.ac.uk", role=c("aut","cre")))
+Maintainer: Zetai Cen <z.cen@lse.ac.uk>
+Description: To handle higher-order tensor data. See Kolda and Bader (2009) <doi:10.1137/07070111X> for details on tensor. While existing packages on tensor data extend the base 'array' class to some data classes, this package serves as an alternative resort to handle tensor only as 'array' class.
+    Some functionalities related to missingness are also supported.
+License: GPL-3
+Imports: Rcpp (>= 1.0.11), RcppEigen, rTensor, stats
+Encoding: UTF-8
+LinkingTo: Rcpp, RcppEigen
+RoxygenNote: 7.2.3
+Suggests: knitr, rmarkdown
+VignetteBuilder: knitr
+NeedsCompilation: yes
+Packaged: 2024-03-22 20:22:23 UTC; pc
+Repository: CRAN
+Date/Publication: 2024-03-23 11:30:05 UTC
diff --git a/MD5 b/MD5
@@ -0,0 +1,41 @@
+29011b869a54cd352fbcb6b5db12b656 *DESCRIPTION
+44635a834108dbfee3063ae3501897fc *NAMESPACE
+7f3c4c4a6bb7a74bbde366d10d8bf8c6 *R/RcppExports.R
+4a9ee5db69a8ca3b2867be87649f9fb2 *R/fle.R
+d674a10552c9341ff6a40ee91584b992 *R/miss_factor_est.R
+10e21a98a110231993585bdf838faf53 *R/miss_gen.R
+13a73096ca3c6a5cc08872ecd95e99cb *R/qMSE.R
+7a85eb7827a6ad4ff6fc8b6c1a9933bd *R/refold.R
+41c8afc9da68804494e28d9f286b039a *R/sigmaD.R
+9284f124d11383f7ac89a158d18077ca *R/tensor_gen.R
+b4d5c65eaebfd639c6b984834eb58c0f *R/ttm.R
+9bb7041dcc1a54000424b2d622dc20c8 *R/unfold.R
+78cb04b6c150e907429915e4a91df9f0 *build/vignette.rds
+74200160cb64a09a015a84af025c14d4 *inst/doc/A-short-introduction-to-tensorMiss.R
+f47b7c6b6aa310e29821579d2726025b *inst/doc/A-short-introduction-to-tensorMiss.Rmd
+86d8094be17224bf8ad604a52a756106 *inst/doc/A-short-introduction-to-tensorMiss.html
+bace2ead001085e4c8f897a96c5caf4f *man/fle.Rd
+0003659c971bf0d5f9e2d9448bace66d *man/miss_factor_est.Rd
+6088654fae42ece750baa6ec5c458be3 *man/miss_gen.Rd
+a3a0b7b0e7d4f1b3b715e7757720c511 *man/qMSE.Rd
+cebae2f477a01bf924a2bd07dc2dbb88 *man/refold.Rd
+3ddf917351bf24a5cc337b022a6da512 *man/sigmaD.Rd
+8685be3dd62e5ed37cba3564a1830629 *man/tensor_gen.Rd
+95f2aca95d9ff50e062283b0c529835c *man/ttm.Rd
+e606ddec268297af69c2b85014762f3f *man/unfold.Rd
+3cd9833893779c4174ec35ac89a27351 *src/K1_D_nu.cpp
+8cc43056bd1681bcef5cd755d357ebe4 *src/K1_Ft_est.cpp
+4589c42ef361a5c9093326ef9c383a2e *src/K1_cov_est.cpp
+9d9485481c47e85e25fa654d7efc67b8 *src/K2_D_nu.cpp
+e85381c559cd335b31561edd90e64fec *src/K2_Ft_est.cpp
+f479773c39e0e2aeff33300684f41510 *src/K2_cov_est.cpp
+4c67f90d08129467a144d6ed73ae6ba7 *src/K3_D_nu.cpp
+49fb1f4f5d3cd2ae7ee2f76b950a6932 *src/K3_Ft_est.cpp
+acc33ab321ffc72743d0d329fc15cab2 *src/K3_cov_est.cpp
+44c54138eecdc43e8d936812de8e6617 *src/Makevars
+44c54138eecdc43e8d936812de8e6617 *src/Makevars.win
+32ee594dc44159006910d45f6fb132e6 *src/RcppExports.cpp
+4ce29ad8e2e739ece54df94d79e23826 *src/data_gen.cpp
+d906fb35563f77f39a725006e3d467f6 *src/partition_MSE.cpp
+f47b7c6b6aa310e29821579d2726025b *vignettes/A-short-introduction-to-tensorMiss.Rmd
+329b5dc06a7ea242c47108eb66e782d3 *vignettes/ref.bib
diff --git a/NAMESPACE b/NAMESPACE
@@ -0,0 +1,16 @@
+# Generated by roxygen2: do not edit by hand
+
+export(fle)
+export(miss_factor_est)
+export(miss_gen)
+export(qMSE)
+export(refold)
+export(sigmaD)
+export(tensor_gen)
+export(ttm)
+export(unfold)
+import(RcppEigen)
+import(rTensor)
+import(stats)
+importFrom(Rcpp,evalCpp)
+useDynLib(tensorMiss, .registration = TRUE)
diff --git a/R/RcppExports.R b/R/RcppExports.R
@@ -0,0 +1,47 @@
+# Generated by using Rcpp::compileAttributes() -> do not edit by hand
+# Generator token: 10BE3573-1514-4C36-9D1C-5A225CD40393
+
+K1_D_nu <- function(k, D, Q, dim, C, Y, jj, nu = 0L) {
+    .Call(`_tensorMiss_K1_D_nu`, k, D, Q, dim, C, Y, jj, nu)
+}
+
+K1_Ft_est <- function(x, A1) {
+    .Call(`_tensorMiss_K1_Ft_est`, x, A1)
+}
+
+K1_cov_est <- function(x, dim, k) {
+    .Call(`_tensorMiss_K1_cov_est`, x, dim, k)
+}
+
+K2_D_nu <- function(k, D, Q, dim, C, Y, jj, nu = 0L) {
+    .Call(`_tensorMiss_K2_D_nu`, k, D, Q, dim, C, Y, jj, nu)
+}
+
+K2_Ft_est <- function(x, A1, A2) {
+    .Call(`_tensorMiss_K2_Ft_est`, x, A1, A2)
+}
+
+K2_cov_est <- function(x, dim, k) {
+    .Call(`_tensorMiss_K2_cov_est`, x, dim, k)
+}
+
+K3_D_nu <- function(k, D, Q, dim, C, Y, jj, nu = 0L) {
+    .Call(`_tensorMiss_K3_D_nu`, k, D, Q, dim, C, Y, jj, nu)
+}
+
+K3_Ft_est <- function(x, A1, A2, A3) {
+    .Call(`_tensorMiss_K3_Ft_est`, x, A1, A2, A3)
+}
+
+K3_cov_est <- function(x, dim, k) {
+    .Call(`_tensorMiss_K3_cov_est`, x, dim, k)
+}
+
+data_gen <- function(K, n, d, r, re, eta, coef_f, coef_fe, coef_e, heavy_tailed = FALSE, t_df = 3L, seed = 2023L) {
+    .Call(`_tensorMiss_data_gen`, K, n, d, r, re, eta, coef_f, coef_fe, coef_e, heavy_tailed, t_df, seed)
+}
+
+partition_MSE <- function(x1, x2, par_num = 100L) {
+    .Call(`_tensorMiss_partition_MSE`, x1, x2, par_num)
+}
+
diff --git a/R/fle.R b/R/fle.R
@@ -0,0 +1,20 @@
+
+#' Factor loading error
+#'
+#' @description Computing the column space distance between two matrix
+#' @param A1 A matrix of m rows and n columns.
+#' @param A2 A matrix of m rows and l columns where l can equal n.
+#'
+#' @return A numeric number
+#' @export
+#'
+#' @examples
+#' fle(matrix(1:12, nrow=4), matrix(11:22, nrow=4));
+#'
+#'
+#'
+fle <- function(A1, A2){
+  rot.A1 <- qr.Q(qr(A1))%*%t(qr.Q(qr(A1)))
+  rot.A2 <- qr.Q(qr(A2))%*%t(qr.Q(qr(A2)))
+  return( norm(rot.A1 - rot.A2, type = '2') )
+}
diff --git a/R/miss_factor_est.R b/R/miss_factor_est.R
@@ -0,0 +1,181 @@
+
+#' Estimation of tensor factor models with missing data
+#'
+#' @description Estimate the factor structure on an order-K tensor at each time t, with maximum K as 3 and missing entries allowed
+#' @param dt Tensor time series, written in an array with dimension K+1 and mode-1 as the time mode.
+#' @param r Rank of core tensors, written in a vector of length K. First value as 0 is to denote unknown rank which would be automatically estimated using ratio-based estimators. Default is 0.
+#' @param delta Non-negative number as the correction parameter for rank estimation. Default is 0.2.
+#'
+#' @return A list containing the following:
+#' r: a vector representing either the given rank or the estimated rank, with length K;
+#' A: a list of estimated K factor loading matrices;
+#' Ft: the estimated core factor series, as multi-dimensional array with dimension K+1, where mode-1 is the time mode;
+#' imputation: the imputed common component time series, as multi-dimensional array with dimension K+1, where mode-1 is the time mode;
+#' covMatrix: a list of estimated covariance matrix which are used to estimate loading matrices;
+#' 
+#' 
+#' @export
+#' @import RcppEigen
+#' @importFrom Rcpp evalCpp
+#' @useDynLib tensorMiss, .registration = TRUE
+#'
+#' @examples
+#' K = 3;
+#' TT = 10;
+#' d = c(20,20,20);
+#' r = c(2,2,2);
+#' re = c(2,2,2);
+#' eta = list(c(0,0), c(0,0), c(0,0));
+#' coef_f = c(0.7, 0.3, -0.4, 0.2, -0.1);
+#' coef_fe = c(-0.7, -0.3, -0.4, 0.2, 0.1);
+#' coef_e = c(0.8, 0.4, -0.4, 0.2, -0.1);
+#' data_test = tensor_gen(K,TT,d,r,re,eta, coef_f, coef_fe, coef_e);
+#' data_miss = miss_gen(data_test$X);
+#' miss_factor_est(data_miss, r);
+#'
+#'
+#'
+miss_factor_est <- function(dt, r=0, delta=0.2){
+  if (requireNamespace('RcppEigen', quietly = TRUE)){
+    if (length(dim(dt)) > 4){
+      message('Order of the given tensor is too large. If there is need to extend to higher order, check the source Rcpp file or consult the author.')
+      return()
+    } else if (length(dim(dt)) == 1){
+      message('Scalar time series is not supported.')
+      return()
+    }
+
+    K <- length(dim(dt)) - 1
+
+    if ( (r[1]!=0)&(K!=length(r)) ){
+      message('Length of r is inconsistent with the order of given tensor time series.')
+      return()
+    }
+    if (delta < 0){
+      message('Parameter delta needs to at least 0.')
+      return()
+    }
+
+    # start estimation -----------------------------------------------------------
+    if (K==1){
+      TT <- dim(dt)[1]
+      I1 <- dim(dt)[2]
+
+      # loading matrix estimation
+      est_loading <- list()
+      for (k in 2:length(dim(dt)) ){
+        est_loading[[k-1]] <- K1_cov_est(c(dt), dim(dt), k)
+      }
+
+      # rank estimation if necessary
+      if ( (r[1])==0 ){
+        xi_1 <- delta * I1*(TT^(-0.5) + I1^(-0.5))
+        e1_val <- svd( est_loading[[1]] )$d[1:floor(I1/2)] + xi_1
+        r1 <- which.min( ((e1_val[-1])/(e1_val[-length(e1_val)])) )
+      } else {
+        r1 <- r[1] 
+      } # rank estimation end here
+
+      A1.est <- matrix(svd( est_loading[[1]] )$u[, (1:r1)], nrow=I1, ncol=r1)
+
+      # factor series estimation
+      F.est <- array(0, dim=c(TT, r1))
+      for (t in 1:TT ){
+        F_t.est <- K1_Ft_est(c(dt[t,]), A1.est)
+        F.est[t,] <- array(F_t.est, dim=c(r1))
+      }
+
+      # imputation
+      Y.imp <- ttm(F.est,  A1.est, 2)
+      return(list(r=c(r1), A=list(A1.est), Ft=F.est, imputation=Y.imp, covMatrix=est_loading))
+
+    } else if (K==2){
+      TT <- dim(dt)[1]
+      I1 <- dim(dt)[2]
+      I2 <- dim(dt)[3]
+
+      # loading matrix estimation
+      est_loading <- list()
+      for (k in 2:length(dim(dt)) ){
+        est_loading[[k-1]] <- K2_cov_est(c(dt), dim(dt), k)
+      }
+
+      # rank estimation if necessary
+      if ( (r[1])==0 ){
+        xi_1 <- delta * I1*I2*((TT*I2)^(-0.5) + I1^(-0.5))
+        e1_val <- svd( est_loading[[1]] )$d[1:floor(I1/2)] + xi_1
+        r1 <- which.min( ((e1_val[-1])/(e1_val[-length(e1_val)])) )
+
+        xi_2 <- delta * I1*I2*((TT*I1)^(-0.5) + I2^(-0.5))
+        e2_val <- svd( est_loading[[2]] )$d[1:floor(I2/2)] + xi_2
+        r2 <- which.min( ((e2_val[-1])/(e2_val[-length(e2_val)])) )
+      } else {
+        r1 <- r[1] 
+        r2 <- r[2]
+      } # rank estimation end here
+
+      A1.est <- matrix(svd( est_loading[[1]] )$u[, (1:r1)], nrow=I1, ncol=r1)
+      A2.est <- matrix(svd( est_loading[[2]] )$u[, (1:r2)], nrow=I2, ncol=r2)
+
+      # factor series estimation
+      F.est <- array(0, dim=c(TT, r1, r2))
+      for (t in 1:TT ){
+        F_t.est <- K2_Ft_est(c(dt[t,,]), A1.est, A2.est)
+        F.est[t,,] <- array(F_t.est, dim=c(r1, r2))
+      }
+
+      # imputation
+      Y.imp <- ttm(F.est,  A1.est,2)
+      Y.imp <- ttm(Y.imp, A2.est,3)
+      return(list(r=c(r1,r2), A=list(A1.est,A2.est), Ft=F.est, imputation=Y.imp, covMatrix=est_loading))
+
+    } else if (K==3){
+      TT <- dim(dt)[1]
+      I1 <- dim(dt)[2]
+      I2 <- dim(dt)[3]
+      I3 <- dim(dt)[4]
+
+      # loading matrix estimation
+      est_loading <- list()
+      for (k in 2:length(dim(dt)) ){
+        est_loading[[k-1]] <- K3_cov_est(c(dt), dim(dt), k)
+      }
+
+      # rank estimation if necessary
+      if ( (r[1])==0 ){
+        xi_1 <- delta * I1*I2*I3*((TT*I2*I3)^(-0.5) + I1^(-0.5))
+        e1_val <- svd( est_loading[[1]] )$d[1:floor(I1/2)] + xi_1
+        r1 <- which.min( ((e1_val[-1])/(e1_val[-length(e1_val)])) )
+
+        xi_2 <- delta * I1*I2*I3*((TT*I1*I3)^(-0.5) + I2^(-0.5))
+        e2_val <- svd( est_loading[[2]] )$d[1:floor(I2/2)] + xi_2
+        r2 <- which.min( ((e2_val[-1])/(e2_val[-length(e2_val)])) )
+
+        xi_3 <- delta * I1*I2*I3*((TT*I2*I1)^(-0.5) + I3^(-0.5))
+        e3_val <- svd( est_loading[[3]] )$d[1:floor(I3/2)] + xi_3
+        r3 <- which.min( ((e3_val[-1])/(e3_val[-length(e3_val)])) )
+      } else {
+        r1 <- r[1] 
+        r2 <- r[2]
+        r3 <- r[3]
+      } # rank estimation end here
+
+      A1.est <- matrix(svd( est_loading[[1]] )$u[, (1:r1)], nrow=I1, ncol=r1)
+      A2.est <- matrix(svd( est_loading[[2]] )$u[, (1:r2)], nrow=I2, ncol=r2)
+      A3.est <- matrix(svd( est_loading[[3]] )$u[, (1:r3)], nrow=I3, ncol=r3)
+
+      # factor series estimation
+      F.est <- array(0, dim=c(TT, r1, r2, r3))
+      for (t in 1:TT ){
+        F_t.est <- K3_Ft_est(c(dt[t,,,]), A1.est, A2.est, A3.est)
+        F.est[t,,,] <- array(F_t.est, dim=c(r1, r2, r3))
+      }
+
+      # imputation
+      Y.imp <- ttm(F.est,  A1.est,2)
+      Y.imp <- ttm(Y.imp, A2.est,3)
+      Y.imp <- ttm(Y.imp, A3.est,4)
+      return(list(r=c(r1,r2), A=list(A1.est,A2.est), Ft=F.est, imputation=Y.imp, covMatrix=est_loading))
+    }
+  }
+}