version 0.3.0

cran · May 17, 2019 · 28aa075 · 28aa075
1 parent 29b8e11
commit 28aa075
Show file tree

Hide file tree

Showing 6 changed files with 142 additions and 162 deletions.
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -1,15 +1,16 @@
 Package: rlmDataDriven
 Type: Package
 Title: Robust Regression with Data Driven Tuning Parameter
-Version: 0.2.1
-Author: You-Gan Wang <you-gan.wang@qut.edu.au>, Benoit Liquet <benoit.liquet@qut.edu.au>, Aurelien Callens <aurelien.callens@univ-pau.fr>
+Version: 0.3.0
+Author: You-Gan Wang
 Maintainer: You-Gan Wang <you-gan.wang@qut.edu.au>
 Imports: stats, MASS, tseries
 Description: Data driven approach for robust regression estimation in homoscedastic and heteroscedastic context. See Wang et al. (2007), <doi:10.1198/106186007X180156> regarding homoscedastic framework.  
 License: GPL (>= 2.0)
 Encoding: UTF-8
 LazyData: true
+RoxygenNote: 6.1.0
 NeedsCompilation: no
-Packaged: 2019-03-12 01:03:22 UTC; liquetwe
+Packaged: 2019-05-17 08:11:01 UTC; aurelien
 Repository: CRAN
-Date/Publication: 2019-03-12 14:00:02 UTC
+Date/Publication: 2019-05-17 08:50:07 UTC
diff --git a/MD5 b/MD5
@@ -1,12 +1,12 @@
-3295275f7edb1c08e8748a8314321f9b *DESCRIPTION
+e4a16057ed578ad5fb30c042d40747ff *DESCRIPTION
 83af3ef5dc9d34f8c8636e908f2db9af *NAMESPACE
 a856d9caba372bc66122240d916e9754 *R/DD-internal.R
 2801b315c3d0a5a1cb7b2af36ff8b95a *R/rlmDD.R
-98b0f6d67265eb93b6cdda6045faffc5 *R/rlmDD_het.R
+66d59e96135aa41438286e46519622fb *R/rlmDD_het.R
 611920c4014bf0220079dab79a9f8d38 *R/whm.R
 86db0b2c4c3a8a7f44ae40bbaf3ab29e *data/plasma.RData
 1cbee3fec615e7b13578f68d8ffc1f75 *man/DD-internal.Rd
 887918ce9fb1ef10e70d2bcaff33e50e *man/plasma.Rd
-ba7fcd8d357187f8d5fc09425658376f *man/rlmDD.Rd
-0ed97475764a9ede18e52152941f939a *man/rlmDD_het.Rd
-505e7f3a60fe4534f1195efd8c708c34 *man/whm.Rd
+43523c88c72076dfbf579c2d90ce0722 *man/rlmDD.Rd
+a09e824ab9ffbf7d5388e97fbf574e74 *man/rlmDD_het.Rd
+8df3db3323c65b5dde9a5ff4fc7e119b *man/whm.Rd
diff --git a/R/rlmDD_het.R b/R/rlmDD_het.R
@@ -1,180 +1,155 @@
-rlmDD_het <- function(yy, xx, var.function = c("power", "exponential"), phi.par = TRUE, tuning.para = NULL, step = 0.1, n.lag = NULL, 
-                                    print.summary = TRUE){
+rlmDD_het <- function (yy, xx, var.function = c("power", "exponential"), 
+                       phi.par = TRUE, tuning.para = NULL, step = 0.1, n.lag = NULL, 
+                       print.summary = TRUE){
   Y <- as.matrix(yy)
-  n <- length(Y) 
+  n <- length(Y)
   z <- c()
-  rlm.model <- rlm(yy ~ xx -1)
-  X <- as.matrix(rlm.model$x) 
-
-
-
-  switch(var.function,
-         "power"={
-           est <- function(x){sum(chi((Y-mu)/(phi*abs(mu)^x))*log(abs(mu)))}
-           est_lag <- function(x){sum(chi((Y[-c(1:nlag)]-mu)/(phi*abs(mu)^x))*log(abs(mu)))}
-           var.func <- function(mu, esti, phi){phi*abs(mu)^esti} 
-         },
-         "exponential"={
-           est <- function(x){sum(chi((Y-mu)/(phi*exp(x*abs(mu))))*abs(mu))}
-           est_lag <- function(x){sum(chi((Y[-c(1:nlag)]-mu)/(phi*exp(x*abs(mu))))*abs(mu))}
-           var.func <- function(mu, esti, phi){phi*exp(esti*abs(mu))}
-         },
-         stop("Wrong function name")
-  )
-
-  #Loop to find the best tuning parameter
-
-  if(is.null(tuning.para)){
-    c_i <- seq(from = 0.1, to = 3, by = step) 
+  rlm.model <- rlm(yy ~ xx - 1)
+  X <- as.matrix(rlm.model$x)
+  switch(var.function, power = {
+    est <- function(x) {
+      sum(rlmDataDriven::chi((Y - mu)/(phi * abs(mu)^x)) * log(abs(mu)))
+    }
+    est_lag <- function(x) {
+      sum(rlmDataDriven::chi((Y[-c(1:nlag)] - mu)/(phi * abs(mu)^x)) * 
+            log(abs(mu)))
+    }
+    var.func <- function(mu, esti, phi) {
+      phi * abs(mu)^esti
+    }
+  }, exponential = {
+    est <- function(x) {
+      sum(rlmDataDriven::chi((Y - mu)/(phi * exp(x * abs(mu)))) * abs(mu))
+    }
+    est_lag <- function(x) {
+      sum(rlmDataDriven::chi((Y[-c(1:nlag)] - mu)/(phi * exp(x * abs(mu)))) * 
+            abs(mu))
+    }
+    var.func <- function(mu, esti, phi) {
+      phi * exp(esti * abs(mu))
+    }
+  }, stop("Wrong function name"))
+  if (is.null(tuning.para)) {
+    c_i <- seq(from = 0.1, to = 3, by = step)
     indic <- c()
-
-
-    if(print.summary) pb <- txtProgressBar(min = 0, max = length(c_i), style = 3)
-
-    for(i in 1:length(c_i)){
-
+    if (print.summary) 
+      pb <- txtProgressBar(min = 0, max = length(c_i), 
+                           style = 3)
+    for (i in 1:length(c_i)) {
       c <- c_i[i]
       mu <- rlm.model$fitted.values
       phi <- 1
-
-      esti<- uniroot(est,c(-4,4))$root
-
-      if(phi.par == T){
-        phi <- median(abs(Y-mu)/(var.func(mu, esti, phi = 1)))/0.6745 
-      }else{
+      esti <- uniroot(est, c(-4, 4))$root
+      if (phi.par == T) {
+        phi <- median(abs(Y - mu)/(var.func(mu, esti, 
+                                            phi = 1)))/0.6745
+      }
+      else {
         phi <- 1
       }
-      pearson_res <- (Y-mu) / (var.func(mu, esti, phi))
-
+      pearson_res <- (Y - mu)/(var.func(mu, esti, phi))
       w_i <- as.vector((psi.Huber(pearson_res, c)/(pearson_res)))
-
-      W <- pmin(as.vector(w_i / (var.func(mu, esti, phi)))^2, 1)
-
-      B_est <- solve(crossprod(X,W*X), crossprod(X,W*Y))
-
-      mu <-  X %*% B_est
-
-      pearson_res <- (Y-mu) / (var.func(mu, esti, phi))
-
-      psip <- dpsi.Huber(pearson_res, c)
+      W <- pmin(as.vector(w_i/(var.func(mu, esti, phi)))^2, 
+                1)
+      B_est <- solve(crossprod(X, W * X), crossprod(X, 
+                                                    W * Y))
+      mu <- X %*% B_est
+      pearson_res <- (Y - mu)/(var.func(mu, esti, phi))
+      psip <- rlmDataDriven::dpsi.Huber(pearson_res, c)
       mn <- mean(psip)
-      p<- as.numeric(length(B_est))
+      p <- as.numeric(length(B_est))
       K <- 1 + p/n * var(psip)/(mn^2)
-      Kc <- (1/(n-p))*sum(psi.Huber(pearson_res,c)^2)/mn^2*K^2
-      indic[i]<- sum(diag(Kc*solve(t(X) %*%  diag(as.vector(1 / (var.func(mu, esti, phi))))^2 %*% X) ))
-      if(print.summary){setTxtProgressBar(pb, i)}
+      Kc <- (1/(n - p)) * sum(psi.Huber(pearson_res, c)^2)/mn^2 * 
+        K^2
+      indic[i] <- sum(diag(Kc * solve(t(X) %*% diag(as.vector(1/(var.func(mu, 
+                                                                          esti, phi))))^2 %*% X)))
+      if (print.summary) {
+        setTxtProgressBar(pb, i)
+      }
     }
-
-    #Fitting of the model with the best tuning parameter 
-
     c <- c_i[which.min(indic)]
   }else{
     c <- tuning.para
   }
   mu <- rlm.model$fitted.values
   phi <- 1
-
-
-  esti<- uniroot(est,c(-4,4))$root
-
-  if(phi.par == T){
-    phi <- median(abs(Y-mu)/(var.func(mu, esti, phi = 1)))/0.6745 
-  }else{
+  esti <- uniroot(est, c(-4, 4))$root
+  if (phi.par == T) {
+    phi <- median(abs(Y - mu)/(var.func(mu, esti, phi = 1)))/0.6745
+  }else {
     phi <- 1
   }
-
-  pearson_res <- (Y-mu) / (var.func(mu, esti, phi))
-
+  pearson_res <- (Y - mu)/(var.func(mu, esti, phi))
   w_i <- as.vector((psi.Huber(pearson_res, c)/(pearson_res)))
-
-  W <- pmin(as.vector(w_i / (var.func(mu, esti, phi)))^2, 1)
-
-  B_est <- solve(crossprod(X,W*X), crossprod(X,W*Y))
-
-  mu <-  X %*% B_est
-
-  pearson_res <- (Y-mu) / (var.func(mu, esti, phi))
-
-
-  if(is.null(n.lag)){
-    pacf(psi.Huber(pearson_res,c), main = "",ylim=c(0, 1))
-
+  W <- pmin(as.vector(w_i/(var.func(mu, esti, phi)))^2, 1)
+  B_est <- solve(crossprod(X, W * X), crossprod(X, W * Y))
+  mu <- X %*% B_est
+  pearson_res <- (Y - mu)/(var.func(mu, esti, phi))
+  if (is.null(n.lag)) {
+    pacf(psi.Huber(pearson_res, c), main = "", ylim = c(0, 
+                                                        1))
     cat("\n")
     cat("Select the number of lags: ")
     nlag <- as.integer(readLines(n = 1))
-  }else{
-    nlag <- n.lag 
   }
-  if(nlag == 0){stop("Number of lag must be at least 1")  }
-  list.l <- create_lag(pearson_res,n.lag = nlag)
-  for(a in 1:nlag)
-    assign(paste0('lag',a),list.l[[a]])
-
-  dat.l <- as.data.frame(list.l)*(var.func(mu, esti, phi))
-  dat.ln<- matrix(c(rep(1,nlag)),ncol=1)
-  row.names(dat.ln) <- names(dat.l)
-
-
-  B_est <- as.matrix(rbind(B_est,dat.ln))
-
-  X <- as.matrix(cbind(X,dat.l)) 
-
-  B_est <- solve(crossprod(X[-c(1:nlag),],W[-c(1:nlag)]*X[-c(1:nlag),]), crossprod(X[-c(1:nlag),],W[-c(1:nlag)]*Y[-c(1:nlag)]))
-
-  mu <-  X[-c(1:nlag),] %*% B_est
-
-  esti<- uniroot(est_lag,c(-4,4))$root
-
-  if(phi.par == T){
-    phi <- median(abs(Y[-c(1:nlag)]-mu)/(var.func(mu, esti, phi = 1)))/0.6745 
-  }else{
-    phi <- 1
+  else {
+    nlag <- n.lag
+  }
+  if (nlag == 0) {
+    stop("Number of lag must be at least 1")
   }
 
+  list.l <- create_lag(pearson_res, n.lag = nlag)
 
-  pearson_res <- (Y[-c(1:nlag)]-mu) / (var.func(mu, esti, phi))
-
-  w_i <- as.vector((psi.Huber(pearson_res, c)/(pearson_res)))
-
-  W <- pmin(as.vector(w_i / (var.func(mu, esti, phi)))^2, 1)
-
-  B_est <- solve(crossprod(X[-c(1:nlag),],W*X[-c(1:nlag),]), crossprod(X[-c(1:nlag),],W*Y[-c(1:nlag)]))
-
-  mu <-  X[-c(1:nlag),] %*% B_est
-  pearson_res <- (Y[-c(1:nlag)]-mu) / (var.func(mu, esti, phi))
-
-
-  p<- length(B_est)
-  psip <- dpsi.Huber(pearson_res,c)
-  mn <-mean(psip)
+  for (a in 1:nlag) assign(paste0("lag", a), list.l[[a]])
+  dat.l <- as.data.frame(list.l) * (var.func(mu, esti, phi))
+  dat.ln <- matrix(c(rep(1, nlag)), ncol = 1)
+  row.names(dat.ln) <- names(dat.l)
+  B_est <- as.matrix(rbind(B_est, dat.ln))
+  X <- as.matrix(cbind(X, dat.l))
+  B_est <- solve(crossprod(X[-c(1:nlag), ], W[-c(1:nlag)] * 
+                             X[-c(1:nlag), ]), crossprod(X[-c(1:nlag), ], W[-c(1:nlag)] * 
+                                                           Y[-c(1:nlag)]))
+  mu <- X[-c(1:nlag), ] %*% B_est
+
+  pearson_res <- (Y[-c(1:nlag)] - mu)/(var.func(mu, esti, 
+                                                phi))
+
+  p <- length(B_est)
+  psip <- dpsi.Huber(pearson_res, c)
+  mn <- mean(psip)
   K <- 1 + p/n * var(psip)/(mn^2)
-  CC <- solve(t(X[-c(1:nlag),])%*%  diag(as.vector(1 / (var.func(mu, esti, phi))))^2 %*% X[-c(1:nlag),]) 
-  vcov.mod<-(1/(n-p))*sum(psi.Huber(pearson_res,c)^2)/mn^2*K^2*CC
-
-  sum_table <- data.frame(Estimate = B_est, Std.Error=sqrt(diag(vcov.mod)))
+  CC <- solve(t(X[-c(1:nlag), ]) %*% diag(as.vector(1/(var.func(mu, 
+                                                                esti, phi))))^2 %*% X[-c(1:nlag), ])
+  vcov.mod <- (1/(n - p)) * sum(psi.Huber(pearson_res, c)^2)/mn^2 * 
+    K^2 * CC
+  sum_table <- data.frame(Estimate = B_est, Std.Error = sqrt(diag(vcov.mod)))
   sum_table$t_value <- sum_table$Estimate/sum_table$Std.Error
-  sum_table<- round(sum_table[,-5], digits = 5)
-
+  sum_table <- round(sum_table[, -5], digits = 5)
   z$coefficients <- B_est
-  z$residuals <-  (Y[-c(1:nlag)]-mu) 
+  z$residuals <- (Y[-c(1:nlag)] - mu)
   z$fitted.values <- mu
   z$vcov <- vcov.mod
-  z$summary <-sum_table
-  z$model <- cbind(yy,X)
-  z$p_residuals <- pearson_res 
-  z$r_residuals <- psi.Huber(pearson_res, c = c)
-  z$tuningpara <- if(is.null(tuning.para)){list(optimal = c,
-                                                tested_values = c_i,
-                                                obtained_var = indic)
-
-  }else{list(c = c)}
-  z$varpara <-data.frame(gamma=esti, phi= phi)
-  if(print.summary){
+  z$summary <- sum_table
+  z$model <- cbind(yy, X)
+  z$p_residuals <- pearson_res
+  z$r_residuals <- psi.Huber((Y[-c(1:nlag)] - mu), c = c)
+  z$tuningpara <- if (is.null(tuning.para)) {
+    list(optimal = c, tested_values = c_i, obtained_var = indic)
+  }
+  else {
+    list(c = c)
+  }
+  z$varpara <- data.frame(gamma = esti, phi = phi)
+  if (print.summary) {
     cat("Summary : \n ")
     print(z$summary)
-
-    if(is.null(tuning.para)){cat("\n Optimal tuning parameter:", c)
-    }else{cat("\n Tuning parameter:",c)}}
-  return(z)
+    if (is.null(tuning.para)) {
+      cat("\n Optimal tuning parameter:", c)
+    }
+    else {
+      cat("\n Tuning parameter:", c)
+    }
   }
-
+  return(z)
+}
diff --git a/man/rlmDD.Rd b/man/rlmDD.Rd
@@ -17,7 +17,7 @@ rlmDD(yy, xx, beta0, betaR, method, plot)
 \arguments{
   \item{yy}{Vector representing the response variable
 }
-  \item{xx}{Design matrix of the covariates inclunding the intercept in the first column
+  \item{xx}{Design matrix of the covariates excluding the intercept in the first column
 }
   \item{beta0}{Initial parameter estimate using \code{lm}
 
@@ -67,15 +67,18 @@ data(stackloss)
 
 LS <- lm(stack.loss ~ stack.x)
 RB <- rlm(stack.loss ~ stack.x, psi = psi.huber, k = 1.345)
-DD1 <- rlmDD(stack.loss, stack.x, LS$coef, RB$coef, method = "Huber", plot = "Y")
+DD1 <- rlmDD(stack.loss, stack.x, LS$coef, RB$coef, method = "Huber", 
+plot = "Y")
 
 LS <- lm(stack.loss ~ stack.x)
 RB <- rlm(stack.loss ~ stack.x, psi = psi.bisquare, c = 4.685)
-DD2 <- rlmDD(stack.loss, stack.x, LS$coef, RB$coef, method = "Bisquare", plot = "Y")
+DD2 <- rlmDD(stack.loss, stack.x, LS$coef, RB$coef, method = "Bisquare", 
+plot = "Y")
 
 LS <- lm(stack.loss ~ stack.x)
 RB <- rlm(stack.loss ~ stack.x, psi = psi.huber, k = 1.345)
-DD3 <- rlmDD(stack.loss, stack.x, LS$coef, RB$coef, method = "Exponential", plot = "Y")
+DD3 <- rlmDD(stack.loss, stack.x, LS$coef, RB$coef, method = "Exponential",
+plot = "Y")
 
 
 ## Plasma dataset

diff --git a/man/rlmDD_het.Rd b/man/rlmDD_het.Rd
@@ -12,8 +12,9 @@ First, a M-estimation is performed on the data assuming that the variance is con
 Finally, lagged term are built and added to the regression model therefore accounting for temporal correlations. The loss function used is Huber's function.
 }
 \usage{
-rlmDD_het(yy, xx, var.function = c("power", "exponential"), phi.par = TRUE, 
-tuning.para = NULL, step = 0.1, n.lag = NULL, print.summary = TRUE)
+rlmDD_het(yy, xx, var.function = c("power", "exponential"),
+phi.par = TRUE, tuning.para = NULL, step = 0.1, n.lag = NULL,
+print.summary = TRUE)
 }
 
 \arguments{
@@ -95,7 +96,7 @@ qqline(least_square$residuals, col = "red", lwd = 2)
 
 #With choice of optimal tuning parameter and 2 lags. 
 #Note that if lag = NULL, a Pacf plot will appear to help you choose 
-#the number of lags  and you will need to input this number in the console. 
+#the number of lags, you will need to input this number in the console. 
 
 model_1 <- rlmDD_het(yy, xx, var.function = "exponential", 
                                     tuning.para = NULL, n.lag = 2)