Skip to content

Commit

Permalink
version 0.3.0
Browse files Browse the repository at this point in the history
  • Loading branch information
You-Gan Wang authored and cran-robot committed May 17, 2019
1 parent 29b8e11 commit 28aa075
Show file tree
Hide file tree
Showing 6 changed files with 142 additions and 162 deletions.
9 changes: 5 additions & 4 deletions DESCRIPTION
@@ -1,15 +1,16 @@
Package: rlmDataDriven
Type: Package
Title: Robust Regression with Data Driven Tuning Parameter
Version: 0.2.1
Author: You-Gan Wang <you-gan.wang@qut.edu.au>, Benoit Liquet <benoit.liquet@qut.edu.au>, Aurelien Callens <aurelien.callens@univ-pau.fr>
Version: 0.3.0
Author: You-Gan Wang
Maintainer: You-Gan Wang <you-gan.wang@qut.edu.au>
Imports: stats, MASS, tseries
Description: Data driven approach for robust regression estimation in homoscedastic and heteroscedastic context. See Wang et al. (2007), <doi:10.1198/106186007X180156> regarding homoscedastic framework.
License: GPL (>= 2.0)
Encoding: UTF-8
LazyData: true
RoxygenNote: 6.1.0
NeedsCompilation: no
Packaged: 2019-03-12 01:03:22 UTC; liquetwe
Packaged: 2019-05-17 08:11:01 UTC; aurelien
Repository: CRAN
Date/Publication: 2019-03-12 14:00:02 UTC
Date/Publication: 2019-05-17 08:50:07 UTC
10 changes: 5 additions & 5 deletions MD5
@@ -1,12 +1,12 @@
3295275f7edb1c08e8748a8314321f9b *DESCRIPTION
e4a16057ed578ad5fb30c042d40747ff *DESCRIPTION
83af3ef5dc9d34f8c8636e908f2db9af *NAMESPACE
a856d9caba372bc66122240d916e9754 *R/DD-internal.R
2801b315c3d0a5a1cb7b2af36ff8b95a *R/rlmDD.R
98b0f6d67265eb93b6cdda6045faffc5 *R/rlmDD_het.R
66d59e96135aa41438286e46519622fb *R/rlmDD_het.R
611920c4014bf0220079dab79a9f8d38 *R/whm.R
86db0b2c4c3a8a7f44ae40bbaf3ab29e *data/plasma.RData
1cbee3fec615e7b13578f68d8ffc1f75 *man/DD-internal.Rd
887918ce9fb1ef10e70d2bcaff33e50e *man/plasma.Rd
ba7fcd8d357187f8d5fc09425658376f *man/rlmDD.Rd
0ed97475764a9ede18e52152941f939a *man/rlmDD_het.Rd
505e7f3a60fe4534f1195efd8c708c34 *man/whm.Rd
43523c88c72076dfbf579c2d90ce0722 *man/rlmDD.Rd
a09e824ab9ffbf7d5388e97fbf574e74 *man/rlmDD_het.Rd
8df3db3323c65b5dde9a5ff4fc7e119b *man/whm.Rd
263 changes: 119 additions & 144 deletions R/rlmDD_het.R
@@ -1,180 +1,155 @@
rlmDD_het <- function(yy, xx, var.function = c("power", "exponential"), phi.par = TRUE, tuning.para = NULL, step = 0.1, n.lag = NULL,
print.summary = TRUE){
rlmDD_het <- function (yy, xx, var.function = c("power", "exponential"),
phi.par = TRUE, tuning.para = NULL, step = 0.1, n.lag = NULL,
print.summary = TRUE){
Y <- as.matrix(yy)
n <- length(Y)
n <- length(Y)
z <- c()
rlm.model <- rlm(yy ~ xx -1)
X <- as.matrix(rlm.model$x)



switch(var.function,
"power"={
est <- function(x){sum(chi((Y-mu)/(phi*abs(mu)^x))*log(abs(mu)))}
est_lag <- function(x){sum(chi((Y[-c(1:nlag)]-mu)/(phi*abs(mu)^x))*log(abs(mu)))}
var.func <- function(mu, esti, phi){phi*abs(mu)^esti}
},
"exponential"={
est <- function(x){sum(chi((Y-mu)/(phi*exp(x*abs(mu))))*abs(mu))}
est_lag <- function(x){sum(chi((Y[-c(1:nlag)]-mu)/(phi*exp(x*abs(mu))))*abs(mu))}
var.func <- function(mu, esti, phi){phi*exp(esti*abs(mu))}
},
stop("Wrong function name")
)

#Loop to find the best tuning parameter

if(is.null(tuning.para)){
c_i <- seq(from = 0.1, to = 3, by = step)
rlm.model <- rlm(yy ~ xx - 1)
X <- as.matrix(rlm.model$x)
switch(var.function, power = {
est <- function(x) {
sum(rlmDataDriven::chi((Y - mu)/(phi * abs(mu)^x)) * log(abs(mu)))
}
est_lag <- function(x) {
sum(rlmDataDriven::chi((Y[-c(1:nlag)] - mu)/(phi * abs(mu)^x)) *
log(abs(mu)))
}
var.func <- function(mu, esti, phi) {
phi * abs(mu)^esti
}
}, exponential = {
est <- function(x) {
sum(rlmDataDriven::chi((Y - mu)/(phi * exp(x * abs(mu)))) * abs(mu))
}
est_lag <- function(x) {
sum(rlmDataDriven::chi((Y[-c(1:nlag)] - mu)/(phi * exp(x * abs(mu)))) *
abs(mu))
}
var.func <- function(mu, esti, phi) {
phi * exp(esti * abs(mu))
}
}, stop("Wrong function name"))
if (is.null(tuning.para)) {
c_i <- seq(from = 0.1, to = 3, by = step)
indic <- c()


if(print.summary) pb <- txtProgressBar(min = 0, max = length(c_i), style = 3)

for(i in 1:length(c_i)){

if (print.summary)
pb <- txtProgressBar(min = 0, max = length(c_i),
style = 3)
for (i in 1:length(c_i)) {
c <- c_i[i]
mu <- rlm.model$fitted.values
phi <- 1

esti<- uniroot(est,c(-4,4))$root

if(phi.par == T){
phi <- median(abs(Y-mu)/(var.func(mu, esti, phi = 1)))/0.6745
}else{
esti <- uniroot(est, c(-4, 4))$root
if (phi.par == T) {
phi <- median(abs(Y - mu)/(var.func(mu, esti,
phi = 1)))/0.6745
}
else {
phi <- 1
}
pearson_res <- (Y-mu) / (var.func(mu, esti, phi))

pearson_res <- (Y - mu)/(var.func(mu, esti, phi))
w_i <- as.vector((psi.Huber(pearson_res, c)/(pearson_res)))

W <- pmin(as.vector(w_i / (var.func(mu, esti, phi)))^2, 1)

B_est <- solve(crossprod(X,W*X), crossprod(X,W*Y))

mu <- X %*% B_est

pearson_res <- (Y-mu) / (var.func(mu, esti, phi))

psip <- dpsi.Huber(pearson_res, c)
W <- pmin(as.vector(w_i/(var.func(mu, esti, phi)))^2,
1)
B_est <- solve(crossprod(X, W * X), crossprod(X,
W * Y))
mu <- X %*% B_est
pearson_res <- (Y - mu)/(var.func(mu, esti, phi))
psip <- rlmDataDriven::dpsi.Huber(pearson_res, c)
mn <- mean(psip)
p<- as.numeric(length(B_est))
p <- as.numeric(length(B_est))
K <- 1 + p/n * var(psip)/(mn^2)
Kc <- (1/(n-p))*sum(psi.Huber(pearson_res,c)^2)/mn^2*K^2
indic[i]<- sum(diag(Kc*solve(t(X) %*% diag(as.vector(1 / (var.func(mu, esti, phi))))^2 %*% X) ))
if(print.summary){setTxtProgressBar(pb, i)}
Kc <- (1/(n - p)) * sum(psi.Huber(pearson_res, c)^2)/mn^2 *
K^2
indic[i] <- sum(diag(Kc * solve(t(X) %*% diag(as.vector(1/(var.func(mu,
esti, phi))))^2 %*% X)))
if (print.summary) {
setTxtProgressBar(pb, i)
}
}

#Fitting of the model with the best tuning parameter

c <- c_i[which.min(indic)]
}else{
c <- tuning.para
}
mu <- rlm.model$fitted.values
phi <- 1


esti<- uniroot(est,c(-4,4))$root

if(phi.par == T){
phi <- median(abs(Y-mu)/(var.func(mu, esti, phi = 1)))/0.6745
}else{
esti <- uniroot(est, c(-4, 4))$root
if (phi.par == T) {
phi <- median(abs(Y - mu)/(var.func(mu, esti, phi = 1)))/0.6745
}else {
phi <- 1
}

pearson_res <- (Y-mu) / (var.func(mu, esti, phi))

pearson_res <- (Y - mu)/(var.func(mu, esti, phi))
w_i <- as.vector((psi.Huber(pearson_res, c)/(pearson_res)))

W <- pmin(as.vector(w_i / (var.func(mu, esti, phi)))^2, 1)

B_est <- solve(crossprod(X,W*X), crossprod(X,W*Y))

mu <- X %*% B_est

pearson_res <- (Y-mu) / (var.func(mu, esti, phi))


if(is.null(n.lag)){
pacf(psi.Huber(pearson_res,c), main = "",ylim=c(0, 1))

W <- pmin(as.vector(w_i/(var.func(mu, esti, phi)))^2, 1)
B_est <- solve(crossprod(X, W * X), crossprod(X, W * Y))
mu <- X %*% B_est
pearson_res <- (Y - mu)/(var.func(mu, esti, phi))
if (is.null(n.lag)) {
pacf(psi.Huber(pearson_res, c), main = "", ylim = c(0,
1))
cat("\n")
cat("Select the number of lags: ")
nlag <- as.integer(readLines(n = 1))
}else{
nlag <- n.lag
}
if(nlag == 0){stop("Number of lag must be at least 1") }
list.l <- create_lag(pearson_res,n.lag = nlag)
for(a in 1:nlag)
assign(paste0('lag',a),list.l[[a]])

dat.l <- as.data.frame(list.l)*(var.func(mu, esti, phi))
dat.ln<- matrix(c(rep(1,nlag)),ncol=1)
row.names(dat.ln) <- names(dat.l)


B_est <- as.matrix(rbind(B_est,dat.ln))

X <- as.matrix(cbind(X,dat.l))

B_est <- solve(crossprod(X[-c(1:nlag),],W[-c(1:nlag)]*X[-c(1:nlag),]), crossprod(X[-c(1:nlag),],W[-c(1:nlag)]*Y[-c(1:nlag)]))

mu <- X[-c(1:nlag),] %*% B_est

esti<- uniroot(est_lag,c(-4,4))$root

if(phi.par == T){
phi <- median(abs(Y[-c(1:nlag)]-mu)/(var.func(mu, esti, phi = 1)))/0.6745
}else{
phi <- 1
else {
nlag <- n.lag
}
if (nlag == 0) {
stop("Number of lag must be at least 1")
}

list.l <- create_lag(pearson_res, n.lag = nlag)

pearson_res <- (Y[-c(1:nlag)]-mu) / (var.func(mu, esti, phi))

w_i <- as.vector((psi.Huber(pearson_res, c)/(pearson_res)))

W <- pmin(as.vector(w_i / (var.func(mu, esti, phi)))^2, 1)

B_est <- solve(crossprod(X[-c(1:nlag),],W*X[-c(1:nlag),]), crossprod(X[-c(1:nlag),],W*Y[-c(1:nlag)]))

mu <- X[-c(1:nlag),] %*% B_est
pearson_res <- (Y[-c(1:nlag)]-mu) / (var.func(mu, esti, phi))


p<- length(B_est)
psip <- dpsi.Huber(pearson_res,c)
mn <-mean(psip)
for (a in 1:nlag) assign(paste0("lag", a), list.l[[a]])
dat.l <- as.data.frame(list.l) * (var.func(mu, esti, phi))
dat.ln <- matrix(c(rep(1, nlag)), ncol = 1)
row.names(dat.ln) <- names(dat.l)
B_est <- as.matrix(rbind(B_est, dat.ln))
X <- as.matrix(cbind(X, dat.l))
B_est <- solve(crossprod(X[-c(1:nlag), ], W[-c(1:nlag)] *
X[-c(1:nlag), ]), crossprod(X[-c(1:nlag), ], W[-c(1:nlag)] *
Y[-c(1:nlag)]))
mu <- X[-c(1:nlag), ] %*% B_est

pearson_res <- (Y[-c(1:nlag)] - mu)/(var.func(mu, esti,
phi))

p <- length(B_est)
psip <- dpsi.Huber(pearson_res, c)
mn <- mean(psip)
K <- 1 + p/n * var(psip)/(mn^2)
CC <- solve(t(X[-c(1:nlag),])%*% diag(as.vector(1 / (var.func(mu, esti, phi))))^2 %*% X[-c(1:nlag),])
vcov.mod<-(1/(n-p))*sum(psi.Huber(pearson_res,c)^2)/mn^2*K^2*CC

sum_table <- data.frame(Estimate = B_est, Std.Error=sqrt(diag(vcov.mod)))
CC <- solve(t(X[-c(1:nlag), ]) %*% diag(as.vector(1/(var.func(mu,
esti, phi))))^2 %*% X[-c(1:nlag), ])
vcov.mod <- (1/(n - p)) * sum(psi.Huber(pearson_res, c)^2)/mn^2 *
K^2 * CC
sum_table <- data.frame(Estimate = B_est, Std.Error = sqrt(diag(vcov.mod)))
sum_table$t_value <- sum_table$Estimate/sum_table$Std.Error
sum_table<- round(sum_table[,-5], digits = 5)

sum_table <- round(sum_table[, -5], digits = 5)
z$coefficients <- B_est
z$residuals <- (Y[-c(1:nlag)]-mu)
z$residuals <- (Y[-c(1:nlag)] - mu)
z$fitted.values <- mu
z$vcov <- vcov.mod
z$summary <-sum_table
z$model <- cbind(yy,X)
z$p_residuals <- pearson_res
z$r_residuals <- psi.Huber(pearson_res, c = c)
z$tuningpara <- if(is.null(tuning.para)){list(optimal = c,
tested_values = c_i,
obtained_var = indic)

}else{list(c = c)}
z$varpara <-data.frame(gamma=esti, phi= phi)
if(print.summary){
z$summary <- sum_table
z$model <- cbind(yy, X)
z$p_residuals <- pearson_res
z$r_residuals <- psi.Huber((Y[-c(1:nlag)] - mu), c = c)
z$tuningpara <- if (is.null(tuning.para)) {
list(optimal = c, tested_values = c_i, obtained_var = indic)
}
else {
list(c = c)
}
z$varpara <- data.frame(gamma = esti, phi = phi)
if (print.summary) {
cat("Summary : \n ")
print(z$summary)

if(is.null(tuning.para)){cat("\n Optimal tuning parameter:", c)
}else{cat("\n Tuning parameter:",c)}}
return(z)
if (is.null(tuning.para)) {
cat("\n Optimal tuning parameter:", c)
}
else {
cat("\n Tuning parameter:", c)
}
}

return(z)
}
11 changes: 7 additions & 4 deletions man/rlmDD.Rd
Expand Up @@ -17,7 +17,7 @@ rlmDD(yy, xx, beta0, betaR, method, plot)
\arguments{
\item{yy}{Vector representing the response variable
}
\item{xx}{Design matrix of the covariates inclunding the intercept in the first column
\item{xx}{Design matrix of the covariates excluding the intercept in the first column
}
\item{beta0}{Initial parameter estimate using \code{lm}
Expand Down Expand Up @@ -67,15 +67,18 @@ data(stackloss)
LS <- lm(stack.loss ~ stack.x)
RB <- rlm(stack.loss ~ stack.x, psi = psi.huber, k = 1.345)
DD1 <- rlmDD(stack.loss, stack.x, LS$coef, RB$coef, method = "Huber", plot = "Y")
DD1 <- rlmDD(stack.loss, stack.x, LS$coef, RB$coef, method = "Huber",
plot = "Y")
LS <- lm(stack.loss ~ stack.x)
RB <- rlm(stack.loss ~ stack.x, psi = psi.bisquare, c = 4.685)
DD2 <- rlmDD(stack.loss, stack.x, LS$coef, RB$coef, method = "Bisquare", plot = "Y")
DD2 <- rlmDD(stack.loss, stack.x, LS$coef, RB$coef, method = "Bisquare",
plot = "Y")
LS <- lm(stack.loss ~ stack.x)
RB <- rlm(stack.loss ~ stack.x, psi = psi.huber, k = 1.345)
DD3 <- rlmDD(stack.loss, stack.x, LS$coef, RB$coef, method = "Exponential", plot = "Y")
DD3 <- rlmDD(stack.loss, stack.x, LS$coef, RB$coef, method = "Exponential",
plot = "Y")
## Plasma dataset
Expand Down
7 changes: 4 additions & 3 deletions man/rlmDD_het.Rd
Expand Up @@ -12,8 +12,9 @@ First, a M-estimation is performed on the data assuming that the variance is con
Finally, lagged term are built and added to the regression model therefore accounting for temporal correlations. The loss function used is Huber's function.
}
\usage{
rlmDD_het(yy, xx, var.function = c("power", "exponential"), phi.par = TRUE,
tuning.para = NULL, step = 0.1, n.lag = NULL, print.summary = TRUE)
rlmDD_het(yy, xx, var.function = c("power", "exponential"),
phi.par = TRUE, tuning.para = NULL, step = 0.1, n.lag = NULL,
print.summary = TRUE)
}
\arguments{
Expand Down Expand Up @@ -95,7 +96,7 @@ qqline(least_square$residuals, col = "red", lwd = 2)
#With choice of optimal tuning parameter and 2 lags.
#Note that if lag = NULL, a Pacf plot will appear to help you choose
#the number of lags and you will need to input this number in the console.
#the number of lags, you will need to input this number in the console.
model_1 <- rlmDD_het(yy, xx, var.function = "exponential",
tuning.para = NULL, n.lag = 2)
Expand Down

0 comments on commit 28aa075

Please sign in to comment.