-
Notifications
You must be signed in to change notification settings - Fork 1
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
0 parents
commit 8de09b2
Showing
9 changed files
with
349 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,17 @@ | ||
Package: rlmDataDriven | ||
Type: Package | ||
Title: Robust Regression with Data Driven Tuning Parameter | ||
Version: 0.1.0 | ||
Author: You-Gan Wang | ||
Maintainer: The package maintainer <you-gan.wang@qut.edu.au> | ||
Imports: stats, MASS | ||
Description: Data driven approach for robust regression estimation. | ||
See Wang et al. (2007), <doi:10.1198/106186007X180156>. | ||
License: GPL (>= 2.0) | ||
Encoding: UTF-8 | ||
LazyData: true | ||
RoxygenNote: 6.1.0 | ||
NeedsCompilation: no | ||
Packaged: 2018-08-10 04:37:08 UTC; wangy | ||
Repository: CRAN | ||
Date/Publication: 2018-09-17 15:10:03 UTC |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,8 @@ | ||
3f3e1c776ddd3574b88183e68b2d686a *DESCRIPTION | ||
e684857efafa7b194d6d0459581b4b22 *NAMESPACE | ||
2bae9720c36a6d5ea17f668691892b0f *R/DD-internal.R | ||
2801b315c3d0a5a1cb7b2af36ff8b95a *R/rlmDD.R | ||
86db0b2c4c3a8a7f44ae40bbaf3ab29e *data/plasma.RData | ||
4f1e7315c723f966f354edb580e1ef61 *man/DD-internal.Rd | ||
887918ce9fb1ef10e70d2bcaff33e50e *man/plasma.Rd | ||
b09ddfb1b3755e554a3d4070a3dca96a *man/rlmDD.Rd |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,14 @@ | ||
exportPattern("^[[:alpha:]]+") | ||
importFrom("utils", "data") | ||
importFrom("MASS","rlm") | ||
importFrom("MASS","psi.huber") | ||
importFrom("MASS","psi.bisquare") | ||
importFrom("stats", "lm", "median") | ||
importFrom("graphics", "axis", "legend") | ||
importFrom("stats", "mad") | ||
export(eff,ESL_O) | ||
export(rho.h,psi.Huber,dpsi.Huber) | ||
export(rho.b,psi.Tukey,dpsi.Tukey) | ||
export(rho.e,psi.Exp,dpsi.Exp) | ||
|
||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,127 @@ | ||
##huber's function | ||
psi.Huber<-function(r,c) | ||
{ | ||
newr<-pmin(c,pmax(r,-c)) | ||
newr | ||
} | ||
|
||
##derivative of huber's function | ||
dpsi.Huber<-function(r,c) | ||
{ | ||
newr<-as.numeric(abs(r)<=c) | ||
newr | ||
} | ||
|
||
|
||
##bisquare's function | ||
psi.Tukey<-function(r,c) | ||
{ | ||
r<-pmin(c,pmax(r,-c)) | ||
newr<-r*(1-(r/c)^2)^2 | ||
newr | ||
} | ||
|
||
##derivative of bisquare's function | ||
dpsi.Tukey<-function(r,c) | ||
{ | ||
r<-pmin(c,pmax(r,-c)) | ||
newr<-(1-(r/c)^2)*(1-5*(r/c)^2) | ||
newr | ||
} | ||
|
||
##modified huber's function | ||
psi.Exp<-function(r,c) | ||
{ | ||
newr<-r*exp(-(r/c)^2) | ||
newr | ||
} | ||
|
||
##derivative of modified huber's function | ||
dpsi.Exp<-function(r,c) | ||
{ | ||
newr<-(1-(2*r^2)/c^2)*exp(-(r/c)^2) | ||
newr | ||
} | ||
|
||
|
||
##find the most efficient tuning constant | ||
eff<-function(r,method,plot) | ||
{ | ||
nn<- length(r) | ||
if (method =="Huber") | ||
{ | ||
tau<-(1:30)/10 | ||
new<-unlist(lapply(tau,FUN=function(x){(sum(dpsi.Huber(r,x)))^2/(nn*sum(psi.Huber(r,x)^2))})) | ||
} | ||
else if (method=="Bisquare") | ||
{ | ||
tau<-(15.48:59.48)/10 | ||
new<-unlist(lapply(tau,FUN=function(x){(sum(dpsi.Tukey(r,x)))^2/(nn*sum(psi.Tukey(r,x)^2))})) | ||
} | ||
else if (method=="Exponential") | ||
{ | ||
tau<-(5:100)/10 | ||
new<-unlist(lapply(tau,FUN=function(x){(sum(dpsi.Exp(r,x)))^2/(nn*sum(psi.Exp(r,x)^2))})) | ||
} | ||
|
||
if (plot =="Y") | ||
{ | ||
plot(new, type="o", xaxt = "n", xlab="Tunning parameter", | ||
ylab="Efficiency",ylim=c(0,max(new)*1.3)) | ||
axis(1, at=seq_along(tau), labels=tau) | ||
if (method =="Exponential") { | ||
legend(length(tau)*0.8,max(new)*1.3,c("Exp"),bty = "n") | ||
} else { | ||
legend(length(tau)*0.8,max(new)*1.3,c(method),bty = "n") | ||
} | ||
} | ||
|
||
xx<- order(new) | ||
return(tau[xx[length(tau)]]) | ||
|
||
} | ||
|
||
|
||
rho.h<-function(u,c){ | ||
phi<-0; | ||
x1<-(abs(u)<=c) | ||
phi<-x1* (u^2/2) + (1-x1)*(abs(u)*c-c^2/2) | ||
phi | ||
} | ||
|
||
rho.b<-function(u,c){ | ||
phi<-0; | ||
x1<-(abs(u)<=c) | ||
phi<-x1*(1-(1-(u/c)^2)^3) + (1-x1) | ||
phi | ||
} | ||
|
||
rho.e<-function(u,c){ | ||
1-exp(-(u/c)^2) | ||
} | ||
|
||
|
||
ESL_O<-function(x,xx,y,beta,newc,maxit=500, toler=1e-6) | ||
{ | ||
it=0;delta=1; | ||
while (delta>toler && it<maxit){ | ||
it<-it+1; | ||
beta0<-beta; | ||
w<-0; | ||
e<-y-x%*%beta0; | ||
S_n<-max(mad(e),1e-6) | ||
u<-e/S_n | ||
for(i in seq_along(u)){ | ||
w[i]<-exp(-(u[i]/newc)^2); | ||
} | ||
|
||
W<-diag(w); | ||
#beta<-solve(t(x)%*%W%*%x)%*%t(x)%*%W%*%y; | ||
rlm1<-rlm(y~xx,psi=psi.huber,weights=w,k=newc) | ||
beta<-rlm1$coef | ||
r2<-summary(lm(y~xx,weights=w))$r.squared | ||
delta<-sqrt(sum((beta-beta0)^2)); | ||
} | ||
list(esti=rlm1, Std.Error=summary(rlm1)$coef[,2], weights=w, tunning=newc, R2=r2) | ||
} | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,44 @@ | ||
|
||
rlmDD <-function(yy,xx,beta0,betaR,method =c("Huber","Bisquare","Exponential"),plot) | ||
{ | ||
X <- matrix(c(rep(1, length(yy)), xx), ncol = dim(xx)[2]+1) | ||
u <- yy-X%*%beta0 | ||
sigma0 <- median(abs(u[!is.na(u)]))/0.6745 # drop NA | ||
uu <- yy-X%*%betaR | ||
ri <- (uu[!is.na(uu)])/sigma0 # drop NA | ||
newc <- eff(ri, method, plot) | ||
|
||
|
||
## MM estimation of the location for R2 calculation | ||
rlm0 <- rlm(yy~1, method = "MM") | ||
mu <- rlm0$coef | ||
|
||
method <- match.arg(method) | ||
|
||
if (method =="Huber"){ | ||
rlm1 <- rlm(yy~xx, psi = psi.huber, k = newc) | ||
sig <- rlm1$s | ||
A <- rho.h((yy-mu)/sig, newc) | ||
B <- rho.h(rlm1$res/sig, newc) | ||
r2 <- 1-sum(B)/sum(A) | ||
list(esti = rlm1, Std.Error = summary(rlm1)$coef[,2], | ||
tunning = newc, R2 = r2) | ||
} | ||
else if (method=="Bisquare"){ | ||
rlm1 <- rlm(yy~xx, psi = psi.bisquare, c=newc) | ||
sig <- rlm1$s | ||
A <- rho.b((yy-mu)/sig,newc) | ||
B <- rho.b(rlm1$res/sig,newc) | ||
r2 <- 1-sum(B)/sum(A) | ||
list(esti = rlm1, Std.Error = summary(rlm1)$coef[,2], | ||
tunning = newc, R2 = r2) | ||
} | ||
else if (method=="Exponential") | ||
{ | ||
m <- rlm(yy~xx, method="MM", maxit = 1000) | ||
re_mm <- data.frame(m[1])$coefficients | ||
X <- cbind(1,xx) | ||
beta <- re_mm | ||
ESL_O(X, xx, yy, re_mm, newc, maxit=500, toler=1e-6) | ||
} | ||
} |
Binary file not shown.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,16 @@ | ||
\name{DD-internal} | ||
\alias{eff} | ||
\alias{ESL_O} | ||
\alias{rho.h} | ||
\alias{rho.b} | ||
\alias{rho.e} | ||
\alias{psi.Huber} | ||
\alias{dpsi.Huber} | ||
\alias{psi.Tukey} | ||
\alias{dpsi.Tukey} | ||
\alias{psi.Exp} | ||
\alias{dpsi.Exp} | ||
\title{Internal Functions} | ||
\description{ | ||
Internal functions not to be used by the user. | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,20 @@ | ||
\name{plasma} | ||
\alias{plasma} | ||
\title{plasma} | ||
\usage{plasma} | ||
\description{ | ||
The data are collected from 273 female patients. | ||
} | ||
\format{ | ||
This data frame contains the following columns: | ||
\describe{ | ||
\item{y}{plasma beta-carotene} | ||
\item{calories}{calories in KJ} | ||
\item{dietary}{dietary beta-carotene} | ||
} | ||
} | ||
\source{ | ||
Jiang, Y., Wang, Y-G., Fu, L., & Wang, X. (2018). Robust Estimation Using Modified Huber's Functions With New Tails. Technometrics, in press, see <doi:10.1080/00401706.2018.1470037>. | ||
} | ||
\keyword{datasets} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,103 @@ | ||
\name{rlmDD} | ||
\alias{rlmDD} | ||
|
||
\title{ | ||
Data driven robust methods | ||
} | ||
\description{ | ||
Robust estimation often relies on a dispersion function that is more slowly varying at large values than the squared function. However, the choice of tuning constant in dispersion | ||
function may impact the estimation efficiency to a great extent. For a given family of dispersion functions, we suggest obtaining the `best' tuning constant from the data so that the asymptotic efficiency is maximized. | ||
This library provides a robust linear regression with a tuning parameter being automatically chosen to provide the necessary resistance against outliers. The robust (loss) functions include the Huber, Tukey bisquare and the exponential loss. | ||
} | ||
\usage{ | ||
rlmDD(yy, xx, beta0, betaR, method, plot) | ||
} | ||
\arguments{ | ||
\item{yy}{Vector representing the response variable | ||
} | ||
\item{xx}{Design matrix of the covariates inclunding the intercept in the first column | ||
} | ||
\item{beta0}{Initial parameter estimate using \code{lm} | ||
} | ||
\item{betaR}{Robust estimate of beta with a fixed tuning constant using \code{rlm} | ||
} | ||
\item{method}{Huber, Bisquare or Exponential | ||
} | ||
\item{plot}{"Y" gives a plot: the efficiency factor versus a range of tunning parameter values. | ||
} | ||
} | ||
%\details{ | ||
%efgdgd | ||
%} | ||
\value{ | ||
The function returns a list including | ||
\item{esti}{ Value of the robust estimate} | ||
\item{Std.Error}{ Standard error of the robust estimate} | ||
\item{tunning}{ Optimum tunning parameter} | ||
\item{R2}{ R-squared value} | ||
} | ||
\references{ | ||
Wang, Y-G., Lin, X., Zhu, M., & Bai, Z. (2007). Robust estimation using the Huber function with a data-dependent tuning constant. Journal of Computational and Graphical Statistics, 16(2), 468-481. | ||
Wang, X., Jiang, Y., Huang, M., & Zhang, H. (2013). Robust variable selection with exponential squared loss. Journal of the American Statistical Association, 108, 632-643. | ||
Wang, N., Wang, Y-G., Hu, S., Hu, Z. H., Xu, J., Tang, H., & Jin, G. (2018). Robust Regression with Data-Dependent Regularization Parameters and Autoregressive Temporal Correlations. Environmental Modeling & Assessment, in press. | ||
} | ||
\author{ | ||
You-Gan Wang, Na Wang | ||
} | ||
\seealso{ | ||
\code{rlm} function from package \code{MASS} | ||
} | ||
\examples{ | ||
library(MASS) | ||
data(stackloss) | ||
LS <- lm(stack.loss ~ stack.x) | ||
RB <- rlm(stack.loss ~ stack.x, psi = psi.huber, k = 1.345) | ||
DD1 <- rlmDD(stack.loss, stack.x, LS$coef, RB$coef, method = "Huber", plot = "Y") | ||
LS <- lm(stack.loss ~ stack.x) | ||
RB <- rlm(stack.loss ~ stack.x, psi = psi.bisquare, c = 4.685) | ||
DD2 <- rlmDD(stack.loss, stack.x, LS$coef, RB$coef, method = "Bisquare", plot = "Y") | ||
LS <- lm(stack.loss ~ stack.x) | ||
RB <- rlm(stack.loss ~ stack.x, psi = psi.huber, k = 1.345) | ||
DD3 <- rlmDD(stack.loss, stack.x, LS$coef, RB$coef, method = "Exponential", plot = "Y") | ||
## Plasma dataset | ||
data(plasma) | ||
y <- plasma$y | ||
x <- cbind(plasma$calories, plasma$dietary) | ||
LS <- lm(y ~ x) | ||
RB <- rlm(y ~ x, psi = psi.huber, k = 1.345) | ||
DD.h <- rlmDD(y, x, LS$coef, RB$coef, method = "Huber", plot = "Y") | ||
LS <- lm(y ~ x) | ||
RB <- rlm(y ~ x, psi = psi.bisquare, c = 4.685) | ||
DD.b <- rlmDD(y, x, LS$coef, RB$coef, method = "Bisquare", plot = "Y") | ||
LS <- lm(y ~ x) | ||
RB <- rlm(y ~ x, psi = psi.huber, k = 1.345) | ||
DD.e <- rlmDD(y, x, LS$coef, RB$coef, method = "Exponential", plot = "Y") | ||
} | ||
\keyword{regression} |