Skip to content

Commit

Permalink
version 0.1.0
Browse files Browse the repository at this point in the history
  • Loading branch information
The package maintainer authored and cran-robot committed Sep 17, 2018
0 parents commit 8de09b2
Show file tree
Hide file tree
Showing 9 changed files with 349 additions and 0 deletions.
17 changes: 17 additions & 0 deletions DESCRIPTION
@@ -0,0 +1,17 @@
Package: rlmDataDriven
Type: Package
Title: Robust Regression with Data Driven Tuning Parameter
Version: 0.1.0
Author: You-Gan Wang
Maintainer: The package maintainer <you-gan.wang@qut.edu.au>
Imports: stats, MASS
Description: Data driven approach for robust regression estimation.
See Wang et al. (2007), <doi:10.1198/106186007X180156>.
License: GPL (>= 2.0)
Encoding: UTF-8
LazyData: true
RoxygenNote: 6.1.0
NeedsCompilation: no
Packaged: 2018-08-10 04:37:08 UTC; wangy
Repository: CRAN
Date/Publication: 2018-09-17 15:10:03 UTC
8 changes: 8 additions & 0 deletions MD5
@@ -0,0 +1,8 @@
3f3e1c776ddd3574b88183e68b2d686a *DESCRIPTION
e684857efafa7b194d6d0459581b4b22 *NAMESPACE
2bae9720c36a6d5ea17f668691892b0f *R/DD-internal.R
2801b315c3d0a5a1cb7b2af36ff8b95a *R/rlmDD.R
86db0b2c4c3a8a7f44ae40bbaf3ab29e *data/plasma.RData
4f1e7315c723f966f354edb580e1ef61 *man/DD-internal.Rd
887918ce9fb1ef10e70d2bcaff33e50e *man/plasma.Rd
b09ddfb1b3755e554a3d4070a3dca96a *man/rlmDD.Rd
14 changes: 14 additions & 0 deletions NAMESPACE
@@ -0,0 +1,14 @@
exportPattern("^[[:alpha:]]+")
importFrom("utils", "data")
importFrom("MASS","rlm")
importFrom("MASS","psi.huber")
importFrom("MASS","psi.bisquare")
importFrom("stats", "lm", "median")
importFrom("graphics", "axis", "legend")
importFrom("stats", "mad")
export(eff,ESL_O)
export(rho.h,psi.Huber,dpsi.Huber)
export(rho.b,psi.Tukey,dpsi.Tukey)
export(rho.e,psi.Exp,dpsi.Exp)


127 changes: 127 additions & 0 deletions R/DD-internal.R
@@ -0,0 +1,127 @@
##huber's function
psi.Huber<-function(r,c)
{
newr<-pmin(c,pmax(r,-c))
newr
}

##derivative of huber's function
dpsi.Huber<-function(r,c)
{
newr<-as.numeric(abs(r)<=c)
newr
}


##bisquare's function
psi.Tukey<-function(r,c)
{
r<-pmin(c,pmax(r,-c))
newr<-r*(1-(r/c)^2)^2
newr
}

##derivative of bisquare's function
dpsi.Tukey<-function(r,c)
{
r<-pmin(c,pmax(r,-c))
newr<-(1-(r/c)^2)*(1-5*(r/c)^2)
newr
}

##modified huber's function
psi.Exp<-function(r,c)
{
newr<-r*exp(-(r/c)^2)
newr
}

##derivative of modified huber's function
dpsi.Exp<-function(r,c)
{
newr<-(1-(2*r^2)/c^2)*exp(-(r/c)^2)
newr
}


##find the most efficient tuning constant
eff<-function(r,method,plot)
{
nn<- length(r)
if (method =="Huber")
{
tau<-(1:30)/10
new<-unlist(lapply(tau,FUN=function(x){(sum(dpsi.Huber(r,x)))^2/(nn*sum(psi.Huber(r,x)^2))}))
}
else if (method=="Bisquare")
{
tau<-(15.48:59.48)/10
new<-unlist(lapply(tau,FUN=function(x){(sum(dpsi.Tukey(r,x)))^2/(nn*sum(psi.Tukey(r,x)^2))}))
}
else if (method=="Exponential")
{
tau<-(5:100)/10
new<-unlist(lapply(tau,FUN=function(x){(sum(dpsi.Exp(r,x)))^2/(nn*sum(psi.Exp(r,x)^2))}))
}

if (plot =="Y")
{
plot(new, type="o", xaxt = "n", xlab="Tunning parameter",
ylab="Efficiency",ylim=c(0,max(new)*1.3))
axis(1, at=seq_along(tau), labels=tau)
if (method =="Exponential") {
legend(length(tau)*0.8,max(new)*1.3,c("Exp"),bty = "n")
} else {
legend(length(tau)*0.8,max(new)*1.3,c(method),bty = "n")
}
}

xx<- order(new)
return(tau[xx[length(tau)]])

}


rho.h<-function(u,c){
phi<-0;
x1<-(abs(u)<=c)
phi<-x1* (u^2/2) + (1-x1)*(abs(u)*c-c^2/2)
phi
}

rho.b<-function(u,c){
phi<-0;
x1<-(abs(u)<=c)
phi<-x1*(1-(1-(u/c)^2)^3) + (1-x1)
phi
}

rho.e<-function(u,c){
1-exp(-(u/c)^2)
}


ESL_O<-function(x,xx,y,beta,newc,maxit=500, toler=1e-6)
{
it=0;delta=1;
while (delta>toler && it<maxit){
it<-it+1;
beta0<-beta;
w<-0;
e<-y-x%*%beta0;
S_n<-max(mad(e),1e-6)
u<-e/S_n
for(i in seq_along(u)){
w[i]<-exp(-(u[i]/newc)^2);
}

W<-diag(w);
#beta<-solve(t(x)%*%W%*%x)%*%t(x)%*%W%*%y;
rlm1<-rlm(y~xx,psi=psi.huber,weights=w,k=newc)
beta<-rlm1$coef
r2<-summary(lm(y~xx,weights=w))$r.squared
delta<-sqrt(sum((beta-beta0)^2));
}
list(esti=rlm1, Std.Error=summary(rlm1)$coef[,2], weights=w, tunning=newc, R2=r2)
}

44 changes: 44 additions & 0 deletions R/rlmDD.R
@@ -0,0 +1,44 @@

rlmDD <-function(yy,xx,beta0,betaR,method =c("Huber","Bisquare","Exponential"),plot)
{
X <- matrix(c(rep(1, length(yy)), xx), ncol = dim(xx)[2]+1)
u <- yy-X%*%beta0
sigma0 <- median(abs(u[!is.na(u)]))/0.6745 # drop NA
uu <- yy-X%*%betaR
ri <- (uu[!is.na(uu)])/sigma0 # drop NA
newc <- eff(ri, method, plot)


## MM estimation of the location for R2 calculation
rlm0 <- rlm(yy~1, method = "MM")
mu <- rlm0$coef

method <- match.arg(method)

if (method =="Huber"){
rlm1 <- rlm(yy~xx, psi = psi.huber, k = newc)
sig <- rlm1$s
A <- rho.h((yy-mu)/sig, newc)
B <- rho.h(rlm1$res/sig, newc)
r2 <- 1-sum(B)/sum(A)
list(esti = rlm1, Std.Error = summary(rlm1)$coef[,2],
tunning = newc, R2 = r2)
}
else if (method=="Bisquare"){
rlm1 <- rlm(yy~xx, psi = psi.bisquare, c=newc)
sig <- rlm1$s
A <- rho.b((yy-mu)/sig,newc)
B <- rho.b(rlm1$res/sig,newc)
r2 <- 1-sum(B)/sum(A)
list(esti = rlm1, Std.Error = summary(rlm1)$coef[,2],
tunning = newc, R2 = r2)
}
else if (method=="Exponential")
{
m <- rlm(yy~xx, method="MM", maxit = 1000)
re_mm <- data.frame(m[1])$coefficients
X <- cbind(1,xx)
beta <- re_mm
ESL_O(X, xx, yy, re_mm, newc, maxit=500, toler=1e-6)
}
}
Binary file added data/plasma.RData
Binary file not shown.
16 changes: 16 additions & 0 deletions man/DD-internal.Rd
@@ -0,0 +1,16 @@
\name{DD-internal}
\alias{eff}
\alias{ESL_O}
\alias{rho.h}
\alias{rho.b}
\alias{rho.e}
\alias{psi.Huber}
\alias{dpsi.Huber}
\alias{psi.Tukey}
\alias{dpsi.Tukey}
\alias{psi.Exp}
\alias{dpsi.Exp}
\title{Internal Functions}
\description{
Internal functions not to be used by the user.
}
20 changes: 20 additions & 0 deletions man/plasma.Rd
@@ -0,0 +1,20 @@
\name{plasma}
\alias{plasma}
\title{plasma}
\usage{plasma}
\description{
The data are collected from 273 female patients.
}
\format{
This data frame contains the following columns:
\describe{
\item{y}{plasma beta-carotene}
\item{calories}{calories in KJ}
\item{dietary}{dietary beta-carotene}
}
}
\source{
Jiang, Y., Wang, Y-G., Fu, L., & Wang, X. (2018). Robust Estimation Using Modified Huber's Functions With New Tails. Technometrics, in press, see <doi:10.1080/00401706.2018.1470037>.
}
\keyword{datasets}
103 changes: 103 additions & 0 deletions man/rlmDD.Rd
@@ -0,0 +1,103 @@
\name{rlmDD}
\alias{rlmDD}

\title{
Data driven robust methods
}
\description{
Robust estimation often relies on a dispersion function that is more slowly varying at large values than the squared function. However, the choice of tuning constant in dispersion
function may impact the estimation efficiency to a great extent. For a given family of dispersion functions, we suggest obtaining the `best' tuning constant from the data so that the asymptotic efficiency is maximized.
This library provides a robust linear regression with a tuning parameter being automatically chosen to provide the necessary resistance against outliers. The robust (loss) functions include the Huber, Tukey bisquare and the exponential loss.
}
\usage{
rlmDD(yy, xx, beta0, betaR, method, plot)
}
\arguments{
\item{yy}{Vector representing the response variable
}
\item{xx}{Design matrix of the covariates inclunding the intercept in the first column
}
\item{beta0}{Initial parameter estimate using \code{lm}
}
\item{betaR}{Robust estimate of beta with a fixed tuning constant using \code{rlm}
}
\item{method}{Huber, Bisquare or Exponential
}
\item{plot}{"Y" gives a plot: the efficiency factor versus a range of tunning parameter values.
}
}
%\details{
%efgdgd
%}
\value{
The function returns a list including
\item{esti}{ Value of the robust estimate}
\item{Std.Error}{ Standard error of the robust estimate}
\item{tunning}{ Optimum tunning parameter}
\item{R2}{ R-squared value}
}
\references{
Wang, Y-G., Lin, X., Zhu, M., & Bai, Z. (2007). Robust estimation using the Huber function with a data-dependent tuning constant. Journal of Computational and Graphical Statistics, 16(2), 468-481.
Wang, X., Jiang, Y., Huang, M., & Zhang, H. (2013). Robust variable selection with exponential squared loss. Journal of the American Statistical Association, 108, 632-643.
Wang, N., Wang, Y-G., Hu, S., Hu, Z. H., Xu, J., Tang, H., & Jin, G. (2018). Robust Regression with Data-Dependent Regularization Parameters and Autoregressive Temporal Correlations. Environmental Modeling & Assessment, in press.
}
\author{
You-Gan Wang, Na Wang
}
\seealso{
\code{rlm} function from package \code{MASS}
}
\examples{
library(MASS)
data(stackloss)
LS <- lm(stack.loss ~ stack.x)
RB <- rlm(stack.loss ~ stack.x, psi = psi.huber, k = 1.345)
DD1 <- rlmDD(stack.loss, stack.x, LS$coef, RB$coef, method = "Huber", plot = "Y")
LS <- lm(stack.loss ~ stack.x)
RB <- rlm(stack.loss ~ stack.x, psi = psi.bisquare, c = 4.685)
DD2 <- rlmDD(stack.loss, stack.x, LS$coef, RB$coef, method = "Bisquare", plot = "Y")
LS <- lm(stack.loss ~ stack.x)
RB <- rlm(stack.loss ~ stack.x, psi = psi.huber, k = 1.345)
DD3 <- rlmDD(stack.loss, stack.x, LS$coef, RB$coef, method = "Exponential", plot = "Y")
## Plasma dataset
data(plasma)
y <- plasma$y
x <- cbind(plasma$calories, plasma$dietary)
LS <- lm(y ~ x)
RB <- rlm(y ~ x, psi = psi.huber, k = 1.345)
DD.h <- rlmDD(y, x, LS$coef, RB$coef, method = "Huber", plot = "Y")
LS <- lm(y ~ x)
RB <- rlm(y ~ x, psi = psi.bisquare, c = 4.685)
DD.b <- rlmDD(y, x, LS$coef, RB$coef, method = "Bisquare", plot = "Y")
LS <- lm(y ~ x)
RB <- rlm(y ~ x, psi = psi.huber, k = 1.345)
DD.e <- rlmDD(y, x, LS$coef, RB$coef, method = "Exponential", plot = "Y")
}
\keyword{regression}

0 comments on commit 8de09b2

Please sign in to comment.