Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
0 parents
commit 86c07ac
Showing
19 changed files
with
1,041 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,14 @@ | ||
Package: qut | ||
Type: Package | ||
Title: Quantile Universal Threshold | ||
Version: 1.0 | ||
Author: Jairo Diaz, Sylvain Sardy, Caroline Giacobino, Nick Hengartner. | ||
Maintainer: Jairo Diaz <jairo.diaz@unige.ch> | ||
Description: Selection of a threshold parameter for GLM-lasso to obtain a sparse model | ||
with a good compromise between high true positive rate and low false discovery rate. | ||
License: GPL-2 | ||
Depends: Matrix, glmnet, lars | ||
NeedsCompilation: no | ||
Packaged: 2015-09-20 07:54:10 UTC; Jairo | ||
Repository: CRAN | ||
Date/Publication: 2015-09-20 15:41:39 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,18 @@ | ||
796386f5b46abf28de97ed5e6bfa948a *DESCRIPTION | ||
7e939e6767941731e8bc9e537d7ebf75 *NAMESPACE | ||
b35d2c3fe626c987a1236bf500ff89a1 *R/lambdaqut.R | ||
c5c0e9083a2d2266484c69cec2892cfe *R/predict.qut.R | ||
60b8c469d26b0a0a513bad3854bf3aca *R/qut.R | ||
07d830a8b07ef1d53e36e1e72b24b789 *R/sigmaqut.R | ||
83c9097fad099cce1c20298145c00225 *R/sigmarcv.R | ||
6366e619fc2df2a00b86d383beaf2eaf *data/datalist | ||
fb4808ca9012f6abbb606d7d1b21a039 *data/leukemia.rda | ||
3f8a1ef02e84a88f448620114f857d20 *data/riboflavin.rda | ||
82c882af4ff01af36d89fc694e28510f *man/lambdaqut.Rd | ||
ed49b029b582232aab02fc804364a362 *man/leukemia.Rd | ||
2fc547bfe76a865d91d34c6954aff496 *man/predict.qut.Rd | ||
b52ed035629cdab2584ed1bd53bd9541 *man/qut-package.Rd | ||
7701f3ae6406752095a020ef17f50a08 *man/qut.Rd | ||
e1e2b3e8a62002755cc157be3174b951 *man/riboflavin.Rd | ||
5cb8388ddf7ecb6984c3ac7afb91319a *man/sigmaqut.Rd | ||
504c41ff245b7e6c27cd3a1eb75b6a43 *man/sigmarcv.Rd |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,7 @@ | ||
export(qut,lambdaqut,sigmaqut,sigmarcv,predict.qut) | ||
import(Matrix, glmnet, lars) | ||
importFrom("stats", "coef", "gaussian", "glm", "glm.fit", "lm", | ||
"median", "predict", "quantile", "rbinom", "rnorm", "rpois", | ||
"var") | ||
|
||
S3method(predict, qut) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,164 @@ | ||
lambdaqut <- | ||
function(y,X,family=gaussian,alpha.level='default',M=1000,qut.standardize=TRUE,intercept=TRUE,no.penalty=NULL,offset=NULL,bootstrap=TRUE){ | ||
#FUNCTION TO OBTAIN THE QUANTILE UNIVERSAL THRESHOLD | ||
|
||
#Hidden function for vectorized bootstrapping | ||
qutbootstrap <- function(curz){ | ||
ind=sample(1:n,n,replace=TRUE) | ||
curX=X[ind,] | ||
curmuhat=curz[1:n] | ||
curz=curz[-(1:n)] | ||
if(!intercept&sum(O!=0)!=0&is.null(no.penalty)){ | ||
curmuhat=muhatZ[ind,1] | ||
} | ||
else if(!is.null(no.penalty)|sum(O!=0)!=0){ | ||
curbeta0=glm.fit(y=curz,x=as.matrix(A[ind,]),intercept=FALSE,family=family,offset=O[ind])$coef | ||
curmuhat=family$linkinv(as.matrix(A[ind,])%*%curbeta0+O[ind]) | ||
} | ||
curbp=abs(t(curX)%*%(curz-curmuhat)) | ||
|
||
} | ||
|
||
#Hidden function for vectorized computation of the intercept for each of the Monte Carlo simulations of the NULL model | ||
glm0=function(y,x,family,offset=offset){ | ||
out=glm.fit(y=y,x=x,intercept=FALSE,family=family,offset=offset) | ||
return(out$coefficients) | ||
} | ||
|
||
family=family() | ||
n=nrow(X) | ||
p=ncol(X) | ||
X0=X | ||
if(is.null(offset)) O=rep(0,n) | ||
else O=offset | ||
|
||
#Check for warnings | ||
if(alpha.level!='default'){ | ||
if(alpha.level>1|alpha.level<0){ | ||
warning("alpha.level is not in [0,1] interval; set to default") | ||
alpha.level='default' | ||
} | ||
} | ||
if(M<=0){ | ||
warning("M is <=0; set to 1000") | ||
M=1000 | ||
} | ||
if (is.null(p) | (p <= 1)) stop("X should be a matrix with 2 or more columns") | ||
if (n!=length(y)) stop("Number of observations in y not equal to number of rows in X") | ||
if(length(O)!=n) stop("length of offset is different to the number of observations") | ||
if(family$family=='poisson'&(sum(y==0)==n)) stop("All your Poisson counts are zero") | ||
if(family$family=='binomial'&((sum(y==0)==n)|(sum(y==1)==n))) stop("All your Binomial measures are zero or one") | ||
|
||
#initialize A matrix | ||
if(!intercept&is.null(no.penalty)) muhat=family$linkinv(O) | ||
else{ | ||
A=c() | ||
if(!is.null(no.penalty)){ | ||
A=as.matrix(X[,no.penalty]) #if there are more unpenalized coefficients | ||
X=X[,-no.penalty] | ||
} | ||
if(intercept) A=cBind(rep(1,n),A) #if there is an intercept (column of ones) | ||
|
||
#Estimate beta0 as glm(y~A) | ||
beta0=glm.fit(y=y,x=as.matrix(A),intercept=FALSE,family=family,offset=offset)$coef | ||
muhat=family$linkinv(as.matrix(A)%*%beta0+O) | ||
} | ||
|
||
#Set default alpha.level | ||
if(alpha.level=='default') alpha.level=1/(sqrt(pi*log(p))) | ||
|
||
#adjustment gamma to the quantile (existence of glm) | ||
gamma=1 | ||
if(intercept&is.null(no.penalty)){ | ||
if(family$family=='poisson') gamma=1-exp(-n*muhat[1]) | ||
else if(family$family=='binomial') gamma=1-muhat[1]^n+(1-muhat[1])^n | ||
} | ||
else warning("Adjustment gamma for glm existence is not considered") | ||
|
||
#Monte Carlo Simulation of the null model | ||
if(family$family=='gaussian') z=matrix(rnorm(n*M,mean=as.vector(muhat),sd=1),n,M) | ||
else if(family$family=='poisson'){ | ||
z=matrix(rpois(n*M,lambda=as.vector(muhat)),n,M) | ||
z=z[,apply(z,2,sum)!=0] | ||
} | ||
else if(family$family=='binomial'){ | ||
z=matrix(rbinom(n*M,size=1,prob=as.vector(muhat)),n,M) | ||
z=z[,(apply(z,2,sum)!=0)&(apply(z,2,sum)!=n)] | ||
} | ||
else stop("Not available family") | ||
|
||
#obtain estimate of beta0 for each simulation | ||
if(!intercept&is.null(no.penalty)){ | ||
muhatZ=family$linkinv(O) | ||
muhatZ=matrix(rep(muhatZ,ncol(z)),nrow=n) | ||
} | ||
else{ | ||
beta0Z=apply(z,2,glm0,x=as.matrix(A),family=family,offset=offset) | ||
muhatZ=family$linkinv(as.matrix(A)%*%beta0Z+O) | ||
} | ||
|
||
divX=rep(1,ncol(X)) | ||
|
||
if(!bootstrap){ | ||
#No Bootstrap Matrix X | ||
|
||
#X'z | ||
bp=abs(t(X)%*%(z-muhatZ)) | ||
scale.factor=rep(1,p) | ||
|
||
#quantile-based standardization | ||
if(qut.standardize){ | ||
divX=apply(abs(bp),1,quantile,prob=(1-alpha.level)/gamma) | ||
divX[which(divX==0)]=1 | ||
|
||
#Alignment/scaling | ||
X=t(t(X)/divX) | ||
|
||
#X'z for the standardized X matrix | ||
bp=abs(t(X)%*%(z-muhatZ)) | ||
} | ||
} | ||
else{ | ||
#Bootstrap Matrix X | ||
|
||
#quantile-based standardization | ||
if(qut.standardize){ | ||
bp=abs(t(X)%*%(z-muhatZ)) | ||
scale.factor=rep(1,p) | ||
divX=apply(abs(bp),1,quantile,prob=(1-alpha.level)/gamma) | ||
divX[which(divX==0)]=1 | ||
|
||
#Alignment/scaling | ||
X=t(t(X)/divX) | ||
} | ||
|
||
#Bootstrapping | ||
bp=apply(rbind(muhatZ,z),2,qutbootstrap) | ||
resultsMC=apply(bp,2,max,na.rm=TRUE) | ||
lambda=quantile(resultsMC, prob=(1-alpha.level)/gamma) | ||
} | ||
|
||
#Obtain distribution of Lambda=max(X'z) | ||
resultsMC=apply(bp,2,max,na.rm=TRUE) | ||
|
||
#obtain lambda.qut for the given quantile | ||
lambda=quantile(resultsMC, prob=(1-alpha.level)/gamma) | ||
|
||
lambdamax=max(abs(t(X)%*%(y-muhat))) | ||
|
||
if(!is.null(no.penalty)){ | ||
X0[,-no.penalty]=X | ||
scale.factor[-no.penalty]=divX | ||
X=X0 | ||
} | ||
else scale.factor=divX | ||
|
||
#OUTPUT | ||
out=NULL | ||
out$scale.factor=scale.factor | ||
out$lambda.max=lambdamax | ||
out$lambda=lambda | ||
out$Xnew=X | ||
return(out) | ||
|
||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,24 @@ | ||
predict.qut <- | ||
function(object, newx, mode='glm',offset=NULL,...){ | ||
|
||
n=nrow(newx) | ||
|
||
#Check for warnings | ||
if(is.null(ncol(newx))) stop("newx should be a matrix with 2 or more columns") | ||
if(length(offset)!=n&!is.null(offset)) stop("length of offset is different to the number of observations") | ||
if(ncol(newx)!=length(object$beta[-1])) stop("newx should be a matrix with the same number of columns as covariates in the fitted model") | ||
|
||
if(mode=='lasso'){ #Predict with the lasso coefficients | ||
if(class(object$fit)[1]=='lars') type='fit' else type='response' | ||
newx=t(t(newx)/object$scale.factor) | ||
out=predict(object$fit,newx=newx,type=type,s=object$lambda,mode='lambda',offset=offset) | ||
if(class(object$fit)[1]=='lars') out=out$fit | ||
} | ||
else if(mode=='glm'){ #Predict with the glm fitted coefficients | ||
f=object$family() | ||
if(is.null(offset)) offset=0 | ||
out=f$linkinv(object$betaglm[1]+newx%*%object$betaglm[-1]+offset) | ||
} | ||
|
||
return(out) | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,162 @@ | ||
qut <- | ||
function(y,X,fit,family=gaussian,alpha.level='default',M=1000,qut.standardize=TRUE,intercept=TRUE,offset=NULL,bootstrap=TRUE,sigma='qut',estimator='unbiased',type=c('glmnet','lars'),lambda.seq=0,penalty.factor=rep(1,p),lambda.min.ratio=ifelse(n<p,0.01,0.0001),nlambda=100,lambda=NULL,...){ | ||
#FUNCTION TO FIT GLM WITH THE QUANTILE UNIVERSAL THRESHOLD | ||
|
||
#Hidden function to get glm fit with non-zero coefficients found by LASSO-GLM | ||
betaGLMestimates=function(){ | ||
if(type=='glmnet') betatemp=beta[-1] else betatemp=beta | ||
betaglm=betatemp*0 | ||
|
||
if(is.null(offset)) O=rep(0,n) | ||
if(sum(betatemp!=0)>0){ | ||
if(intercept) betaglm0=glm(y~X0[,betatemp!=0],family=family,offset=offset)$coef | ||
else betaglm0=glm(y~X0[,betatemp!=0]-1,family=family,offset=offset)$coef | ||
} | ||
else{if(intercept) betaglm0=glm(y~1,family=family,offset=offset)$coef} | ||
if(intercept){ | ||
betaglm[betatemp!=0]=betaglm0[-1] | ||
betaglm=c(betaglm0[1],betaglm) | ||
} | ||
else{ | ||
betaglm[betatemp!=0]=betaglm0 | ||
betaglm=c(0,betaglm) | ||
} | ||
|
||
names(betaglm)[1]='(Intercept)' | ||
names(betaglm)[-1]=1:p | ||
return(betaglm) | ||
} | ||
|
||
#Initialize some basic variables | ||
X0=X | ||
n=nrow(X) | ||
p=ncol(X) | ||
f=family() | ||
no.penalty=which(penalty.factor==0) | ||
if(length(no.penalty)==0) no.penalty=NULL | ||
|
||
#Check for warnings | ||
type=match.arg(type) | ||
if(type=='lars'&f$family!='gaussian'){ | ||
warning("type lars is just for gaussian family; set to glmnet") | ||
type='glmnet' | ||
} | ||
if(type=='lars'&!is.null(no.penalty)){ | ||
warning("type lars does not allow no penalty subsets; set to glmnet") | ||
type='glmnet' | ||
} | ||
if(type=='lars'&!is.null(offset)){ | ||
warning("type lars does not allow offset; set to glmnet") | ||
type='glmnet' | ||
} | ||
|
||
#Obtain lambda.qut (Quantile Universal Threshold) | ||
outqut=lambdaqut(y=y,X=X,alpha.level=alpha.level,M=M,qut.standardize=qut.standardize, family=family,intercept=intercept,no.penalty=no.penalty,offset=offset,bootstrap=bootstrap) | ||
X=outqut$Xnew #Get standardized matrix | ||
lambdaqut=outqut$lambda | ||
|
||
#Estimate sigma for gaussian family (if not specified) | ||
if(f$family=='gaussian'&!is.numeric(sigma)){ | ||
if(sigma=='qut') sigma=sigmaqut(y=y,X=X,intercept=intercept,estimator=estimator,alpha.level=alpha.level,M=M,qut.standardize=qut.standardize,offset=offset,penalty.factor=penalty.factor,...) | ||
else{ | ||
warning("sigma rcv or cv does not take into account penalty factor") | ||
if(sigma=='rcv') sigma = sigmarcv(X=X,y=y,cv=FALSE,intercept=intercept)$sigmahat | ||
else if(sigma=='cv') sigma = sigmarcv(X=X,y=y,cv=TRUE,intercept=intercept)$sigmahat | ||
else sigma = 1 | ||
} | ||
} | ||
else sigma=1 | ||
|
||
#Fit model | ||
if(type=='glmnet'){ #GLMNET | ||
lambdamax=outqut$lambda.max*sum(penalty.factor)/p | ||
#set sequence of lambdas | ||
if(lambda.seq==2){ #default glmnet values | ||
if(missing(lambda.min.ratio)) lambda.min.ratio=ifelse(n<p,0.01,0.0001) | ||
if(missing(nlambda)) nlambda=100 | ||
if(missing(lambda)) lambda=NULL | ||
} | ||
else{ #starting from lambdaqut to lambdamax | ||
lambda.min.ratio=lambdaqut*sigma/lambdamax*0.9999 | ||
if(lambda.min.ratio>1) lambda.min.ratio=0.9999 | ||
if(lambda.seq==0) nlambda=100 #100 lambdas #DEFAULT | ||
else if(lambda.seq==1) nlambda=n #n lambdas | ||
lambda=exp(seq(log(lambdamax),log(lambdaqut*sigma),length=nlambda))/n #n lambdas in logscale | ||
} | ||
if(missing(penalty.factor)) penalty.factor=rep(1,p) | ||
if(!missing(no.penalty)) penalty.factor[no.penalty]=0 | ||
|
||
if(missing(fit)) fit=glmnet(X,y,standardize=FALSE,intercept=intercept,family=f$family,penalty.factor=penalty.factor,offset=offset,nlambda=nlambda,lambda.min.ratio=lambda.min.ratio,...) | ||
|
||
if(max(fit$lambda)==Inf) beta=rep(0,p+1) | ||
else beta=coef(fit, s=lambdaqut*sigma/n*sum(penalty.factor)/p,offset=offset)/c(1,outqut$scale.factor) | ||
|
||
lambdaqut=lambdaqut/n | ||
|
||
#Get GLM fitted coefficients | ||
betaglm=try(betaGLMestimates(),silent=TRUE) | ||
|
||
|
||
} | ||
else if(type=='lars'){ #LARS | ||
|
||
#LASSO fit | ||
lambdamax=outqut$lambda.max*n | ||
if(missing(penalty.factor)) penalty.factor=rep(1,p) | ||
X=X/penalty.factor | ||
|
||
if(missing(fit)) fit=lars(X,y,intercept=intercept,normalize=FALSE,...) | ||
beta=coef(fit,s=lambdaqut*sigma,mode='lambda') | ||
|
||
if(intercept) beta0=mean(y)-mean(X%*%beta) else beta0=0 | ||
beta=beta/penalty.factor/outqut$scale.factor | ||
|
||
#Least squares fit | ||
betaglmestimatesLARS=function(){ | ||
betaglm=beta*0 | ||
if(sum(beta!=0)>0){ | ||
if(intercept) betaglm0=lm(y~X0[,beta!=0])$coef | ||
else betaglm0=lm(y~X0[,beta!=0]-1)$coef | ||
} | ||
else{if(intercept) betaglm0=lm(y~1)$coef} | ||
|
||
if(intercept){ | ||
betaglm[beta!=0]=betaglm0[-1] | ||
betaglm=c(betaglm0[1],betaglm) | ||
} | ||
else{ | ||
betaglm[beta!=0]=betaglm0 | ||
betaglm=c(0,betaglm) | ||
} | ||
names(betaglm)[1]='(Intercept)' | ||
names(betaglm)[-1]=1:p | ||
return(betaglm) | ||
} | ||
|
||
#Get GLM fitted coefficients | ||
betaglm=try(betaGLMestimates(),silent=TRUE) | ||
|
||
beta=c(beta0,beta) | ||
|
||
names(beta)[1]='(Intercept)' | ||
names(beta)[-1]=1:p | ||
|
||
} | ||
|
||
#OUTPUT | ||
out=NULL | ||
out$lambda=lambdaqut | ||
out$lambda.max=lambdamax/n | ||
out$scale.factor=outqut$scale.factor | ||
out$beta=beta | ||
if(class(betaglm)[1]=="try-error") warning("No valid GLM fits were possible to find") | ||
out$betaglm=betaglm | ||
out$fit=fit | ||
out$family=family | ||
if(f$family=='gaussian') out$sigma=sigma | ||
else out$sigma=NA | ||
class(out)='qut' | ||
return(out) | ||
} | ||
|
||
|
Oops, something went wrong.