Skip to content

Commit

Permalink
version 0.1
Browse files Browse the repository at this point in the history
  • Loading branch information
robingirard authored and gaborcsardi committed Apr 15, 2010
0 parents commit 9379caf
Show file tree
Hide file tree
Showing 21 changed files with 1,555 additions and 0 deletions.
3 changes: 3 additions & 0 deletions .settings/de.walware.r.core.prefs
@@ -0,0 +1,3 @@
#Tue Apr 13 08:25:09 CEST 2010
RProjectBuild/Package.name=VHDClassification
eclipse.preferences.version=1
19 changes: 19 additions & 0 deletions DESCRIPTION
@@ -0,0 +1,19 @@
Package: VHDClassification
Type: Package
Title: Discrimination/Classification in very high dimension with linear
and quadratic rules.
Version: 0.1
Date: 2010-04-15
Author: Robin Girard
Maintainer: <robin.girard@mines-paristech.fr>
Description: This package provides an implementation of linear
discriminant analysis and quadratic discriminant analysis that
works well in very high dimension (when there are many more
variables than observations).
License: GPL-2
LazyLoad: yes
Depends: methods, e1071, lattice, stats
Suggests: pamr,randomForest
Packaged: 2010-05-02 07:12:59 UTC; robingirard
Repository: CRAN
Date/Publication: 2010-05-02 07:47:06
17 changes: 17 additions & 0 deletions NAMESPACE
@@ -0,0 +1,17 @@
exportPattern("^[[:alpha:]]+")
exportMethods(
".EvaluateLogLikeRatio",
"getBinaryRule",
"getLogLikeRatio",
"plotClassifRule",
"predict",
"show"
)
exportClasses(
"LinearRule",
"PartitionWithLLR",
"QuadraticRule"
)
export(".learnLinearRulefortune",".learnQuadraticRulefortune",
".LDA",".QDA",".tune.QDA",".tune.LDA")
import('e1071','lattice','stats','graphics')
57 changes: 57 additions & 0 deletions R/A.R
@@ -0,0 +1,57 @@
#setGeneric("predict", function(object, ...) standardGeneric("predict"))
# Generic declarations shared by the rule classes of this package.

# Accessor: return the components of a rule's log-likelihood ratio.
setGeneric("getLogLikeRatio",
  function(object) standardGeneric("getLogLikeRatio"))

# Internal: evaluate a rule's log-likelihood ratio at one observation x.
setGeneric(".EvaluateLogLikeRatio",
  function(x, object) standardGeneric(".EvaluateLogLikeRatio"))

# Internal: return a copy of a rule with its normal vector negated,
# i.e. the two groups' roles swapped (see the LinearRule method).
setGeneric(".minus",
  function(object) standardGeneric(".minus"))

# Plot a classification rule (lattice-based; see class-specific methods).
setGeneric("plotClassifRule",
  function(object, ...) standardGeneric("plotClassifRule"))

# Presumably extracts the binary rule discriminating classes k and l from a
# multiclass rule — no method is visible in this chunk; confirm elsewhere.
setGeneric("getBinaryRule",
  function(object, k, l) standardGeneric("getBinaryRule"))

# Learn a binary classification rule from features x and two-level labels y.
#
# type:      'linear' or 'quadratic' discriminant rule.
# procedure: variable-thresholding scheme forwarded to the learners
#            ('noThresh', 'UnivThresh', 'FDRThresh', ...).
# ql, qq:    threshold levels (linear / quadratic parts). When NULL, a default
#            grid is chosen from `procedure`; a grid of length > 1 triggers
#            cross-validated tuning instead of a single fit.
# prior:     should the estimated class proportions be used at prediction time?
#
# Returns the learned rule object from the selected learning procedure.
learnBinaryRule <- function(x, y, type = 'linear', procedure = 'FDRThresh',
                            covariance = 'diagonal', ql = NULL, qq = NULL,
                            prior = FALSE)
{
  # Validate `type` up front (the original checked it last, with vectorized
  # `&` instead of scalar `&&`).
  if (!(type %in% c('linear', 'quadratic')))
    stop('The type of procedure has to be linear or quadratic')

  # Default threshold grids, chosen from the thresholding procedure.
  if (is.null(ql)) {
    if (procedure == 'UnivThresh') ql <- 1:5 / 4
    if (!(procedure %in% c('noThresh', 'UnivThresh'))) ql <- 10^(-1:-7)
  }
  if (is.null(qq)) {
    if (procedure == 'UnivThresh') qq <- 1:2 / 2
    if (!(procedure %in% c('noThresh', 'UnivThresh'))) qq <- 10^(-1:-7)
  }

  if (type == 'linear') {
    # A grid of ql values means we tune by cross-validation.
    learner <- if (length(ql) > 1) .tune.LDA else .learnLinearRulefortune
    return(learner(x, y, procedure = procedure, covariance = covariance,
                   ql = ql, prior = prior))
  }

  # type == 'quadratic': tune if either grid has more than one value.
  learner <- if (length(ql) > 1 || length(qq) > 1) .tune.QDA
             else .learnQuadraticRulefortune
  learner(x, y, procedure = procedure, covariance = covariance,
          ql = ql, qq = qq, prior = prior)
}

57 changes: 57 additions & 0 deletions R/FDR_toolbox.R
@@ -0,0 +1,57 @@

#
# Author: robin
###############################################################################

# Step-up variable selection ("studentized" variant): sort the statistics S in
# decreasing order, compare them to Gaussian quantiles at levels
# q*i/(2*m) (i = 1..m), and keep all variables up to the LAST sorted position
# whose statistic exceeds its threshold (at least one variable is always kept).
#
# S: non-negative selection statistics, one per variable.
# q: level of the selection (e.g. an FDR-type level).
# n: sample size — accepted for interface compatibility but unused here;
#    NOTE(review): currently identical to .FDR_Selection, confirm intent.
#
# Returns the original indices of the selected variables.
.FDRS_Selection <- function(S, q, n)
{
	X <- sort(S, decreasing = TRUE, index.return = TRUE)
	m <- length(X$x)
	p <- q / (2 * m) * (1:m)
	t <- qnorm(p, lower.tail = FALSE)
	# Last sorted position exceeding its threshold (the original found this
	# with an O(n) loop and a dead accumulator `som`).
	exceed <- which(X$x - t > 0)
	last <- if (length(exceed) > 0) max(exceed) else 1
	return(X$ix[1:last])
}



# Step-up variable selection: sort the statistics S in decreasing order,
# compare them to Gaussian quantiles at levels q*i/(2*m) (i = 1..m), and keep
# all variables up to the LAST sorted position whose statistic exceeds its
# threshold (at least one variable is always kept).
#
# S: non-negative selection statistics, one per variable.
# q: level of the selection (an FDR-type level).
#
# Returns the original indices of the selected variables.
.FDR_Selection <- function(S, q)
{
	X <- sort(S, decreasing = TRUE, index.return = TRUE)
	m <- length(X$x)
	p <- q / (2 * m) * (1:m)
	t <- qnorm(p, lower.tail = FALSE)
	# Last sorted position exceeding its threshold (the original found this
	# with an O(n) loop and a dead accumulator `som`).
	exceed <- which(X$x - t > 0)
	last <- if (length(exceed) > 0) max(exceed) else 1
	return(X$ix[1:last])
}

# Variable selection maximizing a Fan-type criterion over prefixes of the
# statistics sorted in decreasing order:
#   stat[i] = min(Cmoins over the first i) * (sum of first i squared stats)^2
#             / (4*i/n + sum of first i squared stats)
# and the prefix with the largest criterion is kept.
#
# T:      non-negative selection statistics (note: shadows base `T`/TRUE —
#         parameter name kept for interface compatibility).
# Cmoins: per-variable weights entering via a running minimum.
# n:      sample size.
#
# Returns the original indices of the selected variables.
.FAN_Selection <- function(T, Cmoins, n)
{
	X <- sort(T, decreasing = TRUE, index.return = TRUE)
	# Prefix quantities (the original recomputed min() and sum() for every
	# prefix, which is O(n^2); cumsum/cummin give the same values in O(n)).
	css <- cumsum(X$x^2)
	cmin <- cummin(Cmoins[X$ix])
	k <- seq_along(X$x)
	stat <- cmin * css^2 / (4 * k / n + css)
	return(X$ix[1:which.max(stat)])
}

167 changes: 167 additions & 0 deletions R/LinearRule.R
@@ -0,0 +1,167 @@

# Author: robin
###############################################################################


# S4 class for a binary linear discrimination rule. Classification compares
# sum(normalVector * (x - centerVector)) to a threshold (see .LDA).
setClass(
Class="LinearRule",
representation=representation(
normalVector="numeric",   # discriminant direction, zeroed outside normalIndex
normalIndex="integer",    # indices of the variables kept by thresholding
centerVector="numeric",   # mid-point between the two group means
proportions="numeric",    # estimated proportion of group 1, n1/(n1+n2)
prior="logical"           # use `proportions` as prior at prediction time?
)
)

# Learn a binary linear discriminant rule with diagonal covariance from two
# groups of observations x1 (n1 x p) and x2 (n2 x p), rows = observations.
#
# procedure: variable-thresholding scheme ('noThresh', 'FANThresh',
#            'UnivThresh', 'FDRThresh', 'FDRstudent').
# ql:        level of the thresholding procedure.
# prior:     stored in the rule; used by predict() to weight by proportions.
# NOTE(review): `covariance` is accepted but unused in this implementation.
#
# Returns a "LinearRule" object.
.learnLinearRule <- function(x1, x2, procedure = 'noThresh',
                             covariance = 'diagonal',
                             ql = 1 * (procedure == 'Fisher') + 0.05,
                             prior = FALSE)
{
  # BUGFIX(review): the original validated an undefined name `x`; both inputs
  # are checked here.
  if (!(is(x1, "matrix") || is(x1, "data.frame")) ||
      !(is(x2, "matrix") || is(x2, "data.frame")))
    stop('the features have to be stored in a matrix or a data.frame')
  procedure <- as.character(procedure)

  p <- ncol(x1); n1 <- nrow(x1); n2 <- nrow(x2)
  n <- n1 + n2  # BUGFIX(review): `n` was used below but never defined

  # Group means and pooled diagonal variance estimate.
  hatmu <- rbind(colMeans(x1), colMeans(x2))
  # BUGFIX(review): the original computed `x1 - hatmu[1, ]`, which recycles
  # the length-p mean vector down the columns of the n1 x p matrix and thus
  # centers with the wrong values; sweep() subtracts each column's own mean.
  hatC <- (colSums(sweep(x1, 2, hatmu[1, ])^2) +
           colSums(sweep(x2, 2, hatmu[2, ])^2)) / (n1 + n2 - 1)

  F <- (hatmu[1, ] - hatmu[2, ]) / sqrt(hatC)  # standardized mean differences
  s <- (hatmu[1, ] + hatmu[2, ]) / 2           # mid-point between the groups
  indices <- integer(0)

  # Select the variables kept in the rule, depending on `procedure`.
  switch(procedure,
    'noThresh' = {
      indices <- 1:p
    },
    'FANThresh' = {
      indices <- .FAN_Selection(abs(F), hatC^(-1), n)
      F[!1:p %in% indices] <- 0
    },
    'UnivThresh' = {
      lambda <- sqrt(2 * as.numeric(ql) * log(p) / n)
      indices <- (1:p)[abs(F) > lambda]
      F[!1:p %in% indices] <- 0
    },
    'FDRThresh' = {
      indices <- .FDR_Selection(sqrt(n) * abs(F), q = as.numeric(ql))
      F[!1:p %in% indices] <- 0
    },
    'FDRstudent' = {
      indices <- .FDRS_Selection(sqrt(n) * abs(F), q = as.numeric(ql), n)
      F[!1:p %in% indices] <- 0
    },
    # BUGFIX(review): an unnamed last element is switch()'s default branch.
    # The original named it 'Otherwise', so unknown procedures fell through
    # silently and produced an empty rule instead of an error.
    stop('Trying to use a non-implemented procedure')
  )

  F <- F / sqrt(hatC)  # final normal vector: (mu1 - mu2) / hatC
  new(Class = "LinearRule", normalVector = F, centerVector = s,
      normalIndex = as.integer(indices),
      proportions = as.numeric(n1 / (n1 + n2)),
      prior = prior)
}

# Wrapper with a (features, labels) signature, as required by e1071::tune().
# y must have exactly two levels; the ordered-level order decides which rows
# form group 1 and which form group 2.
.learnLinearRulefortune <- function(x, y, ...) {
	yy <- ordered(y)
	lev <- levels(yy)
	.learnLinearRule(x[yy == lev[1], ], x[yy == lev[2], ], ...)
}


# Apply a linear rule to one observation x: TRUE when x is assigned to
# group 1, i.e. when <F, x - s> exceeds the prior threshold log((1-p)/p).
# F: normal vector; s: center vector; p: prior proportion of group 1.
.LDA <- function(x, F, s, p = 1/2) {
	threshold <- log((1 - p) / p)
	sum(F * (x - s)) > threshold
}


# Plot the selected coordinates of a linear rule: the normal-vector and
# center-vector values against their variable indices, as two series of a
# lattice xyplot (extra arguments are forwarded to xyplot).
setMethod(
f="plotClassifRule",
signature=c("LinearRule"),
definition=function(object,...){
Index=object@normalIndex
NormalVector=object@normalVector[Index]
Center=object@centerVector[Index]
Mydata=data.frame(NormalVector=NormalVector,Center=Center,Index=Index)
return(xyplot(NormalVector+Center~Index,data=Mydata,auto.key = TRUE,...))
}
)

# Console display of a linear rule: its two defining vectors.
setMethod(
	f = "show",
	signature = "LinearRule",
	definition = function(object) {
		cat('Normal Vector: \n'); print(object@normalVector)
		cat('Center Vector: \n'); print(object@centerVector)
	}
)
# Return the two components of the linear log-likelihood ratio as a list.
# NOTE(review): the second list name "VecterVector" looks like a typo for
# "CenterVector"; kept as-is because callers may rely on this exact name.
setMethod(
f="getLogLikeRatio",
signature="LinearRule",
definition=function(object){
return(list(NormalVector=object@normalVector,VecterVector=object@centerVector))
}
)
# Predict group membership for each row of newdata with a linear rule.
# Returns a factor with levels "1" and "2": "1" when the rule assigns the
# observation to the first training group, "2" otherwise.
setMethod(
	f = "predict",
	signature = "LinearRule",
	definition = function(object, newdata) {
		if (!(is(newdata, "matrix") || is(newdata, "data.frame")))
			stop("newdata must be a matrix or a data frame")
		# Row-wise application of the linear score; the stored class
		# proportions are used as prior only when the rule was fit with one.
		if (object@prior)
			res <- apply(newdata, 1, .LDA, object@normalVector,
			             object@centerVector, object@proportions)
		else
			res <- apply(newdata, 1, .LDA, object@normalVector,
			             object@centerVector)
		# BUGFIX(review): the original relabelled via `y = factor(y)`, but `y`
		# is not defined in this method, so predict() always failed at
		# runtime. The rule object does not store the training labels, so the
		# two groups are labelled "1" and "2" in training order
		# (group 1 = first ordered level of y at fit time).
		factor(ifelse(res, "1", "2"), levels = c("1", "2"))
	}
)

#setMethod(f="getNormalVector",signature='LinearRule',
# definition=function(object){return(object@normalVector)}
#)
#
#setMethod(f="getCenterVector",signature='LinearRule',
# definition=function(object){return(object@centerVector)}
#)

# Linear log-likelihood ratio at a single observation: <normal, x - center>.
setMethod(f = ".EvaluateLogLikeRatio",
	signature = c(x = 'numeric', object = 'LinearRule'),
	definition = function(x, object) {
		centered <- x - object@centerVector
		sum(object@normalVector * centered)
	}
)

# Return a copy of the rule with its normal vector negated — the same decision
# boundary with the two groups' roles swapped; all other slots are copied.
setMethod(f=".minus",signature=c(object='LinearRule'),
definition=function(object){
return(new(Class="LinearRule",normalVector=-object@normalVector,
centerVector=object@centerVector,
normalIndex=object@normalIndex,
proportions=object@proportions,
prior=object@prior))
}
)


# Internal: select ql (and procedure/prior) for the linear rule by
# cross-validation via e1071::tune(), then refit on all the data with the
# chosen ql. y must be an array of 2 factors.
#
# BUGFIX(review): the default was `prior = prior` (self-referential), so any
# call omitting `prior` failed with a "promise already under evaluation"
# error; the default now matches learnBinaryRule()'s `prior = FALSE`.
.tune.LDA <- function(x, y, procedure = 'FDRThresh', ql = 10^(-1:-7),
                      prior = FALSE, ...)
{
  y <- ordered(y)
  call <- match.call()
  ranges <- list(ql = ql, prior = prior, procedure = procedure)
  ranges[sapply(ranges, is.null)] <- NULL
  # Cross-validation only; the best model is refit explicitly below.
  tunecontrol <- tune.control(sampling = 'cross', best.model = FALSE)
  if (length(ranges) < 1) ranges <- NULL
  modeltmp <- tune('.learnLinearRulefortune', train.x = x, train.y = y,
                   ranges = ranges, tunecontrol = tunecontrol,
                   predict = predict, ...)
  # Among ties at minimal CV error, keep the LARGEST ql index (rev + which.min).
  besti <- length(ranges$ql) + 1 - which.min(rev(modeltmp$performances[['error']]))
  return(.learnLinearRulefortune(x, y, procedure = procedure,
                                 ql = modeltmp$performances$ql[besti],
                                 prior = prior))
}

0 comments on commit 9379caf

Please sign in to comment.