Skip to content

Commit

Permalink
version 0.1
Browse files Browse the repository at this point in the history
  • Loading branch information
robingirard authored and gaborcsardi committed Apr 15, 2010
0 parents commit 9379caf
Show file tree
Hide file tree
Showing 21 changed files with 1,555 additions and 0 deletions.
3 changes: 3 additions & 0 deletions .settings/de.walware.r.core.prefs
@@ -0,0 +1,3 @@
#Tue Apr 13 08:25:09 CEST 2010
RProjectBuild/Package.name=VHDClassification
eclipse.preferences.version=1
19 changes: 19 additions & 0 deletions DESCRIPTION
@@ -0,0 +1,19 @@
Package: VHDClassification
Type: Package
Title: Discrimination/Classification in very high dimension with linear
and quadratic rules.
Version: 0.1
Date: 2010-04-15
Author: Robin Girard
Maintainer: <robin.girard@mines-paristech.fr>
Description: This package provides an implementation of linear
discriminant analysis and quadratic discriminant analysis that
works well in very high dimension (when there are many more
variables than observations).
License: GPL-2
LazyLoad: yes
Depends: methods, e1071, lattice, stats
Suggests: pamr,randomForest
Packaged: 2010-05-02 07:12:59 UTC; robingirard
Repository: CRAN
Date/Publication: 2010-05-02 07:47:06
17 changes: 17 additions & 0 deletions NAMESPACE
@@ -0,0 +1,17 @@
exportPattern("^[[:alpha:]]+")
exportMethods(
".EvaluateLogLikeRatio",
"getBinaryRule",
"getLogLikeRatio",
"plotClassifRule",
"predict",
"show"
)
exportClasses(
"LinearRule",
"PartitionWithLLR",
"QuadraticRule"
)
export(".learnLinearRulefortune",".learnQuadraticRulefortune",
".LDA",".QDA",".tune.QDA",".tune.LDA")
import('e1071','lattice','stats','graphics')
57 changes: 57 additions & 0 deletions R/A.R
@@ -0,0 +1,57 @@
#setGeneric("predict", function(object, ...) standardGeneric("predict"))
# Generic declarations shared by the rule classes of this package.

# Accessor: return the components of a rule's log-likelihood ratio.
setGeneric("getLogLikeRatio",
  function(object) standardGeneric("getLogLikeRatio"))

# Internal: evaluate a rule's log-likelihood ratio at one observation x.
setGeneric(".EvaluateLogLikeRatio",
  function(x, object) standardGeneric(".EvaluateLogLikeRatio"))

# Internal: return a copy of a rule with its normal vector negated,
# i.e. the two groups' roles swapped (see the LinearRule method).
setGeneric(".minus",
  function(object) standardGeneric(".minus"))

# Plot a classification rule (lattice-based; see class-specific methods).
setGeneric("plotClassifRule",
  function(object, ...) standardGeneric("plotClassifRule"))

# Presumably extracts the binary rule discriminating classes k and l from a
# multiclass rule — no method is visible in this chunk; confirm elsewhere.
setGeneric("getBinaryRule",
  function(object, k, l) standardGeneric("getBinaryRule"))

# Learn a binary classification rule from features x and two-level labels y.
#
# type:      'linear' or 'quadratic' discriminant rule.
# procedure: variable-thresholding scheme forwarded to the learners
#            ('noThresh', 'UnivThresh', 'FDRThresh', ...).
# ql, qq:    threshold levels (linear / quadratic parts). When NULL, a default
#            grid is chosen from `procedure`; a grid of length > 1 triggers
#            cross-validated tuning instead of a single fit.
# prior:     should the estimated class proportions be used at prediction time?
#
# Returns the learned rule object from the selected learning procedure.
learnBinaryRule <- function(x, y, type = 'linear', procedure = 'FDRThresh',
                            covariance = 'diagonal', ql = NULL, qq = NULL,
                            prior = FALSE)
{
  # Validate `type` up front (the original checked it last, with vectorized
  # `&` instead of scalar `&&`).
  if (!(type %in% c('linear', 'quadratic')))
    stop('The type of procedure has to be linear or quadratic')

  # Default threshold grids, chosen from the thresholding procedure.
  if (is.null(ql)) {
    if (procedure == 'UnivThresh') ql <- 1:5 / 4
    if (!(procedure %in% c('noThresh', 'UnivThresh'))) ql <- 10^(-1:-7)
  }
  if (is.null(qq)) {
    if (procedure == 'UnivThresh') qq <- 1:2 / 2
    if (!(procedure %in% c('noThresh', 'UnivThresh'))) qq <- 10^(-1:-7)
  }

  if (type == 'linear') {
    # A grid of ql values means we tune by cross-validation.
    learner <- if (length(ql) > 1) .tune.LDA else .learnLinearRulefortune
    return(learner(x, y, procedure = procedure, covariance = covariance,
                   ql = ql, prior = prior))
  }

  # type == 'quadratic': tune if either grid has more than one value.
  learner <- if (length(ql) > 1 || length(qq) > 1) .tune.QDA
             else .learnQuadraticRulefortune
  learner(x, y, procedure = procedure, covariance = covariance,
          ql = ql, qq = qq, prior = prior)
}

57 changes: 57 additions & 0 deletions R/FDR_toolbox.R
@@ -0,0 +1,57 @@

#
# Author: robin
###############################################################################

# Step-up variable selection ("studentized" variant): sort the statistics S in
# decreasing order, compare them to Gaussian quantiles at levels
# q*i/(2*m) (i = 1..m), and keep all variables up to the LAST sorted position
# whose statistic exceeds its threshold (at least one variable is always kept).
#
# S: non-negative selection statistics, one per variable.
# q: level of the selection (e.g. an FDR-type level).
# n: sample size — accepted for interface compatibility but unused here;
#    NOTE(review): currently identical to .FDR_Selection, confirm intent.
#
# Returns the original indices of the selected variables.
.FDRS_Selection <- function(S, q, n)
{
	X <- sort(S, decreasing = TRUE, index.return = TRUE)
	m <- length(X$x)
	p <- q / (2 * m) * (1:m)
	t <- qnorm(p, lower.tail = FALSE)
	# Last sorted position exceeding its threshold (the original found this
	# with an O(n) loop and a dead accumulator `som`).
	exceed <- which(X$x - t > 0)
	last <- if (length(exceed) > 0) max(exceed) else 1
	return(X$ix[1:last])
}



# Step-up variable selection: sort the statistics S in decreasing order,
# compare them to Gaussian quantiles at levels q*i/(2*m) (i = 1..m), and keep
# all variables up to the LAST sorted position whose statistic exceeds its
# threshold (at least one variable is always kept).
#
# S: non-negative selection statistics, one per variable.
# q: level of the selection (an FDR-type level).
#
# Returns the original indices of the selected variables.
.FDR_Selection <- function(S, q)
{
	X <- sort(S, decreasing = TRUE, index.return = TRUE)
	m <- length(X$x)
	p <- q / (2 * m) * (1:m)
	t <- qnorm(p, lower.tail = FALSE)
	# Last sorted position exceeding its threshold (the original found this
	# with an O(n) loop and a dead accumulator `som`).
	exceed <- which(X$x - t > 0)
	last <- if (length(exceed) > 0) max(exceed) else 1
	return(X$ix[1:last])
}

# Variable selection maximizing a Fan-type criterion over prefixes of the
# statistics sorted in decreasing order:
#   stat[i] = min(Cmoins over the first i) * (sum of first i squared stats)^2
#             / (4*i/n + sum of first i squared stats)
# and the prefix with the largest criterion is kept.
#
# T:      non-negative selection statistics (note: shadows base `T`/TRUE —
#         parameter name kept for interface compatibility).
# Cmoins: per-variable weights entering via a running minimum.
# n:      sample size.
#
# Returns the original indices of the selected variables.
.FAN_Selection <- function(T, Cmoins, n)
{
	X <- sort(T, decreasing = TRUE, index.return = TRUE)
	# Prefix quantities (the original recomputed min() and sum() for every
	# prefix, which is O(n^2); cumsum/cummin give the same values in O(n)).
	css <- cumsum(X$x^2)
	cmin <- cummin(Cmoins[X$ix])
	k <- seq_along(X$x)
	stat <- cmin * css^2 / (4 * k / n + css)
	return(X$ix[1:which.max(stat)])
}

167 changes: 167 additions & 0 deletions R/LinearRule.R
@@ -0,0 +1,167 @@

# Author: robin
###############################################################################


# S4 class for a binary linear discrimination rule. Classification compares
# sum(normalVector * (x - centerVector)) to a threshold (see .LDA).
setClass(
Class="LinearRule",
representation=representation(
normalVector="numeric",   # discriminant direction, zeroed outside normalIndex
normalIndex="integer",    # indices of the variables kept by thresholding
centerVector="numeric",   # mid-point between the two group means
proportions="numeric",    # estimated proportion of group 1, n1/(n1+n2)
prior="logical"           # use `proportions` as prior at prediction time?
)
)

# Learn a binary linear discriminant rule with diagonal covariance from two
# groups of observations x1 (n1 x p) and x2 (n2 x p), rows = observations.
#
# procedure: variable-thresholding scheme ('noThresh', 'FANThresh',
#            'UnivThresh', 'FDRThresh', 'FDRstudent').
# ql:        level of the thresholding procedure.
# prior:     stored in the rule; used by predict() to weight by proportions.
# NOTE(review): `covariance` is accepted but unused in this implementation.
#
# Returns a "LinearRule" object.
.learnLinearRule <- function(x1, x2, procedure = 'noThresh',
                             covariance = 'diagonal',
                             ql = 1 * (procedure == 'Fisher') + 0.05,
                             prior = FALSE)
{
  # BUGFIX(review): the original validated an undefined name `x`; both inputs
  # are checked here.
  if (!(is(x1, "matrix") || is(x1, "data.frame")) ||
      !(is(x2, "matrix") || is(x2, "data.frame")))
    stop('the features have to be stored in a matrix or a data.frame')
  procedure <- as.character(procedure)

  p <- ncol(x1); n1 <- nrow(x1); n2 <- nrow(x2)
  n <- n1 + n2  # BUGFIX(review): `n` was used below but never defined

  # Group means and pooled diagonal variance estimate.
  hatmu <- rbind(colMeans(x1), colMeans(x2))
  # BUGFIX(review): the original computed `x1 - hatmu[1, ]`, which recycles
  # the length-p mean vector down the columns of the n1 x p matrix and thus
  # centers with the wrong values; sweep() subtracts each column's own mean.
  hatC <- (colSums(sweep(x1, 2, hatmu[1, ])^2) +
           colSums(sweep(x2, 2, hatmu[2, ])^2)) / (n1 + n2 - 1)

  F <- (hatmu[1, ] - hatmu[2, ]) / sqrt(hatC)  # standardized mean differences
  s <- (hatmu[1, ] + hatmu[2, ]) / 2           # mid-point between the groups
  indices <- integer(0)

  # Select the variables kept in the rule, depending on `procedure`.
  switch(procedure,
    'noThresh' = {
      indices <- 1:p
    },
    'FANThresh' = {
      indices <- .FAN_Selection(abs(F), hatC^(-1), n)
      F[!1:p %in% indices] <- 0
    },
    'UnivThresh' = {
      lambda <- sqrt(2 * as.numeric(ql) * log(p) / n)
      indices <- (1:p)[abs(F) > lambda]
      F[!1:p %in% indices] <- 0
    },
    'FDRThresh' = {
      indices <- .FDR_Selection(sqrt(n) * abs(F), q = as.numeric(ql))
      F[!1:p %in% indices] <- 0
    },
    'FDRstudent' = {
      indices <- .FDRS_Selection(sqrt(n) * abs(F), q = as.numeric(ql), n)
      F[!1:p %in% indices] <- 0
    },
    # BUGFIX(review): an unnamed last element is switch()'s default branch.
    # The original named it 'Otherwise', so unknown procedures fell through
    # silently and produced an empty rule instead of an error.
    stop('Trying to use a non-implemented procedure')
  )

  F <- F / sqrt(hatC)  # final normal vector: (mu1 - mu2) / hatC
  new(Class = "LinearRule", normalVector = F, centerVector = s,
      normalIndex = as.integer(indices),
      proportions = as.numeric(n1 / (n1 + n2)),
      prior = prior)
}

# Wrapper with a (features, labels) signature, as required by e1071::tune().
# y must have exactly two levels; the ordered-level order decides which rows
# form group 1 and which form group 2.
.learnLinearRulefortune <- function(x, y, ...) {
	yy <- ordered(y)
	lev <- levels(yy)
	.learnLinearRule(x[yy == lev[1], ], x[yy == lev[2], ], ...)
}


# Apply a linear rule to one observation x: TRUE when x is assigned to
# group 1, i.e. when <F, x - s> exceeds the prior threshold log((1-p)/p).
# F: normal vector; s: center vector; p: prior proportion of group 1.
.LDA <- function(x, F, s, p = 1/2) {
	threshold <- log((1 - p) / p)
	sum(F * (x - s)) > threshold
}


# Plot the selected coordinates of a linear rule: the normal-vector and
# center-vector values against their variable indices, as two series of a
# lattice xyplot (extra arguments are forwarded to xyplot).
setMethod(
f="plotClassifRule",
signature=c("LinearRule"),
definition=function(object,...){
Index=object@normalIndex
NormalVector=object@normalVector[Index]
Center=object@centerVector[Index]
Mydata=data.frame(NormalVector=NormalVector,Center=Center,Index=Index)
return(xyplot(NormalVector+Center~Index,data=Mydata,auto.key = TRUE,...))
}
)

# Console display of a linear rule: its two defining vectors.
setMethod(
	f = "show",
	signature = "LinearRule",
	definition = function(object) {
		cat('Normal Vector: \n'); print(object@normalVector)
		cat('Center Vector: \n'); print(object@centerVector)
	}
)
# Return the two components of the linear log-likelihood ratio as a list.
# NOTE(review): the second list name "VecterVector" looks like a typo for
# "CenterVector"; kept as-is because callers may rely on this exact name.
setMethod(
f="getLogLikeRatio",
signature="LinearRule",
definition=function(object){
return(list(NormalVector=object@normalVector,VecterVector=object@centerVector))
}
)
# Predict group membership for each row of newdata with a linear rule.
# Returns a factor with levels "1" and "2": "1" when the rule assigns the
# observation to the first training group, "2" otherwise.
setMethod(
	f = "predict",
	signature = "LinearRule",
	definition = function(object, newdata) {
		if (!(is(newdata, "matrix") || is(newdata, "data.frame")))
			stop("newdata must be a matrix or a data frame")
		# Row-wise application of the linear score; the stored class
		# proportions are used as prior only when the rule was fit with one.
		if (object@prior)
			res <- apply(newdata, 1, .LDA, object@normalVector,
			             object@centerVector, object@proportions)
		else
			res <- apply(newdata, 1, .LDA, object@normalVector,
			             object@centerVector)
		# BUGFIX(review): the original relabelled via `y = factor(y)`, but `y`
		# is not defined in this method, so predict() always failed at
		# runtime. The rule object does not store the training labels, so the
		# two groups are labelled "1" and "2" in training order
		# (group 1 = first ordered level of y at fit time).
		factor(ifelse(res, "1", "2"), levels = c("1", "2"))
	}
)

#setMethod(f="getNormalVector",signature='LinearRule',
# definition=function(object){return(object@normalVector)}
#)
#
#setMethod(f="getCenterVector",signature='LinearRule',
# definition=function(object){return(object@centerVector)}
#)

# Linear log-likelihood ratio at a single observation: <normal, x - center>.
setMethod(f = ".EvaluateLogLikeRatio",
	signature = c(x = 'numeric', object = 'LinearRule'),
	definition = function(x, object) {
		centered <- x - object@centerVector
		sum(object@normalVector * centered)
	}
)

# Return a copy of the rule with its normal vector negated — the same decision
# boundary with the two groups' roles swapped; all other slots are copied.
setMethod(f=".minus",signature=c(object='LinearRule'),
definition=function(object){
return(new(Class="LinearRule",normalVector=-object@normalVector,
centerVector=object@centerVector,
normalIndex=object@normalIndex,
proportions=object@proportions,
prior=object@prior))
}
)


# Internal: select ql (and procedure/prior) for the linear rule by
# cross-validation via e1071::tune(), then refit on all the data with the
# chosen ql. y must be an array of 2 factors.
#
# BUGFIX(review): the default was `prior = prior` (self-referential), so any
# call omitting `prior` failed with a "promise already under evaluation"
# error; the default now matches learnBinaryRule()'s `prior = FALSE`.
.tune.LDA <- function(x, y, procedure = 'FDRThresh', ql = 10^(-1:-7),
                      prior = FALSE, ...)
{
  y <- ordered(y)
  call <- match.call()
  ranges <- list(ql = ql, prior = prior, procedure = procedure)
  ranges[sapply(ranges, is.null)] <- NULL
  # Cross-validation only; the best model is refit explicitly below.
  tunecontrol <- tune.control(sampling = 'cross', best.model = FALSE)
  if (length(ranges) < 1) ranges <- NULL
  modeltmp <- tune('.learnLinearRulefortune', train.x = x, train.y = y,
                   ranges = ranges, tunecontrol = tunecontrol,
                   predict = predict, ...)
  # Among ties at minimal CV error, keep the LARGEST ql index (rev + which.min).
  besti <- length(ranges$ql) + 1 - which.min(rev(modeltmp$performances[['error']]))
  return(.learnLinearRulefortune(x, y, procedure = procedure,
                                 ql = modeltmp$performances$ql[besti],
                                 prior = prior))
}

0 comments on commit 9379caf

Please sign in to comment.