diff --git a/.settings/de.walware.r.core.prefs b/.settings/de.walware.r.core.prefs new file mode 100644 index 0000000..73e3845 --- /dev/null +++ b/.settings/de.walware.r.core.prefs @@ -0,0 +1,3 @@ +#Tue Apr 13 08:25:09 CEST 2010 +RProjectBuild/Package.name=VHDClassification +eclipse.preferences.version=1 diff --git a/DESCRIPTION b/DESCRIPTION new file mode 100644 index 0000000..9c730a0 --- /dev/null +++ b/DESCRIPTION @@ -0,0 +1,19 @@ +Package: VHDClassification +Type: Package +Title: Discrimination/Classification in very high dimension with linear + and quadratic rules. +Version: 0.1 +Date: 2010-04-15 +Author: Robin Girard +Maintainer: +Description: This package provides an implementation of Linear + disciminant analysis and quadratic discriminant analysis that + works fine in very high dimension (when there are many more + variables than observations). +License: GPL-2 +LazyLoad: yes +Depends: methods, e1071, lattice, stats +Suggests: pamr,randomForest +Packaged: 2010-05-02 07:12:59 UTC; robingirard +Repository: CRAN +Date/Publication: 2010-05-02 07:47:06 diff --git a/NAMESPACE b/NAMESPACE new file mode 100644 index 0000000..24e8c9b --- /dev/null +++ b/NAMESPACE @@ -0,0 +1,17 @@ +exportPattern("^[[:alpha:]]+") +exportMethods( + ".EvaluateLogLikeRatio", + "getBinaryRule", + "getLogLikeRatio", + "plotClassifRule", + "predict", + "show" +) +exportClasses( + "LinearRule", + "PartitionWithLLR", + "QuadraticRule" +) +export(".learnLinearRulefortune",".learnQuadraticRulefortune", +".LDA",".QDA",".tune.QDA",".tune.LDA") +import('e1071','lattice','stats','graphics') diff --git a/R/A.R b/R/A.R new file mode 100644 index 0000000..211ec34 --- /dev/null +++ b/R/A.R @@ -0,0 +1,57 @@ +#setGeneric("predict", function(object, ...) standardGeneric("predict")) +setGeneric( + name="getLogLikeRatio", + def=function(object){standardGeneric('getLogLikeRatio')} +) +setGeneric( + name=".EvaluateLogLikeRatio", + def=function(x,object){standardGeneric('.EvaluateLogLikeRatio')} +) +setGeneric( + name=".minus", + def=function(object){standardGeneric('.minus')} +) +setGeneric( + name="plotClassifRule", + def=function(object,...){standardGeneric('plotClassifRule')} +) +setGeneric( + name="getBinaryRule", + def=function(object,k,l){standardGeneric('getBinaryRule')} +) + +learnBinaryRule<-function(x,y,type='linear',procedure='FDRThresh',covariance='diagonal',ql=NULL,qq=NULL,prior=FALSE) +{ + + if (is.null(ql)) + { + if (procedure=='UnivThresh'){ ql=1:5/4} + if (!(procedure%in%c('noThresh','UnivThresh'))){ql=10^(-1:-7)} + } + if (is.null(qq)) + { + if (procedure=='UnivThresh'){qq=1:2/2} + if (!(procedure%in%c('noThresh','UnivThresh'))){qq=10^(-1:-7)} + } + + + if (type=='linear') + { + if (length(ql)>1){ BinaryLearningProcedure=.tune.LDA;} + else { BinaryLearningProcedure=.learnLinearRulefortune;} + return(BinaryLearningProcedure(x,y,procedure=procedure,covariance=covariance,ql=ql,prior=prior)) + } + if (type=='quadratic') + { + if ((length(ql)>1)|(length(qq)>1)){ BinaryLearningProcedure=.tune.QDA;} + else { BinaryLearningProcedure=.learnQuadraticRulefortune;} + return(BinaryLearningProcedure(x,y,procedure=procedure,covariance=covariance,ql=ql,qq=qq,prior=prior)) + } + if ((type!='linear')&(type!='quadratic')) + { + stop('The type of procedure has to be linear or quadratic') + } + + +} + diff --git a/R/FDR_toolbox.R b/R/FDR_toolbox.R new file mode 100644 index 0000000..5f00cdd --- /dev/null +++ b/R/FDR_toolbox.R @@ -0,0 +1,57 @@ + +# +# Author: robin +############################################################################### + +.FDRS_Selection<-function(S,q,n) +{ + X=sort(S,decreasing = TRUE,index.return=TRUE) + som=0; + #for (i in 1:length(X$x)){som=som+1/i} + p=q/(2*length(X$x))*(1:length(X$x)) + t=qnorm(p,lower.tail=FALSE); + last=1; + for (i in 1:length(S)) + { + if (X$x[i]-t[i]>0) + { + last=i + } + } + #print(last) + return(X$ix[1:last]) +} + + + +.FDR_Selection<-function(S,q) +{ + X=sort(S,decreasing = TRUE,index.return=TRUE) + som=0; + #for (i in 1:length(X$x)){som=som+1/i} + p=q/(2*length(X$x))*(1:length(X$x)) + t=qnorm(p,lower.tail=FALSE); + last=1; + for (i in 1:length(S)) + { + if (X$x[i]-t[i]>0) + { + last=i + } + } + #print(last) + return(X$ix[1:last]) +} + +.FAN_Selection<-function(T,Cmoins,n) +{ + X=sort(T,decreasing = TRUE,index.return=TRUE) + stat=array(0,length(T)) + for (i in 1:length(T)) + { + stat[i]=min(Cmoins[X$ix[1:i]])*(sum(X$x[1:i]^2))^2/(4*i/n+sum(X$x[1:i]^2)) + } + #print(last) + return(X$ix[1:which.max(stat)]) +} + diff --git a/R/LinearRule.R b/R/LinearRule.R new file mode 100644 index 0000000..b7063dd --- /dev/null +++ b/R/LinearRule.R @@ -0,0 +1,167 @@ + +# Author: robin +############################################################################### + + +setClass( + Class="LinearRule", + representation=representation( + normalVector="numeric", + normalIndex="integer", + centerVector="numeric", + proportions="numeric", + prior="logical" + ) +) + +.learnLinearRule<-function(x1,x2,procedure='noThresh',covariance='diagonal',ql=1*(procedure=='Fisher')+0.05,prior=FALSE) +{ + # dim(xi) is p x ni + # Procedure can be 'Fisher', covariance can be 'full' or 'diagonal' + #Testing if the data are ok + if (!(is(x, "matrix")||is(x, "data.frame"))) + stop('the features have to be stored in a matrix or a data.frame') + procedure=as.character(procedure) + #declaration of variables + p=length(x1[1,]); n1=length(x1[,1]); n2=length(x2[,1]); + center=array(0,p); + hatmu=array(0,c(2,p)); + hatC=array(0,p) + s=array(0,p) + F=array(0,p) + # start by learning covariance and group mean vectors + hatmu[1,]= colMeans(x1); + hatmu[2,]= colMeans(x2); + hatC=1/(n1+n2-1)*(colSums((x1-hatmu[1,])^2)+colSums((x2-hatmu[2,])^2)) + indices=integer(0); + F=(hatmu[1,]-hatmu[2,])/(hatC)^(1/2); + s=(hatmu[1,]+hatmu[2,])/2; + # Comput the normal vectors depending on the used procedure. + switch(procedure, + 'noThresh'={indices=1:p}, + 'FANThresh'={ + indices=.FAN_Selection(abs(F),hatC^(-1),n); + F[!1:p%in%indices]=0; + }, + 'UnivThresh'={ + lambda=array(as.numeric(sqrt(2*as.numeric(ql)*log(p)/n)),p); + indices=1:p + indices=indices[abs(F)>lambda] + F[!1:p%in%indices]=0; + }, + 'FDRThresh'={ + indices=.FDR_Selection(sqrt(n)*abs(F),q=as.numeric(ql)) + F[!1:p%in%indices]=0; + }, + 'FDRstudent'={ + indices=.FDRS_Selection(sqrt(n)*abs(F),q=as.numeric(ql),n) + F[!1:p%in%indices]=0; + } , + 'Otherwise'={stop('Trying to use a non-implemented procedure')} + ) + F=F/(hatC)^(1/2); + return(new(Class="LinearRule",normalVector=F,centerVector=s, + normalIndex=as.integer(indices), + proportions=as.numeric(n1/(n1+n2)), + prior=prior)) +} + +.learnLinearRulefortune<-function(x,y,...){ + y=ordered(y) + classes=levels(y) + return(.learnLinearRule(x[y==classes[1],],x[y==classes[2],],...)) +} + + +.LDA<-function(x,F,s,p=1/2) + return((sum(F*(x-s))>log((1-p)/p))) + + +setMethod( + f="plotClassifRule", + signature=c("LinearRule"), + definition=function(object,...){ + Index=object@normalIndex + NormalVector=object@normalVector[Index] + Center=object@centerVector[Index] + Mydata=data.frame(NormalVector=NormalVector,Center=Center,Index=Index) + return(xyplot(NormalVector+Center~Index,data=Mydata,auto.key = TRUE,...)) + } +) + +setMethod( + f="show", + signature="LinearRule", + definition=function(object){ + cat('Normal Vector: \n') + print(object@normalVector) + cat('Center Vector: \n') + print(object@centerVector) + } +) +setMethod( + f="getLogLikeRatio", + signature="LinearRule", + definition=function(object){ + return(list(NormalVector=object@normalVector,VecterVector=object@centerVector)) + } +) +setMethod( + f="predict", + signature="LinearRule", + definition=function(object,newdata){ + if (!(is(newdata, "matrix")||is(newdata, "data.frame"))) + stop("newdata must be a matrix or a data frame") + p=length(object@normalVector); + n=nrow(newdata); + if (object@prior) + res=apply(newdata,1,.LDA,object@normalVector,object@centerVector,object@proportions) + else + res=apply(newdata,1,.LDA,object@normalVector,object@centerVector) + + y=factor(y) + resultat=res + resultat[res==1]=levels(y)[1] + resultat[res==0]=levels(y)[2] + return(resultat) + } +) + +#setMethod(f="getNormalVector",signature='LinearRule', +# definition=function(object){return(object@normalVector)} +#) +# +#setMethod(f="getCenterVector",signature='LinearRule', +# definition=function(object){return(object@centerVector)} +#) + +setMethod(f=".EvaluateLogLikeRatio",signature=c(x='numeric',object='LinearRule'), + definition=function(x,object){ + return(sum(object@normalVector*(x-object@centerVector))) + } +) + +setMethod(f=".minus",signature=c(object='LinearRule'), + definition=function(object){ + return(new(Class="LinearRule",normalVector=-object@normalVector, + centerVector=object@centerVector, + normalIndex=object@normalIndex, + proportions=object@proportions, + prior=object@prior)) + } +) + + +.tune.LDA<-function(x,y,procedure='FDRThresh',ql=10^(-1:-7),prior=prior,...) +{ #### for internal use only + #### y is an array of 2 factors + y=ordered(y) + call<-match.call() + ranges<-list(ql=ql,prior=prior,procedure=procedure) + ranges[sapply(ranges,is.null)]<-NULL + tunecontrol=tune.control(sampling='cross',best.model=FALSE) + if(length(ranges)<1) ranges=NULL + modeltmp<-tune('.learnLinearRulefortune',train.x=x,train.y=y,ranges=ranges,tunecontrol=tunecontrol,predict=predict,...) + besti=length(ranges$ql)+1-which.min(rev(modeltmp$performances[['error']])) + return(.learnLinearRulefortune(x,y,procedure=procedure,ql=modeltmp$performances$ql[besti],prior=prior)) +} \ No newline at end of file diff --git a/R/PartitionWithLLR.R b/R/PartitionWithLLR.R new file mode 100644 index 0000000..80253f5 --- /dev/null +++ b/R/PartitionWithLLR.R @@ -0,0 +1,249 @@ +# TODO: Add comment +# +# Author: robin +############################################################################### + + + +setClass( + Class="PartitionWithLLR", + representation=representation( + LogLikeRatio="list", + labels="ordered", + ThreshProc='character', + ql='numeric', + qq='numeric' + ) +) + +setMethod( + f="predict", + signature="PartitionWithLLR", + definition=function(object,newdata){ + if (!(is(newdata, "matrix")||is(newdata, "data.frame"))) + stop("newdata must be a matrix or a data frame") + p=ncol(newdata); + n=nrow(newdata); + y=object@labels; + coherence=array(0,c(n,nlevels(y),nlevels(y)), + dimnames=list(1:n,levels(y),levels(y))) + for (k in levels(y)) + { + for (l in levels(y)) + { + if (l!=k) + { + LLR=getBinaryRule(object,k,l) + if(LLR@prior) + p=LLR@proportions + else + p=1/2 + priorthresh=log((1-p)/p) + coherence[,k,l]=(apply(newdata,1,.EvaluateLogLikeRatio,LLR)>priorthresh) + } + } + } + return(apply(coherence,1,.coherence2group)) + } +) + + +setMethod( + f="show", + signature="PartitionWithLLR", + definition=function(object){ + labels=object@labels; + cat('Partition of input space into ', nlevels(labels),'areas \n') + if (class(object@LogLikeRatio[[1]][[2]])=='LinearRule') + { + cat('Obtained with Linear separations\n') + cat('used thresolding procedure: ',object@ThreshProc,'\n') + if (length(object@ql)>1) + cat('with 10 fold cross validation on parameters ql with grid: ',object@ql,'\n') + else cat('with parameter ql= ',object@ql,'\n') + } + if (class(object@LogLikeRatio[[1]][[2]])=='QuadraticRule') + { + cat('Obtained with Quadratic separations\n') + cat('used thresolding procedure: ',object@ThreshProc,'\n') + if (length(object@ql)>1) + cat('with 10 fold cross validation on parameters ql with grid: ',object@ql,'\n') + else cat('with parameter ql= ',object@ql,'\n') + if (length(object@qq)>1) + cat('with 10 fold cross validation on parameters qq with grid: ',object@qq,'\n') + else cat('with parameter qq= ',object@qq,'\n') + } + + + } +) + +setMethod( + f="plotClassifRule", + signature=c("PartitionWithLLR"), + definition=function(object,...){ + labels=object@labels; + Mydata=data.frame() + for (k in labels) + { + for (j in labels) + { + if (k1){ BinaryLearningProcedure=.tune.LDA;} + else { BinaryLearningProcedure=.learnLinearRulefortune;} + } + if (type=='quadratic') + { + if ((length(ql)>1)|(length(qq)>1)){ BinaryLearningProcedure=.tune.QDA;} + else { BinaryLearningProcedure=.learnQuadraticRulefortune;} + } + if ((type!='linear')&(type!='quadratic')) + { + stop('The type of procedure has to be linear or quadratic') + } + } +######### Learning + + y=ordered(y) + f=list() + if (type=='quadratic') + { + for (k in levels(y)) + { + f[[k]]=list() + for (l in levels(y)) + { + if (l!=k) + { + x.train=x[(y==k)|(y==l),] + ytmp=y[(y==k)|(y==l)] + y.train=factor(array(c(1,0),length(ytmp))) + y.train[ytmp==l]=1 + y.train[ytmp==k]=0 + f[[k]][[l]]=.tune.QDA(x.train,y.train,procedure,ql,qq,prior=prior) + } + + } + } + } + if (type=='linear') + { + for (k in levels(y)) + { + f[[k]]=list() + for (l in levels(y)) + { + if (l!=k) + { + x.train=x[(y==k)|(y==l),] + ytmp=y[(y==k)|(y==l)] + y.train=factor(array(c(1,0),length(ytmp))) + y.train[ytmp==l]=1 + y.train[ytmp==k]=0 + f[[k]][[l]]=.tune.LDA(x.train,y.train,procedure,ql,prior=prior) + } + + } + } + } + return(new(Class="PartitionWithLLR",LogLikeRatio=f,labels=y,ThreshProc=procedure,ql=ql,qq=qq)) +} + +setMethod(f="getBinaryRule",signature='PartitionWithLLR', + definition=function(object,k,l){ + #i and j should be ordered factors + if (k!=l) return(object@LogLikeRatio[[k]][[l]]) + #if (k>l) return(.minus(object@LogLikeRatio[[l]][[k]])) + if (k==l) return(NULL) + } +) + + + +.coherence2group<-function(coherence) +{ + X=rowMeans(coherence) + indexes=which(X==max(X)) + if (length(indexes)==1){return(dimnames(coherence)[[2]][indexes])} + if (length(indexes)==2) + { + if (coherence[indexes[1],indexes[2]]==1) + return(dimnames(coherence)[[2]][indexes[1]]) + else + return(dimnames(coherence)[[2]][indexes[2]]) + } + if (length(indexes)>2) + return(dimnames(coherence)[[2]][sample(indexes,1)]) +} \ No newline at end of file diff --git a/R/QuadraticRule.R b/R/QuadraticRule.R new file mode 100644 index 0000000..0095519 --- /dev/null +++ b/R/QuadraticRule.R @@ -0,0 +1,211 @@ +# TODO: Add comment +# +# Author: robingirard +############################################################################### + + + +setClass( + Class="QuadraticRule", + representation=representation( + formVector="numeric", + formIndex="integer", + constant="numeric" + ), + contains="LinearRule" +) + +.learnQuadraticRule<-function(x1,x2,procedure='Fisher',covariance='diagonal',ql=1*(procedure=='Fisher')+0.05,qq=1*(procedure=='Fisher')+0.05,prior=FALSE) +{ + # dim(x) is p x n + # Procedure can be 'Fisher', covariance can be 'full' or 'diagonal' + #Testing if the data are ok + if (!(is(x, "matrix")||is(x, "data.frame"))) + stop('the features have to be stored in a matrix or a data.frame') + procedure=as.character(procedure) + #declaration of variables + p=length(x1[1,]); n1=length(x1[,1]); n2=length(x2[,1]); + center=array(0,p); + hatmu=array(0,c(2,p)); + hatC=array(0,c(2,p)) + s=array(0,p) + F=array(0,p) + A=array(0,p) + # start by learning covariance and group mean vectors + + hatmu[1,]= colMeans(x1); + hatmu[2,]= colMeans(x2); + hatC[1,]=1/(n1-1)*colSums((x1-hatmu[1,])^2) + hatC[2,]=1/(n2-1)*colSums((x2-hatmu[2,])^2) + indices=integer(0); + nindices=integer(0); + F=(hatmu[1,]-hatmu[2,])*(1/hatC[1,]+1/hatC[2,])^(1/2); + s=(hatmu[1,]+hatmu[2,])/2; + # start by learning covariance and group mean vectors + + TF=F/((1+hatC[1,]/hatC[2,])/n1+(1+hatC[2,]/hatC[1,])/n2)^(1/2) + # Comput the normal vectors depending on the used procedure. + + switch(procedure, + 'noThresh'={nindices=1:p}, + 'UnivThresh'={ + lambda=as.numeric(sqrt(2*as.numeric(ql)*log(p))); + tmp=1:p + nindices=tmp[(abs(TF)>lambda)] + F[!1:p%in%nindices]=0; + }, + 'FDRThresh'={ + nindices=.FDR_Selection(abs(TF),q=as.numeric(ql)) + F[!1:p%in%nindices]=0; + }, + 'Otherwise'={stop('Trying to use a non-implemented procedure')} + ) + F=F/2*(1/hatC[1,]+1/hatC[2,])^(1/2); + A=1/hatC[1,]-1/hatC[2,] + Tw=(hatC[1,]-hatC[2,])/(2*hatC[1,]^2/(n1-1)+2*hatC[2,]^2/(n2-1))^(1/2) + # Comput the form vectors depending on the used procedure. + + switch(procedure, + 'noThresh'={indices=1:p}, + 'UnivThresh'={ + lambda=as.numeric(sqrt(2*qq*log(p))); + tmp=1:p + indices=tmp[(abs(Tw)>lambda)] + A[!1:p%in%indices]=0; + }, + 'FDRThresh'={ + indices=.FDR_Selection(abs(Tw),q=as.numeric(qq)) + A[!1:p%in%indices]=0; + }, + 'Otherwise'={stop('Trying to use a non-implemented procedure')} + ) + lambda=as.numeric(sqrt(2*log(p))); + if (length(indices)==1) + { + if ((abs(Tw[indices])0) + { + c=sum(1/8*A[indices]*(hatmu[1,indices]-hatmu[2,indices])^2)+1/2*sum(log(hatC[1,indices]/hatC[2,indices])) + } + else{c=0;} + return(new(Class="QuadraticRule",normalVector=F, + centerVector=s, + constant=c, + formVector=A, + normalIndex=as.integer(nindices), + formIndex=as.integer(indices), + proportions=as.numeric(n1/(n1+n2)), + prior=prior)) +} +.learnQuadraticRulefortune<-function(x,y,...){ + y=ordered(y) + classes=levels(y) + return(.learnQuadraticRule(x[y==classes[1],],x[y==classes[2],],...)) +} + +setMethod( + f="predict", + signature="QuadraticRule", + definition=function(object,newdata){ + + if (!(is(newdata, "matrix")||is(newdata, "data.frame"))) + stop("newdata must be a matrix or a data frame") + p=length(object@normalVector); + n=nrow(newdata); + if (object@prior) + res=apply(newdata,1,.QDA,object@normalVector,object@centerVector,object@formVector,object@constant,object@proportions) + else + res=apply(newdata,1,.QDA,object@normalVector,object@centerVector,object@formVector,object@constant,1/2) + y=factor(y) + resultat=res + resultat[res==1]=levels(y)[1] + resultat[res==0]=levels(y)[2] + return(resultat) + } +) + +setMethod( + f="plotClassifRule", + signature=c("QuadraticRule"), + definition=function(object,...){ + Index=union(object@normalIndex,object@formIndex) + NormalVector=object@normalVector[Index] + Center=object@centerVector[Index] + FormVector=object@formVector[Index] + Mydata=data.frame(NormalVector=NormalVector,FormVector=FormVector, + Center=Center,Index=Index) + return(xyplot(NormalVector+Center+FormVector~Index,data=Mydata,auto.key = TRUE,...)) + } +) + +.QDA<-function(x,G,s,A,c,p=1/2) +{ + res=sum(-1/2*A*(x-s)^2+G*(x-s))-c-log((1-p)/p) + #cat('s=',sum(-1/2*A*(x-s)^2+G*(x-s)),' c=',c,' s-c=',res, ' \n') + return((res>0)) +} + + +setMethod(f=".EvaluateLogLikeRatio",signature=c(x='numeric',object='QuadraticRule'), + definition=function(x,object){ + return(-object@constant+sum(-1/2*object@formVector*(x-object@centerVector)^2+object@normalVector*(x-object@centerVector))) + } +) +setMethod(f=".minus",signature=c(object='QuadraticRule'), + definition=function(object){ + return(new(Class="QuadraticRule",normalVector=-object@normalVector, + centerVector=object@centerVector, + constant=-object@constant, + formVector=-object@formVector, + normalIndex=object@normalIndex, + formIndex=object@formIndex, + proportions=object@proportions, + prior=object@prior)) + } +) +setMethod( + f="show", + signature="QuadraticRule", + definition=function(object){ + cat('Normal Vector: \n') + print(object@normalVector) + cat('Center Vector: \n') + print(object@centerVector) + cat('Form Vector: \n') + print(object@formVector) + cat('constant: \n') + print(object@constant) + } +) + + +setMethod( + f="getLogLikeRatio", + signature="QuadraticRule", + definition=function(object){ + return(list( NormalVector=object@normalVector, + VecterVector=object@centerVector, + FormVector=object@formVector, + Constant=object@constant)) + } +) +.tune.QDA<-function(x,y,procedure='FDRThresh',ql=10^(-1:-7),qq=10^(-1:-2),prior=prior,...) +{ + y=ordered(y) + call<-match.call() + ranges<-list(ql=ql,qq=qq,prior=prior,procedure=procedure) + ranges[sapply(ranges,is.null)]<-NULL + tunecontrol=tune.control(sampling='cross',best.model=FALSE) + if(length(ranges)<1) ranges=NULL + y=ordered(y) + modeltmp<-tune('.learnQuadraticRulefortune',train.x=x,train.y=y,ranges=ranges,tunecontrol=tunecontrol,predict=predict,...) + besti=length(ranges$ql)*length(ranges$qq)+1-which.min(rev(modeltmp$performances[['error']])) + return(.learnQuadraticRulefortune(x,y, + procedure=procedure,ql=modeltmp$performances$ql[besti], + qq=modeltmp$performances$qq[besti],prior=prior)) +} \ No newline at end of file diff --git a/man/LinearRule-class.Rd b/man/LinearRule-class.Rd new file mode 100644 index 0000000..4f5ea4c --- /dev/null +++ b/man/LinearRule-class.Rd @@ -0,0 +1,45 @@ +\name{LinearRule-class} +\Rdversion{1.1} +\docType{class} +\alias{LinearRule-class} +\alias{plotClassifRule,LinearRule-method} +\alias{predict,LinearRule-method} +\alias{show,LinearRule-method} + +\title{Class "LinearRule" ~~~ } +\description{ ~~ A concise (1-5 lines) description of what the class is. ~~} +\section{Objects from the Class}{ +Objects can be created by calls of the form \code{new("LinearRule", ...)}. + ~~ describe objects here ~~ +} +\section{Slots}{ + \describe{ + \item{\code{normalVector}:}{Object of class \code{"numeric"} ~~ } + \item{\code{normalIndex}:}{Object of class \code{"numeric"} ~~ } + \item{\code{centerVector}:}{Object of class \code{"numeric"} ~~ } + \item{\code{prior}:}{Object of class \code{"logical"} ~~ } + \item{\code{proportions}:}{Object of class \code{"numeric"} ~~ } + } +} +\section{Methods}{ + \describe{ + \item{.EvaluateLogLikeRatio}{\code{signature(x = "numeric", object = "LinearRule")}: ... } + \item{getLogLikeRatio}{\code{signature(object = "LinearRule")}: ... } + \item{plotClassifRule}{\code{signature(object = "LinearRule")}: ... } + \item{predict}{\code{signature(object = "LinearRule")}: ... } + \item{show}{\code{signature(object = "LinearRule")}: ... } + } +} +\references{ } +\author{ } +\note{ } + + +\seealso{ +% ~~objects to See Also as \code{\link{~~fun~~}}, ~~~ +% or \code{\linkS4class{CLASSNAME}} for links to other classes +} +\examples{ +showClass("LinearRule") +} +\keyword{classes} diff --git a/man/PartitionWithLLR-class.Rd b/man/PartitionWithLLR-class.Rd new file mode 100644 index 0000000..8e85a79 --- /dev/null +++ b/man/PartitionWithLLR-class.Rd @@ -0,0 +1,45 @@ +\name{PartitionWithLLR-class} +\Rdversion{1.1} +\docType{class} +\alias{PartitionWithLLR-class} +\alias{plotClassifRule,PartitionWithLLR-method} +\alias{predict,PartitionWithLLR-method} +\alias{show,PartitionWithLLR-method} + +\title{Class "PartitionWithLLR" ~~~ } +\description{ ~~ A concise (1-5 lines) description of what the class is. ~~} +\section{Objects from the Class}{ +Objects can be created by calls of the form \code{new("PartitionWithLLR", ...)}. + ~~ describe objects here ~~ +} +\section{Slots}{ + \describe{ + \item{\code{LogLikeRatio}:}{Object of class \code{"list"} ~~ } + \item{\code{labels}:}{Object of class \code{"ordered"} ~~ } + \item{\code{ThreshProc}:}{Object of class \code{"character"} ~~ } + \item{\code{ql}:}{Object of class \code{"numeric"} ~~ } + \item{\code{qq}:}{Object of class \code{"numeric"} ~~ } + } +} +\section{Methods}{ + \describe{ + \item{getBinaryRule}{\code{signature(object = "PartitionWithLLR")}: ... } + \item{plotClassifRule}{\code{signature(object = "PartitionWithLLR")}: ... } + \item{predict}{\code{signature(object = "PartitionWithLLR")}: ... } + \item{show}{\code{signature(object = "PartitionWithLLR")}: ... } + } +} +\references{ } +\author{ } +\note{ } + + + +\seealso{ +% ~~objects to See Also as \code{\link{~~fun~~}}, ~~~ +% or \code{\linkS4class{CLASSNAME}} for links to other classes +} +\examples{ +showClass("PartitionWithLLR") +} +\keyword{classes} diff --git a/man/QuadraticRule-class.Rd b/man/QuadraticRule-class.Rd new file mode 100644 index 0000000..d94f861 --- /dev/null +++ b/man/QuadraticRule-class.Rd @@ -0,0 +1,49 @@ +\name{QuadraticRule-class} +\Rdversion{1.1} +\docType{class} +\alias{QuadraticRule-class} +\alias{plotClassifRule,QuadraticRule-method} +\alias{predict,QuadraticRule-method} +\alias{show,QuadraticRule-method} + +\title{Class "QuadraticRule" ~~~ } +\description{ This class implements a high dimensional binary quadratic classification rule} +\section{Objects from the Class}{ +Objects can be created by calls of \code{learnBinaryRule(x,y,type='quadratic')} see \code{\link{learnBinaryRule}}. + +} +\section{Slots}{ + \describe{ + \item{\code{formVector}:}{Object of class \code{"numeric"} ~~ } + \item{\code{formIndex}:}{Object of class \code{"numeric"} ~~ } + \item{\code{constant}:}{Object of class \code{"numeric"} ~~ } + \item{\code{normalVector}:}{Object of class \code{"numeric"} ~~ } + \item{\code{normalIndex}:}{Object of class \code{"numeric"} ~~ } + \item{\code{centerVector}:}{Object of class \code{"numeric"} ~~ } + } +} +\section{Extends}{ +Class \code{"\linkS4class{LinearRule}"}, directly. +} +\section{Methods}{ + \describe{ + \item{getLogLikeRatio}{\code{signature(object = "QuadraticRule")}: ... } + \item{plotClassifRule}{\code{signature(object = "QuadraticRule")}: ... } + \item{predict}{\code{signature(object = "QuadraticRule")}: ... } + \item{show}{\code{signature(object = "QuadraticRule")}: ... } + } +} +\references{ See my preprint Preprint } +\author{ robin girard } +\note{} + + %~Make other sections like Warning with \section{Warning }{....} ~ + +%\seealso{ +% ~~objects to See Also as \code{\link{~~fun~~}}, ~~~ +% or \code{\linkS4class{CLASSNAME}} for links to other classes +%} +\examples{ +showClass("QuadraticRule") +} +\keyword{classes} diff --git a/man/VHDClassification-package.Rd b/man/VHDClassification-package.Rd new file mode 100644 index 0000000..eed59e9 --- /dev/null +++ b/man/VHDClassification-package.Rd @@ -0,0 +1,190 @@ +\name{VHDClassification-package} +\alias{VHDClassification-package} +\alias{VHDClassification} +\alias{predict-methods} +\docType{package} +\title{ +Discrimination-Classification in very high dimension with linear and quadratic rules. +} +\description{ +This package provides an implementation of Linear disciminant analysis +and quadratic discriminant analysis that works fine in very high dimension +(when there are many more variables than observations). +} +\details{ +\tabular{ll}{ +Package: \tab VHDClassification\cr +Type: \tab Package\cr +Version: \tab 0.1\cr +Date: \tab 2010-04-15\cr +License: \tab GPL-2\cr +LazyLoad: \tab yes\cr +Depends: \tab methods, e1071, lattice, stats\cr +} +This package provides learning procedure for classification in very high dimension. +Binary learning is done with \code{\link{learnBinaryRule}} while K-class (K>=2) learning is done +with function \code{\link{learnPartitionWithLLR}}. + +\code{\link{learnBinaryRule}} can return an object \code{\link{LinearRule-class}} or an object \code{\link{QuadraticRule-class}} depending +whether type='linear' or 'quadratic'. +\code{\link{learnPartitionWithLLR}} basically returns a set of binary rules which is represented by the class \code{\link{PartitionWithLLR-class}}. +The used procedure for the learning are described in the papers cited below. The + + +The method predict (\link{predict-methods}) is implemented for class \code{\link{LinearRule-class}} \code{\link{QuadraticRule-class}} \code{\link{learnPartitionWithLLR}}. +It predicts the class of a new observation. +} +\author{ +Maintainer-author: Robin Girard +} +\references{ +Fast rate of convergence in high dimensional linear discriminant analysis. R. Girard To appear in Journal of Nonparametric Statistics.\\ +Very high dimensional discriminant analysis with thresholding estimation. R. Girard. Submitted. + +} +\keyword{ discrimination } +\keyword{ LDA } +\keyword{ QDA } +\keyword{ high dimension } +\keyword{ classification } +\keyword{ dimension reduction } +\seealso{ +} +\examples{ +############ Tests 2 classes when the true rule should be quadratic +#library(VHDClassification) +p=1000; n=50 ; mu=array(0,c(p,2)) ; C=array(c(1,20),c(p,2)); C[c(1,3,5),1]=40 +x=NULL; y=NULL; +for (k in 1:2){x=rbind(x,t(array(C[,k]^(1/2),c(p,n))*(matrix(rnorm(p*n),nrow=p,ncol=n))+array(mu[,k],c(p,n)))); + y=c(y,array(k,n))} +#Learning +LearnedQuadBinaryRule=learnBinaryRule(x,y,type='quadratic') +LearnedLinBinaryRule=learnBinaryRule(x,y) # default is linear type +# for comparison with SVM +# require(e1071) +# svmRule=best.tune('svm',train.x=x,train.y=factor(y),ranges=list(gamma=c(2^(-4:4),cost = 2^(-2:2)))) +# for comparison with randomForest +require(randomForest) +RF <- best.tune('randomForest',x,factor(y),ranges=list(ntree = c(100,500))) +# for comparison with nearest chrunken centroid +#require(pamr) +#myTrainingdata=list(x=t(x),y=y) +#mytrain <- pamr.train(myTrainingdata) +#mycv <- pamr.cv(mytrain,myTrainingdata) +#thresh=try(mycv$threshold[which.min(mycv$error)],silent = TRUE) + + + + +#Testing Set +x=NULL; y=NULL; +for (k in 1:2){ + x=rbind(x,t(array(C[,k]^(1/2),c(p,n))*(matrix(rnorm(p*n),nrow=p,ncol=n))+array(mu[,k],c(p,n)))); + y=c(y,array(k,n)) +} +#Testing +myTestingdata=list(x=x,y=y) +QDAScore=mean((y!=predict(LearnedQuadBinaryRule,myTestingdata$x))) ; +LDAScore=mean((y!=predict(LearnedLinBinaryRule,myTestingdata$x))) ; +RFScore=mean((y!=predict(RF,myTestingdata$x))) ; +#SVMScore=mean((y!=predict(svmRule,x))) ; +#comparison with nearest chrunken centroid +myTestingdata=list(x=t(x),y=y) +#SCScore=mean((myTestingdata$y!=as.numeric(pamr.predict(mytrain, myTestingdata$x,threshold=thresh,type="class")))) +cat('\n') +cat('What does it cost to use type=linear when the rule is quadratic ? ','\n', +'Score of the linear rule: ',LDAScore,'\n', +'Score of the quadratic rule: ',QDAScore,'\n', +#'Score of the nearest shrunken centroid rule: ',SCScore,'\n', +'Score of the random forest rule: ',RFScore,'\n', +#'Score of the support vector machine rule: ',SVMScore,'\n', +'Note: These scores should be average for a large number of experiment or interpreted carefully \n') +plotClassifRule(LearnedQuadBinaryRule) + +############ Tests 2 classes quadratic and linear. when the true is linear +#library(VHDClassification) +#p=100; n=50 ; mu=array(0,c(p,2)); mu[1:10,1]=1 ;C=array(c(1,20),p) +#x=NULL; y=NULL; +#for (k in 1:2){ +# x=rbind(x,t(array(C^(1/2),c(p,n))*(matrix(rnorm(p*n),nrow=p,ncol=n))+array(mu[,k],c(p,n)))); +# y=c(y,array(k,n))} +#Learning +#LearnedQuadBinaryRule=learnBinaryRule(x,y,type='quadratic') +#LearnedLinBinaryRule=learnBinaryRule(x,y) # default is linear type +#comparison with nearest chrunken centroid +#require(pamr) +#myTrainingdata=list(x=t(x),y=y) +#mytrain <- pamr.train(myTrainingdata) +#mycv <- pamr.cv(mytrain,myTrainingdata) +#thresh=try(mycv$threshold[which.min(mycv$error)],silent = TRUE) + + +#Testing Set +#x=NULL; y=NULL; +#for (k in 1:2){ +# x=rbind(x,t(array(C^(1/2),c(p,n))*(matrix(rnorm(p*n),nrow=p,ncol=n))+array(mu[,k],c(p,n)))); +# y=c(y,array(k,n)) +#} +#Testing +#myTestingdata=list(x=x,y=y) +#QDAScore=mean((y!=predict(LearnedQuadBinaryRule,myTestingdata$x))) ; +#LDAScore=mean((y!=predict(LearnedLinBinaryRule,myTestingdata$x))) ; +#comparison with nearest shrunken centroid +#myTestingdata=list(x=t(x),y=y) +#SCScore=mean((myTestingdata$y!=as.numeric(pamr.predict(mytrain,threshold=thresh, myTestingdata$x,type="class")))) +#cat('\n', +#'What does it cost to use type=quadratic rule when the true optimal rule is linear ? ','\n', +#'Score of the linear rule: ',LDAScore,'\n', +#'Score of the rule with type=quadratic : ',QDAScore,'\n', 'it detects that the true rule is linear?\n', +#'Score of the nearest shrunken centroid rule: ',SCScore,'\n') + +#plotClassifRule(LearnedQuadBinaryRule) + +############ Tests 3 classes +#library(VHDClassification) +#p=1000; n=40 ; mu=array(0,c(p,3)); mu[1:10,1]=4; C=array(c(1,20),p) + +#x=NULL; y=NULL; +#for (k in 1:3){ +# if (k<3){ +# x=rbind(x,t(array(C^(1/2),c(p,n))*(matrix(rnorm(p*n),nrow=p,ncol=n))+array(mu[,k],c(p,n)))); +# y=c(y,array(k,n))} +# else +# { +# tildeC=C; tildeC[1:10]=40; +# x=rbind(x,t(array(tildeC^(1/2),c(p,n))*(matrix(rnorm(p*n),nrow=p,ncol=n))+array(mu[,k],c(p,n)))); +# y=c(y,array(k,n)) +# } +# } +#Learning +#LearnedLinearPartitionWithLLR=learnPartitionWithLLR(x,y,type='linear') +#LearnedQuadraticPartitionWithLLR=learnPartitionWithLLR(x,y,type='quadratic') +#plotClassifRule(LearnedQuadraticPartitionWithLLR) +#require(randomForest) +#RF <- best.tune('randomForest',x,factor(y),ranges=list(ntree = c(500))) + +#Testing Set +#x=NULL; y=NULL; +#for (k in 1:3){ +# if (k<3){ +# x=rbind(x,t(array(C^(1/2),c(p,n))*(matrix(rnorm(p*n),nrow=p,ncol=n))+array(mu[,k],c(p,n)))); +# y=c(y,array(k,n))} +# else +# { +# tildeC=C; tildeC[1:10]=40; +# x=rbind(x,t(array(tildeC^(1/2),c(p,n))*(matrix(rnorm(p*n),nrow=p,ncol=n))+array(mu[,k],c(p,n)))); +# y=c(y,array(k,n)) +# } +# } +#Testing +#myTestingdata=list(x=x,y=y) +#LDAScore=mean((y!=factor(predict(LearnedLinearPartitionWithLLR,myTestingdata$x)))) ; +#QDAScore=mean((y!=factor(predict(LearnedQuadraticPartitionWithLLR,myTestingdata$x)))) ; +#RFScore=mean((y!=predict(RF,myTestingdata$x))) ; + +#cat('Score of the quadratic rule: ',QDAScore,'\n', +#'Score of the linear rule: ',LDAScore,'\n', +#'Score of the random Forest Rule: ',RFScore,'\n') + + +} diff --git a/man/getBinaryRule-methods.Rd b/man/getBinaryRule-methods.Rd new file mode 100644 index 0000000..6f4e3ea --- /dev/null +++ b/man/getBinaryRule-methods.Rd @@ -0,0 +1,15 @@ +\name{getBinaryRule-methods} +\docType{methods} +\alias{getBinaryRule-methods} +\alias{getBinaryRule,PartitionWithLLR-method} +\title{ ~~ Methods for Function getBinaryRule ~~} +\description{ + ~~ Methods for function \code{getBinaryRule} ~~ +} +\section{Methods}{ +\describe{ + +\item{\code{signature(object = "PartitionWithLLR")}}{ see \code{\link{getBinaryRule}} } +}} +\keyword{methods} + diff --git a/man/getBinaryRule.Rd b/man/getBinaryRule.Rd new file mode 100644 index 0000000..c72b96a --- /dev/null +++ b/man/getBinaryRule.Rd @@ -0,0 +1,64 @@ +\name{getBinaryRule} +\alias{getBinaryRule} +%- Also NEED an '\alias' for EACH other topic documented here. +\title{ +Getter set of binary rules (object PartitionWithLLR) +} +\description{ +This function returns the binary rule for discrimination between data from class k and data from class l +} +\usage{ +getBinaryRule(object, k, l) +} +%- maybe also 'usage' for other objects documented here. +\arguments{ + \item{object}{ +An object of class PartitionWithLLR as returned by learnPartitionWithLLR +} + \item{k}{ +an existing label +} + \item{l}{ +an existing label +} +} +\details{ +%% ~~ If necessary, more details than the description above ~~ +} +\value{ +A binary classification rule. Can either be an object of class LinearRule or an object of class QuadraticRule +} +\references{ +Fast rate of convergence in high dimensional linear discriminant analysis. R. Girard To appear in Journal of Nonparametric Statistics.\\ +Very high dimensional discriminant analysis with thresholding estimation. R. Girard. Submitted. + +} +\author{ +Robin Girard +} +%\note{ +%% ~~further notes~~ +%} + +%% ~Make other sections like Warning with \section{Warning }{....} ~ + +\seealso{ +\code{\link{getLogLikeRatio}} +} +\examples{ +#try p=1000 , 5000, ... +p=100; n=20 ; mu=array(0,c(p,4)); mu[1:10,1]=2 ;mu[11:20,2]=2;C=array(c(1,20),p) +mu[21:30,3]=2 +x=NULL; y=NULL; +for (k in 1:4){ + x=rbind(x,t(array(C^(1/2),c(p,n))*(matrix(rnorm(p*n),nrow=p,ncol=n))+array(mu[,k],c(p,n)))); + y=c(y,array(k,n))} +#Learning +LearnedLinearPartitionWithLLR=learnPartitionWithLLR(x,y,procedure='FDRThresh') +Rule=getBinaryRule(LearnedLinearPartitionWithLLR,1,2) +show(Rule) +} +% Add one or more standard keywords, see file 'KEYWORDS' in the +% R documentation directory. +\keyword{Discrimination} + diff --git a/man/getLogLikeRatio-methods.Rd b/man/getLogLikeRatio-methods.Rd new file mode 100644 index 0000000..c852927 --- /dev/null +++ b/man/getLogLikeRatio-methods.Rd @@ -0,0 +1,20 @@ +\name{getLogLikeRatio-methods} +\docType{methods} +\alias{getLogLikeRatio-methods} +\alias{getLogLikeRatio,LinearRule-method} +\alias{getLogLikeRatio,QuadraticRule-method} +\title{ ~~ Methods for Function getLogLikeRatio ~~} +\description{ + ~~ Methods for function \code{getLogLikeRatio} ~~ +} +\section{Methods}{ +\describe{ + +\item{\code{signature(object = "LinearRule")}}{ Returns a list with NormalVector and CenterVector. The loglikelihood ratio on x can be evaluated by +L(x)=1/2. } + +\item{\code{signature(object = "QuadraticRule")}}{ returns a list with a NormalVector, CenterVector, FormVector (3 vectors) and a numeric constant Constant. The loglikelihood ratio on x can be evaluated by +L(x)=-1/2+ -Constant } +}} +\keyword{methods} + diff --git a/man/getLogLikeRatio.Rd b/man/getLogLikeRatio.Rd new file mode 100644 index 0000000..2a19337 --- /dev/null +++ b/man/getLogLikeRatio.Rd @@ -0,0 +1,58 @@ +\name{getLogLikeRatio} +\alias{getLogLikeRatio} +%- Also NEED an '\alias' for EACH other topic documented here. +\title{ +Get the log-likelihood ratio from a binary rule (QuadraticRule or LinearRule) +} +\description{ +Binary rules can be expressed +} +\usage{ +getLogLikeRatio(object) +} +%- maybe also 'usage' for other objects documented here. +\arguments{ + \item{object}{ +an object of type LinearRule or QuadraticRule. +} +} +\details{ +Get everything that defines a log likelihood ratio between two gaussian measures. +} +\value{ +A list, see \code{\link{getLogLikeRatio-methods}} +} +\references{ +Fast rate of convergence in high dimensional linear discriminant analysis. R. Girard To appear in Journal of Nonparametric Statistics.\\ +Very high dimensional discriminant analysis with thresholding estimation. R. Girard. Submitted. + +} +\author{ +Robin Girard +} +\note{ +%% ~~further notes~~ +} + +%% ~Make other sections like Warning with \section{Warning }{....} ~ + +\seealso{ +%% ~~objects to See Also as \code{\link{help}}, ~~~ +} +\examples{ +p=100; n=20 ; mu=array(0,c(p,4)); mu[1:10,1]=2 ;mu[11:20,2]=2;C=array(c(1,20),p) +mu[21:30,3]=2 +x=NULL; y=NULL; +for (k in 1:4){ + x=rbind(x,t(array(C^(1/2),c(p,n))*(matrix(rnorm(p*n),nrow=p,ncol=n))+array(mu[,k],c(p,n)))); + y=c(y,array(k,n))} +#Learning +LearnedLinearPartitionWithLLR=learnPartitionWithLLR(x,y,procedure='FDRThresh') + +Rule=getBinaryRule(LearnedLinearPartitionWithLLR,1,2) +LLR=getLogLikeRatio(Rule) +print(LLR)} +% Add one or more standard keywords, see file 'KEYWORDS' in the +% R documentation directory. +\keyword{ discrimination } + diff --git a/man/learnBinaryRule.Rd b/man/learnBinaryRule.Rd new file mode 100644 index 0000000..1f66e1f --- /dev/null +++ b/man/learnBinaryRule.Rd @@ -0,0 +1,78 @@ +\name{learnBinaryRule} +\alias{learnBinaryRule} +%- Also NEED an '\alias' for EACH other topic documented here. +\title{ +Function to learn a binary classification rule +} +\description{ +Function to learn a binary classification rule. +For more than two class, use \code{\link{learnPartitionWithLLR}} instead. +The learned rule can be linear or quadratic. +There are reduction dimension methods (accessible via argument procedure) +to make the procedure efficient when the number of features is larger than the number of observations +} +\usage{ +learnBinaryRule(x, y,type='linear', procedure = "FDRThresh", covariance = "diagonal", ql = NULL, qq = NULL,prior=FALSE) +} +%- maybe also 'usage' for other objects documented here. +\arguments{ + \item{x}{ +The Matrix with input data of size pxn (p feature space dimension, and n number of observations) +} + \item{y}{ + A vector of n factors with 2 LEVELS (labels) associated to observations (can also be numeric) +} +\item{type}{'quadratic' or 'linear' are valid types. } + \item{procedure}{ +Procedure gives the used procedure to reduce the dimensionality of the estimated NormalVector and FormVector. +use 'noThresh' for no dimensionality reduction. UnivTresh is the universal threshold and FDRThresh is an FDR thresolding procedure. +When type=='linear' 'FANThresh' and 'FDRstudent' are also available. For type linear, the thresholding procedures are fully described in the Paper +"Fast rate of convergence in high dimensional linear discriminant analysis"} + \item{covariance}{ +Unused argument ... further development comming soon +} + \item{ql}{ +The parameter associated to the thresholding procedure for the estimation of NormalVector. +If a vector of values is given a 10 fold cross validation is performed +} + \item{qq}{ +The parameter associated to the thresholding procedure for the estimation of FormVector (only when type='quadratic'). +If a vector of values is given a 10 fold cross validation is performed +} +\item{prior}{ Do we put a prior on y (taking into account the proportion of the different class in the learning set to build the classification rule} + +} +\details{ +%% ~~ If necessary, more details than the description above ~~ +} +\value{ +A classification rule of class LinearRule if type='linear' and of class QuadraticRule if type='quadratic'. +} +\references{ +Fast rate of convergence in high dimensional linear discriminant analysis. R. Girard To appear in Journal of Nonparametric Statistics.\\ +Very high dimensional discriminant analysis with thresholding estimation. R. Girard. Submitted. +} +\author{ +Robin Girard +} +%\note{ +%% ~~further notes~~ +%} + +%% ~Make other sections like Warning with \section{Warning }{....} ~ + +\seealso{ +\code{\link{learnPartitionWithLLR}} +} +\examples{ +p=100; n=50 ; mu=array(0,c(p,2)); mu[1:10,1]=1 ;C=array(c(1,20),p) +x=NULL; y=NULL; +for (k in 1:2){ x=rbind(x,t(array(C^(1/2),c(p,n))*(matrix(rnorm(p*n),nrow=p,ncol=n))+array(mu[,k],c(p,n)))); + y=c(y,array(k,n)) } +#Learning +LearnedBinaryRule=learnBinaryRule(x,y) +show(LearnedBinaryRule)} +% Add one or more standard keywords, see file 'KEYWORDS' in the +% R documentation directory. +%\keyword{ ~kwd1 } + diff --git a/man/learnPartitionWithLLR.Rd b/man/learnPartitionWithLLR.Rd new file mode 100644 index 0000000..6d16f6b --- /dev/null +++ b/man/learnPartitionWithLLR.Rd @@ -0,0 +1,67 @@ +\name{learnPartitionWithLLR} +\alias{learnPartitionWithLLR} +%- Also NEED an '\alias' for EACH other topic documented here. +\title{ +A function to learn a rule in case of 2 classes or more +} +\description{ +A function to learn a rule in case of 2 classes or more. + There are reduction dimension methods (accessible via argument procedure) +to make the procedure efficient when the number of features is larger than the number of observations +} +\usage{ +learnPartitionWithLLR(x, y, type = "linear", procedure = "FDRThresh", ql = NULL, qq = NULL, BinaryLearningProcedure = NULL,prior=FALSE) +} +%- maybe also 'usage' for other objects documented here. +\arguments{The Argument are exactly the same as in \code{\link{learnBinaryRule}} except that y may have more than 2 levels + \item{x}{ +see \code{\link{learnBinaryRule}} +} + \item{y}{ +vector of factors with two or more levels +} + \item{type}{ +%% ~~Describe \code{type} here~~ +} + \item{procedure}{ +%% ~~Describe \code{procedure} here~~ +} + \item{ql}{ +%% ~~Describe \code{ql} here~~ +} + \item{qq}{ +%% ~~Describe \code{qq} here~~ +} + \item{BinaryLearningProcedure}{ +%% ~~Describe \code{BinaryLearningProcedure} here~~ +} +\item{prior}{ Do we put a prior on y (taking into account the proportion of the different class in the learning set to build the classification rule} +} +\details{ +%% ~~ If necessary, more details than the description above ~~ +} +\value{ +%% ~Describe the value returned +%% If it is a LIST, use +%% \item{comp1 }{Description of 'comp1'} +%% \item{comp2 }{Description of 'comp2'} +%% ... +} +\references{ +%% ~put references to the literature/web site here ~ +} +\author{ +%% ~~who you are~~ +} +\note{ +%% ~~further notes~~ +} + +%% ~Make other sections like Warning with \section{Warning }{....} ~ + +\seealso{ +%% ~~objects to See Also as \code{\link{help}}, ~~~ +} +\examples{ + +} diff --git a/man/plotClassifRule.Rd b/man/plotClassifRule.Rd new file mode 100644 index 0000000..7a1d908 --- /dev/null +++ b/man/plotClassifRule.Rd @@ -0,0 +1,52 @@ +\name{plotClassifRule} +\alias{plotClassifRule} +%- Also NEED an '\alias' for EACH other topic documented here. +\title{A +plot function for classification rules (binary or not, quadratic or linear) +} +\description{ +plot function for classification rules (binary or not, quadratic or linear). Essentially a wrapper to xyplot. +} +\usage{ +plotClassifRule(object, ...) +} +%- maybe also 'usage' for other objects documented here. +\arguments{ + \item{object}{ +%% ~~Describe \code{object} here~~ +} + \item{\dots}{ +other argument that can be passed through xyplot +} +} +\details{ +%% ~~ If necessary, more details than the description above ~~ +} +\value{ +%% ~Describe the value returned +%% If it is a LIST, use +%% \item{comp1 }{Description of 'comp1'} +%% \item{comp2 }{Description of 'comp2'} +%% ... +} +\references{ +%% ~put references to the literature/web site here ~ +} +\author{ +%% ~~who you are~~ +} +\note{ +%% ~~further notes~~ +} + +%% ~Make other sections like Warning with \section{Warning }{....} ~ + +\seealso{ +%% ~~objects to See Also as \code{\link{help}}, ~~~ +} +\examples{ +} +% Add one or more standard keywords, see file 'KEYWORDS' in the +% R documentation directory. +\keyword{ } + diff --git a/man/z.EvaluateLogLikeRatio-methods.Rd b/man/z.EvaluateLogLikeRatio-methods.Rd new file mode 100644 index 0000000..0dac203 --- /dev/null +++ b/man/z.EvaluateLogLikeRatio-methods.Rd @@ -0,0 +1,18 @@ +\name{.EvaluateLogLikeRatio-methods} +\docType{methods} +\alias{.EvaluateLogLikeRatio-methods} +\alias{.EvaluateLogLikeRatio,numeric,LinearRule-method} +\alias{.EvaluateLogLikeRatio,numeric,QuadraticRule-method} +\title{ ~~ Methods for Function .EvaluateLogLikeRatio ~~} +\description{ + ~~ Methods for function \code{.EvaluateLogLikeRatio} ~~ +} +\section{Methods}{ +\describe{ + +\item{ +\code{signature(x = "numeric", object = "LinearRule")}}{ hidden.} +} + +} +\keyword{methods} diff --git a/tests/TestFileLinear.R b/tests/TestFileLinear.R new file mode 100644 index 0000000..20a8c97 --- /dev/null +++ b/tests/TestFileLinear.R @@ -0,0 +1,74 @@ +# TODO: Add comment +# +# Author: robin +############################################################################### + +library(VHDClassification) +############ Tests 2 classes linear and quadratic. What do we loose by searching a quadratic rule +############ when the true is linear +p=100; n=50 ; mu=array(0,c(p,2)); mu[1:10,1]=2 ;C=array(c(1,20),p) +xl=NULL; yl=NULL; +for (k in 1:2){xl=rbind(xl,t(array(C^(1/2),c(p,n))*(matrix(rnorm(p*n),nrow=p,ncol=n))+array(mu[,k],c(p,n)))); + yl=c(yl,array(k,n))} + +x=NULL; y=NULL; +for (k in 1:2){ + x=rbind(x,t(array(C^(1/2),c(p,n))*(matrix(rnorm(p*n),nrow=p,ncol=n))+array(mu[,k],c(p,n)))); + y=c(y,array(k,n)) +} +#Learning +LearnedBinaryRule=learnBinaryRule(xl,yl,type='quadratic') +plotClassifRule(LearnedBinaryRule) +show(LearnedBinaryRule) +predict(LearnedBinaryRule,x) +LearnedBinaryRule=learnBinaryRule(xl,yl,type='linear') +predict(LearnedBinaryRule,x) +plotClassifRule(LearnedBinaryRule) +show(LearnedBinaryRule) +LearnedBinaryRule=learnBinaryRule(xl,yl,type='quadratic',procedure='UnivThresh') +plotClassifRule(LearnedBinaryRule) +show(LearnedBinaryRule) +predict(LearnedBinaryRule,x) +LearnedBinaryRule=learnBinaryRule(xl,yl,type='linear',procedure='UnivThresh') +predict(LearnedBinaryRule,x) +plotClassifRule(LearnedBinaryRule) +show(LearnedBinaryRule) +LearnedBinaryRule=learnBinaryRule(xl,yl,type='linear',procedure='FDRstudent') +predict(LearnedBinaryRule,x) +plotClassifRule(LearnedBinaryRule) +show(LearnedBinaryRule) +LearnedBinaryRule=learnBinaryRule(xl,yl,type='linear',procedure='Fisher') +predict(LearnedBinaryRule,x) +plotClassifRule(LearnedBinaryRule) +show(LearnedBinaryRule) +LearnedBinaryRule=learnBinaryRule(xl,yl,type='linear',procedure='FANThresh') +predict(LearnedBinaryRule,x) +plotClassifRule(LearnedBinaryRule) +show(LearnedBinaryRule) + + +############ Tests 3 classes linear +p=100; n=20 ; mu=array(0,c(p,4)); mu[1:10,1]=2 ;mu[11:20,2]=2;C=array(c(1,20),p) +mu[21:30,3]=2 +x=NULL; y=NULL; +for (k in 1:4){ + x=rbind(x,t(array(C^(1/2),c(p,n))*(matrix(rnorm(p*n),nrow=p,ncol=n))+array(mu[,k],c(p,n)))); + y=c(y,array(k,n))} +#Learning +LearnedLinearPartitionWithLLR=learnPartitionWithLLR(x,y,procedure='FDRThresh') +LearnedQuadraticPartitionWithLLR=learnPartitionWithLLR(x,y,type='quadratic',procedure='FDRThresh') + + +plotClassifRule(LearnedLinearPartitionWithLLR) +#Testing Set +x=NULL; y=NULL; +for (k in 1:3){ + x=rbind(x,t(array(C^(1/2),c(p,n))*(matrix(rnorm(p*n),nrow=p,ncol=n))+array(mu[,k],c(p,n)))); + y=c(y,array(k,n)) +} +#Testing +myTestingdata=list(x=x,y=y) +LDAScore=mean((y!=predict(LearnedLinearPartitionWithLLR,myTestingdata$x))) ; +print(LDAScore) +QDAScore=mean((y!=predict(LearnedQuadraticPartitionWithLLR,myTestingdata$x))) ; +print(QDAScore)