version 2.2

cran · Feb 20, 2015 · b2fd022 · b2fd022
1 parent a72897c
commit b2fd022
Show file tree

Hide file tree

Showing 8 changed files with 141 additions and 44 deletions.
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -1,14 +1,14 @@
 Package: npmv
 Type: Package
 Title: Nonparametric Comparison of Multivariate Samples
-Version: 2.1
-Date: 2013-11-24
+Version: 2.2
+Date: 2015-02-20
 Author: Woodrow Burchett and Amanda Ellis
 Maintainer: Amanda Ellis <arelli4@uky.edu>
 Description: Performs analysis of one-way multivariate data, for small samples using Nonparametric techniques. Using approximations for ANOVA Type, Wilks' Lambda, Lawley Hotelling, and Bartlett Nanda Pillai Test statics, the package compares the multivariate distributions for a single explanatory variable.  The comparison is also performed using a permutation test for each of the four test statistics. The package also performs an all-subsets algorithm regarding variables and regarding factor levels. 
-Depends: Formula, ggplot2
+Imports: Formula
 License: GPL-2
-Packaged: 2013-11-25 00:00:58 UTC; Amanda
+Packaged: 2015-02-25 02:47:01 UTC; Amanda
 NeedsCompilation: no
 Repository: CRAN
-Date/Publication: 2013-12-04 19:18:17
+Date/Publication: 2015-02-25 08:12:44
diff --git a/MD5 b/MD5
@@ -1,10 +1,10 @@
-48ffe5fa480a41724408cfc6285f0fbf *DESCRIPTION
-4bef8f90e730c1780e1f17836fd42624 *NAMESPACE
-44ac709629c95c6dc4f3f38e41b8b1ea *R/basenonpartest.R
-4522d6ad75222398fc705585e0fdae0c *R/nonpartest.R
-426ae858d144748d6bb5105ecb1686e6 *R/ssnonpartest.R
+a3be42dceb9e47aa0364a02e615d2493 *DESCRIPTION
+cd24174264a698b096726c670e01771b *NAMESPACE
+0431115cda843d9f0f6e9791be302b86 *R/basenonpartest.R
+291f8de1834db8e29531909c660f3be5 *R/nonpartest.R
+2d40fc1b5022a3571c4b4440f42b9759 *R/ssnonpartest.R
 2b528a89b41c1e33024220baecc5af7a *data/sberry.rda
-92716c198052cd08ce32c646ed4f74e5 *man/nonpartest.Rd
-f75f65fa0c722d28a7b8aaa72658fb4b *man/npmv-package.Rd
+bf03df9a0e7be07bb2953b8369607643 *man/nonpartest.Rd
+868db8bc97846969871db57c4be6f24c *man/npmv-package.Rd
 fba438f751e7c535a814474f5dc7d395 *man/sberry.Rd
 fcda1e6a4074fa2e69d62a73cd78c405 *man/ssnonpartest.Rd
diff --git a/NAMESPACE b/NAMESPACE
@@ -1 +1,2 @@
 export(nonpartest,ssnonpartest)
+import('Formula')
diff --git a/R/basenonpartest.R b/R/basenonpartest.R
@@ -66,6 +66,8 @@ function(frame,group,vars,tests=c(1,1,1,1)){
    fhat0 <- ((a^2)/((a-1)*sum(1/(ssize-1))))*fhat
    Fanova <- sum(diag(H2))/sum(diag(G3))
    pvalanova <- 1 - pf(Fanova,fhat,fhat0)
+   df1anova<-fhat
+   df2anova<-fhat0
 
    #McKeon approximation for the Lawley Hotelling Test
    if(tests[2]==1){
@@ -76,9 +78,13 @@ function(frame,group,vars,tests=c(1,1,1,1)){
       g <- (p*(a-1)*(D-2))/((N-a-p-1)*D)
       FLH <- U/g
       pvalLH <- 1- pf(FLH,K,D)
+      df1LH<-K
+      df2LH<-D
    }else{
       FLH <- NA
       pvalLH <- NA
+      df1LH<-NA
+      df2LH<-NA
    }
 
    #Muller approximation for the Bartlett-Nanda-Pillai Test
@@ -89,9 +95,13 @@ function(frame,group,vars,tests=c(1,1,1,1)){
       nu2   <- ((N-a+gamma-p)/N) * (( (gamma*(N-a+gamma-p)*(N+2)*(N-1))/((N-a)*(N-p)))-2)
       FBNP  <- ((V/gamma)/nu1)/((1-(V/gamma))/nu2)
       pvalBNP <- 1-pf(FBNP,nu1,nu2)
+      df1BNP<-nu1
+      df2BNP<-nu2
    }else{
       FBNP  <- NA
       pvalBNP <- NA
+      df1BNP<-NA
+      df2BNP<-NA
    }
 
    #Wilk's Lambda Test
@@ -110,12 +120,17 @@ function(frame,group,vars,tests=c(1,1,1,1)){
       df2_WLF=r_WL*t_WL-2*u_WL
       WLF=(1-lambda^(1/t_WL))/lambda^(1/t_WL)*df2_WLF/df1_WLF
       pvalWL=1-pf(WLF,df1_WLF,df2_WLF)
+      df1WL<-df1_WLF
+      df2WL<-df2_WLF
    }else{
       WLF=NA
       pvalWL=NA
+      df1WL<-NA
+      df2WL<-NA
    }
 
-   out <- list('Fanova'=Fanova,'FWL'=WLF,'pvalWL'=pvalWL,'fhat'=fhat,'fhat0'=fhat0,'pvalanova'=pvalanova,'FLH'=FLH,'pvalLH'=pvalLH,'FBNP'=FBNP,'pvalBNP'=pvalBNP)
+   out <- list('Fanova'=Fanova,'FWL'=WLF,'pvalWL'=pvalWL,'fhat'=fhat,'fhat0'=fhat0,'pvalanova'=pvalanova,'FLH'=FLH,'pvalLH'=pvalLH,'FBNP'=FBNP,'pvalBNP'=pvalBNP,
+               'df1anova'=df1anova,'df2anova'=df2anova,'df1LH'=df1LH,'df2LH'=df2LH,'df1BNP'=df1BNP,'df2BNP'=df2BNP,'df1WL'=df1WL,'df2WL'=df2WL)
    return(out)
 
 }
diff --git a/R/nonpartest.R b/R/nonpartest.R
@@ -1,4 +1,4 @@
-nonpartest <- function(formula,data,permtest=TRUE,permreps=10000,plots=TRUE,tests=c(1,1,1,1),releffects=TRUE){
+nonpartest <- function(formula,data,permtest=TRUE,permreps=10000,plots=TRUE,tests=c(1,1,1,1),releffects=TRUE,...){
 
 #Checks to see if formula
   if(!is(formula,"formula")){
@@ -43,7 +43,8 @@ nonpartest <- function(formula,data,permtest=TRUE,permreps=10000,plots=TRUE,test
 
    if(sum(ssize<2)>0){return('Error: Each group must have sample size of at least 2')}
 
-#Plot
+#Plot gives the user the options to specify title or not
+  par.list <- list()
   if(plots==TRUE && max(ssize)>10){
     if (length(vars) > 1)
     {
@@ -52,7 +53,17 @@ nonpartest <- function(formula,data,permtest=TRUE,permreps=10000,plots=TRUE,test
     }
 
     for(i in 1:length(vars)){
-    boxplot(frame[,vars[i]]~frame[,groupvar],data=frame,main=vars[i],ylab=vars[i],xlab=names(frame)[groupvar.location])
+      par.list[[i]] <- list(...)
+      if(!'main'%in%names(par.list[[i]])){
+        par.list[[i]] <- c(par.list[[i]], main=vars[i])
+      }
+      if(!'ylab'%in%names(par.list[[i]])){
+        par.list[[i]] <- c(par.list[[i]], ylab=vars[i])
+      }
+      if(!'xlab'%in%names(par.list[[i]])){
+        par.list[[i]] <- c(par.list[[i]], xlab=names(frame)[groupvar.location])
+      }
+      do.call(boxplot,c(list(frame[,vars[i]]~frame[,groupvar],data=frame),par.list[[i]]))
     }
 
   }
@@ -64,9 +75,20 @@ nonpartest <- function(formula,data,permtest=TRUE,permreps=10000,plots=TRUE,test
       on.exit(devAskNewPage(oask))
     }
     for(i in 1:length(vars)){
-    plot=qplot(frame[,groupvar],frame[,vars[i]],data=frame,main=vars[i],ylab=vars[i],xlab=names(frame)[groupvar.location])
-    print(plot)
-    }
+      par.list[[i]] <- list(...)
+      if(!'main'%in%names(par.list[[i]])){
+        par.list[[i]] <- c(par.list[[i]], main=vars[i])
+      }
+      if(!'ylab'%in%names(par.list[[i]])){
+        par.list[[i]] <- c(par.list[[i]], ylab=vars[i])
+      }
+      if(!'xlab'%in%names(par.list[[i]])){
+        par.list[[i]] <- c(par.list[[i]], xlab=names(frame)[groupvar.location])
+      }
+      #plot=qplot(frame[,groupvar],frame[,vars[i]],data=frame,...)
+      do.call(boxplot,c(list(frame[,vars[i]]~frame[,groupvar],data=frame),border='white',par.list[[i]]))
+      do.call(points,c(list(frame[,vars[i]]~frame[,groupvar],data=frame),pch=20))
+      }
   }
 
 
@@ -110,7 +132,7 @@ nonpartest <- function(formula,data,permtest=TRUE,permreps=10000,plots=TRUE,test
    G3 <- (1/a)*G3
 
    if(det(H1)==0 | det(H2)==0 | det(G1)==0 | det(G2)==0 | det(G3)==0){
-      warning('Rank Matrix is Singular, only ANOVA test returned')
+      warning('Rank covariance matrix is singular, only ANOVA test returned')
       tests=c(1,0,0,0)
    }
 
@@ -144,6 +166,14 @@ nonpartest <- function(formula,data,permtest=TRUE,permreps=10000,plots=TRUE,test
    Fanova     <- base$Fanova
    FWL       <- base$FWL
    pvalWL    <- base$pvalWL
+   df1anova  <- base$df1anova
+   df2anova  <- base$df2anova
+   df1LH     <- base$df1LH
+   df2LH     <- base$df2LH
+   df1BNP    <- base$df1BNP
+   df2BNP    <- base$df2BNP
+   df1WL     <- base$df1WL
+   df2WL     <- base$df2WL
 
    if(permtest){
       perms <- matrix(NA,permreps,4)
@@ -162,20 +192,22 @@ nonpartest <- function(formula,data,permtest=TRUE,permreps=10000,plots=TRUE,test
       pvalBNPperm  <- sum(as.numeric(perms[,3]>FBNP))/permreps
       pvalWLperm       <- sum(as.numeric(perms[,4]>FWL))/permreps
 
-      results=matrix(c(Fanova,FLH,FBNP,FWL,pvalanova,pvalLH,pvalBNP,pvalWL,pvalanovperm,pvalLHperm,pvalBNPperm,pvalWLperm),ncol=3)
+      results=matrix(c(Fanova,FLH,FBNP,FWL,df1anova,df1LH,df1BNP,df1WL,df2anova,df2LH,df2BNP,df2WL,pvalanova,pvalLH,pvalBNP,pvalWL,pvalanovperm,pvalLHperm,pvalBNPperm,pvalWLperm),ncol=5)
       results=data.frame(results,row.names=c('ANOVA type test p-value','McKeon approx. for the Lawley Hotelling Test','Muller approx. for the Bartlett-Nanda-Pillai Test',
                     'Wilks Lambda'))
-      colnames(results)=c('Test Statistic','P-value','Permutation Test p-value')
-      results[,3] <- round(as.numeric(results[,3]),digits=3)
+      colnames(results)=c('Test Statistic','df1','df2','P-value','Permutation Test p-value')
+      results[,5] <- round(as.numeric(results[,5]),digits=3)
    }else{
-      results=matrix(c(Fanova,FLH,FBNP,FWL,pvalanova,pvalLH,pvalBNP,pvalWL),ncol=2)
+      results=matrix(c(Fanova,FLH,FBNP,FWL,df1anova,df1LH,df1BNP,df1WL,df2anova,df2LH,df2BNP,df2WL,pvalanova,pvalLH,pvalBNP,pvalWL),ncol=4)
       results=data.frame(results,row.names=c('ANOVA type test p-value','McKeon approx. for the Lawley Hotelling Test','Muller approx. for the Bartlett-Nanda-Pillai Test',
                     'Wilks Lambda'))
-      colnames(results)=c('Test Statistic','P-value')
+      colnames(results)=c('Test Statistic','df1','df2','P-value')
    }
 
    results[,1] <- round(as.numeric(results[,1]),digits=3)
    results[,2] <- round(as.numeric(results[,2]),digits=3)
+   results[,3] <- round(as.numeric(results[,3]),digits=3)
+   results[,4] <- round(as.numeric(results[,4]),digits=3)
 
    if(releffects){
       if(a == 2){

diff --git a/R/ssnonpartest.R b/R/ssnonpartest.R
@@ -54,7 +54,8 @@ ssnonpartest <- function(formula,data,alpha=.05,test=c(0,0,0,1),factors.and.vari
 #Defines logical variable that tells when to exit
   exit=FALSE
 
-#Checks to see if R Matrix is singular, if so returns warning
+#Checks to see if R Matrix is singular, if so returns warning and chances to ANOVA type statistic
+if(test[1]!=1){
   # Compute sample sizes per group
   N <- length(frame[,1])
   ssize <- array(NA,a)
@@ -102,20 +103,31 @@ ssnonpartest <- function(formula,data,alpha=.05,test=c(0,0,0,1),factors.and.vari
   }
 
   if(det(H1)==0 | det(H2)==0 | det(G1)==0 | det(G2)==0 | det(G3)==0 && test[1]!=1){
-    return('Rank Matrix is Singular, only ANOVA test can be calculated')
-    tests=c(1,0,0,0)
-  }
+    cat('Rank Matrix is Singular, only ANOVA test can be calculated \n')
+    test=c(1,0,0,0)
+  }else{
+
   H1 <- (1/(a-1))*H1
   H2 <- (1/(a-1))*H2
   G1 <- (1/(N-a))*G1
   G2 <- (1/(a-1))*G2
   G3 <- (1/a)*G3
 
   if(det(H1)==0 | det(H2)==0 | det(G1)==0 | det(G2)==0 | det(G3)==0 && test[1]!=1){
-  return('Rank Matrix is Singular, only ANOVA test can be calculated')
-  tests=c(1,0,0,0)
+    test=c(1,0,0,0)
+    cat('Rank Matrix is Singular, only ANOVA test can be calculated \n')
+    }
   }
-
+}  
+
+##################################
+#Output of which statistic is used
+##################################
+if(test[1]==1){cat('\nThe ANOVA type statistic will be used in the following test \n')}
+if(test[2]==1){cat('\nThe Lawley Hotelling type (McKeon\'s F approximation) statistic will be used in the following test \n')}
+if(test[3]==1){cat('\nThe  Bartlett-Nanda-Pillai type (Muller\'s F approximation) statistic will be used in the following test \n')}
+if(test[4]==1){cat('\nThe Wilks\' Lambda type statistic will be used in the following test \n')}
+
 #######################
 #Global Hypothesis Test
 ####################### 
@@ -137,7 +149,7 @@ ssnonpartest <- function(formula,data,alpha=.05,test=c(0,0,0,1),factors.and.vari
 if(p>a || factors.and.variables==TRUE){  #Only runs if user wants to check subsets of factor levels
 
 #Since the Global hypothesis is significant outputs first subset, which is the subset of all factor levels
-cat('\n~Performing the Subset Algorithm based on Factor levels~\n The Hypothesis of equality between factor levels ', levels, 'is rejected \n')
+cat('\n~Performing the Subset Algorithm based on Factor levels~\nThe Hypothesis of equality between factor levels ', levels, 'is rejected \n')
 
 #Exit if only 2 factor levels are being tested
 if (length(levels)<= 2 && factors.and.variables==FALSE){return(cat('All appropriate subsets using factor levels have been checked using a closed multiple testing procedure, which controls the maximum overall type I error rate at alpha=',alpha))}
@@ -275,21 +287,36 @@ for(l in 1:(a-4)) {  #We only run from 1:(a-4) because we are checking subsets o
         }
       }
     }  
+
+    #Checks to see if there are new subsets to check
+    if (length(newsubsets)==1 && is.na(newsubsets) && factors.and.variables==FALSE){return(cat('All appropriate subsets using factor levels have been checked using a multiple testing procedure, which controls the maximum overall type I error rate at alpha=',alpha))}
+    if (length(newsubsets)==1 && is.na(newsubsets) && factors.and.variables==TRUE){cat('All appropriate subsets using factor levels have been checked using a multiple testing procedure, which controls the maximum overall type I error rate at alpha=',alpha,'\n')
+        exit=TRUE}
+
+    if(exit==FALSE){
     #Removes duplicates
     newsubsets=unique(newsubsets)
 
+
     #Removes subsets of size less than current size (1:a-2)
     for(i in 1:length(newsubsets))
     {
       if(length(newsubsets[[i]])<(number.elements-1)){newsubsets[[i]]=NA}
     }
+
     # Removes NA
     newsubsets=newsubsets[!is.na(newsubsets)]
 
+    #Checks to see if there are new subsets to check
+    if (length(newsubsets)==0  && factors.and.variables==FALSE){return(cat('All appropriate subsets using factor levels have been checked using a multiple testing procedure, which controls the maximum overall type I error rate at alpha=',alpha))}
+    if (length(newsubsets)==0 && factors.and.variables==TRUE){cat('All appropriate subsets using factor levels have been checked using a multiple testing procedure, which controls the maximum overall type I error rate at alpha=',alpha,'\n')
+        exit=TRUE}
+
+    if(exit==FALSE){
     #Creates a list of significant subsets and non-significant subsets
     newsubsetcount=length(newsubsets)
     sigfactorsubsets=vector('list',newsubsetcount)
-    nonsigfactorsubsets=vector('list',newsubsetcount)
+    nonsigfactorsubsets.new=vector('list',newsubsetcount)
 
     for(i in 1:newsubsetcount)
     {
@@ -303,19 +330,24 @@ for(l in 1:(a-4)) {  #We only run from 1:(a-4) because we are checking subsets o
      if (test[3]==1){testpval=base$pvalBNP}
      if (test[4]==1){testpval=base$pvalWL}
 
-     if( testpval >= alpha) {nonsigfactorsubsets[[i]]=levels(subsetframe[,groupvarsub])}else {nonsigfactorsubsets[[i]]=NA}
+     if( testpval >= alpha) {nonsigfactorsubsets.new[[i]]=levels(subsetframe[,groupvarsub])}else {nonsigfactorsubsets.new[[i]]=NA}
      if( testpval < alpha) {sigfactorsubsets[[i]]=levels(subsetframe[,groupvarsub])}else {sigfactorsubsets[[i]]=NA}
      if( testpval < alpha) {cat('The Hypothesis of equality between factor levels ', newsubsets[[i]], 'is rejected \n')}
     }
 
-    nonsigfactorsubsets=nonsigfactorsubsets[!is.na(nonsigfactorsubsets)]
+    nonsigfactorsubsets.new=nonsigfactorsubsets.new[!is.na(nonsigfactorsubsets.new)]
     sigfactorsubsets=sigfactorsubsets[!is.na(sigfactorsubsets)]
+
+    #Combine previous non-significant subsets with new
+    nonsigfactorsubsets=c(nonsigfactorsubsets,nonsigfactorsubsets.new)
 
     #Checks to see if there are step2subs and if there is only one significant subset in either case exit is set to TRUE
     if (length(sigfactorsubsets)<= 1 && factors.and.variables==FALSE){return(cat('All appropriate subsets using factor levels have been checked using a closed multiple testing procedure, which controls the maximum overall type I error rate at alpha=',alpha))}
     if (length(sigfactorsubsets)<= 1 && factors.and.variables==TRUE){
       cat('All appropriate subsets using factor levels have been checked using a closed multiple testing procedure, which controls the maximum overall type I error rate at alpha=',alpha,'\n')
       exit=TRUE}
+    }
+  }
 }
 
 #Checks to see if the significant subsets are of length 2, if length 2 or less if length 2 or less function is done checking factor levels
@@ -415,6 +447,7 @@ if (length(vars)<= 1){return(cat('All appropriate subsets using response variabl
 
   number.elements=p-2
   if (number.elements== 1){return(cat('All appropriate subsets using response variables have been checked using a multiple testing procedure, which controls the maximum overall type I error rate at alpha=',alpha))}
+
   for(l in 1:(p-3)) {  
 
     newsubsetcount=length(sig.variable.subsets)
@@ -445,18 +478,21 @@ if (length(vars)<= 1){return(cat('All appropriate subsets using response variabl
     #Removes duplicates
     newsubsets=unique(newsubsets)
 
+    #Checks to see if there are new subsets to check
+    if (length(newsubsets)==1 && is.na(newsubsets)){return(cat('All appropriate subsets using response variables have been checked using a multiple testing procedure, which controls the maximum overall type I error rate at alpha=',alpha))}
+
     #Removes subsets of size less than current size (1:p-2)
     for(i in 1:length(newsubsets))
     {
       if(length(newsubsets[[i]])<(number.elements-1)){newsubsets[[i]]=NA}
     }
     # Removes NA
     newsubsets=newsubsets[!is.na(newsubsets)]
-
+       
     #Creates a list of significant subsets and non-significant subsets
     newsubsetcount=length(newsubsets)
     sig.variable.subsets=vector('list',newsubsetcount)
-    nonsig.variable.subsets=vector('list',newsubsetcount)
+    nonsig.variable.subsets.new=vector('list',newsubsetcount)
     multiplier=newsubsetcount   #The multiplier is the number of test peformed
     for(i in 1:newsubsetcount)
     {
@@ -467,12 +503,22 @@ if (length(vars)<= 1){return(cat('All appropriate subsets using response variabl
       if (test[3]==1){testpval=base$pvalBNP}
       if (test[4]==1){testpval=base$pvalWL}
 
-      if( testpval*multiplier >= alpha) {nonsig.variable.subsets[[i]]=newsubsets[[i]]}else {nonsig.variable.subsets[[i]]=NA}
+      if( testpval*multiplier >= alpha) {nonsig.variable.subsets.new[[i]]=newsubsets[[i]]}else {nonsig.variable.subsets.new[[i]]=NA}
       if( testpval*multiplier < alpha) {sig.variable.subsets[[i]]=newsubsets[[i]]}else {sig.variable.subsets[[i]]=NA}
       if( testpval*multiplier < alpha) {cat('The Hypothesis of equality using response variables ', newsubsets[[i]], 'is rejected \n')}
     }
 
-    nonsig.variable.subsets=nonsig.variable.subsets[!is.na(nonsig.variable.subsets)]
+    nonsig.variable.subsets.new=nonsig.variable.subsets.new[!is.na(nonsig.variable.subsets.new)]
+
+    #Combine previous non-significant subsets with new
+    nonsig.variable.subsets=c(nonsig.variable.subsets,nonsig.variable.subsets.new)
+
+    sig.variable.subsets=sig.variable.subsets[!is.na(sig.variable.subsets)]
+
+    #Removes Duplicates
+    sig.variable.subsets=unique(sig.variable.subsets)
+
+    # Removes NA
     sig.variable.subsets=sig.variable.subsets[!is.na(sig.variable.subsets)]
 
     number.elements=number.elements-1