Skip to content

Commit

Permalink
version 2.2
Browse files Browse the repository at this point in the history
  • Loading branch information
AmandaEllis authored and gaborcsardi committed Feb 20, 2015
1 parent a72897c commit b2fd022
Show file tree
Hide file tree
Showing 8 changed files with 141 additions and 44 deletions.
10 changes: 5 additions & 5 deletions DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,14 +1,14 @@
Package: npmv
Type: Package
Title: Nonparametric Comparison of Multivariate Samples
Version: 2.1
Date: 2013-11-24
Version: 2.2
Date: 2015-02-20
Author: Woodrow Burchett and Amanda Ellis
Maintainer: Amanda Ellis <arelli4@uky.edu>
Description: Performs analysis of one-way multivariate data, for small samples using Nonparametric techniques. Using approximations for ANOVA Type, Wilks' Lambda, Lawley Hotelling, and Bartlett Nanda Pillai Test statics, the package compares the multivariate distributions for a single explanatory variable. The comparison is also performed using a permutation test for each of the four test statistics. The package also performs an all-subsets algorithm regarding variables and regarding factor levels.
Depends: Formula, ggplot2
Imports: Formula
License: GPL-2
Packaged: 2013-11-25 00:00:58 UTC; Amanda
Packaged: 2015-02-25 02:47:01 UTC; Amanda
NeedsCompilation: no
Repository: CRAN
Date/Publication: 2013-12-04 19:18:17
Date/Publication: 2015-02-25 08:12:44
14 changes: 7 additions & 7 deletions MD5
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
48ffe5fa480a41724408cfc6285f0fbf *DESCRIPTION
4bef8f90e730c1780e1f17836fd42624 *NAMESPACE
44ac709629c95c6dc4f3f38e41b8b1ea *R/basenonpartest.R
4522d6ad75222398fc705585e0fdae0c *R/nonpartest.R
426ae858d144748d6bb5105ecb1686e6 *R/ssnonpartest.R
a3be42dceb9e47aa0364a02e615d2493 *DESCRIPTION
cd24174264a698b096726c670e01771b *NAMESPACE
0431115cda843d9f0f6e9791be302b86 *R/basenonpartest.R
291f8de1834db8e29531909c660f3be5 *R/nonpartest.R
2d40fc1b5022a3571c4b4440f42b9759 *R/ssnonpartest.R
2b528a89b41c1e33024220baecc5af7a *data/sberry.rda
92716c198052cd08ce32c646ed4f74e5 *man/nonpartest.Rd
f75f65fa0c722d28a7b8aaa72658fb4b *man/npmv-package.Rd
bf03df9a0e7be07bb2953b8369607643 *man/nonpartest.Rd
868db8bc97846969871db57c4be6f24c *man/npmv-package.Rd
fba438f751e7c535a814474f5dc7d395 *man/sberry.Rd
fcda1e6a4074fa2e69d62a73cd78c405 *man/ssnonpartest.Rd
1 change: 1 addition & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
@@ -1 +1,2 @@
export(nonpartest,ssnonpartest)
import('Formula')
17 changes: 16 additions & 1 deletion R/basenonpartest.R
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,8 @@ function(frame,group,vars,tests=c(1,1,1,1)){
fhat0 <- ((a^2)/((a-1)*sum(1/(ssize-1))))*fhat
Fanova <- sum(diag(H2))/sum(diag(G3))
pvalanova <- 1 - pf(Fanova,fhat,fhat0)
df1anova<-fhat
df2anova<-fhat0

#McKeon approximation for the Lawley Hotelling Test
if(tests[2]==1){
Expand All @@ -76,9 +78,13 @@ function(frame,group,vars,tests=c(1,1,1,1)){
g <- (p*(a-1)*(D-2))/((N-a-p-1)*D)
FLH <- U/g
pvalLH <- 1- pf(FLH,K,D)
df1LH<-K
df2LH<-D
}else{
FLH <- NA
pvalLH <- NA
df1LH<-NA
df2LH<-NA
}

#Muller approximation for the Bartlett-Nanda-Pillai Test
Expand All @@ -89,9 +95,13 @@ function(frame,group,vars,tests=c(1,1,1,1)){
nu2 <- ((N-a+gamma-p)/N) * (( (gamma*(N-a+gamma-p)*(N+2)*(N-1))/((N-a)*(N-p)))-2)
FBNP <- ((V/gamma)/nu1)/((1-(V/gamma))/nu2)
pvalBNP <- 1-pf(FBNP,nu1,nu2)
df1BNP<-nu1
df2BNP<-nu2
}else{
FBNP <- NA
pvalBNP <- NA
df1BNP<-NA
df2BNP<-NA
}

#Wilk's Lambda Test
Expand All @@ -110,12 +120,17 @@ function(frame,group,vars,tests=c(1,1,1,1)){
df2_WLF=r_WL*t_WL-2*u_WL
WLF=(1-lambda^(1/t_WL))/lambda^(1/t_WL)*df2_WLF/df1_WLF
pvalWL=1-pf(WLF,df1_WLF,df2_WLF)
df1WL<-df1_WLF
df2WL<-df2_WLF
}else{
WLF=NA
pvalWL=NA
df1WL<-NA
df2WL<-NA
}

out <- list('Fanova'=Fanova,'FWL'=WLF,'pvalWL'=pvalWL,'fhat'=fhat,'fhat0'=fhat0,'pvalanova'=pvalanova,'FLH'=FLH,'pvalLH'=pvalLH,'FBNP'=FBNP,'pvalBNP'=pvalBNP)
out <- list('Fanova'=Fanova,'FWL'=WLF,'pvalWL'=pvalWL,'fhat'=fhat,'fhat0'=fhat0,'pvalanova'=pvalanova,'FLH'=FLH,'pvalLH'=pvalLH,'FBNP'=FBNP,'pvalBNP'=pvalBNP,
'df1anova'=df1anova,'df2anova'=df2anova,'df1LH'=df1LH,'df2LH'=df2LH,'df1BNP'=df1BNP,'df2BNP'=df2BNP,'df1WL'=df1WL,'df2WL'=df2WL)
return(out)

}
56 changes: 44 additions & 12 deletions R/nonpartest.R
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
nonpartest <- function(formula,data,permtest=TRUE,permreps=10000,plots=TRUE,tests=c(1,1,1,1),releffects=TRUE){
nonpartest <- function(formula,data,permtest=TRUE,permreps=10000,plots=TRUE,tests=c(1,1,1,1),releffects=TRUE,...){

#Checks to see if formula
if(!is(formula,"formula")){
Expand Down Expand Up @@ -43,7 +43,8 @@ nonpartest <- function(formula,data,permtest=TRUE,permreps=10000,plots=TRUE,test

if(sum(ssize<2)>0){return('Error: Each group must have sample size of at least 2')}

#Plot
#Plot gives the user the options to specify title or not
par.list <- list()
if(plots==TRUE && max(ssize)>10){
if (length(vars) > 1)
{
Expand All @@ -52,7 +53,17 @@ nonpartest <- function(formula,data,permtest=TRUE,permreps=10000,plots=TRUE,test
}

for(i in 1:length(vars)){
boxplot(frame[,vars[i]]~frame[,groupvar],data=frame,main=vars[i],ylab=vars[i],xlab=names(frame)[groupvar.location])
par.list[[i]] <- list(...)
if(!'main'%in%names(par.list[[i]])){
par.list[[i]] <- c(par.list[[i]], main=vars[i])
}
if(!'ylab'%in%names(par.list[[i]])){
par.list[[i]] <- c(par.list[[i]], ylab=vars[i])
}
if(!'xlab'%in%names(par.list[[i]])){
par.list[[i]] <- c(par.list[[i]], xlab=names(frame)[groupvar.location])
}
do.call(boxplot,c(list(frame[,vars[i]]~frame[,groupvar],data=frame),par.list[[i]]))
}

}
Expand All @@ -64,9 +75,20 @@ nonpartest <- function(formula,data,permtest=TRUE,permreps=10000,plots=TRUE,test
on.exit(devAskNewPage(oask))
}
for(i in 1:length(vars)){
plot=qplot(frame[,groupvar],frame[,vars[i]],data=frame,main=vars[i],ylab=vars[i],xlab=names(frame)[groupvar.location])
print(plot)
}
par.list[[i]] <- list(...)
if(!'main'%in%names(par.list[[i]])){
par.list[[i]] <- c(par.list[[i]], main=vars[i])
}
if(!'ylab'%in%names(par.list[[i]])){
par.list[[i]] <- c(par.list[[i]], ylab=vars[i])
}
if(!'xlab'%in%names(par.list[[i]])){
par.list[[i]] <- c(par.list[[i]], xlab=names(frame)[groupvar.location])
}
#plot=qplot(frame[,groupvar],frame[,vars[i]],data=frame,...)
do.call(boxplot,c(list(frame[,vars[i]]~frame[,groupvar],data=frame),border='white',par.list[[i]]))
do.call(points,c(list(frame[,vars[i]]~frame[,groupvar],data=frame),pch=20))
}
}


Expand Down Expand Up @@ -110,7 +132,7 @@ nonpartest <- function(formula,data,permtest=TRUE,permreps=10000,plots=TRUE,test
G3 <- (1/a)*G3

if(det(H1)==0 | det(H2)==0 | det(G1)==0 | det(G2)==0 | det(G3)==0){
warning('Rank Matrix is Singular, only ANOVA test returned')
warning('Rank covariance matrix is singular, only ANOVA test returned')
tests=c(1,0,0,0)
}

Expand Down Expand Up @@ -144,6 +166,14 @@ nonpartest <- function(formula,data,permtest=TRUE,permreps=10000,plots=TRUE,test
Fanova <- base$Fanova
FWL <- base$FWL
pvalWL <- base$pvalWL
df1anova <- base$df1anova
df2anova <- base$df2anova
df1LH <- base$df1LH
df2LH <- base$df2LH
df1BNP <- base$df1BNP
df2BNP <- base$df2BNP
df1WL <- base$df1WL
df2WL <- base$df2WL

if(permtest){
perms <- matrix(NA,permreps,4)
Expand All @@ -162,20 +192,22 @@ nonpartest <- function(formula,data,permtest=TRUE,permreps=10000,plots=TRUE,test
pvalBNPperm <- sum(as.numeric(perms[,3]>FBNP))/permreps
pvalWLperm <- sum(as.numeric(perms[,4]>FWL))/permreps

results=matrix(c(Fanova,FLH,FBNP,FWL,pvalanova,pvalLH,pvalBNP,pvalWL,pvalanovperm,pvalLHperm,pvalBNPperm,pvalWLperm),ncol=3)
results=matrix(c(Fanova,FLH,FBNP,FWL,df1anova,df1LH,df1BNP,df1WL,df2anova,df2LH,df2BNP,df2WL,pvalanova,pvalLH,pvalBNP,pvalWL,pvalanovperm,pvalLHperm,pvalBNPperm,pvalWLperm),ncol=5)
results=data.frame(results,row.names=c('ANOVA type test p-value','McKeon approx. for the Lawley Hotelling Test','Muller approx. for the Bartlett-Nanda-Pillai Test',
'Wilks Lambda'))
colnames(results)=c('Test Statistic','P-value','Permutation Test p-value')
results[,3] <- round(as.numeric(results[,3]),digits=3)
colnames(results)=c('Test Statistic','df1','df2','P-value','Permutation Test p-value')
results[,5] <- round(as.numeric(results[,5]),digits=3)
}else{
results=matrix(c(Fanova,FLH,FBNP,FWL,pvalanova,pvalLH,pvalBNP,pvalWL),ncol=2)
results=matrix(c(Fanova,FLH,FBNP,FWL,df1anova,df1LH,df1BNP,df1WL,df2anova,df2LH,df2BNP,df2WL,pvalanova,pvalLH,pvalBNP,pvalWL),ncol=4)
results=data.frame(results,row.names=c('ANOVA type test p-value','McKeon approx. for the Lawley Hotelling Test','Muller approx. for the Bartlett-Nanda-Pillai Test',
'Wilks Lambda'))
colnames(results)=c('Test Statistic','P-value')
colnames(results)=c('Test Statistic','df1','df2','P-value')
}

results[,1] <- round(as.numeric(results[,1]),digits=3)
results[,2] <- round(as.numeric(results[,2]),digits=3)
results[,3] <- round(as.numeric(results[,3]),digits=3)
results[,4] <- round(as.numeric(results[,4]),digits=3)

if(releffects){
if(a == 2){
Expand Down
76 changes: 61 additions & 15 deletions R/ssnonpartest.R
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,8 @@ ssnonpartest <- function(formula,data,alpha=.05,test=c(0,0,0,1),factors.and.vari
#Defines logical variable that tells when to exit
exit=FALSE

#Checks to see if R Matrix is singular, if so returns warning
#Checks to see if R Matrix is singular, if so returns warning and chances to ANOVA type statistic
if(test[1]!=1){
# Compute sample sizes per group
N <- length(frame[,1])
ssize <- array(NA,a)
Expand Down Expand Up @@ -102,20 +103,31 @@ ssnonpartest <- function(formula,data,alpha=.05,test=c(0,0,0,1),factors.and.vari
}

if(det(H1)==0 | det(H2)==0 | det(G1)==0 | det(G2)==0 | det(G3)==0 && test[1]!=1){
return('Rank Matrix is Singular, only ANOVA test can be calculated')
tests=c(1,0,0,0)
}
cat('Rank Matrix is Singular, only ANOVA test can be calculated \n')
test=c(1,0,0,0)
}else{

H1 <- (1/(a-1))*H1
H2 <- (1/(a-1))*H2
G1 <- (1/(N-a))*G1
G2 <- (1/(a-1))*G2
G3 <- (1/a)*G3

if(det(H1)==0 | det(H2)==0 | det(G1)==0 | det(G2)==0 | det(G3)==0 && test[1]!=1){
return('Rank Matrix is Singular, only ANOVA test can be calculated')
tests=c(1,0,0,0)
test=c(1,0,0,0)
cat('Rank Matrix is Singular, only ANOVA test can be calculated \n')
}
}

}

##################################
#Output of which statistic is used
##################################
if(test[1]==1){cat('\nThe ANOVA type statistic will be used in the following test \n')}
if(test[2]==1){cat('\nThe Lawley Hotelling type (McKeon\'s F approximation) statistic will be used in the following test \n')}
if(test[3]==1){cat('\nThe Bartlett-Nanda-Pillai type (Muller\'s F approximation) statistic will be used in the following test \n')}
if(test[4]==1){cat('\nThe Wilks\' Lambda type statistic will be used in the following test \n')}

#######################
#Global Hypothesis Test
#######################
Expand All @@ -137,7 +149,7 @@ ssnonpartest <- function(formula,data,alpha=.05,test=c(0,0,0,1),factors.and.vari
if(p>a || factors.and.variables==TRUE){ #Only runs if user wants to check subsets of factor levels

#Since the Global hypothesis is significant outputs first subset, which is the subset of all factor levels
cat('\n~Performing the Subset Algorithm based on Factor levels~\n The Hypothesis of equality between factor levels ', levels, 'is rejected \n')
cat('\n~Performing the Subset Algorithm based on Factor levels~\nThe Hypothesis of equality between factor levels ', levels, 'is rejected \n')

#Exit if only 2 factor levels are being tested
if (length(levels)<= 2 && factors.and.variables==FALSE){return(cat('All appropriate subsets using factor levels have been checked using a closed multiple testing procedure, which controls the maximum overall type I error rate at alpha=',alpha))}
Expand Down Expand Up @@ -275,21 +287,36 @@ for(l in 1:(a-4)) { #We only run from 1:(a-4) because we are checking subsets o
}
}
}

#Checks to see if there are new subsets to check
if (length(newsubsets)==1 && is.na(newsubsets) && factors.and.variables==FALSE){return(cat('All appropriate subsets using factor levels have been checked using a multiple testing procedure, which controls the maximum overall type I error rate at alpha=',alpha))}
if (length(newsubsets)==1 && is.na(newsubsets) && factors.and.variables==TRUE){cat('All appropriate subsets using factor levels have been checked using a multiple testing procedure, which controls the maximum overall type I error rate at alpha=',alpha,'\n')
exit=TRUE}

if(exit==FALSE){
#Removes duplicates
newsubsets=unique(newsubsets)


#Removes subsets of size less than current size (1:a-2)
for(i in 1:length(newsubsets))
{
if(length(newsubsets[[i]])<(number.elements-1)){newsubsets[[i]]=NA}
}

# Removes NA
newsubsets=newsubsets[!is.na(newsubsets)]

#Checks to see if there are new subsets to check
if (length(newsubsets)==0 && factors.and.variables==FALSE){return(cat('All appropriate subsets using factor levels have been checked using a multiple testing procedure, which controls the maximum overall type I error rate at alpha=',alpha))}
if (length(newsubsets)==0 && factors.and.variables==TRUE){cat('All appropriate subsets using factor levels have been checked using a multiple testing procedure, which controls the maximum overall type I error rate at alpha=',alpha,'\n')
exit=TRUE}

if(exit==FALSE){
#Creates a list of significant subsets and non-significant subsets
newsubsetcount=length(newsubsets)
sigfactorsubsets=vector('list',newsubsetcount)
nonsigfactorsubsets=vector('list',newsubsetcount)
nonsigfactorsubsets.new=vector('list',newsubsetcount)

for(i in 1:newsubsetcount)
{
Expand All @@ -303,19 +330,24 @@ for(l in 1:(a-4)) { #We only run from 1:(a-4) because we are checking subsets o
if (test[3]==1){testpval=base$pvalBNP}
if (test[4]==1){testpval=base$pvalWL}

if( testpval >= alpha) {nonsigfactorsubsets[[i]]=levels(subsetframe[,groupvarsub])}else {nonsigfactorsubsets[[i]]=NA}
if( testpval >= alpha) {nonsigfactorsubsets.new[[i]]=levels(subsetframe[,groupvarsub])}else {nonsigfactorsubsets.new[[i]]=NA}
if( testpval < alpha) {sigfactorsubsets[[i]]=levels(subsetframe[,groupvarsub])}else {sigfactorsubsets[[i]]=NA}
if( testpval < alpha) {cat('The Hypothesis of equality between factor levels ', newsubsets[[i]], 'is rejected \n')}
}

nonsigfactorsubsets=nonsigfactorsubsets[!is.na(nonsigfactorsubsets)]
nonsigfactorsubsets.new=nonsigfactorsubsets.new[!is.na(nonsigfactorsubsets.new)]
sigfactorsubsets=sigfactorsubsets[!is.na(sigfactorsubsets)]

#Combine previous non-significant subsets with new
nonsigfactorsubsets=c(nonsigfactorsubsets,nonsigfactorsubsets.new)

#Checks to see if there are step2subs and if there is only one significant subset in either case exit is set to TRUE
if (length(sigfactorsubsets)<= 1 && factors.and.variables==FALSE){return(cat('All appropriate subsets using factor levels have been checked using a closed multiple testing procedure, which controls the maximum overall type I error rate at alpha=',alpha))}
if (length(sigfactorsubsets)<= 1 && factors.and.variables==TRUE){
cat('All appropriate subsets using factor levels have been checked using a closed multiple testing procedure, which controls the maximum overall type I error rate at alpha=',alpha,'\n')
exit=TRUE}
}
}
}

#Checks to see if the significant subsets are of length 2, if length 2 or less if length 2 or less function is done checking factor levels
Expand Down Expand Up @@ -415,6 +447,7 @@ if (length(vars)<= 1){return(cat('All appropriate subsets using response variabl

number.elements=p-2
if (number.elements== 1){return(cat('All appropriate subsets using response variables have been checked using a multiple testing procedure, which controls the maximum overall type I error rate at alpha=',alpha))}

for(l in 1:(p-3)) {

newsubsetcount=length(sig.variable.subsets)
Expand Down Expand Up @@ -445,18 +478,21 @@ if (length(vars)<= 1){return(cat('All appropriate subsets using response variabl
#Removes duplicates
newsubsets=unique(newsubsets)

#Checks to see if there are new subsets to check
if (length(newsubsets)==1 && is.na(newsubsets)){return(cat('All appropriate subsets using response variables have been checked using a multiple testing procedure, which controls the maximum overall type I error rate at alpha=',alpha))}

#Removes subsets of size less than current size (1:p-2)
for(i in 1:length(newsubsets))
{
if(length(newsubsets[[i]])<(number.elements-1)){newsubsets[[i]]=NA}
}
# Removes NA
newsubsets=newsubsets[!is.na(newsubsets)]

#Creates a list of significant subsets and non-significant subsets
newsubsetcount=length(newsubsets)
sig.variable.subsets=vector('list',newsubsetcount)
nonsig.variable.subsets=vector('list',newsubsetcount)
nonsig.variable.subsets.new=vector('list',newsubsetcount)
multiplier=newsubsetcount #The multiplier is the number of test peformed
for(i in 1:newsubsetcount)
{
Expand All @@ -467,12 +503,22 @@ if (length(vars)<= 1){return(cat('All appropriate subsets using response variabl
if (test[3]==1){testpval=base$pvalBNP}
if (test[4]==1){testpval=base$pvalWL}

if( testpval*multiplier >= alpha) {nonsig.variable.subsets[[i]]=newsubsets[[i]]}else {nonsig.variable.subsets[[i]]=NA}
if( testpval*multiplier >= alpha) {nonsig.variable.subsets.new[[i]]=newsubsets[[i]]}else {nonsig.variable.subsets.new[[i]]=NA}
if( testpval*multiplier < alpha) {sig.variable.subsets[[i]]=newsubsets[[i]]}else {sig.variable.subsets[[i]]=NA}
if( testpval*multiplier < alpha) {cat('The Hypothesis of equality using response variables ', newsubsets[[i]], 'is rejected \n')}
}

nonsig.variable.subsets=nonsig.variable.subsets[!is.na(nonsig.variable.subsets)]
nonsig.variable.subsets.new=nonsig.variable.subsets.new[!is.na(nonsig.variable.subsets.new)]

#Combine previous non-significant subsets with new
nonsig.variable.subsets=c(nonsig.variable.subsets,nonsig.variable.subsets.new)

sig.variable.subsets=sig.variable.subsets[!is.na(sig.variable.subsets)]

#Removes Duplicates
sig.variable.subsets=unique(sig.variable.subsets)

# Removes NA
sig.variable.subsets=sig.variable.subsets[!is.na(sig.variable.subsets)]

number.elements=number.elements-1
Expand Down

0 comments on commit b2fd022

Please sign in to comment.