version 1.0
johanngb authored and cran-robot committed Feb 27, 2018
1 parent e9b82a3 commit 52463a8
Showing 16 changed files with 214 additions and 74 deletions.
12 changes: 6 additions & 6 deletions DESCRIPTION 100755 → 100644
@@ -1,15 +1,15 @@
Package: cpt
Title: Classification Permutation Test
Description: Non-parametric test for equality of multivariate distributions. Trains a classifier to classify (multivariate) observations as coming from one of two distributions. If the classifier is able to classify the observations better than would be expected by chance (using permutation inference), then the null hypothesis that the two distributions are equal is rejected.
Version: 0.9
Date: 2017-03-1
Imports: MASS, nnet, randomForest
Description: Non-parametric test for equality of multivariate distributions. Trains a classifier to classify (multivariate) observations as coming from one of several distributions. If the classifier is able to classify the observations better than would be expected by chance (using permutation inference), then the null hypothesis that the distributions are equal is rejected.
Version: 1.0
Date: 2018-2-26
Imports: MASS, nnet, randomForest, glmnet
Author: Johann Gagnon-Bartsch <johanngb@umich.edu>
Maintainer: Johann Gagnon-Bartsch <johanngb@umich.edu>
License: GPL
URL: http://dept.stat.lsa.umich.edu/~johanngb
LazyLoad: yes
NeedsCompilation: no
Packaged: 2017-03-02 13:16:22 UTC; johann
Packaged: 2018-02-27 18:59:14 UTC; johann
Repository: CRAN
Date/Publication: 2017-03-02 16:14:16
Date/Publication: 2018-02-27 19:20:03 UTC
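For context, a minimal usage sketch of the test this DESCRIPTION documents, assuming the cpt(Z, T, ...) interface shown in the R/cpt.R diff below; the simulated data, the perm.N value, and the printed field are illustrative and not taken from the package documentation.

library(cpt)
set.seed(1)
# two 3-dimensional samples with slightly different means (made-up data)
Z = rbind(matrix(rnorm(50 * 3), 50, 3),
          matrix(rnorm(50 * 3, mean = 0.5), 50, 3))
T = rep(c(0, 1), each = 50)
res = cpt(Z, T, class.methods = "forest", perm.N = 200)
res$pval  # permutation p-value for the null that the two distributions are equal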
24 changes: 13 additions & 11 deletions MD5
@@ -1,13 +1,15 @@
bf08ea2f965acb7bf0f5a2a8ebd8d29c *DESCRIPTION
f285cee7fbebbcbce60f6e076dd6164b *NAMESPACE
6eb9fdf8fc1bb8996065491e577464b8 *R/applyclassifiers.R
06d087e5273286c8a542fa3cc8f6f5df *R/cpt.R
f16b09cf2a898e26ae376ee2ae077f6c *R/getmetric.R
a118e9ddd7cac7894237e5fe02df8437 *R/gettestmethod.R
d29d8d81f29e25dca836e4f116ad6aac *DESCRIPTION
de0e7172cef2b50e23d2b4a58f96bd23 *NAMESPACE
ee246132368348b606def55cff7be0c5 *R/applyclassifiers.R
3a23a9a8d40ba0ee31b4b306c5126481 *R/cpt.R
e8063fc42ba577005bdd919ead877071 *R/getcombmethod.R
7235781e1c25370cb4c23208fb885eab *R/getensemble.metric.R
b9d644400fc9a4ef70ffb1b9f9b21832 *R/getmetric.R
7437c39174c27c502cd17294475545a7 *R/gettestmethod.R
70f2ae6f2a683d5776ab392189bee83a *R/gettestmethods.R
bdb13c32469d01d76c4de6f03fb660bc *R/getteststat.R
58fa478f85cba4a77a477636fa43b6a0 *R/gettrainmethod.R
32b30252abef51dc55ab31c6cfc68f3f *R/getteststat.R
9cbe378084ceb7abaa5f3d9f56c6d26e *R/gettrainmethod.R
ef2d130d68e57477a20eb762e0c00b53 *R/gettrainmethods.R
78fa1692627fa4fb614645a62e57e748 *R/train.R
44d74470c02d53b64e0fd0ed62461ec8 *man/cpt-package.Rd
1ad9ff61783b51b5032ced2c54fa8543 *man/cpt.Rd
35014fd243d937533e3d2940766615fd *R/train.R
c1cf85e07ad2f2ad289c2a741031861d *man/cpt-package.Rd
e72967beb125f37eb1b0a6640516f045 *man/cpt.Rd
4 changes: 2 additions & 2 deletions NAMESPACE 100755 → 100644
@@ -1,6 +1,6 @@
import(MASS, nnet, randomForest)
import(MASS, nnet, randomForest, glmnet)

importFrom("stats", "coefficients", "model.matrix", "predict", "rbinom")
importFrom("stats", "coefficients", "model.matrix", "predict", "rbinom", "sd")

export(
cpt
30 changes: 25 additions & 5 deletions R/applyclassifiers.R 100755 → 100644
@@ -1,10 +1,30 @@
applyclassifiers <-
function (tstZ, tstT, classifiers, test.methods, metric)
function (tstZ, tstT, classifiers, test.methods, metric, ensemble.metric,
testistrain = FALSE)
{
metrics = rep(0, length(classifiers))
for (i in 1:length(classifiers)) {
metrics[i] = metric(test.methods[[i]](tstZ, classifiers[[i]]),
rval = rep(NA, length(classifiers) + 1)
if (nrow(tstZ) == 1) {
class.output = rep(NA, length(classifiers) * length(levels(tstT)))
dim(class.output) = c(length(classifiers), length(levels(tstT)))
for (i in 1:length(classifiers)) class.output[i, ] = test.methods[[i]](tstZ,
classifiers[[i]], testistrain = testistrain)
for (i in 1:length(classifiers)) rval[i] = metric(class.output[i,
, drop = FALSE], tstT)
dim(class.output) = c(1, dim(class.output))
rval[length(classifiers) + 1] = ensemble.metric(class.output,
tstT)
}
return(metrics)
else {
class.output = rep(NA, nrow(tstZ) * length(classifiers) *
length(levels(tstT)))
dim(class.output) = c(nrow(tstZ), length(classifiers),
length(levels(tstT)))
for (i in 1:length(classifiers)) class.output[, i, ] = test.methods[[i]](tstZ,
classifiers[[i]], testistrain = testistrain)
for (i in 1:length(classifiers)) rval[i] = metric(class.output[,
i, ], tstT)
rval[length(classifiers) + 1] = ensemble.metric(class.output,
tstT)
}
return(rval)
}
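A quick shape check of the class.output array assembled above, using invented sizes (4 test rows, 2 classifiers, 3 class levels) and mirroring the rep()/dim() construction in the diff rather than calling the package:

n = 4; n.classifiers = 2; n.levels = 3
class.output = rep(NA, n * n.classifiers * n.levels)
dim(class.output) = c(n, n.classifiers, n.levels)
dim(class.output)  # 4 2 3: observations x classifiers x class levels
# class.output[, i, ] holds classifier i's n x n.levels probability matrix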
52 changes: 39 additions & 13 deletions R/cpt.R 100755 → 100644
@@ -1,34 +1,60 @@
cpt <-
function (Z, T, leaveout = 0, class.methods = "forest", metric = "rate",
paired = FALSE, perm.N = 1000, leaveout.N = 100)
function (Z, T, leaveout = 0, class.methods = "forest", metric = "probability",
ensemble.metric = "mean.prob", paired = FALSE, perm.N = 1000,
leaveout.N = 100, comb.methods = c(class.methods, "ensemble"),
comb.method = "fisher")
{
T = as.factor(T)
train.methods = gettrainmethods(class.methods)
test.methods = gettestmethods(class.methods)
if (is.character(metric))
metric = getmetric(metric)
if (is.character(ensemble.metric))
ensemble.metric = getensemble.metric(ensemble.metric)
if (is.character(comb.method))
comb.method = getcombmethod(comb.method)
if ((leaveout > 0) && (leaveout < 1))
leaveout = ceiling(min(table(T)) * leaveout)
nulldist = rep(0, perm.N)
teststat = getteststat(Z, T, leaveout, train.methods, test.methods,
metric, leaveout.N)
teststat = rep(NA, length(class.methods))
nulldist = matrix(NA, perm.N, length(class.methods) + 1)
colnames(nulldist) = c(class.methods, "ensemble")
for (method.i in 1:length(class.methods)) {
teststat = getteststat(Z, T, leaveout, train.methods,
test.methods, metric, ensemble.metric, leaveout.N)
}
if (paired) {
T = as.numeric(T) - 1
for (i in 1:perm.N) {
newT = as.numeric(T)
newT = T
newT[T == 0] = rbinom(length(T)/2, 1, 0.5)
newT[T == 1] = 1 - newT[T == 0]
newT = as.factor(newT)
nulldist[i] = getteststat(Z, newT, leaveout, train.methods,
test.methods, metric, leaveout.N)
nulldist[i, ] = getteststat(Z, newT, leaveout, train.methods,
test.methods, metric, ensemble.metric, leaveout.N)
}
}
else {
for (i in 1:perm.N) {
Z = Z[sample(nrow(Z)), , drop = FALSE]
nulldist[i] = getteststat(Z, T, leaveout, train.methods,
test.methods, metric, leaveout.N)
T = T[sample(length(T))]
nulldist[i, ] = getteststat(Z, T, leaveout, train.methods,
test.methods, metric, ensemble.metric, leaveout.N)
}
}
pval = sum(nulldist >= teststat)/perm.N
return(list(pval = pval, teststat = teststat, nulldist = nulldist))
pvals = rep(NA, ncol(nulldist))
names(pvals) = colnames(nulldist)
nullpvaldist = matrix(NA, perm.N, ncol(nulldist))
colnames(nullpvaldist) = names(pvals)
for (method.i in 1:ncol(nulldist)) {
pvals[method.i] = sum(nulldist[, method.i] >= teststat[method.i])/perm.N
nullpvaldist[, method.i] = rank(nulldist[, method.i],
ties.method = "max")/perm.N
}
nullcombpvaldist = apply(nullpvaldist[, comb.methods, drop = FALSE],
1, comb.method)
pval = sum(nullcombpvaldist <= comb.method(pvals[comb.methods]))/perm.N
if (length(class.methods) == 1)
return(list(pval = pvals[1], teststat = teststat[1],
nulldist = nulldist[, 1], pvals = pvals[1]))
return(list(pval = pval, teststat = teststat, nulldist = nulldist,
pvals = pvals))
}
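A standalone sketch of the new per-method p-value step above, using a made-up two-column null distribution in place of the real nulldist (one column per classifier plus the ensemble); the test statistics here are invented:

set.seed(1)
perm.N = 1000
nulldist = matrix(rnorm(perm.N * 2), perm.N, 2,
                  dimnames = list(NULL, c("forest", "ensemble")))
teststat = c(forest = 2.1, ensemble = 1.8)
pvals = rep(NA, ncol(nulldist))
names(pvals) = colnames(nulldist)
for (j in 1:ncol(nulldist)) pvals[j] = sum(nulldist[, j] >= teststat[j]) / perm.N
pvals  # fraction of permuted statistics at least as large as the observed one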
13 changes: 13 additions & 0 deletions R/getcombmethod.R
@@ -0,0 +1,13 @@
getcombmethod <-
function (comb.method)
{
if (comb.method == "fisher") {
rval = function(x) {
return(mean(log(x)))
}
}
if (comb.method == "min") {
rval = min
}
return(rval)
}
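A toy check of the "fisher" combiner defined above: it summarizes a vector of p-values by the mean of their logs, so smaller (more significant) p-values give a more negative combined statistic. The numbers are invented.

fisher.comb = function(x) mean(log(x))   # same body as the "fisher" branch above
fisher.comb(c(0.04, 0.20))  # about -2.41
fisher.comb(c(0.50, 0.60))  # about -0.60, less extreme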
32 changes: 32 additions & 0 deletions R/getensemble.metric.R
@@ -0,0 +1,32 @@
getensemble.metric <-
function (ensemble.metric)
{
if (ensemble.metric == "vote") {
rval = function(class.output, tstT) {
temp = (class.output - as.vector(apply(class.output,
c(1, 2), max))) == 0
temp = temp/as.vector(apply(temp, c(1, 2), sum))
votemat = apply(temp, c(1, 3), sum)
indexmat = cbind(1:nrow(votemat), tstT)
temp = votemat - apply(votemat, 1, max) == 0
temp = temp/apply(temp, 1, sum)
return(mean(votemat[indexmat]))
}
}
if (ensemble.metric == "mean.prob") {
rval = function(class.output, tstT) {
meanprob = apply(class.output, c(1, 3), mean)
indexmat = cbind(1:nrow(meanprob), tstT)
return(mean(meanprob[indexmat]))
}
}
if (ensemble.metric == "mean.mse") {
rval = function(class.output, tstT) {
meanprob = apply(class.output, c(1, 3), mean)
indexmat = cbind(1:nrow(meanprob), tstT)
meanprob[indexmat] = 1 - meanprob[indexmat]
return(-mean(meanprob^2))
}
}
return(rval)
}
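A small worked example of the "mean.prob" ensemble metric above, with invented numbers (two classifiers, three test observations, two classes): the true-class probability is averaged across classifiers and then across observations.

class.output = array(NA, dim = c(3, 2, 2))
class.output[, 1, ] = rbind(c(0.8, 0.2), c(0.3, 0.7), c(0.6, 0.4))  # classifier 1
class.output[, 2, ] = rbind(c(0.6, 0.4), c(0.1, 0.9), c(0.5, 0.5))  # classifier 2
tstT = factor(c("a", "b", "a"))
meanprob = apply(class.output, c(1, 3), mean)  # average over the classifier dimension
indexmat = cbind(1:nrow(meanprob), tstT)       # (row, true-class column) pairs
mean(meanprob[indexmat])                       # (0.7 + 0.8 + 0.55) / 3 = 0.6833...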
2 changes: 1 addition & 1 deletion R/getmetric.R 100755 → 100644
@@ -12,7 +7,7 @@ function (metric)
indexmat = cbind(1:nrow(prob), tstT)
temp = prob - apply(prob, 1, max) == 0
temp = temp/apply(temp, 1, sum)
return(mean(prob[indexmat]))
return(mean(temp[indexmat]))
}
}
else if (metric == "mse") {
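A worked example of the changed return value above, with invented numbers: temp is an indicator of each row's predicted class, split evenly across ties, so mean(temp[indexmat]) is a tie-adjusted classification rate, which is what the new line returns.

prob = rbind(c(0.6, 0.4), c(0.5, 0.5), c(0.2, 0.8))
tstT = factor(c("a", "a", "b"))
indexmat = cbind(1:nrow(prob), tstT)
temp = (prob - apply(prob, 1, max)) == 0  # TRUE where the row maximum sits
temp = temp / apply(temp, 1, sum)         # split ties evenly
mean(temp[indexmat])                      # (1 + 0.5 + 1) / 3 = 0.8333...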
25 changes: 20 additions & 5 deletions R/gettestmethod.R 100755 → 100644
@@ -2,26 +2,41 @@ gettestmethod <-
function (method)
{
if (method == "logistic") {
rval = function(Z, classifier) {
rval = function(Z, classifier, testistrain = FALSE) {
tmp = exp(cbind(0, cbind(1, Z) %*% classifier))
return(tmp/apply(tmp, 1, sum))
}
}
else if (method == "logistic2") {
rval = function(Z, classifier) {
rval = function(Z, classifier, testistrain = FALSE) {
tmp = exp(cbind(0, model.matrix(~.^2, data = data.frame(Z)) %*%
classifier))
return(tmp/apply(tmp, 1, sum))
}
}
else if (method == "lda") {
rval = function(Z, classifier) {
rval = function(Z, classifier, testistrain = FALSE) {
return(predict(classifier, Z)$posterior)
}
}
else if (method == "forest") {
rval = function(Z, classifier) {
return(predict(classifier, Z, type = "prob"))
rval = function(Z, classifier, testistrain = FALSE) {
if (testistrain)
return(predict(classifier, type = "prob"))
else return(predict(classifier, Z, type = "prob"))
}
}
else if (method == "glmnet") {
rval = function(Z, classifier, testistrain = FALSE) {
return(predict(classifier, newx = Z, type = "response"))
}
}
else if (method == "glmnet2") {
rval = function(Z, classifier, testistrain = FALSE) {
Z = scale(Z, center = classifier[[1]], scale = classifier[[2]])
Z = model.matrix(~.^2, data = data.frame(Z))[, -1,
drop = FALSE]
return(predict(classifier[[3]], newx = Z, type = "response"))
}
}
}
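A toy trace of the "logistic" test method shown at the top of this file, with an invented two-column Z and coefficient vector: the probabilities come from a softmax over cbind(0, linear predictor), so each row of the result sums to 1.

Z = matrix(c(0.5, -1.0, 1.5, 0.2), 2, 2)
classifier = c(0.1, 0.8, -0.5)            # intercept plus one slope per column (made up)
tmp = exp(cbind(0, cbind(1, Z) %*% classifier))
tmp / apply(tmp, 1, sum)                  # 2 x 2 matrix of class probabilities, rows sum to 1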
Empty file modified R/gettestmethods.R 100755 → 100644
Empty file.
25 changes: 13 additions & 12 deletions R/getteststat.R 100755 → 100644
@@ -1,28 +1,29 @@
getteststat <-
function (Z, T, leaveout, train.methods, test.methods, metric,
leaveout.N)
ensemble.metric, leaveout.N)
{
if (leaveout == 0) {
classifiers = train(Z, T, train.methods)
metrics = applyclassifiers(Z, T, classifiers, test.methods,
metric)
return(max(metrics))
return(applyclassifiers(Z, T, classifiers, test.methods,
metric, ensemble.metric, testistrain = TRUE))
}
else if ((leaveout == 1) & (leaveout.N == nrow(Z))) {
metricsmat = matrix(0, leaveout.N, length(train.methods))
metric.mat = matrix(NA, leaveout.N, length(train.methods) +
1)
for (leaveout.i in 1:leaveout.N) {
trnZ = Z[-leaveout.i, , drop = FALSE]
trnT = T[-leaveout.i]
tstZ = Z[leaveout.i, , drop = FALSE]
tstT = T[leaveout.i]
classifiers = train(trnZ, trnT, train.methods)
metricsmat[leaveout.i, ] = applyclassifiers(tstZ,
tstT, classifiers, test.methods, metric)
metric.mat[leaveout.i, ] = applyclassifiers(tstZ,
tstT, classifiers, test.methods, metric, ensemble.metric)
}
return(max(apply(metricsmat, 2, mean)))
return(apply(metric.mat, 2, mean))
}
else {
metricsmat = matrix(0, leaveout.N, length(train.methods))
metric.mat = matrix(NA, leaveout.N, length(train.methods) +
1)
for (leaveout.i in 1:leaveout.N) {
testset = rep(FALSE, length(T))
for (i in 1:length(levels(T))) testset[sample(which(levels(T)[i] ==
@@ -32,9 +33,9 @@ function (Z, T, leaveout, train.methods, test.methods, metric,
tstZ = Z[testset, , drop = FALSE]
tstT = T[testset]
classifiers = train(trnZ, trnT, train.methods)
metricsmat[leaveout.i, ] = applyclassifiers(tstZ,
tstT, classifiers, test.methods, metric)
metric.mat[leaveout.i, ] = applyclassifiers(tstZ,
tstT, classifiers, test.methods, metric, ensemble.metric)
}
return(max(apply(metricsmat, 2, mean)))
return(apply(metric.mat, 2, mean))
}
}
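A tiny illustration of the changed return value above, with invented numbers: metric.mat holds one row per leave-out split and one column per classifier plus the ensemble, and getteststat() now returns the vector of column means rather than the single maximum.

metric.mat = rbind(c(0.70, 0.65, 0.72),
                   c(0.60, 0.65, 0.66))
colnames(metric.mat) = c("forest", "lda", "ensemble")
apply(metric.mat, 2, mean)  # forest 0.65, lda 0.65, ensemble 0.69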
15 changes: 15 additions & 0 deletions R/gettrainmethod.R 100755 → 100644
@@ -28,5 +28,20 @@ function (method)
return(randomForest(Z, T))
}
}
else if (method == "glmnet") {
rval = function(Z, T) {
return(cv.glmnet(Z, T, family = "multinomial"))
}
}
else if (method == "glmnet2") {
rval = function(Z, T) {
means = apply(Z, 2, mean)
sds = apply(Z, 2, sd)
Z = scale(Z)
Z = as.matrix(model.matrix(~.^2, data = data.frame(Z))[,
-1])
return(list(means, sds, cv.glmnet(Z, T, family = "multinomial")))
}
}
return(rval)
}
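A quick look at the design expansion used by the new "glmnet2" trainer above: with three columns, model.matrix(~.^2) adds all pairwise interaction terms, and the leading intercept column is dropped as in the code. The data and column names below are invented.

Z = matrix(rnorm(12), 4, 3, dimnames = list(NULL, c("x1", "x2", "x3")))
colnames(model.matrix(~.^2, data = data.frame(Z))[, -1, drop = FALSE])
# "x1" "x2" "x3" "x1:x2" "x1:x3" "x2:x3"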
Empty file modified R/gettrainmethods.R 100755 → 100644
Empty file.
9 changes: 4 additions & 5 deletions R/train.R 100755 → 100644
@@ -1,9 +1,8 @@
train <-
function (trnZ, trnT, train.methods)
{
classifiers = list()
for (i in 1:length(train.methods)) {
classifiers[[i]] = train.methods[[i]](trnZ, trnT)
}
return(classifiers)
rval = list()
for (i in 1:length(train.methods)) rval[[i]] = train.methods[[i]](trnZ,
trnT)
return(rval)
}
6 changes: 3 additions & 3 deletions man/cpt-package.Rd 100755 → 100644
@@ -7,15 +7,15 @@ Classification Permutation Test
}

\description{
Non-parametric test for equality of multivariate distributions. Trains a classifier to classify (multivariate) observations as coming from one of two distributions. If the classifier is able to classify the observations better than would be expected by chance (using permutation inference), then the null hypothesis that the two distributions are equal is rejected.
Description: Non-parametric test for equality of multivariate distributions. Trains a classifier to classify (multivariate) observations as coming from one of several distributions. If the classifier is able to classify the observations better than would be expected by chance (using permutation inference), then the null hypothesis that the distributions are equal is rejected.
}

\details{
\tabular{ll}{
Package: \tab cpt\cr
Type: \tab Package\cr
Version: \tab 0.9\cr
Date: \tab 2017-3-1\cr
Version: \tab 1.0\cr
Date: \tab 2018-2-26\cr
License: \tab GPL\cr
LazyLoad: \tab yes\cr
}
