Skip to content

Commit

Permalink
Merge pull request #3 from topepo/master
Browse files Browse the repository at this point in the history
Catchup to upstream
  • Loading branch information
jknowles committed Apr 3, 2015
2 parents 7ff7192 + d3b6bfd commit 072a438
Show file tree
Hide file tree
Showing 25 changed files with 2,217 additions and 87 deletions.
21 changes: 18 additions & 3 deletions RegressionTests/Code/gam.R
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,14 @@ test_class_cv_model <- train(trainX, trainY,
metric = "ROC",
preProc = c("center", "scale"))

set.seed(849)
test_class_cv_dist <- train(trainX, trainY,
method = "gam",
trControl = cctrl1,
metric = "ROC",
preProc = c("center", "scale"),
family = negbin(theta = 1))

set.seed(849)
test_class_cv_form <- train(Class ~ ., data = training,
method = "gam",
Expand Down Expand Up @@ -94,11 +102,18 @@ test_reg_cv_model <- train(trainX, trainY,
preProc = c("center", "scale"))
test_reg_pred <- predict(test_reg_cv_model, testX)

set.seed(849)
test_reg_cv_dist <- train(trainX, trainY,
method = "gam",
trControl = rctrl1,
preProc = c("center", "scale"),
family = scat())

set.seed(849)
test_reg_cv_form <- train(y ~ ., data = training,
method = "gam",
trControl = rctrl1,
preProc = c("center", "scale"))
method = "gam",
trControl = rctrl1,
preProc = c("center", "scale"))
test_reg_pred_form <- predict(test_reg_cv_form, testX)

set.seed(849)
Expand Down
15 changes: 15 additions & 0 deletions RegressionTests/Code/gamSpline.R
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,14 @@ test_class_cv_model <- train(trainX, trainY,
metric = "ROC",
preProc = c("center", "scale"))

set.seed(849)
test_class_cv_dist <- train(trainX, trainY,
method = "gamSpline",
trControl = cctrl1,
metric = "ROC",
preProc = c("center", "scale"),
family = binomial(link = "cloglog"))

set.seed(849)
test_class_cv_form <- train(Class ~ ., data = training,
method = "gamSpline",
Expand Down Expand Up @@ -95,6 +103,13 @@ test_reg_cv_model <- train(trainX, trainY,
preProc = c("center", "scale"))
test_reg_pred <- predict(test_reg_cv_model, testX)

set.seed(849)
test_reg_cv_dist <- train(trainX, abs(trainY),
method = "gamSpline",
trControl = rctrl1,
preProc = c("center", "scale"),
family = Gamma)

set.seed(849)
test_reg_cv_form <- train(y ~ ., data = training,
method = "gamSpline",
Expand Down
20 changes: 20 additions & 0 deletions RegressionTests/Code/gbm.R
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,17 @@ test_class_cv_model <- train(trainX, trainY,
tuneGrid = gbmGrid,
verbose = FALSE)

set.seed(849)
test_class_cv_dist <- train(trainX, trainY,
method = "gbm",
trControl = cctrl1,
metric = "ROC",
preProc = c("center", "scale"),
tuneGrid = gbmGrid,
verbose = FALSE,
distribution = "adaboost")


set.seed(849)
test_class_cv_form <- train(Class ~ ., data = training,
method = "gbm",
Expand Down Expand Up @@ -108,6 +119,15 @@ test_reg_cv_model <- train(trainX, trainY,
verbose = FALSE)
test_reg_pred <- predict(test_reg_cv_model, testX)

set.seed(849)
test_reg_cv_dist <- train(trainX, trainY,
method = "gbm",
trControl = rctrl1,
preProc = c("center", "scale"),
tuneGrid = gbmGrid,
verbose = FALSE,
distribution = "laplace")

set.seed(849)
test_reg_cv_form <- train(y ~ ., data = training,
method = "gbm",
Expand Down
24 changes: 10 additions & 14 deletions models/files/gam.R
Original file line number Diff line number Diff line change
Expand Up @@ -17,26 +17,22 @@ modelInfo <- list(label = "Generalized Additive Model using Splines",
dat$.outcome <- y
dist <- gaussian()
}
out <- mgcv:::gam(modForm, data = dat, family = dist,
modelArgs <- list(formula = modForm,
data = dat,
select = param$select,
method = as.character(param$method),
...)
# if(is.null(wts)) {
#
# } else {
# out <- mgcv:::gam(modForm, data = dat, family = dist,
# select = param$select,
# method = as.character(param$method),
# weights = wts,
# ...)
# }
method = as.character(param$method))
## Intercept family if passed in
theDots <- list(...)
if(!any(names(theDots) == "family")) modelArgs$family <- dist
modelArgs <- c(modelArgs, theDots)

out <- do.call(getFromNamespace("gam", "mgcv"), modelArgs)
out

},
predict = function(modelFit, newdata, submodels = NULL) {
if(!is.data.frame(newdata)) newdata <- as.data.frame(newdata)
if(modelFit$problemType == "Classification")
{
if(modelFit$problemType == "Classification") {
probs <- predict(modelFit, newdata, type = "response")
out <- ifelse(probs < .5,
modelFit$obsLevel[1],
Expand Down
23 changes: 14 additions & 9 deletions models/files/gamLoess.R
Original file line number Diff line number Diff line change
Expand Up @@ -8,16 +8,21 @@ modelInfo <- list(label = "Generalized Additive Model using LOESS",
grid = function(x, y, len = NULL)
expand.grid(span = .5, degree = 1),
fit = function(x, y, wts, param, lev, last, classProbs, ...) {
dat <- if(is.data.frame(x)) x else as.data.frame(x)
dat$.outcome <- y
args <- list(data = if(is.data.frame(x)) x else as.data.frame(x))
args$data$.outcome <- y
args$formula <- caret:::smootherFormula(x,
smoother = "lo",
span = param$span,
degree = param$degree)
theDots <- list(...)

gam:::gam(caret:::smootherFormula(x,
smoother = "lo",
span = param$span,
degree = param$degree),
data = dat,
family = if(is.factor(y)) binomial() else gaussian(),
...)

if(!any(names(theDots) == "family"))
args$family <- if(is.factor(y)) binomial else gaussian

if(length(theDots) > 0) args <- c(args, theDots)

do.call(getFromNamespace("gam", "gam"), args)
},
predict = function(modelFit, newdata, submodels = NULL) {
if(!is.data.frame(newdata)) newdata <- as.data.frame(newdata)
Expand Down
7 changes: 5 additions & 2 deletions models/files/gamSpline.R
Original file line number Diff line number Diff line change
Expand Up @@ -14,9 +14,12 @@ modelInfo <- list(label = "Generalized Additive Model using Splines",
args$formula <- caret:::smootherFormula(x,
smoother = "s",
df = param$df)
args$family <- if(is.factor(y)) binomial else gaussian

theDots <- list(...)


if(!any(names(theDots) == "family"))
args$family <- if(is.factor(y)) binomial else gaussian

if(length(theDots) > 0) args <- c(args, theDots)

do.call(getFromNamespace("gam", "gam"), args)
Expand Down
4 changes: 2 additions & 2 deletions models/files/gbm.R
Original file line number Diff line number Diff line change
Expand Up @@ -29,8 +29,7 @@ modelInfo <- list(label = "Stochastic Gradient Boosting",
modDist <- theDots$distribution
theDots$distribution <- NULL
} else {
if(is.numeric(y))
{
if(is.numeric(y)) {
modDist <- "gaussian"
} else modDist <- if(length(lev) == 2) "bernoulli" else "multinomial"
}
Expand All @@ -45,6 +44,7 @@ modelInfo <- list(label = "Stochastic Gradient Boosting",
n.trees = param$n.trees,
shrinkage = param$shrinkage,
distribution = modDist)
if(any(names(theDots) == "family")) modArgs$distribution <- NULL

if(length(theDots) > 0) modArgs <- c(modArgs, theDots)

Expand Down
19 changes: 1 addition & 18 deletions models/files/rpartCost.R
Original file line number Diff line number Diff line change
Expand Up @@ -87,23 +87,6 @@ modelInfo <- list(label = "Cost-Sensitive CART",
}
out
},
prob = function(modelFit, newdata, submodels = NULL) {
if(!is.data.frame(newdata)) newdata <- as.data.frame(newdata)
out <- predict(modelFit, newdata, type = "prob")

if(!is.null(submodels))
{
tmp <- vector(mode = "list", length = nrow(submodels) + 1)
tmp[[1]] <- out
for(j in seq(along = submodels$cp))
{
prunedFit <- prune.rpart(modelFit, cp = submodels$cp[j])
tmpProb <- predict(prunedFit, newdata, type = "prob")
tmp[[j+1]] <- as.data.frame(tmpProb[, modelFit$obsLevels, drop = FALSE])
}
out <- tmp
}
out
},
prob = NULL,
tags = c("Tree-Based Model", "Implicit Feature Selection", "Cost Sensitive Learning"),
sort = function(x) x[order(-x$cp, -x$Cost),])
4 changes: 2 additions & 2 deletions pkg/caret/DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
Package: caret
Version: 6.0-43
Date: 2015-01-27
Version: 6.0-44
Date: 2015-04-01
Title: Classification and Regression Training
Author: Max Kuhn. Contributions from Jed Wing, Steve Weston, Andre
Williams, Chris Keefer, Allan Engelhardt, Tony Cooper, Zachary Mayer,
Expand Down
2 changes: 1 addition & 1 deletion pkg/caret/R/aaa.R
Original file line number Diff line number Diff line change
Expand Up @@ -220,7 +220,7 @@ twoClassSummary <- function (data, lev = NULL, model = NULL)
requireNamespaceQuietStop('pROC')
if (!all(levels(data[, "pred"]) == levels(data[, "obs"])))
stop("levels of observed and predicted data do not match")
rocObject <- try(pROC::roc.default(data$obs, data[, lev[1]]), silent = TRUE)
rocObject <- try(pROC::roc(data$obs, data[, lev[1]]), silent = TRUE)
rocAUC <- if(class(rocObject)[1] == "try-error") NA else rocObject$auc
out <- c(rocAUC,
sensitivity(data[, "pred"], data[, "obs"], lev[1]),
Expand Down
43 changes: 20 additions & 23 deletions pkg/caret/R/extractProb.R
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@

## TODO use foreach to parallelize

extractProb <- function(models,
testX = NULL,
Expand Down Expand Up @@ -39,17 +39,15 @@ extractProb <- function(models,
for(i in seq(along = models))
{
if(verbose) cat("starting ", models[[i]]$method, "\n"); flush.console()
if(!unkOnly)
{
tempTrainPred <- predictionFunction(models[[i]]$modelInfo,
models[[i]]$finalModel,
trainX,
models[[i]]$preProcess)
if(!unkOnly) {
tempTrainProb <- probFunction(models[[i]]$modelInfo,
models[[i]]$finalModel,
trainX,
models[[i]]$preProcess)

tempTrainPred <- apply(tempTrainProb, 1, which.max)
tempTrainPred <- colnames(tempTrainProb)[tempTrainPred]
tempTrainPred <- factor(tempTrainPred,
levels = models[[i]]$modelInfo$levels(models[[i]]$finalModel))

if(verbose) cat(models[[i]]$method, ":", length(tempTrainPred), "training predictions were added\n"); flush.console()

Expand All @@ -61,21 +59,20 @@ extractProb <- function(models,
dataType <- c(dataType, rep("Training", length(tempTrainPred)))

# Test Data
if(!is.null(testX) & !is.null(testY))
{
if(!is.null(testX) & !is.null(testY)) {
if(!is.data.frame(testX)) testX <- as.data.frame(testX)
tempX <- testX
tempY <- testY
tempX$.outcome <- NULL
tempTestPred <- predictionFunction(models[[i]]$modelInfo,
models[[i]]$finalModel,
tempX,
models[[i]]$preProcess)
tempX$.outcome <- NULL
tempTestProb <- probFunction(models[[i]]$modelInfo,
models[[i]]$finalModel,
tempX,
models[[i]]$preProcess)

models[[i]]$preProcess)
tempTestPred <- apply(tempTestProb, 1, which.max)
tempTestPred <- colnames(tempTestProb)[tempTestPred]
tempTestPred <- factor(tempTestPred,
levels = models[[i]]$modelInfo$levels(models[[i]]$finalModel))

if(verbose) cat(models[[i]]$method, ":", length(tempTestPred), "test predictions were added\n")

predProb <- if(is.null(predProb)) tempTestProb else rbind(predProb, tempTestProb)
Expand All @@ -94,16 +91,16 @@ extractProb <- function(models,
if(!is.data.frame(unkX)) unkX <- as.data.frame(unkX)
tempX <- unkX
tempX$.outcome <- NULL

tempUnkPred <- predictionFunction(models[[i]]$modelInfo,
models[[i]]$finalModel,
tempX,
models[[i]]$preProcess)

tempUnkProb <- probFunction(models[[i]]$modelInfo,
models[[i]]$finalModel,
tempX,
models[[i]]$preProcess)

tempUnkPred <- apply(tempUnkProb, 1, which.max)
tempUnkPred <- colnames(tempUnkProb)[tempUnkPred]
tempUnkPred <- factor(tempUnkPred,
levels = models[[i]]$modelInfo$levels(models[[i]]$finalModel))

if(verbose) cat(models[[i]]$method, ":", length(tempUnkPred), "unknown predictions were added\n")

predProb <- if(is.null(predProb)) tempUnkProb else rbind(predProb, tempUnkProb)
Expand Down
1 change: 1 addition & 0 deletions pkg/caret/R/misc.R
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@ evalSummaryFunction <- function(y, wts, ctrl, lev, metric, method) {
stop("train()'s use of ROC codes requires class probabilities. See the classProbs option of trainControl()")
}
if(!is.null(wts)) testOutput$weights <- sample(wts, min(10, length(wts)))
testOutput$rowIndex <- sample(seq(along = y), size = nrow(testOutput))
ctrl$summaryFunction(testOutput, lev, method)
}

Expand Down
2 changes: 1 addition & 1 deletion pkg/caret/R/print.varImp.train.R
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ function(x, top = min(20, dim(x$importance)[1]), digits = max(3, getOption("digi
printObj <- printObj[,1,drop = FALSE]
names(printObj) <- "Importance"
}
print.data.frame(printObj, digits = digits, ...)
print(printObj, digits = digits, ...)
invisible(x)
}

8 changes: 5 additions & 3 deletions pkg/caret/R/rfe.R
Original file line number Diff line number Diff line change
Expand Up @@ -535,7 +535,7 @@ gamFuncs <- list(summary = defaultSummary,
},
pred = function(object, x)
{
#browser()
if(!is.data.frame(x)) x <- as.data.frame(x)
loaded <- search()
gamLoaded <- any(loaded == "package:gam")
if(gamLoaded) detach(package:gam)
Expand Down Expand Up @@ -625,12 +625,13 @@ rfFuncs <- list(summary = defaultSummary,
lmFuncs <- list(summary = defaultSummary,
fit = function(x, y, first, last, ...)
{
tmp <- as.data.frame(x)
tmp <- if(is.data.frame(x)) x else as.data.frame(x)
tmp$y <- y
lm(y~., data = tmp)
},
pred = function(object, x)
{
if(!is.data.frame(x)) x <- as.data.frame(x)
predict(object, x)
},
rank = function(object, x, y)
Expand Down Expand Up @@ -688,12 +689,13 @@ nbFuncs <- list(summary = defaultSummary,
lrFuncs <- ldaFuncs
lrFuncs$fit <- function (x, y, first, last, ...)
{
tmp <- x
tmp <- if(is.data.frame(x)) x else as.data.frame(x)
tmp$Class <- y
glm(Class ~ ., data = tmp, family = "binomial")
}
lrFuncs$pred <- function (object, x)
{
if(!is.data.frame(x)) x <- as.data.frame(x)
lvl <- levels(object$data$Class)
tmp <- predict(object, x, type = "response")
out <- data.frame(1-tmp, tmp)
Expand Down
Loading

0 comments on commit 072a438

Please sign in to comment.