Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Catchup to upstream #3

Merged
merged 11 commits into from
Apr 3, 2015
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
21 changes: 18 additions & 3 deletions RegressionTests/Code/gam.R
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,14 @@ test_class_cv_model <- train(trainX, trainY,
metric = "ROC",
preProc = c("center", "scale"))

# Classification fit that passes an explicit `family` through to the
# underlying model (here via the `...` of train()), exercising the
# non-default-distribution code path.
set.seed(849)
test_class_cv_dist <- train(trainX, trainY,
                            method = "gam",
                            preProc = c("center", "scale"),
                            metric = "ROC",
                            trControl = cctrl1,
                            family = negbin(theta = 1))

set.seed(849)
test_class_cv_form <- train(Class ~ ., data = training,
method = "gam",
Expand Down Expand Up @@ -94,11 +102,18 @@ test_reg_cv_model <- train(trainX, trainY,
preProc = c("center", "scale"))
test_reg_pred <- predict(test_reg_cv_model, testX)

# Regression fit with a user-supplied `family` (scat()) forwarded to the
# underlying model, exercising the non-default-distribution code path.
set.seed(849)
test_reg_cv_dist <- train(trainX, trainY,
                          method = "gam",
                          preProc = c("center", "scale"),
                          trControl = rctrl1,
                          family = scat())

# Formula-interface regression fit.
# NOTE(review): the diff residue duplicated the method/trControl/preProc
# argument lines (old and new indentation were both kept), which is a
# syntax error in R; only one copy of the argument list is retained here.
set.seed(849)
test_reg_cv_form <- train(y ~ ., data = training,
                          method = "gam",
                          trControl = rctrl1,
                          preProc = c("center", "scale"))
test_reg_pred_form <- predict(test_reg_cv_form, testX)

set.seed(849)
Expand Down
15 changes: 15 additions & 0 deletions RegressionTests/Code/gamSpline.R
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,14 @@ test_class_cv_model <- train(trainX, trainY,
metric = "ROC",
preProc = c("center", "scale"))

# Classification fit with an explicit binomial family (cloglog link)
# forwarded through train()'s `...`, covering the custom-family path.
set.seed(849)
test_class_cv_dist <- train(trainX, trainY,
                            method = "gamSpline",
                            preProc = c("center", "scale"),
                            metric = "ROC",
                            trControl = cctrl1,
                            family = binomial(link = "cloglog"))

set.seed(849)
test_class_cv_form <- train(Class ~ ., data = training,
method = "gamSpline",
Expand Down Expand Up @@ -95,6 +103,13 @@ test_reg_cv_model <- train(trainX, trainY,
preProc = c("center", "scale"))
test_reg_pred <- predict(test_reg_cv_model, testX)

# Regression fit with a Gamma family; the outcome is passed through abs()
# so it stays strictly positive as that family requires.
set.seed(849)
test_reg_cv_dist <- train(trainX, abs(trainY),
                          method = "gamSpline",
                          preProc = c("center", "scale"),
                          trControl = rctrl1,
                          family = Gamma)

set.seed(849)
test_reg_cv_form <- train(y ~ ., data = training,
method = "gamSpline",
Expand Down
20 changes: 20 additions & 0 deletions RegressionTests/Code/gbm.R
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,17 @@ test_class_cv_model <- train(trainX, trainY,
tuneGrid = gbmGrid,
verbose = FALSE)

# Classification fit with a non-default gbm distribution ("adaboost").
# Fix: a comma was missing after `verbose = FALSE`, making the call a
# syntax error.
set.seed(849)
test_class_cv_dist <- train(trainX, trainY,
                            method = "gbm",
                            trControl = cctrl1,
                            metric = "ROC",
                            preProc = c("center", "scale"),
                            tuneGrid = gbmGrid,
                            verbose = FALSE,
                            distribution = "adaboost")

set.seed(849)
test_class_cv_form <- train(Class ~ ., data = training,
method = "gbm",
Expand Down Expand Up @@ -108,6 +119,15 @@ test_reg_cv_model <- train(trainX, trainY,
verbose = FALSE)
test_reg_pred <- predict(test_reg_cv_model, testX)

# Regression fit with a non-default gbm distribution ("laplace") passed
# through train()'s `...`.
set.seed(849)
test_reg_cv_dist <- train(trainX, trainY,
                          method = "gbm",
                          preProc = c("center", "scale"),
                          tuneGrid = gbmGrid,
                          trControl = rctrl1,
                          distribution = "laplace",
                          verbose = FALSE)

set.seed(849)
test_reg_cv_form <- train(y ~ ., data = training,
method = "gbm",
Expand Down
24 changes: 10 additions & 14 deletions models/files/gam.R
Original file line number Diff line number Diff line change
Expand Up @@ -17,26 +17,22 @@ modelInfo <- list(label = "Generalized Additive Model using Splines",
dat$.outcome <- y
dist <- gaussian()
}
out <- mgcv:::gam(modForm, data = dat, family = dist,
modelArgs <- list(formula = modForm,
data = dat,
select = param$select,
method = as.character(param$method),
...)
# if(is.null(wts)) {
#
# } else {
# out <- mgcv:::gam(modForm, data = dat, family = dist,
# select = param$select,
# method = as.character(param$method),
# weights = wts,
# ...)
# }
method = as.character(param$method))
## Intercept family if passed in
theDots <- list(...)
if(!any(names(theDots) == "family")) modelArgs$family <- dist
modelArgs <- c(modelArgs, theDots)

out <- do.call(getFromNamespace("gam", "mgcv"), modelArgs)
out

},
predict = function(modelFit, newdata, submodels = NULL) {
if(!is.data.frame(newdata)) newdata <- as.data.frame(newdata)
if(modelFit$problemType == "Classification")
{
if(modelFit$problemType == "Classification") {
probs <- predict(modelFit, newdata, type = "response")
out <- ifelse(probs < .5,
modelFit$obsLevel[1],
Expand Down
23 changes: 14 additions & 9 deletions models/files/gamLoess.R
Original file line number Diff line number Diff line change
Expand Up @@ -8,16 +8,21 @@ modelInfo <- list(label = "Generalized Additive Model using LOESS",
grid = function(x, y, len = NULL)
expand.grid(span = .5, degree = 1),
fit = function(x, y, wts, param, lev, last, classProbs, ...) {
  ## Fit a gam::gam model with lo() smooth terms built from the
  ## tuning parameters (span, degree).
  ##
  ## NOTE(review): the diff residue contained both the old direct
  ## gam:::gam(...) call and the new do.call()-based version; only
  ## the new version is kept here.
  args <- list(data = if(is.data.frame(x)) x else as.data.frame(x))
  args$data$.outcome <- y
  args$formula <- caret:::smootherFormula(x,
                                          smoother = "lo",
                                          span = param$span,
                                          degree = param$degree)
  theDots <- list(...)

  ## Only supply a default family when the caller did not pass one
  ## through train()'s `...`.
  if(!any(names(theDots) == "family"))
    args$family <- if(is.factor(y)) binomial else gaussian

  if(length(theDots) > 0) args <- c(args, theDots)

  do.call(getFromNamespace("gam", "gam"), args)
},
predict = function(modelFit, newdata, submodels = NULL) {
if(!is.data.frame(newdata)) newdata <- as.data.frame(newdata)
Expand Down
7 changes: 5 additions & 2 deletions models/files/gamSpline.R
Original file line number Diff line number Diff line change
Expand Up @@ -14,9 +14,12 @@ modelInfo <- list(label = "Generalized Additive Model using Splines",
args$formula <- caret:::smootherFormula(x,
smoother = "s",
df = param$df)
args$family <- if(is.factor(y)) binomial else gaussian

theDots <- list(...)


if(!any(names(theDots) == "family"))
args$family <- if(is.factor(y)) binomial else gaussian

if(length(theDots) > 0) args <- c(args, theDots)

do.call(getFromNamespace("gam", "gam"), args)
Expand Down
4 changes: 2 additions & 2 deletions models/files/gbm.R
Original file line number Diff line number Diff line change
Expand Up @@ -29,8 +29,7 @@ modelInfo <- list(label = "Stochastic Gradient Boosting",
modDist <- theDots$distribution
theDots$distribution <- NULL
} else {
if(is.numeric(y))
{
if(is.numeric(y)) {
modDist <- "gaussian"
} else modDist <- if(length(lev) == 2) "bernoulli" else "multinomial"
}
Expand All @@ -45,6 +44,7 @@ modelInfo <- list(label = "Stochastic Gradient Boosting",
n.trees = param$n.trees,
shrinkage = param$shrinkage,
distribution = modDist)
if(any(names(theDots) == "family")) modArgs$distribution <- NULL

if(length(theDots) > 0) modArgs <- c(modArgs, theDots)

Expand Down
19 changes: 1 addition & 18 deletions models/files/rpartCost.R
Original file line number Diff line number Diff line change
Expand Up @@ -87,23 +87,6 @@ modelInfo <- list(label = "Cost-Sensitive CART",
}
out
},
prob = function(modelFit, newdata, submodels = NULL) {
if(!is.data.frame(newdata)) newdata <- as.data.frame(newdata)
out <- predict(modelFit, newdata, type = "prob")

if(!is.null(submodels))
{
tmp <- vector(mode = "list", length = nrow(submodels) + 1)
tmp[[1]] <- out
for(j in seq(along = submodels$cp))
{
prunedFit <- prune.rpart(modelFit, cp = submodels$cp[j])
tmpProb <- predict(prunedFit, newdata, type = "prob")
tmp[[j+1]] <- as.data.frame(tmpProb[, modelFit$obsLevels, drop = FALSE])
}
out <- tmp
}
out
},
prob = NULL,
tags = c("Tree-Based Model", "Implicit Feature Selection", "Cost Sensitive Learning"),
sort = function(x) x[order(-x$cp, -x$Cost),])
4 changes: 2 additions & 2 deletions pkg/caret/DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
Package: caret
Version: 6.0-43
Date: 2015-01-27
Version: 6.0-44
Date: 2015-04-01
Title: Classification and Regression Training
Author: Max Kuhn. Contributions from Jed Wing, Steve Weston, Andre
Williams, Chris Keefer, Allan Engelhardt, Tony Cooper, Zachary Mayer,
Expand Down
2 changes: 1 addition & 1 deletion pkg/caret/R/aaa.R
Original file line number Diff line number Diff line change
Expand Up @@ -220,7 +220,7 @@ twoClassSummary <- function (data, lev = NULL, model = NULL)
requireNamespaceQuietStop('pROC')
if (!all(levels(data[, "pred"]) == levels(data[, "obs"])))
stop("levels of observed and predicted data do not match")
rocObject <- try(pROC::roc.default(data$obs, data[, lev[1]]), silent = TRUE)
rocObject <- try(pROC::roc(data$obs, data[, lev[1]]), silent = TRUE)
rocAUC <- if(class(rocObject)[1] == "try-error") NA else rocObject$auc
out <- c(rocAUC,
sensitivity(data[, "pred"], data[, "obs"], lev[1]),
Expand Down
43 changes: 20 additions & 23 deletions pkg/caret/R/extractProb.R
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@

## TODO use foreach to parallelize

extractProb <- function(models,
testX = NULL,
Expand Down Expand Up @@ -39,17 +39,15 @@ extractProb <- function(models,
for(i in seq(along = models))
{
if(verbose) cat("starting ", models[[i]]$method, "\n"); flush.console()
if(!unkOnly)
{
tempTrainPred <- predictionFunction(models[[i]]$modelInfo,
models[[i]]$finalModel,
trainX,
models[[i]]$preProcess)
if(!unkOnly) {
tempTrainProb <- probFunction(models[[i]]$modelInfo,
models[[i]]$finalModel,
trainX,
models[[i]]$preProcess)

tempTrainPred <- apply(tempTrainProb, 1, which.max)
tempTrainPred <- colnames(tempTrainProb)[tempTrainPred]
tempTrainPred <- factor(tempTrainPred,
levels = models[[i]]$modelInfo$levels(models[[i]]$finalModel))

if(verbose) cat(models[[i]]$method, ":", length(tempTrainPred), "training predictions were added\n"); flush.console()

Expand All @@ -61,21 +59,20 @@ extractProb <- function(models,
dataType <- c(dataType, rep("Training", length(tempTrainPred)))

# Test Data
if(!is.null(testX) & !is.null(testY))
{
if(!is.null(testX) & !is.null(testY)) {
if(!is.data.frame(testX)) testX <- as.data.frame(testX)
tempX <- testX
tempY <- testY
tempX$.outcome <- NULL
tempTestPred <- predictionFunction(models[[i]]$modelInfo,
models[[i]]$finalModel,
tempX,
models[[i]]$preProcess)
tempX$.outcome <- NULL
tempTestProb <- probFunction(models[[i]]$modelInfo,
models[[i]]$finalModel,
tempX,
models[[i]]$preProcess)

models[[i]]$preProcess)
tempTestPred <- apply(tempTestProb, 1, which.max)
tempTestPred <- colnames(tempTestProb)[tempTestPred]
tempTestPred <- factor(tempTestPred,
levels = models[[i]]$modelInfo$levels(models[[i]]$finalModel))

if(verbose) cat(models[[i]]$method, ":", length(tempTestPred), "test predictions were added\n")

predProb <- if(is.null(predProb)) tempTestProb else rbind(predProb, tempTestProb)
Expand All @@ -94,16 +91,16 @@ extractProb <- function(models,
if(!is.data.frame(unkX)) unkX <- as.data.frame(unkX)
tempX <- unkX
tempX$.outcome <- NULL

tempUnkPred <- predictionFunction(models[[i]]$modelInfo,
models[[i]]$finalModel,
tempX,
models[[i]]$preProcess)

tempUnkProb <- probFunction(models[[i]]$modelInfo,
models[[i]]$finalModel,
tempX,
models[[i]]$preProcess)

tempUnkPred <- apply(tempUnkProb, 1, which.max)
tempUnkPred <- colnames(tempUnkProb)[tempUnkPred]
tempUnkPred <- factor(tempUnkPred,
levels = models[[i]]$modelInfo$levels(models[[i]]$finalModel))

if(verbose) cat(models[[i]]$method, ":", length(tempUnkPred), "unknown predictions were added\n")

predProb <- if(is.null(predProb)) tempUnkProb else rbind(predProb, tempUnkProb)
Expand Down
1 change: 1 addition & 0 deletions pkg/caret/R/misc.R
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@ evalSummaryFunction <- function(y, wts, ctrl, lev, metric, method) {
stop("train()'s use of ROC codes requires class probabilities. See the classProbs option of trainControl()")
}
if(!is.null(wts)) testOutput$weights <- sample(wts, min(10, length(wts)))
testOutput$rowIndex <- sample(seq(along = y), size = nrow(testOutput))
ctrl$summaryFunction(testOutput, lev, method)
}

Expand Down
2 changes: 1 addition & 1 deletion pkg/caret/R/print.varImp.train.R
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ function(x, top = min(20, dim(x$importance)[1]), digits = max(3, getOption("digi
printObj <- printObj[,1,drop = FALSE]
names(printObj) <- "Importance"
}
print.data.frame(printObj, digits = digits, ...)
print(printObj, digits = digits, ...)
invisible(x)
}

8 changes: 5 additions & 3 deletions pkg/caret/R/rfe.R
Original file line number Diff line number Diff line change
Expand Up @@ -535,7 +535,7 @@ gamFuncs <- list(summary = defaultSummary,
},
pred = function(object, x)
{
#browser()
if(!is.data.frame(x)) x <- as.data.frame(x)
loaded <- search()
gamLoaded <- any(loaded == "package:gam")
if(gamLoaded) detach(package:gam)
Expand Down Expand Up @@ -625,12 +625,13 @@ rfFuncs <- list(summary = defaultSummary,
lmFuncs <- list(summary = defaultSummary,
fit = function(x, y, first, last, ...)
{
tmp <- as.data.frame(x)
tmp <- if(is.data.frame(x)) x else as.data.frame(x)
tmp$y <- y
lm(y~., data = tmp)
},
pred = function(object, x)
{
if(!is.data.frame(x)) x <- as.data.frame(x)
predict(object, x)
},
rank = function(object, x, y)
Expand Down Expand Up @@ -688,12 +689,13 @@ nbFuncs <- list(summary = defaultSummary,
lrFuncs <- ldaFuncs
lrFuncs$fit <- function (x, y, first, last, ...)
{
tmp <- x
tmp <- if(is.data.frame(x)) x else as.data.frame(x)
tmp$Class <- y
glm(Class ~ ., data = tmp, family = "binomial")
}
lrFuncs$pred <- function (object, x)
{
if(!is.data.frame(x)) x <- as.data.frame(x)
lvl <- levels(object$data$Class)
tmp <- predict(object, x, type = "response")
out <- data.frame(1-tmp, tmp)
Expand Down
Loading