Skip to content

Commit

Permalink
Merge 3152090 into c7735fb
Browse files Browse the repository at this point in the history
  • Loading branch information
t-8-n committed Mar 22, 2016
2 parents c7735fb + 3152090 commit b719220
Show file tree
Hide file tree
Showing 4 changed files with 497 additions and 1 deletion.
7 changes: 6 additions & 1 deletion pkg/mboostDevel/NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -7,10 +7,11 @@ import(Matrix)
import(parallel)
importFrom(survival, Surv, survfit)
importFrom(splines, bs, splineDesign)
importFrom(lattice, levelplot)
importFrom(lattice, levelplot, barchart)
importFrom(nnls, nnls)
importFrom(quadprog, solve.QP)
importFrom(utils, packageDescription)
#importFrom(utils, globalVariables)
importFrom("graphics", "abline", "axis", "box", "lines", "matlines", "matplot", "plot", "points", "polygon")
importFrom("grDevices", "hcl", "rgb")

Expand All @@ -27,6 +28,7 @@ export(glmboost,
survFit, selected, selected.mboost,
nuisance, "%+%", "%X%", "%O%", extract, risk, "mstop<-",
stabsel.mboost, stabsel_parameters.mboost, confint.mboost, confint.glmboost,
varimp,
## exports to make FDboost happy:
df2lambda, hyper_bbs, bl_lin)
###, basesel, fitsel)
Expand Down Expand Up @@ -76,6 +78,7 @@ S3method(coef, bm_lin)
S3method(coef, bm_cwlin)
S3method(selected, mboost)
S3method(cvrisk, mboost)
S3method(varimp, mboost)
# S3method(selected, glmboost)
S3method(update, mboost)
S3method(extract, mboost)
Expand All @@ -93,5 +96,7 @@ S3method(lines, mboost.ci)
S3method(print, glmboost.ci)
S3method(stabsel, mboost)
S3method(stabsel_parameters, mboost)
S3method(plot, varimp)
S3method(as.data.frame, varimp)

useDynLib(mboostDevel)
246 changes: 246 additions & 0 deletions pkg/mboostDevel/R/varimp.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,246 @@
varimp <- function(object, ...)
UseMethod("varimp")


varimp.mboost <- function(object, ...) {

### which baselearners/variables were selected in boosting steps
learner_names <- if( !(inherits(object, "glmboost")) )
names(object$baselearner) else variable.names(object)
learner_selected <- object$xselect()


### --------------------------------------------------
### compute risks for each step
# initial risk for the intercept model
y <- object$response
if(is.factor(y)) y <- 2*as.numeric(y) - 3
risk0 <- object$family@risk( y = y, f = object$offset )
# risk after each boosting-step
riskstep <- object$risk()
# risk reduction per step
riskdiff <- c(risk0, riskstep[-length(riskstep)]) - riskstep

### compute empirical in-bag risk (according to output in cvrisk)
riskdiff <- riskdiff / length(object$response)


### --------------------------------------------------
### explained risk attributed to baselearners
explained <- sapply(seq_along(learner_names), FUN = function(i) {
sum( riskdiff[which(learner_selected == i)] )
})
names(explained) <- learner_names


### --------------------------------------------------
### define new varimp-object
class(explained) <- "varimp"

# add selection frequencies to varimp-object
attr(explained, "selfreqs") <- sapply(seq_along(learner_names), function(i) {
mean(learner_selected == i)
})

# add variable names per baselearner to varimp-object
var_names <- unname( variable.names(object) )
# for identifiability sort variable names in interactions
# (not required for glmboost interactions)
var_names <- sapply(strsplit(var_names, ", "), function(x) {
do.call( function(...) paste(... , sep = ", " ),
as.list(x[order(x)]) ) })
var_order <- order( sapply(var_names, function(i) {
sum(explained[var_names == i])
}) )

# add names as ordered factor (for order in graphical output)
attr(explained, "variable_names") <- ordered(var_names, levels =
unique(var_names[var_order]))

return(explained)
}


as.data.frame.varimp <- function(x, row.names = NULL, optional = FALSE, ...) {
data.frame(
reduction = as.numeric(x),
# blearner as ordered factor (corresponding to variable(_names))
blearner = ordered(names(x), levels = unique(names(x)[order(x)])),
variable = attr(x, "variable_names"),
selfreq = attr(x, "selfreqs")
)
}


plot.varimp <- function(x, percent = TRUE, type = c("variable", "blearner"),
blorder = c("importance", "alphabetical", "rev_alphabetical", "formula"),
nbars = 10L, maxchar = 20L, xlab = NULL, ylab = NULL, xlim, auto.key, ...) {

### --------------------------------------------------
### get arguments
args <- as.list(match.call())

type <- match.arg(type)
blorder <- match.arg(blorder)


### --------------------------------------------------
### check arguments
if( !(class(x) == "varimp") )
stop(paste(deparse(substitute(x)), "is not of class varimp."))

if( !(is.logical(percent)) )
stop("Parameter percent has of type logical.")

if( !(type %in% c("blearner","variable")) )
stop("Parameter type has to be 'blearner' or 'variable'.")

if( !(is.numeric(nbars) && nbars > 0 && length(nbars) == 1) )
stop("Parameter nbars has to be a positive integer.")

if( !(is.numeric(maxchar) && maxchar > 0 && length(maxchar) == 1) )
stop("Parameter maxchar has to be a positive integer.")

if( !(blorder %in% c("importance", "alphabetical", "rev_alphabetical",
"formula")) )
stop(paste("Parameter blorder has to be one of 'importance',",
"'alphabetical', 'rev_alphabetical' or 'formula'."))


### --------------------------------------------------
### set plotting arguments
# set range for x-axis
if( !("xlim" %in% names(args)) ) {
# special calculation to include the case that risk reduction is negative
xlim <- c(sum(x*(x < 0)), sum(x*(x >= 0)))
if( percent ) xlim <- xlim / sum(abs(x))
}

# set x-axis label and transform values to percentages if necessary
if( percent ) {
x <- x / sum(abs(x))
# special handling if risk reduction is negative
if( any(x < 0) )
warning(paste("At least one risk reduction value is negative. Percental",
"reduction is thus calculated as 'reduction/sum(abs(reduction))'."))
}

# set axis labels
xlab <- ifelse(is.null(xlab), ifelse(percent, "In-bag Risk Reduction (%)",
"In-bag Risk Reduction"), xlab)
ylab <- ifelse(is.null(ylab), ifelse(type == "variable", "Variables",
"Baselearner"), ylab)



### --------------------------------------------------
### create data.frame for all values shown in barchart
plot_data <- data.frame(x)


### --------------------------------------------------
# specify baselearner order (if blorder != "importance", the default order set
# via as.data.frame.varimp)
plot_data[, "blearner"] <- switch(blorder,
# importance is default, in this case no change
importance = plot_data[, "blearner"],
# as blearner order is reverted again later, they are specified in
# reverted order here
alphabetical = ordered(names(x), rev(levels(ordered(names(x))))),
rev_alphabetical = ordered(names(x)),
formula = ordered(names(x), rev(names(x)))
)


### --------------------------------------------------
### if number of baselearners/variables exceeds nbars, aggregate some values
if( nlevels(plot_data[, type]) > nbars ) {
# logical vector indicating rows to be subsumed
others <- !(plot_data[, type] %in% tail(levels(plot_data[, type]), nbars-1))

for( i in c("blearner","variable") ) {
# add new level OTHER to factor variable blearner/variable
plot_data[, i] <-
ordered(plot_data[, i], levels = c("other", levels(plot_data[, i])))
# set baselearner/variable names to OTHER depending on nbars
plot_data[others, i] <- "other"
}

# add up risk reduction and selfreqs for OTHER
plot_data[others, "reduction"] <- sum( plot_data[others, "reduction"] )
plot_data[others, "selfreq"] <- sum( plot_data[others, "selfreq"] )

# use only number of observations corresponding to nbars
plot_data <- plot_data[c(which(!others), which(others)[1]),]

# update ordered factors
for( i in c("blearner","variable") )
plot_data[, i] <- factor(plot_data[, i])
}


### --------------------------------------------------
### create bar labels
# (maybe) truncate bar labels for baselearner or variable names
trunc_levels <- sapply(levels(plot_data[, type]),
function(name) {
paste(strtrim(name, maxchar), if( nchar(name) < maxchar ) "" else "..")
}
)

# additional warning message if bar labels are not distinct (after truncation)
if( length(unique(trunc_levels)) != nlevels(plot_data[, type]) ) {
warning(paste("'maxchar' set too small to distinguish", type, "labels.",
"Different variables are probably summed up.",
"Value for argument 'maxchar' should be incremented."))
}
levels(plot_data[, type]) <- trunc_levels

# convert rounded selfreqs to character
plot_data[, "selfreq"] <-
as.character(round(plot_data[, "selfreq"], digits = 2))

# for type = "variable" additionally accumulate selfreqs per variable
# in order of risk reduction of involved baselearners
if( type == "variable" ) {
# reverse order of baselearners (larger stacks first)
# (also compare line 128ff - setting of baselearner order)
plot_data[, "blearner"] <-
ordered(plot_data[, "blearner"], rev(levels(plot_data[, "blearner"])))
# sum up selfreqs
plot_data[, "selfreq"] <- sapply(plot_data[, "variable"], function(i) {
do.call( function(...) paste(..., sep = " + "), as.list(
plot_data[plot_data$variable == i, "selfreq"]
[order(plot_data[plot_data$variable == i, "blearner"])]
) )
})
}

# add selfreqs to bar labels
selfreq_labels = unlist( unique(plot_data[, c(type, "selfreq")])[2] )

levels(plot_data[, type]) = paste0( levels(plot_data[, type]), "\n ",
"sel. freq: ~", selfreq_labels[order(unique(plot_data[, type]))] )


### --------------------------------------------------
### activate auto.key if bars are stacked (only other bars than 'others')
if( !("auto.key" %in% names(args)) ) {
auto.key <- nlevels(plot_data[, "variable"]) < nrow(plot_data)
}


### --------------------------------------------------
### create final plot depending on type
if( type == "variable" ) {
barchart(variable ~ reduction, groups = plot_data[, "blearner"],
data = plot_data, horizontal = TRUE, xlab = xlab, ylab = ylab, xlim =xlim,
scales = list(x = list(tck = c(1,0), at = seq(0,sum(x), length.out = 5))),
stack = TRUE, auto.key = auto.key, ...)
} else {
barchart(blearner ~ reduction, data = plot_data,
horizontal = TRUE, xlab = xlab, ylab = ylab, xlim = xlim,
scales = list(x = list(tck = c(1,0), at = seq(0,sum(x), length.out = 5))),
...)
}
}

0 comments on commit b719220

Please sign in to comment.