Skip to content

Commit

Permalink
Merge pull request #57 from georgheinze/bug_fix
Browse files Browse the repository at this point in the history
Fixed bug in backward() and forward()
  • Loading branch information
gregorsteiner committed Aug 18, 2023
2 parents 91afc91 + d6277f4 commit c532a47
Show file tree
Hide file tree
Showing 6 changed files with 66 additions and 44 deletions.
30 changes: 16 additions & 14 deletions R/add1.R
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
#' @param scope The scope of variables considered for adding or dropping. Should be a
#' vector of variable names. Can be left missing; the method will then use all variables
#' in the object's data slot which are not identified as the response variable.
#' @param data The data frame used to fit the object.
#' @param test The type of test statistic. Currently, only the PLR test (penalized likelihood
#' ratio test) is allowed for logistf fits.
#' @param ... Further arguments passed to or from other methods.
Expand All @@ -21,10 +22,10 @@
#' @examples
#' data(sex2)
#' fit<-logistf(data=sex2, case~1, pl=FALSE)
#' add1(fit, scope=c("dia", "age"))
#' add1(fit, scope=c("dia", "age"), data=sex2)
#'
#' fit2<-logistf(data=sex2, case~age+oc+dia+vic+vicl+vis)
#' drop1(fit2)
#' drop1(fit2, data=sex2)
#'
#' @importFrom stats add.scope
#' @importFrom stats update.formula
Expand All @@ -33,13 +34,13 @@
#' @rdname add1
#' @method add1 logistf
#' @exportS3Method add1 logistf
add1.logistf<-function(object, scope, test="PLR", ...){
add1.logistf<-function(object, scope, data, test="PLR", ...){
if(missing(scope)) stop("please provide scope: no terms in scope for adding to object")
else if(is.numeric(scope)) scope<-attr(terms(object),"term.labels")[scope]
else if(!is.character(scope)) scope <- add.scope(object, update.formula(object, scope))

mf <- match.call(expand.dots =FALSE)
m <- match(c("object", "scope", "test"), names(mf), 0L)
m <- match(c("object", "scope", "test", "data"), names(mf), 0L)
mf <- mf[c(1, m)]
object <- eval(mf$object, parent.frame())

Expand All @@ -52,7 +53,8 @@ add1.logistf<-function(object, scope, test="PLR", ...){
mat<-matrix(0,nvar,3)
for(i in 1:nvar){
newform<-as.formula(paste(object$formula,variables[i], sep="+"))
res<-anova(object, update(object,formula=newform))
fit2 <- update(object, formula=newform, data = data)
res<-anova(object, fit2)
mat[i,1]<-res$chisq
mat[i,2]<-res$df
mat[i,3]<-res$pval
Expand All @@ -62,18 +64,18 @@ add1.logistf<-function(object, scope, test="PLR", ...){
return(mat)
}
#' @exportS3Method add1 flic
add1.flic<-function(object, scope, test="PLR", ...){
add1.logistf(object, scope, ...)
add1.flic<-function(object, scope, data, test="PLR", ...){
add1.logistf(object, scope, data, ...)
}

#' @exportS3Method add1 flac
add1.flac<-function(object, scope, test="PLR", ...){
add1.logistf(object, scope, ...)
add1.flac<-function(object, scope, data, test="PLR", ...){
add1.logistf(object, scope, data, ...)
}
#' @aliases drop1
#' @method drop1 logistf
#' @exportS3Method drop1 logistf
drop1.logistf<-function(object, scope, test="PLR", ...){
drop1.logistf<-function(object, scope, data, test="PLR", ...){
mf <- match.call(expand.dots =FALSE)
m <- match(c("object", "scope", "test"), names(mf), 0L)
mf <- mf[c(1, m)]
Expand Down Expand Up @@ -117,12 +119,12 @@ drop1.logistf<-function(object, scope, test="PLR", ...){

#' @method drop1 flic
#' @exportS3Method drop1 flic
drop1.flic<-function(object, scope, test="PLR", ...){
drop1.logistf(object, scope, ...)
drop1.flic<-function(object, scope, data, test="PLR", ...){
drop1.logistf(object, scope, data, ...)
}

#' @method drop1 flac
#' @exportS3Method drop1 flac
drop1.flac<-function(object, scope, test="PLR", ...){
drop1.logistf(object, scope, augmented_data=TRUE,...)
drop1.flac<-function(object, scope, data, test="PLR", ...){
drop1.logistf(object, scope, data, augmented_data=TRUE,...)
}
45 changes: 28 additions & 17 deletions R/backward.r
Original file line number Diff line number Diff line change
Expand Up @@ -3,14 +3,20 @@
#' These functions provide simple backward elimination/forward selection procedures for logistf models.
#'
#' The variable selection is simply performed by repeatedly calling add1 or drop1 methods for logistf,
#' and is based on penalized likelihood ratio test. It can also properly handle variables that were
#' defined as factors in the original data set.
#' and is based on penalized likelihood ratio test.
#'
#' Note that selecting among factor variables is not supported.
#' One way to use forward or backward with factor variables is to first convert them
#' into numeric variables (0/1 coded dummy variables, choosing a sensible reference category).
#' Forward and backward will then perform selection on the dummy variables,
#' meaning that it will collapse levels of a factor variable with similar outcomes.
#'
#' @param object A fitted logistf model object. To start with an empty model, create a model fit
#' with a formula= y~1, pl=FALSE. (Replace y by your response variable.)
#' @param scope The scope of variables to add/drop from the model. Can be missing for backward, backward will use
#' the terms of the object fit. Alternatively, an arbitrary vector of variable names can be given, to allow
#' that only some of the variables will be competitively selected or dropped. Has to be provided for forward.
#' @param data The data frame used to fit the object.
#' @param steps The number of forward selection/backward elimination steps.
#' @param slstay For \code{backward}, the significance level to stay in the model.
#' @param slentry For \code{forward}, the significance level to enter the model.
Expand All @@ -22,13 +28,14 @@
#'
#' @return An updated \code{logistf, flic} or \code{flac} fit with the finally selected model.
#'
#'
#' @examples
#' data(sex2)
#' fit<-logistf(data=sex2, case~1, pl=FALSE)
#' fitf<-forward(fit, scope = c("dia", "age"))
#' fitf<-forward(fit, scope=c("dia", "age"), data=sex2)
#'
#' fit2<-logistf(data=sex2, case~age+oc+vic+vicl+vis+dia)
#' fitb<-backward(fit2)
#' fitb<-backward(fit2, data=sex2)
#'
#' @importFrom utils tail
#'
Expand All @@ -40,14 +47,14 @@ backward <- function(object,...){
#' @exportS3Method backward logistf
#' @method backward logistf
#' @rdname backward
backward.logistf <- function(object, scope, steps=1000, slstay=0.05, trace=TRUE, printwork=FALSE,full.penalty=FALSE, ...){
backward.logistf <- function(object, scope, data, steps=1000, slstay=0.05, trace=TRUE, printwork=FALSE,full.penalty=FALSE, ...){
istep<-0 #index of steps

mf <- match.call(expand.dots =FALSE)
m <- match("object", names(mf), 0L)
mf <- mf[c(1, m)]
object <- eval(mf$object, parent.frame())
object <- update(object, formula=object$formula)
object <- update(object, formula=object$formula, data = data)
variables <- attr(terms(object),"term.labels")
form <- formula(terms(object)) #to take care of dot shortcut
Terms <- terms(object)
Expand All @@ -56,6 +63,8 @@ backward.logistf <- function(object, scope, steps=1000, slstay=0.05, trace=TRUE,

terms.fit <- object$modcontrol$terms.fit
if(!is.null(terms.fit)) stop("Please call backward on a logistf-object with all terms fitted.")

if(any(sapply(data[, variables], is.factor))) stop("Selecting among factor variables is not supported.")

working<-object
if(trace){
Expand Down Expand Up @@ -87,10 +96,10 @@ backward.logistf <- function(object, scope, steps=1000, slstay=0.05, trace=TRUE,
inscope <- factor.scope(inscope, list(drop = fdrop))$drop

if(full.penalty && istep!=0){ #check with istep!=0 if removal is an empty vector - not possible to pass such to drop1
mat <- drop1(object, scope=inscope, full.penalty.vec=removal,...)
mat <- drop1(object, scope=inscope, data = data, full.penalty.vec=removal,...)
}
else {
mat<-drop1(working,scope=inscope,...)
mat<-drop1(working,scope=inscope, data = data, ...)
}
istep<-istep+1
if(all(mat[,3]<slstay)) {
Expand All @@ -109,12 +118,12 @@ backward.logistf <- function(object, scope, steps=1000, slstay=0.05, trace=TRUE,
if(!full.penalty){ #update working only if full.penalty==FALSE
newform <- update.formula(working$formula,paste("~ . -", curr_removal))
if(working$df==2 | working$df==mat[mat[,3]==max(mat[,3]),2][1]){
working<-update(working, formula=newform, pl=FALSE, evaluate = FALSE)
working <- eval.parent(working)
working<-update(working, formula=newform, data = data, pl=FALSE)
#working <- eval.parent(working)
}
else {
working<-update(working, formula=newform, evaluate = FALSE)
working <- eval.parent(working)
working<-update(working, formula=newform, data = data)
#working <- eval.parent(working)

}
Terms <- terms(working)
Expand All @@ -139,7 +148,7 @@ backward.logistf <- function(object, scope, steps=1000, slstay=0.05, trace=TRUE,
tofit <- (1:k)[-(tmp+1)]
modcontrol <- object$modcontrol
modcontrol$terms.fit <- tofit
working<-update(working, modcontrol = modcontrol)
working<-update(working, modcontrol = modcontrol, data = data)
}
}
return(working)
Expand All @@ -160,7 +169,7 @@ forward <- function(object,...){
#' @exportS3Method forward logistf
#' @method forward logistf
#' @rdname backward
forward.logistf<-function(object, scope, steps=1000, slentry=0.05, trace=TRUE, printwork=FALSE, pl=TRUE, ...){
forward.logistf<-function(object, scope, data, steps=1000, slentry=0.05, trace=TRUE, printwork=FALSE, pl=TRUE, ...){
istep<-0

mf <- match.call(expand.dots =FALSE)
Expand All @@ -172,6 +181,8 @@ forward.logistf<-function(object, scope, steps=1000, slentry=0.05, trace=TRUE, p
terms.fit <- object$modcontrol$terms.fit
if(!is.null(terms.fit)) stop("Please call forward on a logistf-object with all terms fitted.")

if(any(sapply(data[, scope], is.factor))) stop("Selecting among factor variables is not supported.")

working<-object
if(missing(scope)) {
stop("Please provide scope (vector of variable names).\n")
Expand All @@ -194,13 +205,13 @@ forward.logistf<-function(object, scope, steps=1000, slentry=0.05, trace=TRUE, p
inscope<-scope
while(istep<steps & length(inscope)>=1){
istep<-istep+1
mat<-add1(working, scope=inscope)
mat<-add1(working, scope = inscope, data = data)
if(all(mat[,3]>slentry)) break
index<-(1:nrow(mat))[mat[,3]==min(mat[,3])]
if(length(index)>1) index<-index[mat[index,1]==max(mat[index,1])]
addvar<-rownames(mat)[index]
newform=as.formula(paste("~.+",addvar))
working<-update(working, formula=newform, pl=FALSE)
working<-update(working, formula=newform, pl=FALSE, data = data)
newindex<-is.na(match(inscope, attr(terms(object),"term.labels")))
if(all(is.na(newindex)==TRUE)) break
#inscope<-inscope[-index]
Expand All @@ -213,7 +224,7 @@ forward.logistf<-function(object, scope, steps=1000, slentry=0.05, trace=TRUE, p
}
}
}
if(pl) working<-update(working, pl=TRUE)
if(pl) working<-update(working, pl=TRUE, data = data)
if(trace) cat("\n")
return(working)
}
5 changes: 2 additions & 3 deletions R/logistf.R
Original file line number Diff line number Diff line change
Expand Up @@ -99,12 +99,11 @@
#' matrix(rbinom(2000,2,runif(2000)*0.5),100,20))
#' colnames(snpdata)<-paste("SNP",1:20,"_",sep="")
#' snpdata<-as.data.frame(snpdata)
#' for(i in 1:20) snpdata[,i]<-as.factor(snpdata[,i])
#' snpdata$case<-c(rep(0,100),rep(1,100))
#'
#' fitsnp<-logistf(data=snpdata, formula=case~1, pl=FALSE)
#' add1(fitsnp, scope=paste("SNP",1:20,"_",sep=""))
#' fitf<-forward(fitsnp, scope = paste("SNP",1:20,"_",sep=""))
#' add1(fitsnp, scope=paste("SNP",1:20,"_",sep=""), data=snpdata)
#' fitf<-forward(fitsnp, scope = paste("SNP",1:20,"_",sep=""), data=snpdata)
#' fitf
#'
#' @author Georg Heinze and Meinhard Ploner
Expand Down
8 changes: 5 additions & 3 deletions man/add1.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

17 changes: 13 additions & 4 deletions man/backward.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

5 changes: 2 additions & 3 deletions man/logistf.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

0 comments on commit c532a47

Please sign in to comment.