Merge pull request #57 from georgheinze/bug_fix

Fixed bug in backward() and forward()
georgheinze · Aug 18, 2023 · c532a47 · c532a47
2 parents 91afc91 + d6277f4
commit c532a47
Show file tree

Hide file tree

Showing 6 changed files with 66 additions and 44 deletions.
diff --git a/R/add1.R b/R/add1.R
@@ -10,6 +10,7 @@
 #' @param scope The scope of variables considered for adding or dropping. Should be a 
 #' vector of variable names. Can be left missing; the method will then use all variables 
 #' in the object's data slot which are not identified as the response variable.
+#' @param data The data frame used to fit the object.
 #' @param test The type of test statistic. Currently, only the PLR test (penalized likelihood 
 #' ratio test) is allowed for logistf fits.
 #' @param ... Further arguments passed to or from other methods.
@@ -21,10 +22,10 @@
 #' @examples
 #' data(sex2) 
 #' fit<-logistf(data=sex2, case~1, pl=FALSE) 
-#' add1(fit, scope=c("dia", "age"))
+#' add1(fit, scope=c("dia", "age"), data=sex2)
 #'  
 #' fit2<-logistf(data=sex2, case~age+oc+dia+vic+vicl+vis) 
-#' drop1(fit2)
+#' drop1(fit2, data=sex2)
 #' 
 #' @importFrom stats add.scope
 #' @importFrom stats update.formula
@@ -33,13 +34,13 @@
 #' @rdname add1
 #' @method add1 logistf
 #' @exportS3Method add1 logistf
-add1.logistf<-function(object, scope, test="PLR", ...){
+add1.logistf<-function(object, scope, data, test="PLR", ...){
   if(missing(scope)) stop("please provide scope: no terms in scope for adding to object")
   else if(is.numeric(scope)) scope<-attr(terms(object),"term.labels")[scope]
   else if(!is.character(scope)) scope <- add.scope(object, update.formula(object, scope))
 
   mf <- match.call(expand.dots =FALSE)
-  m <- match(c("object", "scope", "test"), names(mf), 0L)
+  m <- match(c("object", "scope", "test", "data"), names(mf), 0L)
   mf <- mf[c(1, m)]
   object <- eval(mf$object, parent.frame())
 
@@ -52,7 +53,8 @@ add1.logistf<-function(object, scope, test="PLR", ...){
   mat<-matrix(0,nvar,3)
   for(i in 1:nvar){
     newform<-as.formula(paste(object$formula,variables[i], sep="+"))
-    res<-anova(object, update(object,formula=newform))
+    fit2 <- update(object, formula=newform, data = data)
+    res<-anova(object, fit2)
     mat[i,1]<-res$chisq
     mat[i,2]<-res$df
     mat[i,3]<-res$pval
@@ -62,18 +64,18 @@ add1.logistf<-function(object, scope, test="PLR", ...){
   return(mat)
 }
 #' @exportS3Method add1 flic
-add1.flic<-function(object, scope, test="PLR", ...){
-  add1.logistf(object, scope, ...)
+add1.flic<-function(object, scope, data, test="PLR", ...){
+  add1.logistf(object, scope, data, ...)
 }
 
 #' @exportS3Method add1 flac
-add1.flac<-function(object, scope, test="PLR", ...){
-  add1.logistf(object, scope, ...)
+add1.flac<-function(object, scope, data, test="PLR", ...){
+  add1.logistf(object, scope, data, ...)
 }
 #' @aliases drop1
 #' @method drop1 logistf
 #' @exportS3Method drop1 logistf
-drop1.logistf<-function(object, scope, test="PLR", ...){
+drop1.logistf<-function(object, scope, data, test="PLR", ...){
   mf <- match.call(expand.dots =FALSE)
   m <- match(c("object", "scope", "test"), names(mf), 0L)
   mf <- mf[c(1, m)]
@@ -117,12 +119,12 @@ drop1.logistf<-function(object, scope, test="PLR", ...){
 
 #' @method drop1 flic
 #' @exportS3Method drop1 flic
-drop1.flic<-function(object, scope, test="PLR", ...){
-  drop1.logistf(object, scope,  ...)
+drop1.flic<-function(object, scope, data, test="PLR", ...){
+  drop1.logistf(object, scope, data,  ...)
 }
 
 #' @method drop1 flac
 #' @exportS3Method drop1 flac
-drop1.flac<-function(object, scope, test="PLR", ...){
-  drop1.logistf(object, scope, augmented_data=TRUE,...)
+drop1.flac<-function(object, scope, data, test="PLR", ...){
+  drop1.logistf(object, scope, data, augmented_data=TRUE,...)
 }
diff --git a/R/backward.r b/R/backward.r
@@ -3,14 +3,20 @@
 #' These functions provide simple backward elimination/forward selection procedures for logistf models.
 #' 
 #' The variable selection is simply performed by repeatedly calling add1 or drop1 methods for logistf, 
-#' and is based on penalized likelihood ratio test. It can also properly handle variables that were 
-#' defined as factors in the original data set.
+#' and is based on penalized likelihood ratio test.
+#' 
+#' Note that selecting among factor variables is not supported.
+#' One way to use forward or backward with factor variables is to first convert them
+#' into numeric variables (0/1 coded dummy variables, choosing a sensible reference category).
+#' Forward and backward will then perform selection on the dummy variables, 
+#' meaning that it will collapse levels of a factor variable with similar outcomes.
 #' 
 #' @param object A fitted logistf model object. To start with an empty model, create a model fit 
 #' with a formula= y~1, pl=FALSE. (Replace y by your response variable.)
 #' @param scope The scope of variables to add/drop from the model. Can be missing for backward, backward will use 
 #' the terms of the object fit. Alternatively, an arbitrary vector of variable names can be given, to allow 
 #' that only some of the variables will be competitively selected or dropped. Has to be provided for forward.
+#' @param data The data frame used to fit the object.
 #' @param steps The number of forward selection/backward elimination steps.
 #' @param slstay For \code{backward}, the significance level to stay in the model.
 #' @param slentry For \code{forward}, the significance level to enter the model.
@@ -22,13 +28,14 @@
 #'
 #' @return An updated \code{logistf, flic} or \code{flac} fit with the finally selected model.
 #'
+#'
 #' @examples 
 #' data(sex2) 
 #' fit<-logistf(data=sex2, case~1, pl=FALSE) 
-#' fitf<-forward(fit, scope = c("dia", "age")) 
+#' fitf<-forward(fit, scope=c("dia", "age"), data=sex2) 
 #' 
 #' fit2<-logistf(data=sex2, case~age+oc+vic+vicl+vis+dia) 
-#' fitb<-backward(fit2)
+#' fitb<-backward(fit2, data=sex2)
 #' 
 #' @importFrom utils tail
 #' 
@@ -40,14 +47,14 @@ backward <- function(object,...){
 #' @exportS3Method backward logistf
 #' @method backward logistf
 #' @rdname backward
-backward.logistf <- function(object, scope, steps=1000, slstay=0.05, trace=TRUE, printwork=FALSE,full.penalty=FALSE, ...){
+backward.logistf <- function(object, scope, data, steps=1000, slstay=0.05, trace=TRUE, printwork=FALSE,full.penalty=FALSE, ...){
   istep<-0 #index of steps
 
   mf <- match.call(expand.dots =FALSE)
   m <- match("object", names(mf), 0L)
   mf <- mf[c(1, m)]
   object <- eval(mf$object, parent.frame())
-  object <- update(object, formula=object$formula)
+  object <- update(object, formula=object$formula, data = data)
   variables <- attr(terms(object),"term.labels")
   form <- formula(terms(object)) #to take care of dot shortcut
   Terms <- terms(object)
@@ -56,6 +63,8 @@ backward.logistf <- function(object, scope, steps=1000, slstay=0.05, trace=TRUE,
 
   terms.fit <- object$modcontrol$terms.fit
   if(!is.null(terms.fit)) stop("Please call backward on a logistf-object with all terms fitted.")
+
+  if(any(sapply(data[, variables], is.factor))) stop("Selecting among factor variables is not supported.")
 
   working<-object
   if(trace){
@@ -87,10 +96,10 @@ backward.logistf <- function(object, scope, steps=1000, slstay=0.05, trace=TRUE,
     inscope <- factor.scope(inscope, list(drop = fdrop))$drop
 
     if(full.penalty && istep!=0){ #check with istep!=0 if removal is an empty vector - not possible to pass such to drop1
-      mat <- drop1(object, scope=inscope, full.penalty.vec=removal,...)
+      mat <- drop1(object, scope=inscope, data = data, full.penalty.vec=removal,...)
     }
     else {
-      mat<-drop1(working,scope=inscope,...)
+      mat<-drop1(working,scope=inscope, data = data, ...)
     }
     istep<-istep+1
     if(all(mat[,3]<slstay)) {
@@ -109,12 +118,12 @@ backward.logistf <- function(object, scope, steps=1000, slstay=0.05, trace=TRUE,
     if(!full.penalty){ #update working only if full.penalty==FALSE
       newform <- update.formula(working$formula,paste("~ . -", curr_removal))
       if(working$df==2 | working$df==mat[mat[,3]==max(mat[,3]),2][1]){
-        working<-update(working, formula=newform, pl=FALSE, evaluate = FALSE)
-        working <- eval.parent(working)
+        working<-update(working, formula=newform, data = data, pl=FALSE)
+        #working <- eval.parent(working)
       }
       else {
-        working<-update(working, formula=newform, evaluate = FALSE)
-        working <- eval.parent(working)
+        working<-update(working, formula=newform, data = data)
+        #working <- eval.parent(working)
 
       }
       Terms <- terms(working)
@@ -139,7 +148,7 @@ backward.logistf <- function(object, scope, steps=1000, slstay=0.05, trace=TRUE,
       tofit <- (1:k)[-(tmp+1)]
       modcontrol <- object$modcontrol
       modcontrol$terms.fit <- tofit
-      working<-update(working, modcontrol = modcontrol)
+      working<-update(working, modcontrol = modcontrol, data = data)
     }
   }
   return(working)
@@ -160,7 +169,7 @@ forward <- function(object,...){
 #' @exportS3Method forward logistf
 #' @method forward logistf
 #' @rdname backward
-forward.logistf<-function(object, scope, steps=1000, slentry=0.05, trace=TRUE, printwork=FALSE, pl=TRUE, ...){
+forward.logistf<-function(object, scope, data, steps=1000, slentry=0.05, trace=TRUE, printwork=FALSE, pl=TRUE, ...){
   istep<-0
 
   mf <- match.call(expand.dots =FALSE)
@@ -172,6 +181,8 @@ forward.logistf<-function(object, scope, steps=1000, slentry=0.05, trace=TRUE, p
   terms.fit <- object$modcontrol$terms.fit
   if(!is.null(terms.fit)) stop("Please call forward on a logistf-object with all terms fitted.")
 
+  if(any(sapply(data[, scope], is.factor))) stop("Selecting among factor variables is not supported.")
+
   working<-object
   if(missing(scope)) {
     stop("Please provide scope (vector of variable names).\n")
@@ -194,13 +205,13 @@ forward.logistf<-function(object, scope, steps=1000, slentry=0.05, trace=TRUE, p
   inscope<-scope
   while(istep<steps & length(inscope)>=1){
     istep<-istep+1
-    mat<-add1(working, scope=inscope)
+    mat<-add1(working, scope = inscope, data = data)
     if(all(mat[,3]>slentry)) break
     index<-(1:nrow(mat))[mat[,3]==min(mat[,3])]
     if(length(index)>1) index<-index[mat[index,1]==max(mat[index,1])]
     addvar<-rownames(mat)[index]
     newform=as.formula(paste("~.+",addvar))
-    working<-update(working, formula=newform, pl=FALSE)
+    working<-update(working, formula=newform, pl=FALSE, data = data)
     newindex<-is.na(match(inscope, attr(terms(object),"term.labels")))
     if(all(is.na(newindex)==TRUE)) break
     #inscope<-inscope[-index]
@@ -213,7 +224,7 @@ forward.logistf<-function(object, scope, steps=1000, slentry=0.05, trace=TRUE, p
       }
     }
   }
-   if(pl) working<-update(working, pl=TRUE)
+   if(pl) working<-update(working, pl=TRUE, data = data)
    if(trace) cat("\n")
    return(working)
 }
diff --git a/R/logistf.R b/R/logistf.R
@@ -99,12 +99,11 @@
 #'   matrix(rbinom(2000,2,runif(2000)*0.5),100,20))
 #' colnames(snpdata)<-paste("SNP",1:20,"_",sep="")
 #' snpdata<-as.data.frame(snpdata)
-#' for(i in 1:20) snpdata[,i]<-as.factor(snpdata[,i])
 #' snpdata$case<-c(rep(0,100),rep(1,100))
 #' 
 #' fitsnp<-logistf(data=snpdata, formula=case~1, pl=FALSE)
-#' add1(fitsnp, scope=paste("SNP",1:20,"_",sep=""))
-#' fitf<-forward(fitsnp, scope = paste("SNP",1:20,"_",sep=""))
+#' add1(fitsnp, scope=paste("SNP",1:20,"_",sep=""), data=snpdata)
+#' fitf<-forward(fitsnp, scope = paste("SNP",1:20,"_",sep=""), data=snpdata)
 #' fitf
 #' 
 #' @author Georg Heinze and Meinhard Ploner

diff --git a/man/add1.Rd b/man/add1.Rd
diff --git a/man/backward.Rd b/man/backward.Rd
diff --git a/man/logistf.Rd b/man/logistf.Rd