Skip to content

Commit

Permalink
version 1.3.0
Browse files Browse the repository at this point in the history
  • Loading branch information
danieltompsett authored and cran-robot committed Jan 4, 2022
1 parent d41f57f commit 72ea89e
Show file tree
Hide file tree
Showing 15 changed files with 1,465 additions and 1,305 deletions.
12 changes: 6 additions & 6 deletions DESCRIPTION
Expand Up @@ -2,19 +2,19 @@ Package: gesttools
Type: Package
Title: General Purpose G-Estimation for End of Study or Time-Varying
Outcomes
Version: 1.2.0
Version: 1.3.0
Author: Daniel Tompsett, Stijn Vansteelandt, Oliver Dukes, Bianca De Stavola
Maintainer: Daniel Tompsett <danieltompsettwork@gmail.com>
Description: Provides a series of general purpose tools to perform g-estimation using the methods described in Sjolander and Vansteelandt (2016) <doi:10.1515/em-2015-0005> and Dukes and Vansteelandt <doi:10.1093/aje/kwx347>. The package allows for g-estimation in a wide variety of circumstances, including an end of study or time-varying outcome, and an exposure that is a binary, continuous, or a categorical variable with three or more categories. The package also supports g-estimation with time-varying causal effects and effect modification by a confounding variable.
License: GPL-3
Encoding: UTF-8
LazyData: false
RoxygenNote: 7.1.1
RoxygenNote: 7.1.2
Imports: DataCombine, tidyr, tibble, tidyselect, geeM, rsample, nnet,
magrittr
magrittr, testthat
URL: https://github.com/danieltompsett/gesttools
BugReports: https://github.com/danieltompsett/gesttools/issues
Config/testthat/edition: 3
NeedsCompilation: no
Packaged: 2021-06-10 12:12:03 UTC; danie
Packaged: 2021-12-28 22:44:31 UTC; danie
Repository: CRAN
Date/Publication: 2021-06-10 12:30:02 UTC
Date/Publication: 2022-01-04 09:20:02 UTC
26 changes: 14 additions & 12 deletions MD5
@@ -1,12 +1,14 @@
21dc5399dd23c8e40c36d14612a5e92c *DESCRIPTION
85277f9fbbc040992f797e1d0aee8be1 *NAMESPACE
957e569bfd4f200d7a3ac3a4aed457da *R/FormatData.R
eec3b250c6b064fe2e9ff97189bee7d3 *R/dataexamples.R
ba98b87a6305b3d023fc9a14e01362e9 *R/gestMultiple.R
a2a9bc0617e5df5dc05692e2dc917354 *R/gestSingle.R
bd338af86889cb644385ab93487954b4 *R/gestboot.R
f4895791aad931c909d94ffdede6d515 *man/FormatData.Rd
5cdc73c2a35dd4622385126619addb7f *man/dataexamples.Rd
8b8627228f71f64f80a7769077c8533d *man/gestMultiple.Rd
df0fee5fd62ac25d53aeb768385935ac *man/gestSingle.Rd
2fc955afa4df12e21ceb79fec5f9fe33 *man/gestboot.Rd
b82155154637ea1fa04f26312d1170e9 *DESCRIPTION
78eb1179edf4cb26d0a16ca27ff9bd65 *NAMESPACE
411ade2a445c6b26e8f6f50ca18224a0 *R/FormatData.R
38bec68bc8fb55194b76f15352bae11b *R/dataexamples.R
353ac2dbddb7c64379a2aa90456f2637 *R/gestMultiple.R
b9dda3b5edda76a02597ae78ee92a5d8 *R/gestSingle.R
8b6dbd00b05710cbafa72d0b1af2238b *R/gestboot.R
d3026cc0204b91ad959499d1c45f87c7 *man/FormatData.Rd
b94e5d1649b7323b4f9036ce39b39a59 *man/dataexamples.Rd
5c03f475fe3ae6f245d0ab2ea45c1e60 *man/gestMultiple.Rd
2b2e24a9cefda5abd6ef53156d4941a4 *man/gestSingle.Rd
dcf4c2fb2f43e721e41f712897ad8003 *man/gestboot.Rd
baadd5fcff0f63aff29edee57598e176 *tests/testthat.R
04d67ec07380ca4555a4988b2db60c95 *tests/testthat/test.R
2 changes: 2 additions & 0 deletions NAMESPACE
Expand Up @@ -5,13 +5,15 @@ export(dataexamples)
export(gestMultiple)
export(gestSingle)
export(gestboot)
import(testthat)
importFrom(DataCombine,slide)
importFrom(geeM,geem)
importFrom(magrittr,"%>%")
importFrom(nnet,multinom)
importFrom(rsample,bootstraps)
importFrom(stats,Gamma)
importFrom(stats,complete.cases)
importFrom(stats,formula)
importFrom(stats,gaussian)
importFrom(stats,glm)
importFrom(stats,plogis)
Expand Down
99 changes: 51 additions & 48 deletions R/FormatData.R
Expand Up @@ -22,8 +22,7 @@
#' corresponding to the lagged histories of all variables included in \code{varying}.
#' These will be labeled as \code{LagiVar} where \code{Var} is the variable name and \code{i}
#' indicates how much the variable is lagged by. For example \code{Lag2An} is the value of \code{An}, 2
#' time periods prior. Note that \code{LagAn1} is not generated as this is automatically included
#' in the g-estimation functions.
#' time periods prior.
#' @param GenerateHistoryMax An optional positive integer specifying \code{GenerateHistory} to generate exposure histories
#' up to \code{GenerateHistoryMax} time periods prior.
#'
Expand All @@ -35,62 +34,66 @@
#' \code{as.numeric()} variable.
#'
#' @examples
#' data<-dataexamples(n=1000,seed=3456,Censoring=TRUE)$datagest
#' #To demonstrate the function we
#' #Delete the third row, corresponding to the entry for ID 1 at time 3
#' data<-data[-3,]
#' datanew<-FormatData(data=data,idvar="id",timevar="time",An="A",
#' varying=c("A","L"),GenerateHistory=FALSE,GenerateHistoryMax=NA)
#' data <- dataexamples(n = 1000, seed = 3456, Censoring = TRUE)$datagest
#' # To demonstrate the function we
#' # Delete the third row, corresponding to the entry for ID 1 at time 3
#' data <- data[-3, ]
#' datanew <- FormatData(
#' data = data, idvar = "id", timevar = "time", An = "A",
#' Cn = "C", varying = c("A", "L"), GenerateHistory = TRUE, GenerateHistoryMax = 1
#' )
#' head(datanew)
#' #Note that the missing entry has been re-added,
#' #with missing values for A and L in the third row
#' #An example with lagged history of time varying variables created.
#' data<-dataexamples(n=1000,seed=3456,Censoring=TRUE)$datagestmultcat
#' datanew<-FormatData(data=data,idvar="id",timevar="time",An="A",
#' Cn="C",varying=c("A","L"),GenerateHistory=TRUE,GenerateHistoryMax=NA)
#' # Note that the missing entry has been re-added,
#' # with missing values for A and L in the third row
#' # An example with lagged history of time varying variables created.
#' data <- dataexamples(n = 1000, seed = 3456, Censoring = TRUE)$datagestmultcat
#' datanew <- FormatData(
#' data = data, idvar = "id", timevar = "time", An = "A",
#' Cn = "C", varying = c("Y","A", "L"), GenerateHistory = TRUE, GenerateHistoryMax = NA
#' )
#' head(datanew)
#' @export


FormatData <- function(data, idvar, timevar, An, varying, Cn = NA,
                       GenerateHistory = FALSE, GenerateHistoryMax = NA) {
  # Purpose: put longitudinal data into a complete long format (one row per
  # id/time combination, ordered by id then time) and optionally append lagged
  # copies of the time-varying variables.
  #
  # Arguments:
  #   data               data frame in long format.
  #   idvar, timevar     names (strings) of the id and time columns.
  #   An                 name of the exposure column; not referenced in this
  #                      body, kept so existing callers continue to work.
  #   varying            names of the time-varying columns.
  #   Cn                 optional name of the censoring column (NA if none).
  #   GenerateHistory    if TRUE, add lagged columns named Lag<i><Var>.
  #   GenerateHistoryMax optional maximum lag; NA means "all available lags".
  #
  # Returns: the reformatted data frame.
  # Raises: an error if `data` is not a data frame, or if lags are requested
  # but the largest time value is <= 1.
  if (!is.data.frame(data)) {
    stop("Either no data set has been given, or it is not in a data frame.")
  }
  # The censoring indicator varies over time as well, so it is reshaped
  # together with the other time-varying columns.
  if (!is.na(Cn)) {
    varying <- c(varying, Cn)
  }

  # Wide -> long round trip: id/time combinations absent from `data` come back
  # as rows whose time-varying columns are NA.
  datwide <- reshape(data, direction = "wide", timevar = timevar, idvar = idvar, v.names = varying)
  datrec <- reshape(datwide, direction = "long", timevar = timevar, idvar = idvar)
  datrec <- datrec[order(datrec[, idvar], datrec[, timevar]), ]

  if (GenerateHistory == TRUE) {
    # Named `n_periods` rather than `T`, which would shadow the TRUE alias.
    n_periods <- max(datrec[, timevar])
    if (n_periods <= 1) {
      stop("Lagged variables cannot be created with only 1 time period")
    }
    if (is.na(GenerateHistoryMax)) {
      GenerateHistoryMax <- n_periods - 1
    }
    # No lagged history is generated for the censoring indicator.
    varying <- varying[!(varying %in% Cn)]
    histmax <- min(n_periods - 1, GenerateHistoryMax)

    # Returns a copy of `datrec` with columns Lag1<name>, ..., Lag<histmax><name>
    # appended for one time-varying variable.
    add_lags <- function(name) {
      lagdat <- datrec
      # seq_len() yields an empty loop if histmax < 1 (1:histmax would not).
      for (i in seq_len(histmax)) {
        lagname <- paste0("Lag", i, name)
        # Group by the caller-supplied id column; the previous code hard-coded
        # "id" and so only worked when the id column had exactly that name.
        lagdat <- slide(
          data = lagdat, Var = name, GroupVar = idvar, slideBy = -i,
          NewVar = lagname, reminder = FALSE
        )
        # Rows at times 1..i have no value i periods earlier: set the lag to
        # the reference category (factors) or to 0 (everything else).
        unobserved <- lagdat[, timevar] %in% seq_len(i)
        if (is.factor(lagdat[, name])) {
          lagdat[, lagname] <- as.factor(lagdat[, lagname])
          levels(lagdat[, lagname]) <- levels(lagdat[, name])
          lagdat[unobserved, lagname] <- levels(lagdat[, name])[1]
        } else {
          lagdat[unobserved, lagname] <- 0
        }
      }
      lagdat
    }

    # One lag-augmented copy per variable, merged back on their shared columns.
    datrec <- Reduce(merge, lapply(varying, add_lags))
  }

  return(datrec)
}

0 comments on commit 72ea89e

Please sign in to comment.