Skip to content

Commit

Permalink
enable custom operations on coins and purses
Browse files Browse the repository at this point in the history
  • Loading branch information
bluefoxr committed Mar 7, 2024
1 parent 132a625 commit aba5a4c
Show file tree
Hide file tree
Showing 8 changed files with 449 additions and 3 deletions.
3 changes: 3 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,8 @@
S3method(Aggregate,coin)
S3method(Aggregate,data.frame)
S3method(Aggregate,purse)
S3method(Custom,coin)
S3method(Custom,purse)
S3method(Denominate,coin)
S3method(Denominate,data.frame)
S3method(Denominate,purse)
Expand Down Expand Up @@ -44,6 +46,7 @@ S3method(qTreat,purse)
export(Aggregate)
export(CAGR)
export(COIN_to_coin)
export(Custom)
export(Denominate)
export(Impute)
export(Normalise)
Expand Down
211 changes: 211 additions & 0 deletions R/custom.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,211 @@
#' Custom operation
#'
#' Custom operation on a purse. This is an experimental new feature.
#'
#' In this function, the data set named `dset` is extracted from the coin using
#' `get_dset(purse, dset)`. It is passed to the function `f_cust`, which is required
#' to return an equivalent but modified data frame, which is then written as a new
#' data set with name `write_to`. This is intended to allow arbitrary operations
#' on coin data sets while staying within the COINr framework, which means that if
#' `Regen()` is used, these operations will be re-run, allowing them to be included
#' in things like sensitivity analysis.
#'
#' The format of `f_cust` is important. It must be a function whose first argument
#' is called `x`: this will be the argument that the data is passed to. The data will
#' be in the same format as extracted via `get_dset(purse, dset)`, which means it will
#' have `uCode` and `Time` columns. `f_cust` can have other arguments which are passed
#' to it via `f_cust_para`. The function should return a data frame similar to the data
#' that was passed to it, it must contain have the same column names (meaning you can't
#' remove indicators), but otherwise is flexible - this means some caution is necessary
#' to ensure that subsequent operations don't fail. Be careful, for example, to ensure
#' that there are no duplicates in `uCode`, and that indicator columns are numeric.
#'
#' @param x A purse object
#' @param dset The data set to apply the operation to.
#' @param f_cust Function to apply to the data set. See details.
#' @param f_cust_para Optional additional parameters to pass to the function defined
#' by `f_cust`.
#' @param write_to Name of data set to write to
#' @param ... Arguments to pass to/from other methods.
#' @param global Logical: if `TRUE`, the entire data set, over all time points, is passed
#' to the function `f_cust`. This is useful if the custom operation should be different for
#' different time points, for example. Otherwise if `FALSE`, passes the data set within each
#' coin one at a time to `f_cust`.
#'
#' @return An updated purse.
#' @export
#'
#' @examples
#' # build example purse
#' purse <- build_example_purse(up_to = "new_coin")
#'
#' # custom function - set points before 2020 to NA for BEL in FDI due to a
#' # break in the series
#' f_cust <- function(x){x[(x$uCode == "BEL") & (x$Time < 2020), "FDI"] <- NA;
#' return(x)}
#'
#'
#'
Custom.purse <- function(x, dset, f_cust, f_cust_para = NULL, global = FALSE,
write_to = NULL, ...){

# input check
check_purse(x)

if(global){

iDatas <- get_dset(x, dset)

# run global dset through function
iDatas_c <- do.call(f_cust, c(list(x = iDatas), f_cust_para))

if("uCode" %nin% names(iDatas_c)){
stop("Required column 'uCode' not present in the data frame returned by f_cust.")
}
if("Time" %nin% names(iDatas_c)){
stop("Required column 'Time' not present in the data frame returned by f_cust.")
}
if(!setequal(names(iDatas_c), names(iDatas))){
stop("The output of ", f_cust, " has different column names than the input data set.")
}

# split by Time
iDatas_c_l <- split(iDatas_c, iDatas_c$Time)

# now write dsets to coins
x$coin <- lapply(x$coin, function(coin){

# get Time
tt <- coin$Meta$Unit$Time[[1]]
if(is.null(tt)){
stop("Time index is NULL or not found in writing treated data set to coin.")
}

if(is.null(write_to)){
write_to <- "Custom"
}

iData_write <- iDatas_c_l[[which(names(iDatas_c_l) == tt)]]
iData_write <- iData_write[names(iData_write) != "Time"]

# write dset first
coin <- write_dset(coin, iData_write, dset = write_to)

# also write to log - we signal that coin can't be regenerated any more
coin$Log$can_regen <- FALSE
coin$Log$message <- "Coin was subject to a custom operation inside a purse with global = TRUE. Cannot be regenerated."

coin
})

} else {

# apply treatment to each coin
x$coin <- lapply(x$coin, function(coin){
Custom.coin(coin, dset = dset, f_cust = f_cust, f_cust_para = f_cust_para,
write_to = write_to)
})

}

# make sure still purse class
class(x) <- c("purse", "data.frame")
x
}


#' Custom operation
#'
#' Custom operation on a coin. This is an experimental new feature so please check
#' the results carefully.
#'
#' In this function, the data set named `dset` is extracted from the coin using
#' `get_dset(coin, dset)`. It is passed to the function `f_cust`, which is required
#' to return an equivalent but modified data frame, which is then written as a new
#' data set with name `write_to`. This is intended to allow arbitrary operations
#' on coin data sets while staying within the COINr framework, which means that if
#' `Regen()` is used, these operations will be re-run, allowing them to be included
#' in things like sensitivity analysis.
#'
#' The format of `f_cust` is important. It must be a function whose first argument
#' is called `x`: this will be the argument that the data is passed to. The data will
#' be in the same format as extracted via `get_dset(coin, dset)`, which means it will
#' have a `uCode` column. `f_cust` can have other arguments which are passed
#' to it via `f_cust_para`. The function should return a data frame similar to the data
#' that was passed to it, it must contain have the same column names (meaning you can't
#' remove indicators), but otherwise is flexible - this means some caution is necessary
#' to ensure that subsequent operations don't fail. Be careful, for example, to ensure
#' that there are no duplicates in `uCode`, and that indicator columns are numeric.
#'
#' @param x A coin
#' @param dset Target data set
#' @param f_cust Function to apply to the data set. See details.
#' @param f_cust_para Optional additional parameters to pass to the function defined
#' by `f_cust`.
#' @param write_to Name of data set to write to
#' @param write2log Logical: whether or not to write to the log.
#' @param ... Arguments to pass to/from other methods.
#'
#' @return A coin
#' @export
#'
#' @examples
#' # build example coin
#' coin <- build_example_coin(up_to = "new_coin")
#'
#' # create function - replaces suspected unreliable point with NA
#' f_NA <- function(x){ x[3, 10] <- NA; return(x)}
#'
#' # call function from Custom()
#' coin <- Custom(coin, dset = "Raw", f_cust = "f_NA")
#' stopifnot(is.na(coin$Data$Custom[3,10]))
#'
Custom.coin <- function(x, dset, f_cust, f_cust_para = NULL, write_to = NULL,
write2log = TRUE, ...){

# WRITE LOG ---------------------------------------------------------------

coin <- write_log(x, dont_write = "x", write2log = write2log)

# GET DSET, CHECKS --------------------------------------------------------

iData <- get_dset(coin, dset)

# APPLY OPERATION ---------------------------------------------------------

iDatac <- do.call(f_cust, c(list(x = iData), f_cust_para))

if(!is.data.frame(iDatac)){
stop("The output of ", f_cust, " is not a data frame. 'f_cust' is required to return a data frame.")
}
# if(nrow(iDatac) != nrow(iData)){
# stop("The output of ", f_cust, " has a different number of rows than the input data set.")
# }
if("uCode" %nin% names(iDatac)){
stop("Required column 'uCode' not present in the data frame returned by f_cust.")
}
if(!setequal(names(iDatac), names(iData))){
stop("The output of ", f_cust, " has different column names than the input data set.")
}

# Write to coin
if(is.null(write_to)){
write_to <- "Custom"
}
write_dset(coin, iDatac, dset = write_to)

}

#' Custom operation
#'
#' Allows a custom data operation on coins or purses.
#'
#' @param x Object to be operated on (coin or purse)
#' @param ... arguments passed to or from other methods.
#'
#' @return Modified object.
#'
#' @export
Custom <- function (x, ...){
UseMethod("Custom")
}
41 changes: 40 additions & 1 deletion R/treat.R
Original file line number Diff line number Diff line change
Expand Up @@ -25,11 +25,50 @@ Treat.purse <- function(x, dset, global_specs = NULL, indiv_specs = NULL,
# input check
check_purse(x)

# if(global){
#
# iDatas <- get_dset(x, dset)
# iDatas_ <- iDatas[names(iDatas) != "Time"]
#
# # run global dset through Treat (as data frame), excluding Time col
# iDatas_t <- Treat(iDatas_, global_specs = global_specs,
# indiv_specs = indiv_specs, combine_treat = combine_treat)
# # split by Time
# iDatas_t_l <- split(iDatas_n, iDatas$Time)
#
# # now write dsets to coins
# x$coin <- lapply(x$coin, function(coin){
#
# # get Time
# tt <- coin$Meta$Unit$Time[[1]]
# if(is.null(tt)){
# stop("Time index is NULL or not found in writing treated data set to coin.")
# }
#
# if(is.null(write_to)){
# write_to <- "Treated"
# }
#
# # write dset first
# coin <- write_dset(coin, iDatas_t_l[[which(names(iDatas_t_l) == tt)]], dset = write_to)
#
# # also write to log - we signal that coin can't be regenerated any more
# coin$Log$can_regen <- FALSE
# coin$Log$message <- "Coin was treated inside a purse with global = TRUE. Cannot be regenerated."
#
# coin
# })
#
# } else {

# apply treatment to each coin
x$coin <- lapply(x$coin, function(coin){
Treat.coin(coin, dset = dset, global_specs = global_specs,
indiv_specs = indiv_specs, combine_treat = combine_treat, write_to = write_to)
indiv_specs = indiv_specs, combine_treat = combine_treat, write_to = write_to)
})

#}

# make sure still purse class
class(x) <- c("purse", "data.frame")
x
Expand Down
4 changes: 2 additions & 2 deletions R/write_to_coins.R
Original file line number Diff line number Diff line change
Expand Up @@ -49,9 +49,9 @@ write_log <- function(coin, dont_write = NULL, write2log = TRUE){
func_name2 <- unlist(strsplit(func_name, "\\.")[[1]])[1]

# make sure this is a builder function calling
builders <- c("Aggregate", "Denominate", "Impute", "new_coin", "Normalise", "qNormalise", "qTreat", "Screen", "Treat")
builders <- c("Aggregate", "Denominate", "Impute", "new_coin", "Normalise", "qNormalise", "qTreat", "Screen", "Treat", "Custom")
if(func_name2 %nin% builders){
stop("The calling function ", func_name2, " is not one of the functions allowed to write to log. Authorised functions are ", builders)
stop("The calling function ", func_name2, " is not one of the functions allowed to write to log. Authorised functions are: ", toString(builders))
}

# write to coin
Expand Down
19 changes: 19 additions & 0 deletions man/Custom.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

70 changes: 70 additions & 0 deletions man/Custom.coin.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading

0 comments on commit aba5a4c

Please sign in to comment.