From ceba885f1658e6115f22864c03c0bba37fc64cc4 Mon Sep 17 00:00:00 2001 From: Bernardo Lares Date: Tue, 1 Mar 2022 18:57:49 -0500 Subject: [PATCH 01/17] feat: enable date_min and date_max params on robyn_allocator() --- R/NAMESPACE | 1 + R/R/allocator.R | 97 +++++++++++++++++----------------------- R/R/convergence.R | 7 ++- R/R/imports.R | 2 +- R/man/robyn_allocator.Rd | 7 ++- 5 files changed, 53 insertions(+), 61 deletions(-) diff --git a/R/NAMESPACE b/R/NAMESPACE index ee4a90824..eb90a65c4 100644 --- a/R/NAMESPACE +++ b/R/NAMESPACE @@ -52,6 +52,7 @@ importFrom(dplyr,row_number) importFrom(dplyr,select) importFrom(dplyr,slice) importFrom(dplyr,summarise) +importFrom(dplyr,summarise_all) importFrom(dplyr,ungroup) importFrom(foreach,"%dopar%") importFrom(foreach,foreach) diff --git a/R/R/allocator.R b/R/R/allocator.R index 40120063b..49906ba36 100644 --- a/R/R/allocator.R +++ b/R/R/allocator.R @@ -48,6 +48,8 @@ #' Defaults to 100000. #' @param constr_mode Character. Options are \code{"eq"} or \code{"ineq"}, #' indicating constraints with equality or inequality. +#' @param date_min,date_max Character. Date range to calculate mean (of non-zero spends) and +#' total spends. Default will consider all dates within window. #' @return A list object containing allocator result. #' @examples #' \dontrun{ @@ -86,7 +88,7 @@ #' } #' @export robyn_allocator <- function(robyn_object = NULL, - select_build = NULL, + select_build = 0, InputCollect = NULL, OutputCollect = NULL, select_model = NULL, @@ -98,6 +100,8 @@ robyn_allocator <- function(robyn_object = NULL, channel_constr_up = 2, maxeval = 100000, constr_mode = "eq", + date_min = NULL, + date_max = NULL, export = TRUE, quiet = FALSE, ui = FALSE) { @@ -138,11 +142,15 @@ robyn_allocator <- function(robyn_object = NULL, channel_constr_low, channel_constr_up, expected_spend, expected_spend_days, constr_mode) + # Channel contrains + # channel_constr_low <- rep(0.8, length(paid_media_spends)) + # channel_constr_up <- rep(1.2, length(paid_media_spends)) names(channel_constr_low) <- paid_media_spends names(channel_constr_up) <- paid_media_spends + + # Hyper-parameters and results dt_hyppar <- OutputCollect$resultHypParam[solID == select_model] dt_bestCoef <- OutputCollect$xDecompAgg[solID == select_model & rn %in% paid_media_spends] - dt_mediaSpend <- dt_input[startRW:endRW, mediaSpendSorted, with = FALSE] ## Sort table and get filter for channels mmm coef reduced to 0 dt_coef <- dt_bestCoef[, .(rn, coef)] @@ -153,38 +161,17 @@ robyn_allocator <- function(robyn_object = NULL, names(coefSelectorSorted) <- dt_coefSorted$rn ## Filter and sort all variables by name that is essential for the apply function later - #mediaVarSortedFiltered <- mediaVarSorted[coefSelectorSorted] mediaSpendSortedFiltered <- mediaSpendSorted[coefSelectorSorted] if (!all(coefSelectorSorted)) { chn_coef0 <- setdiff(mediaVarSorted, mediaSpendSortedFiltered) - message(paste(chn_coef0, collapse = ", "), " are excluded in optimiser because their coeffients are 0") + message("Excluded in optimiser because their coeffients are 0: ", paste(chn_coef0, collapse = ", ")) } - dt_hyppar <- dt_hyppar[, .SD, .SDcols = hyper_names(adstock, mediaSpendSortedFiltered)] setcolorder(dt_hyppar, sort(names(dt_hyppar))) - dt_optim <- dt_mod[, mediaSpendSortedFiltered, with = FALSE] - dt_optimCost <- dt_input[startRW:endRW, mediaSpendSortedFiltered, with = FALSE] dt_bestCoef <- dt_bestCoef[rn %in% mediaSpendSortedFiltered] - costMultiplierVec <- InputCollect$mediaCostFactor[mediaSpendSortedFiltered] - - # if (any(InputCollect$exposure_selector)) { - # dt_modNLS <- merge(data.table(channel = mediaVarSortedFiltered), spendExpoMod, all.x = TRUE, by = "channel") - # vmaxVec <- dt_modNLS[order(rank(channel))][, Vmax] - # names(vmaxVec) <- mediaVarSortedFiltered - # kmVec <- dt_modNLS[order(rank(channel))][, Km] - # names(kmVec) <- mediaVarSortedFiltered - # } else { - # vmaxVec <- rep(0, length(mediaVarSortedFiltered)) - # kmVec <- rep(0, length(mediaVarSortedFiltered)) - # } - - # exposure_selectorSorted <- InputCollect$exposure_selector[media_order] - # exposure_selectorSorted <- exposure_selectorSorted[coefSelectorSorted] - # exposure_selectorSortedFiltered <- exposure_selectorSorted[mediaVarSortedFiltered] channelConstrLowSorted <- channel_constr_low[media_order][coefSelectorSorted] channelConstrUpSorted <- channel_constr_up[media_order][coefSelectorSorted] - ## Get adstock parameters for each channel getAdstockHypPar <- get_adstock_params(InputCollect, dt_hyppar) @@ -195,22 +182,35 @@ robyn_allocator <- function(robyn_object = NULL, gammaTrans <- hills$gammaTrans coefsFiltered <- hills$coefsFiltered - # ## build evaluation funciton - # if (!is.null(spendExpoMod)) { - # mm_lm_coefs <- spendExpoMod$coef_lm - # names(mm_lm_coefs) <- spendExpoMod$channel - # } else { - # mm_lm_coefs <- c() - # } + # Spend values based on date range set + dt_optimCost <- dt_mod %>% slice(startRW:endRW) + if (is.null(date_min)) date_min <- min(dt_optimCost$ds) + if (is.null(date_max)) date_max <- max(dt_optimCost$ds) + stopifnot(date_min >= min(dt_optimCost$ds)) + stopifnot(date_max <= max(dt_optimCost$ds)) + histFiltered <- filter(dt_optimCost, .data$ds >= date_min & .data$ds <= date_max) + nPeriod <- nrow(histFiltered) + message(sprintf("Date Window: %s:%s (%s %ss)", date_min, date_max, nPeriod, InputCollect$intervalType)) + + histSpendB <- select(histFiltered, any_of(mediaSpendSortedFiltered)) + histSpendTotal <- sum(histSpendB) + histSpend <- unlist(summarise_all(select(histFiltered, any_of(mediaSpendSorted)), sum)) + histSpendUnit <- unlist(summarise_all(histSpendB, function(x) sum(x) / sum(x > 0))) + histSpendUnitTotal <- sum(histSpendUnit) + histSpendShare <- histSpendUnit/histSpendUnitTotal - ## Build constraints function with scenarios - nPeriod <- nrow(dt_optimCost) + # Response values NOT based on date range set xDecompAggMedia <- OutputCollect$xDecompAgg[ solID == select_model & rn %in% paid_media_spends][order(rank(rn))] + histResponseUnitModel <- setNames( + xDecompAggMedia[rn %in% mediaSpendSortedFiltered, get("mean_response")], + mediaSpendSortedFiltered) + histResponseUnitAllocator <- unlist(-eval_f(histSpendUnit)[["objective.channel"]]) + ## Build constraints function with scenarios if ("max_historical_response" %in% scenario) { - expected_spend <- sum(xDecompAggMedia$total_spend) - expSpendUnitTotal <- sum(xDecompAggMedia$mean_spend) # expected_spend / nPeriod + expected_spend <- histSpendTotal + expSpendUnitTotal <- histSpendUnitTotal } else { expSpendUnitTotal <- expected_spend / (expected_spend_days / InputCollect$dayInterval) } @@ -227,8 +227,6 @@ robyn_allocator <- function(robyn_object = NULL, # kmVec = kmVec, expSpendUnitTotal = expSpendUnitTotal) # So we can implicitly use these values within eval_f() - # optim_env <- new.env(parent = globalenv()) - # optim_env$eval_list <- eval_list options("ROBYN_TEMP" = eval_list) # eval_f(c(1,1)) @@ -239,23 +237,6 @@ robyn_allocator <- function(robyn_object = NULL, # $objective.channel # [1] -6.590166e-07 -3.087475e-06 -2.316821e-02 -1.250144e-05 - histSpend <- xDecompAggMedia[, .(rn, total_spend)] - histSpend <- histSpend$total_spend - names(histSpend) <- sort(InputCollect$paid_media_spends) - histSpendTotal <- sum(histSpend) - histSpendUnitTotal <- sum(xDecompAggMedia$mean_spend) - histSpendUnit <- xDecompAggMedia[rn %in% mediaSpendSortedFiltered, mean_spend] - names(histSpendUnit) <- mediaSpendSortedFiltered - histSpendShare <- histSpendUnit/histSpendUnitTotal - names(histSpendShare) <- mediaSpendSortedFiltered - - # QA: check if objective function correctly implemented - histResponseUnitModel <- setNames( - xDecompAggMedia[rn %in% mediaSpendSortedFiltered, get("mean_response")], - mediaSpendSortedFiltered) - histResponseUnitAllocator <- unlist(-eval_f(histSpendUnit)[["objective.channel"]]) - identical(round(histResponseUnitModel, 3), round(histResponseUnitAllocator, 3)) - ## Set initial values and bounds x0 <- lb <- histSpendUnit * channelConstrLowSorted ub <- histSpendUnit * channelConstrUpSorted @@ -288,9 +269,12 @@ robyn_allocator <- function(robyn_object = NULL, )) ## Collect output - dt_bestModel <- dt_bestCoef[, .(rn, mean_spend, xDecompAgg, roi_total, roi_mean)][order(rank(rn))] dt_optimOut <- data.table( channels = mediaSpendSortedFiltered, + date_min = date_min, + date_max = date_max, + periods = sprintf("%s %ss", nPeriod, InputCollect$intervalType), + # Initial histSpend = histSpend[mediaSpendSortedFiltered], histSpendTotal = histSpendTotal, initSpendUnitTotal = histSpendUnitTotal, @@ -299,9 +283,11 @@ robyn_allocator <- function(robyn_object = NULL, initResponseUnit = histResponseUnitModel, initResponseUnitTotal = sum(xDecompAggMedia$mean_response), initRoiUnit = histResponseUnitModel / histSpendUnit, + # Expected expSpendTotal = expected_spend, expSpendUnitTotal = expSpendUnitTotal, expSpendUnitDelta = expSpendUnitTotal / histSpendUnitTotal - 1, + # Optimized optmSpendUnit = nlsMod$solution, optmSpendUnitDelta = (nlsMod$solution / histSpendUnit - 1), optmSpendUnitTotal = sum(nlsMod$solution), @@ -343,6 +329,7 @@ print.robyn_allocator <- function(x, ...) { Model ID: {x$dt_optimOut$solID[1]} Total Spend Increase: {spend_increase_p}% ({spend_increase}) Total Response Increase (Optimized): {signif(100 * x$dt_optimOut$optmResponseUnitTotalLift[1], 3)}% +Window: {x$dt_optimOut$date_min[1]}:{x$dt_optimOut$date_max[1]} ({x$dt_optimOut$periods[1]}) Allocation Summary: {summary} diff --git a/R/R/convergence.R b/R/R/convergence.R index 1a5e603a5..ad5e54602 100644 --- a/R/R/convergence.R +++ b/R/R/convergence.R @@ -64,9 +64,7 @@ robyn_converge <- function(OutputModels, n_cuts = 20, sd_qtref = 3, med_lowb = 3 include.lowest = TRUE, ordered_result = TRUE, dig.lab = 6 )) - # Calculate sd and median on each cut to alert user on: - # 1) last quantile's sd < mean sd of default first 3 qt - # 2) last quantile's median < median of first qt - default 3 * mean sd of defualt first 3 qt + # Calculate sd and median on each cut to alert user when no convergence errors <- dt_objfunc_cvg %>% group_by(.data$error_type, .data$cuts) %>% summarise( @@ -174,7 +172,8 @@ robyn_converge <- function(OutputModels, n_cuts = 20, sd_qtref = 3, med_lowb = 3 theme_lares() if (calibrated) { - moo_cloud_plot <- moo_cloud_plot + geom_point(data = df, aes(size = .data$mape, alpha = 1 - .data$mape)) + moo_cloud_plot <- moo_cloud_plot + geom_point(data = df, aes(size = .data$mape, alpha = 1 - .data$mape)) + + guides(alpha = "none") } else { moo_cloud_plot <- moo_cloud_plot + geom_point() } diff --git a/R/R/imports.R b/R/R/imports.R index 1bce8ba2b..0aa8efa16 100644 --- a/R/R/imports.R +++ b/R/R/imports.R @@ -23,7 +23,7 @@ #' @importFrom doRNG %dorng% #' @importFrom doParallel registerDoParallel stopImplicitCluster #' @importFrom dplyr any_of arrange as_tibble bind_rows contains desc distinct everything filter -#' group_by lag left_join mutate n pull rename row_number select slice summarise ungroup +#' group_by lag left_join mutate n pull rename row_number select slice summarise summarise_all ungroup #' @importFrom foreach foreach %dopar% getDoParWorkers registerDoSEQ #' @import ggplot2 #' @importFrom ggridges geom_density_ridges diff --git a/R/man/robyn_allocator.Rd b/R/man/robyn_allocator.Rd index f2e61c544..682d25e4c 100644 --- a/R/man/robyn_allocator.Rd +++ b/R/man/robyn_allocator.Rd @@ -7,7 +7,7 @@ \usage{ robyn_allocator( robyn_object = NULL, - select_build = NULL, + select_build = 0, InputCollect = NULL, OutputCollect = NULL, select_model = NULL, @@ -19,6 +19,8 @@ robyn_allocator( channel_constr_up = 2, maxeval = 1e+05, constr_mode = "eq", + date_min = NULL, + date_max = NULL, export = TRUE, quiet = FALSE, ui = FALSE @@ -76,6 +78,9 @@ Defaults to 100000.} \item{constr_mode}{Character. Options are \code{"eq"} or \code{"ineq"}, indicating constraints with equality or inequality.} +\item{date_min, date_max}{Character. Date range to calculate mean (of non-zero spends) and +total spends. Default will consider all dates within window.} + \item{export}{Boolean. Export outcomes into local files?} \item{quiet}{Boolean. Keep messages off?} From c8a0aa9054724dcd6e964d67fed1673e85f1bed6 Mon Sep 17 00:00:00 2001 From: Bernardo Lares Date: Wed, 2 Mar 2022 12:13:29 -0500 Subject: [PATCH 02/17] fix: Few changes --- R/R/allocator.R | 2 +- R/R/model.R | 16 ++++++---------- demo/demo.R | 4 ++-- 3 files changed, 9 insertions(+), 13 deletions(-) diff --git a/R/R/allocator.R b/R/R/allocator.R index 49906ba36..1fde72b15 100644 --- a/R/R/allocator.R +++ b/R/R/allocator.R @@ -200,12 +200,12 @@ robyn_allocator <- function(robyn_object = NULL, histSpendShare <- histSpendUnit/histSpendUnitTotal # Response values NOT based on date range set + # OutputCollect$xDecompVecCollect only contains default selected model xDecompAggMedia <- OutputCollect$xDecompAgg[ solID == select_model & rn %in% paid_media_spends][order(rank(rn))] histResponseUnitModel <- setNames( xDecompAggMedia[rn %in% mediaSpendSortedFiltered, get("mean_response")], mediaSpendSortedFiltered) - histResponseUnitAllocator <- unlist(-eval_f(histSpendUnit)[["objective.channel"]]) ## Build constraints function with scenarios if ("max_historical_response" %in% scenario) { diff --git a/R/R/model.R b/R/R/model.R index 5ebc15ad8..951ac3140 100644 --- a/R/R/model.R +++ b/R/R/model.R @@ -960,7 +960,7 @@ robyn_response <- function(robyn_object = NULL, InputCollect = NULL, OutputCollect = NULL) { - ## get input + ## Get input if (!is.null(robyn_object)) { if (!file.exists(robyn_object)) { @@ -1026,15 +1026,13 @@ robyn_response <- function(robyn_object = NULL, stop("media_metric must be one value from paid_media_spends, paid_media_vars or organic_vars") } - #media_vec <- dt_input[, get(media_metric)] - if (!is.null(metric_value)) { if (length(metric_value) != 1 | metric_value <= 0 | !is.numeric(metric_value)) { stop("'metric_value' must be a positive number") } } - ## transform exposure to spend when necessary + ## Transform exposure to spend when necessary if (metric_type == "exposure") { get_spend_name <- paid_media_spends[which(paid_media_vars==media_metric)] @@ -1052,17 +1050,15 @@ robyn_response <- function(robyn_object = NULL, Vmax <- spendExpoMod[channel == media_metric, Vmax] Km <- spendExpoMod[channel == media_metric, Km] media_vec <- mic_men(x = spend_vec, Vmax = Vmax, Km = Km, reverse = FALSE) - #metric_value <- mic_men(x = metric_value, Vmax = Vmax, Km = Km, reverse = FALSE) } else { coef_lm <- spendExpoMod[channel == media_metric, coef_lm] media_vec <- spend_vec * coef_lm - #metric_value <- metric_value * coef_lm } hpm_name <- get_spend_name } else { media_vec <- dt_input[, get(media_metric)] - # use non-0 meanas marginal level if spend not provided + # use non-0 means marginal level if spend not provided if (is.null(metric_value)) { metric_value <- mean(media_vec[startRW:endRW][media_vec[startRW:endRW] > 0]) message("metric_value not provided. using mean of ", media_metric, " instead") @@ -1071,7 +1067,7 @@ robyn_response <- function(robyn_object = NULL, } - ## adstocking + ## Adstocking if (adstock == "geometric") { theta <- dt_hyppar[solID == select_model, get(paste0(hpm_name, "_thetas"))] x_list <- adstock_geometric(x = media_vec, theta = theta) @@ -1086,14 +1082,14 @@ robyn_response <- function(robyn_object = NULL, } m_adstocked <- x_list$x_decayed - ## saturation + ## Saturation m_adstockedRW <- m_adstocked[startRW:endRW] alpha <- dt_hyppar[solID == select_model, get(paste0(hpm_name, "_alphas"))] gamma <- dt_hyppar[solID == select_model, get(paste0(hpm_name, "_gammas"))] Saturated <- saturation_hill(x = m_adstockedRW, alpha = alpha, gamma = gamma, x_marginal = metric_value) m_saturated <- saturation_hill(x = m_adstockedRW, alpha = alpha, gamma = gamma) - ## decomp + ## Decomp coeff <- dt_coef[solID == select_model & rn == hpm_name, coef] response_vec <- m_saturated * coeff Response <- as.numeric(Saturated * coeff) diff --git a/demo/demo.R b/demo/demo.R index c88178b12..2ad64026a 100644 --- a/demo/demo.R +++ b/demo/demo.R @@ -383,8 +383,8 @@ AllocatorCollect$dt_optimOut if (TRUE) { cat("QA if results from robyn_allocator and robyn_response agree: ") select_media <- "search_S" - optimal_spend <- AllocatorCollect$dt_optimOut[channels== select_media, optmSpendUnit] - optimal_response_allocator <- AllocatorCollect$dt_optimOut[channels== select_media, optmResponseUnit] + optimal_spend <- AllocatorCollect$dt_optimOut[channels == select_media, optmSpendUnit] + optimal_response_allocator <- AllocatorCollect$dt_optimOut[channels == select_media, optmResponseUnit] optimal_response <- robyn_response( robyn_object = robyn_object, select_build = 0, From 8e81aef4c39178e621af0801ad3e8221b641f744 Mon Sep 17 00:00:00 2001 From: Bernardo Lares Date: Wed, 2 Mar 2022 12:13:29 -0500 Subject: [PATCH 03/17] fix: Re-calculate media response to match mean spend, especially when using date range filters --- R/R/allocator.R | 31 ++++++++++++++++++++----------- R/R/model.R | 16 ++++++---------- R/R/plots.R | 8 ++++++-- demo/demo.R | 8 ++++++-- 4 files changed, 38 insertions(+), 25 deletions(-) diff --git a/R/R/allocator.R b/R/R/allocator.R index 49906ba36..8146d0b2f 100644 --- a/R/R/allocator.R +++ b/R/R/allocator.R @@ -123,7 +123,6 @@ robyn_allocator <- function(robyn_object = NULL, ## Set local data & params values if (TRUE) { - dt_input <- InputCollect$dt_input dt_mod <- InputCollect$dt_mod paid_media_vars <- InputCollect$paid_media_vars media_order <- order(paid_media_vars) @@ -186,8 +185,8 @@ robyn_allocator <- function(robyn_object = NULL, dt_optimCost <- dt_mod %>% slice(startRW:endRW) if (is.null(date_min)) date_min <- min(dt_optimCost$ds) if (is.null(date_max)) date_max <- max(dt_optimCost$ds) - stopifnot(date_min >= min(dt_optimCost$ds)) - stopifnot(date_max <= max(dt_optimCost$ds)) + if (date_min < min(dt_optimCost$ds)) date_min <- min(dt_optimCost$ds) + if (date_max > max(dt_optimCost$ds)) date_max <- max(dt_optimCost$ds) histFiltered <- filter(dt_optimCost, .data$ds >= date_min & .data$ds <= date_max) nPeriod <- nrow(histFiltered) message(sprintf("Date Window: %s:%s (%s %ss)", date_min, date_max, nPeriod, InputCollect$intervalType)) @@ -199,13 +198,23 @@ robyn_allocator <- function(robyn_object = NULL, histSpendUnitTotal <- sum(histSpendUnit) histSpendShare <- histSpendUnit/histSpendUnitTotal - # Response values NOT based on date range set - xDecompAggMedia <- OutputCollect$xDecompAgg[ - solID == select_model & rn %in% paid_media_spends][order(rank(rn))] - histResponseUnitModel <- setNames( - xDecompAggMedia[rn %in% mediaSpendSortedFiltered, get("mean_response")], - mediaSpendSortedFiltered) - histResponseUnitAllocator <- unlist(-eval_f(histSpendUnit)[["objective.channel"]]) + # Response values based on date range -> mean spend + histResponseUnitModel <- NULL + for (i in seq_along(mediaSpendSortedFiltered)) { + histResponseUnitModel <- c( + histResponseUnitModel, + robyn_response( + robyn_object = robyn_object, + select_build = select_build, + mediaSpendSortedFiltered[i], + select_model = select_model, + metric_value = histSpendUnit[i], + dt_hyppar = OutputCollect$resultHypParam, + dt_coef = OutputCollect$xDecompAgg, + InputCollect = InputCollect, + OutputCollect = OutputCollect)$response) + } + names(histResponseUnitModel) <- mediaSpendSortedFiltered ## Build constraints function with scenarios if ("max_historical_response" %in% scenario) { @@ -281,7 +290,7 @@ robyn_allocator <- function(robyn_object = NULL, initSpendUnit = histSpendUnit, initSpendShare = histSpendShare, initResponseUnit = histResponseUnitModel, - initResponseUnitTotal = sum(xDecompAggMedia$mean_response), + initResponseUnitTotal = sum(histResponseUnitModel), initRoiUnit = histResponseUnitModel / histSpendUnit, # Expected expSpendTotal = expected_spend, diff --git a/R/R/model.R b/R/R/model.R index 5ebc15ad8..951ac3140 100644 --- a/R/R/model.R +++ b/R/R/model.R @@ -960,7 +960,7 @@ robyn_response <- function(robyn_object = NULL, InputCollect = NULL, OutputCollect = NULL) { - ## get input + ## Get input if (!is.null(robyn_object)) { if (!file.exists(robyn_object)) { @@ -1026,15 +1026,13 @@ robyn_response <- function(robyn_object = NULL, stop("media_metric must be one value from paid_media_spends, paid_media_vars or organic_vars") } - #media_vec <- dt_input[, get(media_metric)] - if (!is.null(metric_value)) { if (length(metric_value) != 1 | metric_value <= 0 | !is.numeric(metric_value)) { stop("'metric_value' must be a positive number") } } - ## transform exposure to spend when necessary + ## Transform exposure to spend when necessary if (metric_type == "exposure") { get_spend_name <- paid_media_spends[which(paid_media_vars==media_metric)] @@ -1052,17 +1050,15 @@ robyn_response <- function(robyn_object = NULL, Vmax <- spendExpoMod[channel == media_metric, Vmax] Km <- spendExpoMod[channel == media_metric, Km] media_vec <- mic_men(x = spend_vec, Vmax = Vmax, Km = Km, reverse = FALSE) - #metric_value <- mic_men(x = metric_value, Vmax = Vmax, Km = Km, reverse = FALSE) } else { coef_lm <- spendExpoMod[channel == media_metric, coef_lm] media_vec <- spend_vec * coef_lm - #metric_value <- metric_value * coef_lm } hpm_name <- get_spend_name } else { media_vec <- dt_input[, get(media_metric)] - # use non-0 meanas marginal level if spend not provided + # use non-0 means marginal level if spend not provided if (is.null(metric_value)) { metric_value <- mean(media_vec[startRW:endRW][media_vec[startRW:endRW] > 0]) message("metric_value not provided. using mean of ", media_metric, " instead") @@ -1071,7 +1067,7 @@ robyn_response <- function(robyn_object = NULL, } - ## adstocking + ## Adstocking if (adstock == "geometric") { theta <- dt_hyppar[solID == select_model, get(paste0(hpm_name, "_thetas"))] x_list <- adstock_geometric(x = media_vec, theta = theta) @@ -1086,14 +1082,14 @@ robyn_response <- function(robyn_object = NULL, } m_adstocked <- x_list$x_decayed - ## saturation + ## Saturation m_adstockedRW <- m_adstocked[startRW:endRW] alpha <- dt_hyppar[solID == select_model, get(paste0(hpm_name, "_alphas"))] gamma <- dt_hyppar[solID == select_model, get(paste0(hpm_name, "_gammas"))] Saturated <- saturation_hill(x = m_adstockedRW, alpha = alpha, gamma = gamma, x_marginal = metric_value) m_saturated <- saturation_hill(x = m_adstockedRW, alpha = alpha, gamma = gamma) - ## decomp + ## Decomp coeff <- dt_coef[solID == select_model & rn == hpm_name, coef] response_vec <- m_saturated * coeff Response <- as.numeric(Saturated * coeff) diff --git a/R/R/plots.R b/R/R/plots.R index 0ca9c8f33..5882869c2 100644 --- a/R/R/plots.R +++ b/R/R/plots.R @@ -504,14 +504,18 @@ allocation_plots <- function(InputCollect, OutputCollect, dt_optimOut, select_mo show.legend = FALSE, hjust = -0.2) + theme(legend.position = c(0.9, 0.4), legend.title = element_blank()) + labs( - title = "Response curve and mean spend by channel", + title = "Response curve and mean spend* by channel", subtitle = paste0( "rsq_train: ", plotDT_scurveMeanResponse[, round(mean(rsq_train), 4)], ", nrmse = ", plotDT_scurveMeanResponse[, round(mean(nrmse), 4)], ", decomp.rssd = ", plotDT_scurveMeanResponse[, round(mean(decomp.rssd), 4)], ", mape.lift = ", plotDT_scurveMeanResponse[, round(mean(mape), 4)] ), - x = "Spend", y = "Response" + x = "Spend", y = "Response", + caption = sprintf("*Based on date range: %s to %s (%s)", + dt_optimOut$date_min[1], + dt_optimOut$date_max[1], + dt_optimOut$periods[1]) ) + lares::scale_x_abbr() + lares::scale_y_abbr() # Gather all plots diff --git a/demo/demo.R b/demo/demo.R index c88178b12..abbba5e27 100644 --- a/demo/demo.R +++ b/demo/demo.R @@ -356,9 +356,12 @@ AllocatorCollect <- robyn_allocator( , scenario = "max_historical_response" , channel_constr_low = c(0.7, 0.7, 0.7, 0.7, 0.7) , channel_constr_up = c(1.2, 1.5, 1.5, 1.5, 1.5) + , date_min = "2017-12-01" + , date_max = "2018-02-01" ) print(AllocatorCollect) AllocatorCollect$dt_optimOut +AllocatorCollect$plots$p14 # Run the "max_response_expected_spend" scenario: "What's the maximum response for a given # total spend based on historical saturation and what is the spend mix?" "optmSpendShareUnit" @@ -375,6 +378,7 @@ AllocatorCollect <- robyn_allocator( ) print(AllocatorCollect) AllocatorCollect$dt_optimOut +AllocatorCollect$plots$p14 ## A csv is exported into the folder for further usage. Check schema here: ## https://github.com/facebookexperimental/Robyn/blob/main/demo/schema.R @@ -383,8 +387,8 @@ AllocatorCollect$dt_optimOut if (TRUE) { cat("QA if results from robyn_allocator and robyn_response agree: ") select_media <- "search_S" - optimal_spend <- AllocatorCollect$dt_optimOut[channels== select_media, optmSpendUnit] - optimal_response_allocator <- AllocatorCollect$dt_optimOut[channels== select_media, optmResponseUnit] + optimal_spend <- AllocatorCollect$dt_optimOut[channels == select_media, optmSpendUnit] + optimal_response_allocator <- AllocatorCollect$dt_optimOut[channels == select_media, optmResponseUnit] optimal_response <- robyn_response( robyn_object = robyn_object, select_build = 0, From 55bb1b7848704416551ee90a03e3ce3ddf71f8ef Mon Sep 17 00:00:00 2001 From: Bernardo Lares Date: Thu, 10 Mar 2022 17:00:49 -0500 Subject: [PATCH 04/17] format: improved viz for allocation results --- R/R/plots.R | 119 ++++++++++++++++++++++++++-------------------------- 1 file changed, 59 insertions(+), 60 deletions(-) diff --git a/R/R/plots.R b/R/R/plots.R index 5882869c2..c1a9b7f09 100644 --- a/R/R/plots.R +++ b/R/R/plots.R @@ -419,37 +419,42 @@ robyn_onepagers <- function(InputCollect, OutputCollect, selected = NULL, quiet allocation_plots <- function(InputCollect, OutputCollect, dt_optimOut, select_model, scenario, export = TRUE, quiet = FALSE) { + subtitle <- paste0( + "Total spend increase: ", dt_optimOut[ + , round(mean(optmSpendUnitTotalDelta) * 100, 1) + ], "%", + "\nTotal response increase: ", dt_optimOut[ + , round(mean(optmResponseUnitTotalLift) * 100, 1) + ], "% with optimised spend allocation" + ) + + errors <- paste0( + "R2 train: ", plotDT_scurveMeanResponse[, round(mean(rsq_train), 4)], + ", NRMSE = ", plotDT_scurveMeanResponse[, round(mean(nrmse), 4)], + ", DECOMP.RSSD = ", plotDT_scurveMeanResponse[, round(mean(decomp.rssd), 4)], + ", MAPE = ", plotDT_scurveMeanResponse[, round(mean(mape), 4)] + ) + # 1. Response comparison plot plotDT_resp <- dt_optimOut[, c("channels", "initResponseUnit", "optmResponseUnit")][order(rank(channels))] plotDT_resp[, channels := as.factor(channels)] chn_levels <- plotDT_resp[, as.character(channels)] plotDT_resp[, channels := factor(channels, levels = chn_levels)] - setnames(plotDT_resp, names(plotDT_resp), new = c("channel", "initial response / time unit", "optimised response / time unit")) + fcts <- c("channel", "Initial Response / Time Unit", "Optimised Response / Time Unit") + setnames(plotDT_resp, names(plotDT_resp), new = fcts) plotDT_resp <- suppressWarnings(melt.data.table(plotDT_resp, id.vars = "channel", value.name = "response")) - p12 <- ggplot(plotDT_resp, aes(x = .data$channel, y = .data$response, fill = .data$variable)) + - geom_bar(stat = "identity", width = 0.5, position = "dodge") + - coord_flip() + - scale_fill_brewer(palette = "Paired") + - geom_text(aes(label = round(.data$response, 0), hjust = 1, size = 2.0), - position = position_dodge(width = 0.5), fontface = "bold", show.legend = FALSE - ) + - theme( - legend.title = element_blank(), legend.position = c(0.8, 0.2), - axis.text.x = element_blank(), legend.background = element_rect( - colour = "grey", fill = "transparent" - ) + p12 <- ggplot(plotDT_resp, aes(y = .data$channel, x = .data$response, fill = reorder(.data$variable, as.numeric(.data$variable)))) + + geom_bar(stat = "identity", width = 0.5, position = position_dodge2(reverse = TRUE, padding = 0)) + + scale_fill_brewer(palette = 3) + + geom_text(aes(x = 0, label = formatNum(.data$response, 0), hjust = -0.1), + position = position_dodge2(width = 0.5, reverse = TRUE), fontface = "bold", show.legend = FALSE ) + + theme_lares(legend = "top") + + scale_x_abbr() + labs( - title = "Initial vs. optimised mean response", - subtitle = paste0( - "Total spend increases ", dt_optimOut[ - , round(mean(optmSpendUnitTotalDelta) * 100, 1) - ], "%", - "\nTotal response increases ", dt_optimOut[ - , round(mean(optmResponseUnitTotalLift) * 100, 1) - ], "% with optimised spend allocation" - ), - y = NULL, x = "Channels" + title = "Initial vs. Optimised Mean Response", + subtitle = subtitle, + fill = NULL, x = "Mean Response [#]", y = NULL ) # 2. Budget share comparison plot @@ -457,28 +462,21 @@ allocation_plots <- function(InputCollect, OutputCollect, dt_optimOut, select_mo plotDT_share[, channels := as.factor(channels)] chn_levels <- plotDT_share[, as.character(channels)] plotDT_share[, channels := factor(channels, levels = chn_levels)] - setnames(plotDT_share, names(plotDT_share), new = c("channel", "initial avg.spend share", "optimised avg.spend share")) + fcts <- c("channel", "Initial Avg. Spend Share", "Optimised Avg. Spend Share") + setnames(plotDT_share, names(plotDT_share), new = fcts) plotDT_share <- suppressWarnings(melt.data.table(plotDT_share, id.vars = "channel", value.name = "spend_share")) - p13 <- ggplot(plotDT_share, aes(x = .data$channel, y = .data$spend_share, fill = .data$variable)) + - geom_bar(stat = "identity", width = 0.5, position = "dodge") + - coord_flip() + - scale_fill_brewer(palette = "Paired") + - geom_text(aes(label = paste0(round(.data$spend_share * 100, 2), "%"), hjust = 1, size = 2.0), - position = position_dodge(width = 0.5), fontface = "bold", show.legend = FALSE - ) + - theme( - legend.title = element_blank(), legend.position = c(0.8, 0.2), - axis.text.x = element_blank(), legend.background = element_rect( - colour = "grey", fill = "transparent" - ) + p13 <- ggplot(plotDT_share, aes(y = .data$channel, x = .data$spend_share, fill = .data$variable)) + + geom_bar(stat = "identity", width = 0.5, position = position_dodge2(reverse = TRUE, padding = 0)) + + scale_fill_brewer(palette = 3) + + geom_text(aes(x = 0, label = formatNum(.data$spend_share * 100, signif = 3, pos = "%"), hjust = -0.1), + position = position_dodge2(width = 0.5, reverse = TRUE), fontface = "bold", show.legend = FALSE ) + + theme_lares(legend = "top") + + lares::scale_x_percent() + labs( - title = "Initial vs. optimised budget allocation", - subtitle = paste0( - "Total spend increases ", dt_optimOut[, round(mean(optmSpendUnitTotalDelta) * 100, 1)], "%", - "\nTotal response increases ", dt_optimOut[, round(mean(optmResponseUnitTotalLift) * 100, 1)], "% with optimised spend allocation" - ), - y = NULL, x = "Channels" + title = "Initial vs. Optimised Budget Allocation", + subtitle = subtitle, + fill = NULL, x = "Budget Allocation [%]", y = NULL ) ## 3. Response curve @@ -491,26 +489,25 @@ allocation_plots <- function(InputCollect, OutputCollect, dt_optimOut, select_mo plotDT_scurve <- cbind(plotDT_saturation, plotDT_decomp[, .(response)]) plotDT_scurve <- plotDT_scurve[spend >= 0] # remove outlier introduced by MM nls fitting plotDT_scurveMeanResponse <- OutputCollect$xDecompAgg[solID == select_model & rn %in% InputCollect$paid_media_spends] - dt_optimOutScurve <- rbind(dt_optimOut[, .(channels, initSpendUnit, initResponseUnit)][, type := "initial"], - dt_optimOut[, .(channels, optmSpendUnit, optmResponseUnit)][, type := "optimised"], use.names = FALSE) + dt_optimOutScurve <- rbind(dt_optimOut[, .(channels, initSpendUnit, initResponseUnit)][, type := "Initial"], + dt_optimOut[, .(channels, optmSpendUnit, optmResponseUnit)][, type := "Optimised"], use.names = FALSE) setnames(dt_optimOutScurve, c("channels", "spend", "response", "type")) + dt_optimOutScurve$hjust <- ifelse(dt_optimOutScurve$type == "Initial", 1.2, -0.2) p14 <- ggplot(data = plotDT_scurve, aes(x = .data$spend, y = .data$response, color = .data$channel)) + geom_line() + geom_point(data = dt_optimOutScurve, aes( - x = .data$spend, y = .data$response, color = .data$channels, shape = .data$type), size = 2) + + x = .data$spend, y = .data$response, + color = .data$channels, shape = .data$type), size = 2.5) + geom_text(data = dt_optimOutScurve, aes( x = .data$spend, y = .data$response, color = .data$channels, + hjust = .data$hjust, label = formatNum(.data$spend, 2, abbr = TRUE)), - show.legend = FALSE, hjust = -0.2) + - theme(legend.position = c(0.9, 0.4), legend.title = element_blank()) + + show.legend = FALSE) + + theme_lares(legend.position = c(0.9, 0.4), pal = 2) + + theme(legend.title = element_blank()) + labs( - title = "Response curve and mean spend* by channel", - subtitle = paste0( - "rsq_train: ", plotDT_scurveMeanResponse[, round(mean(rsq_train), 4)], - ", nrmse = ", plotDT_scurveMeanResponse[, round(mean(nrmse), 4)], - ", decomp.rssd = ", plotDT_scurveMeanResponse[, round(mean(decomp.rssd), 4)], - ", mape.lift = ", plotDT_scurveMeanResponse[, round(mean(mape), 4)] - ), + title = "Response Curve and Mean Spend* by Channel", + subtitle = errors, x = "Spend", y = "Response", caption = sprintf("*Based on date range: %s to %s (%s)", dt_optimOut$date_min[1], @@ -519,19 +516,21 @@ allocation_plots <- function(InputCollect, OutputCollect, dt_optimOut, select_mo ) + lares::scale_x_abbr() + lares::scale_y_abbr() # Gather all plots - grobTitle <- paste0("Budget allocator optimum result for model ID ", select_model) - plots <- (p13 + p12) / p14 + plot_annotation( - title = grobTitle, theme = theme(plot.title = element_text(hjust = 0.5)) - ) - if (export) { + grobTitle <- paste0("Budget Allocator Optimum Result for Model ID ", select_model) scenario <- ifelse(scenario == "max_historical_response", "hist", "respo") filename <- paste0(OutputCollect$plot_folder, select_model, "_reallocated_", scenario, ".png") + p13 <- p13 + labs(subtitle = NULL) + p12 <- p12 + labs(subtitle = NULL) + plots <- (p13 + p12) / p14 + plot_annotation( + title = grobTitle, subtitle = subtitle, + theme = theme_lares(plot.title = element_text(hjust = 0.5), background = "white") + ) if (!quiet) message("Exporting charts into file: ", filename) ggsave( filename = filename, plot = plots, limitsize = FALSE, - dpi = 400, width = 18, height = 14 + dpi = 350, width = 15, height = 12 ) } From 2f94676596acf95ea01533020c1d4ecfcd4be7f9 Mon Sep 17 00:00:00 2001 From: Bernardo Lares Date: Fri, 11 Mar 2022 11:25:41 -0500 Subject: [PATCH 05/17] format: improved viz for one-pagers --- R/NAMESPACE | 2 + R/R/auxiliary.R | 18 -- R/R/imports.R | 2 +- R/R/outputs.R | 9 +- R/R/plots.R | 385 +++++++++++++++++++++------------------ R/man/robyn_onepagers.Rd | 4 +- R/man/robyn_outputs.Rd | 4 +- 7 files changed, 220 insertions(+), 204 deletions(-) diff --git a/R/NAMESPACE b/R/NAMESPACE index eb90a65c4..df14087fb 100644 --- a/R/NAMESPACE +++ b/R/NAMESPACE @@ -69,7 +69,9 @@ importFrom(lares,freqs) importFrom(lares,glued) importFrom(lares,removenacols) importFrom(lares,scale_x_abbr) +importFrom(lares,scale_x_percent) importFrom(lares,scale_y_abbr) +importFrom(lares,scale_y_percent) importFrom(lares,theme_lares) importFrom(lares,v2t) importFrom(lubridate,day) diff --git a/R/R/auxiliary.R b/R/R/auxiliary.R index 0a8d4e4d2..4a5eb009e 100644 --- a/R/R/auxiliary.R +++ b/R/R/auxiliary.R @@ -3,24 +3,6 @@ # This source code is licensed under the MIT license found in the # LICENSE file in the root directory of this source tree. -# Includes function format_unit, get_rsq - -# Format unit -format_unit <- function(x_in) { - x_out <- sapply(x_in, function(x) { - if (abs(x) >= 1000000000) { - x_out <- paste0(round(x / 1000000000, 1), " B") - } else if (abs(x) >= 1000000 & abs(x) < 1000000000) { - x_out <- paste0(round(x / 1000000, 1), " M") - } else if (abs(x) >= 1000 & abs(x) < 1000000) { - x_out <- paste0(round(x / 1000, 1), " K") - } else { - x_out <- round(x, 0) - } - }, simplify = TRUE) - return(x_out) -} - # Calculate R-squared get_rsq <- function(true, predicted, p = NULL, df.int = NULL) { sse <- sum((predicted - true)^2) diff --git a/R/R/imports.R b/R/R/imports.R index 0aa8efa16..22aa658e4 100644 --- a/R/R/imports.R +++ b/R/R/imports.R @@ -29,7 +29,7 @@ #' @importFrom ggridges geom_density_ridges #' @importFrom glmnet cv.glmnet glmnet #' @importFrom lares check_opts clusterKmeans formatNum freqs glued removenacols theme_lares `%>%` -#' scale_x_abbr scale_y_abbr v2t +#' scale_x_abbr scale_x_percent scale_y_percent scale_y_abbr v2t #' @importFrom lubridate is.Date day floor_date #' @importFrom minpack.lm nlsLM #' @importFrom nloptr nloptr diff --git a/R/R/outputs.R b/R/R/outputs.R index 61210bcc9..c6734a024 100644 --- a/R/R/outputs.R +++ b/R/R/outputs.R @@ -25,7 +25,7 @@ #' @param plot_pareto Boolean. Set to \code{FALSE} to deactivate plotting #' and saving model one-pagers. Used when testing models. #' @param clusters Boolean. Apply \code{robyn_clusters()} to output models? -#' @param selected Character vector. Which models (by \code{solID}) do you +#' @param select_model Character vector. Which models (by \code{solID}) do you #' wish to plot the one-pagers and export? Default will take top #' \code{robyn_clusters()} results. #' @param csv_out Character. Accepts "pareto" or "all". Default to "pareto". Set @@ -43,7 +43,8 @@ robyn_outputs <- function(InputCollect, OutputModels, plot_folder = getwd(), plot_folder_sub = NULL, plot_pareto = TRUE, csv_out = "pareto", - clusters = TRUE, selected = "clusters", + clusters = TRUE, + select_model = "clusters", ui = FALSE, export = TRUE, quiet = FALSE, ...) { @@ -129,10 +130,10 @@ robyn_outputs <- function(InputCollect, OutputModels, if (plot_pareto) { if (!quiet) message(sprintf( ">>> Exporting %sone-pagers into directory...", ifelse(!OutputCollect$hyper_fixed, "pareto ", ""))) - selected <- if (!clusters | is.null(OutputCollect[["clusters"]])) NULL else selected + select_model <- if (!clusters | is.null(OutputCollect[["clusters"]])) NULL else select_model pareto_onepagers <- robyn_onepagers( InputCollect, OutputCollect, - selected = selected, + select_model = select_model, quiet = quiet, export = export) } diff --git a/R/R/plots.R b/R/R/plots.R index c1a9b7f09..51c1e5daf 100644 --- a/R/R/plots.R +++ b/R/R/plots.R @@ -9,7 +9,6 @@ #' @rdname robyn_outputs #' @export robyn_plots <- function(InputCollect, OutputCollect, export = TRUE) { - check_class("robyn_outputs", OutputCollect) pareto_fronts <- OutputCollect$pareto_fronts hyper_fixed <- OutputCollect$hyper_fixed @@ -19,22 +18,25 @@ robyn_plots <- function(InputCollect, OutputCollect, export = TRUE) { if (!hyper_fixed) { ## Prophet - if (!is.null(InputCollect$prophet_vars) && length(InputCollect$prophet_vars) > 0 - || !is.null(InputCollect$factor_vars) && length(InputCollect$factor_vars) > 0) - { + if (!is.null(InputCollect$prophet_vars) && length(InputCollect$prophet_vars) > 0 || + !is.null(InputCollect$factor_vars) && length(InputCollect$factor_vars) > 0) { dt_plotProphet <- InputCollect$dt_mod[, c("ds", "dep_var", InputCollect$prophet_vars, InputCollect$factor_vars), with = FALSE] dt_plotProphet <- suppressWarnings(melt.data.table(dt_plotProphet, id.vars = "ds")) all_plots[["pProphet"]] <- pProphet <- ggplot( - dt_plotProphet, aes(x = ds, y = value)) + + dt_plotProphet, aes(x = ds, y = value) + ) + geom_line(color = "steelblue") + facet_wrap(~variable, scales = "free", ncol = 1) + labs(title = "Prophet decomposition") + - xlab(NULL) + ylab(NULL) - if (export) ggsave( - paste0(OutputCollect$plot_folder, "prophet_decomp.png"), - plot = pProphet, limitsize = FALSE, - dpi = 600, width = 12, height = 3 * length(levels(dt_plotProphet$variable)) - ) + xlab(NULL) + + ylab(NULL) + if (export) { + ggsave( + paste0(OutputCollect$plot_folder, "prophet_decomp.png"), + plot = pProphet, limitsize = FALSE, + dpi = 600, width = 12, height = 3 * length(levels(dt_plotProphet$variable)) + ) + } } ## Spend exposure model @@ -47,13 +49,15 @@ robyn_plots <- function(InputCollect, OutputCollect, export = TRUE) { title = "Spend-exposure fitting with Michaelis-Menten model", theme = theme(plot.title = element_text(hjust = 0.5)) ) - if (export) ggsave( - paste0(OutputCollect$plot_folder, "spend_exposure_fitting.png"), - plot = pSpendExposure, dpi = 600, width = 12, limitsize = FALSE, - height = ceiling(length(InputCollect$plotNLSCollect) / 3) * 7 - ) + if (export) { + ggsave( + paste0(OutputCollect$plot_folder, "spend_exposure_fitting.png"), + plot = pSpendExposure, dpi = 600, width = 12, limitsize = FALSE, + height = ceiling(length(InputCollect$plotNLSCollect) / 3) * 7 + ) + } } else { - # message("No spend-exposure modelling needed. All media variables used for MMM are spend variables") + # message("No spend-exposure modelling needed. All media variables used for MMM are spend variables") } ## Hyperparameter sampling distribution @@ -62,23 +66,27 @@ robyn_plots <- function(InputCollect, OutputCollect, export = TRUE) { hpnames_updated <- c(names(OutputCollect$OutputModels$hyper_updated), "robynPareto") hpnames_updated <- str_replace(hpnames_updated, "lambda", "lambda_hp") resultHypParam.melted <- melt.data.table(resultHypParam[, hpnames_updated, with = FALSE], - id.vars = c("robynPareto")) + id.vars = c("robynPareto") + ) resultHypParam.melted <- resultHypParam.melted[variable == "lambda_hp", variable := "lambda"] all_plots[["pSamp"]] <- ggplot( - resultHypParam.melted, aes(x = value, y = variable, color = variable, fill = variable)) + + resultHypParam.melted, aes(x = value, y = variable, color = variable, fill = variable) + ) + geom_violin(alpha = .5, size = 0) + geom_point(size = 0.2) + - theme(legend.position = "none") + + theme_lares(legend = "none") + labs( - title = "Hyperparameter optimisation sampling", - subtitle = paste0("Sample distribution", ", iterations = ", OutputCollect$iterations, " * ", OutputCollect$trials, " trial"), + title = "Hyperparameter Optimisation Sampling", + subtitle = paste0("Sample distribution", ", iterations = ", OutputCollect$iterations, " x ", OutputCollect$trials, " trial"), x = "Hyperparameter space", y = NULL ) - if (export) ggsave( - paste0(OutputCollect$plot_folder, "hypersampling.png"), - plot = all_plots$pSamp, dpi = 600, width = 12, height = 7, limitsize = FALSE - ) + if (export) { + ggsave( + paste0(OutputCollect$plot_folder, "hypersampling.png"), + plot = all_plots$pSamp, dpi = 600, width = 12, height = 7, limitsize = FALSE + ) + } } ## Pareto front @@ -91,11 +99,12 @@ robyn_plots <- function(InputCollect, OutputCollect, export = TRUE) { calibrated <- !is.null(InputCollect$calibration_input) pParFront <- ggplot(resultHypParam, aes( - x = .data$nrmse, y = .data$decomp.rssd, colour = .data$iterations)) + + x = .data$nrmse, y = .data$decomp.rssd, colour = .data$iterations + )) + scale_colour_gradient(low = "skyblue", high = "navyblue") + labs( - title = ifelse(!calibrated, "Multi-objective evolutionary performance", - "Multi-objective evolutionary performance with calibration" + title = ifelse(!calibrated, "Multi-objective Evolutionary Performance", + "Multi-objective Evolutionary Performance with Calibration" ), subtitle = sprintf( "2D Pareto fronts with %s, for %s trial%s with %s iterations each", @@ -107,8 +116,8 @@ robyn_plots <- function(InputCollect, OutputCollect, export = TRUE) { colour = "Iterations", size = "MAPE", alpha = NULL - ) #+ - #theme_lares() + ) + + theme_lares() # Add MAPE dimension when calibrated if (calibrated) { pParFront <- pParFront + @@ -127,28 +136,36 @@ robyn_plots <- function(InputCollect, OutputCollect, export = TRUE) { } pParFront <- pParFront + geom_line( data = resultHypParam[robynPareto == pfs], - aes(x = .data$nrmse, y = .data$decomp.rssd), colour = pf_color) + aes(x = .data$nrmse, y = .data$decomp.rssd), colour = pf_color + ) } all_plots[["pParFront"]] <- pParFront - if (export) ggsave( - paste0(OutputCollect$plot_folder, "pareto_front.png"), - plot = pParFront, limitsize = FALSE, - dpi = 600, width = 12, height = 7 - ) + if (export) { + ggsave( + paste0(OutputCollect$plot_folder, "pareto_front.png"), + plot = pParFront, limitsize = FALSE, + dpi = 600, width = 12, height = 7 + ) + } } ## Ridgeline model convergence if (length(temp_all) > 0) { xDecompAgg <- copy(temp_all$xDecompAgg) - dt_ridges <- xDecompAgg[rn %in% InputCollect$paid_media_spends - , .(variables = rn - , roi_total - , iteration = (iterNG-1)*OutputCollect$cores+iterPar - , trial)][order(iteration, variables)] - bin_limits <- c(1,20) - qt_len <- ifelse(OutputCollect$iterations <=100, 1, - ifelse(OutputCollect$iterations > 2000, 20, ceiling(OutputCollect$iterations/100))) - set_qt <- floor(quantile(1:OutputCollect$iterations, seq(0, 1, length.out = qt_len+1))) + dt_ridges <- xDecompAgg[ + rn %in% InputCollect$paid_media_spends, + .( + variables = rn, + roi_total, + iteration = (iterNG - 1) * OutputCollect$cores + iterPar, + trial + ) + ][order(iteration, variables)] + bin_limits <- c(1, 20) + qt_len <- ifelse(OutputCollect$iterations <= 100, 1, + ifelse(OutputCollect$iterations > 2000, 20, ceiling(OutputCollect$iterations / 100)) + ) + set_qt <- floor(quantile(1:OutputCollect$iterations, seq(0, 1, length.out = qt_len + 1))) set_bin <- set_qt[-1] dt_ridges[, iter_bin := cut(dt_ridges$iteration, breaks = set_qt, labels = set_bin)] dt_ridges <- dt_ridges[!is.na(iter_bin)] @@ -157,29 +174,32 @@ robyn_plots <- function(InputCollect, OutputCollect, export = TRUE) { plot_vars <- dt_ridges[, unique(variables)] plot_n <- ceiling(length(plot_vars) / 6) for (pl in 1:plot_n) { - loop_vars <- na.omit(plot_vars[(1:6)+6*(pl-1)]) + loop_vars <- na.omit(plot_vars[(1:6) + 6 * (pl - 1)]) dt_ridges_loop <- dt_ridges[variables %in% loop_vars, ] - all_plots[[paste0("pRidges",pl)]] <- pRidges <- ggplot( - dt_ridges_loop, aes(x = roi_total, y = iter_bin, fill = as.integer(iter_bin), linetype = trial)) + + all_plots[[paste0("pRidges", pl)]] <- pRidges <- ggplot( + dt_ridges_loop, aes(x = roi_total, y = iter_bin, fill = as.integer(iter_bin), linetype = trial) + ) + scale_fill_distiller(palette = "GnBu") + geom_density_ridges(scale = 4, col = "white", quantile_lines = TRUE, quantiles = 2, alpha = 0.7) + - facet_wrap(~ variables, scales = "free") + - guides(fill = "none")+ - theme(panel.background = element_blank()) + - labs(x = "Total ROAS", y = "Iteration Bucket" - ,title = "ROAS distribution over iteration" - ,fill = "iter bucket") - if (export) suppressMessages(ggsave( - paste0(OutputCollect$plot_folder, "roas_convergence",pl,".png"), - plot = pRidges, dpi = 600, width = 12, limitsize = FALSE, - height = ceiling(length(loop_vars) / 3) * 6 - )) + facet_wrap(~ .data$variables, scales = "free") + + guides(fill = "none", linetype = "none") + + theme_lares() + + labs( + x = "Total ROAS by Channel", y = NULL, + title = "ROAS Distribution over Iteration Buckets" + ) + if (export) { + suppressMessages(ggsave( + paste0(OutputCollect$plot_folder, "roas_convergence", pl, ".png"), + plot = pRidges, dpi = 600, width = 12, limitsize = FALSE, + height = ceiling(length(loop_vars) / 3) * 6 + )) + } } } } # End of !hyper_fixed return(invisible(all_plots)) - } @@ -189,18 +209,17 @@ robyn_plots <- function(InputCollect, OutputCollect, export = TRUE) { #' @inheritParams robyn_outputs #' @inheritParams robyn_csv #' @export -robyn_onepagers <- function(InputCollect, OutputCollect, selected = NULL, quiet = FALSE, export = TRUE) { - +robyn_onepagers <- function(InputCollect, OutputCollect, select_model = NULL, quiet = FALSE, export = TRUE) { check_class("robyn_outputs", OutputCollect) pareto_fronts <- OutputCollect$pareto_fronts hyper_fixed <- OutputCollect$hyper_fixed resultHypParam <- copy(OutputCollect$resultHypParam) xDecompAgg <- copy(OutputCollect$xDecompAgg) - if (!is.null(selected)) { - if ("clusters" %in% selected) selected <- OutputCollect$clusters$models$solID - resultHypParam <- resultHypParam[solID %in% selected] - xDecompAgg <- xDecompAgg[solID %in% selected] - if (!quiet) message(">> Exporting only cluster results one-pagers (", nrow(resultHypParam), ")...") + if (!is.null(select_model)) { + if ("clusters" %in% select_model) select_model <- OutputCollect$clusters$models$solID + resultHypParam <- resultHypParam[solID %in% select_model] + xDecompAgg <- xDecompAgg[solID %in% select_model] + if (!quiet) message(">> Generating only cluster results one-pagers (", nrow(resultHypParam), ")...") } # Prepare for parallel plotting @@ -232,91 +251,97 @@ robyn_onepagers <- function(InputCollect, OutputCollect, selected = NULL, quiet plotMediaShare <- xDecompAgg[robynPareto == pf & rn %in% InputCollect$paid_media_spends] uniqueSol <- plotMediaShare[, unique(solID)] - # parallelResult <- for (sid in uniqueSol) { + # parallelResult <- for (sid in uniqueSol) { # sid = uniqueSol[1] parallelResult <- foreach(sid = uniqueSol) %dorng% { - plotMediaShareLoop <- plotMediaShare[solID == sid] rsq_train_plot <- plotMediaShareLoop[, round(unique(rsq_train), 4)] nrmse_plot <- plotMediaShareLoop[, round(unique(nrmse), 4)] decomp_rssd_plot <- plotMediaShareLoop[, round(unique(decomp.rssd), 4)] mape_lift_plot <- ifelse(!is.null(InputCollect$calibration_input), plotMediaShareLoop[, round(unique(mape), 4)], NA) + errors <- paste0( + "R2 train: ", rsq_train_plot, + ", NRMSE = ", nrmse_plot, + ", DECOMP.RSSD = ", decomp_rssd_plot, + ifelse(!is.na(mape_lift_plot), paste0(", MAPE = ", mape_lift_plot), "") + ) + ## 1. Spend x effect share comparison plotMediaShareLoopBar <- temp[[sid]]$plot1data$plotMediaShareLoopBar plotMediaShareLoopLine <- temp[[sid]]$plot1data$plotMediaShareLoopLine ySecScale <- temp[[sid]]$plot1data$ySecScale + plotMediaShareLoopBar$variable <- stringr::str_to_title(gsub("_", " ", plotMediaShareLoopBar$variable)) + plotMediaShareLoopLine$variable <- "Total ROI" p1 <- ggplot(plotMediaShareLoopBar, aes(x = .data$rn, y = .data$value, fill = .data$variable)) + geom_bar(stat = "identity", width = 0.5, position = "dodge") + - geom_text(aes(label = paste0(round(.data$value * 100, 2), "%")), - color = "darkblue", position = position_dodge(width = 0.5), fontface = "bold") + - geom_line(data = plotMediaShareLoopLine, aes( - x = .data$rn, y = .data$value / ySecScale, group = 1, color = .data$variable), - inherit.aes = FALSE) + - geom_point(data = plotMediaShareLoopLine, aes( - x = .data$rn, y = .data$value / ySecScale, group = 1, color = .data$variable), - inherit.aes = FALSE, size = 4) + + geom_text(aes(y = 0, label = paste0(round(.data$value * 100, 1), "%")), + hjust = -.1, color = "darkblue", position = position_dodge(width = 0.5), fontface = "bold" + ) + + geom_line( + data = plotMediaShareLoopLine, aes( + x = .data$rn, y = .data$value / ySecScale, group = 1 + ), + inherit.aes = FALSE, color = "#03396C" + ) + + geom_point( + data = plotMediaShareLoopLine, aes( + x = .data$rn, y = .data$value / ySecScale, group = 1 + ), + inherit.aes = FALSE, size = 4, color = "#03396C" + ) + geom_text( data = plotMediaShareLoopLine, aes( - label = round(.data$value, 2), x = .data$rn, y = .data$value / ySecScale, group = 1, color = .data$variable), - fontface = "bold", inherit.aes = FALSE, hjust = -1, size = 6 + label = round(.data$value, 2), x = .data$rn, y = .data$value / ySecScale, + group = 1, color = .data$variable + ), + fontface = "bold", inherit.aes = FALSE, hjust = -.5, size = 5 ) + - scale_y_continuous(sec.axis = sec_axis(~ . * ySecScale)) + + scale_y_percent() + coord_flip() + - theme(legend.title = element_blank(), legend.position = c(0.9, 0.2), axis.text.x = element_blank()) + - scale_fill_brewer(palette = "Paired") + + theme_lares(axis.text.x = element_blank(), legend = "top") + + scale_fill_brewer(palette = 3) + + scale_color_manual(values = "#03396C") + labs( title = paste0("Share of Spend VS Share of Effect with total ", ifelse(InputCollect$dep_var_type == "conversion", "CPA", "ROI")), - subtitle = paste0( - "rsq_train: ", rsq_train_plot, - ", nrmse = ", nrmse_plot, - ", decomp.rssd = ", decomp_rssd_plot, - ifelse(!is.na(mape_lift_plot), paste0(", mape.lift = ", mape_lift_plot), "") - ), - y = NULL, x = NULL + y = "Total Share by Channel", x = NULL, fill = NULL, color = NULL ) ## 2. Waterfall plotWaterfallLoop <- temp[[sid]]$plot2data$plotWaterfallLoop + plotWaterfallLoop$sign <- ifelse(plotWaterfallLoop$sign == "pos", "Positive", "Negative") p2 <- suppressWarnings( ggplot(plotWaterfallLoop, aes(x = id, fill = sign)) + - geom_rect(aes(x = rn, xmin = id - 0.45, xmax = id + 0.45, - ymin = end, ymax = start), stat = "identity") + + geom_rect(aes( + x = rn, xmin = id - 0.45, xmax = id + 0.45, + ymin = end, ymax = start + ), stat = "identity") + scale_x_discrete("", breaks = levels(plotWaterfallLoop$rn), labels = plotWaterfallLoop$rn) + - theme(axis.text.x = element_text(angle = 65, vjust = 0.6), legend.position = c(0.1, 0.1)) + + scale_y_percent() + + theme_lares(legend = "top") + geom_text(mapping = aes( - label = paste0(format_unit(xDecompAgg), "\n", round(xDecompPerc * 100, 2), "%"), + label = paste0(formatNum(xDecompAgg, abbr = TRUE), "\n", round(xDecompPerc * 100, 1), "%"), y = rowSums(cbind(plotWaterfallLoop$end, plotWaterfallLoop$xDecompPerc / 2)) - ), fontface = "bold") + + ), fontface = "bold", lineheight = .7) + coord_flip() + labs( - title = "Response decomposition waterfall by predictor", - subtitle = paste0( - "rsq_train: ", rsq_train_plot, - ", nrmse = ", nrmse_plot, - ", decomp.rssd = ", decomp_rssd_plot, - ifelse(!is.na(mape_lift_plot), paste0(", mape.lift = ", mape_lift_plot), "") - ), - x = NULL, y = NULL - )) + title = "Response Decomposition Waterfall by Predictor", + x = NULL, y = NULL, fill = "Sign" + ) + ) ## 3. Adstock rate if (InputCollect$adstock == "geometric") { dt_geometric <- temp[[sid]]$plot3data$dt_geometric p3 <- ggplot(dt_geometric, aes(x = .data$channels, y = .data$thetas, fill = "coral")) + geom_bar(stat = "identity", width = 0.5) + - theme(legend.position = "none") + + theme_lares(legend = "none") + coord_flip() + - geom_text(aes(label = paste0(round(thetas * 100, 1), "%")), - position = position_dodge(width = 0.5), fontface = "bold") + - ylim(0, 1) + + geom_text(aes(label = formatNum(100 * thetas, 1, pos = "%")), + hjust = -.1, position = position_dodge(width = 0.5), fontface = "bold" + ) + + scale_y_percent(limit = c(0, 1)) + labs( - title = "Geometric adstock - fixed decay rate over time", - subtitle = paste0( - "rsq_train: ", rsq_train_plot, - ", nrmse = ", nrmse_plot, - ", decomp.rssd = ", decomp_rssd_plot, - ifelse(!is.na(mape_lift_plot), paste0(", mape.lift = ", mape_lift_plot), "") - ), + title = "Geometric Adstock: Fixed Decay Rate Over Time", y = NULL, x = NULL ) } @@ -325,59 +350,57 @@ robyn_onepagers <- function(InputCollect, OutputCollect, selected = NULL, quiet wb_type <- temp[[sid]]$plot3data$wb_type p3 <- ggplot(weibullCollect, aes(x = .data$x, y = .data$decay_accumulated)) + geom_line(aes(color = .data$channel)) + - facet_wrap(~.data$channel) + + facet_wrap(~ .data$channel) + geom_hline(yintercept = 0.5, linetype = "dashed", color = "gray") + geom_text(aes(x = max(.data$x), y = 0.5, vjust = -0.5, hjust = 1, label = "Halflife"), colour = "gray") + - theme(legend.position = "none") + - labs(title = paste0("Weibull adstock ", wb_type," - flexible decay rate over time"), - subtitle = paste0( - "rsq_train: ", rsq_train_plot, - ", nrmse = ", nrmse_plot, - ", decomp.rssd = ", decomp_rssd_plot, - ifelse(!is.na(mape_lift_plot), paste0(", mape.lift = ", mape_lift_plot), "") - ), - x = "Time unit", y = NULL) + theme_lares(legend = "none") + + labs( + title = paste0("Weibull Adstock ", wb_type, ": Flexible Decay Rate Over Time"), + x = "Time Unit", y = NULL + ) } - ## 4. Response curve + ## 4. Response curves dt_scurvePlot <- temp[[sid]]$plot4data$dt_scurvePlot dt_scurvePlotMean <- temp[[sid]]$plot4data$dt_scurvePlotMean if (!"channel" %in% colnames(dt_scurvePlotMean)) dt_scurvePlotMean$channel <- dt_scurvePlotMean$rn - p4 <- ggplot(dt_scurvePlot[dt_scurvePlot$channel %in% InputCollect$paid_media_spends,], - aes(x = .data$spend, y = .data$response, color = .data$channel)) + + p4 <- ggplot( + dt_scurvePlot[dt_scurvePlot$channel %in% InputCollect$paid_media_spends, ], + aes(x = .data$spend, y = .data$response, color = .data$channel) + ) + geom_line() + geom_point(data = dt_scurvePlotMean, aes( - x = .data$mean_spend, y = .data$mean_response, color = .data$channel)) + - geom_text(data = dt_scurvePlotMean, aes( - x = .data$mean_spend, y = .data$mean_response, color = .data$channel, - label = formatNum(.data$mean_spend, 2, abbr = TRUE)), - show.legend = FALSE, hjust = -0.2) + - theme(legend.position = c(0.9, 0.2)) + - labs( - title = "Response curve and mean spend by channel", - subtitle = paste0( - "rsq_train: ", rsq_train_plot, - ", nrmse = ", nrmse_plot, - ", decomp.rssd = ", decomp_rssd_plot, - ifelse(!is.na(mape_lift_plot), paste0(", mape.lift = ", mape_lift_plot), "") + x = .data$mean_spend, y = .data$mean_response, color = .data$channel + )) + + geom_text( + data = dt_scurvePlotMean, aes( + x = .data$mean_spend, y = .data$mean_response, color = .data$channel, + label = formatNum(.data$mean_spend, 2, abbr = TRUE) ), - x = "Spend", y = "Response" - ) + lares::scale_x_abbr() + lares::scale_y_abbr() + show.legend = FALSE, hjust = -0.2 + ) + + theme_lares(pal = 2) + + theme(legend.position = c(0.9, 0.2), + legend.background = element_rect(fill = alpha('grey98', 0.6), color = "grey90")) + + labs( + title = "Response Curves and Mean Spends by Channel", + x = "Spend", y = "Response", color = NULL + ) + + scale_x_abbr() + + scale_y_abbr() ## 5. Fitted vs actual xDecompVecPlotMelted <- temp[[sid]]$plot5data$xDecompVecPlotMelted + xDecompVecPlotMelted$variable <- stringr::str_to_title(xDecompVecPlotMelted$variable) + xDecompVecPlotMelted$linetype <- ifelse(xDecompVecPlotMelted$variable == "Predicted", "solid", "dotted") p5 <- ggplot(xDecompVecPlotMelted, aes(x = .data$ds, y = .data$value, color = .data$variable)) + - geom_line() + - theme(legend.position = c(0.9, 0.9)) + + geom_path(aes(linetype = .data$linetype), size = 0.6) + + theme_lares(legend = "top", pal = 2) + + scale_y_abbr() + + guides(linetype = "none") + labs( - title = "Actual vs. predicted response", - subtitle = paste0( - "rsq_train: ", rsq_train_plot, - ", nrmse = ", nrmse_plot, - ", decomp.rssd = ", decomp_rssd_plot, - ifelse(!is.na(mape_lift_plot), paste0(", mape.lift = ", mape_lift_plot), "") - ), - x = "Date", y = "Response" + title = "Actual vs. Predicted Response", + x = "Date", y = "Response", color = NULL ) ## 6. Diagnostic: fitted vs residual @@ -385,19 +408,21 @@ robyn_onepagers <- function(InputCollect, OutputCollect, selected = NULL, quiet p6 <- qplot(x = .data$predicted, y = .data$actual - .data$predicted, data = xDecompVecPlot) + geom_hline(yintercept = 0) + geom_smooth(se = TRUE, method = "loess", formula = "y ~ x") + - xlab("Fitted") + ylab("Residual") + ggtitle("Fitted vs. Residual") + scale_x_abbr() + scale_y_abbr() + + theme_lares() + + labs(x = "Fitted", y = "Residual", title = "Fitted vs. Residual") ## Aggregate one-pager plots and export - onepagerTitle <- paste0("Model one-pager, on pareto front ", pf, ", ID: ", sid) + onepagerTitle <- paste0("Model One-pager, on Pareto Front ", pf, ", ID: ", sid) pg <- wrap_plots(p2, p5, p1, p4, p3, p6, ncol = 2) + - plot_annotation(title = onepagerTitle, theme = theme(plot.title = element_text(hjust = 0.5))) + plot_annotation(title = onepagerTitle, subtitle = errors, theme = theme_lares(background = "white")) all_plots[[sid]] <- pg if (export) { ggsave( filename = paste0(OutputCollect$plot_folder, "/", sid, ".png"), plot = pg, limitsize = FALSE, - dpi = 600, width = 18, height = 18 + dpi = 400, width = 18, height = 18 ) } if (check_parallel_plot() & !quiet & count_mod_out > 0) { @@ -414,11 +439,9 @@ robyn_onepagers <- function(InputCollect, OutputCollect, selected = NULL, quiet # Stop cluster to avoid memory leaks if (check_parallel_plot()) stopImplicitCluster() return(invisible(all_plots)) - } allocation_plots <- function(InputCollect, OutputCollect, dt_optimOut, select_model, scenario, export = TRUE, quiet = FALSE) { - subtitle <- paste0( "Total spend increase: ", dt_optimOut[ , round(mean(optmSpendUnitTotalDelta) * 100, 1) @@ -447,7 +470,7 @@ allocation_plots <- function(InputCollect, OutputCollect, dt_optimOut, select_mo geom_bar(stat = "identity", width = 0.5, position = position_dodge2(reverse = TRUE, padding = 0)) + scale_fill_brewer(palette = 3) + geom_text(aes(x = 0, label = formatNum(.data$response, 0), hjust = -0.1), - position = position_dodge2(width = 0.5, reverse = TRUE), fontface = "bold", show.legend = FALSE + position = position_dodge2(width = 0.5, reverse = TRUE), fontface = "bold", show.legend = FALSE ) + theme_lares(legend = "top") + scale_x_abbr() + @@ -469,10 +492,10 @@ allocation_plots <- function(InputCollect, OutputCollect, dt_optimOut, select_mo geom_bar(stat = "identity", width = 0.5, position = position_dodge2(reverse = TRUE, padding = 0)) + scale_fill_brewer(palette = 3) + geom_text(aes(x = 0, label = formatNum(.data$spend_share * 100, signif = 3, pos = "%"), hjust = -0.1), - position = position_dodge2(width = 0.5, reverse = TRUE), fontface = "bold", show.legend = FALSE + position = position_dodge2(width = 0.5, reverse = TRUE), fontface = "bold", show.legend = FALSE ) + theme_lares(legend = "top") + - lares::scale_x_percent() + + scale_x_percent() + labs( title = "Initial vs. Optimised Budget Allocation", subtitle = subtitle, @@ -490,30 +513,39 @@ allocation_plots <- function(InputCollect, OutputCollect, dt_optimOut, select_mo plotDT_scurve <- plotDT_scurve[spend >= 0] # remove outlier introduced by MM nls fitting plotDT_scurveMeanResponse <- OutputCollect$xDecompAgg[solID == select_model & rn %in% InputCollect$paid_media_spends] dt_optimOutScurve <- rbind(dt_optimOut[, .(channels, initSpendUnit, initResponseUnit)][, type := "Initial"], - dt_optimOut[, .(channels, optmSpendUnit, optmResponseUnit)][, type := "Optimised"], use.names = FALSE) + dt_optimOut[, .(channels, optmSpendUnit, optmResponseUnit)][, type := "Optimised"], + use.names = FALSE + ) setnames(dt_optimOutScurve, c("channels", "spend", "response", "type")) dt_optimOutScurve$hjust <- ifelse(dt_optimOutScurve$type == "Initial", 1.2, -0.2) p14 <- ggplot(data = plotDT_scurve, aes(x = .data$spend, y = .data$response, color = .data$channel)) + geom_line() + geom_point(data = dt_optimOutScurve, aes( x = .data$spend, y = .data$response, - color = .data$channels, shape = .data$type), size = 2.5) + - geom_text(data = dt_optimOutScurve, aes( - x = .data$spend, y = .data$response, color = .data$channels, - hjust = .data$hjust, - label = formatNum(.data$spend, 2, abbr = TRUE)), - show.legend = FALSE) + + color = .data$channels, shape = .data$type + ), size = 2.5) + + geom_text( + data = dt_optimOutScurve, aes( + x = .data$spend, y = .data$response, color = .data$channels, + hjust = .data$hjust, + label = formatNum(.data$spend, 2, abbr = TRUE) + ), + show.legend = FALSE + ) + theme_lares(legend.position = c(0.9, 0.4), pal = 2) + theme(legend.title = element_blank()) + labs( title = "Response Curve and Mean Spend* by Channel", - subtitle = errors, x = "Spend", y = "Response", - caption = sprintf("*Based on date range: %s to %s (%s)", - dt_optimOut$date_min[1], - dt_optimOut$date_max[1], - dt_optimOut$periods[1]) - ) + lares::scale_x_abbr() + lares::scale_y_abbr() + caption = sprintf( + "*Based on date range: %s to %s (%s)", + dt_optimOut$date_min[1], + dt_optimOut$date_max[1], + dt_optimOut$periods[1] + ) + ) + + scale_x_abbr() + + scale_y_abbr() # Gather all plots if (export) { @@ -524,7 +556,7 @@ allocation_plots <- function(InputCollect, OutputCollect, dt_optimOut, select_mo p12 <- p12 + labs(subtitle = NULL) plots <- (p13 + p12) / p14 + plot_annotation( title = grobTitle, subtitle = subtitle, - theme = theme_lares(plot.title = element_text(hjust = 0.5), background = "white") + theme = theme_lares(background = "white") ) if (!quiet) message("Exporting charts into file: ", filename) ggsave( @@ -535,5 +567,4 @@ allocation_plots <- function(InputCollect, OutputCollect, dt_optimOut, select_mo } return(list(p12 = p12, p13 = p13, p14 = p14)) - } diff --git a/R/man/robyn_onepagers.Rd b/R/man/robyn_onepagers.Rd index 3d01759c1..188092d1a 100644 --- a/R/man/robyn_onepagers.Rd +++ b/R/man/robyn_onepagers.Rd @@ -7,7 +7,7 @@ robyn_onepagers( InputCollect, OutputCollect, - selected = NULL, + select_model = NULL, quiet = FALSE, export = TRUE ) @@ -17,7 +17,7 @@ robyn_onepagers( \item{OutputCollect}{\code{robyn_run(..., export = FALSE)} output.} -\item{selected}{Character vector. Which models (by \code{solID}) do you +\item{select_model}{Character vector. Which models (by \code{solID}) do you wish to plot the one-pagers and export? Default will take top \code{robyn_clusters()} results.} diff --git a/R/man/robyn_outputs.Rd b/R/man/robyn_outputs.Rd index b540d9996..a0d6df532 100644 --- a/R/man/robyn_outputs.Rd +++ b/R/man/robyn_outputs.Rd @@ -17,7 +17,7 @@ robyn_outputs( plot_pareto = TRUE, csv_out = "pareto", clusters = TRUE, - selected = "clusters", + select_model = "clusters", ui = FALSE, export = TRUE, quiet = FALSE, @@ -56,7 +56,7 @@ to "all" will output all iterations as csv. Set NULL to skip exports into CSVs.} \item{clusters}{Boolean. Apply \code{robyn_clusters()} to output models?} -\item{selected}{Character vector. Which models (by \code{solID}) do you +\item{select_model}{Character vector. Which models (by \code{solID}) do you wish to plot the one-pagers and export? Default will take top \code{robyn_clusters()} results.} From 3d7bd336a8f4e46f444c76dd7e7d2161cf479754 Mon Sep 17 00:00:00 2001 From: Bernardo Lares Date: Fri, 11 Mar 2022 11:47:45 -0500 Subject: [PATCH 06/17] format: set grids on Y only plots --- R/R/plots.R | 21 +++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git a/R/R/plots.R b/R/R/plots.R index 51c1e5daf..1195ec07a 100644 --- a/R/R/plots.R +++ b/R/R/plots.R @@ -285,22 +285,21 @@ robyn_onepagers <- function(InputCollect, OutputCollect, select_model = NULL, qu ) + geom_point( data = plotMediaShareLoopLine, aes( - x = .data$rn, y = .data$value / ySecScale, group = 1 + x = .data$rn, y = .data$value / ySecScale, group = 1, color = "Total ROI" ), - inherit.aes = FALSE, size = 4, color = "#03396C" + inherit.aes = FALSE, size = 4 ) + geom_text( data = plotMediaShareLoopLine, aes( - label = round(.data$value, 2), x = .data$rn, y = .data$value / ySecScale, - group = 1, color = .data$variable + label = round(.data$value, 2), x = .data$rn, y = .data$value / ySecScale, group = 1 ), fontface = "bold", inherit.aes = FALSE, hjust = -.5, size = 5 ) + scale_y_percent() + coord_flip() + - theme_lares(axis.text.x = element_blank(), legend = "top") + + theme_lares(axis.text.x = element_blank(), legend = "top", grid = "Xx") + scale_fill_brewer(palette = 3) + - scale_color_manual(values = "#03396C") + + scale_color_manual(values = c("Total ROI" = "#03396C")) + labs( title = paste0("Share of Spend VS Share of Effect with total ", ifelse(InputCollect$dep_var_type == "conversion", "CPA", "ROI")), y = "Total Share by Channel", x = NULL, fill = NULL, color = NULL @@ -334,7 +333,7 @@ robyn_onepagers <- function(InputCollect, OutputCollect, select_model = NULL, qu dt_geometric <- temp[[sid]]$plot3data$dt_geometric p3 <- ggplot(dt_geometric, aes(x = .data$channels, y = .data$thetas, fill = "coral")) + geom_bar(stat = "identity", width = 0.5) + - theme_lares(legend = "none") + + theme_lares(legend = "none", grid = "Xx") + coord_flip() + geom_text(aes(label = formatNum(100 * thetas, 1, pos = "%")), hjust = -.1, position = position_dodge(width = 0.5), fontface = "bold" @@ -353,7 +352,7 @@ robyn_onepagers <- function(InputCollect, OutputCollect, select_model = NULL, qu facet_wrap(~ .data$channel) + geom_hline(yintercept = 0.5, linetype = "dashed", color = "gray") + geom_text(aes(x = max(.data$x), y = 0.5, vjust = -0.5, hjust = 1, label = "Halflife"), colour = "gray") + - theme_lares(legend = "none") + + theme_lares(legend = "none", grid = "Xx") + labs( title = paste0("Weibull Adstock ", wb_type, ": Flexible Decay Rate Over Time"), x = "Time Unit", y = NULL @@ -380,8 +379,10 @@ robyn_onepagers <- function(InputCollect, OutputCollect, select_model = NULL, qu show.legend = FALSE, hjust = -0.2 ) + theme_lares(pal = 2) + - theme(legend.position = c(0.9, 0.2), - legend.background = element_rect(fill = alpha('grey98', 0.6), color = "grey90")) + + theme( + legend.position = c(0.9, 0.2), + legend.background = element_rect(fill = alpha("grey98", 0.6), color = "grey90") + ) + labs( title = "Response Curves and Mean Spends by Channel", x = "Spend", y = "Response", color = NULL From cc21c501404de03e1f72204675f0c62379af6169 Mon Sep 17 00:00:00 2001 From: Bernardo Lares Date: Fri, 11 Mar 2022 14:43:39 -0500 Subject: [PATCH 07/17] format: last plots fixed and formated on plots.R --- R/R/plots.R | 60 +++++++++++++++++++++++++++++++---------------------- 1 file changed, 35 insertions(+), 25 deletions(-) diff --git a/R/R/plots.R b/R/R/plots.R index 1195ec07a..fb67f0916 100644 --- a/R/R/plots.R +++ b/R/R/plots.R @@ -28,8 +28,7 @@ robyn_plots <- function(InputCollect, OutputCollect, export = TRUE) { geom_line(color = "steelblue") + facet_wrap(~variable, scales = "free", ncol = 1) + labs(title = "Prophet decomposition") + - xlab(NULL) + - ylab(NULL) + xlab(NULL) + ylab(NULL) + theme_lares() if (export) { ggsave( paste0(OutputCollect$plot_folder, "prophet_decomp.png"), @@ -443,6 +442,7 @@ robyn_onepagers <- function(InputCollect, OutputCollect, select_model = NULL, qu } allocation_plots <- function(InputCollect, OutputCollect, dt_optimOut, select_model, scenario, export = TRUE, quiet = FALSE) { + subtitle <- paste0( "Total spend increase: ", dt_optimOut[ , round(mean(optmSpendUnitTotalDelta) * 100, 1) @@ -452,6 +452,16 @@ allocation_plots <- function(InputCollect, OutputCollect, dt_optimOut, select_mo ], "% with optimised spend allocation" ) + plotDT_saturation <- melt.data.table(OutputCollect$mediaVecCollect[ + solID == select_model & type == "saturatedSpendReversed" + ], id.vars = "ds", measure.vars = InputCollect$paid_media_spends, value.name = "spend", variable.name = "channel") + plotDT_decomp <- melt.data.table(OutputCollect$mediaVecCollect[ + solID == select_model & type == "decompMedia" + ], id.vars = "ds", measure.vars = InputCollect$paid_media_spends, value.name = "response", variable.name = "channel") + plotDT_scurve <- cbind(plotDT_saturation, plotDT_decomp[, .(response)]) + plotDT_scurve <- plotDT_scurve[spend >= 0] # remove outlier introduced by MM nls fitting + plotDT_scurveMeanResponse <- OutputCollect$xDecompAgg[solID == select_model & rn %in% InputCollect$paid_media_spends] + errors <- paste0( "R2 train: ", plotDT_scurveMeanResponse[, round(mean(rsq_train), 4)], ", NRMSE = ", plotDT_scurveMeanResponse[, round(mean(nrmse), 4)], @@ -492,7 +502,7 @@ allocation_plots <- function(InputCollect, OutputCollect, dt_optimOut, select_mo p13 <- ggplot(plotDT_share, aes(y = .data$channel, x = .data$spend_share, fill = .data$variable)) + geom_bar(stat = "identity", width = 0.5, position = position_dodge2(reverse = TRUE, padding = 0)) + scale_fill_brewer(palette = 3) + - geom_text(aes(x = 0, label = formatNum(.data$spend_share * 100, signif = 3, pos = "%"), hjust = -0.1), + geom_text(aes(x = 0, label = formatNum(.data$spend_share * 100, 1, pos = "%"), hjust = -0.1), position = position_dodge2(width = 0.5, reverse = TRUE), fontface = "bold", show.legend = FALSE ) + theme_lares(legend = "top") + @@ -504,40 +514,39 @@ allocation_plots <- function(InputCollect, OutputCollect, dt_optimOut, select_mo ) ## 3. Response curve - plotDT_saturation <- melt.data.table(OutputCollect$mediaVecCollect[ - solID == select_model & type == "saturatedSpendReversed" - ], id.vars = "ds", measure.vars = InputCollect$paid_media_spends, value.name = "spend", variable.name = "channel") - plotDT_decomp <- melt.data.table(OutputCollect$mediaVecCollect[ - solID == select_model & type == "decompMedia" - ], id.vars = "ds", measure.vars = InputCollect$paid_media_spends, value.name = "response", variable.name = "channel") - plotDT_scurve <- cbind(plotDT_saturation, plotDT_decomp[, .(response)]) - plotDT_scurve <- plotDT_scurve[spend >= 0] # remove outlier introduced by MM nls fitting - plotDT_scurveMeanResponse <- OutputCollect$xDecompAgg[solID == select_model & rn %in% InputCollect$paid_media_spends] - dt_optimOutScurve <- rbind(dt_optimOut[, .(channels, initSpendUnit, initResponseUnit)][, type := "Initial"], + dt_optimOutScurve <- rbind( + dt_optimOut[, .(channels, initSpendUnit, initResponseUnit)][, type := "Initial"], dt_optimOut[, .(channels, optmSpendUnit, optmResponseUnit)][, type := "Optimised"], use.names = FALSE ) setnames(dt_optimOutScurve, c("channels", "spend", "response", "type")) - dt_optimOutScurve$hjust <- ifelse(dt_optimOutScurve$type == "Initial", 1.2, -0.2) + dt_optimOutScurve <- dt_optimOutScurve %>% + mutate(hjust = ifelse(.data$type == "Initial", 1.2, -0.2)) %>% + group_by(.data$channels) %>% + mutate(spend_dif = dplyr::last(.data$spend) - dplyr::first(.data$spend), + response_dif = dplyr::last(.data$response) - dplyr::first(.data$response)) p14 <- ggplot(data = plotDT_scurve, aes(x = .data$spend, y = .data$response, color = .data$channel)) + geom_line() + geom_point(data = dt_optimOutScurve, aes( x = .data$spend, y = .data$response, color = .data$channels, shape = .data$type ), size = 2.5) + - geom_text( - data = dt_optimOutScurve, aes( - x = .data$spend, y = .data$response, color = .data$channels, - hjust = .data$hjust, - label = formatNum(.data$spend, 2, abbr = TRUE) - ), - show.legend = FALSE + # geom_text( + # data = dt_optimOutScurve, aes( + # x = .data$spend, y = .data$response, color = .data$channels, + # hjust = .data$hjust, + # label = formatNum(.data$spend, 2, abbr = TRUE) + # ), + # show.legend = FALSE + # ) + + theme_lares(legend.position = c(0.9, 0), pal = 2) + + theme( + legend.position = c(0.9, 0.5), + legend.background = element_rect(fill = alpha("grey98", 0.6), color = "grey90") ) + - theme_lares(legend.position = c(0.9, 0.4), pal = 2) + - theme(legend.title = element_blank()) + labs( title = "Response Curve and Mean Spend* by Channel", - x = "Spend", y = "Response", + x = "Spend", y = "Response", shape = NULL, color = NULL, caption = sprintf( "*Based on date range: %s to %s (%s)", dt_optimOut$date_min[1], @@ -555,7 +564,8 @@ allocation_plots <- function(InputCollect, OutputCollect, dt_optimOut, select_mo filename <- paste0(OutputCollect$plot_folder, select_model, "_reallocated_", scenario, ".png") p13 <- p13 + labs(subtitle = NULL) p12 <- p12 + labs(subtitle = NULL) - plots <- (p13 + p12) / p14 + plot_annotation( + plots <- ((p13 + p12) / p14) + plot_annotation( + # plots <- ((p13 / p12) | p14) + plot_annotation( title = grobTitle, subtitle = subtitle, theme = theme_lares(background = "white") ) From 32a246608f12d90970cfc62f48ddd2498c5abbaa Mon Sep 17 00:00:00 2001 From: Bernardo Lares Date: Wed, 16 Mar 2022 13:30:53 -0500 Subject: [PATCH 08/17] feat: plot method for robyn_allocator objects --- R/NAMESPACE | 1 + R/R/allocator.R | 6 ++++++ R/R/plots.R | 32 +++++++++++++++++++------------- R/man/robyn_allocator.Rd | 3 +++ demo/demo.R | 4 ---- 5 files changed, 29 insertions(+), 17 deletions(-) diff --git a/R/NAMESPACE b/R/NAMESPACE index df14087fb..e8dfd2fce 100644 --- a/R/NAMESPACE +++ b/R/NAMESPACE @@ -1,5 +1,6 @@ # Generated by roxygen2: do not edit by hand +S3method(plot,robyn_allocator) S3method(print,robyn_allocator) S3method(print,robyn_inputs) S3method(print,robyn_models) diff --git a/R/R/allocator.R b/R/R/allocator.R index 7f5a18cdf..3e1e6aae4 100644 --- a/R/R/allocator.R +++ b/R/R/allocator.R @@ -373,6 +373,12 @@ Allocation Summary: )) } +#' @rdname robyn_allocator +#' @aliases robyn_allocator +#' @param x \code{robyn_allocator()} output. +#' @export +plot.robyn_allocator <- function(x, ...) plot(x$plots$plots, ...) + robyn_import <- function(robyn_object, select_build, quiet) { if (!file.exists(robyn_object)) { stop("File does not exist or is somewhere else. Check: ", robyn_object) diff --git a/R/R/plots.R b/R/R/plots.R index fb67f0916..9a66ceefa 100644 --- a/R/R/plots.R +++ b/R/R/plots.R @@ -443,6 +443,8 @@ robyn_onepagers <- function(InputCollect, OutputCollect, select_model = NULL, qu allocation_plots <- function(InputCollect, OutputCollect, dt_optimOut, select_model, scenario, export = TRUE, quiet = FALSE) { + outputs <- list() + subtitle <- paste0( "Total spend increase: ", dt_optimOut[ , round(mean(optmSpendUnitTotalDelta) * 100, 1) @@ -477,7 +479,8 @@ allocation_plots <- function(InputCollect, OutputCollect, dt_optimOut, select_mo fcts <- c("channel", "Initial Response / Time Unit", "Optimised Response / Time Unit") setnames(plotDT_resp, names(plotDT_resp), new = fcts) plotDT_resp <- suppressWarnings(melt.data.table(plotDT_resp, id.vars = "channel", value.name = "response")) - p12 <- ggplot(plotDT_resp, aes(y = .data$channel, x = .data$response, fill = reorder(.data$variable, as.numeric(.data$variable)))) + + outputs[["p12"]] <- p12 <- ggplot(plotDT_resp, aes( + y = .data$channel, x = .data$response, fill = reorder(.data$variable, as.numeric(.data$variable)))) + geom_bar(stat = "identity", width = 0.5, position = position_dodge2(reverse = TRUE, padding = 0)) + scale_fill_brewer(palette = 3) + geom_text(aes(x = 0, label = formatNum(.data$response, 0), hjust = -0.1), @@ -499,7 +502,8 @@ allocation_plots <- function(InputCollect, OutputCollect, dt_optimOut, select_mo fcts <- c("channel", "Initial Avg. Spend Share", "Optimised Avg. Spend Share") setnames(plotDT_share, names(plotDT_share), new = fcts) plotDT_share <- suppressWarnings(melt.data.table(plotDT_share, id.vars = "channel", value.name = "spend_share")) - p13 <- ggplot(plotDT_share, aes(y = .data$channel, x = .data$spend_share, fill = .data$variable)) + + outputs[["p13"]] <- p13 <- ggplot(plotDT_share, aes( + y = .data$channel, x = .data$spend_share, fill = .data$variable)) + geom_bar(stat = "identity", width = 0.5, position = position_dodge2(reverse = TRUE, padding = 0)) + scale_fill_brewer(palette = 3) + geom_text(aes(x = 0, label = formatNum(.data$spend_share * 100, 1, pos = "%"), hjust = -0.1), @@ -525,7 +529,8 @@ allocation_plots <- function(InputCollect, OutputCollect, dt_optimOut, select_mo group_by(.data$channels) %>% mutate(spend_dif = dplyr::last(.data$spend) - dplyr::first(.data$spend), response_dif = dplyr::last(.data$response) - dplyr::first(.data$response)) - p14 <- ggplot(data = plotDT_scurve, aes(x = .data$spend, y = .data$response, color = .data$channel)) + + outputs[["p14"]] <- p14 <- ggplot(data = plotDT_scurve, aes( + x = .data$spend, y = .data$response, color = .data$channel)) + geom_line() + geom_point(data = dt_optimOutScurve, aes( x = .data$spend, y = .data$response, @@ -557,19 +562,20 @@ allocation_plots <- function(InputCollect, OutputCollect, dt_optimOut, select_mo scale_x_abbr() + scale_y_abbr() + # Gather all plots into a single one + p13 <- p13 + labs(subtitle = NULL) + p12 <- p12 + labs(subtitle = NULL) + outputs[["plots"]] <- plots <- ((p13 + p12) / p14) + plot_annotation( + title = paste0("Budget Allocator Optimum Result for Model ID ", select_model), + subtitle = subtitle, + theme = theme_lares(background = "white") + ) + # Gather all plots if (export) { - grobTitle <- paste0("Budget Allocator Optimum Result for Model ID ", select_model) + if (!quiet) message("Exporting charts into file: ", filename) scenario <- ifelse(scenario == "max_historical_response", "hist", "respo") filename <- paste0(OutputCollect$plot_folder, select_model, "_reallocated_", scenario, ".png") - p13 <- p13 + labs(subtitle = NULL) - p12 <- p12 + labs(subtitle = NULL) - plots <- ((p13 + p12) / p14) + plot_annotation( - # plots <- ((p13 / p12) | p14) + plot_annotation( - title = grobTitle, subtitle = subtitle, - theme = theme_lares(background = "white") - ) - if (!quiet) message("Exporting charts into file: ", filename) ggsave( filename = filename, plot = plots, limitsize = FALSE, @@ -577,5 +583,5 @@ allocation_plots <- function(InputCollect, OutputCollect, dt_optimOut, select_mo ) } - return(list(p12 = p12, p13 = p13, p14 = p14)) + return(invisible(outputs)) } diff --git a/R/man/robyn_allocator.Rd b/R/man/robyn_allocator.Rd index 682d25e4c..b8627cac0 100644 --- a/R/man/robyn_allocator.Rd +++ b/R/man/robyn_allocator.Rd @@ -3,6 +3,7 @@ \name{robyn_allocator} \alias{robyn_allocator} \alias{print.robyn_allocator} +\alias{plot.robyn_allocator} \title{Budget Allocator} \usage{ robyn_allocator( @@ -27,6 +28,8 @@ robyn_allocator( ) \method{print}{robyn_allocator}(x, ...) + +\method{plot}{robyn_allocator}(x, ...) } \arguments{ \item{robyn_object}{Character. Path of the \code{Robyn.RDS} object diff --git a/demo/demo.R b/demo/demo.R index abbba5e27..58898593b 100644 --- a/demo/demo.R +++ b/demo/demo.R @@ -360,8 +360,6 @@ AllocatorCollect <- robyn_allocator( , date_max = "2018-02-01" ) print(AllocatorCollect) -AllocatorCollect$dt_optimOut -AllocatorCollect$plots$p14 # Run the "max_response_expected_spend" scenario: "What's the maximum response for a given # total spend based on historical saturation and what is the spend mix?" "optmSpendShareUnit" @@ -377,8 +375,6 @@ AllocatorCollect <- robyn_allocator( , expected_spend_days = 7 # Duration of expected_spend in days ) print(AllocatorCollect) -AllocatorCollect$dt_optimOut -AllocatorCollect$plots$p14 ## A csv is exported into the folder for further usage. Check schema here: ## https://github.com/facebookexperimental/Robyn/blob/main/demo/schema.R From 7efd8d0f2815296bbc8eae7f1241c8c018e6f2f6 Mon Sep 17 00:00:00 2001 From: Bernardo Lares Date: Wed, 16 Mar 2022 14:28:45 -0500 Subject: [PATCH 09/17] fix: filename first, message next --- R/R/plots.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/R/plots.R b/R/R/plots.R index 9a66ceefa..2b6162e44 100644 --- a/R/R/plots.R +++ b/R/R/plots.R @@ -573,9 +573,9 @@ allocation_plots <- function(InputCollect, OutputCollect, dt_optimOut, select_mo # Gather all plots if (export) { - if (!quiet) message("Exporting charts into file: ", filename) scenario <- ifelse(scenario == "max_historical_response", "hist", "respo") filename <- paste0(OutputCollect$plot_folder, select_model, "_reallocated_", scenario, ".png") + if (!quiet) message("Exporting charts into file: ", filename) ggsave( filename = filename, plot = plots, limitsize = FALSE, From 26616a00cbd84a405d5eb95346a7f43bec9e7170 Mon Sep 17 00:00:00 2001 From: Bernardo Lares Date: Wed, 16 Mar 2022 14:42:38 -0500 Subject: [PATCH 10/17] docs: demo.R update --- demo/demo.R | 2 ++ 1 file changed, 2 insertions(+) diff --git a/demo/demo.R b/demo/demo.R index 58898593b..f8f85a5ea 100644 --- a/demo/demo.R +++ b/demo/demo.R @@ -360,6 +360,7 @@ AllocatorCollect <- robyn_allocator( , date_max = "2018-02-01" ) print(AllocatorCollect) +# plot(AllocatorCollect) # Run the "max_response_expected_spend" scenario: "What's the maximum response for a given # total spend based on historical saturation and what is the spend mix?" "optmSpendShareUnit" @@ -375,6 +376,7 @@ AllocatorCollect <- robyn_allocator( , expected_spend_days = 7 # Duration of expected_spend in days ) print(AllocatorCollect) +# plot(AllocatorCollect) ## A csv is exported into the folder for further usage. Check schema here: ## https://github.com/facebookexperimental/Robyn/blob/main/demo/schema.R From d0cd7d82d6177f45751ca6d8649f76b69c5fff01 Mon Sep 17 00:00:00 2001 From: Bernardo Lares Date: Thu, 17 Mar 2022 09:50:19 -0500 Subject: [PATCH 11/17] recode: simplify df builds --- R/R/allocator.R | 18 ++++++++------- R/R/plots.R | 61 +++++++++++++++++++++++-------------------------- 2 files changed, 39 insertions(+), 40 deletions(-) diff --git a/R/R/allocator.R b/R/R/allocator.R index 3e1e6aae4..295ae1c92 100644 --- a/R/R/allocator.R +++ b/R/R/allocator.R @@ -129,11 +129,9 @@ robyn_allocator <- function(robyn_object = NULL, paid_media_spends <- InputCollect$paid_media_spends mediaVarSorted <- paid_media_vars[media_order] mediaSpendSorted <- paid_media_spends[media_order] - exposure_vars <- InputCollect$exposure_vars startRW <- InputCollect$rollingWindowStartWhich endRW <- InputCollect$rollingWindowEndWhich adstock <- InputCollect$adstock - spendExpoMod <- InputCollect$modNLSCollect } ## Check inputs and parameters @@ -143,11 +141,13 @@ robyn_allocator <- function(robyn_object = NULL, expected_spend, expected_spend_days, constr_mode ) - # Channel contrains + # Channels contrains # channel_constr_low <- rep(0.8, length(paid_media_spends)) # channel_constr_up <- rep(1.2, length(paid_media_spends)) names(channel_constr_low) <- paid_media_spends names(channel_constr_up) <- paid_media_spends + channel_constr_low <- channel_constr_low[media_order] + channel_constr_up <- channel_constr_up[media_order] # Hyper-parameters and results dt_hyppar <- OutputCollect$resultHypParam[solID == select_model] @@ -170,8 +170,8 @@ robyn_allocator <- function(robyn_object = NULL, dt_hyppar <- dt_hyppar[, .SD, .SDcols = hyper_names(adstock, mediaSpendSortedFiltered)] setcolorder(dt_hyppar, sort(names(dt_hyppar))) dt_bestCoef <- dt_bestCoef[rn %in% mediaSpendSortedFiltered] - channelConstrLowSorted <- channel_constr_low[media_order][coefSelectorSorted] - channelConstrUpSorted <- channel_constr_up[media_order][coefSelectorSorted] + channelConstrLowSorted <- channel_constr_low[coefSelectorSorted] + channelConstrUpSorted <- channel_constr_up[coefSelectorSorted] ## Get adstock parameters for each channel getAdstockHypPar <- get_adstock_params(InputCollect, dt_hyppar) @@ -286,12 +286,15 @@ robyn_allocator <- function(robyn_object = NULL, ## Collect output dt_optimOut <- data.table( + solID = select_model, channels = mediaSpendSortedFiltered, date_min = date_min, date_max = date_max, periods = sprintf("%s %ss", nPeriod, InputCollect$intervalType), + constr_low = channel_constr_low, + constr_up = channel_constr_up, # Initial - histSpend = histSpend[mediaSpendSortedFiltered], + histSpend = histSpend, histSpendTotal = histSpendTotal, initSpendUnitTotal = histSpendUnitTotal, initSpendUnit = histSpendUnit, @@ -312,8 +315,7 @@ robyn_allocator <- function(robyn_object = NULL, optmResponseUnit = -eval_f(nlsMod$solution)[["objective.channel"]], optmResponseUnitTotal = sum(-eval_f(nlsMod$solution)[["objective.channel"]]), optmRoiUnit = -eval_f(nlsMod$solution)[["objective.channel"]] / nlsMod$solution, - optmResponseUnitLift = (-eval_f(nlsMod$solution)[["objective.channel"]] / histResponseUnitModel) - 1, - solID = select_model + optmResponseUnitLift = (-eval_f(nlsMod$solution)[["objective.channel"]] / histResponseUnitModel) - 1 ) dt_optimOut[, optmResponseUnitTotalLift := (optmResponseUnitTotal / initResponseUnitTotal) - 1] .Options$ROBYN_TEMP <- NULL # Clean auxiliary method diff --git a/R/R/plots.R b/R/R/plots.R index 2b6162e44..5f43bd01b 100644 --- a/R/R/plots.R +++ b/R/R/plots.R @@ -454,16 +454,9 @@ allocation_plots <- function(InputCollect, OutputCollect, dt_optimOut, select_mo ], "% with optimised spend allocation" ) - plotDT_saturation <- melt.data.table(OutputCollect$mediaVecCollect[ - solID == select_model & type == "saturatedSpendReversed" - ], id.vars = "ds", measure.vars = InputCollect$paid_media_spends, value.name = "spend", variable.name = "channel") - plotDT_decomp <- melt.data.table(OutputCollect$mediaVecCollect[ - solID == select_model & type == "decompMedia" - ], id.vars = "ds", measure.vars = InputCollect$paid_media_spends, value.name = "response", variable.name = "channel") - plotDT_scurve <- cbind(plotDT_saturation, plotDT_decomp[, .(response)]) - plotDT_scurve <- plotDT_scurve[spend >= 0] # remove outlier introduced by MM nls fitting - plotDT_scurveMeanResponse <- OutputCollect$xDecompAgg[solID == select_model & rn %in% InputCollect$paid_media_spends] - + # Calculate errors for subtitles + plotDT_scurveMeanResponse <- OutputCollect$xDecompAgg[ + solID == select_model & rn %in% InputCollect$paid_media_spends] errors <- paste0( "R2 train: ", plotDT_scurveMeanResponse[, round(mean(rsq_train), 4)], ", NRMSE = ", plotDT_scurveMeanResponse[, round(mean(nrmse), 4)], @@ -472,15 +465,13 @@ allocation_plots <- function(InputCollect, OutputCollect, dt_optimOut, select_mo ) # 1. Response comparison plot - plotDT_resp <- dt_optimOut[, c("channels", "initResponseUnit", "optmResponseUnit")][order(rank(channels))] - plotDT_resp[, channels := as.factor(channels)] - chn_levels <- plotDT_resp[, as.character(channels)] - plotDT_resp[, channels := factor(channels, levels = chn_levels)] - fcts <- c("channel", "Initial Response / Time Unit", "Optimised Response / Time Unit") - setnames(plotDT_resp, names(plotDT_resp), new = fcts) + plotDT_resp <- select(dt_optimOut, .data$channels, .data$initResponseUnit, .data$optmResponseUnit) %>% + mutate(channels = as.factor(.data$channels)) + names(plotDT_resp) <- c("channel", "Initial Avg. Spend Share", "Optimised Avg. Spend Share") plotDT_resp <- suppressWarnings(melt.data.table(plotDT_resp, id.vars = "channel", value.name = "response")) outputs[["p12"]] <- p12 <- ggplot(plotDT_resp, aes( - y = .data$channel, x = .data$response, fill = reorder(.data$variable, as.numeric(.data$variable)))) + + y = reorder(.data$channel, -as.integer(.data$channel)), + x = .data$response, fill = reorder(.data$variable, as.numeric(.data$variable)))) + geom_bar(stat = "identity", width = 0.5, position = position_dodge2(reverse = TRUE, padding = 0)) + scale_fill_brewer(palette = 3) + geom_text(aes(x = 0, label = formatNum(.data$response, 0), hjust = -0.1), @@ -495,15 +486,13 @@ allocation_plots <- function(InputCollect, OutputCollect, dt_optimOut, select_mo ) # 2. Budget share comparison plot - plotDT_share <- dt_optimOut[, c("channels", "initSpendShare", "optmSpendShareUnit")][order(rank(channels))] - plotDT_share[, channels := as.factor(channels)] - chn_levels <- plotDT_share[, as.character(channels)] - plotDT_share[, channels := factor(channels, levels = chn_levels)] - fcts <- c("channel", "Initial Avg. Spend Share", "Optimised Avg. Spend Share") - setnames(plotDT_share, names(plotDT_share), new = fcts) + plotDT_share <- select(dt_optimOut, .data$channels, .data$initSpendShare, .data$optmSpendShareUnit) %>% + mutate(channels = as.factor(.data$channels)) + names(plotDT_share) <- c("channel", "Initial Avg. Spend Share", "Optimised Avg. Spend Share") plotDT_share <- suppressWarnings(melt.data.table(plotDT_share, id.vars = "channel", value.name = "spend_share")) outputs[["p13"]] <- p13 <- ggplot(plotDT_share, aes( - y = .data$channel, x = .data$spend_share, fill = .data$variable)) + + y = reorder(.data$channel, -as.integer(.data$channel)), + x = .data$spend_share, fill = .data$variable)) + geom_bar(stat = "identity", width = 0.5, position = position_dodge2(reverse = TRUE, padding = 0)) + scale_fill_brewer(palette = 3) + geom_text(aes(x = 0, label = formatNum(.data$spend_share * 100, 1, pos = "%"), hjust = -0.1), @@ -517,18 +506,26 @@ allocation_plots <- function(InputCollect, OutputCollect, dt_optimOut, select_mo fill = NULL, x = "Budget Allocation [%]", y = NULL ) - ## 3. Response curve + ## 3. Response curves + plotDT_saturation <- melt.data.table(OutputCollect$mediaVecCollect[ + solID == select_model & type == "saturatedSpendReversed" + ], id.vars = "ds", measure.vars = InputCollect$paid_media_spends, value.name = "spend", variable.name = "channel") + plotDT_decomp <- melt.data.table(OutputCollect$mediaVecCollect[ + solID == select_model & type == "decompMedia" + ], id.vars = "ds", measure.vars = InputCollect$paid_media_spends, value.name = "response", variable.name = "channel") + plotDT_scurve <- data.frame(plotDT_saturation, response = plotDT_decomp$response) %>% + filter(.data$spend >= 0) %>% as_tibble() + dt_optimOutScurve <- rbind( - dt_optimOut[, .(channels, initSpendUnit, initResponseUnit)][, type := "Initial"], - dt_optimOut[, .(channels, optmSpendUnit, optmResponseUnit)][, type := "Optimised"], + select(dt_optimOut, .data$channels, .data$initSpendUnit, .data$initResponseUnit) %>% mutate(type = "Initial"), + select(dt_optimOut, .data$channels, .data$optmSpendUnit, .data$optmResponseUnit) %>% mutate(type = "Optimised"), use.names = FALSE - ) - setnames(dt_optimOutScurve, c("channels", "spend", "response", "type")) - dt_optimOutScurve <- dt_optimOutScurve %>% - mutate(hjust = ifelse(.data$type == "Initial", 1.2, -0.2)) %>% + ) %>% + magrittr::set_colnames(c("channels", "spend", "response", "type")) %>% group_by(.data$channels) %>% mutate(spend_dif = dplyr::last(.data$spend) - dplyr::first(.data$spend), response_dif = dplyr::last(.data$response) - dplyr::first(.data$response)) + outputs[["p14"]] <- p14 <- ggplot(data = plotDT_scurve, aes( x = .data$spend, y = .data$response, color = .data$channel)) + geom_line() + @@ -550,7 +547,7 @@ allocation_plots <- function(InputCollect, OutputCollect, dt_optimOut, select_mo legend.background = element_rect(fill = alpha("grey98", 0.6), color = "grey90") ) + labs( - title = "Response Curve and Mean Spend* by Channel", + title = "Response Curve and Mean* Spend by Channel", x = "Spend", y = "Response", shape = NULL, color = NULL, caption = sprintf( "*Based on date range: %s to %s (%s)", From dbe385c3101ad75cf4ab8372d6e3548dab792923 Mon Sep 17 00:00:00 2001 From: Bernardo Lares Date: Thu, 17 Mar 2022 16:33:12 -0500 Subject: [PATCH 12/17] feat: added scenario and bounds in robyn_allocator print method --- R/R/allocator.R | 22 ++++++++++++++++++---- demo/demo.R | 4 ++-- 2 files changed, 20 insertions(+), 6 deletions(-) diff --git a/R/R/allocator.R b/R/R/allocator.R index 295ae1c92..6c4043823 100644 --- a/R/R/allocator.R +++ b/R/R/allocator.R @@ -330,6 +330,9 @@ robyn_allocator <- function(robyn_object = NULL, dt_optimOut = dt_optimOut, nlsMod = nlsMod, plots = plots, + scenario = scenario, + expected_spend = expected_spend, + expected_spend_days = expected_spend_days, ui = if (ui) plots else NULL ) @@ -345,13 +348,21 @@ print.robyn_allocator <- function(x, ...) { print(glued( " Model ID: {x$dt_optimOut$solID[1]} -Total Spend Increase: {spend_increase_p}% ({spend_increase}) +Scenario: {scenario} +Total Spend Increase: {spend_increase_p}% ({spend_increase}{scenario_plus}) Total Response Increase (Optimized): {signif(100 * x$dt_optimOut$optmResponseUnitTotalLift[1], 3)}% Window: {x$dt_optimOut$date_min[1]}:{x$dt_optimOut$date_max[1]} ({x$dt_optimOut$periods[1]}) Allocation Summary: {summary} ", + scenario = ifelse( + x$scenario == "max_historical_response", + "Maximum Historical Response", + "Maximum Response with Expected Spend"), + scenario_plus = ifelse( + x$scenario == "max_response_expected_spend", + sprintf(" in %s days", x$expected_spend_days), ""), spend_increase_p = signif(100 * x$dt_optimOut$expSpendUnitDelta[1], 3), spend_increase = formatNum( sum(x$dt_optimOut$optmSpendUnitTotal) - sum(x$dt_optimOut$initSpendUnitTotal), @@ -360,10 +371,13 @@ Allocation Summary: summary = paste(sprintf( " - %s: - Spend Share: Initial (avg) = %s%% -> Optimized = %s%% - Mean Response (per time unit): %s -> Optimized = %s - Response: %s -> Optimized = %s (Delta = %s%%)", + Optimizable Range (bounds): [%s%%, %s%%] + Mean Spend Share (avg): %s%% -> Optimized = %s%% + Mean Response: %s -> Optimized = %s + Mean Spend (per time unit): %s -> Optimized = %s [Delta = %s%%]", x$dt_optimOut$channels, + 100 * x$dt_optimOut$constr_low - 100, + 100 * x$dt_optimOut$constr_up - 100, signif(100 * x$dt_optimOut$initSpendShare, 3), signif(100 * x$dt_optimOut$optmSpendShareUnit, 3), formatNum(x$dt_optimOut$initResponseUnit, 0), diff --git a/demo/demo.R b/demo/demo.R index f8f85a5ea..daa4b92d0 100644 --- a/demo/demo.R +++ b/demo/demo.R @@ -356,8 +356,8 @@ AllocatorCollect <- robyn_allocator( , scenario = "max_historical_response" , channel_constr_low = c(0.7, 0.7, 0.7, 0.7, 0.7) , channel_constr_up = c(1.2, 1.5, 1.5, 1.5, 1.5) - , date_min = "2017-12-01" - , date_max = "2018-02-01" + # , date_min = "2017-12-01" + # , date_max = "2018-02-01" ) print(AllocatorCollect) # plot(AllocatorCollect) From c206a863f3b1662ee9c384cb1ead0be10aeda845 Mon Sep 17 00:00:00 2001 From: Bernardo Lares Date: Tue, 22 Mar 2022 17:06:58 -0500 Subject: [PATCH 13/17] fix: allocation fixes and quiet parameters --- R/R/allocator.R | 17 +++++++++-------- R/R/model.R | 21 +++++++++++---------- R/R/outputs.R | 2 +- R/R/pareto.R | 5 +++-- R/R/plots.R | 5 +++-- R/R/refresh.R | 30 ++++++++++++------------------ R/man/robyn_refresh.Rd | 10 +++++----- R/man/robyn_response.Rd | 5 ++++- R/man/robyn_save.Rd | 27 ++++++++++++--------------- demo/demo.R | 37 ++++++++++++++++++++----------------- 10 files changed, 80 insertions(+), 79 deletions(-) diff --git a/R/R/allocator.R b/R/R/allocator.R index 6c4043823..1ae341e8c 100644 --- a/R/R/allocator.R +++ b/R/R/allocator.R @@ -162,16 +162,16 @@ robyn_allocator <- function(robyn_object = NULL, names(coefSelectorSorted) <- dt_coefSorted$rn ## Filter and sort all variables by name that is essential for the apply function later - mediaSpendSortedFiltered <- mediaSpendSorted[coefSelectorSorted] if (!all(coefSelectorSorted)) { - chn_coef0 <- setdiff(mediaVarSorted, mediaSpendSortedFiltered) + chn_coef0 <- setdiff(mediaVarSorted, mediaSpendSorted[coefSelectorSorted]) message("Excluded in optimiser because their coeffients are 0: ", paste(chn_coef0, collapse = ", ")) } + mediaSpendSortedFiltered <- mediaSpendSorted[coefSelectorSorted] dt_hyppar <- dt_hyppar[, .SD, .SDcols = hyper_names(adstock, mediaSpendSortedFiltered)] setcolorder(dt_hyppar, sort(names(dt_hyppar))) dt_bestCoef <- dt_bestCoef[rn %in% mediaSpendSortedFiltered] - channelConstrLowSorted <- channel_constr_low[coefSelectorSorted] - channelConstrUpSorted <- channel_constr_up[coefSelectorSorted] + channelConstrLowSorted <- channel_constr_low[mediaSpendSortedFiltered] + channelConstrUpSorted <- channel_constr_up[mediaSpendSortedFiltered] ## Get adstock parameters for each channel getAdstockHypPar <- get_adstock_params(InputCollect, dt_hyppar) @@ -196,7 +196,7 @@ robyn_allocator <- function(robyn_object = NULL, histSpendB <- select(histFiltered, any_of(mediaSpendSortedFiltered)) histSpendTotal <- sum(histSpendB) - histSpend <- unlist(summarise_all(select(histFiltered, any_of(mediaSpendSorted)), sum)) + histSpend <- unlist(summarise_all(select(histFiltered, any_of(mediaSpendSortedFiltered)), sum)) histSpendUnit <- unlist(summarise_all(histSpendB, function(x) sum(x) / sum(x > 0))) histSpendUnitTotal <- sum(histSpendUnit) histSpendShare <- histSpendUnit / histSpendUnitTotal @@ -215,7 +215,8 @@ robyn_allocator <- function(robyn_object = NULL, dt_hyppar = OutputCollect$resultHypParam, dt_coef = OutputCollect$xDecompAgg, InputCollect = InputCollect, - OutputCollect = OutputCollect + OutputCollect = OutputCollect, + quiet = quiet )$response ) } @@ -291,8 +292,8 @@ robyn_allocator <- function(robyn_object = NULL, date_min = date_min, date_max = date_max, periods = sprintf("%s %ss", nPeriod, InputCollect$intervalType), - constr_low = channel_constr_low, - constr_up = channel_constr_up, + constr_low = channelConstrLowSorted, + constr_up = channelConstrUpSorted, # Initial histSpend = histSpend, histSpendTotal = histSpendTotal, diff --git a/R/R/model.R b/R/R/model.R index 951ac3140..8240fd723 100644 --- a/R/R/model.R +++ b/R/R/model.R @@ -958,7 +958,8 @@ robyn_response <- function(robyn_object = NULL, dt_hyppar = NULL, dt_coef = NULL, InputCollect = NULL, - OutputCollect = NULL) { + OutputCollect = NULL, + quiet = FALSE) { ## Get input if (!is.null(robyn_object)) { @@ -974,14 +975,14 @@ robyn_response <- function(robyn_object = NULL, select_build_all <- 0:(length(Robyn) - 1) if (is.null(select_build)) { select_build <- max(select_build_all) - message( - "Using latest model: ", ifelse(select_build == 0, "initial model", paste0("refresh model nr.", select_build)), + if (!quiet & length(select_build_all) > 1) message( + "Using latest model: ", ifelse(select_build == 0, "initial model", paste0("refresh model #", select_build)), " for the response function. Use parameter 'select_build' to specify which run to use" ) } if (!(select_build %in% select_build_all) | length(select_build) != 1) { - stop("select_build must be one value of ", paste(select_build_all, collapse = ", ")) + stop("'select_build' must be one value of ", paste(select_build_all, collapse = ", ")) } listName <- ifelse(select_build == 0, "listInit", paste0("listRefresh", select_build)) @@ -1016,14 +1017,14 @@ robyn_response <- function(robyn_object = NULL, } ## get media valu - if (media_metric %in% paid_media_spends & length(media_metric)==1) { + if (media_metric %in% paid_media_spends & length(media_metric) == 1) { metric_type <- "spend" - } else if (media_metric %in% exposure_vars & length(media_metric)==1){ + } else if (media_metric %in% exposure_vars & length(media_metric) == 1){ metric_type <- "exposure" - } else if (media_metric %in% organic_vars & length(media_metric)==1) { + } else if (media_metric %in% organic_vars & length(media_metric) == 1) { metric_type <- "organic" } else { - stop("media_metric must be one value from paid_media_spends, paid_media_vars or organic_vars") + stop("'media_metric' must be one value from paid_media_spends, paid_media_vars or organic_vars") } if (!is.null(metric_value)) { @@ -1040,7 +1041,7 @@ robyn_response <- function(robyn_object = NULL, # use non-0 mean as marginal level if metric_value not provided if (is.null(metric_value)) { metric_value <- mean(expo_vec[startRW:endRW][expo_vec[startRW:endRW] > 0]) - message("'metric_value' not provided. Using mean of ", media_metric, " instead") + if (!quiet) message("'metric_value' not provided. Using mean of ", media_metric, " instead") } # fit spend to exposure @@ -1061,7 +1062,7 @@ robyn_response <- function(robyn_object = NULL, # use non-0 means marginal level if spend not provided if (is.null(metric_value)) { metric_value <- mean(media_vec[startRW:endRW][media_vec[startRW:endRW] > 0]) - message("metric_value not provided. using mean of ", media_metric, " instead") + if (!quiet) message("'metric_value' not provided. Using mean of ", media_metric, " instead") } hpm_name <- media_metric } diff --git a/R/R/outputs.R b/R/R/outputs.R index c6734a024..1a4570b37 100644 --- a/R/R/outputs.R +++ b/R/R/outputs.R @@ -65,7 +65,7 @@ robyn_outputs <- function(InputCollect, OutputModels, if (!isTRUE(attr(OutputModels, "hyper_fixed"))) message(sprintf( ">>> Running Pareto calculations for %s models on %s front%s...", totalModels, pareto_fronts, ifelse(pareto_fronts > 1, "s", ""))) - pareto_results <- robyn_pareto(InputCollect, OutputModels, pareto_fronts, calibration_constraint) + pareto_results <- robyn_pareto(InputCollect, OutputModels, pareto_fronts, calibration_constraint, quiet) allSolutions <- unique(pareto_results$xDecompVecCollect$solID) ##################################### diff --git a/R/R/pareto.R b/R/R/pareto.R index 3a85e4019..6216423b4 100644 --- a/R/R/pareto.R +++ b/R/R/pareto.R @@ -3,7 +3,7 @@ # This source code is licensed under the MIT license found in the # LICENSE file in the root directory of this source tree. -robyn_pareto <- function(InputCollect, OutputModels, pareto_fronts, calibration_constraint = 0.1) { +robyn_pareto <- function(InputCollect, OutputModels, pareto_fronts, calibration_constraint = 0.1, quiet = FALSE) { hyper_fixed <- attr(OutputModels, "hyper_fixed") OutModels <- OutputModels[sapply(OutputModels, function(x) "resultCollect" %in% names(x))] @@ -61,7 +61,8 @@ robyn_pareto <- function(InputCollect, OutputModels, pareto_fronts, calibration_ dt_hyppar = resultHypParamPar, dt_coef = xDecompAggPar, InputCollect = InputCollect, - OutputCollect = OutputModels + OutputCollect = OutputModels, + quiet = quiet )$response dt_resp <- data.table(mean_response = get_resp, rn = decompSpendDistPar$rn[respN], diff --git a/R/R/plots.R b/R/R/plots.R index 5f43bd01b..1e18cddb7 100644 --- a/R/R/plots.R +++ b/R/R/plots.R @@ -520,8 +520,9 @@ allocation_plots <- function(InputCollect, OutputCollect, dt_optimOut, select_mo select(dt_optimOut, .data$channels, .data$initSpendUnit, .data$initResponseUnit) %>% mutate(type = "Initial"), select(dt_optimOut, .data$channels, .data$optmSpendUnit, .data$optmResponseUnit) %>% mutate(type = "Optimised"), use.names = FALSE - ) %>% - magrittr::set_colnames(c("channels", "spend", "response", "type")) %>% + ) + colnames(dt_optimOutScurve) <- c("channels", "spend", "response", "type") + dt_optimOutScurve <- dt_optimOutScurve %>% group_by(.data$channels) %>% mutate(spend_dif = dplyr::last(.data$spend) - dplyr::first(.data$spend), response_dif = dplyr::last(.data$response) - dplyr::first(.data$response)) diff --git a/R/R/refresh.R b/R/R/refresh.R index 98616d3d4..d31797dff 100644 --- a/R/R/refresh.R +++ b/R/R/refresh.R @@ -12,27 +12,21 @@ #' @return (Invisible) file's name. #' @examples #' \dontrun{ -#' ## Get all model IDs in result from OutputCollect$allSolutions -#' -#' ## Select one from above +#' # Get model IDs from OutputCollect #' select_model <- "3_10_3" #' -#' ## Save the robyn object. Overwriting old object needs confirmation. -#' robyn_object <- "~/Desktop/Robyn.RDS" +#' # Save the results. Overwriting old object needs confirmation. #' robyn_save( -#' robyn_object = robyn_object, -#' select_model = select_model, +#' robyn_object = "~/Desktop/Robyn.RDS", #' InputCollect = InputCollect, -#' OutputCollect = OutputCollect +#' OutputCollect = OutputCollect, +#' select_model = select_model #' ) #' } #' @export -robyn_save <- function(robyn_object, - select_model, - InputCollect, - OutputCollect) { +robyn_save <- function(robyn_object, InputCollect, OutputCollect, select_model = NULL) { check_robyn_object(robyn_object) - + if (is.null(select_model)) select_model <- OutputCollect[["selectID"]] if (!(select_model %in% OutputCollect$resultHypParam$solID)) { stop(paste0("'select_model' must be one of these values: ", paste( OutputCollect$resultHypParam$solID, @@ -66,16 +60,16 @@ robyn_save <- function(robyn_object, #' Build Refresh Model #' #' @description -#' \code{robyn_refresh()} builds update models based on +#' \code{robyn_refresh()} builds updated models based on #' the previously built models saved in the \code{Robyn.RDS} object specified #' in \code{robyn_object}. For example, when updating the initial build with 4 #' weeks of new data, \code{robyn_refresh()} consumes the selected model of -#' the initial build. it sets lower and upper bounds of hyperparameters for the +#' the initial build, sets lower and upper bounds of hyperparameters for the #' new build around the selected hyperparameters of the previous build, -#' stabilizes the effect of baseline variables across old and new builds and +#' stabilizes the effect of baseline variables across old and new builds, and #' regulates the new effect share of media variables towards the latest -#' spend level. It returns aggregated result with all previous builds for -#' reporting purpose and produces reporting plots. +#' spend level. It returns the aggregated results with all previous builds for +#' reporting purposes and produces reporting plots. #' #' You must run \code{robyn_save()} to select and save an initial model first, #' before refreshing. diff --git a/R/man/robyn_refresh.Rd b/R/man/robyn_refresh.Rd index 28e71c6d0..ea9c91450 100644 --- a/R/man/robyn_refresh.Rd +++ b/R/man/robyn_refresh.Rd @@ -64,16 +64,16 @@ passed into initial model.} List. The Robyn object, class \code{robyn_refresh}. } \description{ -\code{robyn_refresh()} builds update models based on +\code{robyn_refresh()} builds updated models based on the previously built models saved in the \code{Robyn.RDS} object specified in \code{robyn_object}. For example, when updating the initial build with 4 weeks of new data, \code{robyn_refresh()} consumes the selected model of -the initial build. it sets lower and upper bounds of hyperparameters for the +the initial build, sets lower and upper bounds of hyperparameters for the new build around the selected hyperparameters of the previous build, -stabilizes the effect of baseline variables across old and new builds and +stabilizes the effect of baseline variables across old and new builds, and regulates the new effect share of media variables towards the latest -spend level. It returns aggregated result with all previous builds for -reporting purpose and produces reporting plots. +spend level. It returns the aggregated results with all previous builds for +reporting purposes and produces reporting plots. You must run \code{robyn_save()} to select and save an initial model first, before refreshing. diff --git a/R/man/robyn_response.Rd b/R/man/robyn_response.Rd index ada1689b5..b94ee1295 100644 --- a/R/man/robyn_response.Rd +++ b/R/man/robyn_response.Rd @@ -13,7 +13,8 @@ robyn_response( dt_hyppar = NULL, dt_coef = NULL, InputCollect = NULL, - OutputCollect = NULL + OutputCollect = NULL, + quiet = FALSE ) } \arguments{ @@ -47,6 +48,8 @@ Required when \code{robyn_object} is not provided.} \item{OutputCollect}{List. Containing all model result. Required when \code{robyn_object} is not provided.} + +\item{quiet}{Boolean. Keep messages off?} } \description{ \code{robyn_response()} returns the response for a given diff --git a/R/man/robyn_save.Rd b/R/man/robyn_save.Rd index 4a25b65fe..2377c51d6 100644 --- a/R/man/robyn_save.Rd +++ b/R/man/robyn_save.Rd @@ -4,23 +4,23 @@ \alias{robyn_save} \title{Export Robyn Model to Local File} \usage{ -robyn_save(robyn_object, select_model, InputCollect, OutputCollect) +robyn_save(robyn_object, InputCollect, OutputCollect, select_model = NULL) } \arguments{ \item{robyn_object}{Character. Path of the \code{Robyn.RDS} object that contains all previous modeling information.} -\item{select_model}{Character. A model \code{SolID}. When \code{robyn_object} -is provided, \code{select_model} defaults to the already selected \code{SolID}. When -\code{robyn_object} is not provided, \code{select_model} must be provided with -\code{InputCollect} and \code{OutputCollect}, and must be one of -\code{OutputCollect$allSolutions}.} - \item{InputCollect}{List. Contains all input parameters for the model. Required when \code{robyn_object} is not provided.} \item{OutputCollect}{List. Containing all model result. Required when \code{robyn_object} is not provided.} + +\item{select_model}{Character. A model \code{SolID}. When \code{robyn_object} +is provided, \code{select_model} defaults to the already selected \code{SolID}. When +\code{robyn_object} is not provided, \code{select_model} must be provided with +\code{InputCollect} and \code{OutputCollect}, and must be one of +\code{OutputCollect$allSolutions}.} } \value{ (Invisible) file's name. @@ -30,18 +30,15 @@ Use \code{robyn_save()} to select and save as .RDS file the initial model. } \examples{ \dontrun{ -## Get all model IDs in result from OutputCollect$allSolutions - -## Select one from above +# Get model IDs from OutputCollect select_model <- "3_10_3" -## Save the robyn object. Overwriting old object needs confirmation. -robyn_object <- "~/Desktop/Robyn.RDS" +# Save the results. Overwriting old object needs confirmation. robyn_save( - robyn_object = robyn_object, - select_model = select_model, + robyn_object = "~/Desktop/Robyn.RDS", InputCollect = InputCollect, - OutputCollect = OutputCollect + OutputCollect = OutputCollect, + select_model = select_model ) } } diff --git a/demo/demo.R b/demo/demo.R index daa4b92d0..4185fa779 100644 --- a/demo/demo.R +++ b/demo/demo.R @@ -273,7 +273,7 @@ OutputModels <- robyn_run( #, cores = NULL # default #, add_penalty_factor = FALSE # Untested feature. Use with caution. , iterations = 2000 # recommended for the dummy dataset - , trials = 5 # recommended for the dummy dataset + , trials = 3 # recommended for the dummy dataset , outputs = FALSE # outputs = FALSE disables direct model output ) print(OutputModels) @@ -494,29 +494,32 @@ response_imps$plot ## Example of getting organic media exposure response curves sendings <- 30000 response_sending <- robyn_response( - robyn_object = robyn_object - #, select_build = 1 - , media_metric = "newsletter" - , metric_value = sendings) + robyn_object = robyn_object, + # select_build = 0, + media_metric = "newsletter", + metric_value = sendings) response_sending$response / sendings * 1000 response_sending$plot ################################################################ #### Optional: get old model results -# Get old hyperparameters and select model -dt_hyper_fixed <- data.table::fread("~/Desktop/2022-02-21 11.29 rf11/pareto_hyperparameters.csv") -select_model <- "1_51_11" -dt_hyper_fixed <- dt_hyper_fixed[solID == select_model] +# Get InputCollect and selected model ID +robyn_object <- "~/Desktop/MyRobyn.RDS" +MyOldRobyn <- readRDS(robyn_object) +select_model <- MyOldRobyn[[length(MyOldRobyn)]]$OutputCollect$selectID +# Get hyperparameters for selected model +dt_hyper_fixed <- read.csv("~/Desktop/2022-03-22 16.47 init/pareto_hyperparameters.csv") +dt_hyper_fixed <- dt_hyper_fixed[dt_hyper_fixed$solID == select_model,] +# Re-generate OutputCollect with fixed hyperparameters OutputCollectFixed <- robyn_run( - # InputCollect must be provided by robyn_inputs with same dataset and parameters as before - InputCollect = InputCollect - , plot_folder = robyn_object - , dt_hyper_fixed = dt_hyper_fixed) + InputCollect = MyOldRobyn$listInit$InputCollect, + plot_folder = robyn_object, + dt_hyper_fixed = dt_hyper_fixed) # Save Robyn object for further refresh -robyn_save(robyn_object = robyn_object - , select_model = select_model - , InputCollect = InputCollect - , OutputCollect = OutputCollectFixed) +robyn_save(robyn_object = robyn_object, + InputCollect = InputCollect, + OutputCollect = OutputCollectFixed, + select_model = select_model) From 90393390e0862a1719b76e6691dd22bc8e432c6e Mon Sep 17 00:00:00 2001 From: Bernardo Lares Date: Thu, 24 Mar 2022 12:29:23 -0500 Subject: [PATCH 14/17] feat: add 3 new checks on calibration inputs --- R/R/allocator.R | 4 +- R/R/checks.R | 34 +++++++++++++++- R/R/inputs.R | 57 +++++++++++++------------- R/R/plots.R | 7 ++-- R/man/robyn_inputs.Rd | 4 +- demo/demo.R | 93 +++++++++++++++++++++++-------------------- 6 files changed, 121 insertions(+), 78 deletions(-) diff --git a/R/R/allocator.R b/R/R/allocator.R index 1ae341e8c..99cb53555 100644 --- a/R/R/allocator.R +++ b/R/R/allocator.R @@ -165,7 +165,7 @@ robyn_allocator <- function(robyn_object = NULL, if (!all(coefSelectorSorted)) { chn_coef0 <- setdiff(mediaVarSorted, mediaSpendSorted[coefSelectorSorted]) message("Excluded in optimiser because their coeffients are 0: ", paste(chn_coef0, collapse = ", ")) - } + } else chn_coef0 <- "None" mediaSpendSortedFiltered <- mediaSpendSorted[coefSelectorSorted] dt_hyppar <- dt_hyppar[, .SD, .SDcols = hyper_names(adstock, mediaSpendSortedFiltered)] setcolorder(dt_hyppar, sort(names(dt_hyppar))) @@ -334,6 +334,7 @@ robyn_allocator <- function(robyn_object = NULL, scenario = scenario, expected_spend = expected_spend, expected_spend_days = expected_spend_days, + skipped = chn_coef0, ui = if (ui) plots else NULL ) @@ -350,6 +351,7 @@ print.robyn_allocator <- function(x, ...) { " Model ID: {x$dt_optimOut$solID[1]} Scenario: {scenario} +Media Skipped (coef = 0): {paste0(x$skipped, collapse = ',')} Total Spend Increase: {spend_increase_p}% ({spend_increase}{scenario_plus}) Total Response Increase (Optimized): {signif(100 * x$dt_optimOut$optmResponseUnitTotalLift[1], 3)}% Window: {x$dt_optimOut$date_min[1]}:{x$dt_optimOut$date_max[1]} ({x$dt_optimOut$periods[1]}) diff --git a/R/R/checks.R b/R/R/checks.R index 415f23010..034bfcb26 100644 --- a/R/R/checks.R +++ b/R/R/checks.R @@ -416,16 +416,46 @@ check_hyper_limits <- function(hyperparameters, hyper) { } } -check_calibration <- function(dt_input, date_var, calibration_input, dayInterval) { +check_calibration <- function(dt_input, date_var, calibration_input, dayInterval, dep_var) { if (!is.null(calibration_input)) { calibration_input <- as.data.table(calibration_input) - if (!all(names(calibration_input) %in% c("channel", "liftStartDate", "liftEndDate", "liftAbs"))) { + if (!all(c("channel", "liftStartDate", "liftEndDate", "liftAbs") %in% names(calibration_input))) { stop("calibration_input must contain columns 'channel', 'liftStartDate', 'liftEndDate', 'liftAbs'") } if ((min(calibration_input$liftStartDate) < min(dt_input[, get(date_var)])) | (max(calibration_input$liftEndDate) > (max(dt_input[, get(date_var)]) + dayInterval - 1))) { stop("We recommend you to only use lift results conducted within your MMM input data date range") } + if ("spend" %in% colnames(calibration_input)) { + for (i in 1:nrow(calibration_input)) { + temp <- calibration_input[i, ] + dt_input_spend <- filter(dt_input, get(date_var) >= temp$liftStartDate, get(date_var) <= temp$liftEndDate) %>% + pull(get(temp$channel)) %>% sum(.) %>% round(., 0) + if (dt_input_spend > temp$spend * 1.1 | dt_input_spend < temp$spend * 0.9) { + warning(sprintf("Your calibration's spend (%s) for %s between %s and %s does not match your dt_input spend (~%s)", + formatNum(temp$spend, 0), temp$channel, temp$liftStartDate, temp$liftEndDate, + formatNum(dt_input_spend, 3, abbr = TRUE))) + } + } + } + if ("confidence" %in% colnames(calibration_input)) { + for (i in 1:nrow(calibration_input)) { + temp <- calibration_input[i, ] + if (temp$confidence < 0.8) { + warning(sprintf("Your calibration's confidence for %s between %s and %s is lower than 80%%, thus low-confidence", + temp$channel, temp$liftStartDate, temp$liftEndDate)) + } + } + } + if ("metric" %in% colnames(calibration_input)) { + for (i in 1:nrow(calibration_input)) { + temp <- calibration_input[i, ] + if (temp$metric != dep_var) { + warning(sprintf("Your calibration's metric for %s between %s and %s is not '%s'", + temp$channel, temp$liftStartDate, temp$liftEndDate, dep_var)) + } + } + } } return(calibration_input) } diff --git a/R/R/inputs.R b/R/R/inputs.R index 57c00305b..809bd905f 100644 --- a/R/R/inputs.R +++ b/R/R/inputs.R @@ -107,7 +107,9 @@ #' in full for the model calculation of trend, seasonality and holidays effects. #' Whereas the window period will determine how much of the full data set will be #' used for media, organic and context variables. -#' @param calibration_input data.frame. Optional provide experimental results. +#' @param calibration_input data.frame. Optional. Provide experimental results to +#' calibrate. Your input should include the following values for each experiment: +#' channel, liftStartDate, liftEndDate, liftAbs, spend, confidence, metric. #' Check "Guide for calibration source" section. #' @param InputCollect Default to NULL. \code{robyn_inputs}'s output when #' \code{hyperparameters} are not yet set. @@ -176,55 +178,55 @@ robyn_inputs <- function(dt_input = NULL, dt_input <- as.data.table(dt_input) dt_holidays <- as.data.table(dt_holidays) - # check for NA values + ## Check for NA values check_nas(dt_input) check_nas(dt_holidays) - # check vars names (duplicates and valid) + ## Check vars names (duplicates and valid) check_varnames(dt_input, dt_holidays, dep_var, date_var, context_vars, paid_media_spends, organic_vars) - ## check date input (and set dayInterval and intervalType) + ## Check date input (and set dayInterval and intervalType) date_input <- check_datevar(dt_input, date_var) dt_input <- date_input$dt_input # sort date by ascending date_var <- date_input$date_var # when date_var = "auto" dayInterval <- date_input$dayInterval intervalType <- date_input$intervalType - ## check dependent var + ## Check dependent var check_depvar(dt_input, dep_var, dep_var_type) - ## check prophet + ## Check prophet prophet_signs <- check_prophet(dt_holidays, prophet_country, prophet_vars, prophet_signs) - ## check baseline variables (and maybe transform context_signs) + ## Check baseline variables (and maybe transform context_signs) context <- check_context(dt_input, context_vars, context_signs) context_signs <- context$context_signs - ## check paid media variables (set mediaVarCount and maybe transform paid_media_signs) + ## Check paid media variables (set mediaVarCount and maybe transform paid_media_signs) paidmedia <- check_paidmedia(dt_input, paid_media_vars, paid_media_signs, paid_media_spends) paid_media_signs <- paidmedia$paid_media_signs mediaVarCount <- paidmedia$mediaVarCount exposure_vars <- paid_media_vars[!(paid_media_vars == paid_media_spends)] - ## check organic media variables (and maybe transform organic_signs) + ## Check organic media variables (and maybe transform organic_signs) organic <- check_organicvars(dt_input, organic_vars, organic_signs) organic_signs <- organic$organic_signs - ## check factor_vars + ## Check factor_vars check_factorvars(factor_vars, context_vars, organic_vars) - ## check all vars + ## Check all vars all_media <- c(paid_media_spends, organic_vars) all_ind_vars <- c(prophet_vars, context_vars, all_media) check_allvars(all_ind_vars) - ## check data dimension + ## Check data dimension check_datadim(dt_input, all_ind_vars, rel = 10) - ## check window_start & window_end (and transform parameters/data) + ## Check window_start & window_end (and transform parameters/data) windows <- check_windows(dt_input, date_var, all_media, window_start, window_end) dt_input <- windows$dt_input window_start <- windows$window_start @@ -234,21 +236,21 @@ robyn_inputs <- function(dt_input = NULL, rollingWindowEndWhich <- windows$rollingWindowEndWhich rollingWindowLength <- windows$rollingWindowLength - ## check adstock + ## Check adstock adstock <- check_adstock(adstock) - ## check hyperparameters (if passed) + ## Check hyperparameters (if passed) hyperparameters <- check_hyperparameters( hyperparameters, adstock, paid_media_spends, organic_vars, exposure_vars) - ## check calibration and iters/trials - calibration_input <- check_calibration(dt_input, date_var, calibration_input, dayInterval) + ## Check calibration and iters/trials + calibration_input <- check_calibration(dt_input, date_var, calibration_input, dayInterval, dep_var) ## Not used variables unused_vars <- colnames(dt_input)[!colnames(dt_input) %in% c( dep_var, date_var, context_vars, paid_media_vars, paid_media_spends, organic_vars)] - ## collect input + ## Collect input InputCollect <- output <- list( dt_input = dt_input, dt_holidays = dt_holidays, @@ -288,35 +290,34 @@ robyn_inputs <- function(dt_input = NULL, ... ) - ### Use case 1: running robyn_inputs() for the first time if (!is.null(hyperparameters)) { - ### conditional output 1.2 - ## running robyn_inputs() for the 1st time & 'hyperparameters' provided --> run robyn_engineering() + ### Conditional output 1.2 + ## Running robyn_inputs() for the 1st time & 'hyperparameters' provided --> run robyn_engineering() output <- robyn_engineering(InputCollect, ...) } } else { - + ### Use case 2: adding 'hyperparameters' and/or 'calibration_input' using robyn_inputs() # Check for legacy (deprecated) inputs check_legacy_input(InputCollect) - ### Use case 2: adding 'hyperparameters' and/or 'calibration_input' using robyn_inputs() - ## check calibration and iters/trials + ## Check calibration and iters/trials calibration_input <- check_calibration( InputCollect$dt_input, InputCollect$date_var, calibration_input, - InputCollect$dayInterval + InputCollect$dayInterval, + InputCollect$dep_var ) - ## update calibration_input + ## Update calibration_input if (!is.null(calibration_input)) InputCollect$calibration_input <- calibration_input if (!is.null(hyperparameters)) InputCollect$hyperparameters <- hyperparameters if (is.null(InputCollect$hyperparameters) & is.null(hyperparameters)) { stop("must provide hyperparameters in robyn_inputs()") } else { - ### conditional output 2.1 + ### Conditional output 2.1 ## 'hyperparameters' provided --> run robyn_engineering() - ## update & check hyperparameters + ## Update & check hyperparameters if (is.null(InputCollect$hyperparameters)) InputCollect$hyperparameters <- hyperparameters check_hyperparameters(InputCollect$hyperparameters, InputCollect$adstock, InputCollect$all_media) output <- robyn_engineering(InputCollect, ...) diff --git a/R/R/plots.R b/R/R/plots.R index 1e18cddb7..b4de6b274 100644 --- a/R/R/plots.R +++ b/R/R/plots.R @@ -28,7 +28,7 @@ robyn_plots <- function(InputCollect, OutputCollect, export = TRUE) { geom_line(color = "steelblue") + facet_wrap(~variable, scales = "free", ncol = 1) + labs(title = "Prophet decomposition") + - xlab(NULL) + ylab(NULL) + theme_lares() + xlab(NULL) + ylab(NULL) + theme_lares() + scale_y_abbr() if (export) { ggsave( paste0(OutputCollect$plot_folder, "prophet_decomp.png"), @@ -544,8 +544,9 @@ allocation_plots <- function(InputCollect, OutputCollect, dt_optimOut, select_mo # ) + theme_lares(legend.position = c(0.9, 0), pal = 2) + theme( - legend.position = c(0.9, 0.5), - legend.background = element_rect(fill = alpha("grey98", 0.6), color = "grey90") + legend.position = c(0.87, 0.5), + legend.background = element_rect(fill = alpha("grey98", 0.6), color = "grey90"), + legend.spacing.y = unit(0.2, 'cm') ) + labs( title = "Response Curve and Mean* Spend by Channel", diff --git a/R/man/robyn_inputs.Rd b/R/man/robyn_inputs.Rd index 22b51e91f..7448535f1 100644 --- a/R/man/robyn_inputs.Rd +++ b/R/man/robyn_inputs.Rd @@ -132,7 +132,9 @@ in full for the model calculation of trend, seasonality and holidays effects. Whereas the window period will determine how much of the full data set will be used for media, organic and context variables.} -\item{calibration_input}{data.frame. Optional provide experimental results. +\item{calibration_input}{data.frame. Optional. Provide experimental results to +calibrate. Your input should include the following values for each experiment: +channel, liftStartDate, liftEndDate, liftAbs, spend, confidence, metric. Check "Guide for calibration source" section.} \item{InputCollect}{Default to NULL. \code{robyn_inputs}'s output when diff --git a/demo/demo.R b/demo/demo.R index 4185fa779..7b0e42aae 100644 --- a/demo/demo.R +++ b/demo/demo.R @@ -4,18 +4,22 @@ # LICENSE file in the root directory of this source tree. ############################################################################################# -#################### Facebook MMM Open Source - Robyn 3.6.0 ###################### +#################### Facebook MMM Open Source - Robyn 3.6.1 ###################### #################### Quick guide ####################### ############################################################################################# ################################################################ -#### Step 0: setup environment +#### Step 0: Setup environment -## Install and load libraries +## Install, load, and check (latest) version # install.packages("remotes") # Install remotes first if you haven't already library(Robyn) # remotes::install_github("facebookexperimental/Robyn/R") -## force multicore when using RStudio +# Please, check if you have installed the latest version before running this demo. Update if not +# https://github.com/facebookexperimental/Robyn/blob/main/R/DESCRIPTION#L4 +packageVersion("Robyn") + +## Force multicore when using RStudio Sys.setenv(R_FUTURE_FORK_ENABLE="true") options(future.fork.enable = TRUE) @@ -52,7 +56,7 @@ options(future.fork.enable = TRUE) # https://github.com/facebookexperimental/Robyn/issues/189 ################################################################ -#### Step 1: load data +#### Step 1: Load data ## Check simulated dataset or load your own dataset data("dt_simulated_weekly") @@ -224,19 +228,23 @@ print(InputCollect) ## -------------------------------- NOTE v3.6.0 CHANGE !!! ---------------------------------- ## ## As noted above, calibration channels need to be paid_media_spends name. ## ------------------------------------------------------------------------------------------ ## -# dt_calibration <- data.frame( -# channel = c("facebook_S", "tv_S", "facebook_S") +# calibration_input <- data.frame( # # channel name must in paid_media_vars -# , liftStartDate = as.Date(c("2018-05-01", "2017-11-27", "2018-07-01")) +# channel = c("facebook_S", "tv_S", "facebook_S"), # # liftStartDate must be within input data range -# , liftEndDate = as.Date(c("2018-06-10", "2017-12-03", "2018-07-20")) +# liftStartDate = as.Date(c("2018-05-01", "2018-04-03", "2018-07-01")), # # liftEndDate must be within input data range -# , liftAbs = c(400000, 300000, 200000) # Provided value must be -# # tested on same campaign level in model and same metric as dep_var_type +# liftEndDate = as.Date(c("2018-06-10", "2018-06-03", "2018-07-20")), +# # Provided value must be tested on same campaign level in model and same metric as dep_var_type +# liftAbs = c(400000, 300000, 200000), +# # Spend within experiment: should match within a 10% error your spend on date range for each channel from dt_input +# spend = c(421000, 7100, 240000), +# # Confidence: if frequentist experiment, you may use 1 - pvalue +# confidence = c(0.85, 0.8, 0.99), +# # KPI measured: must match your dep_var +# metric = c("revenue", "revenue", "revenue") # ) -# -# InputCollect <- robyn_inputs(InputCollect = InputCollect -# , calibration_input = dt_calibration) +# InputCollect <- robyn_inputs(InputCollect = InputCollect, calibration_input = calibration_input) ################################################################ @@ -273,7 +281,7 @@ OutputModels <- robyn_run( #, cores = NULL # default #, add_penalty_factor = FALSE # Untested feature. Use with caution. , iterations = 2000 # recommended for the dummy dataset - , trials = 3 # recommended for the dummy dataset + , trials = 5 # recommended for the dummy dataset , outputs = FALSE # outputs = FALSE disables direct model output ) print(OutputModels) @@ -328,12 +336,15 @@ print(OutputCollect) ## Compare all model one-pagers and select one that mostly reflects your business reality print(OutputCollect) -select_model <- "1_18_4" # select one from above -robyn_save(robyn_object = robyn_object # model object location and name - , select_model = select_model # selected model ID - , InputCollect = InputCollect # all model input - , OutputCollect = OutputCollect # all model output +select_model <- "1_101_4" # select one from above +ExportedModel <- robyn_save( + robyn_object = robyn_object # model object location and name + , select_model = select_model # selected model ID + , InputCollect = InputCollect # all model input + , OutputCollect = OutputCollect # all model output ) +print(ExportedModel) +# plot(ExportedModel) ################################################################ #### Step 5: Get budget allocation based on the selected model above @@ -345,6 +356,7 @@ robyn_save(robyn_object = robyn_object # model object location and name OutputCollect$xDecompAgg[solID == select_model & !is.na(mean_spend) , .(rn, coef,mean_spend, mean_response, roi_mean , total_spend, total_response=xDecompAgg, roi_total, solID)] +# OR: print(ExportedModel) # Run ?robyn_allocator to check parameter definition # Run the "max_historical_response" scenario: "What's the revenue lift potential with the @@ -356,11 +368,9 @@ AllocatorCollect <- robyn_allocator( , scenario = "max_historical_response" , channel_constr_low = c(0.7, 0.7, 0.7, 0.7, 0.7) , channel_constr_up = c(1.2, 1.5, 1.5, 1.5, 1.5) - # , date_min = "2017-12-01" - # , date_max = "2018-02-01" ) print(AllocatorCollect) -# plot(AllocatorCollect) +AllocatorCollect$dt_optimOut # Run the "max_response_expected_spend" scenario: "What's the maximum response for a given # total spend based on historical saturation and what is the spend mix?" "optmSpendShareUnit" @@ -376,7 +386,7 @@ AllocatorCollect <- robyn_allocator( , expected_spend_days = 7 # Duration of expected_spend in days ) print(AllocatorCollect) -# plot(AllocatorCollect) +AllocatorCollect$dt_optimOut ## A csv is exported into the folder for further usage. Check schema here: ## https://github.com/facebookexperimental/Robyn/blob/main/demo/schema.R @@ -494,32 +504,29 @@ response_imps$plot ## Example of getting organic media exposure response curves sendings <- 30000 response_sending <- robyn_response( - robyn_object = robyn_object, - # select_build = 0, - media_metric = "newsletter", - metric_value = sendings) + robyn_object = robyn_object + #, select_build = 1 + , media_metric = "newsletter" + , metric_value = sendings) response_sending$response / sendings * 1000 response_sending$plot ################################################################ #### Optional: get old model results -# Get InputCollect and selected model ID -robyn_object <- "~/Desktop/MyRobyn.RDS" -MyOldRobyn <- readRDS(robyn_object) -select_model <- MyOldRobyn[[length(MyOldRobyn)]]$OutputCollect$selectID -# Get hyperparameters for selected model -dt_hyper_fixed <- read.csv("~/Desktop/2022-03-22 16.47 init/pareto_hyperparameters.csv") -dt_hyper_fixed <- dt_hyper_fixed[dt_hyper_fixed$solID == select_model,] +# Get old hyperparameters and select model +dt_hyper_fixed <- data.table::fread("~/Desktop/2022-02-21 11.29 rf11/pareto_hyperparameters.csv") +select_model <- "1_51_11" +dt_hyper_fixed <- dt_hyper_fixed[solID == select_model] -# Re-generate OutputCollect with fixed hyperparameters OutputCollectFixed <- robyn_run( - InputCollect = MyOldRobyn$listInit$InputCollect, - plot_folder = robyn_object, - dt_hyper_fixed = dt_hyper_fixed) + # InputCollect must be provided by robyn_inputs with same dataset and parameters as before + InputCollect = InputCollect + , plot_folder = robyn_object + , dt_hyper_fixed = dt_hyper_fixed) # Save Robyn object for further refresh -robyn_save(robyn_object = robyn_object, - InputCollect = InputCollect, - OutputCollect = OutputCollectFixed, - select_model = select_model) +robyn_save(robyn_object = robyn_object + , select_model = select_model + , InputCollect = InputCollect + , OutputCollect = OutputCollectFixed) From d2ca2fb21040d3468dddfbd2aee60968762328a7 Mon Sep 17 00:00:00 2001 From: Bernardo Lares Date: Thu, 24 Mar 2022 13:21:52 -0500 Subject: [PATCH 15/17] feat: check calibration inputs and provide recommendations #307 --- R/R/checks.R | 20 +++++++++++++------- 1 file changed, 13 insertions(+), 7 deletions(-) diff --git a/R/R/checks.R b/R/R/checks.R index 034bfcb26..a84d3ca06 100644 --- a/R/R/checks.R +++ b/R/R/checks.R @@ -432,9 +432,11 @@ check_calibration <- function(dt_input, date_var, calibration_input, dayInterval dt_input_spend <- filter(dt_input, get(date_var) >= temp$liftStartDate, get(date_var) <= temp$liftEndDate) %>% pull(get(temp$channel)) %>% sum(.) %>% round(., 0) if (dt_input_spend > temp$spend * 1.1 | dt_input_spend < temp$spend * 0.9) { - warning(sprintf("Your calibration's spend (%s) for %s between %s and %s does not match your dt_input spend (~%s)", - formatNum(temp$spend, 0), temp$channel, temp$liftStartDate, temp$liftEndDate, - formatNum(dt_input_spend, 3, abbr = TRUE))) + warning(sprintf(paste( + "Your calibration's spend (%s) for %s between %s and %s does not match your dt_input spend (~%s).", + "Please, check again your dates or split your media inputs into separate media channels."), + formatNum(temp$spend, 0), temp$channel, temp$liftStartDate, temp$liftEndDate, + formatNum(dt_input_spend, 3, abbr = TRUE))) } } } @@ -442,8 +444,10 @@ check_calibration <- function(dt_input, date_var, calibration_input, dayInterval for (i in 1:nrow(calibration_input)) { temp <- calibration_input[i, ] if (temp$confidence < 0.8) { - warning(sprintf("Your calibration's confidence for %s between %s and %s is lower than 80%%, thus low-confidence", - temp$channel, temp$liftStartDate, temp$liftEndDate)) + warning(sprintf(paste( + "Your calibration's confidence for %s between %s and %s is lower than 80%%, thus low-confidence.", + "Consider getting rid of this experiment and running it again."), + temp$channel, temp$liftStartDate, temp$liftEndDate)) } } } @@ -451,8 +455,10 @@ check_calibration <- function(dt_input, date_var, calibration_input, dayInterval for (i in 1:nrow(calibration_input)) { temp <- calibration_input[i, ] if (temp$metric != dep_var) { - warning(sprintf("Your calibration's metric for %s between %s and %s is not '%s'", - temp$channel, temp$liftStartDate, temp$liftEndDate, dep_var)) + warning(sprintf(paste( + "Your calibration's metric for %s between %s and %s is not '%s'.", + "Please, remove this experiment from 'calibration_input'."), + temp$channel, temp$liftStartDate, temp$liftEndDate, dep_var)) } } } From 43301770ee03c76096d18e2ed492fd02b817e3e6 Mon Sep 17 00:00:00 2001 From: Bernardo Lares Date: Fri, 25 Mar 2022 11:09:28 -0500 Subject: [PATCH 16/17] feat: avoid weekly trend when granularity is larger than week --- R/R/checks.R | 6 ++++-- R/R/inputs.R | 20 +++++++++----------- R/man/prophet_decomp.Rd | 15 +++------------ demo/demo.R | 3 ++- 4 files changed, 18 insertions(+), 26 deletions(-) diff --git a/R/R/checks.R b/R/R/checks.R index a84d3ca06..cd906dfa0 100644 --- a/R/R/checks.R +++ b/R/R/checks.R @@ -131,7 +131,7 @@ check_depvar <- function(dt_input, dep_var, dep_var_type) { } } -check_prophet <- function(dt_holidays, prophet_country, prophet_vars, prophet_signs) { +check_prophet <- function(dt_holidays, prophet_country, prophet_vars, prophet_signs, dayInterval) { if (is.null(prophet_vars)) { prophet_signs <- NULL prophet_country <- NULL @@ -141,6 +141,9 @@ check_prophet <- function(dt_holidays, prophet_country, prophet_vars, prophet_si if (!all(prophet_vars %in% opts)) { stop("Allowed values for 'prophet_vars' are: ", paste(opts, collapse = ", ")) } + if ("weekday" %in% prophet_vars & dayInterval > 7) { + warning("Ignoring prophet_vars = 'weekday' input given your data granularity") + } if (is.null(prophet_country) | length(prophet_country) > 1 | !prophet_country %in% unique(dt_holidays$country)) { stop(paste( @@ -152,7 +155,6 @@ check_prophet <- function(dt_holidays, prophet_country, prophet_vars, prophet_si } if (is.null(prophet_signs)) { prophet_signs <- rep("default", length(prophet_vars)) - # message("'prophet_signs' were not provided. 'default' is used") } if (!all(prophet_signs %in% opts_pnd)) { stop("Allowed values for 'prophet_signs' are: ", paste(opts_pnd, collapse = ", ")) diff --git a/R/R/inputs.R b/R/R/inputs.R index 809bd905f..99c991b53 100644 --- a/R/R/inputs.R +++ b/R/R/inputs.R @@ -199,7 +199,7 @@ robyn_inputs <- function(dt_input = NULL, check_depvar(dt_input, dep_var, dep_var_type) ## Check prophet - prophet_signs <- check_prophet(dt_holidays, prophet_country, prophet_vars, prophet_signs) + prophet_signs <- check_prophet(dt_holidays, prophet_country, prophet_vars, prophet_signs, dayInterval) ## Check baseline variables (and maybe transform context_signs) context <- check_context(dt_input, context_vars, context_signs) @@ -640,6 +640,7 @@ robyn_engineering <- function(x, ...) { context_vars = InputCollect$context_vars, paid_media_spends = paid_media_spends, intervalType = InputCollect$intervalType, + dayInterval = InputCollect$dayInterval, custom_params = custom_params ) } @@ -668,20 +669,17 @@ robyn_engineering <- function(x, ...) { #' dependent variable. #' @param dt_transform A data.frame with all model features. #' @param dt_holidays As in \code{robyn_inputs()} -#' @param prophet_country As in \code{robyn_inputs()} -#' @param prophet_vars As in \code{robyn_inputs()} -#' @param prophet_signs As in \code{robyn_inputs()} -#' @param factor_vars As in \code{robyn_inputs()} -#' @param context_vars As in \code{robyn_inputs()} -#' @param paid_media_spends As in \code{robyn_inputs()} -#' @param intervalType As included in \code{InputCollect} +#' @param context_vars,paid_media_spends,intervalType,dayInterval +#' As included in \code{InputCollect} +#' @param prophet_country,prophet_vars,prophet_signs,factor_vars +#' As included in \code{InputCollect} #' @param custom_params List. Custom parameters passed to \code{prophet()} #' @return A list containing all prophet decomposition output. prophet_decomp <- function(dt_transform, dt_holidays, prophet_country, prophet_vars, prophet_signs, factor_vars, context_vars, paid_media_spends, - intervalType, custom_params) { - check_prophet(dt_holidays, prophet_country, prophet_vars, prophet_signs) + intervalType, dayInterval, custom_params) { + check_prophet(dt_holidays, prophet_country, prophet_vars, prophet_signs, dayInterval) recurrence <- subset(dt_transform, select = c("ds", "dep_var")) colnames(recurrence)[2] <- "y" @@ -698,7 +696,7 @@ prophet_decomp <- function(dt_transform, dt_holidays, yearly.seasonality = ifelse("yearly.seasonality" %in% names(custom_params), custom_params[["yearly.seasonality"]], use_season), - weekly.seasonality = ifelse("weekly.seasonality" %in% names(custom_params), + weekly.seasonality = ifelse("weekly.seasonality" %in% names(custom_params) & dayInterval <= 7, custom_params[["weekly.seasonality"]], use_weekday), daily.seasonality = FALSE # No hourly models allowed diff --git a/R/man/prophet_decomp.Rd b/R/man/prophet_decomp.Rd index 83a281769..d05d8ea93 100644 --- a/R/man/prophet_decomp.Rd +++ b/R/man/prophet_decomp.Rd @@ -14,6 +14,7 @@ prophet_decomp( context_vars, paid_media_spends, intervalType, + dayInterval, custom_params ) } @@ -22,19 +23,9 @@ prophet_decomp( \item{dt_holidays}{As in \code{robyn_inputs()}} -\item{prophet_country}{As in \code{robyn_inputs()}} +\item{prophet_country, prophet_vars, prophet_signs, factor_vars}{As included in \code{InputCollect}} -\item{prophet_vars}{As in \code{robyn_inputs()}} - -\item{prophet_signs}{As in \code{robyn_inputs()}} - -\item{factor_vars}{As in \code{robyn_inputs()}} - -\item{context_vars}{As in \code{robyn_inputs()}} - -\item{paid_media_spends}{As in \code{robyn_inputs()}} - -\item{intervalType}{As included in \code{InputCollect}} +\item{context_vars, paid_media_spends, intervalType, dayInterval}{As included in \code{InputCollect}} \item{custom_params}{List. Custom parameters passed to \code{prophet()}} } diff --git a/demo/demo.R b/demo/demo.R index 7b0e42aae..d7fff5c64 100644 --- a/demo/demo.R +++ b/demo/demo.R @@ -370,7 +370,7 @@ AllocatorCollect <- robyn_allocator( , channel_constr_up = c(1.2, 1.5, 1.5, 1.5, 1.5) ) print(AllocatorCollect) -AllocatorCollect$dt_optimOut +# plot(AllocatorCollect) # Run the "max_response_expected_spend" scenario: "What's the maximum response for a given # total spend based on historical saturation and what is the spend mix?" "optmSpendShareUnit" @@ -387,6 +387,7 @@ AllocatorCollect <- robyn_allocator( ) print(AllocatorCollect) AllocatorCollect$dt_optimOut +# plot(AllocatorCollect) ## A csv is exported into the folder for further usage. Check schema here: ## https://github.com/facebookexperimental/Robyn/blob/main/demo/schema.R From be4fe1a65ecc1c075a73ab58acab2425f3fdecee Mon Sep 17 00:00:00 2001 From: Bernardo Lares Date: Wed, 30 Mar 2022 18:47:24 -0500 Subject: [PATCH 17/17] Upgrade to version 3.6.2 --- R/DESCRIPTION | 2 +- R/R/allocator.R | 10 +++---- R/R/inputs.R | 58 +++++++++++++++++-------------------- R/man/fit_spend_exposure.Rd | 10 +++---- demo/schema.R | 4 +-- 5 files changed, 40 insertions(+), 44 deletions(-) diff --git a/R/DESCRIPTION b/R/DESCRIPTION index 557a545a8..cbb509936 100644 --- a/R/DESCRIPTION +++ b/R/DESCRIPTION @@ -1,7 +1,7 @@ Package: Robyn Type: Package Title: Semi-Automated Marketing Mix Modeling (MMM) from Meta Marketing Science -Version: 3.6.0 +Version: 3.6.2 Authors@R: c( person("Gufeng", "Zhou", , "gufeng@fb.com", c("aut")), person("Leonel", "Sentana", , "leonelsentana@fb.com", c("aut")), diff --git a/R/R/allocator.R b/R/R/allocator.R index 99cb53555..254061a5d 100644 --- a/R/R/allocator.R +++ b/R/R/allocator.R @@ -352,7 +352,7 @@ print.robyn_allocator <- function(x, ...) { Model ID: {x$dt_optimOut$solID[1]} Scenario: {scenario} Media Skipped (coef = 0): {paste0(x$skipped, collapse = ',')} -Total Spend Increase: {spend_increase_p}% ({spend_increase}{scenario_plus}) +Relative Spend Increase: {spend_increase_p}% ({spend_increase}{scenario_plus}) Total Response Increase (Optimized): {signif(100 * x$dt_optimOut$optmResponseUnitTotalLift[1], 3)}% Window: {x$dt_optimOut$date_min[1]}:{x$dt_optimOut$date_max[1]} ({x$dt_optimOut$periods[1]}) @@ -363,14 +363,14 @@ Allocation Summary: x$scenario == "max_historical_response", "Maximum Historical Response", "Maximum Response with Expected Spend"), - scenario_plus = ifelse( - x$scenario == "max_response_expected_spend", - sprintf(" in %s days", x$expected_spend_days), ""), spend_increase_p = signif(100 * x$dt_optimOut$expSpendUnitDelta[1], 3), spend_increase = formatNum( sum(x$dt_optimOut$optmSpendUnitTotal) - sum(x$dt_optimOut$initSpendUnitTotal), - abbr = TRUE + abbr = TRUE, sign = TRUE ), + scenario_plus = ifelse( + x$scenario == "max_response_expected_spend", + sprintf(" in %s days", x$expected_spend_days), ""), summary = paste(sprintf( " - %s: diff --git a/R/R/inputs.R b/R/R/inputs.R index 99c991b53..8e5692f46 100644 --- a/R/R/inputs.R +++ b/R/R/inputs.R @@ -546,6 +546,7 @@ robyn_engineering <- function(x, ...) { modNLSCollect <- list() yhatCollect <- list() plotNLSCollect <- list() + modelType_media_vars <- NULL for (i in 1:InputCollect$mediaVarCount) { if (exposure_selector[i]) { @@ -553,6 +554,7 @@ robyn_engineering <- function(x, ...) { # run models (NLS and/or LM) dt_spendModInput <- subset(dt_inputRollWind, select = c(paid_media_spends[i], paid_media_vars[i])) results <- fit_spend_exposure(dt_spendModInput, mediaCostFactor[i], paid_media_vars[i]) + modelType_media_vars <- c(modelType_media_vars, results$type) # compare NLS & LM, takes LM if NLS fits worse mod <- results$res exposure_selector[i] <- if (is.null(mod$rsq_nls)) FALSE else mod$rsq_nls > mod$rsq_lm @@ -564,9 +566,10 @@ robyn_engineering <- function(x, ...) { y = results$data$exposure, x = results$data$spend ) - dt_plotNLS <- melt.data.table(dt_plotNLS, - id.vars = c("channel", "y", "x"), - variable.name = "models", value.name = "yhat" + dt_plotNLS <- melt.data.table( + dt_plotNLS, + id.vars = c("channel", "y", "x"), + variable.name = "models", value.name = "yhat" ) dt_plotNLS[, models := str_remove(tolower(models), "yhat")] # create plot @@ -588,13 +591,19 @@ robyn_engineering <- function(x, ...) { theme_minimal() + theme(legend.position = "top", legend.justification = "left") - # save results into modNLSCollect. plotNLSCollect, yhatCollect + # Save results into modNLSCollect. plotNLSCollect, yhatCollect modNLSCollect[[paid_media_vars[i]]] <- mod plotNLSCollect[[paid_media_vars[i]]] <- models_plot yhatCollect[[paid_media_vars[i]]] <- dt_plotNLS } } - + # Message user when and with which channels Michaelis-Menten was not viable + no_mich_men <- paid_media_spends[modelType_media_vars == "lm"] + if (length(no_mich_men) > 0) + message(sprintf( + "Michaelis-Menten fitting for %s out of range. Used lm instead for these media channels", + v2t(no_mich_men, and = "and"))) + # Gather loop results modNLSCollect <- rbindlist(modNLSCollect) yhatNLSCollect <- rbindlist(yhatCollect) yhatNLSCollect$ds <- rep(dt_transformRollWind$ds, nrow(yhatNLSCollect) / nrow(dt_transformRollWind)) @@ -753,24 +762,16 @@ prophet_decomp <- function(dt_transform, dt_holidays, #' channel into sub-channels to achieve better fit, or just use #' spend as \code{paid_media_vars} #' -#' @param dt_spendModInput A data.frame with channel spends and exposure -#' data -#' @param mediaCostFactor A numeric vector. The ratio between raw media +#' @param dt_spendModInput data.frame. Containing channel spends and +#' exposure data. +#' @param mediaCostFactor Numeric vector. The ratio between raw media #' exposure and spend metrics. -#' @param paid_media_vars A character vector. All paid media variables. +#' @param paid_media_var Character. Paid media variable. #' @return A list containing the all spend-exposure model results. -fit_spend_exposure <- function(dt_spendModInput, mediaCostFactor, paid_media_vars) { +fit_spend_exposure <- function(dt_spendModInput, mediaCostFactor, paid_media_var) { if (ncol(dt_spendModInput) != 2) stop("Pass only 2 columns") colnames(dt_spendModInput) <- c("spend", "exposure") - # remove spend == 0 to avoid DIV/0 error - # dt_spendModInput$spend[dt_spendModInput$spend == 0] <- 0.01 - # # adapt exposure with avg when spend == 0 - # dt_spendModInput$exposure <- ifelse( - # dt_spendModInput$exposure == 0, dt_spendModInput$spend / mediaCostFactor, - # dt_spendModInput$exposure - # ) - # Model 1: Michaelis-Menten model Vmax * spend/(Km + spend) tryCatch( { @@ -793,29 +794,23 @@ fit_spend_exposure <- function(dt_spendModInput, mediaCostFactor, paid_media_var # identical(yhatNLS, yhatNLSQA) }, error = function(cond) { - message("Michaelis-Menten fitting for ", paid_media_vars, " out of range. Using lm instead") modNLS <- yhatNLS <- modNLSSum <- rsq_nls <- NULL }, warning = function(cond) { - message("Michaelis-Menten fitting for ", paid_media_vars, " out of range. Using lm instead") modNLS <- yhatNLS <- modNLSSum <- rsq_nls <- NULL }, - finally = { - if (!exists("modNLS")) modNLS <- yhatNLS <- modNLSSum <- rsq_nls <- NULL - } + finally = if (!exists("modNLS")) modNLS <- yhatNLS <- modNLSSum <- rsq_nls <- NULL ) - # build lm comparison model + # Model 2: Build lm comparison model modLM <- lm(exposure ~ spend - 1, data = dt_spendModInput) yhatLM <- predict(modLM) modLMSum <- summary(modLM) rsq_lm <- modLMSum$adj.r.squared - if (is.na(rsq_lm)) { - stop("Please check if ", paid_media_vars, " contains only 0s") - } + if (is.na(rsq_lm)) stop("Please check if ", paid_media_var, " contains only 0s") if (max(rsq_lm, rsq_nls) < 0.7) { warning(paste( - "Spend-exposure fitting for", paid_media_vars, + "Spend-exposure fitting for", paid_media_var, "has rsq = ", max(rsq_lm, rsq_nls), "To increase the fit, try splitting the variable.", "Otherwise consider using spend instead." @@ -824,7 +819,7 @@ fit_spend_exposure <- function(dt_spendModInput, mediaCostFactor, paid_media_var output <- list( res = data.table( - channel = paid_media_vars, + channel = paid_media_var, Vmax = if (!is.null(modNLS)) modNLSSum$coefficients[1, 1] else NA, Km = if (!is.null(modNLS)) modNLSSum$coefficients[2, 1] else NA, aic_nls = if (!is.null(modNLS)) AIC(modNLS) else NA, @@ -839,12 +834,13 @@ fit_spend_exposure <- function(dt_spendModInput, mediaCostFactor, paid_media_var modNLS = modNLS, yhatLM = yhatLM, modLM = modLM, - data = dt_spendModInput + data = dt_spendModInput, + type = ifelse(is.null(modNLS), "lm", "mm") ) - return(output) } + #################################################################### #' Detect and set date variable interval #' diff --git a/R/man/fit_spend_exposure.Rd b/R/man/fit_spend_exposure.Rd index 6e358e903..f22bbaf48 100644 --- a/R/man/fit_spend_exposure.Rd +++ b/R/man/fit_spend_exposure.Rd @@ -4,16 +4,16 @@ \alias{fit_spend_exposure} \title{Fit a nonlinear model for media spend and exposure} \usage{ -fit_spend_exposure(dt_spendModInput, mediaCostFactor, paid_media_vars) +fit_spend_exposure(dt_spendModInput, mediaCostFactor, paid_media_var) } \arguments{ -\item{dt_spendModInput}{A data.frame with channel spends and exposure -data} +\item{dt_spendModInput}{data.frame. Containing channel spends and +exposure data.} -\item{mediaCostFactor}{A numeric vector. The ratio between raw media +\item{mediaCostFactor}{Numeric vector. The ratio between raw media exposure and spend metrics.} -\item{paid_media_vars}{A character vector. All paid media variables.} +\item{paid_media_var}{Character. Paid media variable.} } \value{ A list containing the all spend-exposure model results. diff --git a/demo/schema.R b/demo/schema.R index d50913566..bb39409af 100644 --- a/demo/schema.R +++ b/demo/schema.R @@ -31,8 +31,8 @@ # $ roi_mean : Numeric. Pseudo-calc: roi_mean = mean_response / mean_spend. # $ roi_total : Numeric. Pseudo-calc: roi_total = xDecompAgg / total_spend. # $ cpa_total : Numeric. Pseudo-calc: cpa_total = total_spend / xDecompAgg -# $ mean_response : Numeric. Response of mean_spend. Pseudo-calc: mean_response1 = beta1 * saturaeted(mean_spend1) Note the difference to xDecompMeanNon0. -# $ next_unit_response : Numeric. Response of next unit spend from the level of mean_spend. Pseudo-calc: next_unit_response1 = beta1 * (saturaeted(mean_spend1 + 1) - saturaeted(mean_spend1)) +# $ mean_response : Numeric. Response of mean_spend. Pseudo-calc: mean_response1 = beta1 * saturated(mean_spend1) Note the difference to xDecompMeanNon0. +# $ next_unit_response : Numeric. Response of next unit spend from the level of mean_spend. Pseudo-calc: next_unit_response1 = beta1 * (saturated(mean_spend1 + 1) - saturated(mean_spend1)) #### pareto_hyperparameters.csv: Value of all hyperparameters of all pareto models. Number of column varies depending on input data