In [10]:
library(data.table) 
library(StanHeaders)
library(prophet)
library(rstan)
library(ggplot2)
library(reticulate)
library(Robyn)

# Training MMM Model Using Facebook's Robyn

## Daily Model

In [42]:
dt_data = fread("../data/processed/search_df_select.csv")
head(dt_data, 2)

revenue,google_search_Bottom,google_search_Mid,google_search_Top,bing_search_Bottom,bing_search_Top,facebook_Bottom,facebook_Mid,facebook_Top,google,bing,fb,top,mid,bottom,date
<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<date>
30638.84,248.73,0.27,1286.4,17.23,49.94,121.58,220.94,5416.81,3070.8,134.34,5759.33,13506.3,442.42,775.08,2019-01-01
30134.13,209.05,1.94,1243.62,18.18,59.97,120.67,216.65,5711.9,2909.22,156.3,6049.22,14030.98,437.18,695.8,2019-01-02
31172.54,232.55,4.58,1349.84,18.48,52.71,119.48,220.88,7191.77,3173.94,142.38,7532.13,17188.64,450.92,741.02,2019-01-03
22587.84,289.16,7.5,1229.11,23.32,58.14,118.49,197.02,7005.19,3051.54,162.92,7320.7,16584.88,409.04,861.94,2019-01-04
26166.98,281.36,2.0,1422.2,18.9,46.91,121.03,196.41,5057.96,3411.12,131.62,5375.4,13054.14,396.82,842.58,2019-01-05
32981.04,286.91,65.37,1738.27,24.53,59.58,120.51,196.31,5010.45,4181.1,168.22,5327.27,13616.6,523.36,863.9,2019-01-06


In [15]:
robyn_object <- "Robyn.RData"

In [38]:
InputCollect <- robyn_inputs(
dt_input = fread("../data/processed/search_df_select.csv")
,dt_holidays = dt_prophet_holidays
,date_var = "date"
,dep_var = "revenue"
,dep_var_type = "revenue"
,prophet_vars = c("trend", "season", "weekday")
,prophet_signs = c("default","default", "default")
,prophet_country ='US'
,paid_media_vars = c("google_search_Bottom", "google_search_Mid", "google_search_Top", "bing_search_Bottom", "bing_search_Top", "facebook_Bottom", "facebook_Mid", "facebook_Top")
,paid_media_signs = c("positive", "positive","positive", "positive", "positive","positive", "positive","positive")
,paid_media_spends = c("google_search_Bottom", "google_search_Mid", "google_search_Top", "bing_search_Bottom", "bing_search_Top", "facebook_Bottom", "facebook_Mid", "facebook_Top")

# parameters
,cores = 3
,window_start = "2019-01-01"
,window_end = "2021-07-31"

#core model params
,adstock = "geometric"
,iterations = 500
,nevergrad_algo = "TwoPointsDE"
,trials = 5
)
    

'hyperparameters' are not provided yet. To include them, run robyn_inputs(InputCollect = InputCollect, hyperparameters = ...)



In [39]:
#hyperparameters

#plot_adstock(plot = FALSE)
#plot_saturation(plot = FALSE)

hyperparameters <- list(
  google_search_Bottom_alphas = c(0.5, 3)
  ,google_search_Bottom_gammas = c(0.3, 1)
  ,google_search_Bottom_thetas = c(0, 0.3)
    
  ,google_search_Mid_alphas = c(0.5, 3)
  ,google_search_Mid_gammas = c(0.3, 1)
  ,google_search_Mid_thetas = c(0, 0.3)

 ,google_search_Top_alphas = c(0.5, 3)
  ,google_search_Top_gammas = c(0.3, 1)
  ,google_search_Top_thetas = c(0, 0.3)

  ,bing_search_Bottom_alphas = c(0.5, 3)
  ,bing_search_Bottom_gammas = c(0.3, 1)
  ,bing_search_Bottom_thetas = c(0, 0.3)
    
  ,bing_search_Top_alphas = c(0.5, 3)
  ,bing_search_Top_gammas = c(0.3, 1)
  ,bing_search_Top_thetas = c(0, 0.3)  
    
  ,facebook_Bottom_alphas = c(0.5, 3)
  ,facebook_Bottom_gammas = c(0.3, 1)
  ,facebook_Bottom_thetas = c(0, 0.3)
    
  ,facebook_Mid_alphas = c(0.5, 3)
  ,facebook_Mid_gammas = c(0.3, 1)
  ,facebook_Mid_thetas = c(0, 0.3)
    
  ,facebook_Top_alphas = c(0.5, 3)
  ,facebook_Top_gammas = c(0.3, 1)
  ,facebook_Top_thetas = c(0, 0.3)
    )


In [40]:
InputCollect <- robyn_inputs(InputCollect = InputCollect, hyperparameters = hyperparameters)

“We recommend to run at least 2000 iterations per trial and 5 trials to build initial model”


In [41]:
OutputCollect <- robyn_run(
  InputCollect = InputCollect # feed in all model specification
  , plot_folder = robyn_object # plots will be saved in the same folder as robyn_object
  , pareto_fronts = 3
  , plot_pareto = TRUE
  # , calibration_constraint = 0.1 # run ?robyn_run to see description
  # , lambda_control = 1 # run ?robyn_run to see description
  )

Provided 'plot_folder' doesn't exist. Using default 'plot_folder = getwd()': /Users/Jaime/repos/search-for-green/notebooks

Input data has 943 days in total: 2019-01-01 to 2021-07-31

Initial model is built on rolling window of 943 days: 2019-01-01 to 2021-07-31

Using geometric adstocking with 24 hyperparameters & 10-fold ridge x-validation on 3 cores

>>> Start running 5 trials with 500 iterations per trial each with TwoPointsDE nevergrad algorithm...

 Running trial nr. 1






 Finished in 2.99 mins






 Running trial nr. 2






 Finished in 2.89 mins






 Running trial nr. 3






 Finished in 2.83 mins






 Running trial nr. 4






 Finished in 3.08 mins






 Running trial nr. 5






 Finished in 3.89 mins






>>> Collecting results...

>>> Exporting all charts into directory: /Users/Jaime/repos/search-for-green/notebooks/2021-11-09 00.55 init...

>>> Plotting summary charts...

No spend-exposure modelling needed. all media variables used for mmm are spend variables 

>>> Plotting 57 Pareto optimum models...






Total time: 19.96 mins



In [43]:
OutputCollect$allSolutions # get all model IDs in result
select_model <- "2_118_2" # select one from above
robyn_save(robyn_object = robyn_object # model object location and name
           , select_model = select_model # selected model ID
           , InputCollect = InputCollect # all model input
           , OutputCollect = OutputCollect # all model output
)

Robyn.RData already exists. Are you certain to overwrite it? (Yes/no/cancel) Yes


![Robyn Daily Model](../assets/2_118_2.png)

## Weekly Model

In [62]:
robyn_object <- "robyn_weekly/Robyn_weekly.RData"

In [63]:
InputCollect <- robyn_inputs(
dt_input = fread("../data/processed/weekly.csv")
,dt_holidays = dt_prophet_holidays
,date_var = "date"
,dep_var = "revenue"
,dep_var_type = "revenue"
,prophet_vars = c("trend", "season")
,prophet_signs = c("default","default")
,prophet_country ='US'
,paid_media_vars = c("google_search_Bottom", "google_search_Mid", "google_search_Top", "bing_search_Bottom", "bing_search_Top", "facebook_Bottom", "facebook_Mid", "facebook_Top")
,paid_media_signs = c("positive", "positive","positive", "positive", "positive","positive", "positive","positive")
,paid_media_spends = c("google_search_Bottom", "google_search_Mid", "google_search_Top", "bing_search_Bottom", "bing_search_Top", "facebook_Bottom", "facebook_Mid", "facebook_Top")

# parameters
,cores = 3
,window_start = "2019-01-07"
,window_end = "2021-08-02"

#core model params
,adstock = "geometric"
,iterations = 500
,nevergrad_algo = "TwoPointsDE"
,trials = 5
)
    

'hyperparameters' are not provided yet. To include them, run robyn_inputs(InputCollect = InputCollect, hyperparameters = ...)



In [64]:
#hyperparameters

#plot_adstock(plot = FALSE)
#plot_saturation(plot = FALSE)

hyperparameters <- list(
  google_search_Bottom_alphas = c(0.5, 3)
  ,google_search_Bottom_gammas = c(0.3, 1)
  ,google_search_Bottom_thetas = c(0, 0.3)
    
  ,google_search_Mid_alphas = c(0.5, 3)
  ,google_search_Mid_gammas = c(0.3, 1)
  ,google_search_Mid_thetas = c(0, 0.3)

 ,google_search_Top_alphas = c(0.5, 3)
  ,google_search_Top_gammas = c(0.3, 1)
  ,google_search_Top_thetas = c(0, 0.3)

  ,bing_search_Bottom_alphas = c(0.5, 3)
  ,bing_search_Bottom_gammas = c(0.3, 1)
  ,bing_search_Bottom_thetas = c(0, 0.3)
    
  ,bing_search_Top_alphas = c(0.5, 3)
  ,bing_search_Top_gammas = c(0.3, 1)
  ,bing_search_Top_thetas = c(0, 0.3)  
    
  ,facebook_Bottom_alphas = c(0.5, 3)
  ,facebook_Bottom_gammas = c(0.3, 1)
  ,facebook_Bottom_thetas = c(0, 0.3)
    
  ,facebook_Mid_alphas = c(0.5, 3)
  ,facebook_Mid_gammas = c(0.3, 1)
  ,facebook_Mid_thetas = c(0, 0.3)
    
  ,facebook_Top_alphas = c(0.5, 3)
  ,facebook_Top_gammas = c(0.3, 1)
  ,facebook_Top_thetas = c(0, 0.3)
    )


In [65]:
InputCollect <- robyn_inputs(InputCollect = InputCollect, hyperparameters = hyperparameters)

“We recommend to run at least 2000 iterations per trial and 5 trials to build initial model”


In [66]:
OutputCollect <- robyn_run(
  InputCollect = InputCollect # feed in all model specification
  , plot_folder = robyn_object # plots will be saved in the same folder as robyn_object
  , pareto_fronts = 3
  , plot_pareto = TRUE
  # , calibration_constraint = 0.1 # run ?robyn_run to see description
  # , lambda_control = 1 # run ?robyn_run to see description
  )

Provided 'plot_folder' doesn't exist. Using default 'plot_folder = getwd()': /Users/Jaime/repos/search-for-green/notebooks

Input data has 135 weeks in total: 2019-01-07 to 2021-08-02

Initial model is built on rolling window of 135 weeks: 2019-01-07 to 2021-08-02

Using geometric adstocking with 24 hyperparameters & 10-fold ridge x-validation on 3 cores

>>> Start running 5 trials with 500 iterations per trial each with TwoPointsDE nevergrad algorithm...

 Running trial nr. 1






 Finished in 3.17 mins






 Running trial nr. 2






 Finished in 3.07 mins






 Running trial nr. 3






 Finished in 3.19 mins






 Running trial nr. 4






 Finished in 3.66 mins






 Running trial nr. 5






 Finished in 3.52 mins






>>> Collecting results...

>>> Exporting all charts into directory: /Users/Jaime/repos/search-for-green/notebooks/2021-11-09 04.02 init...

>>> Plotting summary charts...

No spend-exposure modelling needed. all media variables used for mmm are spend variables 

>>> Plotting 43 Pareto optimum models...






Total time: 20.24 mins



In [68]:
# Save model 3_165_2
OutputCollect$allSolutions # get all model IDs in result
select_model <- "3_165_2" # select one from above
robyn_save(robyn_object = robyn_object # model object location and name
           , select_model = select_model # selected model ID
           , InputCollect = InputCollect # all model input
           , OutputCollect = OutputCollect # all model output
)

![Robyn Weekly Model](../assets/3_165_2.png)