# **Marketing Mix Modeling**

In [24]:
#imports and read dataset
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from robyn.robyn import Robyn
from robyn.data.entities.mmmdata import MMMData
from robyn.data.entities.holidays_data import HolidaysData
from robyn.data.entities.hyperparameters import Hyperparameters, ChannelHyperparameters
from robyn.data.entities.enums import AdstockType, DependentVarType

## Begin with some EDA

In [25]:
#df = pd.read_csv('input/synthetic_data.csv')
df= pd.read_csv('input/synthetic_data.csv')

# Load holidays data (used for seasonality modeling)
#dt_prophet_holidays = pd.read_csv("resources/dt_prophet_holidays.csv")
df.head()

Unnamed: 0,date,google_pla_I,google_nonpla_I,bing_pla_I,bing_nonpla_I,meta_I,tiktok_I,pinterest_I,affiliate_I,pr_I,...,bing_pla_S,bing_nonpla_S,meta_S,tiktok_S,pinterest_S,affiliate_S,pr_S,influencer_asc_S,sales,competitor_sales_B
0,10/3/21,33142875.0,7986860.0,,297128.0,19106059.0,3608123.0,2211778.0,85693.0,766352587.0,...,17873.08,10255.34,124335.2,11763.14,4547.54,20017.9,26594.0,14400.0,25147299.0,460340.82
1,10/10/21,24838733.0,2338098.0,2635119.0,1751916.0,5273845.0,1829623.0,1572375.0,80400.0,780232071.0,...,16828.26,14368.34,145480.16,10041.34,7535.0,23293.26,6313.0,4500.0,28903507.02,385431.11
2,10/17/21,18465422.0,1888239.0,7091734.0,1389649.0,7258917.0,1700600.0,220440.0,66693.0,414680611.0,...,3592.92,19871.33,94816.12,21856.13,7838.0,12148.58,26594.0,36200.0,26328953.0,429631.61
3,10/24/21,7777091.0,2085512.0,1150658.0,1341084.0,2265796.0,4731052.0,279854.0,50266.0,28433443.0,...,7523.46,52562.39,99636.66,5916.95,17529.28,22046.16,7193.0,15250.0,31054671.0,438455.96
4,10/31/21,16411077.0,1185898.0,2111625.0,2237272.0,11208520.0,2585920.0,1315035.0,78086.0,509698752.0,...,17958.5,13397.76,138841.64,9049.22,5933.47,29677.05,1882.0,13000.0,30692804.0,448547.07


In [26]:

mmm_data_spec = MMMData.MMMDataSpec(
    dep_var='sales',  # Target variable
    dep_var_type="revenue",  # Type: "revenue" or "conversion"
    date_var="date",  # Date column name
    context_vars=['competitor_sales_B'],#, 'events'],
    paid_media_spends=['meta_S', 'tiktok_S', 'pinterest_S', 'affiliate_S', 'influencer_asc_S', 'google_pla_S', 'google_nonpla_S', 'bing_pla_S', 'bing_nonpla_I', 'pr_S'],  # Media spend columns
    paid_media_vars=['meta_I','tiktok_I', 'pinterest_I', 'affiliate_I', 'influencer_asc_I', 'google_pla_I', 'google_nonpla_I', 'bing_pla_I', 'bing_nonpla_I', 'pr_I'],  # Media metrics
    organic_vars=['organic_youtube_I', 'organic_facebook_I', 'organic_instagram_I', 'organic_pinterest_I', 'organic_tiktok_I'],  # Non-paid marketing activities
    factor_vars=[],
    window_start="2021-10-03",  # Analysis start date
    window_end="2024-09-22" # Analysis end date
)

mmm_data = MMMData(data=df, mmmdata_spec=mmm_data_spec)

  self.data[self.mmmdata_spec.date_var] = pd.to_datetime(


In [27]:
#specify hyperparameters for each variable
hyperparameters = Hyperparameters(
    hyperparameters={
        # Paid Channels
        "meta_S": ChannelHyperparameters(alphas=[0.5, 3], gammas=[0.3, 1], thetas=[0, 0.3],),
        "tiktok_S": ChannelHyperparameters(alphas=[0.5, 3], gammas=[0.3, 1], thetas=[0, 0.3],),
        "pinterest_S": ChannelHyperparameters(alphas=[0.5, 3], gammas=[0.3, 1], thetas=[0, 0.3],),
        "affiliate_S": ChannelHyperparameters(alphas=[0.5, 3], gammas=[0.3, 1], thetas=[0, 0.3],),
        "influencer_asc_S": ChannelHyperparameters(alphas=[0.5, 3], gammas=[0.3, 1], thetas=[0, 0.3],),
        "pr_S": ChannelHyperparameters(alphas=[0.5, 3], gammas=[0.3, 1], thetas=[0, 0.3],),
        'google_pla_S': ChannelHyperparameters(alphas=[0.5, 3], gammas=[0.3, 1], thetas=[0, 0.3],),
        'google_nonpla_S': ChannelHyperparameters(alphas=[0.5, 3], gammas=[0.3, 1], thetas=[0, 0.3],),
        'bing_pla_S': ChannelHyperparameters(alphas=[0.5, 3], gammas=[0.3, 1], thetas=[0, 0.3],),
        'bing_nonpla_S': ChannelHyperparameters(alphas=[0.5, 3], gammas=[0.3, 1], thetas=[0, 0.3],),
        #'search_S': ChannelHyperparameters(alphas=[0.5, 3], gammas=[0.3, 1], thetas=[0, 0.3],),
        # Paid Channels (Incremental)
        "meta_I": ChannelHyperparameters(alphas=[0.5, 3], gammas=[0.3, 1], thetas=[0.1, 0.4],),
        "tiktok_I": ChannelHyperparameters(alphas=[0.5, 3], gammas=[0.3, 1], thetas=[0.1, 0.4],),
        "pinterest_I": ChannelHyperparameters(alphas=[0.5, 3], gammas=[0.3, 1], thetas=[0.1, 0.4],),
        "affiliate_I": ChannelHyperparameters(alphas=[0.5, 3], gammas=[0.3, 1], thetas=[0.1, 0.4],),
        "influencer_asc_I": ChannelHyperparameters(alphas=[0.5, 3], gammas=[0.3, 1], thetas=[0.1, 0.4],),
        "pr_I": ChannelHyperparameters(alphas=[0.5, 3], gammas=[0.3, 1], thetas=[0.1, 0.4],),
        'google_pla_I': ChannelHyperparameters(alphas=[0.5, 3], gammas=[0.3, 1], thetas=[0.1, 0.4],),
        'google_nonpla_I': ChannelHyperparameters(alphas=[0.5, 3], gammas=[0.3, 1], thetas=[0.1, 0.4],),
        'bing_pla_I':ChannelHyperparameters(alphas=[0.5, 3], gammas=[0.3, 1], thetas=[0.1, 0.4],),
        'bing_nonpla_I':ChannelHyperparameters(alphas=[0.5, 3], gammas=[0.3, 1], thetas=[0.1, 0.4],),
       #'search_I':ChannelHyperparameters(alphas=[0.5, 3], gammas=[0.3, 1], thetas=[0.1, 0.4],),
        # Organic Channels
        "organic_youtube_I": ChannelHyperparameters(alphas=[0.5, 3], gammas=[0.3, 1], thetas=[0.1, 0.3],),  # Default theta for organic channels),
        "organic_facebook_I": ChannelHyperparameters(alphas=[0.5, 3], gammas=[0.3, 1], thetas=[0.1, 0.3],),
        "organic_instagram_I": ChannelHyperparameters(alphas=[0.5, 3], gammas=[0.3, 1], thetas=[0.1, 0.3],),
        "organic_pinterest_I": ChannelHyperparameters(alphas=[0.5, 3], gammas=[0.3, 1], thetas=[0.1, 0.3],),
        "organic_tiktok_I": ChannelHyperparameters(alphas=[0.5, 3], gammas=[0.3, 1], thetas=[0.1, 0.3],)
    },
    adstock=AdstockType.GEOMETRIC,  # Default adstock type
    lambda_=[0.0001, 0.001, 0.01, 0.1, 1, 10],               # Regularization range
    train_size=[0.7, 0.8],         # Default train size
)

In [28]:
holidays_data = HolidaysData(
    dt_holidays=pd.read_csv('/Users/grs/Documents/GA DAI/Capstone/Marketing Mix Modeling/formatted_holidays.csv'),
    prophet_vars=["trend", "season", "holiday"],
    prophet_country="US",
    prophet_signs=["default", "default", "default"],
)

In [29]:
#initialize the model
robyn = Robyn(working_dir="output")
robyn.initialize(
    mmm_data=mmm_data,
    holidays_data=holidays_data,
    hyperparameters=hyperparameters,
)

INFO: Initialized Robyn in output
INFO: Initialized Robyn in output
INFO: Initialized Robyn in output
INFO: Initialized Robyn in output
2025-01-29 19:25:04,434 - robyn.robyn - INFO - Initialized Robyn in output
INFO: Validating input data
INFO: Validating input data
INFO: Validating input data
INFO: Validating input data
2025-01-29 19:25:04,436 - robyn.robyn - INFO - Validating input data
2025-01-29 19:25:04,438 - robyn.data.validation.mmmdata_validation - INFO - Starting complete MMMData validation
2025-01-29 19:25:04,440 - robyn.data.validation.mmmdata_validation - ERROR - Validation failed: Dataset contains missing (NA) values in columns: date, google_pla_I, google_nonpla_I, bing_pla_I, bing_nonpla_I, meta_I, tiktok_I, pinterest_I, affiliate_I, pr_I, influencer_asc_I, organic_youtube_I, organic_facebook_I, organic_instagram_I, organic_pinterest_I, organic_tiktok_I, events, google_pla_S, google_nonpla_S, bing_pla_S, bing_nonpla_S, meta_S, tiktok_S, pinterest_S, affiliate_S, pr_S, inf

In [31]:
#matplotlib to visualize feature relationships
%matplotlib inline
# Run feature engineering
robyn.feature_engineering();

INFO: Performing feature engineering
INFO: Performing feature engineering
INFO: Performing feature engineering
INFO: Performing feature engineering
2025-01-29 19:26:38,745 - robyn.robyn - INFO - Performing feature engineering
2025-01-29 19:26:38,752 - robyn.modeling.feature_engineering - INFO - Starting feature engineering process
2025-01-29 19:26:38,758 - root - ERROR - Error in feature engineering: Cannot convert non-finite values (NA or inf) to integer


IntCastingNaNError: Cannot convert non-finite values (NA or inf) to integer

In [32]:
#configure and train models on our data
from robyn.modeling.entities.enums import Models, NevergradAlgorithm
from robyn.modeling.entities.modelrun_trials_config import TrialsConfig

trials_config = TrialsConfig(iterations=54, trials=5)

robyn.train_models(
    trials_config=trials_config,
    ts_validation=True,
    add_penalty_factor=False,
    rssd_zero_penalty=True,
    cores=8,
    nevergrad_algo=NevergradAlgorithm.TWO_POINTS_DE,
    model_name=Models.RIDGE,
)

ValueError: Must call initialize() first

In [None]:
#build models and display results for evaluation
from robyn.modeling.clustering.clustering_config import ClusterBy, ClusteringConfig

configs = ClusteringConfig(
    dep_var_type=DependentVarType(mmm_data.mmmdata_spec.dep_var_type),
    cluster_by=ClusterBy.HYPERPARAMETERS,
    max_clusters=10,
    min_clusters=3,
    weights=[1.0, 1.0, 1.0],
)


In [None]:
#robyn.evaluate_models(cluster_config=configs)
robyn.evaluate_models(cluster_config=configs)

In [None]:
robyn.generate_one_pager()

In [None]:
from robyn.allocator.entities.allocation_params import AllocatorParams
from robyn.allocator.constants import (
    SCENARIO_MAX_RESPONSE,
    CONSTRAINT_MODE_EQ,
)

allocator_params = AllocatorParams(
    scenario=SCENARIO_MAX_RESPONSE,
    total_budget=None,  # Uses total spend in date_range when None
    date_range="all",
    channel_constr_low=[0.7],  # Minimum spend multiplier
    channel_constr_up=[1.2],  # Maximum spend multiplier
    channel_constr_multiplier=3.0,
    optim_algo="SLSQP_AUGLAG",
    maxeval=100000,
    constr_mode=CONSTRAINT_MODE_EQ,
    plots=True,
)

allocation_result = robyn.optimize_budget(
    allocator_params=allocator_params,
    select_model=None,
)