In [40]:
!pip install mamimo




[notice] A new release of pip available: 22.2.2 -> 22.3
[notice] To update, run: python.exe -m pip install --upgrade pip


In [41]:
from mamimo.datasets import load_fake_mmm
data = load_fake_mmm()
X = data.drop(columns=['Sales'])
y = data['Sales']

In [42]:
data

Unnamed: 0_level_0,TV,Radio,Banners,Sales
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2018-01-07,13528.10,0.00,0.00,4779.80
2018-01-14,0.00,5349.65,2218.93,8405.72
2018-01-21,0.00,4235.86,2046.96,7353.87
2018-01-28,0.00,3562.21,0.00,4351.05
2018-02-04,0.00,0.00,2187.29,5472.92
...,...,...,...,...
2021-10-03,0.00,0.00,1691.68,25422.62
2021-10-10,11543.58,4615.35,2518.88,32441.72
2021-10-17,0.00,4556.16,1919.19,29536.68
2021-10-24,0.00,0.00,1707.65,25934.91


In [43]:
from mamimo.carryover import ExponentialCarryover
from mamimo.saturation import ExponentialSaturation
from mamimo.linear_model import LinearRegression
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
adstock = ColumnTransformer(
    [
     ('tv_pipe', Pipeline([
            ('carryover', ExponentialCarryover()),
            ('saturation', ExponentialSaturation())
     ]), ['TV']),
     ('radio_pipe', Pipeline([
            ('carryover', ExponentialCarryover()),
            ('saturation', ExponentialSaturation())
     ]), ['Radio']),
     ('banners_pipe', Pipeline([
            ('carryover', ExponentialCarryover()),
            ('saturation', ExponentialSaturation())
     ]), ['Banners']),
    ]
)
model = Pipeline([
    ('adstock', adstock),
    ('regression', LinearRegression(positive=True))
])

In [57]:
model.fit(X, y).score(X, y)

0.10985072579909416

In [45]:
from scipy.stats import uniform, randint
from sklearn.model_selection import RandomizedSearchCV, TimeSeriesSplit
tuned_model = RandomizedSearchCV(
    model,
    param_distributions={
        'adstock__tv_pipe__carryover__window': randint(1, 10),
        'adstock__tv_pipe__carryover__strength': uniform(0, 1),
        'adstock__tv_pipe__saturation__exponent': uniform(0, 1),
        'adstock__radio_pipe__carryover__window': randint(1, 10),
        'adstock__radio_pipe__carryover__strength': uniform(0, 1),
        'adstock__radio_pipe__saturation__exponent': uniform(0, 1),
        'adstock__banners_pipe__carryover__window': randint(1, 10),
        'adstock__banners_pipe__carryover__strength': uniform(0, 1),
        'adstock__banners_pipe__saturation__exponent': uniform(0,1),
    },
    cv=TimeSeriesSplit(),
    random_state=0,
    n_iter=100,

)

This basically tries to find the best hyperparameters in the ranges 0 to 1 for the carryover strengths and saturation exponents, and integers between 1 and 10 (weeks) for the carryover length. The algorithm tries n_iter=100 different random hyperparameter combinations and evaluates the r² using a 5-fold expanding window time series split using sklearn's TimeSeriesSplit().

time features

In [59]:
from mamimo.time_utils import add_time_features, add_date_indicators
X = (X
     .pipe(add_time_features, month=True)
     .pipe(add_date_indicators, special_date=["2020-01-05"])
     .assign(trend=range(200))
)

In [60]:
from mamimo.time_utils import PowerTrend
from mamimo.carryover import ExponentialCarryover
from mamimo.saturation import ExponentialSaturation
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
cats =  [list(range(1, 13))] # different months, known beforehand
preprocess = ColumnTransformer(
    [
     ('tv_pipe', Pipeline([
            ('carryover', ExponentialCarryover()),
            ('saturation', ExponentialSaturation())
     ]), ['TV']),
     ('radio_pipe', Pipeline([
            ('carryover', ExponentialCarryover()),
            ('saturation', ExponentialSaturation())
     ]), ['Radio']),
     ('banners_pipe', Pipeline([
            ('carryover', ExponentialCarryover()),
            ('saturation', ExponentialSaturation())
     ]), ['Banners']),
    ('month', OneHotEncoder(sparse=False, categories=cats), ['month']),
    ('trend', PowerTrend(), ['trend']),
    ('special_date', ExponentialCarryover(), ['special_date'])
    ]
)
new_model = Pipeline([
    ('preprocess', preprocess),
    ('regression', LinearRegression(
        positive=True,
        fit_intercept=False) # no intercept because of the months
    )
])

In [63]:
data

Unnamed: 0_level_0,TV,Radio,Banners,Sales
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2018-01-07,13528.10,0.00,0.00,4779.80
2018-01-14,0.00,5349.65,2218.93,8405.72
2018-01-21,0.00,4235.86,2046.96,7353.87
2018-01-28,0.00,3562.21,0.00,4351.05
2018-02-04,0.00,0.00,2187.29,5472.92
...,...,...,...,...
2021-10-03,0.00,0.00,1691.68,25422.62
2021-10-10,11543.58,4615.35,2518.88,32441.72
2021-10-17,0.00,4556.16,1919.19,29536.68
2021-10-24,0.00,0.00,1707.65,25934.91


In [65]:
from scipy.stats import randint, uniform
from sklearn.model_selection import RandomizedSearchCV, TimeSeriesSplit
tuned_new_model = RandomizedSearchCV(
  new_model,
  param_distributions={
    'preprocess__tv_pipe__carryover__window': randint(1, 10),
    'preprocess__tv_pipe__carryover__strength': uniform(0, 1),
    'preprocess__tv_pipe__saturation__exponent': uniform(0, 1),
    'preprocess__radio_pipe__carryover__window': randint(1, 10),
    'preprocess__radio_pipe__carryover__strength': uniform(0,1),
    'preprocess__radio_pipe__saturation__exponent': uniform(0, 1),
    'preprocess__banners_pipe__carryover__window': randint(1, 10),
    'preprocess__banners_pipe__carryover__strength': uniform(0, 1),
    'preprocess__banners_pipe__saturation__exponent': uniform(0, 1),
    'preprocess__trend__power': uniform(0, 2),           # new
    'preprocess__special_date__window': randint(1, 10),  # new
    'preprocess__special_date__strength': uniform(0, 1), # new
  },
  cv=TimeSeriesSplit(),
  random_state=0,
  n_iter=1000, # some more iterations, takes more time
)
tuned_model.fit(X, y)

RandomizedSearchCV(cv=TimeSeriesSplit(gap=0, max_train_size=None, n_splits=5, test_size=None),
                   estimator=Pipeline(steps=[('adstock',
                                              ColumnTransformer(transformers=[('tv_pipe',
                                                                               Pipeline(steps=[('carryover',
                                                                                                ExponentialCarryover()),
                                                                                               ('saturation',
                                                                                                ExponentialSaturation())]),
                                                                               ['TV']),
                                                                              ('radio_pipe',
                                                                               Pipeline(steps=[('carryover',
    

In [70]:
from mamimo.analysis import breakdown
contributions = breakdown(tuned_new_model.best_estimator_, X, y)
ax = contributions.plot.area(
    figsize=(16, 10),
    linewidth=1,
    title="Predicted Sales and Breakdown",
    ylabel="Sales",
    xlabel="Date",
)
handles, labels = ax.get_legend_handles_labels()
ax.legend(
    handles[::-1],
    labels[::-1],
    title="Channels",
    loc="center left",
    bbox_to_anchor=(1.01, 0.5),
)

AttributeError: 'RandomizedSearchCV' object has no attribute 'best_estimator_'