In [1]:
import sys
import os

# Make the parent directory (where src/ lives) importable from this notebook.
# The path is resolved relative to the current working directory, so the
# notebook is expected to be run from its own folder.
project_root = os.path.abspath(os.path.join(".."))

# Guard the append so re-running this cell does not pile up duplicate entries.
if project_root not in sys.path:
    sys.path.append(project_root)

In [2]:
import pandas as pd
from src.ingestion.generator import generate_marketing_data

# Number of weeks of data requested from the generator.
# NOTE(review): the models below are created with test_size=0.2, so with 10
# rows the hold-out is presumably only ~2 observations — confirm this is
# intended before reading much into the reported RMSE / R² values.
N_WEEKS = 10

df = generate_marketing_data(n_weeks=N_WEEKS)

In [3]:
from src.models.baseline_model import BaselineMMM

# Model inputs: raw channel spend columns plus the control columns.
target = "sales"
features = [
    "tv_spend",
    "digital_spend",
    "search_spend",
    "social_spend",
    "promo_flag",
    "holiday_flag",
    "price_index",
]
X, y = df[features], df[target]

# Fit the baseline model. The train/test split is delegated to BaselineMMM
# via test_size / shuffle (shuffle=False is passed so rows are not shuffled).
baseline_model = BaselineMMM(test_size=0.2, shuffle=False)
baseline_model.fit(X, y)

# Report the fitted coefficients.
coef_df = baseline_model.get_coefficients()
print("Feature Coefficients:")
print(coef_df)

# Report the evaluation metrics returned by the model.
metrics = baseline_model.evaluate()
print(f"Baseline MMM RMSE: {metrics['RMSE']:.2f}")
print(f"Baseline MMM R²: {metrics['R2']:.2f}")

# Predictions on the model's own held-out features, kept for later inspection.
y_pred = baseline_model.predict(baseline_model.X_test)


Feature Coefficients:
         feature   coefficient
2   search_spend    683.904802
1  digital_spend    591.443101
0       tv_spend    209.816431
5   holiday_flag      0.000000
3   social_spend   -269.348186
4     promo_flag -22931.853207
6    price_index -55346.162768
Baseline MMM RMSE: 1426.43
Baseline MMM R²: 0.44


In [4]:
from src.features.feature_builder import MediaFeatureBuilder
from sklearn.model_selection import train_test_split

# Per-channel decay / gamma settings, keyed by raw spend column.
channel_params = {
    "tv_spend": {"decay": 0.6, "gamma": 0.5},
    "digital_spend": {"decay": 0.4, "gamma": 0.6},
    "search_spend": {"decay": 0.3, "gamma": 0.5},
    "social_spend": {"decay": 0.5, "gamma": 0.4},
}

# Run the builder on a copy so `df` itself is not handed to the transform.
builder = MediaFeatureBuilder(channel_params)
df_mmm = builder.transform(df.copy())

# Model features: one "<channel>_adstock" column per configured channel
# (in channel_params order), followed by the control columns.
features_mmm = [f"{channel}_adstock" for channel in channel_params] + [
    "promo_flag",
    "holiday_flag",
    "price_index",
]

X_mmm, y_mmm = df_mmm[features_mmm], df_mmm["sales"]

# Chronological train/test split: shuffle=False keeps the row order, so the
# last 20% of rows form the test set.
X_train, X_test, y_train, y_test = train_test_split(
    X_mmm,
    y_mmm,
    test_size=0.2,
    shuffle=False,
)

In [5]:
from src.models.mmm_model import RegularizedMMM

# `features_mmm`, `X_mmm` and `y_mmm` are reused from the feature-building
# cell instead of being redefined as identical copies here, so the feature
# list has a single source of truth.

# Initialize and train the regularized MMM. The train/test split is delegated
# to the model via test_size / shuffle.
ridge_mmm = RegularizedMMM(alpha=1.0, test_size=0.2, shuffle=False)
ridge_mmm.fit(X_mmm, y_mmm)

# Coefficients
coef_df = ridge_mmm.get_coefficients()
print("Regularized MMM Coefficients:")
print(coef_df)

# Evaluate
metrics = ridge_mmm.evaluate()
print(f"Regularized MMM RMSE: {metrics['RMSE']:.2f}")
print(f"Regularized MMM R²: {metrics['R2']:.2f}")

# Channel contributions: only the media (adstock) columns, in feature order.
channel_cols = [col for col in features_mmm if col.endswith("_adstock")]
contrib_df = ridge_mmm.channel_contribution(channel_cols)
print("Total Channel Contributions:")
print(contrib_df)

Regularized MMM Coefficients:
                 feature  coefficient
6            price_index  1373.445092
3   social_spend_adstock   399.748690
2   search_spend_adstock   387.943451
0       tv_spend_adstock   264.766046
1  digital_spend_adstock    14.483002
5           holiday_flag     0.000000
4             promo_flag -1966.459312
Regularized MMM RMSE: 5297.56
Regularized MMM R²: -6.74
Total Channel Contributions:
                       total_contribution
search_spend_adstock          2777.712726
social_spend_adstock          2753.809340
tv_spend_adstock              1980.294599
digital_spend_adstock          107.816401


In [6]:
from src.evaluation.roi import ROIAnalyzer

# `channel_params` is reused from the feature-building cell rather than being
# copy-pasted here, so the decay / gamma settings cannot drift between the
# feature transform and the ROI analysis.

# Raw channel spend columns
raw_channels = ["tv_spend", "digital_spend", "search_spend", "social_spend"]

# Adstocked feature columns, derived from the raw names so the two lists
# always stay aligned (same "<channel>_adstock" naming as `features_mmm`).
adstock_channels = [f"{channel}_adstock" for channel in raw_channels]

# Initialize ROI analyzer on the fitted regularized model and transformed data
roi_analyzer = ROIAnalyzer(
    model=ridge_mmm,
    df=df_mmm,
    channel_params=channel_params,
    features=features_mmm,
)

# Incremental sales
# NOTE: this rebinds `contrib_df`, replacing the channel-contribution table
# printed by the previous cell.
contrib_df = roi_analyzer.incremental_sales(adstock_channels)
print(contrib_df)

# ROI calculation
roi_df = roi_analyzer.roi(adstock_channels, raw_channels)
print(roi_df)

# Simulated ROI for +10% spend per channel
sim_roi_df = roi_analyzer.simulate_roi_all(raw_channels, increase_pct=0.1)
print(sim_roi_df)


                       incremental_sales
search_spend_adstock         3477.519387
social_spend_adstock         3442.282188
tv_spend_adstock             2476.855018
digital_spend_adstock         134.741770
                       incremental_sales  total_spend       ROI
search_spend_adstock         3477.519387  1011.365393  3.438440
social_spend_adstock         3442.282188   522.842213  6.583788
tv_spend_adstock             2476.855018   575.786561  4.301690
digital_spend_adstock         134.741770   544.805365  0.247321
                    ROI
tv_spend       0.073643
digital_spend  0.009978
search_spend   0.292225
social_spend   0.329612


In [7]:
from src.simulation.scenarios import ScenarioSimulator
from src.simulation.optimizer import BudgetOptimizer

# Scenario simulator built on the fitted regularized model and the
# transformed frame.
simulator = ScenarioSimulator(
    model=ridge_mmm, df=df_mmm, channel_params=channel_params, features=features_mmm
)

# Each scenario maps raw spend columns to an adjustment value; an empty
# mapping leaves every channel untouched.
# NOTE(review): values look like fractional changes (0.2 for +20%) — confirm
# against ScenarioSimulator.
scenarios = {
    "Baseline": {},
    "TV → Search (20%)": {
        "tv_spend": -0.2,
        "search_spend": 0.2,
    },
    "Social +30%": {
        "social_spend": 0.3,
    },
}

scenario_df = simulator.compare_scenarios(scenarios)
print(scenario_df)

# Automated budget optimization over the four media channels.
optimizer_channels = ["tv_spend", "digital_spend", "search_spend", "social_spend"]
optimizer = BudgetOptimizer(
    simulator,
    channels=optimizer_channels,
    increase_pct=0.2,
)
opt_df = optimizer.optimize()
print(opt_df)

            Scenario    Total Sales  Sales Lift
2        Social +30%  681729.228549   48.248600
1  TV → Search (20%)  681693.929473   12.949524
0           Baseline  681680.979949    0.000000
         channel  sales_lift
3   social_spend   33.920989
2   search_spend   31.637165
0       tv_spend   13.981860
1  digital_spend    0.976012


In [8]:
from src.models.forecasting import DemandForecaster

# Work on a copy so the date parsing and helper columns added below do not
# leak into `df_mmm`.
df_baseline = df_mmm.copy()

df_baseline["date"] = pd.to_datetime(df_baseline["date"], dayfirst=True)

# isocalendar() returns pandas' nullable UInt32 dtype; cast to a plain integer
# so the column is an ordinary NumPy-backed numeric feature for the model.
# (The cast raises if any date failed to parse, which surfaces bad dates early.)
df_baseline["weekofyear"] = df_baseline["date"].dt.isocalendar().week.astype(int)

# Baseline features
baseline_features = ["price_index", "promo_flag", "holiday_flag", "weekofyear"]

# Initialize forecaster
forecaster = DemandForecaster(
    baseline_features=baseline_features,
    mmm_model=ridge_mmm,
    channel_params=channel_params,
    features_mmm=features_mmm,
)

# Fit baseline demand model
forecaster.fit_baseline(df_baseline)

# Compute historical uplift
df_baseline["baseline_sales"] = forecaster.predict_baseline(df_baseline)
# NOTE(review): this stores the MMM's prediction for the full feature set
# (media + promo/holiday/price), which may be a total-sales prediction rather
# than a marketing-only uplift — confirm what RegularizedMMM.predict returns.
df_baseline["marketing_uplift"] = ridge_mmm.predict(X_mmm)
df_baseline["observed_uplift"] = df_baseline["sales"] - df_baseline["baseline_sales"]

# Planned spend for the forecast horizon: each channel's historical mean
# scaled by a multiplier.
spend_multipliers = {
    "social_spend": 1.3,
    "search_spend": 1.2,
    "tv_spend": 0.8,
    "digital_spend": 0.7,
}
optimized_spend = {
    channel: df_mmm[channel].mean() * multiplier
    for channel, multiplier in spend_multipliers.items()
}

# Prepare future data (next 12 weeks) with optimized spend
future_df = forecaster.prepare_future_data(
    df_mmm, future_weeks=12, optimized_spend=optimized_spend
)

# Forecast future sales
future_forecast = forecaster.forecast(future_df)
print(future_forecast[["date", "forecast_sales"]])


                 date  forecast_sales
2021-03-14 2021-03-14   133571.886390
2021-03-21 2021-03-21   133336.137256
2021-03-28 2021-03-28   133099.678548
2021-04-04 2021-04-04   132862.877404
2021-04-11 2021-04-11   132625.903223
2021-04-18 2021-04-18   132388.840615
2021-04-25 2021-04-25   132151.732708
2021-05-02 2021-05-02   131914.601579
2021-05-09 2021-05-09   131677.458530
2021-05-16 2021-05-16   131440.309351
2021-05-23 2021-05-23   131203.157008
2021-05-30 2021-05-30   130966.003027
