In [0]:
import pandas as pd
import numpy as np
from sklearn.linear_model import LinearRegression
from sklearn.metrics import r2_score
from itertools import product

In [0]:
def adstock_transform(x, strength, length):
    """
    Apply a geometric adstock (carryover) transformation to a spend series.

    Each value receives the spend of the same position plus the spend of up
    to ``length`` preceding positions, where spend from ``j`` positions back
    is weighted by ``strength ** j``:

        result[t] = x[t] + sum(strength**j * x[t - j] for j in 1..length)

    Carryover is taken from the *raw* spend, not from values that were
    already adstocked. A single burst of spend therefore decays
    geometrically and is gone after ``length`` positions, instead of being
    counted again through every intermediate lag.

    Parameters:
    x: array-like of spend values; index ``i - 1`` is treated as the period
        preceding index ``i`` (first axis).
    strength: weight multiplier per lag; 0 disables carryover.
    length: maximum number of preceding positions that carry over; 0 or a
        negative value disables carryover.

    Returns:
    A new float ndarray with the same shape as ``x``. The input is never
    modified, and an empty input yields an empty array.
    """
    # Always work on a float copy so the caller's data is never aliased and
    # the return type does not depend on which branch is taken.
    values = np.array(x, dtype=float)

    if length <= 0 or strength == 0:
        return values

    adstocked = values.copy()
    # Lags longer than the series cannot contribute anything.
    max_lag = min(int(length), len(values) - 1)
    for lag in range(1, max_lag + 1):
        # Shift the raw series by `lag` positions and add its decayed copy.
        adstocked[lag:] += (strength ** lag) * values[:-lag]

    return adstocked


In [0]:
def saturation_transform(x, alpha):
    """
    Apply a diminishing-returns (saturation) transformation to a series.

    The series is scaled by its maximum, passed through
    ``x_norm / (alpha + x_norm)`` and scaled back by the same maximum, so
    the output stays in the units of the input.

    Parameters:
    x: array-like of values (e.g. adstocked spend).
    alpha: saturation constant; 0 means "no saturation" and returns the
        values unchanged.

    Returns:
    A float ndarray shaped like ``x``. The values are returned untransformed
    when ``alpha`` is 0, when the input is empty, or when its maximum is 0
    (nothing to normalise by).
    """
    values = np.asarray(x, dtype=float)

    # np.max raises on an empty array, so an empty series is passed through.
    if alpha == 0 or values.size == 0:
        return values

    x_max = np.max(values)
    if x_max == 0:
        return values

    x_norm = values / x_max
    saturated = x_norm / (alpha + x_norm)
    return saturated * x_max

In [0]:
def mmm_grid_search(df):
    """
    Find the best MMM parameters using an exhaustive grid search.

    Every combination of adstock strength, adstock length and saturation
    alpha is applied to all active media channels (one shared setting for
    every channel). A linear regression of ``incremental_sales`` on the
    transformed spend is fitted, and the combination with the highest R²,
    computed on the same rows the model was fitted on, is kept. The first
    combination encountered wins ties.

    Parameters:
    df: DataFrame holding ``incremental_sales`` and any of the ``*_spend``
        media columns. Channels that are absent, or whose spend does not sum
        to a positive number, are left out of the model.

    Returns:
    Tuple ``(best_coefficients, best_params, best_score)``:
    ``best_coefficients`` maps channel name -> fitted coefficient,
    ``best_params`` holds 'strength', 'length' and 'alpha', and
    ``best_score`` is the R² of the winning fit. When no model could be
    fitted the tuple is ``(None, None, -inf)``; the function reports the
    reason on stdout instead of raising.
    """
    print("Starting MMM Grid Search Optimization...")

    media_channels = ['video_spend', 'audio_spend', 'display_spend',
                      'search_spend', 'social_spend', 'connected_tv_spend']

    # Grid search parameters
    strength_vals = np.linspace(0, 1, 5)  # [0, 0.25, 0.5, 0.75, 1.0]
    length_vals = range(0, 7)  # [0, 1, 2, 3, 4, 5, 6]
    alpha_vals = np.linspace(0, 0.01, 5)  # [0, 0.0025, 0.005, 0.0075, 0.01]

    best_score = -np.inf
    best_params = None
    best_coefficients = None

    # The usable channels and the target do not depend on the grid point, so
    # they are resolved once. Problems here are reported rather than raised,
    # keeping the "returns None when nothing can be fitted" contract.
    try:
        feature_names = [channel for channel in media_channels
                         if channel in df.columns and df[channel].sum() > 0]
        channel_values = [df[channel].values for channel in feature_names]
        y = df['incremental_sales'].values
    except Exception as exc:
        print(f"Error: could not prepare model inputs: {exc!r}")
        return best_coefficients, best_params, best_score

    if not feature_names:
        print("No media channels with positive spend found; skipping grid search.")
        return best_coefficients, best_params, best_score

    total_combinations = len(strength_vals) * len(length_vals) * len(alpha_vals)
    print(f"Testing {total_combinations} parameter combinations...")

    failures = 0
    last_error = None

    # product() iterates strength -> length -> alpha, alpha varying fastest.
    grid = product(strength_vals, length_vals, alpha_vals)
    for count, (strength, length, alpha) in enumerate(grid, start=1):
        if count % 20 == 0:
            print(f"Progress: {count}/{total_combinations} ({count/total_combinations*100:.1f}%)")

        try:
            # Adstock first (carryover), then saturation, for every channel.
            X = np.column_stack([
                saturation_transform(adstock_transform(values, strength, length), alpha)
                for values in channel_values
            ])

            model = LinearRegression()
            model.fit(X, y)
            r2 = r2_score(y, model.predict(X))
        except Exception as exc:
            # A single bad grid point must not abort the search, but it is
            # recorded so that a systematic failure does not go unnoticed.
            failures += 1
            last_error = exc
            continue

        # Update best model
        if r2 > best_score:
            best_score = r2
            best_params = {'strength': strength, 'length': length, 'alpha': alpha}
            best_coefficients = dict(zip(feature_names, model.coef_))

    if failures:
        print(f"Warning: {failures} of {total_combinations} combinations failed "
              f"(last error: {last_error!r})")

    print(f"Grid search completed!")
    print(f"Best R² Score: {best_score:.4f}")
    print(f"Best Parameters: {best_params}")

    return best_coefficients, best_params, best_score

In [0]:
def _channel_weights(df, coefficients, media_channels, sharpness=5.0):
    """Convert model coefficients into per-channel budget shares summing to 1."""
    # Non-positive, missing or NaN coefficients carry no efficiency. They are
    # clamped to 0 rather than to an absolute floor, because a fixed floor
    # can exceed genuinely positive coefficients when those are small.
    scores = np.array(
        [coeff if coeff > 0 else 0.0
         for coeff in (coefficients.get(channel, 0) for channel in media_channels)],
        dtype=float,
    )
    total_positive_coeff = scores.sum()

    if total_positive_coeff > 0:
        # Softmax over the normalised scores: `sharpness` amplifies the
        # differences, and exp(0) = 1 still leaves every channel a share.
        amplified = np.exp(sharpness * scores / total_positive_coeff)
        return dict(zip(media_channels, amplified / amplified.sum()))

    # Fallback to historical performance if no good coefficients
    print("Using historical spend patterns as fallback...")
    historical_totals = np.array(
        [df[channel].sum() if channel in df.columns else 0.0
         for channel in media_channels],
        dtype=float,
    )
    total_historical = historical_totals.sum()
    if total_historical > 0:
        return dict(zip(media_channels, historical_totals / total_historical))

    # Last resort - equal weights
    equal_share = 1.0 / len(media_channels)
    return {channel: equal_share for channel in media_channels}


def optimize_campaign_budgets(df, coefficients):
    """
    Optimize budget allocation for each campaign based on total_spend.

    One set of channel weights (summing to 1) is derived from the model
    coefficients and applied to every campaign's ``total_spend``:

    * channels with a positive coefficient are weighted by a softmax of
      their share of the positive coefficients;
    * channels with a non-positive or missing coefficient count as zero
      efficiency and receive only the softmax baseline share;
    * if no channel has a positive coefficient, weights follow the
      historical spend mix in ``df``, or are equal when that is zero too.

    Parameters:
    df: DataFrame with ``campaign_id``, ``campaign_start_date`` and
        ``total_spend`` columns; the ``*_spend`` channel columns are only
        read by the historical fallback.
    coefficients: dict mapping channel column name -> model coefficient.

    Returns:
    DataFrame with one row per input row (fresh 0..n-1 index) and the
    columns ``campaign_id``, ``campaign_start_date``, ``total_spend``
    followed by the recommended spend for each media channel.
    """
    media_channels = ['video_spend', 'audio_spend', 'display_spend',
                      'search_spend', 'social_spend', 'connected_tv_spend']

    print(f"\nCalculating allocation weights from model coefficients...")
    weights = _channel_weights(df, coefficients, media_channels)

    # Display the weights
    print(f"\nAllocation weights based on model efficiency:")
    sorted_weights = sorted(weights.items(), key=lambda item: item[1], reverse=True)
    for channel, weight in sorted_weights:
        channel_name = channel.replace('_spend', '').replace('_', ' ').title()
        print(f"  {channel_name:<15}: {weight*100:5.1f}%")

    # Every campaign uses the same weights, so the split is a single column
    # multiplication per channel instead of a Python loop over rows.
    campaigns = df[['campaign_id', 'campaign_start_date', 'total_spend']].reset_index(drop=True)
    return campaigns.assign(**{
        channel: weights[channel] * campaigns['total_spend']
        for channel in media_channels
    })

In [0]:
def run_mmm_optimization(df):
    """
    Run the full MMM workflow: validate, fit, allocate and report.

    The input is checked for the required columns, the grid search is run to
    obtain channel coefficients, those coefficients are turned into a
    per-campaign budget split, and a campaign-by-campaign report plus an
    overall summary is printed.

    Parameters:
    df: DataFrame with columns: campaign_id, campaign_start_date, incremental_sales,
        video_spend, audio_spend, display_spend, search_spend, social_spend,
        connected_tv_spend, total_spend

    Returns:
    DataFrame with optimized budget allocations, or None when required
    columns are missing or the grid search produced no coefficients.
    """
    rule = "=" * 60
    channels = ['video_spend', 'audio_spend', 'display_spend',
                'search_spend', 'social_spend', 'connected_tv_spend']

    def pretty(channel):
        # 'connected_tv_spend' -> 'Connected Tv'
        return channel.replace('_spend', '').replace('_', ' ').title()

    print(rule)
    print("MMM CAMPAIGN BUDGET OPTIMIZATION")
    print(rule)

    # Validate input data
    needed = ['campaign_id', 'campaign_start_date', 'incremental_sales',
              *channels, 'total_spend']
    absent = [name for name in needed if name not in df.columns]
    if absent:
        print(f"Error: Missing columns: {absent}")
        return None

    print(f"Data loaded: {len(df)} campaigns")
    print(f"Total spend across all campaigns: ${df['total_spend'].sum():,.0f}")
    print()

    # Fit the model; without coefficients there is nothing to allocate.
    coefficients, best_params, best_score = mmm_grid_search(df)
    if coefficients is None:
        print("Could not find optimal MMM model")
        return None

    optimized_df = optimize_campaign_budgets(df, coefficients)

    print("\n" + rule)
    print("OPTIMIZED CAMPAIGN ALLOCATIONS")
    print(rule)

    # Per-campaign breakdown, accumulating the totals for the summary.
    grand_total = 0
    per_channel = dict.fromkeys(channels, 0)

    for _, campaign in optimized_df.iterrows():
        campaign_id = campaign['campaign_id']
        started = campaign['campaign_start_date']
        budget = campaign['total_spend']
        grand_total += budget

        print(f"\nCampaign ID {campaign_id} ({started}) - Total Budget: ${budget:,.0f}")
        print("-" * 55)

        for channel in channels:
            amount = campaign[channel]
            share = amount / budget * 100 if budget > 0 else 0
            print(f"  {pretty(channel):<15}: ${amount:8,.0f} ({share:5.1f}%)")
            per_channel[channel] += amount

    # Summary, largest channel first
    print(f"\n{rule}")
    print("SUMMARY - TOTAL ALLOCATION ACROSS ALL CAMPAIGNS")
    print(rule)
    print(f"Total Budget: ${grand_total:,.0f}")
    print()

    ranked = sorted(per_channel.items(), key=lambda entry: entry[1], reverse=True)
    for channel, channel_total in ranked:
        share = channel_total / grand_total * 100 if grand_total > 0 else 0
        print(f"{pretty(channel):<15}: ${channel_total:10,.0f} ({share:5.1f}%)")

    print("\nModel Performance:")
    print(f"R² Score: {best_score:.4f}")
    print(f"Optimal Parameters: {best_params}")

    return optimized_df