# MMM Analysis - Data Exploration and Preprocessing

This notebook performs a marketing mix modeling (MMM) analysis, covering data exploration, preprocessing, and mediation analysis.

In [None]:
# Import required libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import Ridge, Lasso
from sklearn.metrics import mean_squared_error, r2_score
import scipy.stats as stats
from scipy.optimize import minimize
import statsmodels.api as sm
import warnings
# NOTE(review): with no category or module argument this filter suppresses
# every warning for the rest of the session, including deprecation and
# numerical warnings raised by the libraries used below -- confirm this is intended.
warnings.filterwarnings('ignore')

# Seed NumPy's global random number generator so that any code drawing from
# np.random produces the same values on every run.
np.random.seed(42)

# Confirmation message shown once the setup statements have run.
print('Libraries imported successfully!')

In [None]:
# Read the weekly dataset; `df` is left as None when the CSV is missing so
# that later cells can skip their work with an `if df is not None` guard.
try:
    df = pd.read_csv('../data/Assessment 2 - MMM Weekly.csv')
except FileNotFoundError:
    print('Data file not found. Please check the file path.')
    df = None
else:
    # Parse the week column into datetimes before reporting the covered range.
    df['week'] = pd.to_datetime(df['week'])
    first_week = df["week"].min()
    last_week = df["week"].max()
    print(f'Data loaded successfully! Shape: {df.shape}')
    print(f'Date range: {first_week} to {last_week}')
    print(f'Columns: {list(df.columns)}')

In [None]:
# Column names grouped by their role in the analysis.
social_channels = [
    'facebook_spend',
    'tiktok_spend',
    'instagram_spend',
    'snapchat_spend',
]
mediator_channel = 'google_spend'
outcome_variable = 'revenue'
control_variables = [
    'social_followers',
    'average_price',
    'promotions',
    'emails_send',
    'sms_send',
]

# Echo the configuration, one role per line.
print(
    'Channel Structure:',
    f'Social Channels: {social_channels}',
    f'Mediator: {mediator_channel}',
    f'Outcome: {outcome_variable}',
    f'Controls: {control_variables}',
    sep='\n',
)

In [None]:
# Print a quick overview of the loaded data: shape, null counts and summary statistics.
if df is not None:
    # Per-column null counts, computed once and reused for both reports below.
    missing_data = df.isnull().sum()
    total_missing = missing_data.sum()

    print('=== DATA OVERVIEW ===')
    print(f'Dataset shape: {df.shape}')
    print(f'Missing values: {total_missing}')

    print('\n=== SUMMARY STATISTICS ===')
    print(df.describe())

    # List only the columns that actually contain nulls.
    if total_missing > 0:
        print('\n=== MISSING VALUES ===')
        print(missing_data[missing_data > 0])

In [None]:
# Define transformation functions
def adstock_transform(x, decay_rate):
    '''Apply a geometric adstock transformation to capture carryover effects.

    Each output value is the current input plus ``decay_rate`` times the
    previous output: ``out[i] = x[i] + decay_rate * out[i - 1]``, with
    ``out[0] = x[0]``.

    Args:
        x: Array-like sequence of values ordered in time (list, NumPy array
            or pandas Series).
        decay_rate: Fraction of the previous period's adstock carried over.

    Returns:
        A float NumPy array with the same shape as ``x``. An empty input
        yields an empty array.
    '''
    # Work in floating point: an integer input would otherwise produce an
    # integer output array and silently truncate the decayed carryover.
    values = np.asarray(x, dtype=float)
    adstocked = np.empty_like(values)

    # Running carryover; starting from zero makes the first output equal x[0].
    carry = 0.0
    for i, current in enumerate(values):
        carry = current + decay_rate * carry
        adstocked[i] = carry
    return adstocked

def saturation_transform(x, alpha, gamma):
    '''Apply a Hill-type saturation curve to x.

    Computes ``alpha * x**gamma / (alpha**gamma + x**gamma)``.

    Args:
        x: Scalar or array of input values.
        alpha: Scale parameter; it multiplies the curve and, raised to
            ``gamma``, appears in the denominator.
        gamma: Exponent applied to both ``x`` and ``alpha``.

    Returns:
        The transformed value(s), with the same shape as ``x``.
    '''
    x_powered = x ** gamma
    alpha_powered = alpha ** gamma
    return alpha * x_powered / (alpha_powered + x_powered)

def transform_media(x, decay_rate, alpha, gamma):
    '''Run the full media pipeline: adstock first, then saturation.

    Args:
        x: Media series passed to ``adstock_transform``.
        decay_rate: Carryover rate passed to ``adstock_transform``.
        alpha: Scale parameter passed to ``saturation_transform``.
        gamma: Exponent passed to ``saturation_transform``.

    Returns:
        The saturated, adstocked series.
    '''
    return saturation_transform(adstock_transform(x, decay_rate), alpha, gamma)

print('Transformation functions defined successfully!')

In [None]:
# Mediation Analysis Class
class MediationAnalysis:
    '''Single-mediator mediation analysis built from three OLS regressions.

    Fits M ~ X (path a), Y ~ X + M (paths c' and b) and Y ~ X (path c), each
    with an intercept and the optional control columns, then derives the
    indirect, direct and total effects from the fitted coefficients.

    Effects are reported for the first column of X; the mediator coefficient
    is read from the column immediately after the X column(s).
    '''

    def __init__(self, X, M, Y, controls=None):
        '''Store the analysis inputs.

        Args:
            X: Treatment variable(s); 1-D, or 2-D with one column per variable.
            M: Mediator variable.
            Y: Outcome variable.
            controls: Optional control variables, given either as a
                list/tuple of column arrays or as one array-like
                (a 1-D column or a 2-D block of columns).
        '''
        self.X = X
        self.M = M
        self.Y = Y
        # Normalise to a plain list so truthiness checks and concatenation
        # behave the same for lists, tuples and NumPy/pandas inputs.
        if controls is None:
            self.controls = []
        elif isinstance(controls, (list, tuple)):
            self.controls = list(controls)
        else:
            self.controls = [np.asarray(controls)]
        self.results = {}

    def _with_constant(self, *leading):
        '''Build a design matrix: intercept, the leading columns, then controls.'''
        if self.controls or len(leading) > 1:
            design = np.column_stack(list(leading) + self.controls)
        else:
            design = leading[0]
        # Always add the intercept so coefficient positions stay fixed even if
        # one of the inputs happens to be a constant column.
        return sm.add_constant(design, has_constant='add')

    def _mediator_index(self):
        '''Return the position of the mediator coefficient in model_b.params.'''
        x_shape = np.shape(self.X)
        n_treatment_columns = 1 if len(x_shape) < 2 else x_shape[1]
        # Layout of model_b: [const, X column(s), M, controls...]
        return 1 + n_treatment_columns

    def fit_path_models(self):
        '''Fit the three path regressions and store them on the instance.

        Sets ``model_a`` (M on X), ``model_b`` (Y on X and M) and ``model_c``
        (Y on X); every model also includes the intercept and the controls.

        Returns:
            self, so calls can be chained.
        '''
        design_x = self._with_constant(self.X)

        # Path a: X -> M
        self.model_a = sm.OLS(self.M, design_x).fit()

        # Paths b and c': M -> Y and X -> Y with the mediator held fixed
        design_xm = self._with_constant(self.X, self.M)
        self.model_b = sm.OLS(self.Y, design_xm).fit()

        # Path c: X -> Y without the mediator
        self.model_c = sm.OLS(self.Y, design_x).fit()
        return self

    def calculate_effects(self):
        '''Compute mediation effects from the fitted path models.

        Returns:
            dict with keys ``indirect_effect`` (a * b), ``direct_effect``
            (c'), ``total_effect`` (c), ``proportion_mediated`` (indirect /
            total, or 0 when the total effect is 0), ``a_coefficient`` and
            ``b_coefficient``. The same dict is stored in ``self.results``.

        Raises:
            AttributeError: if ``fit_path_models`` has not been called.
        '''
        # Read coefficients by position from plain arrays; the fitted params
        # may be a label-indexed pandas Series when the inputs were pandas.
        params_a = np.asarray(self.model_a.params)
        params_b = np.asarray(self.model_b.params)
        params_c = np.asarray(self.model_c.params)

        a_coef = params_a[1]
        # The mediator sits right after the X column(s); the last position
        # belongs to a control variable whenever controls are present.
        b_coef = params_b[self._mediator_index()]
        c_coef = params_c[1]
        c_prime_coef = params_b[1]

        indirect_effect = a_coef * b_coef
        direct_effect = c_prime_coef
        total_effect = c_coef
        prop_mediated = indirect_effect / total_effect if total_effect != 0 else 0

        self.results = {
            'indirect_effect': indirect_effect,
            'direct_effect': direct_effect,
            'total_effect': total_effect,
            'proportion_mediated': prop_mediated,
            'a_coefficient': a_coef,
            'b_coefficient': b_coef
        }
        return self.results

print('MediationAnalysis class defined successfully!')

In [None]:
# Build the modelling frame: cyclical calendar features plus an
# adstocked-and-saturated version of every media spend column.
if df is not None:
    df_processed = df.copy()

    # Calendar position of each week
    week_dates = df_processed['week'].dt
    df_processed['month'] = week_dates.month
    df_processed['quarter'] = week_dates.quarter

    # Sine/cosine encodings so the end of a cycle sits next to its start
    month_angle = 2 * np.pi * df_processed['month'] / 12
    df_processed['month_sin'] = np.sin(month_angle)
    df_processed['month_cos'] = np.cos(month_angle)
    quarter_angle = 2 * np.pi * df_processed['quarter'] / 4
    df_processed['quarter_sin'] = np.sin(quarter_angle)
    df_processed['quarter_cos'] = np.cos(quarter_angle)

    # The same fixed transformation parameters are used for every channel.
    # NOTE(review): alpha = 1.0 is applied to the raw spend values; if spend is
    # much larger than 1 the saturated output will sit close to alpha for every
    # non-zero week -- confirm the spend scale or tune alpha per channel.
    decay_rate = 0.5
    alpha = 1.0
    gamma = 0.5

    for channel in [*social_channels, mediator_channel]:
        # Channels absent from the data are skipped silently.
        if channel not in df_processed.columns:
            continue

        transformed_col = f'{channel}_transformed'
        raw_spend = df_processed[channel].values
        df_processed[transformed_col] = transform_media(raw_spend, decay_rate, alpha, gamma)
        print(f'Transformed {channel} -> {transformed_col}')

    print(f'\nProcessed dataset shape: {df_processed.shape}')
    print('Feature engineering completed successfully!')