# Phase 3: The Two-Step Bayesian Modeling Framework

## Step 2: Long-Term Brand Effects Model (BVAR)

This model quantifies how marketing builds brand equity, which in turn drives long-term growth in base sales.

In [None]:
import sys
sys.path.append('..')

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from scripts.bvar_optimized import BVAR_Optimized

# Load the data
# Three input tables, each parsed with 'Date' as a datetime column so it can
# serve as the join key below.
base_sales_df = pd.read_csv('../data/base_sales.csv', parse_dates=['Date'])
brand_metrics_df = pd.read_csv('../data/brand_metrics.csv', parse_dates=['Date'])
marketing_df = pd.read_csv('../data/marketing_spend.csv', parse_dates=['Date'])

# Merge all data
# merge() defaults to an inner join, so only dates present in all three tables
# survive. The result is then put in chronological order: the rows are handed
# to a time-series model below, so row order must follow the calendar rather
# than whatever order the CSV files happened to be written in. A stable sort
# keeps the original relative order of any rows sharing the same date.
df = (
    base_sales_df
    .merge(brand_metrics_df, on='Date')
    .merge(marketing_df, on='Date')
    .sort_values('Date', kind='mergesort')
    .reset_index(drop=True)
)

print(f"Loaded {len(df)} weeks of data")
print(f"Date range: {df['Date'].min()} to {df['Date'].max()}")

# Prepare for BVAR
# Outcome series: base sales plus the brand-health metrics.
base_sales = df['Base_Sales'].to_numpy()
brand_cols = ['Awareness', 'Consideration']
brand_metrics = df[brand_cols].to_numpy()

# Marketing spend per channel, one column per channel in the order listed.
marketing_channels = ['Content Marketing', 'Events', 'Google Ads', 'LinkedIn']
marketing_data = df[marketing_channels].to_numpy()

# Combine endogenous variables (base sales + brand metrics)
# Column order of `endog` matches `endog_names`: base sales first, then the
# brand metrics in `brand_cols` order.
endog = np.column_stack([base_sales, brand_metrics])
endog_names = ['Base_Sales'] + brand_cols

print(f"\nEndogenous variables (outcomes): {endog_names}")
print(f"Exogenous variables (marketing): {marketing_channels}")
print(f"\nData shapes:")
print(f"  Endogenous: {endog.shape}")
print(f"  Exogenous: {marketing_data.shape}")

In [None]:
# Fit the BVAR Model with MCMC

# NOTE(review): `bvar` is not created in any cell visible in this part of the
# notebook. Presumably it is a BVAR_Optimized instance built from `endog` and
# `marketing_data` — confirm it is constructed before this cell runs.

# Sampler settings are defined once so the printed banner and the actual
# fit() call cannot drift apart.
n_tune = 500
n_draws = 500
n_chains = 4
target_accept = 0.95

# Effective-sample-size level quoted in the diagnostics below.
ess_target = 1000

print("Fitting BVAR with MCMC...")
print("Configuration:")
print(f"  • {n_tune} tuning + {n_draws} draws × {n_chains} chains")
print(f"  • Target accept: {target_accept}")
print("  • Estimated time: 5-10 minutes")
print()

bvar.fit(
    draws=n_draws,
    tune=n_tune,
    chains=n_chains,
    target_accept=target_accept
)

print("\n✓ BVAR fitted successfully!")

# Check convergence
# summary() is indexed by column name below; presumably it returns an
# ArviZ-style table with 'r_hat' and 'ess_bulk' columns — TODO confirm.
summary = bvar.summary()
rhat_max = summary['r_hat'].max()
ess_min = summary['ess_bulk'].min()

print(f"\nConvergence Diagnostics:")
print(f"  Max R-hat: {rhat_max:.4f} (should be < 1.01)")
print(f"  Min ESS:   {ess_min:.0f} (should be > {ess_target})")

# Verdict on chain mixing, based on the worst (largest) R-hat.
if rhat_max < 1.01:
    print("  ✓ EXCELLENT convergence!")
elif rhat_max < 1.05:
    print("  ✓ Good convergence")
else:
    print("  ⚠ Consider increasing draws")

# The ESS threshold printed above was previously never checked, so a run with
# a tiny effective sample size could still be reported as "EXCELLENT".
# `not (x > t)` is used instead of `x <= t` so that a NaN ESS also warns.
if not ess_min > ess_target:
    print(f"  ⚠ Min ESS is not above {ess_target} — consider increasing draws")

In [ ]:
# Calculate Impulse Response Functions (IRFs)

print("Calculating IRFs...")
print("  • Simulating 24-week forward response")
print("  • 95% credible intervals")
print()

# `irf` is used as a mapping below: keys are path names such as
# 'LinkedIn_to_Base_Sales', and each value is indexed by 'mean', 'lower'
# and 'upper'.
irf = bvar.calculate_irf(periods=24, shock_size=1.0, credible_interval=0.95)

print(f"✓ Calculated {len(irf)} IRF trajectories")
print("\nAvailable IRF paths:")
max_listed = 6  # Show first 6
for key in list(irf)[:max_listed]:
    print(f"  • {key}")
# Only print the ellipsis when the listing was actually truncated.
if len(irf) > max_listed:
    print("  ...")

# Example: Show LinkedIn → Base Sales IRF with uncertainty
linkedin_to_sales = irf.get('LinkedIn_to_Base_Sales')
# Explicit None check: the entry is a container of trajectories, and relying
# on its truthiness is fragile (e.g. array-like values have no single truth
# value).
if linkedin_to_sales is not None:
    print("\nSample IRF: LinkedIn → Base Sales (first 12 weeks)")
    print("Week  Mean      Lower (2.5%)  Upper (97.5%)")
    print("-" * 50)
    # Slicing (rather than indexing 0..11) shows at most 12 weeks and cannot
    # run past the end if the IRF horizon is ever shortened.
    preview_weeks = 12
    rows = zip(
        linkedin_to_sales['mean'][:preview_weeks],
        linkedin_to_sales['lower'][:preview_weeks],
        linkedin_to_sales['upper'][:preview_weeks],
    )
    for week, (mean, lower, upper) in enumerate(rows):
        print(f"{week:4d}  ${mean:>8.2f}  ${lower:>12.2f}  ${upper:>12.2f}")

In [None]:
# Visualize IRFs for All Channels → Base Sales

# Size the grid from the number of channels instead of assuming exactly four:
# two panels per row, as many rows as needed. With four channels this is the
# same 2×2, 14×10-inch figure as before.
n_cols = 2
n_rows = max(1, (len(marketing_channels) + n_cols - 1) // n_cols)
fig, axes = plt.subplots(n_rows, n_cols, figsize=(14, 5 * n_rows), squeeze=False)
axes = axes.flatten()

for idx, channel in enumerate(marketing_channels):
    irf_key = f'{channel}_to_Base_Sales'
    # Channels without an IRF entry keep an empty panel.
    if irf_key not in irf:
        continue

    irf_data = irf[irf_key]
    # Derive the x-axis from the trajectory itself so the plot stays valid if
    # the IRF horizon differs from 24 periods.
    weeks = np.arange(len(irf_data['mean']))

    ax = axes[idx]
    ax.plot(weeks, irf_data['mean'], '-', color='#2E86AB', linewidth=2, label='Mean')
    ax.fill_between(weeks, irf_data['lower'], irf_data['upper'],
                    color='#2E86AB', alpha=0.2, label='95% CI')
    # Zero reference line: responses above it are positive effects.
    ax.axhline(0, color='black', linestyle='--', linewidth=1, alpha=0.3)
    ax.set_xlabel('Weeks After Shock', fontsize=10, fontweight='bold')
    ax.set_ylabel('Response ($)', fontsize=10, fontweight='bold')
    ax.set_title(f'{channel} → Base Sales', fontsize=11, fontweight='bold')
    ax.legend(loc='upper right', fontsize=8)
    ax.grid(True, alpha=0.3)

# Hide surplus grid cells left over when the channel count is odd.
for spare_ax in axes[len(marketing_channels):]:
    spare_ax.set_visible(False)

plt.suptitle('Impulse Response Functions: Marketing → Base Sales (24 weeks)',
             fontsize=14, fontweight='bold', y=1.00)
plt.tight_layout()
plt.show()

print("\nInterpretation:")
print("  • Shows how a $1 marketing shock propagates over 24 weeks")
print("  • Positive slope = sustained brand-building effect")
print("  • Area under curve ≈ long-term ROI contribution")

In [None]:
# Calculate Long-Term ROI with Uncertainty

# Per-channel ROI summary; each entry is read below via its 'mean', 'lower'
# and 'upper' keys.
long_term_roi = bvar.calculate_long_term_roi(irf=irf, sales_var_name='Base_Sales')

# --- Text table -------------------------------------------------------------
rule_width = 70
print("LONG-TERM ROI (Brand-Building Effects with 95% CI)")
print("=" * rule_width)
print(f"{'Channel':<20s} {'Mean ROI':>12s} {'Lower (2.5%)':>15s} {'Upper (97.5%)':>15s}")
print("-" * rule_width)

for channel, roi_dict in long_term_roi.items():
    mean_roi, lower, upper = roi_dict['mean'], roi_dict['lower'], roi_dict['upper']
    print(f"{channel:<20s} ${mean_roi:>11,.2f} ${lower:>14,.2f} ${upper:>14,.2f}")

# --- Horizontal bar chart with interval whiskers ----------------------------
fig, ax = plt.subplots(figsize=(12, 7))

channels = list(long_term_roi.keys())
means = [long_term_roi[name]['mean'] for name in channels]
lowers = [long_term_roi[name]['lower'] for name in channels]
uppers = [long_term_roi[name]['upper'] for name in channels]

# errorbar() wants whisker lengths measured from the bar end, not absolute
# interval bounds: distance down to the lower bound, distance up to the upper.
whisker_left = [centre - low for centre, low in zip(means, lowers)]
whisker_right = [high - centre for centre, high in zip(means, uppers)]

# One viridis shade per channel, avoiding the extreme ends of the colormap.
colors = plt.cm.viridis(np.linspace(0.2, 0.8, len(channels)))
ax.barh(channels, means, color=colors, alpha=0.7)
ax.errorbar(
    means,
    channels,
    xerr=[whisker_left, whisker_right],
    fmt='none',
    color='black',
    linewidth=2,
    capsize=5,
    capthick=2,
)

ax.set_xlabel('Long-Term ROI per $1 Spent', fontsize=12, fontweight='bold')
ax.set_title(
    'Long-Term ROI with 95% Credible Intervals',
    fontsize=14,
    fontweight='bold',
    pad=20,
)
ax.grid(True, alpha=0.3, axis='x')
plt.tight_layout()
plt.show()

print("\nNote: This captures sustained brand equity effects over 24 weeks.")
print("Total ROI = Short-term ROI (from notebook 02) + Long-term ROI (above)")

### Long-Term ROI

In [None]:
# Calculate long-term ROI
# The model object used throughout this notebook is `bvar`; the previous
# reference to `model` is not defined in any visible cell and would raise
# NameError. The call uses the same keyword form as the earlier ROI
# calculation, with 'Base_Sales' as the sales variable.
long_term_roi = bvar.calculate_long_term_roi(irf=irf, sales_var_name='Base_Sales')
print(f'Long-Term ROI: {long_term_roi}')