# Reach Curve Exploration and Modeling

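This notebook explores and models reach curves for marketing analytics. It runs on synthetic sample data generated in Section 1; replace that with real campaign data before drawing conclusions.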

## Objectives:
1. Load and explore reach curve data
2. Visualize reach patterns
3. Fit reach curve models (logarithmic and saturation)
4. Evaluate model performance
5. Analyze marginal reach and cost efficiency

In [None]:
# Import necessary libraries
import warnings

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from scipy.optimize import curve_fit
from sklearn.metrics import r2_score, mean_squared_error
from sklearn.model_selection import train_test_split
# Silence only deprecation-style noise from the plotting/data libraries.
# A blanket filterwarnings('ignore') would also hide numerical RuntimeWarnings
# (e.g. log of a non-positive value) and scipy's OptimizeWarning emitted while
# fitting the reach curves later in this notebook.
warnings.filterwarnings('ignore', category=FutureWarning)
warnings.filterwarnings('ignore', category=DeprecationWarning)

# Set plotting style
plt.style.use('seaborn-v0_8-darkgrid')
sns.set_palette('husl')

## 1. Data Loading and Exploration

In [None]:
# Load reach curve data
# Replace with actual data path
# data = pd.read_csv('../data/reach_curve_data.csv')

# Generate sample data for demonstration.
# The global NumPy seed together with a fixed order of random draws
# (impressions -> frequency -> reach noise -> cost rate) keeps the
# synthetic frame identical on every run.
np.random.seed(42)
n_samples = 1000

# Simulate reach curve data: campaign volume and average frequency (always >= 1)
impressions = np.random.exponential(10000, n_samples)
frequency = np.random.poisson(3, n_samples) + 1

# Reach = impressions per exposure, damped by a frequency-dependent factor,
# then perturbed by +/-20% multiplicative noise
reach_noise = np.random.uniform(0.8, 1.2, n_samples)
frequency_factor = 1 - np.exp(-frequency/10)
reach = impressions / frequency * frequency_factor * reach_noise

# Cost scales with impressions at a random per-impression rate
cost_rate = np.random.uniform(0.001, 0.005, n_samples)

campaign_columns = {
    'campaign_id': range(n_samples),
    'impressions': impressions,
    'frequency': frequency,
    'reach': reach,
    'cost': impressions * cost_rate,
}
data = pd.DataFrame(campaign_columns)

print("Data shape:", data.shape)
data.head()

In [None]:
# Basic statistics: count, mean, std, min, quartiles and max for each numeric column
data.describe()

## 2. Exploratory Data Analysis

In [None]:
# Visualize distributions
fig, axes = plt.subplots(2, 2, figsize=(12, 10))

# One entry per histogram panel: (column, bin count, axes, title, x-label)
histogram_panels = [
    ('impressions', 50, axes[0, 0], 'Distribution of Impressions', 'Impressions'),
    ('reach', 50, axes[0, 1], 'Distribution of Reach', 'Reach'),
    ('frequency', 20, axes[1, 0], 'Distribution of Frequency', 'Frequency'),
]
for column, n_bins, panel, panel_title, x_label in histogram_panels:
    data[column].hist(bins=n_bins, ax=panel)
    panel.set_title(panel_title)
    panel.set_xlabel(x_label)

# Reach vs Impressions (bottom-right panel)
scatter_panel = axes[1, 1]
scatter_panel.scatter(data['impressions'], data['reach'], alpha=0.5)
scatter_panel.set_xlabel('Impressions')
scatter_panel.set_ylabel('Reach')
scatter_panel.set_title('Reach vs Impressions')

plt.tight_layout()
plt.show()

## 3. Reach Curve Modeling

In [None]:
# Define reach curve function
def reach_curve(impressions, a, b, c):
    """
    Logarithmic reach curve model
    reach = a * log(1 + b * impressions) + c

    Parameters
    ----------
    impressions : float or numpy array
        Impression volume(s) at which to evaluate the curve.
    a : float
        Scale applied to the logarithmic term.
    b : float
        Rate multiplying impressions inside the logarithm.
    c : float
        Constant offset; the value of the curve at zero impressions.

    Returns
    -------
    float or numpy array
        Modeled reach, same shape as ``impressions``.
    """
    # log1p evaluates log(1 + x) without first rounding 1 + x, so the curve
    # stays accurate (instead of collapsing to c) when b * impressions is tiny.
    return a * np.log1p(b * impressions) + c

# Alternative: Saturation curve
def saturation_curve(impressions, k, n):
    """
    Saturation reach curve model
    reach = impressions / (k + impressions) * n

    The fraction impressions / (k + impressions) equals one half when
    impressions == k and approaches 1 as impressions grow, so the curve
    levels off toward n.
    """
    share_reached = impressions / (k + impressions)
    return share_reached * n

In [None]:
# Fit models using scipy
# (curve_fit is imported in the imports cell at the top of the notebook,
# so all dependencies are visible in one place)

# Prepare data: plain NumPy arrays of impressions (input) and reach (target)
X = data['impressions'].values
y = data['reach'].values

# Fit logarithmic model
# p0 is the starting guess for (a, b, c); the covariance matrix returned as the
# second value is not used here and is discarded.
popt_log, _ = curve_fit(reach_curve, X, y, p0=[1000, 0.001, 0])
print(f"Logarithmic model parameters: a={popt_log[0]:.2f}, b={popt_log[1]:.6f}, c={popt_log[2]:.2f}")

# Fit saturation model
# p0 is the starting guess for (k, n); covariance is discarded as above.
popt_sat, _ = curve_fit(saturation_curve, X, y, p0=[1000, 10000])
print(f"Saturation model parameters: k={popt_sat[0]:.2f}, n={popt_sat[1]:.2f}")

In [None]:
# Visualize fitted curves
fit_fig, fit_ax = plt.subplots(figsize=(12, 6))

# Sort for smooth curve plotting (line plots need monotonically ordered x values)
sort_idx = np.argsort(X)
X_sorted, y_sorted = X[sort_idx], y[sort_idx]

# Evaluate both fitted models on the sorted impressions
y_pred_log = reach_curve(X_sorted, *popt_log)
y_pred_sat = saturation_curve(X_sorted, *popt_sat)

# Plot data, then overlay the fitted curves
fit_ax.scatter(X, y, alpha=0.3, label='Actual data')
fit_ax.plot(X_sorted, y_pred_log, 'r-', linewidth=2, label='Logarithmic model')
fit_ax.plot(X_sorted, y_pred_sat, 'g-', linewidth=2, label='Saturation model')

fit_ax.set_xlabel('Impressions')
fit_ax.set_ylabel('Reach')
fit_ax.set_title('Reach Curve Models')
fit_ax.legend()
fit_ax.grid(True, alpha=0.3)
plt.show()

## 4. Model Evaluation

In [None]:
# Calculate model performance metrics
# Both models are scored on the same X / y arrays they were fitted on,
# so these are in-sample figures.

# Logarithmic model: predictions, R-squared, RMSE
y_pred_log_all = reach_curve(X, *popt_log)
r2_log = r2_score(y, y_pred_log_all)
rmse_log = np.sqrt(mean_squared_error(y, y_pred_log_all))

# Saturation model: predictions, R-squared, RMSE
y_pred_sat_all = saturation_curve(X, *popt_sat)
r2_sat = r2_score(y, y_pred_sat_all)
rmse_sat = np.sqrt(mean_squared_error(y, y_pred_sat_all))

print("Model Performance:")

print("\nLogarithmic Model:")
print(f"  R-squared: {r2_log:.4f}")
print(f"  RMSE: {rmse_log:.2f}")

print("\nSaturation Model:")
print(f"  R-squared: {r2_sat:.4f}")
print(f"  RMSE: {rmse_sat:.2f}")

## 5. Reach Optimization

In [None]:
# Calculate marginal reach (derivative)
def marginal_reach_log(impressions, a, b, c):
    """
    Derivative of logarithmic reach curve

    d/dx [a * log(1 + b * x) + c] = a * b / (1 + b * x)

    The offset c does not affect the slope; it is accepted only so the fitted
    parameter tuple of the logarithmic model can be unpacked into this call.
    """
    slope_at_zero = a * b
    return slope_at_zero / (1 + b * impressions)

def marginal_reach_sat(impressions, k, n):
    """
    Derivative of saturation reach curve

    d/dx [n * x / (k + x)] = n * k / (k + x)^2
    """
    numerator = n * k
    denominator = (k + impressions) ** 2
    return numerator / denominator

# Calculate cost efficiency
# Evaluate both marginal-reach curves on a shared grid of 1000 evenly spaced
# impression levels between 100 and 50000.
impressions_range = np.linspace(start=100, stop=50000, num=1000)
marginal_log, marginal_sat = (
    marginal_reach_log(impressions_range, *popt_log),
    marginal_reach_sat(impressions_range, *popt_sat),
)

# Assume cost per impression
cost_per_impression = 0.003

# Marginal reach divided by the cost of one impression = marginal reach per dollar
marginal_cost_efficiency_log, marginal_cost_efficiency_sat = (
    marginal_curve / cost_per_impression
    for marginal_curve in (marginal_log, marginal_sat)
)

In [None]:
# Plot marginal reach and cost efficiency
fig, (ax1, ax2) = plt.subplots(2, 1, figsize=(12, 10))

# One entry per panel: (axes, logarithmic series, saturation series, y-label, title)
curve_panels = [
    (ax1, marginal_log, marginal_sat,
     'Marginal Reach', 'Marginal Reach vs Impressions'),
    (ax2, marginal_cost_efficiency_log, marginal_cost_efficiency_sat,
     'Marginal Reach per Dollar', 'Cost Efficiency vs Impressions'),
]
for panel, log_series, sat_series, y_label, panel_title in curve_panels:
    panel.plot(impressions_range, log_series, 'r-', label='Logarithmic model')
    panel.plot(impressions_range, sat_series, 'g-', label='Saturation model')
    panel.set_xlabel('Impressions')
    panel.set_ylabel(y_label)
    panel.set_title(panel_title)
    panel.legend()
    panel.grid(True, alpha=0.3)

plt.tight_layout()
plt.show()

## 6. Recommendations and Insights

Based on the reach curve analysis:

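1. **Optimal impression levels**: Both model forms show diminishing returns for positive fitted parameters: marginal reach decreases as impressions increase, so each additional impression adds less reach than the one before.
2. **Cost efficiency**: At a constant cost per impression, marginal reach per dollar declines together with marginal reach. The cost-efficient impression level is the point where it falls below your target threshold.
3. **Model selection**: Choose the model with the better fit for your data (higher R-squared, lower RMSE).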

### Next Steps:
- Segment analysis by campaign type
- Time-series analysis of reach patterns
- Multi-channel reach optimization