In [None]:
# Customer Retention Cohort Analysis
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Read the raw order and payment exports (paths are relative to the notebook).
orders = pd.read_csv('../data/customer_orders.csv')
payments = pd.read_csv('../data/payments.csv')

# Parse the date columns into datetimes so the `.dt` accessor can be used on
# them further down.
orders = orders.assign(order_date=pd.to_datetime(orders['order_date']))
payments = payments.assign(payment_date=pd.to_datetime(payments['payment_date']))

# Cohort assignment: one row per customer holding the date of their earliest
# order and the calendar month (monthly Period) that date falls in.
first_orders = (
    orders.groupby('customer_id')['order_date']
    .min()
    .rename('first_order_date')
    .reset_index()
    .assign(cohort_month=lambda df: df['first_order_date'].dt.to_period('M'))
)

# Attach each customer's first-order date and cohort month to every one of
# their orders (default inner join on customer_id), then tag each order with
# the calendar month it was placed in.
orders_with_cohort = pd.merge(orders, first_orders, on='customer_id')
orders_with_cohort['order_month'] = orders_with_cohort['order_date'].dt.to_period('M')

# Cohort period = number of calendar months between the order's month and the
# customer's cohort month (0 for orders placed in the cohort month itself).
# This is computed with vectorized year/month arithmetic on the datetime
# columns. Subtracting the two Period columns instead produces an object
# column of offsets that must be unpacked row by row via `.n`, which is slow
# and fails with AttributeError when a row has a missing date (the difference
# is NaT). Here a missing date simply yields NaN for that row.
orders_with_cohort['cohort_period'] = (
    (
        orders_with_cohort['order_date'].dt.year
        - orders_with_cohort['first_order_date'].dt.year
    ) * 12
    + (
        orders_with_cohort['order_date'].dt.month
        - orders_with_cohort['first_order_date'].dt.month
    )
)

# Cohort x period matrix: for every cohort month (rows) and cohort period
# (columns), the number of distinct customers that placed an order.
cohort_pivot = pd.pivot_table(
    orders_with_cohort,
    values='customer_id',
    index='cohort_month',
    columns='cohort_period',
    aggfunc=pd.Series.nunique,
)

# Render the matrix as an annotated heatmap, write it to disk, then display it.
fig, ax = plt.subplots(figsize=(12, 8))
sns.heatmap(cohort_pivot, annot=True, fmt='.0f', cmap='Blues', ax=ax)
ax.set_title('Customer Retention by Cohort')
fig.savefig('../visualizations/retention_heatmap.png')
plt.show()