In [None]:
import pandas as pd
import numpy as np

# Cohort analysis on a small sample of orders: each user is assigned to the
# month they signed up in, and each order is bucketed by how many whole
# calendar months after signup it was placed.

# Sample data for cohort analysis (one order per user).
data = {
    'user_id': [1, 2, 3, 4, 5, 6],
    'signup_date': ['2023-01-01', '2023-01-01', '2023-02-01',
                    '2023-02-01', '2023-03-01', '2023-03-01'],
    'order_date': ['2023-01-10', '2023-01-15', '2023-02-20',
                   '2023-03-10', '2023-03-25', '2023-04-05'],
    'order_value': [100, 150, 200, 50, 75, 300],
}

df = pd.DataFrame(data)

# Convert the ISO date strings to datetime64 so the `.dt` accessor is usable.
df['signup_date'] = pd.to_datetime(df['signup_date'])
df['order_date'] = pd.to_datetime(df['order_date'])

# Truncate both dates to monthly periods; the signup month is the cohort key.
df['signup_month'] = df['signup_date'].dt.to_period('M')
df['order_month'] = df['order_date'].dt.to_period('M')

# Cohort index = number of calendar months between signup and order
# (0 means the order happened in the signup month).
# Computed with plain year/month arithmetic instead of
# `(order_month - signup_month).apply(attrgetter('n'))`: `attrgetter` was never
# imported, so that expression raised NameError, and the arithmetic form is
# vectorized rather than a per-row Python call.
df['cohort_index'] = (
    (df['order_month'].dt.year - df['signup_month'].dt.year) * 12
    + (df['order_month'].dt.month - df['signup_month'].dt.month)
)

# Per (cohort, month offset): distinct ordering users and total order value.
cohort_data = df.groupby(['signup_month', 'cohort_index']).agg(
    users=('user_id', 'nunique'),
    total_order_value=('order_value', 'sum')
).reset_index()

# Bare expression so the notebook displays the resulting table.
cohort_data