In [1]:
import pandas as pd

In [2]:
# задача: посчитать окупаемость маркетинга за последние 12 месяцев

# тратим деньги
# привлекаем пользователей
# пользователи что-то покупают

# когорты от даты регистрации
# посчитаем 
# - выручку и LTV
# - рекламные расходы
# - окупаемость маркетинга

In [None]:
# Алгоритм
# 0. Формулируем задачу
# 1. Определяем когорту (событие и временной промежуток) — даты регистрации по месяцам
# 2. Выделяем целевые метрики - выручка, LTV, ROAS
# 3. Выбираем подходящий формат отчета - возрастной

In [206]:
ads = pd.read_csv('data/ads.csv')
ads['dt'] = pd.to_datetime(ads['dt'])
ads['month'] = ads['dt'].dt.to_period('M')

ads_monthly = ads.groupby('month').agg(ad_spend = ('ad_spend', 'sum'))

ads_monthly.head()

Unnamed: 0_level_0,ad_spend
month,Unnamed: 1_level_1
2019-11,43.4
2019-12,3052.6
2020-01,10912.514286
2020-02,13781.914286
2020-03,21480.142857


In [52]:
users = pd.read_csv('data/users.csv', index_col='id')
users['reg_date'] = pd.to_datetime(users['reg_date'])
users['reg_month'] = users['reg_date'].dt.to_period('M')
users[['reg_date', 'reg_month']]

Unnamed: 0_level_0,reg_date,reg_month
id,Unnamed: 1_level_1,Unnamed: 2_level_1
1124,2020-06-28,2020-06
1766,2020-09-20,2020-09
4132,2021-09-01,2021-09
3662,2021-06-23,2021-06
4804,2021-12-15,2021-12
...,...,...
3895,2021-07-27,2021-07
4235,2021-09-21,2021-09
1776,2020-09-22,2020-09
2919,2021-03-09,2021-03


In [72]:
cohort_size = users.groupby('reg_month').agg(n_users = ('email', 'count')).reset_index()

In [55]:
orders = pd.read_csv('data/orders.csv', index_col='payment_id')
orders['created_at'] = pd.to_datetime(orders['created_at'])
orders = (
    orders
    .groupby('user_id')
    .agg(first_payment_at = ('created_at','min'))
    .merge(orders, how='inner', left_index=True, right_on='user_id')
)
orders['payment_month'] = orders['created_at'].dt.to_period('M')
orders['first_payment_month'] = orders['first_payment_at'].dt.to_period('M')

orders = orders.merge(
    users[['reg_date', 'reg_month']], 
    how='inner', 
    left_on='user_id', 
    right_index=True)

orders.head()

orders['cohort_age_days'] = (orders['created_at'] - orders['reg_date']).dt.days
orders['cohort_age_months'] = orders['cohort_age_days'] // 30

In [111]:
orders.reg_month.max()

Period('2022-11', 'M')

Period('2022-11', 'M')

In [62]:
cohort_size.tail()

Unnamed: 0_level_0,n_users
reg_month,Unnamed: 1_level_1
2022-07,276
2022-08,235
2022-09,233
2022-10,213
2022-11,165


In [211]:
cohorts = (
    orders
    .groupby(['reg_month', 'cohort_age_months'])
    .agg(revenue = ('payment_sum', 'sum'))
    .reset_index()
)
cohorts = cohorts.merge(cohort_size, how='outer', on='reg_month')
cohorts['rev_per_user'] = cohorts['revenue'] / cohorts['n_users']

cohorts['ltv'] = cohorts.groupby('reg_month')['rev_per_user'].expanding().sum().values

cohorts = cohorts.merge(ads_monthly, 
                        how='left', 
                        left_on = 'reg_month',
                        right_index=True)

cohorts['roas'] = cohorts['revenue'] / cohorts['ad_spend']
cohorts['roas_total'] = cohorts.groupby('reg_month')['roas'].expanding().sum().values

In [212]:

cohorts

Unnamed: 0,reg_month,cohort_age_months,revenue,n_users,rev_per_user,ltv,ad_spend,roas,roas_total
0,2019-12,0,533.38,22,24.244545,24.244545,3052.600000,0.174730,0.174730
1,2019-12,1,79.06,22,3.593636,27.838182,3052.600000,0.025899,0.200629
2,2019-12,2,46.81,22,2.127727,29.965909,3052.600000,0.015334,0.215963
3,2019-12,3,151.29,22,6.876818,36.842727,3052.600000,0.049561,0.265524
4,2019-12,4,251.66,22,11.439091,48.281818,3052.600000,0.082441,0.347966
...,...,...,...,...,...,...,...,...,...
652,2022-09,1,2164.62,233,9.290215,43.095665,79591.614286,0.027197,0.126160
653,2022-09,2,657.94,233,2.823777,45.919442,79591.614286,0.008266,0.134427
654,2022-10,0,7842.31,213,36.818357,36.818357,78843.157143,0.099467,0.099467
655,2022-10,1,908.57,213,4.265587,41.083944,78843.157143,0.011524,0.110991


In [213]:
cohorts.pivot(
    index='reg_month',
    columns='cohort_age_months',
    values='roas_total'
).round(2).fillna('')

cohort_age_months,0,1,2,3,4,5,6,7,8,9,...,26,27,28,29,30,31,32,33,34,35
reg_month,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2019-12,0.17,0.2,0.22,0.27,0.35,0.36,0.38,0.38,0.43,0.47,...,0.68,0.7,,0.71,,,,0.73,,
2020-01,0.31,0.36,0.4,0.46,0.52,0.59,0.64,0.69,0.72,0.76,...,1.01,,1.03,1.04,1.04,1.04,1.05,1.06,1.06,1.06
2020-02,0.25,0.31,0.37,0.4,0.45,0.49,0.52,0.57,0.6,0.62,...,0.83,0.84,0.84,0.84,0.84,0.84,0.85,0.86,,
2020-03,0.15,0.19,0.21,0.25,0.29,0.33,0.37,0.4,0.42,0.44,...,0.56,0.57,0.58,0.58,0.59,0.59,0.59,,,
2020-04,0.12,0.17,0.2,0.23,0.25,0.28,0.29,0.31,0.33,0.34,...,0.44,0.44,0.45,0.45,0.46,,,,,
2020-05,0.15,0.18,0.22,0.24,0.27,0.28,0.31,0.33,0.34,0.35,...,0.44,0.44,0.45,0.45,0.45,,,,,
2020-06,0.13,0.17,0.2,0.22,0.24,0.26,0.27,0.28,0.3,0.3,...,0.39,0.39,0.39,,,,,,,
2020-07,0.14,0.17,0.19,0.22,0.24,0.27,0.29,0.3,0.32,0.33,...,0.43,0.44,0.44,,,,,,,
2020-08,0.11,0.13,0.16,0.17,0.19,0.2,0.22,0.23,0.24,0.25,...,0.33,,,,,,,,,
2020-09,0.08,0.1,0.12,0.14,0.15,0.16,0.17,0.18,0.19,0.2,...,0.28,0.28,,,,,,,,
