### Код для автоматизации расчёта клиентов, купленных минут и прибыли по когортам и вывода на дашборд

In [1]:
import pandas as pd
from sqlalchemy import create_engine

# SQLAlchemy engine shared by every read_sql_query / to_sql call in this notebook.
# NOTE(review): the connection URL looks redacted ('hidden') — supply the real DSN before running.
engine = create_engine('postgresql://hidden')

### Когортная выручка

In [4]:
def cohort_money():
    """Load client payments tagged with their first-sale cohort month.

    The query reads ``level_2_analytical_tables.transactions`` through the
    module-level ``engine``:

    * ``first_data`` - per client, the month of the earliest transaction
      with ``first_sale = 1`` and ``fractional > 0``;
    * ``all_pays`` - every transaction since 2022-01-01 with
      ``fractional > 0``, UNIONed with the first-sale rows re-dated to the
      start of the month of their earliest date;
    * rows are numbered per client by date and only ``rn > 1`` is kept
      (presumably to drop the synthetic first-sale row - TODO confirm).

    Returns:
        DataFrame with columns ``date``, ``client``, ``money``,
        ``first_date``, ``rn`` and ``period`` (whole months between the
        payment month and the cohort month). Only rows with a non-negative
        ``period`` and a cohort starting in 2022-01 or later are returned.
    """
    sql = '''
    with endd as (
                with first_data as (
                        SELECT date_trunc('month', min(date)) as first_date,
                               клиент as client
                        FROM level_2_analytical_tables.transactions
                        WHERE first_sale = 1
                            and fractional > 0
                        GROUP BY client),


                all_pays as (
                        SELECT date,
                               клиент as client,
                               amount as money
                        FROM level_2_analytical_tables.transactions
                        WHERE date >= '2022-01-01'
                            AND fractional > 0

                        UNION

                        SELECT date_trunc('month', min(date)) as date,
                               клиент as client,
                               amount as money
                        FROM level_2_analytical_tables.transactions
                        WHERE date >= '2022-01-01'
                        and first_sale = 1

                            and fractional > 0
                        GROUP BY client, money)

                select date_trunc('month', a.date) as date, 
                       a.client, a.money, 
                       f.first_date,
                row_number () over(partition by f.client order by date) as rn
                from all_pays as a
                inner join first_data as f on a.client = f.client)
    select * 
    from endd 
    where rn > 1

    '''

    payments = pd.read_sql_query(sql, engine)

    # Monthly period ordinals; their difference is the month offset from the cohort.
    payment_month = payments['date'].dt.to_period('M').astype(int)
    cohort_month = payments['first_date'].dt.to_period('M').astype(int)
    payments['period'] = payment_month - cohort_month

    # Keep payments made in or after the cohort month, for cohorts from 2022-01 on.
    keep = (payments["period"] >= 0) & (payments['first_date'] >= '2022-01')
    return payments[keep]


# Build the cohort revenue frame; the bare `df` on the last line only renders it as the cell output.
df = cohort_money()
df


Unnamed: 0,date,client,money,first_date,rn,period
0,2023-05-01,rec00076pYVJuhg5l,7990.0,2023-05-01,2,0
1,2022-11-01,rec00zU79EXuoL2Oe,5990.0,2022-11-01,2,0
2,2022-10-01,rec01rU6UnCflTwCr,3748.0,2022-10-01,2,0
3,2022-10-01,rec01rU6UnCflTwCr,3748.0,2022-10-01,3,0
4,2022-11-01,rec01rU6UnCflTwCr,3748.0,2022-10-01,4,1
...,...,...,...,...,...,...
30979,2023-03-01,reczzYlxM27XpC4ga,7990.0,2023-03-01,2,0
30980,2023-04-01,reczzYlxM27XpC4ga,10990.0,2023-03-01,3,1
30981,2023-06-01,reczzYlxM27XpC4ga,6990.0,2023-03-01,4,3
30982,2023-04-01,recZzYw6clmFYpmN6,7990.0,2023-04-01,2,0


In [19]:
# Publish the cohort revenue frame to level_3_metrics, replacing the table if it already exists.
df.to_sql(
    name='cohort_money_first_sale',
    schema='level_3_metrics',
    con=engine,
    index=False,
    if_exists='replace',
    chunksize=10000,
)

3135

### Когортный средний чек

In [12]:
def cohort_money_mean():
    """Return revenue per cohort and period together with a mean check.

    The query full-joins two ``level_3_metrics`` tables on
    ``(first_date, period)``:

    * ``cohort_analysis_first_sale`` - supplies ``n_customers``;
    * ``cohort_money_first_sale`` - ``money`` summed per cohort and period.

    ``mean_check`` is the summed money divided by the cohort's
    ``n_customers`` at ``period = 0``, rounded.

    Returns:
        DataFrame with columns ``first_date``, ``period``, ``sum``,
        ``n_customers`` and ``mean_check``, read through the module-level
        ``engine``.
    """
    sql = '''with u as (
                                select first_date, period, n_customers
                                from level_3_metrics.cohort_analysis_first_sale
                               ),

                                m as(
                                select first_date, period, sum(money)
                                from level_3_metrics.cohort_money_first_sale
                                group by first_date, period
                                order by first_date, period)

                                select u.first_date,
                                       u.period,
                                       m.sum,
                                       u.n_customers,
                                       ROUND(sum / (SELECT n_customers FROM u WHERE u.first_date = m.first_date AND u.period = 0)) AS mean_check
                                from u
                                full join m on u.first_date = m.first_date and u.period = m.period'''

    return pd.read_sql_query(sql, engine)



In [None]:
# Compute the cohort mean-check frame and publish it to level_3_metrics,
# replacing the table if it already exists.
itog = cohort_money_mean()
itog.to_sql(
    name='cohort_money_mean',
    con=engine,
    if_exists='replace',
    index=False,
    schema='level_3_metrics',
    chunksize=10000
)

### Когорты минут

In [10]:
def cohort_minutes():
    """Load purchased minutes per client tagged with the first-sale cohort month.

    The query reads ``level_2_analytical_tables.transactions`` through the
    module-level ``engine``:

    * ``first_data`` - per client, the month of the earliest transaction
      with ``first_sale = 1`` and ``fractional > 0``;
    * ``all_pays`` - ``minutes * fractional`` for every transaction since
      2022-01-01 with ``fractional > 0`` and non-null ``minutes``, UNIONed
      with the first-sale rows re-dated to the start of the month of their
      earliest date;
    * rows are numbered per client by date and only ``rn > 1`` is kept
      (presumably to drop the synthetic first-sale row - TODO confirm).

    Returns:
        DataFrame with columns ``date``, ``client``, ``minutes``,
        ``first_date``, ``rn`` and ``period`` (whole months between the
        purchase month and the cohort month). Only rows with a non-negative
        ``period`` and a cohort starting in 2022-01 or later are returned.
    """
    query = '''
    with endd as (
                with first_data as (
                        SELECT date_trunc('month', min(date)) as first_date,
                               клиент as client
                        FROM level_2_analytical_tables.transactions
                        WHERE first_sale = 1
                            AND fractional > 0
                        GROUP BY client),


                all_pays as (
                        SELECT date,
                               клиент as client,
                               (minutes * fractional) as minutes
                        FROM level_2_analytical_tables.transactions
                        WHERE date >= '2022-01-01'
                            AND fractional > 0
                            AND minutes is not null

                        UNION

                        SELECT date_trunc('month', min(date)) as date,
                               клиент as client,
                               (minutes * fractional) as minutes
                        FROM level_2_analytical_tables.transactions
                        WHERE date >= '2022-01-01'
                            AND first_sale = 1
                            AND minutes is not null
                            AND fractional > 0
                        GROUP BY client, minutes, fractional )

                SELECT date_trunc('month', a.date) as date, 
                       a.client, a.minutes, 
                       f.first_date,
                       row_number () over(partition by f.client order by date) as rn
                FROM all_pays as a
                INNER JOIN first_data as f on a.client = f.client)
    SELECT * 
    FROM endd 
    WHERE rn > 1

    '''

    df = pd.read_sql_query(query, engine)

    # Difference of monthly period ordinals = months elapsed since the cohort month.
    df['period'] = df['date'].dt.to_period('M').astype(int) - df['first_date'].dt.to_period('M').astype(int)
    df = df[df["period"] >= 0]

    # Only cohorts that start in 2022-01 or later are reported.
    df = df[df['first_date'] >= '2022-01']

    # Must stay inside the function body: at column 0 this is a SyntaxError.
    return df



In [13]:
# Compute the cohort minutes frame and publish it to level_3_metrics,
# replacing the table if it already exists.
df_minutes = cohort_minutes()
df_minutes.to_sql(
    name='cohort_minutes_first_sale',
    con=engine,
    if_exists='replace',
    index=False,
    schema='level_3_metrics',
    chunksize=10000
)

2860

### Для когорты покупателей 

In [10]:
# Customer-cohort source data, one row per (activity month, client):
#   first_data - per client, month of the earliest first-sale transaction
#                (fractional > 0, date >= 2022-01-01, rn_package_id = 1);
#   all_pays   - transaction dates since 2022-01-01 with fractional > 0, UNIONed
#                with the month start of each client's earliest first-sale row.
# The inner join keeps only clients present in first_data.
query = '''
with endd as (
            with first_data as (
                    SELECT date_trunc('month', min(date)) as first_date,
                           клиент as client
                    FROM level_2_analytical_tables.transactions
                    WHERE first_sale = 1
                        and fractional > 0
                        and date >= '2022-01-01'
                        and rn_package_id = 1
                    GROUP BY client),


            all_pays as (
                    SELECT date,
                           клиент as client
                    FROM level_2_analytical_tables.transactions
                    WHERE date >= '2022-01-01'
                        AND fractional > 0

                    UNION

                    SELECT date_trunc('month', min(date)) as date,
                           клиент as client
                    FROM level_2_analytical_tables.transactions
                    WHERE date >= '2022-01-01'
                    and first_sale = 1
                    
                        and fractional > 0
                    GROUP BY client)

            select date_trunc('month', a.date) as date, 
                   a.client, 
                   f.first_date
            from all_pays as a
            inner join first_data as f on a.client = f.client)
select * 
from endd 

'''

# Resulting columns: date (month-truncated), client, first_date.
df = pd.read_sql_query(sql=query, con=engine)



In [11]:
# Each client's earliest month in `df`, stored twice: as a monthly Period
# ('cohort') and as the raw timestamp ('cohort_for_datalens').
# NOTE(review): assumes df['date'] is already datetime64 — otherwise convert
# it with pd.to_datetime first.
first_seen = df.groupby('client')['date'].transform('min')
df['cohort'] = first_seen.dt.to_period('M')
df['cohort_for_datalens'] = first_seen

# Number of distinct clients per cohort and activity month.
df_cohort = (
    df.groupby(['cohort_for_datalens', 'cohort', 'date'])
    .agg(n_customers=('client', 'nunique'))
    .reset_index()
)

# Period number: whole months between the activity month and the cohort month.
month_offsets = df_cohort['date'].dt.to_period('M') - df_cohort['cohort']
df_cohort['period_number'] = month_offsets.apply(lambda offset: offset.n)

# Cohort-by-period matrix of customer counts; the bare name renders it as the cell output.
customer_matrix = df_cohort.pivot_table(
    index='cohort',
    columns='period_number',
    values='n_customers',
    aggfunc='sum',
)
customer_matrix

period_number,0,1,2,3,4,5,6,7,8,9,...,17,18,19,20,21,22,23,24,25,26
cohort,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2022-01,280.0,22.0,45.0,59.0,36.0,11.0,13.0,28.0,28.0,26.0,...,2.0,4.0,5.0,5.0,3.0,11.0,6.0,5.0,5.0,4.0
2022-02,412.0,39.0,94.0,64.0,18.0,18.0,52.0,43.0,41.0,36.0,...,8.0,8.0,11.0,11.0,13.0,6.0,13.0,7.0,8.0,
2022-03,393.0,43.0,33.0,25.0,13.0,38.0,43.0,32.0,37.0,31.0,...,7.0,11.0,6.0,16.0,7.0,4.0,6.0,7.0,,
2022-04,393.0,56.0,45.0,24.0,46.0,34.0,52.0,44.0,26.0,19.0,...,5.0,5.0,15.0,4.0,7.0,3.0,4.0,,,
2022-05,212.0,27.0,26.0,37.0,19.0,26.0,34.0,20.0,11.0,15.0,...,5.0,4.0,7.0,5.0,4.0,2.0,,,,
2022-06,328.0,34.0,56.0,37.0,26.0,31.0,27.0,10.0,13.0,18.0,...,9.0,5.0,7.0,,4.0,,,,,
2022-07,378.0,58.0,40.0,67.0,51.0,38.0,26.0,24.0,20.0,23.0,...,6.0,6.0,6.0,5.0,,,,,,
2022-08,499.0,73.0,77.0,84.0,66.0,39.0,39.0,43.0,29.0,16.0,...,8.0,9.0,7.0,,,,,,,
2022-09,815.0,129.0,183.0,164.0,93.0,69.0,102.0,80.0,31.0,22.0,...,15.0,14.0,,,,,,,,
2022-10,995.0,212.0,177.0,171.0,75.0,86.0,73.0,44.0,24.0,15.0,...,16.0,,,,,,,,,
