In [32]:
import pandas as pd 
import glob

# Notebook display configuration: show every column of a DataFrame
# (None removes the column limit, so wide frames are not elided).
pd.set_option('display.max_columns', None)

# One-off data preparation, intentionally left commented out.
# It gathers the per-client CSV exports matching data/client_*_transactions_3m.csv
# and data/client_*_transfers_3m.csv, concatenates each group, and writes the
# combined files data/all_transactions.csv and data/all_transfers.csv, which the
# loading cell below reads. Uncomment and run once if those files are missing.
# transaction_files = glob.glob('data/client_*_transactions_3m.csv')
# transfer_files = glob.glob('data/client_*_transfers_3m.csv')

# Read and concatenate transactions
# all_transactions = pd.concat([pd.read_csv(f, encoding='utf-8') for f in transaction_files], ignore_index=True)
# all_transactions.to_csv('data/all_transactions.csv', index=False, encoding='utf-8')

# # Read and concatenate transfers
# all_transfers = pd.concat([pd.read_csv(f, encoding='utf-8') for f in transfer_files], ignore_index=True)
# all_transfers.to_csv('data/all_transfers.csv', index=False, encoding='utf-8')

In [33]:
# Load the client table and the combined transaction / transfer logs.
clients, transactions, transfers = (
    pd.read_csv(csv_path, encoding='utf-8')
    for csv_path in ('data/clients.csv', 'data/all_transactions.csv', 'data/all_transfers.csv')
)

# The direction column is not used in this analysis, so remove it from transfers.
transfers = transfers.drop(columns='direction')
# transactions = transactions.drop(columns=['date'])  # left disabled: 'date' is still needed to derive the month

# Canonical client status -> lowercase spellings that should collapse onto it.
# NOTE(review): 'студент' is listed twice for 'Студент'; the second entry was
# presumably meant to be a different alias - confirm.
types = {
    'Зарплатный клиент': ['зарплатный клиент', 'зп'],
    'Премиальный клиент': ['премиальный клиент', 'вип'],
    'Студент': ['студент', 'студент'],
    'Стандартный клиент': ['стандартный клиент', 'обычный']
}

# Reverse lookup: lowercase variant -> canonical status.
canonical_by_variant = {
    variant: canonical
    for canonical, variant_list in types.items()
    for variant in variant_list
}

for frame in (clients, transactions, transfers):
    # Lowercase and trim first so the lookup is insensitive to case and padding.
    cleaned = frame['status'].str.lower().str.strip()
    # Known variants become the canonical label; anything else keeps its cleaned value.
    frame['status'] = cleaned.map(canonical_by_variant).fillna(cleaned)

# Quick sanity check: shape and the first rows (ordered by client) of each table.
print(f'Clients shape: {clients.shape}')
display(clients.sort_values('client_code').head(5))
print(f'Transfers shape: {transfers.shape}')
display(transfers.sort_values('client_code').head(5))
print(f'Transactions shape: {transactions.shape}')
display(transactions.sort_values('client_code').head(5))

Clients shape: (59, 6)


Unnamed: 0,client_code,name,status,age,city,avg_monthly_balance_KZT
0,1,Айгерим,Зарплатный клиент,29,Алматы,92643
1,2,Данияр,Премиальный клиент,41,Астана,1577073
2,3,Сабина,Студент,22,Алматы,63116
3,4,Тимур,Зарплатный клиент,36,Караганда,83351
4,5,Камилла,Премиальный клиент,45,Алматы,1336536


Transfers shape: (17700, 9)


Unnamed: 0,client_code,name,product,status,city,date,type,amount,currency
15583,1,Айгерим,Карта для путешествий,Зарплатный клиент,Алматы,2025-08-27 13:52:57,atm_withdrawal,28318.31,KZT
15582,1,Айгерим,Карта для путешествий,Зарплатный клиент,Алматы,2025-08-26 12:23:26,card_out,21578.45,KZT
15581,1,Айгерим,Карта для путешествий,Зарплатный клиент,Алматы,2025-08-26 12:19:08,card_out,23260.79,KZT
15580,1,Айгерим,Карта для путешествий,Зарплатный клиент,Алматы,2025-08-26 09:29:13,p2p_out,14201.3,KZT
15595,1,Айгерим,Карта для путешествий,Зарплатный клиент,Алматы,2025-08-30 15:26:18,card_out,36485.0,KZT


Transactions shape: (17700, 9)


Unnamed: 0,client_code,name,product,status,city,date,category,amount,currency
5451,1,Айгерим,Карта для путешествий,Зарплатный клиент,Алматы,2025-06-15 19:00:40,Кафе и рестораны,5725.94,KZT
5450,1,Айгерим,Карта для путешествий,Зарплатный клиент,Алматы,2025-06-15 15:50:24,Кино,4286.91,KZT
5449,1,Айгерим,Карта для путешествий,Зарплатный клиент,Алматы,2025-06-15 12:10:12,Продукты питания,14803.48,KZT
5448,1,Айгерим,Карта для путешествий,Зарплатный клиент,Алматы,2025-06-15 10:30:00,Путешествия,39623.64,KZT
5499,1,Айгерим,Карта для путешествий,Зарплатный клиент,Алматы,2025-06-30 18:20:10,Смотрим дома,6813.12,KZT


In [34]:
# Derive a calendar-month period from each record's timestamp so amounts can be
# rolled up per month.
transfers['month'] = pd.to_datetime(transfers['date']).dt.to_period('M')
transactions['month'] = pd.to_datetime(transactions['date']).dt.to_period('M')

# Client-level attributes attached to every transfer / transaction row.
client_attr_cols = ['avg_monthly_balance_KZT', 'age']
client_attrs = clients[['client_code'] + client_attr_cols]

# Make the merge safe to re-run: if this cell already ran, the attribute columns
# are present and a second merge would create *_x / *_y duplicates, after which
# the groupby below fails with KeyError. Dropping them first keeps the cell
# idempotent. validate='many_to_one' raises MergeError if a client_code occurs
# more than once in clients, instead of silently duplicating rows and inflating
# the sums.
transfers = (
    transfers.drop(columns=client_attr_cols, errors='ignore')
    .merge(client_attrs, on='client_code', how='left', validate='many_to_one')
)
transactions = (
    transactions.drop(columns=client_attr_cols, errors='ignore')
    .merge(client_attrs, on='client_code', how='left', validate='many_to_one')
)


def _monthly_totals(frame, dimension):
    """Sum ``amount`` per client, month and ``dimension``.

    Parameters
    ----------
    frame : pandas.DataFrame
        Row-level records carrying the client columns, ``currency``, ``month``,
        ``amount`` and the ``dimension`` column.
    dimension : str
        Name of the column that splits the amounts ('type' or 'category').

    Returns
    -------
    pandas.DataFrame
        One row per group with the summed ``amount``, ordered by currency and
        then by amount, largest first. Groups with missing key values are kept
        (``dropna=False``).
    """
    group_cols = [
        'client_code', 'name', 'product', 'status', 'city',
        dimension,
        'currency', 'month', 'age', 'avg_monthly_balance_KZT',
    ]
    return (
        frame.groupby(group_cols, dropna=False)['amount']
        .sum()
        .reset_index()
        .sort_values(['currency', 'amount'], ascending=[True, False])
    )


transfers_aggregated = _monthly_totals(transfers, 'type')
transactions_aggregated = _monthly_totals(transactions, 'category')

display(transfers_aggregated.sort_values('client_code'))
display(transactions_aggregated.sort_values('client_code'))

Unnamed: 0,client_code,name,product,status,city,type,currency,month,age,avg_monthly_balance_KZT,amount
18,1,Айгерим,Карта для путешествий,Зарплатный клиент,Алматы,refund_in,KZT,2025-06,29,92643,38328.65
20,1,Айгерим,Карта для путешествий,Зарплатный клиент,Алматы,refund_in,KZT,2025-08,29,92643,37768.02
6,1,Айгерим,Карта для путешествий,Зарплатный клиент,Алматы,card_out,KZT,2025-06,29,92643,1313972.43
7,1,Айгерим,Карта для путешествий,Зарплатный клиент,Алматы,card_out,KZT,2025-07,29,92643,1258473.03
8,1,Айгерим,Карта для путешествий,Зарплатный клиент,Алматы,card_out,KZT,2025-08,29,92643,1257506.30
...,...,...,...,...,...,...,...,...,...,...,...
1755,60,Ермек,Золотые слитки,Зарплатный клиент,Кызылорда,refund_in,KZT,2025-06,42,48779,18216.93
1745,60,Ермек,Золотые слитки,Зарплатный клиент,Кызылорда,gold_buy_out,KZT,2025-08,42,48779,1500000.00
1737,60,Ермек,Золотые слитки,Зарплатный клиент,Кызылорда,card_out,KZT,2025-06,42,48779,1071282.02
1739,60,Ермек,Золотые слитки,Зарплатный клиент,Кызылорда,card_out,KZT,2025-08,42,48779,1078071.77


Unnamed: 0,client_code,name,product,status,city,category,currency,month,age,avg_monthly_balance_KZT,amount
25,1,Айгерим,Карта для путешествий,Зарплатный клиент,Алматы,Такси,KZT,2025-06,29,92643,82873.29
3,1,Айгерим,Карта для путешествий,Зарплатный клиент,Алматы,Едим дома,KZT,2025-06,29,92643,50173.52
17,1,Айгерим,Карта для путешествий,Зарплатный клиент,Алматы,Продукты питания,KZT,2025-07,29,92643,163779.33
8,1,Айгерим,Карта для путешествий,Зарплатный клиент,Алматы,Играем дома,KZT,2025-08,29,92643,53243.06
6,1,Айгерим,Карта для путешествий,Зарплатный клиент,Алматы,Играем дома,KZT,2025-06,29,92643,64227.45
...,...,...,...,...,...,...,...,...,...,...,...
1464,60,Ермек,Золотые слитки,Зарплатный клиент,Кызылорда,АЗС,KZT,2025-06,42,48779,25391.80
1480,60,Ермек,Золотые слитки,Зарплатный клиент,Кызылорда,Продукты питания,KZT,2025-07,42,48779,244057.31
1479,60,Ермек,Золотые слитки,Зарплатный клиент,Кызылорда,Продукты питания,KZT,2025-06,42,48779,260951.62
1467,60,Ермек,Золотые слитки,Зарплатный клиент,Кызылорда,Едим дома,KZT,2025-06,42,48779,27626.31


In [39]:
# Columns that identify one feature row: a client (with its static attributes),
# a product, a currency and a month.
keys = ['client_code', 'name', 'status', 'city', 'age', 'month', 'avg_monthly_balance_KZT', 'product', 'currency']

# transfers: type -> columns (one trf_<type> column per transfer type).
# pivot_table with aggfunc='sum' already sums the amounts for every
# (keys, type) cell, so no separate groupby pre-aggregation is needed.
transfers_wide = (
    transfers.pivot_table(index=keys, columns='type', values='amount', aggfunc='sum', fill_value=0)
             .add_prefix('trf_')
             .reset_index()
)

# transactions: category -> columns (one trs_<category> column per category).
transactions_wide = (
    transactions.pivot_table(index=keys, columns='category', values='amount', aggfunc='sum', fill_value=0)
                .add_prefix('trs_')
                .reset_index()
)

# outer-join to keep everyone; zeros for missing combos
features = transfers_wide.merge(transactions_wide, on=keys, how='outer')

# Fill only the amount columns: a key combination present on just one side of
# the join has no amounts on the other side. Identifier columns are left alone
# so a 0 is never written into them.
value_cols = [col for col in features.columns if col not in keys]
features[value_cols] = features[value_cols].fillna(0)

# Income / spending are classified by the direction suffix of the transfer
# type. Only trf_ columns are considered so a transaction category whose name
# happens to end in '_in' or '_out' can never leak into the totals.
# NOTE(review): transfer types without a direction suffix (atm_withdrawal,
# fx_buy, fx_sell in the current data) are counted in neither total, and the
# trs_ category columns are not part of spending_total - confirm this is intended.
transfer_cols = [col for col in features.columns if col.startswith('trf_')]
income_cols = [col for col in transfer_cols if col.endswith('_in')]
spending_cols = [col for col in transfer_cols if col.endswith('_out')]

features['income_total'] = features[income_cols].sum(axis=1)
features['spending_total'] = features[spending_cols].sum(axis=1)
features['net_total'] = features['income_total'] - features['spending_total']

In [40]:
# Show the assembled feature table; as the cell's last expression it is
# rendered as the cell output.
features

Unnamed: 0,client_code,name,status,city,age,month,avg_monthly_balance_KZT,product,currency,trf_atm_withdrawal,trf_card_in,trf_card_out,trf_cashback_in,trf_cc_repayment_out,trf_deposit_topup_out,trf_family_in,trf_fx_buy,trf_fx_sell,trf_gold_buy_out,trf_gold_sell_in,trf_installment_payment_out,trf_invest_in,trf_invest_out,trf_loan_payment_out,trf_p2p_out,trf_refund_in,trf_salary_in,trf_stipend_in,trf_utilities_out,trs_АЗС,trs_Авто,trs_Едим дома,trs_Играем дома,trs_Кафе и рестораны,trs_Кино,trs_Косметика и Парфюмерия,trs_Мебель,trs_Одежда и обувь,trs_Отели,trs_Подарки,trs_Продукты питания,trs_Путешествия,trs_Развлечения,trs_Ремонт дома,trs_Смотрим дома,trs_Спорт,trs_Такси,trs_Ювелирные украшения,income_total,spending_total,net_total
0,1,Айгерим,Зарплатный клиент,Алматы,29,2025-06,92643,Карта для путешествий,KZT,257730.54,54537.34,1313972.43,45440.16,0.0,0.0,0.0,0.0,0.0,0.00,0.00,0.0,0.0,0.0,175301.74,498767.88,38328.65,488634.28,0.0,157837.21,32469.43,0.0,50173.52,64227.45,142944.94,67037.20,0.0,0.0,0.0,0.00,0.0,248409.89,39623.64,0.00,0.0,42855.13,0.0,82873.29,0.0,626940.43,2145879.26,-1518938.83
1,1,Айгерим,Зарплатный клиент,Алматы,29,2025-07,92643,Карта для путешествий,KZT,150235.27,59064.84,1258473.03,46884.00,0.0,0.0,0.0,0.0,0.0,0.00,0.00,0.0,0.0,0.0,173934.70,393223.37,38374.83,474202.74,0.0,132250.21,50680.68,0.0,45020.27,35831.90,250721.25,56285.24,0.0,0.0,0.0,134087.07,0.0,163779.33,235810.08,8854.05,0.0,43470.60,0.0,94315.68,0.0,618526.41,1957881.31,-1339354.90
2,1,Айгерим,Зарплатный клиент,Алматы,29,2025-08,92643,Карта для путешествий,KZT,209980.90,62321.89,1257506.30,45949.18,0.0,0.0,0.0,0.0,0.0,0.00,0.00,0.0,0.0,0.0,158773.82,382138.01,37768.02,483584.71,0.0,202064.99,70083.23,0.0,79445.23,53243.06,125091.40,52063.02,0.0,0.0,0.0,0.00,0.0,233077.84,0.00,0.00,0.0,69108.71,0.0,55331.14,0.0,629623.80,2000483.12,-1370859.32
3,2,Данияр,Премиальный клиент,Астана,41,2025-06,1577073,Карта для путешествий,KZT,324630.92,36272.19,1181789.15,51516.00,0.0,0.0,0.0,0.0,0.0,0.00,0.00,0.0,0.0,0.0,182495.01,407224.62,14984.58,444038.37,0.0,120515.73,38170.26,0.0,98339.68,33029.27,134763.60,57006.56,0.0,0.0,0.0,0.00,0.0,342561.15,0.00,0.00,0.0,47737.26,0.0,65342.74,0.0,546811.14,1892024.51,-1345213.37
4,2,Данияр,Премиальный клиент,Астана,41,2025-07,1577073,Карта для путешествий,KZT,247593.66,86688.90,1040723.48,19285.47,0.0,0.0,0.0,0.0,0.0,0.00,0.00,0.0,0.0,0.0,181069.35,454437.00,15915.18,472479.66,0.0,137713.04,71142.03,0.0,55881.28,75358.51,190609.15,39439.99,0.0,0.0,0.0,0.00,0.0,252958.23,0.00,0.00,0.0,57450.11,0.0,62866.44,0.0,594369.21,1813942.87,-1219573.66
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
173,59,Динара,Премиальный клиент,Алматы,55,2025-07,4427461,Золотые слитки,KZT,258764.76,75220.42,1084455.85,59333.87,0.0,0.0,0.0,0.0,0.0,1008070.38,1299777.71,0.0,0.0,0.0,197625.22,529988.03,23604.31,650545.89,0.0,142667.79,91425.97,0.0,88535.20,51914.47,249555.04,26001.97,0.0,0.0,0.0,0.00,0.0,341677.16,0.00,0.00,0.0,58699.41,0.0,93682.41,0.0,2108482.20,2962807.27,-854325.07
174,59,Динара,Премиальный клиент,Алматы,55,2025-08,4427461,Золотые слитки,KZT,240929.66,87759.78,989758.59,49899.67,0.0,0.0,0.0,0.0,0.0,1215076.38,574435.01,0.0,0.0,0.0,173990.63,571937.44,38509.25,669947.56,0.0,155352.07,58257.22,0.0,97409.79,57580.84,262585.45,56482.63,0.0,0.0,0.0,0.00,0.0,327955.83,0.00,0.00,0.0,30607.29,0.0,88140.79,0.0,1420551.27,3106115.11,-1685563.84
175,60,Ермек,Зарплатный клиент,Кызылорда,42,2025-06,48779,Золотые слитки,KZT,288294.23,99747.94,1071282.02,21886.55,0.0,0.0,0.0,0.0,0.0,996431.57,1500000.00,0.0,0.0,0.0,168807.47,484768.68,18216.93,248319.57,0.0,157607.23,25391.80,0.0,27626.31,69864.83,113463.66,74717.28,0.0,0.0,0.0,0.00,0.0,260951.62,0.00,0.00,0.0,66369.97,0.0,100142.86,0.0,1888170.99,2878896.97,-990725.98
176,60,Ермек,Зарплатный клиент,Кызылорда,42,2025-07,48779,Золотые слитки,KZT,356274.56,85367.15,1140116.08,46541.41,0.0,0.0,0.0,0.0,0.0,1204259.92,1094015.14,0.0,0.0,0.0,146701.46,459950.60,23943.71,394687.31,0.0,105613.20,69599.09,0.0,61274.99,39465.58,218105.45,42103.84,0.0,0.0,0.0,0.00,0.0,244057.31,0.00,0.00,0.0,61625.01,0.0,51813.04,0.0,1644554.72,3056641.26,-1412086.54
