In [2]:
import pandas as pd
import numpy as np
import os
from tqdm.notebook import tqdm
import gc

import catboost as cb

In [None]:
import neptune

neptune.init(project_qualified_name='', # change this to your `workspace_name/project_name`
             api_token='', # change this to your api token
            )

In [3]:
TRAIN_TRANSACTIONS_PATH = './train_transactions_contest/'
TEST_TRANSACTIONS_PATH = './test_transactions_contest/'

TRAIN_TARGET_PATH = './alfabattle2_train_target.csv'

In [4]:
def read_parquet_dataset_from_local(path_to_dataset: str, start_from: int = 0,
                                     num_parts_to_read: int = 2, columns=None, verbose=False) -> pd.DataFrame:
    """
    читает num_parts_to_read партиций, преобразует их к pd.DataFrame и возвращает
    :param path_to_dataset: путь до директории с партициями
    :param start_from: номер партиции, с которой начать чтение
    :param num_parts_to_read: количество партиций, которые требуется прочитать
    :param columns: список колонок, которые нужно прочитать из партиции
    :return: pd.DataFrame
    """

    res = []
    dataset_paths = sorted([os.path.join(path_to_dataset, filename) for filename in os.listdir(path_to_dataset) 
                              if filename.startswith('part')])
    
    start_from = max(0, start_from)
    chunks = dataset_paths[start_from: start_from + num_parts_to_read]
    if verbose:
        print('Reading chunks:\n')
        for chunk in chunks:
            print(chunk)
    for chunk_path in tqdm(chunks, desc="Reading dataset with pandas"):
        chunk = pd.read_parquet(chunk_path,columns=columns)
        res.append(chunk)
    return pd.concat(res).reset_index(drop=True)

In [210]:
transactions_frame = read_parquet_dataset_from_local(TRAIN_TRANSACTIONS_PATH, start_from=0, num_parts_to_read=1)

memory_usage_of_frame = transactions_frame.memory_usage(index=True).sum() / 10**9
expected_memory_usage = memory_usage_of_frame * 50
print(f'Объем памяти в  RAM одной партиции данных с транзакциями: {round(memory_usage_of_frame, 3)} Gb')
print(f'Ожидаемый размер в RAM всего датасета: {round(expected_memory_usage, 3)} Gb')

HBox(children=(HTML(value='Reading dataset with pandas'), FloatProgress(value=0.0, max=1.0), HTML(value='')))


Объем памяти в  RAM одной партиции данных с транзакциями: 0.476 Gb
Ожидаемый размер в RAM всего датасета: 23.798 Gb


In [198]:
transactions_frame

Unnamed: 0,app_id,amnt,currency,operation_kind,card_type,operation_type,operation_type_group,ecommerce_flag,payment_system,income_flag,mcc,country,city,mcc_category,day_of_week,hour,days_before,weekofyear,hour_diff,transaction_number
0,0,0.465425,1,4,98,4,2,3,7,3,2,1,37,2,4,19,351,34,-1,1
1,0,0.000000,1,2,98,7,1,3,7,3,2,1,49,2,4,20,351,34,0,2
2,0,0.521152,1,2,98,3,1,3,7,3,2,1,37,2,4,20,351,34,0,3
3,0,0.356078,1,1,5,2,1,3,7,3,10,1,49,7,2,0,348,34,52,4
4,0,0.000000,1,2,98,7,1,3,7,3,2,1,49,2,4,16,337,53,280,5
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5408643,23646,0.390944,1,1,37,2,1,1,3,1,1,1,3,1,6,9,2,48,32,453
5408644,23646,0.428447,1,1,37,2,1,1,3,1,39,1,120,7,5,12,1,48,27,454
5408645,23646,0.371478,1,1,37,2,1,1,3,1,22,1,2,9,5,13,1,48,1,455
5408646,23646,0.348726,1,1,37,2,1,1,3,1,22,1,3,9,5,13,1,48,1,456


In [199]:
del transactions_frame
gc.collect()

94602

In [200]:
! rm -r ./preprocessed_test_transactions/
! mkdir ./preprocessed_test_transactions/
! rm -r ./preprocessed_transactions/
! mkdir ./preprocessed_transactions/

rm: ./preprocessed_test_transactions/: No such file or directory


In [5]:
def extract_from_df(df: pd.DataFrame, prefix_colum = ''):
    p5 = ('p5', lambda x: np.percentile(x, q=5))
    p25 = ('p25', lambda x: np.percentile(x, q=25))
    p75 = ('p75', lambda x: np.percentile(x, q=75))
    p95 = ('p95', lambda x: np.percentile(x, q=95))
    t = df.groupby(['app_id']).agg({
        'amnt':['count', 'std', 'mean', 'median', 'sum', 'max', 'min', p5, p25, p75, p95],
        'currency':['nunique', 'mean', 'median', p5, p25, p75, p95],
        'operation_kind':['nunique', 'mean', 'median', p5, p25, p75, p95],
        'card_type':['nunique', 'mean', 'median', p5, p25, p75, p95],
        'operation_type':['nunique', 'mean', 'median', p5, p25, p75, p95],
        'operation_type_group':['nunique', 'mean', 'median', p5, p25, p75, p95],
        'ecommerce_flag':['nunique', 'mean', 'median', p5, p25, p75, p95],
        'payment_system':['nunique', 'mean', 'median', p5, p25, p75, p95],
        'income_flag':['nunique', 'mean', 'median', p5, p25, p75, p95],
        'mcc':['nunique', 'mean', 'median', p5, p25, p75, p95],
        'country':['nunique', 'mean', p5, p95],
        'city':['nunique', 'mean', p5, p95],
        'mcc_category':['nunique', 'mean', 'median', p5, p25, p75, p95],
        'hour_diff':['nunique', 'mean', 'median', p5, p25, p75, p95],
    })
    t.columns = [prefix_colum + "_".join(x) for x in t.columns.ravel()]
    return t

def extract_basic_aggregations(transactions_frame: pd.DataFrame) -> pd.DataFrame:
    for slice_date in tqdm([0, 14, 30, 60, 90, 180], desc="Extract features in data slice"):
        if slice_date == 0:
            buf = extract_from_df(transactions_frame[transactions_frame['amnt'] != 0.0])
        else:
            buf = pd.merge(buf, extract_from_df(transactions_frame[(transactions_frame['amnt'] != 0.0) & (transactions_frame['days_before'] <= slice_date)], str(slice_date) + '_'), on='app_id')
    return buf

In [223]:
transactions_frame = extract_basic_aggregations(transactions_frame)

HBox(children=(HTML(value='Extract features in data slice'), FloatProgress(value=0.0, max=6.0), HTML(value='')…




In [224]:
memory_usage_of_frame = transactions_frame.memory_usage(index=True).sum() / 10**9
expected_memory_usage = memory_usage_of_frame * 50
print(f'Объем памяти в RAM одной партиции данных с экспортированными фичами: {round(memory_usage_of_frame, 3)} Gb')
print(f'Ожидаемый размер в RAM всего датасета: {round(expected_memory_usage, 3)} Gb')

Объем памяти в RAM одной партиции данных с экспортированными фичами: 0.105 Gb
Ожидаемый размер в RAM всего датасета: 5.249 Gb


In [225]:
transactions_frame

Unnamed: 0_level_0,amnt_count,amnt_std,amnt_mean,amnt_median,amnt_sum,amnt_max,amnt_min,amnt_p5,amnt_p25,amnt_p75,...,180_mcc_category_p25,180_mcc_category_p75,180_mcc_category_p95,180_hour_diff_nunique,180_hour_diff_mean,180_hour_diff_median,180_hour_diff_p5,180_hour_diff_p25,180_hour_diff_p75,180_hour_diff_p95
app_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
0,181,0.094032,0.386645,0.387677,69.982751,0.609342,0.000000,0.246583,0.325579,0.451127,...,2.0,2.0,11.0,74,44.613260,20.0,0.0,1.0,52.00,166.00
1,356,0.078044,0.335351,0.336345,119.384783,0.578477,0.155581,0.225528,0.265272,0.387677,...,2.0,7.0,12.5,81,24.053371,14.0,0.0,1.0,26.25,74.00
2,229,0.087890,0.306107,0.310611,70.098407,0.496822,0.000000,0.163471,0.252813,0.357151,...,1.0,6.0,16.0,75,36.655022,13.0,0.0,2.0,27.00,189.40
3,67,0.098987,0.330808,0.298968,22.164130,0.572565,0.161877,0.187516,0.268158,0.400036,...,2.0,4.0,12.6,33,109.104478,4.0,0.0,0.0,33.00,665.50
4,117,0.128579,0.412301,0.387677,48.239197,0.646036,0.000000,0.296444,0.320248,0.516834,...,2.0,2.0,11.0,71,71.837607,35.0,0.0,0.0,113.00,268.00
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
23642,161,0.082391,0.423662,0.406542,68.209525,0.620993,0.219023,0.283833,0.368165,0.477944,...,1.0,9.0,18.0,88,52.639752,24.0,0.0,1.0,61.00,220.00
23643,199,0.096412,0.354889,0.348838,70.622863,0.561074,0.000000,0.257734,0.297589,0.406668,...,2.0,11.0,14.0,90,42.477387,15.0,0.0,0.0,65.50,163.30
23644,404,0.067686,0.327334,0.320248,132.242786,0.488173,0.162190,0.227991,0.278754,0.367687,...,1.0,6.0,15.0,77,20.938119,9.5,0.0,1.0,23.00,70.85
23645,197,0.079493,0.381171,0.367607,75.090673,0.646036,0.214840,0.272874,0.327250,0.419882,...,1.0,7.0,14.0,56,41.898477,6.0,0.0,2.0,20.00,219.20


In [226]:
del transactions_frame
gc.collect()

378

In [6]:
def prepare_transactions_dataset(path_to_dataset: str, targets: pd.DataFrame, num_parts_to_preprocess_at_once: int = 1, num_parts_total: int=50, 
                                 save_to_path=None, verbose: bool=False):
    """
    возвращает готовый pd.DataFrame с признаками, на которых можно учить модель для целевой задачи.
    path_to_dataset: str  
        путь до датасета с партициями
    num_parts_to_preprocess_at_once: int 
        количество партиций, которые будут одновременно держаться в памяти и обрабатываться
    num_parts_total: int 
        общее количество партиций, которые нужно обработать
    save_to_path: str
        путь до папки, в которой будет сохранен каждый обработанный блок в .parquet формате. Если None, то не будет сохранен 
    verbose: bool
        логирует каждый обрабатываемый кусок данных
    """
    preprocessed_frames = []
    block = 0
    for step in tqdm(range(0, num_parts_total, num_parts_to_preprocess_at_once), 
                                   desc="Transforming transactions data"):
        transactions_frame = read_parquet_dataset_from_local(path_to_dataset, step, num_parts_to_preprocess_at_once, 
                                                             verbose=verbose)
        features = extract_basic_aggregations(transactions_frame).merge(targets, how='left', on=['app_id'])
        if save_to_path:
            block_as_str = str(block)
            block += 1
            if len(block_as_str) == 1:
                block_as_str = '00' + block_as_str
            else:
                block_as_str = '0' + block_as_str
            features.to_parquet(os.path.join(save_to_path, f'processed_chunk_{block_as_str}.parquet'))
            
        preprocessed_frames.append(features)
    return pd.concat(preprocessed_frames)

In [7]:
targets = pd.read_csv(TRAIN_TARGET_PATH)

In [233]:
data = prepare_transactions_dataset(TRAIN_TRANSACTIONS_PATH, targets[['app_id', 'product', 'flag']], num_parts_to_preprocess_at_once=5, num_parts_total=50, 
                                    save_to_path='./preprocessed_transactions/')

HBox(children=(HTML(value='Transforming transactions data'), FloatProgress(value=0.0, max=10.0), HTML(value=''…

HBox(children=(HTML(value='Reading dataset with pandas'), FloatProgress(value=0.0, max=5.0), HTML(value='')))




HBox(children=(HTML(value='Extract features in data slice'), FloatProgress(value=0.0, max=6.0), HTML(value='')…




HBox(children=(HTML(value='Reading dataset with pandas'), FloatProgress(value=0.0, max=5.0), HTML(value='')))




HBox(children=(HTML(value='Extract features in data slice'), FloatProgress(value=0.0, max=6.0), HTML(value='')…




HBox(children=(HTML(value='Reading dataset with pandas'), FloatProgress(value=0.0, max=5.0), HTML(value='')))




HBox(children=(HTML(value='Extract features in data slice'), FloatProgress(value=0.0, max=6.0), HTML(value='')…




HBox(children=(HTML(value='Reading dataset with pandas'), FloatProgress(value=0.0, max=5.0), HTML(value='')))




HBox(children=(HTML(value='Extract features in data slice'), FloatProgress(value=0.0, max=6.0), HTML(value='')…




HBox(children=(HTML(value='Reading dataset with pandas'), FloatProgress(value=0.0, max=5.0), HTML(value='')))




HBox(children=(HTML(value='Extract features in data slice'), FloatProgress(value=0.0, max=6.0), HTML(value='')…




HBox(children=(HTML(value='Reading dataset with pandas'), FloatProgress(value=0.0, max=5.0), HTML(value='')))




HBox(children=(HTML(value='Extract features in data slice'), FloatProgress(value=0.0, max=6.0), HTML(value='')…




HBox(children=(HTML(value='Reading dataset with pandas'), FloatProgress(value=0.0, max=5.0), HTML(value='')))




HBox(children=(HTML(value='Extract features in data slice'), FloatProgress(value=0.0, max=6.0), HTML(value='')…




HBox(children=(HTML(value='Reading dataset with pandas'), FloatProgress(value=0.0, max=5.0), HTML(value='')))




HBox(children=(HTML(value='Extract features in data slice'), FloatProgress(value=0.0, max=6.0), HTML(value='')…




HBox(children=(HTML(value='Reading dataset with pandas'), FloatProgress(value=0.0, max=5.0), HTML(value='')))




HBox(children=(HTML(value='Extract features in data slice'), FloatProgress(value=0.0, max=6.0), HTML(value='')…




HBox(children=(HTML(value='Reading dataset with pandas'), FloatProgress(value=0.0, max=5.0), HTML(value='')))




HBox(children=(HTML(value='Extract features in data slice'), FloatProgress(value=0.0, max=6.0), HTML(value='')…





In [235]:
data.to_csv('merged_data.csv', index=False)

In [8]:
TEST_TARGET_PATH = './alfabattle2_test_target_contest.csv'
test_target =  pd.read_csv(TEST_TARGET_PATH)

In [237]:
test_data = prepare_transactions_dataset(TEST_TRANSACTIONS_PATH, test_target[['app_id', 'product']], num_parts_to_preprocess_at_once=5, num_parts_total=50, 
                                         save_to_path='./preprocessed_test_transactions/')

HBox(children=(HTML(value='Transforming transactions data'), FloatProgress(value=0.0, max=10.0), HTML(value=''…

HBox(children=(HTML(value='Reading dataset with pandas'), FloatProgress(value=0.0, max=5.0), HTML(value='')))




HBox(children=(HTML(value='Extract features in data slice'), FloatProgress(value=0.0, max=6.0), HTML(value='')…




HBox(children=(HTML(value='Reading dataset with pandas'), FloatProgress(value=0.0, max=5.0), HTML(value='')))




HBox(children=(HTML(value='Extract features in data slice'), FloatProgress(value=0.0, max=6.0), HTML(value='')…




HBox(children=(HTML(value='Reading dataset with pandas'), FloatProgress(value=0.0, max=5.0), HTML(value='')))




HBox(children=(HTML(value='Extract features in data slice'), FloatProgress(value=0.0, max=6.0), HTML(value='')…




HBox(children=(HTML(value='Reading dataset with pandas'), FloatProgress(value=0.0, max=5.0), HTML(value='')))




HBox(children=(HTML(value='Extract features in data slice'), FloatProgress(value=0.0, max=6.0), HTML(value='')…




HBox(children=(HTML(value='Reading dataset with pandas'), FloatProgress(value=0.0, max=5.0), HTML(value='')))




HBox(children=(HTML(value='Extract features in data slice'), FloatProgress(value=0.0, max=6.0), HTML(value='')…




HBox(children=(HTML(value='Reading dataset with pandas'), FloatProgress(value=0.0, max=5.0), HTML(value='')))




HBox(children=(HTML(value='Extract features in data slice'), FloatProgress(value=0.0, max=6.0), HTML(value='')…




HBox(children=(HTML(value='Reading dataset with pandas'), FloatProgress(value=0.0, max=5.0), HTML(value='')))




HBox(children=(HTML(value='Extract features in data slice'), FloatProgress(value=0.0, max=6.0), HTML(value='')…




HBox(children=(HTML(value='Reading dataset with pandas'), FloatProgress(value=0.0, max=5.0), HTML(value='')))




HBox(children=(HTML(value='Extract features in data slice'), FloatProgress(value=0.0, max=6.0), HTML(value='')…




HBox(children=(HTML(value='Reading dataset with pandas'), FloatProgress(value=0.0, max=5.0), HTML(value='')))




HBox(children=(HTML(value='Extract features in data slice'), FloatProgress(value=0.0, max=6.0), HTML(value='')…




HBox(children=(HTML(value='Reading dataset with pandas'), FloatProgress(value=0.0, max=5.0), HTML(value='')))




HBox(children=(HTML(value='Extract features in data slice'), FloatProgress(value=0.0, max=6.0), HTML(value='')…





In [238]:
test_data.to_csv('merged_test_data.csv', index=False)

In [9]:
data = pd.read_csv('merged_data.csv')
test_data = pd.read_csv('merged_test_data.csv')

In [10]:
test_data.drop(['product'], axis=1, inplace=True)
test_data = test_data.merge(test_target[['app_id', 'product']], on='app_id', how='right')

data.drop(['product', 'flag'], axis=1, inplace=True)
data = data.merge(targets[['app_id', 'product', 'flag']], on='app_id', how='right')

In [11]:
# Разделяем обучающую выборку по результату


In [12]:
data.fillna(0, inplace=True)

In [33]:
from sklearn.metrics import roc_auc_score
from sklearn.model_selection import train_test_split, StratifiedKFold, KFold
from sklearn.linear_model import RidgeClassifier
from catboost import CatBoostClassifier, Pool, cv

In [None]:
neptune.create_experiment(name='CatBoostClassifier', upload_source_files=['main.ipynb'])

In [40]:
def fitModel(data: pd.DataFrame):
    #X_train, X_test, y_train, y_test = train_test_split(data.drop(['flag'], axis=1), data['flag'], test_size=.3,
    #                                                    random_state=6)
    #model = CatBoostClassifier(random_seed=6, logging_level='Silent').fit(X_train, y_train, logging_level='Verbose', eval_set=(X_test, y_test), plot=True)
    
    #y_pred = model.predict(X_test)
    
    features = [x for x in data.columns if x not in ['app_id', 'flag']]
    targets = data.flag.values

    cv = KFold(n_splits=5, random_state=100, shuffle=True)

    oof = np.zeros(len(data))
    train_preds = np.zeros(len(data))

    models = []


    tree_params = {
        'max_depth': 10,
        'eval_metric': 'AUC',
        'loss_function': 'Logloss',
        'random_state': 100,
        'l2_leaf_reg': 1,
    }


    for fold_, (train_idx, val_idx) in enumerate(cv.split(data, targets), 1):
        print(f'Training with fold {fold_} started.')
        model = cb.CatBoostClassifier(**tree_params)    

        train, val = data.iloc[train_idx], data.iloc[val_idx]

        train_pool = cb.Pool(train[features], train.flag.values)
        val_pool = cb.Pool(val[features], val.flag.values)


        model.fit(train_pool, eval_set=[val_pool], early_stopping_rounds=100, verbose_eval=50, use_best_model=True, plot=False)

        oof[val_idx] = model.predict_proba(val_pool)[:, 1]

        train_preds[train_idx] += model.predict_proba(train_pool)[:, 1] / (cv.n_splits-1)
        models.append(model)
        print(f'Training with fold {fold_} completed.')
    
    
    metrics = roc_auc_score(y_test, train_preds)
    print(metrics)
    neptune.log_metric('roc_auc_score', metrics)
    return models

In [None]:
model = fitModel(data)

Training with fold 1 started.
0:	test: 0.5884239	best: 0.5884239 (0)	total: 3.26s	remaining: 54m 15s
50:	test: 0.7126339	best: 0.7126339 (50)	total: 2m 52s	remaining: 53m 33s
100:	test: 0.7294661	best: 0.7294661 (100)	total: 6m 4s	remaining: 54m 8s
150:	test: 0.7351640	best: 0.7351640 (150)	total: 9m 6s	remaining: 51m 12s
200:	test: 0.7390302	best: 0.7390302 (200)	total: 11m 34s	remaining: 45m 59s
250:	test: 0.7415928	best: 0.7415928 (250)	total: 14m 3s	remaining: 41m 56s
300:	test: 0.7429358	best: 0.7429358 (300)	total: 16m 26s	remaining: 38m 11s
350:	test: 0.7446032	best: 0.7446032 (350)	total: 18m 50s	remaining: 34m 51s
400:	test: 0.7458489	best: 0.7458497 (399)	total: 21m 11s	remaining: 31m 39s
450:	test: 0.7468467	best: 0.7468661 (449)	total: 23m 30s	remaining: 28m 36s
500:	test: 0.7479401	best: 0.7479641 (498)	total: 25m 47s	remaining: 25m 41s
550:	test: 0.7488143	best: 0.7488725 (549)	total: 28m 1s	remaining: 22m 50s
600:	test: 0.7495885	best: 0.7496295 (598)	total: 30m 19s	rema

In [None]:
test_data.fillna(0, inplace=True)

In [None]:
features = [x for x in test_data.columns if x not in ['app_id', 'flag']]
score = np.zeros(len(test_data))

test_pool = cb.Pool(test_data[features])

for m in tqdm.tqdm_notebook(models):
    score += m.predict_proba(test_pool)[:, 1] / len(model)
    
submission = pd.DataFrame({
    'app_id' : test_data.app_id.values,
    'flag': score
}) # ~ 0.732 на public test

In [None]:

submission.to_csv('CatBoostClassifier.csv', index=None)

In [31]:
pred = model.predict(test_data)

In [32]:
submission = pd.DataFrame({
    'app_id' : test_data.app_id.values,
    'flag': pred
}) 

submission.to_csv('CatBoostClassifier.csv', index=None)

In [None]:
neptune.stop()

In [287]:
test_data.shape

(502716, 578)

In [291]:
submission.shape

(502716, 2)

In [284]:
test_target.shape

(502716, 2)

In [272]:
test_target.columns

Index(['app_id', 'product'], dtype='object')

In [286]:
test_data.drop(['product'], axis=1, inplace=True)
test_data = test_data.merge(test_target[['app_id', 'product']], on='app_id', how='right')

In [294]:
test_data[test_data['app_id'] == 1580440]

Unnamed: 0,app_id,amnt_count,amnt_std,amnt_mean,amnt_median,amnt_sum,amnt_max,amnt_min,amnt_p5,amnt_p25,...,180_mcc_category_p75,180_mcc_category_p95,180_hour_diff_nunique,180_hour_diff_mean,180_hour_diff_median,180_hour_diff_p5,180_hour_diff_p25,180_hour_diff_p75,180_hour_diff_p95,product
502713,1580440,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0


In [296]:
data.shape

(816536, 579)

In [297]:
targets.shape

(963811, 3)

In [282]:
submission.shape

(434516, 2)

In [5]:
data = pd.read_csv('merged_data.csv')

In [6]:
targets = pd.read_csv(TRAIN_TARGET_PATH)

In [7]:

data.drop(['product', 'flag'], axis=1, inplace=True)
data = data.merge(targets[['app_id', 'product', 'flag']], on='app_id', how='right')
data.fillna(0, inplace=True)

In [8]:
data.to_csv('merged_data.csv', index=False)