In [1]:
import os

import yaml

# Only touch google.colab when the notebook actually runs under Colab: the
# package is not installed elsewhere, so an unconditional top-level import
# would raise ModuleNotFoundError before this environment check is reached.
if 'google.colab' in str(get_ipython()):
    from google.colab import drive

    # Mount only when the working directory has no `drive` entry yet, so
    # re-running the cell does not try to mount a second time.
    # NOTE(review): this relative check (and the relative paths used below)
    # assume the working directory is /content — confirm.
    if 'drive' not in os.listdir():
        drive.mount('/content/drive')

# Directory, relative to the working directory, that the config cells write
# their YAML files into.
path_to_load = 'drive/My Drive/pytorch-lifestream/configs'

# Конфиги для датасетов CoLES/S2T

## SberHack gender prediction

In [2]:
# Build the settings dict for the SberHack gender-prediction data and save it
# as a YAML file inside `path_to_load`.
config_name = 'sberhack_gender_prediction_data.yaml'
config_seq2target_data_prep = {
    # File locations
    'path_folder': 'drive/My Drive/pytorch-lifestream/data/sber_gender_prediction',
    'events_file': 'transactions.csv',
    'train_file': 'train.csv',
    'test_file': 'test.csv',
    # Split settings and column names
    'val_size': 0.2,
    'id_col': 'client_id',
    'time_col': 'trans_time',
    'target_col': 'gender',
    'random_state': 42,
    'need_time_preprocess': True,
    # Feature column lists
    'category_cols': ['mcc_code', 'trans_type', 'trans_city'],
    'numeric_cols': ['amount', 'day_of_month', 'month', 'hour'],
}

# yaml.dump sorts mapping keys by default, so the key order above does not
# affect the written file.
config_path = os.path.join(path_to_load, config_name)
with open(config_path, 'w') as config_file:
    yaml.dump(config_seq2target_data_prep, config_file)

## Rosbank churn prediction


In [3]:
# Build the settings dict for the Rosbank churn-prediction data and save it
# as a YAML file inside `path_to_load`.
config_name = 'rosbank_churn_prediction_data.yaml'
config_seq2target_data_prep = {
    # File locations
    'path_folder': 'drive/My Drive/pytorch-lifestream/data/rosbank_churn_prediction',
    'events_file': 'transactions_rosbank.csv',
    'train_file': 'rosbank_train.csv',
    'test_file': 'rosbank_test.csv',
    # Split settings and column names
    'val_size': 0.2,
    'id_col': 'cl_id',
    'time_col': 'PERIOD',
    'target_col': 'target_flag',
    'random_state': 42,
    'need_time_preprocess': True,
    # Feature column lists
    'category_cols': ['trx_category', 'MCC', 'channel_type', 'currency'],
    'numeric_cols': ['amount', 'day_of_month', 'month'],
}

# yaml.dump sorts mapping keys by default, so the key order above does not
# affect the written file.
config_path = os.path.join(path_to_load, config_name)
with open(config_path, 'w') as config_file:
    yaml.dump(config_seq2target_data_prep, config_file)

## Sber age prediction

In [4]:
# Build the settings dict for the Sber age-prediction data and save it as a
# YAML file inside `path_to_load`.
config_name = 'sber_age_prediction_data.yaml'
config_seq2target_data_prep = {
    # File locations
    'path_folder': 'drive/My Drive/pytorch-lifestream/data/sber_age_prediction',
    'events_file': 'transactions_age.csv',
    'train_file': 'age_train.csv',
    'test_file': 'age_test.csv',
    # Split settings and column names
    'val_size': 0.2,
    'id_col': 'client_id',
    'time_col': 'trans_date',
    'target_col': 'bins',
    'random_state': 42,
    'need_time_preprocess': False,
    # Feature column lists
    'category_cols': ['small_group'],
    'numeric_cols': ['amount_rur'],
    # Per-column 'in'/'out' sizes keyed by categorical column name
    'trx_embed_dim': {
        'small_group': {'in': 202, 'out': 16},
    },
}

# yaml.dump sorts mapping keys by default, so the key order above does not
# affect the written file.
config_path = os.path.join(path_to_load, config_name)
with open(config_path, 'w') as config_file:
    yaml.dump(config_seq2target_data_prep, config_file)

## DataFusion churn prediction

In [5]:
# Build the settings dict for the DataFusion churn-prediction data and save
# it as a YAML file inside `path_to_load`.
config_name = 'datafusion_churn_prediction_data.yaml'
config_seq2target_data_prep = {
    # File locations (this config lists an extra `add_file` and no `test_file`)
    'path_folder': 'drive/My Drive/pytorch-lifestream/data/datafusion_churn',
    'events_file': 'transactions.csv',
    'train_file': 'train.csv',
    'add_file': 'clients.csv',
    # Split settings and column names
    'val_size': 0.15,
    'test_size': 0.05,
    'id_col': 'user_id',
    'time_col': 'transaction_dttm',
    'target_col': 'target',
    'random_state': 42,
    'need_time_preprocess': True,
    # Feature column lists
    'category_cols': [
        'mcc_code',
        'currency_rk',
        'employee_count_nm',
        'bankemplstatus',
        'customer_age',
        'report',
    ],
    'numeric_cols': ['transaction_amt'],
    # Per-column 'in'/'out' sizes, one entry for every name in category_cols
    'trx_embed_dim': {
        'mcc_code': {'in': 334, 'out': 16},
        'currency_rk': {'in': 6, 'out': 2},
        'employee_count_nm': {'in': 12, 'out': 3},
        'bankemplstatus': {'in': 4, 'out': 2},
        'customer_age': {'in': 6, 'out': 2},
        'report': {'in': 14, 'out': 3},
    },
}

# yaml.dump sorts mapping keys by default, so the key order above does not
# affect the written file.
config_path = os.path.join(path_to_load, config_name)
with open(config_path, 'w') as config_file:
    yaml.dump(config_seq2target_data_prep, config_file)

## Alpha BKI scoring prediction

In [6]:
# DISABLED CELL: everything below is commented out, so running the notebook
# does NOT write 'alpha_bki_scoring_prediction_data.yaml'. Uncomment the whole
# cell (the dict and the `with open(...)` block) to generate that config.
# Unlike the active cells, this one lists a separate `events_test` entry and
# sets `trx_embed_dim` to None.
# NOTE(review): `time_col` has the same value as in the DataFusion cell
# ('transaction_dttm') — confirm this dataset really has that column before
# enabling the cell.
# config_name = 'alpha_bki_scoring_prediction_data.yaml'
# config_seq2target_data_prep = dict(
#               path_folder = 'drive/My Drive/pytorch-lifestream/data/alpha_bki_scoring',
#               events_file = 'train_data',
#               events_test = 'test_data',
#               train_file = 'train_target.csv',
#               test_file = 'test_target.csv',
#               val_size = 0.2,
#               id_col = 'id',
#               time_col = 'transaction_dttm',
#               target_col = 'flag',
#               random_state=42,
#               need_time_preprocess = False,
#               category_cols = ['pre_fterm','enc_paym_5','pre_till_pclose','enc_loans_account_cur','pre_loans_total_overdue',
#                               'enc_paym_24','is_zero_loans6090','enc_paym_19','enc_paym_3','pre_maxover2limit','pre_pterm',
#                               'enc_paym_22','is_zero_loans90','pre_loans_credit_limit','is_zero_maxover2limit','enc_paym_11',
#                               'enc_paym_16','pclose_flag','enc_paym_7','pre_since_confirmed','pre_loans_max_overdue_sum',
#                               'enc_loans_account_holder_type','enc_loans_credit_type','enc_paym_4','enc_paym_1','pre_over2limit',
#                               'enc_paym_21','enc_paym_2','enc_paym_9','enc_paym_12','is_zero_loans5','pre_till_fclose','enc_paym_10',
#                               'enc_paym_13','is_zero_loans530','enc_loans_credit_status','pre_loans_outstanding','is_zero_over2limit',
#                               'enc_paym_17','enc_paym_0','enc_paym_8','pre_since_opened','is_zero_util',
#                               'enc_paym_6','enc_paym_18','pre_loans_credit_cost_rate','enc_paym_23','is_zero_loans3060',
#                               'enc_paym_20','pre_util','fclose_flag','enc_paym_14','enc_paym_15'],
#               numeric_cols = ['pre_loans_next_pay_summ', 'pre_loans5', 'pre_loans530', 'pre_loans3060', 'pre_loans6090','pre_loans90'],
#               trx_embed_dim = None
#               )

# with open(os.path.join(path_to_load,config_name), 'w') as f:
#     yaml.dump(config_seq2target_data_prep, f)