In [1]:
import pandas as pd
import numpy as np
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import precision_recall_curve, roc_auc_score, f1_score
from tqdm import tqdm
import json

# Let wide / long frames be displayed with up to 1000 columns and rows.
pd.options.display.max_columns = 1000
pd.options.display.max_rows = 1000
# Turn off pandas' chained-assignment warning for the whole notebook.
pd.set_option('mode.chained_assignment', None)
# Ignore numpy divide-by-zero / invalid-value floating point warnings.
# np.seterr returns the previous settings, which is what this cell displays.
np.seterr(invalid='ignore', divide='ignore')

{'divide': 'warn', 'over': 'warn', 'under': 'ignore', 'invalid': 'warn'}

In [2]:
# Load the raw schedule log and truncate every date to month precision.
ogrv = pd.read_csv('../data/raw/OGRV.csv', sep=';')
ogrv['date'] = ogrv['date'].to_numpy().astype('datetime64[M]')

# Number of rows marked as sick leave ('Больничный') per employee and month.
n_illness_days_df = (
    ogrv.loc[lambda frame: frame['graphic_rule_level_1'] == 'Больничный']
    .groupby(['hash_tab_num', 'date'])['work_shift_type']
    .size()
    .reset_index(name='n_illness_days')
)

n_illness_days_df.head(2)

Unnamed: 0,hash_tab_num,date,n_illness_days
0,0,2016-03-01,3
1,0,2016-04-01,10


In [3]:
from datetime import datetime, timedelta
from dateutil.relativedelta import relativedelta

def to_datetime(date):
    """
    Converts a numpy datetime64 object to a naive python datetime object.

    The value is measured in seconds from 1970-01-01T00:00:00 and added to the
    epoch as a ``timedelta``. This avoids ``datetime.utcfromtimestamp``, which
    is deprecated since Python 3.12 and can fail for pre-1970 values on some
    platforms.

    Input:
      date - a np.datetime64 object (any value supporting
             ``date - np.datetime64(...)`` followed by division by
             ``np.timedelta64(1, 's')``)
    Output:
      a naive python datetime object holding the same wall-clock value
    """
    elapsed_seconds = (date - np.datetime64('1970-01-01T00:00:00')) / np.timedelta64(1, 's')
    return datetime(1970, 1, 1) + timedelta(seconds=float(elapsed_seconds))

In [4]:
# Load the data

sot = pd.read_csv('../data/raw/sotrudniki.csv', sep=';')
sot = sot.assign(date=pd.to_datetime(sot['date'], format='%Y-%m-%d'))

# Key columns plus the 'sick' label, later joined onto the features by (hash_tab_num, date).
train_target_df = sot.loc[:, ['hash_tab_num', 'date', 'sick']]
train_target_df.head()

Unnamed: 0,hash_tab_num,date,sick
0,0,2015-04-01,0
1,0,2015-05-01,0
2,0,2015-06-01,0
3,0,2015-07-01,0
4,0,2015-08-01,0


---

In [5]:
# Feature columns taken from the employee table. An explicit copy is taken so
# that the column assignment below never targets a slice of `sot` (the original
# only worked silently because the chained-assignment warning is disabled).
sot_data = sot[[
    'hash_tab_num', 'date', 'category', 'gender', 'razryad_fact', 'work_experience_company',
    'name_fact_lvl5', 'education', 'home_to_work_distance'
]].copy()

# Encode gender as 1 for 'мужской' and 0 for everything else (including missing values).
sot_data['gender'] = (sot_data['gender'] == 'мужской').astype('int64')

In [6]:
# Build an auxiliary dataset with the number of employees per division
# (by actual place of work) for every month

division_count = (
    sot_data
    .groupby(['name_fact_lvl5', 'date'])['hash_tab_num']
    .count()
    .rename('personel_num')
    .reset_index()
)

sot_data = sot_data.merge(division_count, how='left', on=['date', 'name_fact_lvl5'])

In [7]:
# Build dummy variables

# Collapse the raw education labels into three groups. Any label that is not
# listed here (including missing values) falls into 'Начальное_среднее'.
education_groups = {
    'Высшее образование': 'Высшее',
    'Высшее-бакалавриат': 'Высшее',
    'Высшее-специалитет': 'Высшее',
    'Ср.профессиональное': 'Среднее_профессинальное',
    'Нач.профессиональное': 'Среднее_профессинальное',
}
sot_data['education'] = sot_data['education'].map(education_groups).fillna('Начальное_среднее')

# One-hot encode the categorical columns and drop the division name,
# which was only needed for the head-count join.
sot_data = pd.get_dummies(sot_data, columns=['category', 'education', 'razryad_fact'])
sot_data = sot_data.drop(columns='name_fact_lvl5')

# Keep the observation month; 'date' is overwritten later with shifted months.
sot_data['orig_date'] = sot_data['date'].copy()

In [121]:
# NOTE(review): this cell carries execution count 121, i.e. it was run out of order.
# Its saved output (93221, 27) appears to reflect the frame after the history trimming
# (93221 rows) and after the six n_illness_days_* columns were added (21 + 6 = 27),
# not the state at this position in a top-to-bottom run.
sot_data.shape

(93221, 27)

In [8]:
# Preview the first rows of the feature frame after one-hot encoding.
sot_data.head()

Unnamed: 0,hash_tab_num,date,gender,work_experience_company,home_to_work_distance,personel_num,category_Рабочие,category_Руководители,category_Служащие,category_Специалисты,education_Высшее,education_Начальное_среднее,education_Среднее_профессинальное,razryad_fact_0,razryad_fact_1,razryad_fact_2,razryad_fact_3,razryad_fact_4,razryad_fact_5,razryad_fact_6,orig_date
0,0,2015-04-01,1,9.0,,,1,0,0,0,0,1,0,0,0,0,1,0,0,0,2015-04-01
1,0,2015-05-01,1,9.0,,,1,0,0,0,0,1,0,0,0,0,1,0,0,0,2015-05-01
2,0,2015-06-01,1,9.0,,,1,0,0,0,0,1,0,0,0,0,1,0,0,0,2015-06-01
3,0,2015-07-01,1,9.0,,,1,0,0,0,0,1,0,0,0,0,1,0,0,0,2015-07-01
4,0,2015-08-01,1,9.0,,,1,0,0,0,0,1,0,0,0,0,1,0,0,0,2015-08-01


In [9]:
# Rows observed in 2019-08 form the base of the submission frame. An explicit
# copy is taken so that adding 'target' never writes into a slice of `sot_data`.
submission_extra = sot_data[sot_data['orig_date'] == pd.to_datetime('2019-08-01')].copy()
# Placeholder label; it is overwritten with thresholded predictions later on.
submission_extra['target'] = 0

---

In [11]:
# JSON object keys are strings, so both keys and values are cast back to int.
with open('../data/preprocessed/date_of_birth.json', 'r') as f:
    date_of_birth_dict = {
        int(tab_num): int(birth_value)
        for tab_num, birth_value in json.load(f).items()
    }


def calc_age(hash_tab_num, calc_date, date_of_birth_dict):
    """
    Return the difference between ``calc_date`` and the stored birth value.

    Input:
      hash_tab_num - key to look up in ``date_of_birth_dict``
      calc_date - value convertible with ``int()`` (the notebook passes a year)
      date_of_birth_dict - mapping from hash_tab_num to an integer birth value
    Output:
      int(calc_date) minus the stored birth value
    Raises:
      KeyError if ``hash_tab_num`` is not present in the mapping
    """
    return int(calc_date) - date_of_birth_dict[hash_tab_num]


# JSON object keys are strings, so the keys are cast back to int; values are kept as loaded.
with open('../data/preprocessed/relatives_info.json', 'r') as f:
    relatives_dict = {int(tab_num): relatives for tab_num, relatives in json.load(f).items()}


def calc_relatives_bins(hash_tab_num, calc_date, relatives_dict):
    '''
    Count an employee's relatives per age group and per sex.

    Each entry of ``relatives_dict[hash_tab_num]`` is a ``(sex, birth_date)``
    pair; the age is ``int(calc_date) - birth_date``.

    Returned list (8 counters):
        0: age 0 - 3: infant
        1: age 4 - 7: child
        2: age 8 - 18: schoolchild
        3: age 19 - 35: young adult
        4: age 36 and above, below retirement age (55 for 'F', 60 for 'M'),
           or any age above 35 when sex is neither 'M' nor 'F'
        5: retirement age and above: pensioner
        6: number of male relatives
        7: number of female relatives

    Relatives with a negative birth_date or a negative age are still counted
    in slots 6 / 7 but are left out of the age groups. An employee missing
    from ``relatives_dict`` gets all zeros.
    '''
    counters = [0] * 8
    relatives = relatives_dict.get(hash_tab_num) if hash_tab_num in relatives_dict else None
    if relatives is None and hash_tab_num not in relatives_dict:
        return counters

    reference = int(calc_date)
    upper_bounds = (3, 7, 18, 35)  # inclusive upper age of slots 0..3

    for (sex, birth_date) in relatives_dict[hash_tab_num]:
        # Sex counters are updated regardless of whether the age is usable.
        if sex == 'M':
            counters[6] += 1
        elif sex == 'F':
            counters[7] += 1

        if birth_date < 0:
            continue
        age = reference - birth_date
        if age < 0:
            continue

        for slot, bound in enumerate(upper_bounds):
            if age <= bound:
                counters[slot] += 1
                break
        else:
            # Older than 35: split by sex-specific retirement age.
            retired = (sex == 'M' and age >= 60) or (sex == 'F' and age >= 55)
            counters[5 if retired else 4] += 1

    return counters

In [12]:
def target_date_features(df):
    """
    Add the features that depend on the target month stored in ``df['date']``.

    The columns 'year', 'month', 'age', 'is_pensioner', 'relatives' and
    'relatives_0' .. 'relatives_7' are written into ``df`` itself (as before);
    the returned frame is a copy without the helper columns 'year' and
    'relatives'.

    Reads the notebook-level ``date_of_birth_dict`` and ``relatives_dict``.

    Input:
      df - frame with 'date' (datetime), 'hash_tab_num' and 'gender' columns
    Output:
      a new DataFrame with the added feature columns
    """
    df['year'] = df['date'].dt.year
    df['month'] = df['date'].dt.month

    # Plain Python lists are iterated once instead of using a row-wise
    # df.apply(axis=1): this is much cheaper and also works for an empty frame,
    # where apply(axis=1) does not return a Series.
    tab_nums = df['hash_tab_num'].tolist()
    years = df['year'].tolist()

    df['age'] = [calc_age(tab_num, year, date_of_birth_dict) for tab_num, year in zip(tab_nums, years)]
    # gender == 1 is compared against the threshold of 60, gender == 0 against 55.
    df['is_pensioner'] = (
        ((df['age'] >= 60) & (df['gender'] == 1)) | ((df['age'] >= 55) & (df['gender'] == 0))
    ).astype(int)

    relatives = [
        calc_relatives_bins(tab_num, year, relatives_dict)
        for tab_num, year in zip(tab_nums, years)
    ]
    # Stored as an object column of 8-element lists, exactly as the original did.
    df['relatives'] = pd.Series(relatives, index=df.index, dtype=object)
    for i in range(8):
        df[f'relatives_{i}'] = [bins[i] for bins in relatives]

    return df.drop(columns=['year', 'relatives'])

## Add previous illnesses

In [13]:
# Drop the first six months of observations (presumably so that every remaining
# row has six earlier months available for the lag features - confirm).
# NOTE: not idempotent - re-running this cell trims another six months.
print(sot_data.shape)
new_start_date = to_datetime(sot_data['orig_date'].min()) + relativedelta(months=6)
sot_data = sot_data.loc[lambda frame: frame['orig_date'] >= new_start_date]
print(sot_data.shape)

(99214, 21)
(93221, 21)


In [14]:
def add_illness_days(df, month_shifts=(1, 2, 3, 4, 5, 6), illness_df=None):
    """
    Add lagged sick-day counts as columns 'n_illness_days_<shift>'.

    For every shift the 'date' column is set to ``orig_date - shift months`` and
    the monthly counts are left-joined on ('hash_tab_num', 'date'). Months
    without a record get -1.

    The input frame is no longer modified: the original implementation
    overwrote 'date' on the caller's frame during the first iteration.

    Input:
      df - frame with 'hash_tab_num' and 'orig_date' columns
      month_shifts - lags (in months) to add; defaults to 1..6 as before
      illness_df - frame with 'hash_tab_num', 'date', 'n_illness_days';
                   defaults to the notebook-level ``n_illness_days_df``
    Output:
      a new DataFrame; its 'date' column holds ``orig_date`` minus the last shift
    """
    if illness_df is None:
        illness_df = n_illness_days_df

    out = df.copy()
    for month_shift in month_shifts:
        out['date'] = out['orig_date'] - pd.DateOffset(months=month_shift)
        out = pd.merge(out, illness_df, on=['hash_tab_num', 'date'], how='left')
        # -1 marks "no sick-leave record for that month".
        out['n_illness_days'] = out['n_illness_days'].fillna(-1).astype('int')
        out = out.rename(columns={'n_illness_days': f'n_illness_days_{month_shift}'})
    return out

In [15]:
# Attach the lagged sick-day counts to both the training and the submission frame.
sot_data, submission_extra = (
    add_illness_days(sot_data),
    add_illness_days(submission_extra),
)

In [75]:
# Standard python libraries
import os
import time
import re

# Installed libraries
import numpy as np
import pandas as pd
from sklearn.metrics import accuracy_score, f1_score, precision_recall_curve
from sklearn.model_selection import train_test_split
import torch

# Imports from our package
from lightautoml.automl.presets.tabular_presets import TabularAutoML, TabularUtilizedAutoML
from lightautoml.dataset.roles import DatetimeRole
from lightautoml.tasks import Task
from lightautoml.utils.profiler import Profiler
from lightautoml.validation.np_iterators import TimeSeriesIterator


# Run-wide settings
N_THREADS = 30  # threads cnt for lgbm and linear models
N_FOLDS = 5  # folds cnt for AutoML
RANDOM_STATE = 42  # fixed random state for various reasons
TEST_SIZE = 0.2  # Test size for metric check
TIMEOUT = 300  # Time in seconds for automl run

# Binary classification task
task = Task('binary')

# Column roles: 'sick' is the target, 'gender' is categorical and
# 'date' is a datetime column with year / month seasonality features.
roles = {
    'target': 'sick',
    'category': ["gender"],
    DatetimeRole(seasonality=("y", "m"), date_format='%Y-%m-%d', base_feats=True): "date",
}

In [101]:
def pandas_fill(arr):
    """
    Replace missing values in a 1-D array-like with the mean of the other values.

    Input:
      arr - 1-D array-like of numbers
    Output:
      numpy array of the same length; if every value is missing the
      result stays all-NaN
    """
    values = pd.Series(arr)
    mean_value = values.mean()
    return values.fillna(mean_value).values

In [110]:
results = []
models = []  # kept for compatibility; nothing is appended to it in this cell

# One model per forecast horizon: features observed at `orig_date` are paired
# with the 'sick' label `months` months later.
for months in range(1, 13):
    sot_data['date'] = sot_data['orig_date'] + pd.DateOffset(months=months)
    df_train = pd.merge(sot_data, train_target_df, on=['hash_tab_num', 'date'])
    df_train = target_date_features(df_train)

    # X keeps the 'sick' column: AutoML takes the target from it via `roles`.
    X = df_train.drop(columns=['hash_tab_num', 'orig_date']).fillna(-100)
    y = df_train['sick']

    # An explicit unit is required: pandas 2.x rejects the unit-less np.datetime64 dtype.
    cv_iter = TimeSeriesIterator(X["date"].astype('datetime64[ns]'), n_splits=N_FOLDS, sorted_kfold=False)

    automl = TabularAutoML(task = task,
                       timeout = TIMEOUT,
                       cpu_limit = N_THREADS,
                       general_params = {'use_algos': [['linear_l2', 'lgb', 'lgb_tuned']]},
                       reader_params = {'n_jobs': N_THREADS})
    oof_pred = automl.fit_predict(X, roles = roles, cv_iter=cv_iter)

    # Pick the decision threshold that maximizes F1 on the out-of-fold predictions
    # (missing out-of-fold predictions are replaced by the mean prediction).
    p, r, thresholds = precision_recall_curve(y, pandas_fill(oof_pred.data[:, 0]))
    # precision / recall have one more entry than thresholds (the final p=1, r=0
    # point has no threshold), so it is dropped to keep indices aligned.
    # Points with p + r == 0 get F1 = 0 instead of NaN, so no masking is needed
    # and argmax always indexes `thresholds` correctly.
    denom = p[:-1] + r[:-1]
    f1_scores = np.divide(2 * r[:-1] * p[:-1], denom, out=np.zeros_like(denom), where=denom > 0)
    best_idx = np.argmax(f1_scores)
    th = thresholds[best_idx]
    print("Results: ", months, th, f1_scores[best_idx])

    # Score the submission rows for the same horizon.
    submission_extra['date'] = submission_extra['orig_date'] + pd.DateOffset(months=months)
    df_test = target_date_features(submission_extra)

    df_test = df_test.drop(columns=['hash_tab_num', 'orig_date', 'target']).fillna(-100)
    sub_pred = automl.predict(df_test).data[:, 0]
    print(f"Test pred has {sum(np.isnan(sub_pred))} nans")
    sub_pred = pandas_fill(sub_pred)
    submission_extra['target'] = (sub_pred >= th).astype(int)
    results.append(submission_extra[['hash_tab_num', 'date', 'target']].copy())

result_df = pd.concat(results, ignore_index=True)
result_df.head()

Start automl preset with listed constraints:
- time: 300 seconds
- cpus: 30 cores
- memory: 16 gb

Train data shape: (90298, 37)
Feats was rejected during automatic roles guess: []


Layer 1 ...
Train process start. Time left 291.4738619327545 secs
Start fitting Lvl_0_Pipe_0_Mod_0_LinearL2 ...

===== Start working with fold 0 for Lvl_0_Pipe_0_Mod_0_LinearL2 =====

Linear model: C = 1e-05 score = 0.6518654222582698
Linear model: C = 5e-05 score = 0.6561847916681569
Linear model: C = 0.0001 score = 0.6578909022719253
Linear model: C = 0.0005 score = 0.6597775864117227
Linear model: C = 0.001 score = 0.6595777945037391
Linear model: C = 0.005 score = 0.6581848216904498

===== Start working with fold 1 for Lvl_0_Pipe_0_Mod_0_LinearL2 =====

Linear model: C = 1e-05 score = 0.649159121424521
Linear model: C = 5e-05 score = 0.6563659750370896
Linear model: C = 0.0001 score = 0.6585275167398509
Linear model: C = 0.0005 score = 0.6597814071435789
Linear model: C = 0.001 score = 0.65925072776965

Lvl_0_Pipe_1_Mod_1_LightGBM fitting and predicting completed
Start fitting Lvl_0_Pipe_1_Mod_1_LightGBM ...

===== Start working with fold 0 for Lvl_0_Pipe_1_Mod_1_LightGBM =====

Training until validation scores don't improve for 200 rounds
[100]	valid's auc: 0.698707
[200]	valid's auc: 0.695356
Early stopping, best iteration is:
[91]	valid's auc: 0.699135
Lvl_0_Pipe_1_Mod_1_LightGBM fitting and predicting completed
Start fitting Lvl_0_Pipe_1_Mod_1_LightGBM ...

===== Start working with fold 0 for Lvl_0_Pipe_1_Mod_1_LightGBM =====

Training until validation scores don't improve for 200 rounds
[100]	valid's auc: 0.695849
[200]	valid's auc: 0.694307
[300]	valid's auc: 0.691186
Early stopping, best iteration is:
[104]	valid's auc: 0.696267
Lvl_0_Pipe_1_Mod_1_LightGBM fitting and predicting completed
Start fitting Lvl_0_Pipe_1_Mod_1_LightGBM ...

===== Start working with fold 0 for Lvl_0_Pipe_1_Mod_1_LightGBM =====

Training until validation scores don't improve for 200 rounds
[100]	valid'

Start fitting Lvl_0_Pipe_1_Mod_1_LightGBM ...

===== Start working with fold 0 for Lvl_0_Pipe_1_Mod_1_LightGBM =====

Training until validation scores don't improve for 200 rounds
[100]	valid's auc: 0.700239
[200]	valid's auc: 0.698001
[300]	valid's auc: 0.695245
Early stopping, best iteration is:
[141]	valid's auc: 0.700442
Lvl_0_Pipe_1_Mod_1_LightGBM fitting and predicting completed
Start fitting Lvl_0_Pipe_1_Mod_1_LightGBM ...

===== Start working with fold 0 for Lvl_0_Pipe_1_Mod_1_LightGBM =====

Training until validation scores don't improve for 200 rounds
[100]	valid's auc: 0.698326
[200]	valid's auc: 0.697066
[300]	valid's auc: 0.694583
Early stopping, best iteration is:
[136]	valid's auc: 0.698444
Lvl_0_Pipe_1_Mod_1_LightGBM fitting and predicting completed
Start fitting Lvl_0_Pipe_1_Mod_1_LightGBM ...

===== Start working with fold 0 for Lvl_0_Pipe_1_Mod_1_LightGBM =====

Training until validation scores don't improve for 200 rounds
[100]	valid's auc: 0.69436
[200]	valid's auc

Training until validation scores don't improve for 100 rounds
[100]	valid's auc: 0.725237
Early stopping, best iteration is:
[86]	valid's auc: 0.726016

===== Start working with fold 3 for Lvl_0_Pipe_1_Mod_1_LightGBM =====

Training until validation scores don't improve for 100 rounds
[100]	valid's auc: 0.727191
[200]	valid's auc: 0.729941
[300]	valid's auc: 0.731229
[400]	valid's auc: 0.731993
[500]	valid's auc: 0.731735
Early stopping, best iteration is:
[465]	valid's auc: 0.732291
Lvl_0_Pipe_1_Mod_1_LightGBM fitting and predicting completed
Time left 95.98853182792664
Blending: Optimization starts with equal weights and score 0.7172545288778027
Blending, iter 0: score = 0.7187126676175246, weights = [0.15616275 0.38184068 0.46199656]
Blending, iter 1: score = 0.7187142362156043, weights = [0.1514562  0.38597432 0.46256948]
Blending, iter 2: score = 0.7187146949824681, weights = [0.1519464  0.3872497  0.46080396]
Blending, iter 3: score = 0.7187146775841239, weights = [0.1519464  0.3


===== Start working with fold 0 for Lvl_0_Pipe_1_Mod_1_LightGBM =====

Training until validation scores don't improve for 200 rounds
[100]	valid's auc: 0.674854
[200]	valid's auc: 0.672072
Early stopping, best iteration is:
[58]	valid's auc: 0.675868
Lvl_0_Pipe_1_Mod_1_LightGBM fitting and predicting completed
Start fitting Lvl_0_Pipe_1_Mod_1_LightGBM ...

===== Start working with fold 0 for Lvl_0_Pipe_1_Mod_1_LightGBM =====

Training until validation scores don't improve for 200 rounds
[100]	valid's auc: 0.679263
[200]	valid's auc: 0.680461
[300]	valid's auc: 0.679635
Early stopping, best iteration is:
[160]	valid's auc: 0.681322
Lvl_0_Pipe_1_Mod_1_LightGBM fitting and predicting completed
Start fitting Lvl_0_Pipe_1_Mod_1_LightGBM ...

===== Start working with fold 0 for Lvl_0_Pipe_1_Mod_1_LightGBM =====

Training until validation scores don't improve for 200 rounds
[100]	valid's auc: 0.678174
[200]	valid's auc: 0.67808
[300]	valid's auc: 0.675246
Early stopping, best iteration is:
[

Start fitting Lvl_0_Pipe_1_Mod_1_LightGBM ...

===== Start working with fold 0 for Lvl_0_Pipe_1_Mod_1_LightGBM =====

Training until validation scores don't improve for 200 rounds
[100]	valid's auc: 0.679693
[200]	valid's auc: 0.680612
[300]	valid's auc: 0.679103
Early stopping, best iteration is:
[174]	valid's auc: 0.681032
Lvl_0_Pipe_1_Mod_1_LightGBM fitting and predicting completed
Start fitting Lvl_0_Pipe_1_Mod_1_LightGBM ...

===== Start working with fold 0 for Lvl_0_Pipe_1_Mod_1_LightGBM =====

Training until validation scores don't improve for 200 rounds
[100]	valid's auc: 0.682533
[200]	valid's auc: 0.681962
[300]	valid's auc: 0.678714
Early stopping, best iteration is:
[142]	valid's auc: 0.683137
Lvl_0_Pipe_1_Mod_1_LightGBM fitting and predicting completed
Start fitting Lvl_0_Pipe_1_Mod_1_LightGBM ...

===== Start working with fold 0 for Lvl_0_Pipe_1_Mod_1_LightGBM =====

Training until validation scores don't improve for 200 rounds
[100]	valid's auc: 0.680428
[200]	valid's au


===== Start working with fold 0 for Lvl_0_Pipe_1_Mod_1_LightGBM =====

Training until validation scores don't improve for 200 rounds
[100]	valid's auc: 0.680104
[200]	valid's auc: 0.682153
[300]	valid's auc: 0.681324
[400]	valid's auc: 0.680696
Early stopping, best iteration is:
[219]	valid's auc: 0.682296
Lvl_0_Pipe_1_Mod_1_LightGBM fitting and predicting completed
Start fitting Lvl_0_Pipe_1_Mod_1_LightGBM ...

===== Start working with fold 0 for Lvl_0_Pipe_1_Mod_1_LightGBM =====

Training until validation scores don't improve for 200 rounds
[100]	valid's auc: 0.681714
[200]	valid's auc: 0.681994
[300]	valid's auc: 0.680298
Early stopping, best iteration is:
[178]	valid's auc: 0.68261
Lvl_0_Pipe_1_Mod_1_LightGBM fitting and predicting completed
Start fitting Lvl_0_Pipe_1_Mod_1_LightGBM ...

===== Start working with fold 0 for Lvl_0_Pipe_1_Mod_1_LightGBM =====

Training until validation scores don't improve for 200 rounds
[100]	valid's auc: 0.680827
[200]	valid's auc: 0.678696
[300]	v

Training until validation scores don't improve for 200 rounds
[100]	valid's auc: 0.680614
[200]	valid's auc: 0.681836
[300]	valid's auc: 0.679897
[400]	valid's auc: 0.677953
Early stopping, best iteration is:
[211]	valid's auc: 0.681953
Lvl_0_Pipe_1_Mod_1_LightGBM fitting and predicting completed
Start fitting Lvl_0_Pipe_1_Mod_1_LightGBM ...

===== Start working with fold 0 for Lvl_0_Pipe_1_Mod_1_LightGBM =====

Training until validation scores don't improve for 200 rounds
[100]	valid's auc: 0.682058
[200]	valid's auc: 0.682849
[300]	valid's auc: 0.681959
Early stopping, best iteration is:
[169]	valid's auc: 0.683567
Lvl_0_Pipe_1_Mod_1_LightGBM fitting and predicting completed
Start fitting Lvl_0_Pipe_1_Mod_1_LightGBM ...

===== Start working with fold 0 for Lvl_0_Pipe_1_Mod_1_LightGBM =====

Training until validation scores don't improve for 200 rounds
[100]	valid's auc: 0.680359
[200]	valid's auc: 0.680867
[300]	valid's auc: 0.67925
Early stopping, best iteration is:
[132]	valid's au

[300]	valid's auc: 0.6795
Early stopping, best iteration is:
[140]	valid's auc: 0.682399
Lvl_0_Pipe_1_Mod_1_LightGBM fitting and predicting completed
Start fitting Lvl_0_Pipe_1_Mod_1_LightGBM ...

===== Start working with fold 0 for Lvl_0_Pipe_1_Mod_1_LightGBM =====

Training until validation scores don't improve for 200 rounds
[100]	valid's auc: 0.680508
[200]	valid's auc: 0.680079
[300]	valid's auc: 0.677335
Early stopping, best iteration is:
[133]	valid's auc: 0.681559
Lvl_0_Pipe_1_Mod_1_LightGBM fitting and predicting completed
Start fitting Lvl_0_Pipe_1_Mod_1_LightGBM ...

===== Start working with fold 0 for Lvl_0_Pipe_1_Mod_1_LightGBM =====

Training until validation scores don't improve for 200 rounds
[100]	valid's auc: 0.680896
[200]	valid's auc: 0.679172
[300]	valid's auc: 0.677424
Early stopping, best iteration is:
[133]	valid's auc: 0.681712
Lvl_0_Pipe_1_Mod_1_LightGBM fitting and predicting completed
Start fitting Lvl_0_Pipe_1_Mod_1_LightGBM ...

===== Start working with fo

Training until validation scores don't improve for 200 rounds
[100]	valid's auc: 0.705449
[200]	valid's auc: 0.704401
[300]	valid's auc: 0.701486
Early stopping, best iteration is:
[106]	valid's auc: 0.706198
Lvl_0_Pipe_1_Mod_1_LightGBM fitting and predicting completed
Start fitting Lvl_0_Pipe_1_Mod_1_LightGBM ...

===== Start working with fold 0 for Lvl_0_Pipe_1_Mod_1_LightGBM =====

Training until validation scores don't improve for 200 rounds
[100]	valid's auc: 0.708355
[200]	valid's auc: 0.707604
[300]	valid's auc: 0.70558
Early stopping, best iteration is:
[125]	valid's auc: 0.709708
Lvl_0_Pipe_1_Mod_1_LightGBM fitting and predicting completed
Start fitting Lvl_0_Pipe_1_Mod_1_LightGBM ...

===== Start working with fold 0 for Lvl_0_Pipe_1_Mod_1_LightGBM =====

Training until validation scores don't improve for 200 rounds
[100]	valid's auc: 0.70589
[200]	valid's auc: 0.704232
[300]	valid's auc: 0.701636
Early stopping, best iteration is:
[117]	valid's auc: 0.707052
Lvl_0_Pipe_1_Mod_

[100]	valid's auc: 0.708935
[200]	valid's auc: 0.709153
[300]	valid's auc: 0.708578
Early stopping, best iteration is:
[166]	valid's auc: 0.7099
Lvl_0_Pipe_1_Mod_1_LightGBM fitting and predicting completed
Start fitting Lvl_0_Pipe_1_Mod_1_LightGBM ...

===== Start working with fold 0 for Lvl_0_Pipe_1_Mod_1_LightGBM =====

Training until validation scores don't improve for 200 rounds
[100]	valid's auc: 0.708308
[200]	valid's auc: 0.710172
[300]	valid's auc: 0.709499
Early stopping, best iteration is:
[182]	valid's auc: 0.710717
Lvl_0_Pipe_1_Mod_1_LightGBM fitting and predicting completed
Start fitting Lvl_0_Pipe_1_Mod_1_LightGBM ...

===== Start working with fold 0 for Lvl_0_Pipe_1_Mod_1_LightGBM =====

Training until validation scores don't improve for 200 rounds
[100]	valid's auc: 0.710704
[200]	valid's auc: 0.710069
[300]	valid's auc: 0.70756
Early stopping, best iteration is:
[103]	valid's auc: 0.7109
Lvl_0_Pipe_1_Mod_1_LightGBM fitting and predicting completed
Start fitting Lvl_0_P

Start fitting Lvl_0_Pipe_1_Mod_1_LightGBM ...

===== Start working with fold 0 for Lvl_0_Pipe_1_Mod_1_LightGBM =====

Training until validation scores don't improve for 200 rounds
[100]	valid's auc: 0.709655
[200]	valid's auc: 0.711132
[300]	valid's auc: 0.710826
[400]	valid's auc: 0.709566
Early stopping, best iteration is:
[222]	valid's auc: 0.711597
Lvl_0_Pipe_1_Mod_1_LightGBM fitting and predicting completed
Start fitting Lvl_0_Pipe_1_Mod_1_LightGBM ...

===== Start working with fold 0 for Lvl_0_Pipe_1_Mod_1_LightGBM =====

Training until validation scores don't improve for 200 rounds
[100]	valid's auc: 0.709232
[200]	valid's auc: 0.709884
[300]	valid's auc: 0.705055
Early stopping, best iteration is:
[160]	valid's auc: 0.710958
Lvl_0_Pipe_1_Mod_1_LightGBM fitting and predicting completed
Start fitting Lvl_0_Pipe_1_Mod_1_LightGBM ...

===== Start working with fold 0 for Lvl_0_Pipe_1_Mod_1_LightGBM =====

Training until validation scores don't improve for 200 rounds
[100]	valid's au

Start fitting Lvl_0_Pipe_1_Mod_1_LightGBM ...

===== Start working with fold 0 for Lvl_0_Pipe_1_Mod_1_LightGBM =====

Training until validation scores don't improve for 200 rounds
[100]	valid's auc: 0.706289
[200]	valid's auc: 0.70488
[300]	valid's auc: 0.702749
Early stopping, best iteration is:
[127]	valid's auc: 0.706863
Lvl_0_Pipe_1_Mod_1_LightGBM fitting and predicting completed
Start fitting Lvl_0_Pipe_1_Mod_1_LightGBM ...

===== Start working with fold 0 for Lvl_0_Pipe_1_Mod_1_LightGBM =====

Training until validation scores don't improve for 200 rounds
[100]	valid's auc: 0.709835
[200]	valid's auc: 0.71051
[300]	valid's auc: 0.709257
Early stopping, best iteration is:
[191]	valid's auc: 0.71089
Lvl_0_Pipe_1_Mod_1_LightGBM fitting and predicting completed
Start fitting Lvl_0_Pipe_1_Mod_1_LightGBM ...

===== Start working with fold 0 for Lvl_0_Pipe_1_Mod_1_LightGBM =====

Training until validation scores don't improve for 200 rounds
[100]	valid's auc: 0.710697
[200]	valid's auc: 


===== Start working with fold 0 for Lvl_0_Pipe_1_Mod_1_LightGBM =====

Training until validation scores don't improve for 200 rounds
[100]	valid's auc: 0.70931
[200]	valid's auc: 0.709258
[300]	valid's auc: 0.704933
Early stopping, best iteration is:
[118]	valid's auc: 0.710956
Lvl_0_Pipe_1_Mod_1_LightGBM fitting and predicting completed
Start fitting Lvl_0_Pipe_1_Mod_1_LightGBM ...

===== Start working with fold 0 for Lvl_0_Pipe_1_Mod_1_LightGBM =====

Training until validation scores don't improve for 200 rounds
[100]	valid's auc: 0.710165
[200]	valid's auc: 0.711243
[300]	valid's auc: 0.709688
Early stopping, best iteration is:
[196]	valid's auc: 0.711724
Lvl_0_Pipe_1_Mod_1_LightGBM fitting and predicting completed
Start fitting Lvl_0_Pipe_1_Mod_1_LightGBM ...

===== Start working with fold 0 for Lvl_0_Pipe_1_Mod_1_LightGBM =====

Training until validation scores don't improve for 200 rounds
[100]	valid's auc: 0.71136
[200]	valid's auc: 0.711387
[300]	valid's auc: 0.710169
Early st


===== Start working with fold 1 for Lvl_0_Pipe_0_Mod_0_LinearL2 =====

Linear model: C = 1e-05 score = 0.6213522738179544
Linear model: C = 5e-05 score = 0.6319311138896311
Linear model: C = 0.0001 score = 0.6368518359993002
Linear model: C = 0.0005 score = 0.6441497743317213
Linear model: C = 0.001 score = 0.6464214630002885
Linear model: C = 0.005 score = 0.6490469337850445
Linear model: C = 0.01 score = 0.6492693919220585
Linear model: C = 0.05 score = 0.649011319586577
Linear model: C = 0.1 score = 0.649011319586577

===== Start working with fold 2 for Lvl_0_Pipe_0_Mod_0_LinearL2 =====

Linear model: C = 1e-05 score = 0.6482866206852451
Linear model: C = 5e-05 score = 0.6597190491330969
Linear model: C = 0.0001 score = 0.66359362038861
Linear model: C = 0.0005 score = 0.6690103040777575
Linear model: C = 0.001 score = 0.6700795053474272
Linear model: C = 0.005 score = 0.6700031972659823
Linear model: C = 0.01 score = 0.6700031972659823

===== Start working with fold 3 for Lvl_0_Pi

Early stopping, best iteration is:
[133]	valid's auc: 0.684296
Lvl_0_Pipe_1_Mod_1_LightGBM fitting and predicting completed
Start fitting Lvl_0_Pipe_1_Mod_1_LightGBM ...

===== Start working with fold 0 for Lvl_0_Pipe_1_Mod_1_LightGBM =====

Training until validation scores don't improve for 200 rounds
[100]	valid's auc: 0.686584
[200]	valid's auc: 0.686263
[300]	valid's auc: 0.684869
Early stopping, best iteration is:
[141]	valid's auc: 0.687683
Lvl_0_Pipe_1_Mod_1_LightGBM fitting and predicting completed
Start fitting Lvl_0_Pipe_1_Mod_1_LightGBM ...

===== Start working with fold 0 for Lvl_0_Pipe_1_Mod_1_LightGBM =====

Training until validation scores don't improve for 200 rounds
[100]	valid's auc: 0.685269
[200]	valid's auc: 0.684297
Early stopping, best iteration is:
[52]	valid's auc: 0.686403
Lvl_0_Pipe_1_Mod_1_LightGBM fitting and predicting completed
Start fitting Lvl_0_Pipe_1_Mod_1_LightGBM ...

===== Start working with fold 0 for Lvl_0_Pipe_1_Mod_1_LightGBM =====

Training un

[100]	valid's auc: 0.686299
[200]	valid's auc: 0.683215
Early stopping, best iteration is:
[91]	valid's auc: 0.687491
Lvl_0_Pipe_1_Mod_1_LightGBM fitting and predicting completed
Start fitting Lvl_0_Pipe_1_Mod_1_LightGBM ...

===== Start working with fold 0 for Lvl_0_Pipe_1_Mod_1_LightGBM =====

Training until validation scores don't improve for 200 rounds
[100]	valid's auc: 0.686533
[200]	valid's auc: 0.686119
Early stopping, best iteration is:
[93]	valid's auc: 0.687246
Lvl_0_Pipe_1_Mod_1_LightGBM fitting and predicting completed
Start fitting Lvl_0_Pipe_1_Mod_1_LightGBM ...

===== Start working with fold 0 for Lvl_0_Pipe_1_Mod_1_LightGBM =====

Training until validation scores don't improve for 200 rounds
[100]	valid's auc: 0.677425
[200]	valid's auc: 0.676403
Early stopping, best iteration is:
[87]	valid's auc: 0.678066
Lvl_0_Pipe_1_Mod_1_LightGBM fitting and predicting completed
Start fitting Lvl_0_Pipe_1_Mod_1_LightGBM ...

===== Start working with fold 0 for Lvl_0_Pipe_1_Mod_1_L

Linear model: C = 1e-05 score = 0.6389067924681586
Linear model: C = 5e-05 score = 0.6455157684781753
Linear model: C = 0.0001 score = 0.6487768076424398
Linear model: C = 0.0005 score = 0.6534948538959183
Linear model: C = 0.001 score = 0.6537381787188861
Linear model: C = 0.005 score = 0.65143646506568
Linear model: C = 0.01 score = 0.6500487045664014

===== Start working with fold 1 for Lvl_0_Pipe_0_Mod_0_LinearL2 =====

Linear model: C = 1e-05 score = 0.6484143823185334
Linear model: C = 5e-05 score = 0.6553174998823683
Linear model: C = 0.0001 score = 0.6580034214098217
Linear model: C = 0.0005 score = 0.6607053648489464
Linear model: C = 0.001 score = 0.6606328910260109
Linear model: C = 0.005 score = 0.6600752572079653

===== Start working with fold 2 for Lvl_0_Pipe_0_Mod_0_LinearL2 =====

Linear model: C = 1e-05 score = 0.6420468934176276
Linear model: C = 5e-05 score = 0.6517857750352629
Linear model: C = 0.0001 score = 0.6552326296479479
Linear model: C = 0.0005 score = 0.659

[100]	valid's auc: 0.702262
[200]	valid's auc: 0.704312
[300]	valid's auc: 0.704055
[400]	valid's auc: 0.703048
Early stopping, best iteration is:
[251]	valid's auc: 0.704879
Lvl_0_Pipe_1_Mod_1_LightGBM fitting and predicting completed
Start fitting Lvl_0_Pipe_1_Mod_1_LightGBM ...

===== Start working with fold 0 for Lvl_0_Pipe_1_Mod_1_LightGBM =====

Training until validation scores don't improve for 200 rounds
[100]	valid's auc: 0.701372
[200]	valid's auc: 0.702053
[300]	valid's auc: 0.698543
Early stopping, best iteration is:
[137]	valid's auc: 0.702384
Lvl_0_Pipe_1_Mod_1_LightGBM fitting and predicting completed
Start fitting Lvl_0_Pipe_1_Mod_1_LightGBM ...

===== Start working with fold 0 for Lvl_0_Pipe_1_Mod_1_LightGBM =====

Training until validation scores don't improve for 200 rounds
[100]	valid's auc: 0.699332
[200]	valid's auc: 0.698419
Early stopping, best iteration is:
[27]	valid's auc: 0.70111
Lvl_0_Pipe_1_Mod_1_LightGBM fitting and predicting completed
Start fitting Lvl_

[100]	valid's auc: 0.702331
[200]	valid's auc: 0.702507
[300]	valid's auc: 0.702358
Early stopping, best iteration is:
[121]	valid's auc: 0.703244
Lvl_0_Pipe_1_Mod_1_LightGBM fitting and predicting completed
Start fitting Lvl_0_Pipe_1_Mod_1_LightGBM ...

===== Start working with fold 0 for Lvl_0_Pipe_1_Mod_1_LightGBM =====

Training until validation scores don't improve for 200 rounds
[100]	valid's auc: 0.703337
[200]	valid's auc: 0.701146
Early stopping, best iteration is:
[39]	valid's auc: 0.703941
Lvl_0_Pipe_1_Mod_1_LightGBM fitting and predicting completed
Start fitting Lvl_0_Pipe_1_Mod_1_LightGBM ...

===== Start working with fold 0 for Lvl_0_Pipe_1_Mod_1_LightGBM =====

Training until validation scores don't improve for 200 rounds
[100]	valid's auc: 0.702708
[200]	valid's auc: 0.705274
[300]	valid's auc: 0.705078
[400]	valid's auc: 0.704211
Early stopping, best iteration is:
[281]	valid's auc: 0.705801
Lvl_0_Pipe_1_Mod_1_LightGBM fitting and predicting completed
Start fitting Lvl

[300]	valid's auc: 0.702546
Early stopping, best iteration is:
[177]	valid's auc: 0.703672
Lvl_0_Pipe_1_Mod_1_LightGBM fitting and predicting completed
Start fitting Lvl_0_Pipe_1_Mod_1_LightGBM ...

===== Start working with fold 0 for Lvl_0_Pipe_1_Mod_1_LightGBM =====

Training until validation scores don't improve for 200 rounds
[100]	valid's auc: 0.702456
[200]	valid's auc: 0.704915
[300]	valid's auc: 0.704856
[400]	valid's auc: 0.703566
Early stopping, best iteration is:
[244]	valid's auc: 0.705377
Lvl_0_Pipe_1_Mod_1_LightGBM fitting and predicting completed
Start fitting Lvl_0_Pipe_1_Mod_1_LightGBM ...

===== Start working with fold 0 for Lvl_0_Pipe_1_Mod_1_LightGBM =====

Training until validation scores don't improve for 200 rounds
[100]	valid's auc: 0.702345
[200]	valid's auc: 0.70436
[300]	valid's auc: 0.70308
Early stopping, best iteration is:
[173]	valid's auc: 0.704461
Lvl_0_Pipe_1_Mod_1_LightGBM fitting and predicting completed
Start fitting Lvl_0_Pipe_1_Mod_1_LightGBM ...


Early stopping, best iteration is:
[288]	valid's auc: 0.710505
Lvl_0_Pipe_1_Mod_1_LightGBM fitting and predicting completed
Start fitting Lvl_0_Pipe_1_Mod_1_LightGBM ...

===== Start working with fold 0 for Lvl_0_Pipe_1_Mod_1_LightGBM =====

Training until validation scores don't improve for 200 rounds
[100]	valid's auc: 0.709498
[200]	valid's auc: 0.709917
[300]	valid's auc: 0.710441
Early stopping, best iteration is:
[146]	valid's auc: 0.710643
Lvl_0_Pipe_1_Mod_1_LightGBM fitting and predicting completed
Start fitting Lvl_0_Pipe_1_Mod_1_LightGBM ...

===== Start working with fold 0 for Lvl_0_Pipe_1_Mod_1_LightGBM =====

Training until validation scores don't improve for 200 rounds
[100]	valid's auc: 0.704854
[200]	valid's auc: 0.70599
[300]	valid's auc: 0.704781
Early stopping, best iteration is:
[161]	valid's auc: 0.707725
Lvl_0_Pipe_1_Mod_1_LightGBM fitting and predicting completed
Start fitting Lvl_0_Pipe_1_Mod_1_LightGBM ...

===== Start working with fold 0 for Lvl_0_Pipe_1_Mod_1

[100]	valid's auc: 0.707201
[200]	valid's auc: 0.709512
[300]	valid's auc: 0.711845
[400]	valid's auc: 0.711582
[500]	valid's auc: 0.711865
[600]	valid's auc: 0.711057
Early stopping, best iteration is:
[469]	valid's auc: 0.712135
Lvl_0_Pipe_1_Mod_1_LightGBM fitting and predicting completed
Start fitting Lvl_0_Pipe_1_Mod_1_LightGBM ...

===== Start working with fold 0 for Lvl_0_Pipe_1_Mod_1_LightGBM =====

Training until validation scores don't improve for 200 rounds
[100]	valid's auc: 0.707852
[200]	valid's auc: 0.709693
[300]	valid's auc: 0.709444
Early stopping, best iteration is:
[188]	valid's auc: 0.710024
Lvl_0_Pipe_1_Mod_1_LightGBM fitting and predicting completed
Start fitting Lvl_0_Pipe_1_Mod_1_LightGBM ...

===== Start working with fold 0 for Lvl_0_Pipe_1_Mod_1_LightGBM =====

Training until validation scores don't improve for 200 rounds
[100]	valid's auc: 0.704589
[200]	valid's auc: 0.707264
[300]	valid's auc: 0.707995
[400]	valid's auc: 0.70739
[500]	valid's auc: 0.707228
E

Training until validation scores don't improve for 200 rounds
[100]	valid's auc: 0.700845
[200]	valid's auc: 0.700868
[300]	valid's auc: 0.698839
Early stopping, best iteration is:
[124]	valid's auc: 0.701369
Lvl_0_Pipe_1_Mod_1_LightGBM fitting and predicting completed
Start fitting Lvl_0_Pipe_1_Mod_1_LightGBM ...

===== Start working with fold 0 for Lvl_0_Pipe_1_Mod_1_LightGBM =====

Training until validation scores don't improve for 200 rounds
[100]	valid's auc: 0.69858
[200]	valid's auc: 0.699696
[300]	valid's auc: 0.697012
[400]	valid's auc: 0.69435
Early stopping, best iteration is:
[219]	valid's auc: 0.700512
Lvl_0_Pipe_1_Mod_1_LightGBM fitting and predicting completed
Start fitting Lvl_0_Pipe_1_Mod_1_LightGBM ...

===== Start working with fold 0 for Lvl_0_Pipe_1_Mod_1_LightGBM =====

Training until validation scores don't improve for 200 rounds
[100]	valid's auc: 0.70013
[200]	valid's auc: 0.700441
[300]	valid's auc: 0.69855
Early stopping, best iteration is:
[143]	valid's auc: 

Training until validation scores don't improve for 200 rounds
[100]	valid's auc: 0.700101
[200]	valid's auc: 0.703501
[300]	valid's auc: 0.703288
[400]	valid's auc: 0.701255
Early stopping, best iteration is:
[214]	valid's auc: 0.703725
Lvl_0_Pipe_1_Mod_1_LightGBM fitting and predicting completed
Start fitting Lvl_0_Pipe_1_Mod_1_LightGBM ...

===== Start working with fold 0 for Lvl_0_Pipe_1_Mod_1_LightGBM =====

Training until validation scores don't improve for 200 rounds
[100]	valid's auc: 0.700758
[200]	valid's auc: 0.700693
[300]	valid's auc: 0.696395
Early stopping, best iteration is:
[137]	valid's auc: 0.701758
Lvl_0_Pipe_1_Mod_1_LightGBM fitting and predicting completed
Start fitting Lvl_0_Pipe_1_Mod_1_LightGBM ...

===== Start working with fold 0 for Lvl_0_Pipe_1_Mod_1_LightGBM =====

Training until validation scores don't improve for 200 rounds
[100]	valid's auc: 0.696604
[200]	valid's auc: 0.695047
[300]	valid's auc: 0.693817
Early stopping, best iteration is:
[129]	valid's a

Lvl_0_Pipe_1_Mod_1_LightGBM fitting and predicting completed
Start fitting Lvl_0_Pipe_1_Mod_1_LightGBM ...

===== Start working with fold 0 for Lvl_0_Pipe_1_Mod_1_LightGBM =====

Training until validation scores don't improve for 200 rounds
[100]	valid's auc: 0.702712
[200]	valid's auc: 0.703644
[300]	valid's auc: 0.702877
[400]	valid's auc: 0.701831
Early stopping, best iteration is:
[224]	valid's auc: 0.704158
Lvl_0_Pipe_1_Mod_1_LightGBM fitting and predicting completed
Start fitting Lvl_0_Pipe_1_Mod_1_LightGBM ...

===== Start working with fold 0 for Lvl_0_Pipe_1_Mod_1_LightGBM =====

Training until validation scores don't improve for 200 rounds
[100]	valid's auc: 0.702212
[200]	valid's auc: 0.703201
[300]	valid's auc: 0.70146
Early stopping, best iteration is:
[145]	valid's auc: 0.703817
Lvl_0_Pipe_1_Mod_1_LightGBM fitting and predicting completed
Start fitting Lvl_0_Pipe_1_Mod_1_LightGBM ...

===== Start working with fold 0 for Lvl_0_Pipe_1_Mod_1_LightGBM =====

Training until val

Start fitting Lvl_0_Pipe_1_Mod_1_LightGBM ...

===== Start working with fold 0 for Lvl_0_Pipe_1_Mod_1_LightGBM =====

Training until validation scores don't improve for 200 rounds
[100]	valid's auc: 0.701631
[200]	valid's auc: 0.699376
Early stopping, best iteration is:
[91]	valid's auc: 0.702167
Lvl_0_Pipe_1_Mod_1_LightGBM fitting and predicting completed
Start fitting Lvl_0_Pipe_1_Mod_1_LightGBM ...

===== Start working with fold 0 for Lvl_0_Pipe_1_Mod_1_LightGBM =====

Training until validation scores don't improve for 200 rounds
[100]	valid's auc: 0.700454
[200]	valid's auc: 0.699617
[300]	valid's auc: 0.697472
Early stopping, best iteration is:
[114]	valid's auc: 0.701331
Lvl_0_Pipe_1_Mod_1_LightGBM fitting and predicting completed
Start fitting Lvl_0_Pipe_1_Mod_1_LightGBM ...

===== Start working with fold 0 for Lvl_0_Pipe_1_Mod_1_LightGBM =====

Training until validation scores don't improve for 200 rounds
[100]	valid's auc: 0.692535
[200]	valid's auc: 0.692701
[300]	valid's auc

[300]	valid's auc: 0.692987
Early stopping, best iteration is:
[132]	valid's auc: 0.69771
Lvl_0_Pipe_1_Mod_1_LightGBM fitting and predicting completed
Start fitting Lvl_0_Pipe_1_Mod_1_LightGBM ...

===== Start working with fold 0 for Lvl_0_Pipe_1_Mod_1_LightGBM =====

Training until validation scores don't improve for 200 rounds
[100]	valid's auc: 0.699028
[200]	valid's auc: 0.700905
[300]	valid's auc: 0.700799
Early stopping, best iteration is:
[174]	valid's auc: 0.701417
Lvl_0_Pipe_1_Mod_1_LightGBM fitting and predicting completed
Start fitting Lvl_0_Pipe_1_Mod_1_LightGBM ...

===== Start working with fold 0 for Lvl_0_Pipe_1_Mod_1_LightGBM =====

Training until validation scores don't improve for 200 rounds
[100]	valid's auc: 0.702632
[200]	valid's auc: 0.703033
[300]	valid's auc: 0.701726
Early stopping, best iteration is:
[131]	valid's auc: 0.704224
Lvl_0_Pipe_1_Mod_1_LightGBM fitting and predicting completed
Start fitting Lvl_0_Pipe_1_Mod_1_LightGBM ...

===== Start working with f

Linear model: C = 1e-05 score = 0.6071137734888419
Linear model: C = 5e-05 score = 0.6257792568174505
Linear model: C = 0.0001 score = 0.6344830012770324
Linear model: C = 0.0005 score = 0.6494286994963239
Linear model: C = 0.001 score = 0.653081652718189
Linear model: C = 0.005 score = 0.6546281804702173
Linear model: C = 0.01 score = 0.653763377840081
Linear model: C = 0.05 score = 0.6505277875126736

===== Start working with fold 3 for Lvl_0_Pipe_0_Mod_0_LinearL2 =====

Linear model: C = 1e-05 score = 0.6135000813825731
Linear model: C = 5e-05 score = 0.6278433523666974
Linear model: C = 0.0001 score = 0.6333511453681742
Linear model: C = 0.0005 score = 0.6445357330916188
Linear model: C = 0.001 score = 0.647666446260453
Linear model: C = 0.005 score = 0.6490801170680635
Linear model: C = 0.01 score = 0.6486520140237882
Linear model: C = 0.05 score = 0.6476122305841143
Lvl_0_Pipe_0_Mod_0_LinearL2 fitting and predicting completed
Time left 285.18238377571106
Start fitting Selector_Li

[200]	valid's auc: 0.719711
[300]	valid's auc: 0.717879
Early stopping, best iteration is:
[156]	valid's auc: 0.721069
Lvl_0_Pipe_1_Mod_1_LightGBM fitting and predicting completed
Start fitting Lvl_0_Pipe_1_Mod_1_LightGBM ...

===== Start working with fold 0 for Lvl_0_Pipe_1_Mod_1_LightGBM =====

Training until validation scores don't improve for 200 rounds
[100]	valid's auc: 0.722598
[200]	valid's auc: 0.724183
[300]	valid's auc: 0.723455
[400]	valid's auc: 0.722883
Early stopping, best iteration is:
[225]	valid's auc: 0.724405
Lvl_0_Pipe_1_Mod_1_LightGBM fitting and predicting completed
Start fitting Lvl_0_Pipe_1_Mod_1_LightGBM ...

===== Start working with fold 0 for Lvl_0_Pipe_1_Mod_1_LightGBM =====

Training until validation scores don't improve for 200 rounds
[100]	valid's auc: 0.719947
[200]	valid's auc: 0.717798
[300]	valid's auc: 0.714461
Early stopping, best iteration is:
[128]	valid's auc: 0.720263
Lvl_0_Pipe_1_Mod_1_LightGBM fitting and predicting completed
Start fitting Lv

Training until validation scores don't improve for 200 rounds
[100]	valid's auc: 0.720599
[200]	valid's auc: 0.719574
[300]	valid's auc: 0.716371
Early stopping, best iteration is:
[103]	valid's auc: 0.720808
Lvl_0_Pipe_1_Mod_1_LightGBM fitting and predicting completed
Start fitting Lvl_0_Pipe_1_Mod_1_LightGBM ...

===== Start working with fold 0 for Lvl_0_Pipe_1_Mod_1_LightGBM =====

Training until validation scores don't improve for 200 rounds
[100]	valid's auc: 0.723987
[200]	valid's auc: 0.724866
[300]	valid's auc: 0.725296
[400]	valid's auc: 0.725049
Early stopping, best iteration is:
[238]	valid's auc: 0.725482
Lvl_0_Pipe_1_Mod_1_LightGBM fitting and predicting completed
Start fitting Lvl_0_Pipe_1_Mod_1_LightGBM ...

===== Start working with fold 0 for Lvl_0_Pipe_1_Mod_1_LightGBM =====

Training until validation scores don't improve for 200 rounds
[100]	valid's auc: 0.723556
[200]	valid's auc: 0.724822
[300]	valid's auc: 0.723578
Early stopping, best iteration is:
[197]	valid's a

Start fitting Lvl_0_Pipe_1_Mod_1_LightGBM ...

===== Start working with fold 0 for Lvl_0_Pipe_1_Mod_1_LightGBM =====

Training until validation scores don't improve for 200 rounds
[100]	valid's auc: 0.698152
[200]	valid's auc: 0.696878
[300]	valid's auc: 0.696839
Early stopping, best iteration is:
[108]	valid's auc: 0.698706
Lvl_0_Pipe_1_Mod_1_LightGBM fitting and predicting completed
Start fitting Lvl_0_Pipe_1_Mod_1_LightGBM ...

===== Start working with fold 0 for Lvl_0_Pipe_1_Mod_1_LightGBM =====

Training until validation scores don't improve for 200 rounds
[100]	valid's auc: 0.696303
[200]	valid's auc: 0.697362
[300]	valid's auc: 0.69694
Early stopping, best iteration is:
[143]	valid's auc: 0.697906
Lvl_0_Pipe_1_Mod_1_LightGBM fitting and predicting completed
Start fitting Lvl_0_Pipe_1_Mod_1_LightGBM ...

===== Start working with fold 0 for Lvl_0_Pipe_1_Mod_1_LightGBM =====

Training until validation scores don't improve for 200 rounds
[100]	valid's auc: 0.693255
[200]	valid's auc

Training until validation scores don't improve for 200 rounds
[100]	valid's auc: 0.697004
[200]	valid's auc: 0.700916
[300]	valid's auc: 0.700626
[400]	valid's auc: 0.698353
Early stopping, best iteration is:
[237]	valid's auc: 0.702146
Lvl_0_Pipe_1_Mod_1_LightGBM fitting and predicting completed
Start fitting Lvl_0_Pipe_1_Mod_1_LightGBM ...

===== Start working with fold 0 for Lvl_0_Pipe_1_Mod_1_LightGBM =====

Training until validation scores don't improve for 200 rounds
[100]	valid's auc: 0.698727
[200]	valid's auc: 0.702697
[300]	valid's auc: 0.701769
[400]	valid's auc: 0.699822
Early stopping, best iteration is:
[223]	valid's auc: 0.703215
Lvl_0_Pipe_1_Mod_1_LightGBM fitting and predicting completed
Start fitting Lvl_0_Pipe_1_Mod_1_LightGBM ...

===== Start working with fold 0 for Lvl_0_Pipe_1_Mod_1_LightGBM =====

Training until validation scores don't improve for 200 rounds
[100]	valid's auc: 0.69808
[200]	valid's auc: 0.70402
[300]	valid's auc: 0.701746
Early stopping, best ite

Start fitting Lvl_0_Pipe_1_Mod_1_LightGBM ...

===== Start working with fold 0 for Lvl_0_Pipe_1_Mod_1_LightGBM =====

Training until validation scores don't improve for 200 rounds
[100]	valid's auc: 0.696259
[200]	valid's auc: 0.700809
[300]	valid's auc: 0.699262
[400]	valid's auc: 0.696054
Early stopping, best iteration is:
[219]	valid's auc: 0.700838
Lvl_0_Pipe_1_Mod_1_LightGBM fitting and predicting completed
Start fitting Lvl_0_Pipe_1_Mod_1_LightGBM ...

===== Start working with fold 0 for Lvl_0_Pipe_1_Mod_1_LightGBM =====

Training until validation scores don't improve for 200 rounds
[100]	valid's auc: 0.698944
[200]	valid's auc: 0.701242
[300]	valid's auc: 0.699075
Early stopping, best iteration is:
[171]	valid's auc: 0.701357
Lvl_0_Pipe_1_Mod_1_LightGBM fitting and predicting completed
Start fitting Lvl_0_Pipe_1_Mod_1_LightGBM ...

===== Start working with fold 0 for Lvl_0_Pipe_1_Mod_1_LightGBM =====

Training until validation scores don't improve for 100 rounds
[100]	valid's au

Start fitting Lvl_0_Pipe_1_Mod_1_LightGBM ...

===== Start working with fold 0 for Lvl_0_Pipe_1_Mod_1_LightGBM =====

Training until validation scores don't improve for 200 rounds
[100]	valid's auc: 0.719209
[200]	valid's auc: 0.719014
[300]	valid's auc: 0.717482
Early stopping, best iteration is:
[127]	valid's auc: 0.720154
Lvl_0_Pipe_1_Mod_1_LightGBM fitting and predicting completed
Start fitting Lvl_0_Pipe_1_Mod_1_LightGBM ...

===== Start working with fold 0 for Lvl_0_Pipe_1_Mod_1_LightGBM =====

Training until validation scores don't improve for 200 rounds
[100]	valid's auc: 0.720113
[200]	valid's auc: 0.720854
[300]	valid's auc: 0.719023
Early stopping, best iteration is:
[198]	valid's auc: 0.721021
Lvl_0_Pipe_1_Mod_1_LightGBM fitting and predicting completed
Start fitting Lvl_0_Pipe_1_Mod_1_LightGBM ...

===== Start working with fold 0 for Lvl_0_Pipe_1_Mod_1_LightGBM =====

Training until validation scores don't improve for 200 rounds
[100]	valid's auc: 0.71608
[200]	valid's auc

[100]	valid's auc: 0.717397
[200]	valid's auc: 0.719239
[300]	valid's auc: 0.718204
[400]	valid's auc: 0.717474
Early stopping, best iteration is:
[210]	valid's auc: 0.719686
Lvl_0_Pipe_1_Mod_1_LightGBM fitting and predicting completed
Start fitting Lvl_0_Pipe_1_Mod_1_LightGBM ...

===== Start working with fold 0 for Lvl_0_Pipe_1_Mod_1_LightGBM =====

Training until validation scores don't improve for 200 rounds
[100]	valid's auc: 0.717696
[200]	valid's auc: 0.719714
[300]	valid's auc: 0.719854
[400]	valid's auc: 0.718438
Early stopping, best iteration is:
[239]	valid's auc: 0.72026
Lvl_0_Pipe_1_Mod_1_LightGBM fitting and predicting completed
Start fitting Lvl_0_Pipe_1_Mod_1_LightGBM ...

===== Start working with fold 0 for Lvl_0_Pipe_1_Mod_1_LightGBM =====

Training until validation scores don't improve for 200 rounds
[100]	valid's auc: 0.717231
[200]	valid's auc: 0.718945
[300]	valid's auc: 0.717781
[400]	valid's auc: 0.716005
Early stopping, best iteration is:
[221]	valid's auc: 0.7

[100]	valid's auc: 0.718363
[200]	valid's auc: 0.718064
[300]	valid's auc: 0.717219
Early stopping, best iteration is:
[184]	valid's auc: 0.718824
Lvl_0_Pipe_1_Mod_1_LightGBM fitting and predicting completed
Start fitting Lvl_0_Pipe_1_Mod_1_LightGBM ...

===== Start working with fold 0 for Lvl_0_Pipe_1_Mod_1_LightGBM =====

Training until validation scores don't improve for 200 rounds
[100]	valid's auc: 0.7194
[200]	valid's auc: 0.717855
[300]	valid's auc: 0.716515
Early stopping, best iteration is:
[117]	valid's auc: 0.720022
Lvl_0_Pipe_1_Mod_1_LightGBM fitting and predicting completed
Start fitting Lvl_0_Pipe_1_Mod_1_LightGBM ...

===== Start working with fold 0 for Lvl_0_Pipe_1_Mod_1_LightGBM =====

Training until validation scores don't improve for 200 rounds
[100]	valid's auc: 0.714223
[200]	valid's auc: 0.711314
Early stopping, best iteration is:
[59]	valid's auc: 0.715468
Lvl_0_Pipe_1_Mod_1_LightGBM fitting and predicting completed
Start fitting Lvl_0_Pipe_1_Mod_1_LightGBM ...



[100]	valid's auc: 0.718134
[200]	valid's auc: 0.718014
[300]	valid's auc: 0.717087
Early stopping, best iteration is:
[151]	valid's auc: 0.719435
Lvl_0_Pipe_1_Mod_1_LightGBM fitting and predicting completed
Start fitting Lvl_0_Pipe_1_Mod_1_LightGBM ...

===== Start working with fold 0 for Lvl_0_Pipe_1_Mod_1_LightGBM =====

Training until validation scores don't improve for 200 rounds
[100]	valid's auc: 0.715367
[200]	valid's auc: 0.715094
[300]	valid's auc: 0.711728
Early stopping, best iteration is:
[123]	valid's auc: 0.716415
Lvl_0_Pipe_1_Mod_1_LightGBM fitting and predicting completed
Start fitting Lvl_0_Pipe_1_Mod_1_LightGBM ...

===== Start working with fold 0 for Lvl_0_Pipe_1_Mod_1_LightGBM =====

Training until validation scores don't improve for 200 rounds
[100]	valid's auc: 0.717919
[200]	valid's auc: 0.717304
[300]	valid's auc: 0.716284
Early stopping, best iteration is:
[158]	valid's auc: 0.718594
Lvl_0_Pipe_1_Mod_1_LightGBM fitting and predicting completed
Start fitting Lv

[100]	valid's auc: 0.645176
[200]	valid's auc: 0.642393
Early stopping, best iteration is:
[73]	valid's auc: 0.646289
Selector_LightGBM fitting and predicting completed
Start fitting Lvl_0_Pipe_1_Mod_0_LightGBM ...

===== Start working with fold 0 for Lvl_0_Pipe_1_Mod_0_LightGBM =====

Training until validation scores don't improve for 200 rounds
[100]	valid's auc: 0.698934
[200]	valid's auc: 0.697943
[300]	valid's auc: 0.694476
Early stopping, best iteration is:
[132]	valid's auc: 0.700055

===== Start working with fold 1 for Lvl_0_Pipe_1_Mod_0_LightGBM =====

Training until validation scores don't improve for 200 rounds
[100]	valid's auc: 0.702934
[200]	valid's auc: 0.705549
[300]	valid's auc: 0.705021
[400]	valid's auc: 0.70339
Early stopping, best iteration is:
[239]	valid's auc: 0.706

===== Start working with fold 2 for Lvl_0_Pipe_1_Mod_0_LightGBM =====

Training until validation scores don't improve for 200 rounds
[100]	valid's auc: 0.722646
[200]	valid's auc: 0.727056
[300]	val

Early stopping, best iteration is:
[78]	valid's auc: 0.727183

===== Start working with fold 3 for Lvl_0_Pipe_1_Mod_1_LightGBM =====

Training until validation scores don't improve for 100 rounds
[100]	valid's auc: 0.729426
Early stopping, best iteration is:
[94]	valid's auc: 0.73005
Lvl_0_Pipe_1_Mod_1_LightGBM fitting and predicting completed
Time left 156.18980407714844
Blending: Optimization starts with equal weights and score 0.7190663585552489
Blending, iter 0: score = 0.720997290181556, weights = [0.12641408 0.5501111  0.32347482]
Blending, iter 1: score = 0.7210252655648077, weights = [0.14856485 0.5371199  0.31431526]
Blending, iter 2: score = 0.7210247960882927, weights = [0.14897314 0.5368624  0.31416452]
Blending, iter 3: score = 0.7210247960882927, weights = [0.14897314 0.5368624  0.31416452]
No score update. Terminated

Automl preset training completed in 145.17 seconds.
Results:  12 0.13466325 0.30532769190916154
Test pred has 0 nans


Unnamed: 0,hash_tab_num,date,target
0,0,2019-09-01,1
1,1,2019-09-01,0
2,2,2019-09-01,0
3,3,2019-09-01,0
4,4,2019-09-01,1


In [120]:
# Display the shape tuple of X as a quick sanity check of its dimensions.
X.shape

(64271, 37)

In [112]:
# Relative frequency of each value in the `target` column
# (normalize=True yields fractions instead of raw counts).
(
    result_df
    .loc[:, "target"]
    .value_counts(normalize=True)
)

0    0.717739
1    0.282261
Name: target, dtype: float64

In [22]:
# Pair every importance value of the first model with the corresponding item
# yielded by iterating X_train, then list the pairs in ascending order.
sorted(
    (importance, feature)
    for importance, feature in zip(models[0].feature_importances_, X_train)
)

[(3.9293222373502877e-05, 'category_Служащие'),
 (0.0002462157230397062, 'razryad_fact_1'),
 (0.0005464754039567421, 'category_Специалисты'),
 (0.0008090219452134489, 'category_Руководители'),
 (0.0017127778442173796, 'razryad_fact_6'),
 (0.001742901758003719, 'category_Рабочие'),
 (0.002805546320707022, 'is_pensioner'),
 (0.003227443847889691, 'razryad_fact_0'),
 (0.003963376005101811, 'education_Высшее'),
 (0.006602067686274391, 'razryad_fact_5'),
 (0.007019434914386372, 'razryad_fact_2'),
 (0.009136071671756953, 'education_Начальное_среднее'),
 (0.009419241119991553, 'gender'),
 (0.009823888563966734, 'education_Среднее_профессинальное'),
 (0.009935706532837749, 'razryad_fact_4'),
 (0.010048185610300995, 'razryad_fact_3'),
 (0.010829461436609516, 'relatives_5'),
 (0.013619709975229467, 'relatives_0'),
 (0.01362717319443656, 'relatives_1'),
 (0.01828834717499448, 'relatives_4'),
 (0.020625990311766484, 'relatives_2'),
 (0.021381950164929105, 'relatives_3'),
 (0.027962758531691577, 'n

In [23]:
# Same ranking as for the first model, but for the last entry of `models`:
# (importance, feature) pairs in ascending order.
sorted(
    (importance, feature)
    for importance, feature in zip(models[-1].feature_importances_, X_train)
)

[(2.792951461429063e-05, 'category_Служащие'),
 (0.0003192472968838628, 'razryad_fact_1'),
 (0.0005744031798352845, 'category_Специалисты'),
 (0.0010356968618346616, 'category_Руководители'),
 (0.0017549361567989648, 'category_Рабочие'),
 (0.0018565759917234725, 'razryad_fact_6'),
 (0.0032328661022218053, 'is_pensioner'),
 (0.0032434505354505426, 'razryad_fact_0'),
 (0.003917243101740642, 'education_Высшее'),
 (0.007058844048286317, 'razryad_fact_5'),
 (0.007642287360506522, 'razryad_fact_2'),
 (0.008488808400501556, 'education_Начальное_среднее'),
 (0.008966422386197027, 'education_Среднее_профессинальное'),
 (0.0097768938720705, 'razryad_fact_3'),
 (0.009834809985117736, 'gender'),
 (0.010327513725400429, 'razryad_fact_4'),
 (0.011589537105387833, 'relatives_5'),
 (0.013493086890885337, 'relatives_0'),
 (0.013959955278446332, 'relatives_1'),
 (0.018628030083885853, 'relatives_4'),
 (0.0218437020020996, 'relatives_3'),
 (0.021871741226295587, 'relatives_2'),
 (0.025274173101300142, 'n

In [113]:
# Load the submission template, discard its `target` column and parse `date`
# with a strict ISO (YYYY-MM-DD) format.
check_df = (
    pd.read_csv('../data/raw/submission_check.csv', sep=';')
    .drop(columns=['target'])
    .assign(date=lambda frame: pd.to_datetime(frame['date'], format='%Y-%m-%d'))
)
check_df.head()

Unnamed: 0,hash_tab_num,date
0,0,2019-09-01
1,0,2019-10-01
2,0,2019-11-01
3,0,2019-12-01
4,0,2020-01-01


In [114]:
# Keep only the predictions whose (hash_tab_num, date) key appears in check_df,
# ordered by employee and then by month.
# NOTE(review): the default inner join silently drops check_df rows that have no
# prediction in result_df — confirm the row count matches the template.
result_df_new = (
    result_df
    .merge(check_df, on=['hash_tab_num', 'date'])
    .sort_values(['hash_tab_num', 'date'])
)
result_df_new.head()

Unnamed: 0,hash_tab_num,date,target
0,0,2019-09-01,1
1757,0,2019-10-01,1
3509,0,2019-11-01,1
5265,0,2019-12-01,1
7025,0,2020-01-01,1


In [115]:
# Write the merged frame to a semicolon-separated CSV without the index column.
result_df_new.to_csv(
    'submission_aa_aml_3.csv',
    index=False,
    sep=';',
)

In [116]:
from sklearn.metrics import f1_score