In [1]:
import os
import time
from typing import Optional
from typing import Tuple, List

import numpy as np
import pandas as pd
import catboost as cb
import seaborn as sns
import matplotlib.pyplot as plt
import shap
import xgboost as xgb


from sklearn.metrics import roc_auc_score
from sklearn.pipeline import Pipeline
from sklearn.model_selection import KFold
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeRegressor
from sklearn.tree import DecisionTreeClassifier
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from sklearn.linear_model import LogisticRegression, LinearRegression
from sklearn.ensemble import BaggingClassifier, RandomForestClassifier
from sklearn.model_selection import train_test_split, validation_curve, learning_curve
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import cross_val_score
from sklearn.metrics import r2_score, mean_absolute_error
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import OrdinalEncoder
from scipy.stats import rankdata
from scipy.stats.mstats import winsorize
from tqdm import tqdm
from teacher_bot.risk_default_prediction import RiskDefaultPrediction
pd.set_option("display.max_columns", 150)

In [42]:
test[categorical] = test[categorical].astype(str)
prediction = estimators.predict_proba(test.drop("sk_id_curr", axis=1))[:, 1]

submit = pd.DataFrame({
    "sk_id_curr": test["sk_id_curr"],
    "score": prediction
})
submit.head(n=3)

Unnamed: 0,sk_id_curr,score
0,174545,0.027164
1,209898,0.061285
2,454938,0.051884


In [102]:
prediction

array([[0.96574133, 0.03425867],
       [0.95600603, 0.04399397],
       [0.96836336, 0.03163664],
       ...,
       [0.96568941, 0.03431059],
       [0.96621518, 0.03378482],
       [0.98764815, 0.01235185]])

In [103]:
submit = pd.DataFrame({
    "sk_id_curr": test["sk_id_curr"],
    "score": prediction[:,1]
})
submit.head(n=3)

Unnamed: 0,sk_id_curr,score
0,174545,0.034259
1,209898,0.043994
2,454938,0.031637


In [104]:
bot = RiskDefaultPrediction()
bot.production_quality(answer=submit)

Привет! Приятно познакомиться!
Запускаю тестирование...
Проверяю метрики...
Твой результат: 0.7641490799216262
Думаю...
А что если попробовать бустинг?
Нужно качество больше 0.77, я верю - у тебя получится!


In [2]:
def get_input(data_path: str) -> pd.DataFrame:
    """
    Считывание данных и вывод основной информации о наборе данных.

    Parameters
    ----------
    data_path: str
        Название файла.

    Returns
    -------
    data: pandas.core.frame.DataFrame
        Загруженный набор данных в pandas.DataFrame

    """
    base_path = "./data/"
    data = pd.read_csv(f"{base_path}/{data_path}")
    data.columns = [col.lower() for col in data.columns]
    print(f"{data_path}: shape = {data.shape[0]} rows, {data.shape[1]} cols")

    return data

def check_missings(X: pd.DataFrame):
    """
    Вычисление количества пропусков и пропусков в данных;

    Parameters
    ----------
    X: pandas.core.frame.DataFrame
        Матрица признаков для исследования.

    Returns
    -------
    na_stats: pandas.core.frame.DataFrame
        Матрица с со статистикой пропусков.
    
    """
    total = X.isnull().sum().sort_values(ascending = False)
    percent = (X.isnull().sum()/X.isnull().count()*100).sort_values(ascending = False)
    na_stats = pd.concat([total, percent], axis=1, keys=['Total', 'Percent'])
    return na_stats.T


def plot_categorical(data, col, size=[8 ,4], xlabel_angle=0, title=''):
    """
    Визуализация распределения значений категориального признака.

    """
    plotdata = data[col].value_counts()
    plt.figure(figsize = size)
    sns.barplot(x = plotdata.index, y=plotdata.values, palette="viridis")
    plt.title(title)
    if xlabel_angle!=0: 
        plt.xticks(rotation=xlabel_angle)
        
        
def plot_kde_target(feature_name: str, data: pd.DataFrame):
    """
    Визуализация функции распределения признаков в зависимости
    от значения целевой переменной на обучающей выборке.
    Вывод коэффициента корреляции между значением признака
    и значением целевой переменной, вывод медианы
    значений признака в разрезе целевой переменной.

    Parameters
    ----------
    faeture_name: str
        Название анализируемого признака.

    data: pandas.core.frame.DataFrame
        Матрица признаков для обучения.

    """
    corr = data["target"].corr(data[feature_name])

    mask = data["target"] == 1
    avg_target = data.loc[mask, feature_name].median()
    avg_non_target = data.loc[~mask, feature_name].median()

    fig = plt.figure(figsize=(12, 6))
    plt.title(f"{feature_name} Distribution", size=14)
    sns.kdeplot(data.loc[mask, feature_name], linewidth=3, color="blue", label="TARGET = 1")
    sns.kdeplot(data.loc[~mask, feature_name], linewidth=3, color="green", label="TARGET = 0")
    plt.legend(loc="best", fontsize=14)
    plt.xlabel(feature_name, size=14)
    plt.ylabel("Density", size=14)

    print(f"The correlation between {feature_name} and target = {round(corr, 4)}")
    print(f"Median-value for default-loan = {round(avg_target, 4)}")
    print(f"Median-value for non default-loan = {round(avg_target, 4)}")


def catboost_hold_out_validation(params, X, y, split_params = [0.7, 0.2, 0.1], categorical = None):
    """
    Hold-Out валидация для модели catbooost.

    Parameters
    ----------
    params: dict
        Словарь гиперпараметров модели.

    X: pandas.core.frame.DataFrame
        Матрица признако для обучения модели.

    y: pandas.core.frame.Series
        Вектор целевой переменной для обучения модели.

    split_params: List[float], optional, default = [0.7, 0.2, 0.1]
        Параметры (доли) разбиения выборки.
        Опциональный параметр, по умолчанию, равен [0.7, 0.2, 0.1].
    
    categorical: str, optional, default = None
        Список категориальных признаков.
        Опциональный параметр, по умолчанию, не используется.

    Returns
    -------
    estimator: catboost.core.CatBoostClassifier
        Обученный классификатор catboost.

    test_prediction: np.array, optional
        Вектор прогнозов для тестовой выборки.
        Опциональный объект, возвращается только, если split_params
        содержит 3 значения.

    """
    print(f"Starting at {time.ctime()}")
    numeric = list(set(X.columns) - set(categorical))
    if categorical:
        X[categorical] = X[categorical].astype(str)

    x_train, x_valid = train_test_split(
        X, train_size=split_params[0], random_state=27
    )
    y_train, y_valid = train_test_split(
        y, train_size=split_params[0], random_state=27
    )

    if len(split_params) == 3:
        test_size = int(split_params[2] * X.shape[0])

        x_valid, x_test = train_test_split(
            x_valid, test_size=test_size, random_state=72
        )
        y_valid, y_test = train_test_split(
            y_valid, test_size=test_size, random_state=72
        )

    model = cb.CatBoostClassifier(**params)
    model.fit(
        x_train, y_train, categorical,
        eval_set=[(x_train, y_train), (x_valid, y_valid)]
    )

    print("="*80)
    valid_score = roc_auc_score(y_valid, model.predict_proba(x_valid)[:, 1])
    print(f"Valid Score = {round(valid_score, 4)}")

    if len(split_params) == 3:

        test_prediction = model.predict_proba(x_test)[:, 1]
        test_score = roc_auc_score(y_test, test_prediction)
        print(f"Test Score = {round(test_score, 4)}")

        return model, test_prediction

    else:
        return model


def create_numerical_aggs(data: pd.DataFrame,
                          groupby_id: str,
                          aggs: dict,
                          prefix: Optional[str] = None,
                          suffix: Optional[str] = None,
                          ) -> pd.DataFrame:
    """
    Построение агрегаций для числовых признаков.

    Parameters
    ----------
    data: pandas.core.frame.DataFrame
        Выборка для построения агрегаций.

    groupby_id: str
        Название ключа, по которому нужно произвести группировку.

    aggs: dict
        Словарь с названием признака и списка функций.
        Ключ словаря - название признака, который используется для
        вычисления агрегаций, значение словаря - список с названием
        функций для вычисления агрегаций.

    prefix: str, optional, default = None
        Префикс для названия признаков.
        Опциональный параметр, по умолчанию, не используется.

    suffix: str, optional, default = None
        Суффикс для названия признаков.
        Опциональный параметр, по умолчанию, не используется.

    Returns
    -------
    stats: pandas.core.frame.DataFrame
        Выборка с рассчитанными агрегациями.

    """
    if not prefix:
        prefix = ""
    if not suffix:
        suffix = ""

    data_grouped = data.groupby(groupby_id)
    stats = data_grouped.agg(aggs)
    stats.columns = [f"{prefix}{feature}_{stat}{suffix}".upper() for feature, stat in stats]
    stats = stats.reset_index()

    return stats

In [67]:
%pwd

'C:\\Users\\olegi\\Python-lab-and-homework\\edu-data\\workshop15\\12. Скоринг кредитного портфеля'

In [68]:
train = pd.read_csv('./data/train.csv')
test = pd.read_csv('./data/test.csv')

In [69]:
train.head(n=2)

Unnamed: 0,sk_id_curr,target,name_contract_type,code_gender,flag_own_car,flag_own_realty,cnt_children,amt_income_total,amt_credit,amt_annuity,amt_goods_price,name_type_suite,name_income_type,name_education_type,name_family_status,name_housing_type,region_population_relative,days_birth,days_employed,days_registration,days_id_publish,own_car_age,flag_mobil,flag_emp_phone,flag_work_phone,flag_cont_mobile,flag_phone,flag_email,occupation_type,cnt_fam_members,region_rating_client,region_rating_client_w_city,weekday_appr_process_start,hour_appr_process_start,reg_region_not_live_region,reg_region_not_work_region,live_region_not_work_region,reg_city_not_live_city,reg_city_not_work_city,live_city_not_work_city,organization_type,ext_source_1,ext_source_2,ext_source_3,apartments_avg,basementarea_avg,years_beginexpluatation_avg,years_build_avg,commonarea_avg,elevators_avg,entrances_avg,floorsmax_avg,floorsmin_avg,landarea_avg,livingapartments_avg,livingarea_avg,nonlivingapartments_avg,nonlivingarea_avg,apartments_mode,basementarea_mode,years_beginexpluatation_mode,years_build_mode,commonarea_mode,elevators_mode,entrances_mode,floorsmax_mode,floorsmin_mode,landarea_mode,livingapartments_mode,livingarea_mode,nonlivingapartments_mode,nonlivingarea_mode,apartments_medi,basementarea_medi,years_beginexpluatation_medi,years_build_medi,commonarea_medi,elevators_medi,entrances_medi,floorsmax_medi,floorsmin_medi,landarea_medi,livingapartments_medi,livingarea_medi,nonlivingapartments_medi,nonlivingarea_medi,fondkapremont_mode,housetype_mode,totalarea_mode,wallsmaterial_mode,emergencystate_mode,obs_30_cnt_social_circle,def_30_cnt_social_circle,obs_60_cnt_social_circle,def_60_cnt_social_circle,days_last_phone_change,flag_document_2,flag_document_3,flag_document_4,flag_document_5,flag_document_6,flag_document_7,flag_document_8,flag_document_9,flag_document_10,flag_document_11,flag_document_12,flag_document_13,flag_document_14,flag_document_15,flag_document_16,flag_document_17,flag_document_18,flag_document_19,flag_document_20,flag_document_21,amt_req_credit_bureau_hour,amt_req_credit_bureau_day,amt_req_credit_bureau_week,amt_req_credit_bureau_mon,amt_req_credit_bureau_qrt,amt_req_credit_bureau_year
0,342217,0,Revolving loans,F,N,Y,0,202500.0,585000.0,29250.0,585000.0,Unaccompanied,State servant,Higher education,Married,House / apartment,0.00712,-14937,-5026,-1.0,-4606,,1,1,0,1,0,0,Managers,2.0,2,2,FRIDAY,10,0,0,0,0,0,0,Government,,0.612914,0.408359,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0.0,0.0,0.0,0.0,-797.0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,1.0,0.0,0.0,2.0
1,183133,0,Cash loans,F,N,Y,0,112500.0,656811.0,30564.0,567000.0,"Spouse, partner",Pensioner,Secondary / secondary special,Married,House / apartment,0.016612,-22021,365243,-2979.0,-5036,,1,0,0,1,0,0,,2.0,2,2,MONDAY,11,0,0,0,0,0,0,XNA,,0.703983,0.75574,0.3711,0.2556,0.9955,0.9388,,0.36,0.3103,0.375,,0.0,,0.3977,,0.0,0.3782,0.2653,0.9955,0.9412,,0.3625,0.3103,0.375,,0.0,,0.4144,,0.0,0.3747,0.2556,0.9955,0.9396,,0.36,0.3103,0.375,,0.0,,0.4049,,0.0,,block of flats,0.3983,Panel,No,4.0,0.0,4.0,0.0,-1542.0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0.0,0.0,0.0,3.0


In [70]:
test.head(n=2)

Unnamed: 0,sk_id_curr,name_contract_type,code_gender,flag_own_car,flag_own_realty,cnt_children,amt_income_total,amt_credit,amt_annuity,amt_goods_price,name_type_suite,name_income_type,name_education_type,name_family_status,name_housing_type,region_population_relative,days_birth,days_employed,days_registration,days_id_publish,own_car_age,flag_mobil,flag_emp_phone,flag_work_phone,flag_cont_mobile,flag_phone,flag_email,occupation_type,cnt_fam_members,region_rating_client,region_rating_client_w_city,weekday_appr_process_start,hour_appr_process_start,reg_region_not_live_region,reg_region_not_work_region,live_region_not_work_region,reg_city_not_live_city,reg_city_not_work_city,live_city_not_work_city,organization_type,ext_source_1,ext_source_2,ext_source_3,apartments_avg,basementarea_avg,years_beginexpluatation_avg,years_build_avg,commonarea_avg,elevators_avg,entrances_avg,floorsmax_avg,floorsmin_avg,landarea_avg,livingapartments_avg,livingarea_avg,nonlivingapartments_avg,nonlivingarea_avg,apartments_mode,basementarea_mode,years_beginexpluatation_mode,years_build_mode,commonarea_mode,elevators_mode,entrances_mode,floorsmax_mode,floorsmin_mode,landarea_mode,livingapartments_mode,livingarea_mode,nonlivingapartments_mode,nonlivingarea_mode,apartments_medi,basementarea_medi,years_beginexpluatation_medi,years_build_medi,commonarea_medi,elevators_medi,entrances_medi,floorsmax_medi,floorsmin_medi,landarea_medi,livingapartments_medi,livingarea_medi,nonlivingapartments_medi,nonlivingarea_medi,fondkapremont_mode,housetype_mode,totalarea_mode,wallsmaterial_mode,emergencystate_mode,obs_30_cnt_social_circle,def_30_cnt_social_circle,obs_60_cnt_social_circle,def_60_cnt_social_circle,days_last_phone_change,flag_document_2,flag_document_3,flag_document_4,flag_document_5,flag_document_6,flag_document_7,flag_document_8,flag_document_9,flag_document_10,flag_document_11,flag_document_12,flag_document_13,flag_document_14,flag_document_15,flag_document_16,flag_document_17,flag_document_18,flag_document_19,flag_document_20,flag_document_21,amt_req_credit_bureau_hour,amt_req_credit_bureau_day,amt_req_credit_bureau_week,amt_req_credit_bureau_mon,amt_req_credit_bureau_qrt,amt_req_credit_bureau_year
0,174545,Cash loans,F,N,Y,0,135000.0,654498.0,31617.0,585000.0,Unaccompanied,Pensioner,Secondary / secondary special,Married,House / apartment,0.005313,-20614,365243,-8985.0,-3098,,1,0,0,1,0,0,,2.0,2,2,TUESDAY,9,0,0,0,0,0,0,XNA,,0.622652,0.602386,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0.0,0.0,0.0,0.0,-1237.0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0.0,0.0,0.0,3.0
1,209898,Cash loans,M,Y,Y,0,135000.0,234576.0,25393.5,202500.0,Unaccompanied,Working,Secondary / secondary special,Single / not married,With parents,0.010643,-9080,-1080,-1909.0,-1743,13.0,1,1,0,1,0,0,Drivers,1.0,2,2,TUESDAY,17,0,0,0,0,0,0,Government,,0.65596,0.49206,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,1.0,0.0,1.0,0.0,-379.0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,1.0,0.0,0.0,0.0


In [71]:
applications = pd.read_csv('./data/previous_application.csv')
applications.head(n=2)
##
#Applications: 1 670 214 rows
#Unique clients: 338 857

Unnamed: 0,SK_ID_PREV,SK_ID_CURR,NAME_CONTRACT_TYPE,AMT_ANNUITY,AMT_APPLICATION,AMT_CREDIT,AMT_DOWN_PAYMENT,AMT_GOODS_PRICE,WEEKDAY_APPR_PROCESS_START,HOUR_APPR_PROCESS_START,FLAG_LAST_APPL_PER_CONTRACT,NFLAG_LAST_APPL_IN_DAY,RATE_DOWN_PAYMENT,RATE_INTEREST_PRIMARY,RATE_INTEREST_PRIVILEGED,NAME_CASH_LOAN_PURPOSE,NAME_CONTRACT_STATUS,DAYS_DECISION,NAME_PAYMENT_TYPE,CODE_REJECT_REASON,NAME_TYPE_SUITE,NAME_CLIENT_TYPE,NAME_GOODS_CATEGORY,NAME_PORTFOLIO,NAME_PRODUCT_TYPE,CHANNEL_TYPE,SELLERPLACE_AREA,NAME_SELLER_INDUSTRY,CNT_PAYMENT,NAME_YIELD_GROUP,PRODUCT_COMBINATION,DAYS_FIRST_DRAWING,DAYS_FIRST_DUE,DAYS_LAST_DUE_1ST_VERSION,DAYS_LAST_DUE,DAYS_TERMINATION,NFLAG_INSURED_ON_APPROVAL
0,2030495,271877,Consumer loans,1730.43,17145.0,17145.0,0.0,17145.0,SATURDAY,15,Y,1,0.0,0.182832,0.867336,XAP,Approved,-73,Cash through the bank,XAP,,Repeater,Mobile,POS,XNA,Country-wide,35,Connectivity,12.0,middle,POS mobile with interest,365243.0,-42.0,300.0,-42.0,-37.0,0.0
1,2802425,108129,Cash loans,25188.615,607500.0,679671.0,,607500.0,THURSDAY,11,Y,1,,,,XNA,Approved,-164,XNA,XAP,Unaccompanied,Repeater,XNA,Cash,x-sell,Contact center,-1,XNA,36.0,low_action,Cash X-Sell: low,365243.0,-134.0,916.0,365243.0,365243.0,1.0


In [100]:
payments = pd.read_csv('./data/installments_payments.csv')
payments.head(n=2)
##
#payments: 13 605 401 rows
#Unique clients: 339 587

Unnamed: 0,SK_ID_PREV,SK_ID_CURR,NUM_INSTALMENT_VERSION,NUM_INSTALMENT_NUMBER,DAYS_INSTALMENT,DAYS_ENTRY_PAYMENT,AMT_INSTALMENT,AMT_PAYMENT
0,1054186,161674,1.0,6,-1180.0,-1187.0,6948.36,6948.36
1,1330831,151639,0.0,34,-2156.0,-2156.0,1716.525,1716.525


In [72]:
bureau = pd.read_csv('./data/bureau.csv')
bureau.head(n=2)
##
#credit history bureau: 1 716 428 rows
#Unique clients: 305 811

Unnamed: 0,SK_ID_CURR,SK_ID_BUREAU,CREDIT_ACTIVE,CREDIT_CURRENCY,DAYS_CREDIT,CREDIT_DAY_OVERDUE,DAYS_CREDIT_ENDDATE,DAYS_ENDDATE_FACT,AMT_CREDIT_MAX_OVERDUE,CNT_CREDIT_PROLONG,AMT_CREDIT_SUM,AMT_CREDIT_SUM_DEBT,AMT_CREDIT_SUM_LIMIT,AMT_CREDIT_SUM_OVERDUE,CREDIT_TYPE,DAYS_CREDIT_UPDATE,AMT_ANNUITY
0,215354,5714462,Closed,currency 1,-497,0,-153.0,-153.0,,0,91323.0,0.0,,0.0,Consumer credit,-131,
1,215354,5714463,Active,currency 1,-208,0,1075.0,,,0,225000.0,171342.0,,0.0,Credit card,-20,


In [None]:
bureau_balance = pd.read_csv('./data/bureau_balance.csv')
bureau_balance.head(n=2)

Unnamed: 0,SK_ID_BUREAU,MONTHS_BALANCE,STATUS
0,5715448,0,C
1,5715448,-1,C


In [74]:
credit_card_balance = pd.read_csv('./data/credit_card_balance.csv')
credit_card_balance.head(n=2)
##
#credit history bureau: 3 840 312 rows
#Unique clients: 103 558

Unnamed: 0,SK_ID_PREV,SK_ID_CURR,MONTHS_BALANCE,AMT_BALANCE,AMT_CREDIT_LIMIT_ACTUAL,AMT_DRAWINGS_ATM_CURRENT,AMT_DRAWINGS_CURRENT,AMT_DRAWINGS_OTHER_CURRENT,AMT_DRAWINGS_POS_CURRENT,AMT_INST_MIN_REGULARITY,AMT_PAYMENT_CURRENT,AMT_PAYMENT_TOTAL_CURRENT,AMT_RECEIVABLE_PRINCIPAL,AMT_RECIVABLE,AMT_TOTAL_RECEIVABLE,CNT_DRAWINGS_ATM_CURRENT,CNT_DRAWINGS_CURRENT,CNT_DRAWINGS_OTHER_CURRENT,CNT_DRAWINGS_POS_CURRENT,CNT_INSTALMENT_MATURE_CUM,NAME_CONTRACT_STATUS,SK_DPD,SK_DPD_DEF
0,2562384,378907,-6,56.97,135000,0.0,877.5,0.0,877.5,1700.325,1800.0,1800.0,0.0,0.0,0.0,0.0,1,0.0,1.0,35.0,Active,0,0
1,2582071,363914,-1,63975.555,45000,2250.0,2250.0,0.0,0.0,2250.0,2250.0,2250.0,60175.08,64875.555,64875.555,1.0,1,0.0,0.0,69.0,Active,0,0


In [75]:
balance = pd.read_csv('./data/POS_CASH_balance.csv')
balance.head(n=2)
##
#credit history bureau: 10 001 358 rows
#Unique clients: 337 252

Unnamed: 0,SK_ID_PREV,SK_ID_CURR,MONTHS_BALANCE,CNT_INSTALMENT,CNT_INSTALMENT_FUTURE,NAME_CONTRACT_STATUS,SK_DPD,SK_DPD_DEF
0,1803195,182943,-31,48.0,45.0,Active,0,0
1,1715348,367990,-33,36.0,35.0,Active,0,0


In [81]:
train.merge(applications, left_on = 'sk_id_curr', right_on = 'SK_ID_CURR', how = 'inner')

Unnamed: 0,sk_id_curr,target,name_contract_type,code_gender,flag_own_car,flag_own_realty,cnt_children,amt_income_total,amt_credit,amt_annuity,amt_goods_price,name_type_suite,name_income_type,name_education_type,name_family_status,name_housing_type,region_population_relative,days_birth,days_employed,days_registration,days_id_publish,own_car_age,flag_mobil,flag_emp_phone,flag_work_phone,flag_cont_mobile,flag_phone,flag_email,occupation_type,cnt_fam_members,region_rating_client,region_rating_client_w_city,weekday_appr_process_start,hour_appr_process_start,reg_region_not_live_region,reg_region_not_work_region,live_region_not_work_region,reg_city_not_live_city,reg_city_not_work_city,live_city_not_work_city,organization_type,ext_source_1,ext_source_2,ext_source_3,apartments_avg,basementarea_avg,years_beginexpluatation_avg,years_build_avg,commonarea_avg,elevators_avg,entrances_avg,floorsmax_avg,floorsmin_avg,landarea_avg,livingapartments_avg,livingarea_avg,nonlivingapartments_avg,nonlivingarea_avg,apartments_mode,basementarea_mode,years_beginexpluatation_mode,years_build_mode,commonarea_mode,elevators_mode,entrances_mode,floorsmax_mode,floorsmin_mode,landarea_mode,livingapartments_mode,livingarea_mode,nonlivingapartments_mode,nonlivingarea_mode,apartments_medi,basementarea_medi,years_beginexpluatation_medi,...,nonlivingapartments_medi,nonlivingarea_medi,fondkapremont_mode,housetype_mode,totalarea_mode,wallsmaterial_mode,emergencystate_mode,obs_30_cnt_social_circle,def_30_cnt_social_circle,obs_60_cnt_social_circle,def_60_cnt_social_circle,days_last_phone_change,flag_document_2,flag_document_3,flag_document_4,flag_document_5,flag_document_6,flag_document_7,flag_document_8,flag_document_9,flag_document_10,flag_document_11,flag_document_12,flag_document_13,flag_document_14,flag_document_15,flag_document_16,flag_document_17,flag_document_18,flag_document_19,flag_document_20,flag_document_21,amt_req_credit_bureau_hour,amt_req_credit_bureau_day,amt_req_credit_bureau_week,amt_req_credit_bureau_mon,amt_req_credit_bureau_qrt,amt_req_credit_bureau_year,SK_ID_PREV,SK_ID_CURR,NAME_CONTRACT_TYPE,AMT_ANNUITY,AMT_APPLICATION,AMT_CREDIT,AMT_DOWN_PAYMENT,AMT_GOODS_PRICE,WEEKDAY_APPR_PROCESS_START,HOUR_APPR_PROCESS_START,FLAG_LAST_APPL_PER_CONTRACT,NFLAG_LAST_APPL_IN_DAY,RATE_DOWN_PAYMENT,RATE_INTEREST_PRIMARY,RATE_INTEREST_PRIVILEGED,NAME_CASH_LOAN_PURPOSE,NAME_CONTRACT_STATUS,DAYS_DECISION,NAME_PAYMENT_TYPE,CODE_REJECT_REASON,NAME_TYPE_SUITE,NAME_CLIENT_TYPE,NAME_GOODS_CATEGORY,NAME_PORTFOLIO,NAME_PRODUCT_TYPE,CHANNEL_TYPE,SELLERPLACE_AREA,NAME_SELLER_INDUSTRY,CNT_PAYMENT,NAME_YIELD_GROUP,PRODUCT_COMBINATION,DAYS_FIRST_DRAWING,DAYS_FIRST_DUE,DAYS_LAST_DUE_1ST_VERSION,DAYS_LAST_DUE,DAYS_TERMINATION,NFLAG_INSURED_ON_APPROVAL
0,342217,0,Revolving loans,F,N,Y,0,202500.0,585000.0,29250.0,585000.0,Unaccompanied,State servant,Higher education,Married,House / apartment,0.007120,-14937,-5026,-1.0,-4606,,1,1,0,1,0,0,Managers,2.0,2,2,FRIDAY,10,0,0,0,0,0,0,Government,,0.612914,0.408359,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,...,,,,,,,,0.0,0.0,0.0,0.0,-797.0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,1.0,0.0,0.0,2.0,1167072,342217,Cash loans,53277.660,1354500.0,1483231.5,,1354500.0,TUESDAY,9,Y,1,,,,XNA,Refused,-598,Cash through the bank,HC,,Repeater,XNA,Cash,x-sell,Credit and cash offices,-1,XNA,48.0,low_normal,Cash X-Sell: low,,,,,,
1,342217,0,Revolving loans,F,N,Y,0,202500.0,585000.0,29250.0,585000.0,Unaccompanied,State servant,Higher education,Married,House / apartment,0.007120,-14937,-5026,-1.0,-4606,,1,1,0,1,0,0,Managers,2.0,2,2,FRIDAY,10,0,0,0,0,0,0,Government,,0.612914,0.408359,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,...,,,,,,,,0.0,0.0,0.0,0.0,-797.0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,1.0,0.0,0.0,2.0,1871202,342217,Consumer loans,5468.490,58441.5,52596.0,5845.5,58441.5,TUESDAY,12,Y,1,0.108934,,,XAP,Approved,-612,Cash through the bank,XAP,,Repeater,Computers,POS,XNA,Country-wide,100,Connectivity,12.0,middle,POS mobile with interest,365243.0,-581.0,-251.0,-491.0,-488.0,0.0
2,342217,0,Revolving loans,F,N,Y,0,202500.0,585000.0,29250.0,585000.0,Unaccompanied,State servant,Higher education,Married,House / apartment,0.007120,-14937,-5026,-1.0,-4606,,1,1,0,1,0,0,Managers,2.0,2,2,FRIDAY,10,0,0,0,0,0,0,Government,,0.612914,0.408359,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,...,,,,,,,,0.0,0.0,0.0,0.0,-797.0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,1.0,0.0,0.0,2.0,1828554,342217,Cash loans,41728.410,1129500.0,1293502.5,,1129500.0,TUESDAY,9,Y,1,,,,XNA,Refused,-598,Cash through the bank,HC,,Repeater,XNA,Cash,x-sell,Credit and cash offices,-1,XNA,60.0,low_normal,Cash X-Sell: low,,,,,,
3,342217,0,Revolving loans,F,N,Y,0,202500.0,585000.0,29250.0,585000.0,Unaccompanied,State servant,Higher education,Married,House / apartment,0.007120,-14937,-5026,-1.0,-4606,,1,1,0,1,0,0,Managers,2.0,2,2,FRIDAY,10,0,0,0,0,0,0,Government,,0.612914,0.408359,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,...,,,,,,,,0.0,0.0,0.0,0.0,-797.0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,1.0,0.0,0.0,2.0,2452679,342217,Consumer loans,23194.260,106816.5,125428.5,0.0,106816.5,SATURDAY,13,Y,1,0.000000,,,XAP,Approved,-797,Cash through the bank,XAP,Family,New,Audio/Video,POS,XNA,Country-wide,1500,Consumer electronics,6.0,middle,POS household with interest,365243.0,-765.0,-615.0,-615.0,-607.0,0.0
4,342217,0,Revolving loans,F,N,Y,0,202500.0,585000.0,29250.0,585000.0,Unaccompanied,State servant,Higher education,Married,House / apartment,0.007120,-14937,-5026,-1.0,-4606,,1,1,0,1,0,0,Managers,2.0,2,2,FRIDAY,10,0,0,0,0,0,0,Government,,0.612914,0.408359,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,...,,,,,,,,0.0,0.0,0.0,0.0,-797.0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,1.0,0.0,0.0,2.0,2289736,342217,Cash loans,48887.325,1354500.0,1515415.5,,1354500.0,TUESDAY,9,Y,1,,,,XNA,Refused,-598,Cash through the bank,HC,,Repeater,XNA,Cash,x-sell,Credit and cash offices,-1,XNA,60.0,low_normal,Cash X-Sell: low,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
988723,409734,0,Cash loans,M,N,Y,1,135000.0,781920.0,47835.0,675000.0,Unaccompanied,Working,Secondary / secondary special,Married,House / apartment,0.008625,-16263,-339,-4694.0,-4331,,1,1,0,1,0,0,Security staff,3.0,2,2,TUESDAY,14,0,1,1,0,0,0,Security,,0.555707,0.778904,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,...,,,,,,,,0.0,0.0,0.0,0.0,-1130.0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0.0,0.0,0.0,3.0,1597631,409734,Consumer loans,4971.195,90625.5,109764.0,0.0,90625.5,SATURDAY,14,Y,1,0.000000,,,XAP,Approved,-346,Cash through the bank,XAP,Unaccompanied,Repeater,Computers,POS,XNA,Country-wide,1100,Consumer electronics,24.0,low_action,POS household without interest,365243.0,-316.0,374.0,365243.0,365243.0,1.0
988724,409734,0,Cash loans,M,N,Y,1,135000.0,781920.0,47835.0,675000.0,Unaccompanied,Working,Secondary / secondary special,Married,House / apartment,0.008625,-16263,-339,-4694.0,-4331,,1,1,0,1,0,0,Security staff,3.0,2,2,TUESDAY,14,0,1,1,0,0,0,Security,,0.555707,0.778904,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,...,,,,,,,,0.0,0.0,0.0,0.0,-1130.0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0.0,0.0,0.0,3.0,2601523,409734,Revolving loans,2250.000,45000.0,45000.0,,45000.0,SATURDAY,15,Y,1,,,,XAP,Approved,-346,XNA,XAP,Unaccompanied,Repeater,XNA,Cards,walk-in,Country-wide,1100,Consumer electronics,0.0,XNA,Card Street,365243.0,365243.0,365243.0,365243.0,365243.0,0.0
988725,248507,0,Cash loans,F,N,N,0,112500.0,315000.0,15448.5,315000.0,Unaccompanied,Commercial associate,Secondary / secondary special,Married,House / apartment,0.020246,-14438,-2594,-3898.0,-484,,1,1,0,1,1,0,,2.0,3,3,TUESDAY,13,0,0,0,0,0,0,Business Entity Type 3,0.748107,0.463994,0.619528,,,0.9846,,,0.08,0.069,0.3333,,0.0201,,0.0695,,0.0207,,,0.9846,,,0.0806,0.069,0.3333,,0.0206,,0.0725,,0.0219,,,0.9846,...,,0.0211,,block of flats,0.0663,Panel,No,2.0,0.0,2.0,0.0,-352.0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0.0,0.0,0.0,4.0,2028897,248507,Cash loans,7227.540,67500.0,91476.0,,67500.0,MONDAY,13,Y,1,,,,Urgent needs,Approved,-352,Cash through the bank,XAP,,New,XNA,Cash,walk-in,Credit and cash offices,-1,XNA,24.0,high,Cash Street: high,365243.0,-322.0,368.0,365243.0,365243.0,1.0
988726,248507,0,Cash loans,F,N,N,0,112500.0,315000.0,15448.5,315000.0,Unaccompanied,Commercial associate,Secondary / secondary special,Married,House / apartment,0.020246,-14438,-2594,-3898.0,-484,,1,1,0,1,1,0,,2.0,3,3,TUESDAY,13,0,0,0,0,0,0,Business Entity Type 3,0.748107,0.463994,0.619528,,,0.9846,,,0.08,0.069,0.3333,,0.0201,,0.0695,,0.0207,,,0.9846,,,0.0806,0.069,0.3333,,0.0206,,0.0725,,0.0219,,,0.9846,...,,0.0211,,block of flats,0.0663,Panel,No,2.0,0.0,2.0,0.0,-352.0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0.0,0.0,0.0,4.0,2052887,248507,Cash loans,,0.0,0.0,,,MONDAY,13,Y,1,,,,XNA,Canceled,-351,XNA,XAP,,New,XNA,XNA,XNA,Credit and cash offices,-1,XNA,,XNA,Cash,,,,,,


In [82]:
test.merge(applications, left_on = 'sk_id_curr', right_on = 'SK_ID_CURR', how = 'outer')

Unnamed: 0,sk_id_curr,name_contract_type,code_gender,flag_own_car,flag_own_realty,cnt_children,amt_income_total,amt_credit,amt_annuity,amt_goods_price,name_type_suite,name_income_type,name_education_type,name_family_status,name_housing_type,region_population_relative,days_birth,days_employed,days_registration,days_id_publish,own_car_age,flag_mobil,flag_emp_phone,flag_work_phone,flag_cont_mobile,flag_phone,flag_email,occupation_type,cnt_fam_members,region_rating_client,region_rating_client_w_city,weekday_appr_process_start,hour_appr_process_start,reg_region_not_live_region,reg_region_not_work_region,live_region_not_work_region,reg_city_not_live_city,reg_city_not_work_city,live_city_not_work_city,organization_type,ext_source_1,ext_source_2,ext_source_3,apartments_avg,basementarea_avg,years_beginexpluatation_avg,years_build_avg,commonarea_avg,elevators_avg,entrances_avg,floorsmax_avg,floorsmin_avg,landarea_avg,livingapartments_avg,livingarea_avg,nonlivingapartments_avg,nonlivingarea_avg,apartments_mode,basementarea_mode,years_beginexpluatation_mode,years_build_mode,commonarea_mode,elevators_mode,entrances_mode,floorsmax_mode,floorsmin_mode,landarea_mode,livingapartments_mode,livingarea_mode,nonlivingapartments_mode,nonlivingarea_mode,apartments_medi,basementarea_medi,years_beginexpluatation_medi,years_build_medi,...,nonlivingapartments_medi,nonlivingarea_medi,fondkapremont_mode,housetype_mode,totalarea_mode,wallsmaterial_mode,emergencystate_mode,obs_30_cnt_social_circle,def_30_cnt_social_circle,obs_60_cnt_social_circle,def_60_cnt_social_circle,days_last_phone_change,flag_document_2,flag_document_3,flag_document_4,flag_document_5,flag_document_6,flag_document_7,flag_document_8,flag_document_9,flag_document_10,flag_document_11,flag_document_12,flag_document_13,flag_document_14,flag_document_15,flag_document_16,flag_document_17,flag_document_18,flag_document_19,flag_document_20,flag_document_21,amt_req_credit_bureau_hour,amt_req_credit_bureau_day,amt_req_credit_bureau_week,amt_req_credit_bureau_mon,amt_req_credit_bureau_qrt,amt_req_credit_bureau_year,SK_ID_PREV,SK_ID_CURR,NAME_CONTRACT_TYPE,AMT_ANNUITY,AMT_APPLICATION,AMT_CREDIT,AMT_DOWN_PAYMENT,AMT_GOODS_PRICE,WEEKDAY_APPR_PROCESS_START,HOUR_APPR_PROCESS_START,FLAG_LAST_APPL_PER_CONTRACT,NFLAG_LAST_APPL_IN_DAY,RATE_DOWN_PAYMENT,RATE_INTEREST_PRIMARY,RATE_INTEREST_PRIVILEGED,NAME_CASH_LOAN_PURPOSE,NAME_CONTRACT_STATUS,DAYS_DECISION,NAME_PAYMENT_TYPE,CODE_REJECT_REASON,NAME_TYPE_SUITE,NAME_CLIENT_TYPE,NAME_GOODS_CATEGORY,NAME_PORTFOLIO,NAME_PRODUCT_TYPE,CHANNEL_TYPE,SELLERPLACE_AREA,NAME_SELLER_INDUSTRY,CNT_PAYMENT,NAME_YIELD_GROUP,PRODUCT_COMBINATION,DAYS_FIRST_DRAWING,DAYS_FIRST_DUE,DAYS_LAST_DUE_1ST_VERSION,DAYS_LAST_DUE,DAYS_TERMINATION,NFLAG_INSURED_ON_APPROVAL
0,174545,Cash loans,F,N,Y,0,135000.0,654498.0,31617.0,585000.0,Unaccompanied,Pensioner,Secondary / secondary special,Married,House / apartment,0.005313,-20614,365243,-8985.0,-3098,,1,0,0,1,0,0,,2.0,2,2,TUESDAY,9,0,0,0,0,0,0,XNA,,0.622652,0.602386,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,...,,,,,,,,0.0,0.0,0.0,0.0,-1237.0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0.0,0.0,0.0,3.0,2282880,174545,Cash loans,16639.875,202500.0,222547.5,,202500.0,FRIDAY,10,Y,1,,,,XNA,Approved,-879,XNA,XAP,Unaccompanied,Repeater,XNA,Cash,x-sell,Credit and cash offices,-1,XNA,18.0,middle,Cash X-Sell: middle,365243.0,-849.0,-339.0,-339.0,-331.0,1.0
1,174545,Cash loans,F,N,Y,0,135000.0,654498.0,31617.0,585000.0,Unaccompanied,Pensioner,Secondary / secondary special,Married,House / apartment,0.005313,-20614,365243,-8985.0,-3098,,1,0,0,1,0,0,,2.0,2,2,TUESDAY,9,0,0,0,0,0,0,XNA,,0.622652,0.602386,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,...,,,,,,,,0.0,0.0,0.0,0.0,-1237.0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0.0,0.0,0.0,3.0,1926239,174545,Consumer loans,10236.330,98010.0,112162.5,0.0,98010.0,THURSDAY,11,Y,1,0.000000,,,XAP,Approved,-1237,Cash through the bank,XAP,Unaccompanied,New,Audio/Video,POS,XNA,Country-wide,1300,Consumer electronics,12.0,low_action,POS household without interest,365243.0,-1206.0,-876.0,-876.0,-870.0,0.0
2,174545,Cash loans,F,N,Y,0,135000.0,654498.0,31617.0,585000.0,Unaccompanied,Pensioner,Secondary / secondary special,Married,House / apartment,0.005313,-20614,365243,-8985.0,-3098,,1,0,0,1,0,0,,2.0,2,2,TUESDAY,9,0,0,0,0,0,0,XNA,,0.622652,0.602386,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,...,,,,,,,,0.0,0.0,0.0,0.0,-1237.0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0.0,0.0,0.0,3.0,2332561,174545,Consumer loans,6951.690,35766.0,37656.0,0.0,35766.0,WEDNESDAY,15,Y,1,0.000000,,,XAP,Approved,-846,Cash through the bank,XAP,Unaccompanied,Repeater,Consumer Electronics,POS,XNA,Country-wide,1300,Consumer electronics,6.0,middle,POS household with interest,365243.0,-815.0,-665.0,-665.0,-660.0,0.0
3,174545,Cash loans,F,N,Y,0,135000.0,654498.0,31617.0,585000.0,Unaccompanied,Pensioner,Secondary / secondary special,Married,House / apartment,0.005313,-20614,365243,-8985.0,-3098,,1,0,0,1,0,0,,2.0,2,2,TUESDAY,9,0,0,0,0,0,0,XNA,,0.622652,0.602386,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,...,,,,,,,,0.0,0.0,0.0,0.0,-1237.0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0.0,0.0,0.0,3.0,1120603,174545,Cash loans,23644.935,675000.0,808650.0,,675000.0,TUESDAY,14,Y,1,,,,XNA,Approved,-196,XNA,XAP,Unaccompanied,Repeater,XNA,Cash,x-sell,Contact center,-1,XNA,60.0,low_normal,Cash X-Sell: low,365243.0,-166.0,1604.0,365243.0,365243.0,1.0
4,209898,Cash loans,M,Y,Y,0,135000.0,234576.0,25393.5,202500.0,Unaccompanied,Working,Secondary / secondary special,Single / not married,With parents,0.010643,-9080,-1080,-1909.0,-1743,13.0,1,1,0,1,0,0,Drivers,1.0,2,2,TUESDAY,17,0,0,0,0,0,0,Government,,0.655960,0.492060,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,...,,,,,,,,1.0,0.0,1.0,0.0,-379.0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,1.0,0.0,0.0,0.0,2583897,209898,Consumer loans,8720.775,93141.0,74511.0,18630.0,93141.0,MONDAY,16,Y,1,0.217839,,,XAP,Approved,-379,Cash through the bank,XAP,,New,Computers,POS,XNA,Country-wide,30,Connectivity,12.0,high,POS mobile with interest,365243.0,-341.0,-11.0,-11.0,-9.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
424968,325319,Cash loans,F,N,Y,0,67500.0,104256.0,10287.0,90000.0,Unaccompanied,Pensioner,Secondary / secondary special,Single / not married,House / apartment,0.016612,-22027,365243,-9211.0,-4776,,1,0,0,1,1,0,,1.0,2,2,THURSDAY,15,0,0,0,0,0,0,XNA,,0.407112,0.694093,0.0928,0.1015,0.9871,0.8232,0.0439,0.0,0.2069,0.1667,0.0417,0.1185,0.0756,0.0863,0.0,0.0,0.0945,0.1053,0.9871,0.8301,0.0443,0.0,0.2069,0.1667,0.0417,0.1212,0.0826,0.0899,0.0,0.0,0.0937,0.1015,0.9871,0.8256,...,0.0,0.0,reg oper account,block of flats,0.0919,Panel,No,5.0,2.0,5.0,1.0,-475.0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0.0,1.0,0.0,3.0,1180463,325319,Cash loans,8992.800,90000.0,90000.0,,90000.0,WEDNESDAY,11,Y,1,,,,XNA,Approved,-869,Cash through the bank,XAP,Family,Repeater,XNA,Cash,x-sell,Country-wide,24,Connectivity,12.0,middle,Cash X-Sell: middle,365243.0,-839.0,-509.0,-599.0,-585.0,0.0
424969,325319,Cash loans,F,N,Y,0,67500.0,104256.0,10287.0,90000.0,Unaccompanied,Pensioner,Secondary / secondary special,Single / not married,House / apartment,0.016612,-22027,365243,-9211.0,-4776,,1,0,0,1,1,0,,1.0,2,2,THURSDAY,15,0,0,0,0,0,0,XNA,,0.407112,0.694093,0.0928,0.1015,0.9871,0.8232,0.0439,0.0,0.2069,0.1667,0.0417,0.1185,0.0756,0.0863,0.0,0.0,0.0945,0.1053,0.9871,0.8301,0.0443,0.0,0.2069,0.1667,0.0417,0.1212,0.0826,0.0899,0.0,0.0,0.0937,0.1015,0.9871,0.8256,...,0.0,0.0,reg oper account,block of flats,0.0919,Panel,No,5.0,2.0,5.0,1.0,-475.0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0.0,1.0,0.0,3.0,2287769,325319,Consumer loans,8568.405,76450.5,67203.0,15300.0,76450.5,FRIDAY,14,Y,1,0.201970,,,XAP,Approved,-2183,Cash through the bank,XAP,Family,New,Mobile,POS,XNA,Stone,57,Connectivity,12.0,high,POS mobile with interest,365243.0,-2152.0,-1822.0,-1822.0,-1817.0,0.0
424970,325319,Cash loans,F,N,Y,0,67500.0,104256.0,10287.0,90000.0,Unaccompanied,Pensioner,Secondary / secondary special,Single / not married,House / apartment,0.016612,-22027,365243,-9211.0,-4776,,1,0,0,1,1,0,,1.0,2,2,THURSDAY,15,0,0,0,0,0,0,XNA,,0.407112,0.694093,0.0928,0.1015,0.9871,0.8232,0.0439,0.0,0.2069,0.1667,0.0417,0.1185,0.0756,0.0863,0.0,0.0,0.0945,0.1053,0.9871,0.8301,0.0443,0.0,0.2069,0.1667,0.0417,0.1212,0.0826,0.0899,0.0,0.0,0.0937,0.1015,0.9871,0.8256,...,0.0,0.0,reg oper account,block of flats,0.0919,Panel,No,5.0,2.0,5.0,1.0,-475.0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0.0,1.0,0.0,3.0,1642842,325319,Cash loans,13619.700,135000.0,135000.0,,135000.0,FRIDAY,17,Y,1,,,,XNA,Approved,-671,Cash through the bank,XAP,,Repeater,XNA,Cash,x-sell,Country-wide,24,Connectivity,12.0,middle,Cash X-Sell: middle,365243.0,-641.0,-311.0,-581.0,-552.0,0.0
424971,325319,Cash loans,F,N,Y,0,67500.0,104256.0,10287.0,90000.0,Unaccompanied,Pensioner,Secondary / secondary special,Single / not married,House / apartment,0.016612,-22027,365243,-9211.0,-4776,,1,0,0,1,1,0,,1.0,2,2,THURSDAY,15,0,0,0,0,0,0,XNA,,0.407112,0.694093,0.0928,0.1015,0.9871,0.8232,0.0439,0.0,0.2069,0.1667,0.0417,0.1185,0.0756,0.0863,0.0,0.0,0.0945,0.1053,0.9871,0.8301,0.0443,0.0,0.2069,0.1667,0.0417,0.1212,0.0826,0.0899,0.0,0.0,0.0937,0.1015,0.9871,0.8256,...,0.0,0.0,reg oper account,block of flats,0.0919,Panel,No,5.0,2.0,5.0,1.0,-475.0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0.0,1.0,0.0,3.0,2267667,325319,Cash loans,4915.485,45000.0,47970.0,,45000.0,FRIDAY,16,Y,1,,,,XNA,Refused,-475,Cash through the bank,HC,Unaccompanied,Repeater,XNA,Cash,x-sell,Country-wide,24,Connectivity,12.0,middle,Cash X-Sell: middle,,,,,,


In [None]:
#train.merge(payments, left_on = 'sk_id_curr', right_on = 'SK_ID_CURR', how = 'left')

In [None]:
#test.merge(payments, left_on = 'sk_id_curr', right_on = 'SK_ID_CURR', how = 'left')

In [83]:
train.merge(bureau, left_on = 'sk_id_curr', right_on = 'SK_ID_CURR', how = 'outer')

Unnamed: 0,sk_id_curr,target,name_contract_type,code_gender,flag_own_car,flag_own_realty,cnt_children,amt_income_total,amt_credit,amt_annuity,amt_goods_price,name_type_suite,name_income_type,name_education_type,name_family_status,name_housing_type,region_population_relative,days_birth,days_employed,days_registration,days_id_publish,own_car_age,flag_mobil,flag_emp_phone,flag_work_phone,flag_cont_mobile,flag_phone,flag_email,occupation_type,cnt_fam_members,region_rating_client,region_rating_client_w_city,weekday_appr_process_start,hour_appr_process_start,reg_region_not_live_region,reg_region_not_work_region,live_region_not_work_region,reg_city_not_live_city,reg_city_not_work_city,live_city_not_work_city,organization_type,ext_source_1,ext_source_2,ext_source_3,apartments_avg,basementarea_avg,years_beginexpluatation_avg,years_build_avg,commonarea_avg,elevators_avg,entrances_avg,floorsmax_avg,floorsmin_avg,landarea_avg,livingapartments_avg,livingarea_avg,nonlivingapartments_avg,nonlivingarea_avg,apartments_mode,basementarea_mode,years_beginexpluatation_mode,years_build_mode,commonarea_mode,elevators_mode,entrances_mode,floorsmax_mode,floorsmin_mode,landarea_mode,livingapartments_mode,livingarea_mode,nonlivingapartments_mode,nonlivingarea_mode,apartments_medi,basementarea_medi,years_beginexpluatation_medi,years_build_medi,commonarea_medi,elevators_medi,entrances_medi,floorsmax_medi,floorsmin_medi,landarea_medi,livingapartments_medi,livingarea_medi,nonlivingapartments_medi,nonlivingarea_medi,fondkapremont_mode,housetype_mode,totalarea_mode,wallsmaterial_mode,emergencystate_mode,obs_30_cnt_social_circle,def_30_cnt_social_circle,obs_60_cnt_social_circle,def_60_cnt_social_circle,days_last_phone_change,flag_document_2,flag_document_3,flag_document_4,flag_document_5,flag_document_6,flag_document_7,flag_document_8,flag_document_9,flag_document_10,flag_document_11,flag_document_12,flag_document_13,flag_document_14,flag_document_15,flag_document_16,flag_document_17,flag_document_18,flag_document_19,flag_document_20,flag_document_21,amt_req_credit_bureau_hour,amt_req_credit_bureau_day,amt_req_credit_bureau_week,amt_req_credit_bureau_mon,amt_req_credit_bureau_qrt,amt_req_credit_bureau_year,SK_ID_CURR,SK_ID_BUREAU,CREDIT_ACTIVE,CREDIT_CURRENCY,DAYS_CREDIT,CREDIT_DAY_OVERDUE,DAYS_CREDIT_ENDDATE,DAYS_ENDDATE_FACT,AMT_CREDIT_MAX_OVERDUE,CNT_CREDIT_PROLONG,AMT_CREDIT_SUM,AMT_CREDIT_SUM_DEBT,AMT_CREDIT_SUM_LIMIT,AMT_CREDIT_SUM_OVERDUE,CREDIT_TYPE,DAYS_CREDIT_UPDATE,AMT_ANNUITY
0,342217,0,Revolving loans,F,N,Y,0,202500.0,585000.0,29250.0,585000.0,Unaccompanied,State servant,Higher education,Married,House / apartment,0.007120,-14937,-5026,-1.0,-4606,,1,1,0,1,0,0,Managers,2.0,2,2,FRIDAY,10,0,0,0,0,0,0,Government,,0.612914,0.408359,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0.0,0.0,0.0,0.0,-797.0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,1.0,0.0,0.0,2.0,342217,6757526,Closed,currency 1,-1960,0,-864.0,-1523.0,,0,351171.0,0.0,0.0,0.0,Consumer credit,-1111,
1,342217,0,Revolving loans,F,N,Y,0,202500.0,585000.0,29250.0,585000.0,Unaccompanied,State servant,Higher education,Married,House / apartment,0.007120,-14937,-5026,-1.0,-4606,,1,1,0,1,0,0,Managers,2.0,2,2,FRIDAY,10,0,0,0,0,0,0,Government,,0.612914,0.408359,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0.0,0.0,0.0,0.0,-797.0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,1.0,0.0,0.0,2.0,342217,6757527,Closed,currency 1,-1340,0,-244.0,-340.0,,0,844236.0,0.0,0.0,0.0,Consumer credit,-337,
2,342217,0,Revolving loans,F,N,Y,0,202500.0,585000.0,29250.0,585000.0,Unaccompanied,State servant,Higher education,Married,House / apartment,0.007120,-14937,-5026,-1.0,-4606,,1,1,0,1,0,0,Managers,2.0,2,2,FRIDAY,10,0,0,0,0,0,0,Government,,0.612914,0.408359,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0.0,0.0,0.0,0.0,-797.0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,1.0,0.0,0.0,2.0,342217,6757528,Closed,currency 1,-2521,0,-695.0,-1340.0,,0,675000.0,0.0,0.0,0.0,Consumer credit,-1338,
3,342217,0,Revolving loans,F,N,Y,0,202500.0,585000.0,29250.0,585000.0,Unaccompanied,State servant,Higher education,Married,House / apartment,0.007120,-14937,-5026,-1.0,-4606,,1,1,0,1,0,0,Managers,2.0,2,2,FRIDAY,10,0,0,0,0,0,0,Government,,0.612914,0.408359,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0.0,0.0,0.0,0.0,-797.0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,1.0,0.0,0.0,2.0,342217,6757530,Active,currency 1,-470,0,899.0,,,0,672750.0,502056.0,0.0,0.0,Consumer credit,-9,
4,342217,0,Revolving loans,F,N,Y,0,202500.0,585000.0,29250.0,585000.0,Unaccompanied,State servant,Higher education,Married,House / apartment,0.007120,-14937,-5026,-1.0,-4606,,1,1,0,1,0,0,Managers,2.0,2,2,FRIDAY,10,0,0,0,0,0,0,Government,,0.612914,0.408359,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0.0,0.0,0.0,0.0,-797.0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,1.0,0.0,0.0,2.0,342217,6757531,Active,currency 1,-344,0,1482.0,,,0,1462500.0,1314193.5,0.0,0.0,Consumer credit,-36,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1025724,185050,0,Revolving loans,F,N,Y,2,135000.0,270000.0,13500.0,270000.0,Family,Commercial associate,Secondary / secondary special,Married,House / apartment,0.046220,-12970,-2955,-746.0,-1630,,1,1,1,1,1,0,Laborers,4.0,1,1,MONDAY,12,0,0,0,0,0,0,Business Entity Type 3,,0.696589,0.751724,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0.0,0.0,0.0,0.0,-718.0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0.0,0.0,0.0,1.0,185050,6557471,Active,currency 1,-762,0,2891.0,,,0,7200000.0,3180442.5,0.0,0.0,Mortgage,-21,
1025725,248507,0,Cash loans,F,N,N,0,112500.0,315000.0,15448.5,315000.0,Unaccompanied,Commercial associate,Secondary / secondary special,Married,House / apartment,0.020246,-14438,-2594,-3898.0,-484,,1,1,0,1,1,0,,2.0,3,3,TUESDAY,13,0,0,0,0,0,0,Business Entity Type 3,0.748107,0.463994,0.619528,,,0.9846,,,0.08,0.069,0.3333,,0.0201,,0.0695,,0.0207,,,0.9846,,,0.0806,0.069,0.3333,,0.0206,,0.0725,,0.0219,,,0.9846,,,0.08,0.069,0.3333,,0.0205,,0.0708,,0.0211,,block of flats,0.0663,Panel,No,2.0,0.0,2.0,0.0,-352.0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0.0,0.0,0.0,4.0,248507,6841602,Closed,currency 1,-1498,0,-1192.0,-1165.0,,0,23130.0,0.0,0.0,0.0,Consumer credit,-756,
1025726,248507,0,Cash loans,F,N,N,0,112500.0,315000.0,15448.5,315000.0,Unaccompanied,Commercial associate,Secondary / secondary special,Married,House / apartment,0.020246,-14438,-2594,-3898.0,-484,,1,1,0,1,1,0,,2.0,3,3,TUESDAY,13,0,0,0,0,0,0,Business Entity Type 3,0.748107,0.463994,0.619528,,,0.9846,,,0.08,0.069,0.3333,,0.0201,,0.0695,,0.0207,,,0.9846,,,0.0806,0.069,0.3333,,0.0206,,0.0725,,0.0219,,,0.9846,,,0.08,0.069,0.3333,,0.0205,,0.0708,,0.0211,,block of flats,0.0663,Panel,No,2.0,0.0,2.0,0.0,-352.0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0.0,0.0,0.0,4.0,248507,6841603,Closed,currency 1,-1497,0,-1192.0,-1192.0,,0,23130.0,0.0,,0.0,Consumer credit,-1109,
1025727,248507,0,Cash loans,F,N,N,0,112500.0,315000.0,15448.5,315000.0,Unaccompanied,Commercial associate,Secondary / secondary special,Married,House / apartment,0.020246,-14438,-2594,-3898.0,-484,,1,1,0,1,1,0,,2.0,3,3,TUESDAY,13,0,0,0,0,0,0,Business Entity Type 3,0.748107,0.463994,0.619528,,,0.9846,,,0.08,0.069,0.3333,,0.0201,,0.0695,,0.0207,,,0.9846,,,0.0806,0.069,0.3333,,0.0206,,0.0725,,0.0219,,,0.9846,,,0.08,0.069,0.3333,,0.0205,,0.0708,,0.0211,,block of flats,0.0663,Panel,No,2.0,0.0,2.0,0.0,-352.0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0.0,0.0,0.0,4.0,248507,6841604,Closed,currency 1,-362,0,-64.0,-78.0,,0,66316.5,,,0.0,Microloan,-78,


In [84]:
test.merge(bureau, left_on = 'sk_id_curr', right_on = 'SK_ID_CURR', how = 'inner')

Unnamed: 0,sk_id_curr,name_contract_type,code_gender,flag_own_car,flag_own_realty,cnt_children,amt_income_total,amt_credit,amt_annuity,amt_goods_price,name_type_suite,name_income_type,name_education_type,name_family_status,name_housing_type,region_population_relative,days_birth,days_employed,days_registration,days_id_publish,own_car_age,flag_mobil,flag_emp_phone,flag_work_phone,flag_cont_mobile,flag_phone,flag_email,occupation_type,cnt_fam_members,region_rating_client,region_rating_client_w_city,weekday_appr_process_start,hour_appr_process_start,reg_region_not_live_region,reg_region_not_work_region,live_region_not_work_region,reg_city_not_live_city,reg_city_not_work_city,live_city_not_work_city,organization_type,ext_source_1,ext_source_2,ext_source_3,apartments_avg,basementarea_avg,years_beginexpluatation_avg,years_build_avg,commonarea_avg,elevators_avg,entrances_avg,floorsmax_avg,floorsmin_avg,landarea_avg,livingapartments_avg,livingarea_avg,nonlivingapartments_avg,nonlivingarea_avg,apartments_mode,basementarea_mode,years_beginexpluatation_mode,years_build_mode,commonarea_mode,elevators_mode,entrances_mode,floorsmax_mode,floorsmin_mode,landarea_mode,livingapartments_mode,livingarea_mode,nonlivingapartments_mode,nonlivingarea_mode,apartments_medi,basementarea_medi,years_beginexpluatation_medi,years_build_medi,commonarea_medi,elevators_medi,entrances_medi,floorsmax_medi,floorsmin_medi,landarea_medi,livingapartments_medi,livingarea_medi,nonlivingapartments_medi,nonlivingarea_medi,fondkapremont_mode,housetype_mode,totalarea_mode,wallsmaterial_mode,emergencystate_mode,obs_30_cnt_social_circle,def_30_cnt_social_circle,obs_60_cnt_social_circle,def_60_cnt_social_circle,days_last_phone_change,flag_document_2,flag_document_3,flag_document_4,flag_document_5,flag_document_6,flag_document_7,flag_document_8,flag_document_9,flag_document_10,flag_document_11,flag_document_12,flag_document_13,flag_document_14,flag_document_15,flag_document_16,flag_document_17,flag_document_18,flag_document_19,flag_document_20,flag_document_21,amt_req_credit_bureau_hour,amt_req_credit_bureau_day,amt_req_credit_bureau_week,amt_req_credit_bureau_mon,amt_req_credit_bureau_qrt,amt_req_credit_bureau_year,SK_ID_CURR,SK_ID_BUREAU,CREDIT_ACTIVE,CREDIT_CURRENCY,DAYS_CREDIT,CREDIT_DAY_OVERDUE,DAYS_CREDIT_ENDDATE,DAYS_ENDDATE_FACT,AMT_CREDIT_MAX_OVERDUE,CNT_CREDIT_PROLONG,AMT_CREDIT_SUM,AMT_CREDIT_SUM_DEBT,AMT_CREDIT_SUM_LIMIT,AMT_CREDIT_SUM_OVERDUE,CREDIT_TYPE,DAYS_CREDIT_UPDATE,AMT_ANNUITY
0,174545,Cash loans,F,N,Y,0,135000.0,654498.0,31617.0,585000.0,Unaccompanied,Pensioner,Secondary / secondary special,Married,House / apartment,0.005313,-20614,365243,-8985.0,-3098,,1,0,0,1,0,0,,2.0,2,2,TUESDAY,9,0,0,0,0,0,0,XNA,,0.622652,0.602386,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0.0,0.0,0.0,0.0,-1237.0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0.0,0.0,0.0,3.0,174545,6577434,Closed,currency 1,-958,0,-866.0,-866.0,,0,29704.50,0.0,0.0,0.0,Consumer credit,-836,0.0
1,174545,Cash loans,F,N,Y,0,135000.0,654498.0,31617.0,585000.0,Unaccompanied,Pensioner,Secondary / secondary special,Married,House / apartment,0.005313,-20614,365243,-8985.0,-3098,,1,0,0,1,0,0,,2.0,2,2,TUESDAY,9,0,0,0,0,0,0,XNA,,0.622652,0.602386,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0.0,0.0,0.0,0.0,-1237.0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0.0,0.0,0.0,3.0,174545,6577435,Closed,currency 1,-1181,0,-85.0,-1027.0,,0,405000.00,,,0.0,Consumer credit,-1020,
2,174545,Cash loans,F,N,Y,0,135000.0,654498.0,31617.0,585000.0,Unaccompanied,Pensioner,Secondary / secondary special,Married,House / apartment,0.005313,-20614,365243,-8985.0,-3098,,1,0,0,1,0,0,,2.0,2,2,TUESDAY,9,0,0,0,0,0,0,XNA,,0.622652,0.602386,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0.0,0.0,0.0,0.0,-1237.0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0.0,0.0,0.0,3.0,174545,6577437,Closed,currency 1,-1316,0,-950.0,-1181.0,0.000,0,225000.00,0.0,0.0,0.0,Consumer credit,-1092,
3,174545,Cash loans,F,N,Y,0,135000.0,654498.0,31617.0,585000.0,Unaccompanied,Pensioner,Secondary / secondary special,Married,House / apartment,0.005313,-20614,365243,-8985.0,-3098,,1,0,0,1,0,0,,2.0,2,2,TUESDAY,9,0,0,0,0,0,0,XNA,,0.622652,0.602386,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0.0,0.0,0.0,0.0,-1237.0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0.0,0.0,0.0,3.0,174545,6577438,Sold,currency 1,-1026,0,69.0,-189.0,8085.105,0,450000.00,0.0,0.0,0.0,Consumer credit,-189,
4,174545,Cash loans,F,N,Y,0,135000.0,654498.0,31617.0,585000.0,Unaccompanied,Pensioner,Secondary / secondary special,Married,House / apartment,0.005313,-20614,365243,-8985.0,-3098,,1,0,0,1,0,0,,2.0,2,2,TUESDAY,9,0,0,0,0,0,0,XNA,,0.622652,0.602386,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0.0,0.0,0.0,0.0,-1237.0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0.0,0.0,0.0,3.0,174545,6577440,Closed,currency 1,-903,0,,-497.0,0.000,0,346500.00,0.0,0.0,0.0,Credit card,-492,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
439591,325319,Cash loans,F,N,Y,0,67500.0,104256.0,10287.0,90000.0,Unaccompanied,Pensioner,Secondary / secondary special,Single / not married,House / apartment,0.016612,-22027,365243,-9211.0,-4776,,1,0,0,1,1,0,,1.0,2,2,THURSDAY,15,0,0,0,0,0,0,XNA,,0.407112,0.694093,0.0928,0.1015,0.9871,0.8232,0.0439,0.0,0.2069,0.1667,0.0417,0.1185,0.0756,0.0863,0.0,0.0,0.0945,0.1053,0.9871,0.8301,0.0443,0.0,0.2069,0.1667,0.0417,0.1212,0.0826,0.0899,0.0,0.0,0.0937,0.1015,0.9871,0.8256,0.0442,0.0,0.2069,0.1667,0.0417,0.1206,0.077,0.0879,0.0,0.0,reg oper account,block of flats,0.0919,Panel,No,5.0,2.0,5.0,1.0,-475.0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0.0,1.0,0.0,3.0,325319,5883801,Active,currency 1,-309,0,56.0,,0.000,0,46980.18,14737.5,0.0,0.0,Consumer credit,-35,
439592,325319,Cash loans,F,N,Y,0,67500.0,104256.0,10287.0,90000.0,Unaccompanied,Pensioner,Secondary / secondary special,Single / not married,House / apartment,0.016612,-22027,365243,-9211.0,-4776,,1,0,0,1,1,0,,1.0,2,2,THURSDAY,15,0,0,0,0,0,0,XNA,,0.407112,0.694093,0.0928,0.1015,0.9871,0.8232,0.0439,0.0,0.2069,0.1667,0.0417,0.1185,0.0756,0.0863,0.0,0.0,0.0945,0.1053,0.9871,0.8301,0.0443,0.0,0.2069,0.1667,0.0417,0.1212,0.0826,0.0899,0.0,0.0,0.0937,0.1015,0.9871,0.8256,0.0442,0.0,0.2069,0.1667,0.0417,0.1206,0.077,0.0879,0.0,0.0,reg oper account,block of flats,0.0919,Panel,No,5.0,2.0,5.0,1.0,-475.0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0.0,1.0,0.0,3.0,325319,5883802,Closed,currency 1,-436,0,-259.0,-259.0,0.000,0,165060.00,0.0,0.0,0.0,Consumer credit,-256,0.0
439593,325319,Cash loans,F,N,Y,0,67500.0,104256.0,10287.0,90000.0,Unaccompanied,Pensioner,Secondary / secondary special,Single / not married,House / apartment,0.016612,-22027,365243,-9211.0,-4776,,1,0,0,1,1,0,,1.0,2,2,THURSDAY,15,0,0,0,0,0,0,XNA,,0.407112,0.694093,0.0928,0.1015,0.9871,0.8232,0.0439,0.0,0.2069,0.1667,0.0417,0.1185,0.0756,0.0863,0.0,0.0,0.0945,0.1053,0.9871,0.8301,0.0443,0.0,0.2069,0.1667,0.0417,0.1212,0.0826,0.0899,0.0,0.0,0.0937,0.1015,0.9871,0.8256,0.0442,0.0,0.2069,0.1667,0.0417,0.1206,0.077,0.0879,0.0,0.0,reg oper account,block of flats,0.0919,Panel,No,5.0,2.0,5.0,1.0,-475.0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0.0,1.0,0.0,3.0,325319,5883803,Closed,currency 1,-475,0,-436.0,-436.0,0.000,0,46980.18,0.0,0.0,0.0,Consumer credit,-431,11250.0
439594,325319,Cash loans,F,N,Y,0,67500.0,104256.0,10287.0,90000.0,Unaccompanied,Pensioner,Secondary / secondary special,Single / not married,House / apartment,0.016612,-22027,365243,-9211.0,-4776,,1,0,0,1,1,0,,1.0,2,2,THURSDAY,15,0,0,0,0,0,0,XNA,,0.407112,0.694093,0.0928,0.1015,0.9871,0.8232,0.0439,0.0,0.2069,0.1667,0.0417,0.1185,0.0756,0.0863,0.0,0.0,0.0945,0.1053,0.9871,0.8301,0.0443,0.0,0.2069,0.1667,0.0417,0.1212,0.0826,0.0899,0.0,0.0,0.0937,0.1015,0.9871,0.8256,0.0442,0.0,0.2069,0.1667,0.0417,0.1206,0.077,0.0879,0.0,0.0,reg oper account,block of flats,0.0919,Panel,No,5.0,2.0,5.0,1.0,-475.0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0.0,1.0,0.0,3.0,325319,5883804,Closed,currency 1,-651,0,-286.0,-378.0,0.000,0,31455.00,0.0,0.0,0.0,Consumer credit,-377,


In [85]:
train.merge(credit_card_balance, left_on = 'sk_id_curr', right_on = 'SK_ID_CURR', how = 'inner')

Unnamed: 0,sk_id_curr,target,name_contract_type,code_gender,flag_own_car,flag_own_realty,cnt_children,amt_income_total,amt_credit,amt_annuity,amt_goods_price,name_type_suite,name_income_type,name_education_type,name_family_status,name_housing_type,region_population_relative,days_birth,days_employed,days_registration,days_id_publish,own_car_age,flag_mobil,flag_emp_phone,flag_work_phone,flag_cont_mobile,flag_phone,flag_email,occupation_type,cnt_fam_members,region_rating_client,region_rating_client_w_city,weekday_appr_process_start,hour_appr_process_start,reg_region_not_live_region,reg_region_not_work_region,live_region_not_work_region,reg_city_not_live_city,reg_city_not_work_city,live_city_not_work_city,organization_type,ext_source_1,ext_source_2,ext_source_3,apartments_avg,basementarea_avg,years_beginexpluatation_avg,years_build_avg,commonarea_avg,elevators_avg,entrances_avg,floorsmax_avg,floorsmin_avg,landarea_avg,livingapartments_avg,livingarea_avg,nonlivingapartments_avg,nonlivingarea_avg,apartments_mode,basementarea_mode,years_beginexpluatation_mode,years_build_mode,commonarea_mode,elevators_mode,entrances_mode,floorsmax_mode,floorsmin_mode,landarea_mode,livingapartments_mode,livingarea_mode,nonlivingapartments_mode,nonlivingarea_mode,apartments_medi,basementarea_medi,years_beginexpluatation_medi,years_build_medi,commonarea_medi,elevators_medi,entrances_medi,floorsmax_medi,floorsmin_medi,landarea_medi,livingapartments_medi,livingarea_medi,nonlivingapartments_medi,nonlivingarea_medi,fondkapremont_mode,housetype_mode,totalarea_mode,wallsmaterial_mode,emergencystate_mode,obs_30_cnt_social_circle,def_30_cnt_social_circle,obs_60_cnt_social_circle,def_60_cnt_social_circle,days_last_phone_change,flag_document_2,flag_document_3,flag_document_4,flag_document_5,flag_document_6,flag_document_7,flag_document_8,flag_document_9,flag_document_10,flag_document_11,flag_document_12,flag_document_13,flag_document_14,flag_document_15,flag_document_16,flag_document_17,flag_document_18,flag_document_19,flag_document_20,flag_document_21,amt_req_credit_bureau_hour,amt_req_credit_bureau_day,amt_req_credit_bureau_week,amt_req_credit_bureau_mon,amt_req_credit_bureau_qrt,amt_req_credit_bureau_year,SK_ID_PREV,SK_ID_CURR,MONTHS_BALANCE,AMT_BALANCE,AMT_CREDIT_LIMIT_ACTUAL,AMT_DRAWINGS_ATM_CURRENT,AMT_DRAWINGS_CURRENT,AMT_DRAWINGS_OTHER_CURRENT,AMT_DRAWINGS_POS_CURRENT,AMT_INST_MIN_REGULARITY,AMT_PAYMENT_CURRENT,AMT_PAYMENT_TOTAL_CURRENT,AMT_RECEIVABLE_PRINCIPAL,AMT_RECIVABLE,AMT_TOTAL_RECEIVABLE,CNT_DRAWINGS_ATM_CURRENT,CNT_DRAWINGS_CURRENT,CNT_DRAWINGS_OTHER_CURRENT,CNT_DRAWINGS_POS_CURRENT,CNT_INSTALMENT_MATURE_CUM,NAME_CONTRACT_STATUS,SK_DPD,SK_DPD_DEF
0,183133,0,Cash loans,F,N,Y,0,112500.0,656811.0,30564.0,567000.0,"Spouse, partner",Pensioner,Secondary / secondary special,Married,House / apartment,0.016612,-22021,365243,-2979.0,-5036,,1,0,0,1,0,0,,2.0,2,2,MONDAY,11,0,0,0,0,0,0,XNA,,0.703983,0.755740,0.3711,0.2556,0.9955,0.9388,,0.36,0.3103,0.375,,0.0,,0.3977,,0.0,0.3782,0.2653,0.9955,0.9412,,0.3625,0.3103,0.375,,0.0,,0.4144,,0.0,0.3747,0.2556,0.9955,0.9396,,0.36,0.3103,0.375,,0.0,,0.4049,,0.0,,block of flats,0.3983,Panel,No,4.0,0.0,4.0,0.0,-1542.0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0.0,0.0,0.0,3.0,2582160,183133,-92,8453.7,67500,0.0,0.0,0.0,0.0,3375.0,6750.000,6750.0,7261.605,8453.7,8453.7,0.0,0,0.0,0.0,14.0,Active,0,0
1,183133,0,Cash loans,F,N,Y,0,112500.0,656811.0,30564.0,567000.0,"Spouse, partner",Pensioner,Secondary / secondary special,Married,House / apartment,0.016612,-22021,365243,-2979.0,-5036,,1,0,0,1,0,0,,2.0,2,2,MONDAY,11,0,0,0,0,0,0,XNA,,0.703983,0.755740,0.3711,0.2556,0.9955,0.9388,,0.36,0.3103,0.375,,0.0,,0.3977,,0.0,0.3782,0.2653,0.9955,0.9412,,0.3625,0.3103,0.375,,0.0,,0.4144,,0.0,0.3747,0.2556,0.9955,0.9396,,0.36,0.3103,0.375,,0.0,,0.4049,,0.0,,block of flats,0.3983,Panel,No,4.0,0.0,4.0,0.0,-1542.0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0.0,0.0,0.0,3.0,2582160,183133,-11,0.0,0,0.0,0.0,0.0,0.0,0.0,132.615,0.0,0.000,0.0,0.0,0.0,0,0.0,0.0,17.0,Active,0,0
2,183133,0,Cash loans,F,N,Y,0,112500.0,656811.0,30564.0,567000.0,"Spouse, partner",Pensioner,Secondary / secondary special,Married,House / apartment,0.016612,-22021,365243,-2979.0,-5036,,1,0,0,1,0,0,,2.0,2,2,MONDAY,11,0,0,0,0,0,0,XNA,,0.703983,0.755740,0.3711,0.2556,0.9955,0.9388,,0.36,0.3103,0.375,,0.0,,0.3977,,0.0,0.3782,0.2653,0.9955,0.9412,,0.3625,0.3103,0.375,,0.0,,0.4144,,0.0,0.3747,0.2556,0.9955,0.9396,,0.36,0.3103,0.375,,0.0,,0.4049,,0.0,,block of flats,0.3983,Panel,No,4.0,0.0,4.0,0.0,-1542.0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0.0,0.0,0.0,3.0,2582160,183133,-5,0.0,0,0.0,0.0,0.0,0.0,0.0,132.615,0.0,0.000,0.0,0.0,0.0,0,0.0,0.0,17.0,Active,0,0
3,183133,0,Cash loans,F,N,Y,0,112500.0,656811.0,30564.0,567000.0,"Spouse, partner",Pensioner,Secondary / secondary special,Married,House / apartment,0.016612,-22021,365243,-2979.0,-5036,,1,0,0,1,0,0,,2.0,2,2,MONDAY,11,0,0,0,0,0,0,XNA,,0.703983,0.755740,0.3711,0.2556,0.9955,0.9388,,0.36,0.3103,0.375,,0.0,,0.3977,,0.0,0.3782,0.2653,0.9955,0.9412,,0.3625,0.3103,0.375,,0.0,,0.4144,,0.0,0.3747,0.2556,0.9955,0.9396,,0.36,0.3103,0.375,,0.0,,0.4049,,0.0,,block of flats,0.3983,Panel,No,4.0,0.0,4.0,0.0,-1542.0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0.0,0.0,0.0,3.0,2582160,183133,-18,0.0,0,0.0,0.0,0.0,0.0,0.0,132.615,0.0,0.000,0.0,0.0,0.0,0,0.0,0.0,17.0,Active,0,0
4,183133,0,Cash loans,F,N,Y,0,112500.0,656811.0,30564.0,567000.0,"Spouse, partner",Pensioner,Secondary / secondary special,Married,House / apartment,0.016612,-22021,365243,-2979.0,-5036,,1,0,0,1,0,0,,2.0,2,2,MONDAY,11,0,0,0,0,0,0,XNA,,0.703983,0.755740,0.3711,0.2556,0.9955,0.9388,,0.36,0.3103,0.375,,0.0,,0.3977,,0.0,0.3782,0.2653,0.9955,0.9412,,0.3625,0.3103,0.375,,0.0,,0.4144,,0.0,0.3747,0.2556,0.9955,0.9396,,0.36,0.3103,0.375,,0.0,,0.4049,,0.0,,block of flats,0.3983,Panel,No,4.0,0.0,4.0,0.0,-1542.0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0.0,0.0,0.0,3.0,2582160,183133,-76,0.0,67500,0.0,0.0,0.0,0.0,0.0,132.615,0.0,0.000,0.0,0.0,0.0,0,0.0,0.0,17.0,Active,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2251914,409734,0,Cash loans,M,N,Y,1,135000.0,781920.0,47835.0,675000.0,Unaccompanied,Working,Secondary / secondary special,Married,House / apartment,0.008625,-16263,-339,-4694.0,-4331,,1,1,0,1,0,0,Security staff,3.0,2,2,TUESDAY,14,0,1,1,0,0,0,Security,,0.555707,0.778904,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0.0,0.0,0.0,0.0,-1130.0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0.0,0.0,0.0,3.0,2601523,409734,-6,0.0,90000,,0.0,,,0.0,,0.0,0.000,0.0,0.0,,0,,,0.0,Active,0,0
2251915,409734,0,Cash loans,M,N,Y,1,135000.0,781920.0,47835.0,675000.0,Unaccompanied,Working,Secondary / secondary special,Married,House / apartment,0.008625,-16263,-339,-4694.0,-4331,,1,1,0,1,0,0,Security staff,3.0,2,2,TUESDAY,14,0,1,1,0,0,0,Security,,0.555707,0.778904,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0.0,0.0,0.0,0.0,-1130.0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0.0,0.0,0.0,3.0,2601523,409734,-12,0.0,45000,,0.0,,,0.0,,0.0,0.000,0.0,0.0,,0,,,0.0,Active,0,0
2251916,409734,0,Cash loans,M,N,Y,1,135000.0,781920.0,47835.0,675000.0,Unaccompanied,Working,Secondary / secondary special,Married,House / apartment,0.008625,-16263,-339,-4694.0,-4331,,1,1,0,1,0,0,Security staff,3.0,2,2,TUESDAY,14,0,1,1,0,0,0,Security,,0.555707,0.778904,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0.0,0.0,0.0,0.0,-1130.0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0.0,0.0,0.0,3.0,2601523,409734,-10,0.0,90000,,0.0,,,0.0,,0.0,0.000,0.0,0.0,,0,,,0.0,Active,0,0
2251917,409734,0,Cash loans,M,N,Y,1,135000.0,781920.0,47835.0,675000.0,Unaccompanied,Working,Secondary / secondary special,Married,House / apartment,0.008625,-16263,-339,-4694.0,-4331,,1,1,0,1,0,0,Security staff,3.0,2,2,TUESDAY,14,0,1,1,0,0,0,Security,,0.555707,0.778904,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0.0,0.0,0.0,0.0,-1130.0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0.0,0.0,0.0,3.0,2601523,409734,-8,0.0,90000,,0.0,,,0.0,,0.0,0.000,0.0,0.0,,0,,,0.0,Active,0,0


In [86]:
test.merge(credit_card_balance, left_on = 'sk_id_curr', right_on = 'SK_ID_CURR', how = 'inner')

Unnamed: 0,sk_id_curr,name_contract_type,code_gender,flag_own_car,flag_own_realty,cnt_children,amt_income_total,amt_credit,amt_annuity,amt_goods_price,name_type_suite,name_income_type,name_education_type,name_family_status,name_housing_type,region_population_relative,days_birth,days_employed,days_registration,days_id_publish,own_car_age,flag_mobil,flag_emp_phone,flag_work_phone,flag_cont_mobile,flag_phone,flag_email,occupation_type,cnt_fam_members,region_rating_client,region_rating_client_w_city,weekday_appr_process_start,hour_appr_process_start,reg_region_not_live_region,reg_region_not_work_region,live_region_not_work_region,reg_city_not_live_city,reg_city_not_work_city,live_city_not_work_city,organization_type,ext_source_1,ext_source_2,ext_source_3,apartments_avg,basementarea_avg,years_beginexpluatation_avg,years_build_avg,commonarea_avg,elevators_avg,entrances_avg,floorsmax_avg,floorsmin_avg,landarea_avg,livingapartments_avg,livingarea_avg,nonlivingapartments_avg,nonlivingarea_avg,apartments_mode,basementarea_mode,years_beginexpluatation_mode,years_build_mode,commonarea_mode,elevators_mode,entrances_mode,floorsmax_mode,floorsmin_mode,landarea_mode,livingapartments_mode,livingarea_mode,nonlivingapartments_mode,nonlivingarea_mode,apartments_medi,basementarea_medi,years_beginexpluatation_medi,years_build_medi,commonarea_medi,elevators_medi,entrances_medi,floorsmax_medi,floorsmin_medi,landarea_medi,livingapartments_medi,livingarea_medi,nonlivingapartments_medi,nonlivingarea_medi,fondkapremont_mode,housetype_mode,totalarea_mode,wallsmaterial_mode,emergencystate_mode,obs_30_cnt_social_circle,def_30_cnt_social_circle,obs_60_cnt_social_circle,def_60_cnt_social_circle,days_last_phone_change,flag_document_2,flag_document_3,flag_document_4,flag_document_5,flag_document_6,flag_document_7,flag_document_8,flag_document_9,flag_document_10,flag_document_11,flag_document_12,flag_document_13,flag_document_14,flag_document_15,flag_document_16,flag_document_17,flag_document_18,flag_document_19,flag_document_20,flag_document_21,amt_req_credit_bureau_hour,amt_req_credit_bureau_day,amt_req_credit_bureau_week,amt_req_credit_bureau_mon,amt_req_credit_bureau_qrt,amt_req_credit_bureau_year,SK_ID_PREV,SK_ID_CURR,MONTHS_BALANCE,AMT_BALANCE,AMT_CREDIT_LIMIT_ACTUAL,AMT_DRAWINGS_ATM_CURRENT,AMT_DRAWINGS_CURRENT,AMT_DRAWINGS_OTHER_CURRENT,AMT_DRAWINGS_POS_CURRENT,AMT_INST_MIN_REGULARITY,AMT_PAYMENT_CURRENT,AMT_PAYMENT_TOTAL_CURRENT,AMT_RECEIVABLE_PRINCIPAL,AMT_RECIVABLE,AMT_TOTAL_RECEIVABLE,CNT_DRAWINGS_ATM_CURRENT,CNT_DRAWINGS_CURRENT,CNT_DRAWINGS_OTHER_CURRENT,CNT_DRAWINGS_POS_CURRENT,CNT_INSTALMENT_MATURE_CUM,NAME_CONTRACT_STATUS,SK_DPD,SK_DPD_DEF
0,201672,Cash loans,M,N,Y,0,202500.0,900000.0,35158.5,900000.0,Unaccompanied,Working,Secondary / secondary special,Married,Municipal apartment,0.010006,-11757,-1593,-4380.0,-81,,1,1,0,1,0,0,Drivers,2.0,2,2,SATURDAY,8,0,0,0,0,1,1,Self-employed,0.068939,0.247563,0.227613,0.0340,0.0821,0.9861,0.8096,,0.0,0.1034,0.0833,0.1250,0.0000,0.0277,,0.0000,0.0649,0.0347,0.0852,0.9861,0.8171,,0.0,0.1034,0.0833,0.1250,0.0000,0.0303,,0.0000,0.0688,0.0344,0.0821,0.9861,0.8121,,0.0,0.1034,0.0833,0.1250,0.0000,0.0282,,0.0000,0.0663,not specified,block of flats,0.0355,"Stone, brick",No,0.0,0.0,0.0,0.0,-524.0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0.0,0.0,1.0,2.0,1675647,201672,-92,0.000,135000,,0.0,,,,0.00,0.0,0.0,0.000,0.000,,0,,,,Signed,0,0
1,201672,Cash loans,M,N,Y,0,202500.0,900000.0,35158.5,900000.0,Unaccompanied,Working,Secondary / secondary special,Married,Municipal apartment,0.010006,-11757,-1593,-4380.0,-81,,1,1,0,1,0,0,Drivers,2.0,2,2,SATURDAY,8,0,0,0,0,1,1,Self-employed,0.068939,0.247563,0.227613,0.0340,0.0821,0.9861,0.8096,,0.0,0.1034,0.0833,0.1250,0.0000,0.0277,,0.0000,0.0649,0.0347,0.0852,0.9861,0.8171,,0.0,0.1034,0.0833,0.1250,0.0000,0.0303,,0.0000,0.0688,0.0344,0.0821,0.9861,0.8121,,0.0,0.1034,0.0833,0.1250,0.0000,0.0282,,0.0000,0.0663,not specified,block of flats,0.0355,"Stone, brick",No,0.0,0.0,0.0,0.0,-524.0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0.0,0.0,1.0,2.0,1675647,201672,-18,0.000,0,0.0,0.0,0.0,0.0,0.0,696.33,0.0,0.0,0.000,0.000,0.0,0,0.0,0.0,18.0,Active,0,0
2,201672,Cash loans,M,N,Y,0,202500.0,900000.0,35158.5,900000.0,Unaccompanied,Working,Secondary / secondary special,Married,Municipal apartment,0.010006,-11757,-1593,-4380.0,-81,,1,1,0,1,0,0,Drivers,2.0,2,2,SATURDAY,8,0,0,0,0,1,1,Self-employed,0.068939,0.247563,0.227613,0.0340,0.0821,0.9861,0.8096,,0.0,0.1034,0.0833,0.1250,0.0000,0.0277,,0.0000,0.0649,0.0347,0.0852,0.9861,0.8171,,0.0,0.1034,0.0833,0.1250,0.0000,0.0303,,0.0000,0.0688,0.0344,0.0821,0.9861,0.8121,,0.0,0.1034,0.0833,0.1250,0.0000,0.0282,,0.0000,0.0663,not specified,block of flats,0.0355,"Stone, brick",No,0.0,0.0,0.0,0.0,-524.0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0.0,0.0,1.0,2.0,1675647,201672,-86,109337.805,135000,0.0,0.0,0.0,0.0,6750.0,9000.00,9000.0,106854.3,109337.805,109337.805,0.0,0,0.0,0.0,5.0,Active,0,0
3,201672,Cash loans,M,N,Y,0,202500.0,900000.0,35158.5,900000.0,Unaccompanied,Working,Secondary / secondary special,Married,Municipal apartment,0.010006,-11757,-1593,-4380.0,-81,,1,1,0,1,0,0,Drivers,2.0,2,2,SATURDAY,8,0,0,0,0,1,1,Self-employed,0.068939,0.247563,0.227613,0.0340,0.0821,0.9861,0.8096,,0.0,0.1034,0.0833,0.1250,0.0000,0.0277,,0.0000,0.0649,0.0347,0.0852,0.9861,0.8171,,0.0,0.1034,0.0833,0.1250,0.0000,0.0303,,0.0000,0.0688,0.0344,0.0821,0.9861,0.8121,,0.0,0.1034,0.0833,0.1250,0.0000,0.0282,,0.0000,0.0663,not specified,block of flats,0.0355,"Stone, brick",No,0.0,0.0,0.0,0.0,-524.0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0.0,0.0,1.0,2.0,1675647,201672,-47,0.000,0,0.0,0.0,0.0,0.0,0.0,696.33,0.0,0.0,0.000,0.000,0.0,0,0.0,0.0,18.0,Active,0,0
4,201672,Cash loans,M,N,Y,0,202500.0,900000.0,35158.5,900000.0,Unaccompanied,Working,Secondary / secondary special,Married,Municipal apartment,0.010006,-11757,-1593,-4380.0,-81,,1,1,0,1,0,0,Drivers,2.0,2,2,SATURDAY,8,0,0,0,0,1,1,Self-employed,0.068939,0.247563,0.227613,0.0340,0.0821,0.9861,0.8096,,0.0,0.1034,0.0833,0.1250,0.0000,0.0277,,0.0000,0.0649,0.0347,0.0852,0.9861,0.8171,,0.0,0.1034,0.0833,0.1250,0.0000,0.0303,,0.0000,0.0688,0.0344,0.0821,0.9861,0.8121,,0.0,0.1034,0.0833,0.1250,0.0000,0.0282,,0.0000,0.0663,not specified,block of flats,0.0355,"Stone, brick",No,0.0,0.0,0.0,0.0,-524.0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0.0,0.0,1.0,2.0,1675647,201672,-15,0.000,0,0.0,0.0,0.0,0.0,0.0,696.33,0.0,0.0,0.000,0.000,0.0,0,0.0,0.0,18.0,Active,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
976041,234032,Cash loans,F,N,Y,0,99000.0,225000.0,22252.5,225000.0,Unaccompanied,Working,Secondary / secondary special,Married,House / apartment,0.028663,-14678,-1686,-7597.0,-4620,,1,1,0,1,1,0,High skill tech staff,2.0,2,2,WEDNESDAY,11,0,0,0,0,0,0,Business Entity Type 3,,0.759403,0.652897,0.1381,0.0000,0.9801,0.7280,0.0028,0.0,0.1034,0.1667,0.2083,0.0359,0.1118,0.0543,0.0039,0.0182,0.1408,0.0000,0.9801,0.7387,0.0028,0.0,0.1034,0.1667,0.2083,0.0367,0.1221,0.0566,0.0039,0.0192,0.1395,0.0000,0.9801,0.7316,0.0028,0.0,0.1034,0.1667,0.2083,0.0365,0.1137,0.0553,0.0039,0.0185,reg oper account,specific housing,0.0482,"Stone, brick",No,5.0,2.0,5.0,1.0,-1598.0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0.0,0.0,0.0,2.0,2149793,234032,-27,0.000,90000,,0.0,,,,,0.0,0.0,0.000,0.000,,0,,,,Active,0,0
976042,234032,Cash loans,F,N,Y,0,99000.0,225000.0,22252.5,225000.0,Unaccompanied,Working,Secondary / secondary special,Married,House / apartment,0.028663,-14678,-1686,-7597.0,-4620,,1,1,0,1,1,0,High skill tech staff,2.0,2,2,WEDNESDAY,11,0,0,0,0,0,0,Business Entity Type 3,,0.759403,0.652897,0.1381,0.0000,0.9801,0.7280,0.0028,0.0,0.1034,0.1667,0.2083,0.0359,0.1118,0.0543,0.0039,0.0182,0.1408,0.0000,0.9801,0.7387,0.0028,0.0,0.1034,0.1667,0.2083,0.0367,0.1221,0.0566,0.0039,0.0192,0.1395,0.0000,0.9801,0.7316,0.0028,0.0,0.1034,0.1667,0.2083,0.0365,0.1137,0.0553,0.0039,0.0185,reg oper account,specific housing,0.0482,"Stone, brick",No,5.0,2.0,5.0,1.0,-1598.0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0.0,0.0,0.0,2.0,2149793,234032,-36,0.000,90000,,0.0,,,,,0.0,0.0,0.000,0.000,,0,,,,Active,0,0
976043,234032,Cash loans,F,N,Y,0,99000.0,225000.0,22252.5,225000.0,Unaccompanied,Working,Secondary / secondary special,Married,House / apartment,0.028663,-14678,-1686,-7597.0,-4620,,1,1,0,1,1,0,High skill tech staff,2.0,2,2,WEDNESDAY,11,0,0,0,0,0,0,Business Entity Type 3,,0.759403,0.652897,0.1381,0.0000,0.9801,0.7280,0.0028,0.0,0.1034,0.1667,0.2083,0.0359,0.1118,0.0543,0.0039,0.0182,0.1408,0.0000,0.9801,0.7387,0.0028,0.0,0.1034,0.1667,0.2083,0.0367,0.1221,0.0566,0.0039,0.0192,0.1395,0.0000,0.9801,0.7316,0.0028,0.0,0.1034,0.1667,0.2083,0.0365,0.1137,0.0553,0.0039,0.0185,reg oper account,specific housing,0.0482,"Stone, brick",No,5.0,2.0,5.0,1.0,-1598.0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0.0,0.0,0.0,2.0,2149793,234032,-28,0.000,90000,,0.0,,,,,0.0,0.0,0.000,0.000,,0,,,,Active,0,0
976044,234032,Cash loans,F,N,Y,0,99000.0,225000.0,22252.5,225000.0,Unaccompanied,Working,Secondary / secondary special,Married,House / apartment,0.028663,-14678,-1686,-7597.0,-4620,,1,1,0,1,1,0,High skill tech staff,2.0,2,2,WEDNESDAY,11,0,0,0,0,0,0,Business Entity Type 3,,0.759403,0.652897,0.1381,0.0000,0.9801,0.7280,0.0028,0.0,0.1034,0.1667,0.2083,0.0359,0.1118,0.0543,0.0039,0.0182,0.1408,0.0000,0.9801,0.7387,0.0028,0.0,0.1034,0.1667,0.2083,0.0367,0.1221,0.0566,0.0039,0.0192,0.1395,0.0000,0.9801,0.7316,0.0028,0.0,0.1034,0.1667,0.2083,0.0365,0.1137,0.0553,0.0039,0.0185,reg oper account,specific housing,0.0482,"Stone, brick",No,5.0,2.0,5.0,1.0,-1598.0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0.0,0.0,0.0,2.0,2149793,234032,-18,0.000,90000,,0.0,,,0.0,,0.0,0.0,0.000,0.000,,0,,,0.0,Active,0,0


In [32]:
train.dtypes

sk_id_curr                      int64
target                          int64
name_contract_type             object
code_gender                    object
flag_own_car                   object
                               ...   
amt_req_credit_bureau_day     float64
amt_req_credit_bureau_week    float64
amt_req_credit_bureau_mon     float64
amt_req_credit_bureau_qrt     float64
amt_req_credit_bureau_year    float64
Length: 122, dtype: object

In [87]:
test.dtypes

sk_id_curr                      int64
name_contract_type             object
code_gender                    object
flag_own_car                   object
flag_own_realty                object
                               ...   
amt_req_credit_bureau_day     float64
amt_req_credit_bureau_week    float64
amt_req_credit_bureau_mon     float64
amt_req_credit_bureau_qrt     float64
amt_req_credit_bureau_year    float64
Length: 121, dtype: object

In [88]:
%%time
for c in train.dtypes[train.dtypes == object].keys():
    print ("---- %s ---" % c)
#    print (train[c].value_counts())

---- name_contract_type ---
---- code_gender ---
---- flag_own_car ---
---- flag_own_realty ---
---- name_type_suite ---
---- name_income_type ---
---- name_education_type ---
---- name_family_status ---
---- name_housing_type ---
---- occupation_type ---
---- weekday_appr_process_start ---
---- organization_type ---
---- fondkapremont_mode ---
---- housetype_mode ---
---- wallsmaterial_mode ---
---- emergencystate_mode ---
Wall time: 6 ms


In [89]:
%%time
for c in test.dtypes[test.dtypes == object].keys():
    print ("---- %s ---" % c)

---- name_contract_type ---
---- code_gender ---
---- flag_own_car ---
---- flag_own_realty ---
---- name_type_suite ---
---- name_income_type ---
---- name_education_type ---
---- name_family_status ---
---- name_housing_type ---
---- occupation_type ---
---- weekday_appr_process_start ---
---- organization_type ---
---- fondkapremont_mode ---
---- housetype_mode ---
---- wallsmaterial_mode ---
---- emergencystate_mode ---
Wall time: 1 ms


In [90]:
train.replace('XNA', np.nan, inplace=True)

In [91]:
test.replace('XNA', np.nan, inplace=True)

In [92]:
cat_features = ['name_contract_type',
'code_gender',
'flag_own_car',
'flag_own_realty',
'name_type_suite',
'name_income_type',
'name_education_type',
'name_family_status',
'name_housing_type',
'occupation_type',
'weekday_appr_process_start',
'organization_type',
'fondkapremont_mode',
'housetype_mode',
'wallsmaterial_mode',
'emergencystate_mode']

In [93]:
imp_cat = SimpleImputer(strategy="most_frequent")

In [94]:
enc = OrdinalEncoder()

In [95]:
%%time
train[cat_features] = imp_cat.fit_transform(train[cat_features])
test[cat_features] = imp_cat.transform(test[cat_features])

Wall time: 380 ms


In [96]:
train[cat_features] = enc.fit_transform(train[cat_features])
test[cat_features] = enc.transform(test[cat_features])

In [97]:
train[cat_features].head()

Unnamed: 0,name_contract_type,code_gender,flag_own_car,flag_own_realty,name_type_suite,name_income_type,name_education_type,name_family_status,name_housing_type,occupation_type,weekday_appr_process_start,organization_type,fondkapremont_mode,housetype_mode,wallsmaterial_mode,emergencystate_mode
0,1.0,0.0,0.0,1.0,6.0,4.0,1.0,1.0,1.0,10.0,0.0,11.0,2.0,0.0,4.0,0.0
1,0.0,0.0,0.0,1.0,5.0,3.0,4.0,1.0,1.0,8.0,1.0,5.0,2.0,0.0,4.0,0.0
2,1.0,1.0,0.0,1.0,6.0,1.0,4.0,1.0,1.0,8.0,1.0,4.0,0.0,0.0,4.0,0.0
3,0.0,0.0,0.0,1.0,6.0,4.0,1.0,1.0,1.0,8.0,0.0,41.0,2.0,0.0,5.0,0.0
4,0.0,1.0,1.0,1.0,6.0,1.0,4.0,1.0,1.0,4.0,2.0,5.0,2.0,0.0,5.0,0.0


In [98]:
test[cat_features].head()

Unnamed: 0,name_contract_type,code_gender,flag_own_car,flag_own_realty,name_type_suite,name_income_type,name_education_type,name_family_status,name_housing_type,occupation_type,weekday_appr_process_start,organization_type,fondkapremont_mode,housetype_mode,wallsmaterial_mode,emergencystate_mode
0,0.0,0.0,0.0,1.0,6.0,3.0,4.0,1.0,1.0,8.0,5.0,5.0,2.0,0.0,4.0,0.0
1,0.0,1.0,1.0,1.0,6.0,7.0,4.0,3.0,5.0,4.0,5.0,11.0,2.0,0.0,4.0,0.0
2,0.0,1.0,1.0,1.0,6.0,7.0,4.0,3.0,1.0,8.0,5.0,33.0,2.0,0.0,4.0,0.0
3,0.0,1.0,1.0,0.0,6.0,7.0,2.0,1.0,1.0,16.0,2.0,40.0,2.0,0.0,4.0,0.0
4,0.0,1.0,0.0,1.0,6.0,7.0,4.0,1.0,2.0,4.0,2.0,42.0,0.0,0.0,5.0,0.0


In [45]:
numeric_features = train.dtypes[(train.dtypes == np.float64) | (train.dtypes == np.int64)].keys().tolist()

In [46]:
cat_bin_features = []
for i in numeric_features:
    if len(train[i].value_counts().keys().tolist()) == 2:
        cat_bin_features.append(i)

In [47]:
cat_bin_features

['target',
 'name_contract_type',
 'code_gender',
 'flag_own_car',
 'flag_own_realty',
 'flag_mobil',
 'flag_emp_phone',
 'flag_work_phone',
 'flag_cont_mobile',
 'flag_phone',
 'flag_email',
 'reg_region_not_live_region',
 'reg_region_not_work_region',
 'live_region_not_work_region',
 'reg_city_not_live_city',
 'reg_city_not_work_city',
 'live_city_not_work_city',
 'emergencystate_mode',
 'flag_document_2',
 'flag_document_3',
 'flag_document_4',
 'flag_document_5',
 'flag_document_6',
 'flag_document_7',
 'flag_document_8',
 'flag_document_9',
 'flag_document_10',
 'flag_document_11',
 'flag_document_12',
 'flag_document_13',
 'flag_document_14',
 'flag_document_15',
 'flag_document_16',
 'flag_document_17',
 'flag_document_18',
 'flag_document_19',
 'flag_document_20',
 'flag_document_21']

In [48]:
cat_bin_features.remove('target')

In [49]:
numeric_features.remove('target')

In [50]:
numeric_features

['sk_id_curr',
 'name_contract_type',
 'code_gender',
 'flag_own_car',
 'flag_own_realty',
 'cnt_children',
 'amt_income_total',
 'amt_credit',
 'amt_annuity',
 'amt_goods_price',
 'name_type_suite',
 'name_income_type',
 'name_education_type',
 'name_family_status',
 'name_housing_type',
 'region_population_relative',
 'days_birth',
 'days_employed',
 'days_registration',
 'days_id_publish',
 'own_car_age',
 'flag_mobil',
 'flag_emp_phone',
 'flag_work_phone',
 'flag_cont_mobile',
 'flag_phone',
 'flag_email',
 'occupation_type',
 'cnt_fam_members',
 'region_rating_client',
 'region_rating_client_w_city',
 'weekday_appr_process_start',
 'hour_appr_process_start',
 'reg_region_not_live_region',
 'reg_region_not_work_region',
 'live_region_not_work_region',
 'reg_city_not_live_city',
 'reg_city_not_work_city',
 'live_city_not_work_city',
 'organization_type',
 'ext_source_1',
 'ext_source_2',
 'ext_source_3',
 'apartments_avg',
 'basementarea_avg',
 'years_beginexpluatation_avg',
 'year

In [51]:
imp_bin_cat = SimpleImputer(strategy="most_frequent")

In [52]:
%%time
train[cat_bin_features] = imp_cat.fit_transform(train[cat_bin_features])
test[cat_bin_features] = imp_cat.transform(test[cat_bin_features])

Wall time: 1.31 s


In [53]:
imp_numeric = SimpleImputer(missing_values=np.nan, strategy='mean')

In [54]:
%%time
train[numeric_features] = imp_cat.fit_transform(train[numeric_features])
test[numeric_features] = imp_cat.transform(test[numeric_features])

Wall time: 2.56 s


In [99]:
#Построение baseline-модели
y_train = train["target"]
X_train = train.drop(["target", "sk_id_curr"], axis=1)
categorical = X_train.dtypes[X_train.dtypes == "object"]
categorical = list(categorical.index)

In [55]:
#Вычисление важности признака
x_train, x_valid = train_test_split(
    train.drop(["target", "sk_id_curr"], axis=1), train_size=0.7, random_state=1
)
y_train, y_valid = train_test_split(
    train["target"], train_size=0.7, random_state=1
)

In [56]:
model = RandomForestClassifier(max_depth=8, random_state=27, n_jobs=2)
model.fit(x_train.fillna(-9999), y_train)

RandomForestClassifier(max_depth=8, n_jobs=2, random_state=27)

In [57]:
def calculate_permutation_importance(estimator, 
                                     metric: callable,
                                     x_valid: pd.DataFrame,
                                     y_valid: pd.Series) -> pd.Series:
    """
    Расчет пермутированной важности признаков.
    """
    scores = {}
    y_pred = estimator.predict_proba(x_valid)[:, 1]
    base_score = metric(y_valid, y_pred)

    for feature in tqdm(x_valid.columns):
        x_valid_copy = x_valid.copy()
        x_valid_copy[feature] = np.random.permutation(x_valid_copy[feature])

        y_pred = estimator.predict_proba(x_valid_copy)[:, 1]
        score = metric(y_valid, y_pred)
        scores[feature] = base_score - score

    scores = pd.Series(scores)
    scores = scores.sort_values(ascending=False)

    return scores

In [58]:
perm_importance = calculate_permutation_importance(
    estimator=model, metric=roc_auc_score, x_valid=x_valid.fillna(-9999), y_valid=y_valid
)

100%|████████████████████████████████████████████████████████████████████████████████| 120/120 [00:46<00:00,  2.58it/s]


In [59]:
perm_importance

ext_source_3                    0.048993
ext_source_2                    0.042520
ext_source_1                    0.010919
name_education_type             0.004661
code_gender                     0.004149
                                  ...   
floorsmax_avg                  -0.000211
apartments_avg                 -0.000222
floorsmax_medi                 -0.000254
years_beginexpluatation_medi   -0.000273
elevators_medi                 -0.000310
Length: 120, dtype: float64

In [60]:
def fit_evaluate_model(estimator, x_train, y_train, x_valid, y_valid):
    """
    Функция для обучения и оценки качества модели.

    Parameters
    ----------
    estimator: callable
        Объект для обучения и применения модели.

    x_train: pandas.DataFrame
        Матрица признаков для обучения модели.

    y_train: pandas.Series
        Вектор целевой переменной для обучения модели.

    x_valid: pandas.DataFrame
        Матрица признаков для валидации модели.

    y_valid: pandas.Series
        Вектор целевой переменной для валидации модели.

    Returns
    -------
    y_train_pred: np.array
        Вектор прогнозов для обучающей выборки

    y_valid_pred: np.array
        Вектор прогнозов для валидационной выборки

    """
    estimator.fit(x_train, y_train)
    y_train_pred = estimator.predict_proba(x_train)[:, 1]
    y_valid_pred = estimator.predict_proba(x_valid)[:, 1]

    train_score = roc_auc_score(y_train, y_train_pred)
    valid_score = roc_auc_score(y_valid, y_valid_pred)
    print(f"Model Score: train = {round(train_score, 4)}, valid = {round(valid_score, 4)}")

    return y_train_pred, y_valid_pred

In [61]:
def fit_evaluate_model2(estimator, x_train, y_train, x_valid):
    """
    Функция для обучения и оценки качества модели.

    Parameters
    ----------
    estimator: callable
        Объект для обучения и применения модели.

    x_train: pandas.DataFrame
        Матрица признаков для обучения модели.

    y_train: pandas.Series
        Вектор целевой переменной для обучения модели.

    x_valid: pandas.DataFrame
        Матрица признаков для валидации модели.

    y_valid: pandas.Series
        Вектор целевой переменной для валидации модели.

    Returns
    -------
    y_train_pred: np.array
        Вектор прогнозов для обучающей выборки

    y_valid_pred: np.array
        Вектор прогнозов для валидационной выборки

    """
    estimator.fit(x_train, y_train)
    y_train_pred = estimator.predict_proba(x_train)[:, 1]
    y_valid_pred = estimator.predict_proba(x_valid)[:, 1]

   

    return y_train_pred, y_valid_pred

In [44]:
#
cb_params = {
    "n_estimators": 500,
    "learning_rate": 0.01,
    "loss_function": "Logloss",
    "eval_metric": "AUC",
    "task_type": "CPU",
    "verbose": 25,
    "max_depth": 16,
    "l2_leaf_reg": 10,
    "early_stopping_rounds": 50,
    "thread_count": 8,
    "random_seed": 42
}

estimators, prediction = catboost_hold_out_validation(
    params=cb_params, X=X_train, y=y_train, split_params=[0.6, 0.2, 0.2], categorical=categorical
)

Starting at Tue Feb  1 20:30:58 2022


CatBoostError: bad allocation

In [None]:
#Линейная модель
pipeline = Pipeline(
    steps=[
        ("scaling", StandardScaler()),
        ("model", LogisticRegression(random_state=27, C=1e-5))
    ]
)
y_train_pred, y_valid_pred = fit_evaluate_model(
    pipeline, x_train, y_train, x_valid, y_valid
)

In [62]:
#Решающие деревья без ограничения по глубине
tree = DecisionTreeClassifier(
    random_state=27
)
y_train_pred, y_valid_pred = fit_evaluate_model2(
    tree, X_train, y_train, test
)

The feature names should match those that were passed during fit. Starting version 1.2, an error will be raised.
Feature names unseen at fit time:
- sk_id_curr
Feature names must be in the same order as they were in fit.



ValueError: X has 121 features, but DecisionTreeClassifier is expecting 120 features as input.

In [51]:
#Решающее дерево с ограничением по глубине
tree = DecisionTreeClassifier(
    max_depth=5, random_state=27
)
y_train_pred, y_valid_pred = fit_evaluate_model2(
    tree, X_train, y_train, test
)

The feature names should match those that were passed during fit. Starting version 1.2, an error will be raised.
Feature names unseen at fit time:
- sk_id_curr
Feature names must be in the same order as they were in fit.



ValueError: X has 121 features, but DecisionTreeClassifier is expecting 120 features as input.

In [None]:
y_train_pred

In [None]:
y_valid_pred

In [55]:
pipeline = Pipeline(
    steps=[
        ("scaling", StandardScaler()),
        ("model", LogisticRegression(random_state=27, C=1e-5))
    ]
)
y_train_pred, y_valid_pred = fit_evaluate_model2(
    pipeline, X_train, y_train, test
)

The feature names should match those that were passed during fit. Starting version 1.2, an error will be raised.
Feature names unseen at fit time:
- sk_id_curr
Feature names must be in the same order as they were in fit.



ValueError: X has 121 features, but StandardScaler is expecting 120 features as input.

In [52]:
#Bagging над линейными моделями
bagging = BaggingClassifier(
    base_estimator=pipeline, random_state=27, n_jobs=2
)
y_train_pred, y_valid_pred = fit_evaluate_model(
    bagging, X_train, y_train, test
)

NameError: name 'pipeline' is not defined

In [34]:
#XGBoost (0.758

baseline = xgb.XGBClassifier(random_state=27)
baseline.fit(X_train, y_train)

XGBClassifier(random_state=27)

In [35]:
prediction = baseline.predict_proba(test.drop("sk_id_curr", axis=1))
#score = roc_auc_score(y_valid, y_pred[:, 1])
#print(f"score = {round(score, 4)}")

In [None]:
#XGBoost с использование оригинального API
xgb_params = {
    "booster": "gbtree",
    "objective": "binary:logistic",
    "eval_metric": "auc",
    "learning_rate": 0.1,
    "reg_lambda": 100,
    "max_depth": 4,
    "nthread": 6,
    "gamma": 10,
    "seed": 27
}

dtrain = xgb.DMatrix(
    data=x_train, label=y_train
)
dvalid = xgb.DMatrix(
    data=x_valid, label=y_valid
)

In [None]:
evals_result = {}

model = xgb.train(
    params=xgb_params,
    dtrain=dtrain,
    num_boost_round=1000,
    maximize=True,
    early_stopping_rounds=50,
    evals_result=evals_result,
    evals=[(dtrain, "train"), (dvalid, "valid")],
    verbose_eval=25,
    
)

In [None]:
explainer = shap.TreeExplainer(model)
shap_values = explainer.shap_values(x_valid)

In [100]:
#CatBoost -- - Твой результат: 0.7642928161535106 табл applications + bureau
##0.7641490799216262 без других таблиц
#Твой результат: 0.7641490799216262 
model = cb.CatBoostClassifier(random_state=27)
model.fit(X_train, y_train)

Learning rate set to 0.102119
0:	learn: 0.5960954	total: 49.8ms	remaining: 49.7s
1:	learn: 0.5174474	total: 82.3ms	remaining: 41.1s
2:	learn: 0.4605648	total: 109ms	remaining: 36.3s
3:	learn: 0.4149897	total: 140ms	remaining: 34.8s
4:	learn: 0.3786679	total: 166ms	remaining: 33.1s
5:	learn: 0.3529040	total: 196ms	remaining: 32.5s
6:	learn: 0.3332445	total: 224ms	remaining: 31.8s
7:	learn: 0.3174172	total: 254ms	remaining: 31.5s
8:	learn: 0.3041543	total: 286ms	remaining: 31.5s
9:	learn: 0.2942253	total: 315ms	remaining: 31.2s
10:	learn: 0.2861340	total: 344ms	remaining: 31s
11:	learn: 0.2808398	total: 371ms	remaining: 30.5s
12:	learn: 0.2759254	total: 395ms	remaining: 30s
13:	learn: 0.2716625	total: 428ms	remaining: 30.1s
14:	learn: 0.2683127	total: 456ms	remaining: 29.9s
15:	learn: 0.2658785	total: 484ms	remaining: 29.8s
16:	learn: 0.2640707	total: 516ms	remaining: 29.8s
17:	learn: 0.2622068	total: 542ms	remaining: 29.6s
18:	learn: 0.2608934	total: 566ms	remaining: 29.2s
19:	learn: 0.

<catboost.core.CatBoostClassifier at 0x28e8af1a340>

In [101]:
prediction = model.predict_proba(test)