# Introduction

**Related notebooks**

Utility script notebook (with additional functions, aggregators; data collection):

https://www.kaggle.com/andreynesterov/home-credit-baseline-data

Training models notebooks:

https://www.kaggle.com/andreynesterov/home-credit-baseline-training

https://www.kaggle.com/code/andreynesterov/home-credit-baseline-training-no-dates

https://www.kaggle.com/code/andreynesterov/home-credit-baseline-training-lightautoml

# Dependencies

In [1]:
!pip install --no-index -Uq --find-links=/kaggle/input/lightautoml-038-dependencies pandas==2.0.3

[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
cudf 23.8.0 requires cubinlinker, which is not installed.
cudf 23.8.0 requires cupy-cuda11x>=12.0.0, which is not installed.
cudf 23.8.0 requires ptxcompiler, which is not installed.
cuml 23.8.0 requires cupy-cuda11x>=12.0.0, which is not installed.
dask-cudf 23.8.0 requires cupy-cuda11x>=12.0.0, which is not installed.
beatrix-jupyterlab 2023.128.151533 requires jupyterlab~=3.6.0, but you have jupyterlab 4.0.11 which is incompatible.
cudf 23.8.0 requires cuda-python<12.0a0,>=11.7.1, but you have cuda-python 12.3.0 which is incompatible.
cudf 23.8.0 requires pandas<1.6.0dev0,>=1.3, but you have pandas 2.0.3 which is incompatible.
cudf 23.8.0 requires protobuf<5,>=4.21, but you have protobuf 3.20.3 which is incompatible.
cuml 23.8.0 requires dask==2023.7.1, but you have dask 2024.1.0 which is incompa

In [2]:
import os
import gc
from glob import glob
from pathlib import Path
from datetime import datetime
import numpy as np
import pandas as pd
import polars as pl

from sklearn.metrics import roc_auc_score, log_loss
from sklearn.model_selection import StratifiedGroupKFold
from sklearn.ensemble import VotingClassifier
from sklearn.preprocessing import LabelEncoder
from scipy.optimize import minimize

import joblib
import lightgbm as lgb
import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)



# Predictions

In [3]:
import home_credit_baseline_data as data_nb

In [4]:
### from https://www.kaggle.com/code/batprem/home-credit-risk-mode-utility-scripts
# Function that computes the evaluation score.
def gini_stability(base, score_col="score", w_fallingrate=88.0, w_resstd=-0.5):
    """Compute the gini stability score of a score column across weeks.

    A gini value (``2 * AUC - 1``) is computed for every ``WEEK_NUM`` group,
    a straight line is fitted through the weekly values, and the result is
    the mean weekly gini plus two weighted terms: the slope of the line
    (only when it is negative) and the standard deviation of the residuals
    around the line.

    Parameters
    ----------
    base : pandas.DataFrame
        Must contain the columns ``WEEK_NUM``, ``target`` and ``score_col``.
    score_col : str
        Name of the column holding the predicted scores.
    w_fallingrate : float
        Weight applied to ``min(0, slope)`` of the fitted line.
    w_resstd : float
        Weight applied to the standard deviation of the residuals.

    Returns
    -------
    float
        The combined stability score.
    """
    def _weekly_gini(week_df):
        # Gini coefficient of one week, derived from its ROC AUC.
        return 2 * roc_auc_score(week_df["target"], week_df[score_col]) - 1

    scored = base.loc[:, ["WEEK_NUM", "target", score_col]].sort_values("WEEK_NUM")
    weekly_gini = (
        scored
        .groupby("WEEK_NUM")[["target", score_col]]
        .apply(_weekly_gini)
        .tolist()
    )

    # Linear trend of the weekly gini values over their position in time.
    week_pos = np.arange(len(weekly_gini))
    slope, intercept = np.polyfit(week_pos, weekly_gini, 1)
    trend = slope * week_pos + intercept
    residual_std = np.std(weekly_gini - trend)

    return np.mean(weekly_gini) + w_fallingrate * min(0, slope) + w_resstd * residual_std

In [5]:
def predict_proba_in_batches(model, data, batch_size=100000, predict_mode="base"):
    """Predict positive-class probabilities for ``data`` in row batches.

    Parameters
    ----------
    model : object
        With ``predict_mode="base"`` it must expose ``predict_proba(X)``
        returning a 2-D array whose column 1 is used. With
        ``predict_mode="lightautoml"`` it must expose ``predict(X)`` returning
        an object whose ``.data`` array is squeezed to one value per row.
    data : pandas.DataFrame
        Rows to score; sliced positionally with ``.iloc``.
    batch_size : int
        Maximum number of rows passed to the model per call. Must be positive.
    predict_mode : {"base", "lightautoml"}
        Selects which model interface is called.

    Returns
    -------
    numpy.ndarray
        1-D float array with one probability per row of ``data``.

    Raises
    ------
    ValueError
        If ``predict_mode`` is not a supported value or ``batch_size`` is not
        positive.
    """
    # Fail fast: previously an unknown mode left ``batch_probs`` unassigned and
    # surfaced as an UnboundLocalError inside the loop.
    if predict_mode not in ("base", "lightautoml"):
        raise ValueError(
            f"Unknown predict_mode {predict_mode!r}; expected 'base' or 'lightautoml'"
        )
    if batch_size <= 0:
        raise ValueError(f"batch_size must be positive, got {batch_size!r}")

    num_samples = len(data)
    num_batches = int(np.ceil(num_samples / batch_size))
    probabilities = np.zeros((num_samples,))

    for batch_idx in range(num_batches):
        print(f"Processing batch: {batch_idx+1}/{num_batches}")
        start_idx = batch_idx * batch_size
        end_idx = min(start_idx + batch_size, num_samples)
        X_batch = data.iloc[start_idx:end_idx]
        if predict_mode == "base":
            batch_probs = model.predict_proba(X_batch)[:, 1]
        else:  # "lightautoml"
            batch_probs = model.predict(X_batch).data.squeeze()
        probabilities[start_idx:end_idx] = batch_probs
        # Release per-batch temporaries before the next slice is scored.
        gc.collect()

    return probabilities

In [6]:
# Base training table; the captured output shows it carries case_id, WEEK_NUM and target.
train_base_df = pd.read_csv("/kaggle/input/home-credit-credit-risk-model-stability/csv_files/train/train_base.csv")
y_train = train_base_df["target"]
# Independent copy: the per-model prediction columns added to oof_df later
# must not silently appear on train_base_df as well.
oof_df = train_base_df.copy()
# Per-model gini stability scores (one row per model).
models_score_df = pd.DataFrame()
# model_names = []
# Test-set predictions, one column per model.
test_preds_df = pd.DataFrame()

# 各モデルの読み込みと評価関数での精度
 - Model 1,2はlgbmのアンサンブルモデル、Model 3はTabularAutoMLのdenselightアルゴリズムを使ったnnモデル
 - 各モデルとその訓練に使ったデータを呼び出し、評価関数（上のgini_stability）での精度をそれぞれ出している
 - 各Modelのパラグラフで訓練データの予測値もモデル訓練時に保存したファイルから読み込んでおり、その値はoof_dfに保存している
 - テストデータに対する予測値はtest_preds_dfに保存し、その後のブレンディングの際に使っている
 - 要は
   - 評価関数（gini_stability関数）で訓練データで計算したスコア：models_score_df
   - 訓練データの予測値：oof_df
   - テストデータの予測値：test_preds_df

## Model 1

In [7]:
# Key used for this model's prediction column and score row in later cells.
model_name = "model_1"
# NOTE(review): joblib.load unpickles the file, so the dataset must be trusted.
model_1 = joblib.load("/kaggle/input/home-credit-baseline-training/oof_model_1.pkl")
# Bare expression so the notebook displays the loaded model.
model_1
# model_names.append(model_name)

In [8]:
# Column lists saved alongside model 1: training columns, categorical columns, dropped columns.
train_cols, cat_cols, drop_cols = joblib.load(
    "/kaggle/input/home-credit-baseline-training/train_cat_columns.pkl"
)
# Report the size of each list, one per line.
print(
    *(
        f"{label} {len(cols)}"
        for label, cols in (
            ("train_cols:\t", train_cols),
            ("cat_cols:\t", cat_cols),
            ("drop_cols:\t", drop_cols),
        )
    ),
    sep="\n",
)

train_cols:	 516
cat_cols:	 138
drop_cols:	 0


In [9]:
# Build the test feature table with the column lists loaded for model 1.
test_df = data_nb.prepare_df(
    data_nb.CFG.test_dir,
    cat_cols=cat_cols,
    mode="test",
    train_cols=train_cols,
)
display(test_df)

Collecting data...
Feature engeneering...
  feats_df shape:	 (10, 926)
Filter cols...
  feats_df shape:	 (10, 515)
Convert to pandas...


Unnamed: 0,case_id,WEEK_NUM,month_decision,weekday_decision,assignmentdate_238D,assignmentdate_4527235D,birthdate_574D,contractssum_5085716L,dateofbirth_337D,days120_123L,days180_256L,days30_165L,days360_512L,days90_310L,description_5085714M,education_1103M,education_88M,firstquarter_103L,fourthquarter_440L,maritalst_385M,maritalst_893M,numberofqueries_373L,pmtaverage_3A,pmtaverage_4527227A,pmtcount_4527229L,pmtcount_693L,pmtscount_423L,pmtssum_45A,requesttype_4525192L,responsedate_1012D,responsedate_4527233D,responsedate_4917613D,secondquarter_766L,thirdquarter_1082L,actualdpdtolerance_344P,amtinstpaidbefduel24m_4187115A,annuity_780A,annuitynextmonth_57A,applicationcnt_361L,applications30d_658L,applicationscnt_1086L,applicationscnt_464L,applicationscnt_629L,applicationscnt_867L,avgdbddpdlast24m_3658932P,avgdbddpdlast3m_4187120P,avgdbdtollast24m_4525197P,avgdpdtolclosure24_3658938P,avginstallast24m_3658937A,avglnamtstart24m_4525187A,avgmaxdpdlast9m_3716943P,avgoutstandbalancel6m_4187114A,avgpmtlast12m_4525200A,bankacctype_710L,cardtype_51L,clientscnt12m_3712952L,clientscnt3m_3712950L,clientscnt6m_3712949L,clientscnt_100L,clientscnt_1022L,clientscnt_1071L,clientscnt_1130L,clientscnt_157L,clientscnt_257L,clientscnt_304L,clientscnt_360L,clientscnt_493L,clientscnt_533L,clientscnt_887L,clientscnt_946L,cntincpaycont9m_3716944L,cntpmts24_3658933L,commnoinclast6m_3546845L,credamount_770A,credtype_322L,currdebt_22A,currdebtcredtyperange_828A,datefirstoffer_1144D,datelastinstal40dpd_247D,datelastunpaid_3546854D,daysoverduetolerancedd_3976961L,deferredmnthsnum_166L,disbursedcredamount_1113A,disbursementtype_67L,downpmt_116A,dtlastpmtallstes_4499206D,eir_270L,equalitydataagreement_891L,firstclxcampaign_1125D,firstdatedue_489D,homephncnt_628L,inittransactionamount_650A,inittransactioncode_186L,interestrate_311L,isbidproduct_1095L,isdebitcard_729L,lastactivateddate_801D,lastapplicationdate_877D,lastapprcommoditycat_1041M,lastapprcredamount_781A,lastapprdate_640D,lastcancelreason_
561M,lastdelinqdate_224D,lastrejectcommoditycat_161M,lastrejectcommodtypec_5251769M,lastrejectcredamount_222A,lastrejectdate_50D,lastrejectreason_759M,lastrejectreasonclient_4145040M,lastst_736L,maininc_215A,mastercontrelectronic_519L,mastercontrexist_109L,maxannuity_159A,maxdbddpdlast1m_3658939P,maxdbddpdtollast12m_3658940P,maxdbddpdtollast6m_4187119P,maxdebt4_972A,maxdpdfrom6mto36m_3546853P,maxdpdinstldate_3546855D,maxdpdinstlnum_3546846P,maxdpdlast12m_727P,maxdpdlast24m_143P,maxdpdlast3m_392P,maxdpdlast6m_474P,maxdpdlast9m_1059P,maxdpdtolerance_374P,maxinstallast24m_3658928A,maxlnamtstart6m_4525199A,maxoutstandbalancel12m_4187113A,maxpmtlast3m_4525190A,mindbddpdlast24m_3658935P,mindbdtollast24m_4525191P,mobilephncnt_593L,monthsannuity_845L,numactivecreds_622L,numactivecredschannel_414L,numactiverelcontr_750L,numcontrs3months_479L,numincomingpmts_3546848L,numinstlallpaidearly3d_817L,numinstls_657L,numinstlsallpaid_934L,numinstlswithdpd10_728L,numinstlswithdpd5_4187116L,numinstlswithoutdpd_562L,numinstmatpaidtearly2d_4499204L,numinstpaid_4499208L,numinstpaidearly3d_3546850L,numinstpaidearly3dest_4493216L,numinstpaidearly5d_1087L,numinstpaidearly5dest_4493211L,numinstpaidearly5dobd_4499205L,numinstpaidearly_338L,numinstpaidearlyest_4493214L,numinstpaidlastcontr_4325080L,numinstpaidlate1d_3546852L,numinstregularpaid_973L,numinstregularpaidest_4493210L,numinsttopaygr_769L,numinsttopaygrest_4493213L,numinstunpaidmax_3546851L,numinstunpaidmaxest_4493212L,numnotactivated_1143L,numpmtchanneldd_318L,numrejects9m_859L,opencred_647L,paytype1st_925L,paytype_783L,pctinstlsallpaidearl3d_427L,pctinstlsallpaidlat10d_839L,pctinstlsallpaidlate1d_3546856L,pctinstlsallpaidlate4d_3546849L,pctinstlsallpaidlate6d_3546844L,pmtnum_254L,posfpd10lastmonth_333P,posfpd30lastmonth_3976960P,posfstqpd30lastmonth_3976962P,price_1097A,sellerplacecnt_915L,sellerplacescnt_216L,sumoutstandtotal_3546847A,sumoutstandtotalest_4493215A,totaldebt_9A,totalsettled_863A,totinstallast1m_4525188A,twobodfilling
_608L,typesuite_864L,validfrom_1069D,max_actualdpd_943P,max_annuity_853A,max_credacc_actualbalance_314A,max_credacc_credlmt_575A,max_credacc_maxhisbal_375A,max_credacc_minhisbal_90A,max_credamount_590A,max_currdebt_94A,max_downpmt_134A,max_mainoccupationinc_437A,max_maxdpdtolerance_577P,max_outstandingdebt_522A,max_revolvingaccount_394A,min_actualdpd_943P,min_annuity_853A,min_credacc_actualbalance_314A,min_credacc_credlmt_575A,min_credacc_maxhisbal_375A,min_credacc_minhisbal_90A,min_credamount_590A,min_currdebt_94A,min_downpmt_134A,min_mainoccupationinc_437A,min_maxdpdtolerance_577P,min_outstandingdebt_522A,min_revolvingaccount_394A,first_actualdpd_943P,first_annuity_853A,first_credacc_credlmt_575A,first_credamount_590A,first_currdebt_94A,first_downpmt_134A,first_mainoccupationinc_437A,first_maxdpdtolerance_577P,first_outstandingdebt_522A,first_revolvingaccount_394A,last_actualdpd_943P,last_annuity_853A,last_credacc_actualbalance_314A,last_credacc_credlmt_575A,last_credacc_maxhisbal_375A,last_credacc_minhisbal_90A,last_credamount_590A,last_currdebt_94A,last_downpmt_134A,last_mainoccupationinc_437A,last_maxdpdtolerance_577P,last_outstandingdebt_522A,mean_actualdpd_943P,mean_annuity_853A,mean_credacc_actualbalance_314A,mean_credacc_credlmt_575A,mean_credacc_maxhisbal_375A,mean_credacc_minhisbal_90A,mean_credamount_590A,mean_currdebt_94A,mean_downpmt_134A,mean_mainoccupationinc_437A,mean_maxdpdtolerance_577P,mean_outstandingdebt_522A,mean_revolvingaccount_394A,max_approvaldate_319D,max_creationdate_885D,max_dateactivated_425D,max_dtlastpmt_581D,max_dtlastpmtallstes_3545839D,max_employedfrom_700D,max_firstnonzeroinstldate_307D,min_approvaldate_319D,min_creationdate_885D,min_dateactivated_425D,min_dtlastpmt_581D,min_dtlastpmtallstes_3545839D,min_employedfrom_700D,min_firstnonzeroinstldate_307D,first_approvaldate_319D,first_creationdate_885D,first_dateactivated_425D,first_dtlastpmt_581D,first_dtlastpmtallstes_3545839D,first_employedfrom_700D,first_firstnonzeroinstldate_30
7D,last_approvaldate_319D,last_creationdate_885D,last_dateactivated_425D,last_dtlastpmt_581D,last_dtlastpmtallstes_3545839D,last_employedfrom_700D,last_firstnonzeroinstldate_307D,mean_approvaldate_319D,mean_creationdate_885D,mean_dateactivated_425D,mean_dtlastpmt_581D,mean_dtlastpmtallstes_3545839D,mean_employedfrom_700D,mean_firstnonzeroinstldate_307D,max_cancelreason_3545846M,max_education_1138M,max_postype_4733339M,max_rejectreason_755M,max_rejectreasonclient_4145042M,min_cancelreason_3545846M,min_education_1138M,min_postype_4733339M,min_rejectreason_755M,min_rejectreasonclient_4145042M,first_cancelreason_3545846M,first_education_1138M,first_postype_4733339M,first_rejectreason_755M,first_rejectreasonclient_4145042M,last_cancelreason_3545846M,last_education_1138M,last_postype_4733339M,last_rejectreason_755M,last_rejectreasonclient_4145042M,mode_cancelreason_3545846M,mode_education_1138M,mode_postype_4733339M,mode_rejectreason_755M,mode_rejectreasonclient_4145042M,max_byoccupationinc_3656910L,max_childnum_21L,max_credacc_status_367L,max_credacc_transactions_402L,max_credtype_587L,max_familystate_726L,max_inittransactioncode_279L,max_isbidproduct_390L,max_isdebitcard_527L,max_pmtnum_8L,max_status_219L,max_tenor_203L,min_byoccupationinc_3656910L,min_childnum_21L,min_credacc_status_367L,min_credacc_transactions_402L,min_credtype_587L,min_familystate_726L,min_inittransactioncode_279L,min_isbidproduct_390L,min_isdebitcard_527L,min_pmtnum_8L,min_status_219L,min_tenor_203L,first_childnum_21L,first_credtype_587L,first_familystate_726L,first_inittransactioncode_279L,first_isbidproduct_390L,first_isdebitcard_527L,first_pmtnum_8L,first_status_219L,first_tenor_203L,last_byoccupationinc_3656910L,last_childnum_21L,last_credacc_status_367L,last_credacc_transactions_402L,last_credtype_587L,last_familystate_726L,last_inittransactioncode_279L,last_isbidproduct_390L,last_pmtnum_8L,last_status_219L,last_tenor_203L,max_num_group1,min_num_group1,first_num_group1,last_num_group1,max_amou
nt_4527230A,min_amount_4527230A,first_amount_4527230A,last_amount_4527230A,mean_amount_4527230A,max_recorddate_4527225D,min_recorddate_4527225D,first_recorddate_4527225D,last_recorddate_4527225D,mean_recorddate_4527225D,max_num_group1_3,min_num_group1_3,first_num_group1_3,last_num_group1_3,max_amount_4917619A,min_amount_4917619A,first_amount_4917619A,last_amount_4917619A,mean_amount_4917619A,max_deductiondate_4917603D,min_deductiondate_4917603D,first_deductiondate_4917603D,last_deductiondate_4917603D,mean_deductiondate_4917603D,max_num_group1_4,min_num_group1_4,first_num_group1_4,last_num_group1_4,max_pmtamount_36A,min_pmtamount_36A,first_pmtamount_36A,last_pmtamount_36A,mean_pmtamount_36A,max_processingdate_168D,min_processingdate_168D,first_processingdate_168D,last_processingdate_168D,mean_processingdate_168D,max_num_group1_5,min_num_group1_5,first_num_group1_5,last_num_group1_5,max_mainoccupationinc_384A,min_mainoccupationinc_384A,first_mainoccupationinc_384A,last_mainoccupationinc_384A,mean_mainoccupationinc_384A,max_birth_259D,max_empl_employedfrom_271D,min_birth_259D,min_empl_employedfrom_271D,first_birth_259D,first_empl_employedfrom_271D,last_birth_259D,mean_birth_259D,mean_empl_employedfrom_271D,max_education_927M,max_empladdr_district_926M,max_empladdr_zipcode_114M,max_language1_981M,min_education_927M,min_language1_981M,first_education_927M,first_language1_981M,last_education_927M,last_empladdr_district_926M,last_empladdr_zipcode_114M,last_language1_981M,mode_education_927M,mode_language1_981M,max_contaddr_matchlist_1032L,max_contaddr_smempladdr_334L,max_empl_employedtotal_800L,max_empl_industry_691L,max_familystate_447L,max_housetype_905L,max_incometype_1044T,max_personindex_1023L,max_persontype_1072L,max_persontype_792L,max_relationshiptoclient_415T,max_relationshiptoclient_642T,max_remitter_829L,max_role_1084L,max_safeguarantyflag_411L,max_sex_738L,max_type_25L,min_contaddr_matchlist_1032L,min_contaddr_smempladdr_334L,min_empl_employedtotal_800L,min_emp
l_industry_691L,min_familystate_447L,min_housetype_905L,min_incometype_1044T,min_personindex_1023L,min_persontype_1072L,min_persontype_792L,min_relationshiptoclient_415T,min_relationshiptoclient_642T,min_remitter_829L,min_safeguarantyflag_411L,min_sex_738L,min_type_25L,first_contaddr_matchlist_1032L,first_contaddr_smempladdr_334L,first_empl_employedtotal_800L,first_empl_industry_691L,first_familystate_447L,first_housetype_905L,first_incometype_1044T,first_personindex_1023L,first_persontype_1072L,first_persontype_792L,first_role_1084L,first_safeguarantyflag_411L,first_sex_738L,first_type_25L,last_contaddr_matchlist_1032L,last_contaddr_smempladdr_334L,last_incometype_1044T,last_personindex_1023L,last_persontype_1072L,last_persontype_792L,last_relationshiptoclient_415T,last_relationshiptoclient_642T,last_remitter_829L,last_role_1084L,last_safeguarantyflag_411L,last_sex_738L,last_type_25L,max_num_group1_8,min_num_group1_8,first_num_group1_8,last_num_group1_8,max_amount_416A,min_amount_416A,first_amount_416A,last_amount_416A,mean_amount_416A,max_openingdate_313D,min_openingdate_313D,first_openingdate_313D,last_openingdate_313D,mean_openingdate_313D,max_num_group1_9,min_num_group1_9,first_num_group1_9,last_num_group1_9,max_openingdate_857D,min_openingdate_857D,first_openingdate_857D,last_openingdate_857D,mean_openingdate_857D,max_num_group1_10,min_num_group1_10,first_num_group1_10,last_num_group1_10
0,57543,92,10,2,,,,22130.26,-8832.0,9.0,9.0,9.0,10.0,9.0,2fc785b2,a55475b1,a55475b1,4.0,4.0,a55475b1,a55475b1,10.0,,,,,,,,,,14,1.0,6.0,,,7637.2,0.0,0.0,0.0,0.0,0.0,0.0,1.0,,,,,,,,,,,,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,,,0.0,100000.0,CAL,0.0,0.0,,,,,0.0,100000.0,GBA,0.0,,0.45,,,,0.0,,CASH,0.45,False,,,-819.0,a55475b1,,,P94_109_143,,P159_130_59,P75_90_70,37998.0,-819.0,P99_56_166,P94_109_143,D,,0.0,0.0,0.0,,,,0.0,0.0,,,0.0,0.0,0.0,0.0,0.0,0.0,,,,,,,1.0,,0.0,0.0,0.0,0.0,,,0.0,,,,,,,,,,,,,,,,,,,,,,0.0,0.0,0.0,False,OTHER,OTHER,,,,,,36.0,0.0,0.0,0.0,0.0,0.0,1.0,,,0.0,0.0,,FO,AL,,0.0,1519.8,,0.0,,,37998.0,,0.0,30000.0,,,,0.0,1519.8,,0.0,,,37998.0,,0.0,30000.0,,,,0.0,1519.8,0.0,37998.0,,0.0,30000.0,,,,0.0,1519.8,,0.0,,,37998.0,,0.0,30000.0,,,0.0,1519.8,,0.0,,,37998.0,,0.0,30000.0,,,,,-819.0,,,,,-789.0,,-819.0,,,,,-789.0,,-819.0,,,,,-789.0,,-819.0,,,,,-789.0,,-819.0,,,,,-789.0,P94_109_143,a55475b1,P149_40_170,P99_56_166,P94_109_143,P94_109_143,a55475b1,P149_40_170,P99_56_166,P94_109_143,P94_109_143,a55475b1,P149_40_170,P99_56_166,P94_109_143,P94_109_143,a55475b1,P149_40_170,P99_56_166,P94_109_143,P94_109_143,a55475b1,P149_40_170,P99_56_166,P94_109_143,,,,,COL,,POS,False,,31.0,D,31.0,,,,,COL,,POS,False,,31.0,D,31.0,,COL,,POS,False,,31.0,D,31.0,,,,,COL,,POS,False,31.0,D,31.0,0.0,0.0,0.0,0.0,,,,,,,,,,,,,,,31060.201,23310.201,23976.0,31060.201,25499.4404,3.0,-95.0,-19.0,-4.0,-27.0,4.0,0.0,1.0,3.0,,,,,,,,,,,,,,,36000.0,36000.0,36000.0,,36000.0,-8832.0,-964.0,-8832.0,-964.0,-8832.0,-964.0,,-8832.0,-964.0,a55475b1,a55475b1,a55475b1,a55475b1,P97_36_170,P209_127_106,P97_36_170,P209_127_106,a55475b1,a55475b1,a55475b1,a55475b1,a55475b1,a55475b1,False,False,MORE_ONE,OTHER,MARRIED,,EMPLOYED,1.0,5.0,5.0,OTHER,OTHER,False,PE,False,M,PRIMARY_MOBILE,False,False,MORE_ONE,OTHER,MARRIED,,EMPLOYED,0.0,1.0,1.0,OTHER,OTHER,False,False,M,PHONE,False,False,MORE_ONE,OTHER,MARRIED,,EMPLOYED,0.0,1.0,1.0,CL,False,M,PRIMARY_MOBILE,,,,,5.0,,,OTHER,,PE,,,PHONE,2.0,0.0,0.0,2.0,,,,,
,,,,,,,,,,,,,,,,,,
1,57549,92,10,2,,,,,-10446.0,0.0,0.0,0.0,0.0,0.0,2fc785b2,a55475b1,a55475b1,0.0,0.0,a55475b1,a55475b1,0.0,,,,,,,,,,14,2.0,0.0,,,902.60004,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,,,,,,,,,,,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,,0.0,10000.0,CAL,0.0,0.0,,,,,0.0,10000.0,GBA,0.0,,0.15,,,,1.0,,CASH,0.15,False,,,-546.0,a55475b1,,,P94_109_143,,a55475b1,a55475b1,20000.0,-546.0,P45_84_106,P94_109_143,D,,0.0,0.0,0.0,,,,0.0,0.0,,,0.0,0.0,0.0,0.0,0.0,0.0,,,,,,,2.0,,0.0,0.0,0.0,0.0,,,0.0,,,,,,,,,,,,,,,,,,,,,,0.0,0.0,0.0,False,OTHER,OTHER,,,,,,12.0,0.0,0.0,1.0,,0.0,0.0,,,0.0,0.0,,BO,AL,,0.0,2048.8,,0.0,,,20000.0,,0.0,12000.0,,,,0.0,2048.8,,0.0,,,20000.0,,0.0,12000.0,,,,0.0,2048.8,0.0,20000.0,,0.0,12000.0,,,,0.0,2048.8,,0.0,,,20000.0,,0.0,12000.0,,,0.0,2048.8,,0.0,,,20000.0,,0.0,12000.0,,,,,-546.0,,,,-1209.0,-515.0,,-546.0,,,,-1209.0,-515.0,,-546.0,,,,-1209.0,-515.0,,-546.0,,,,-1209.0,-515.0,,-546.0,,,,-1209.0,-515.0,P94_109_143,P97_36_170,P67_102_161,P45_84_106,P94_109_143,P94_109_143,P97_36_170,P67_102_161,P45_84_106,P94_109_143,P94_109_143,P97_36_170,P67_102_161,P45_84_106,P94_109_143,P94_109_143,P97_36_170,P67_102_161,P45_84_106,P94_109_143,P94_109_143,P97_36_170,P67_102_161,P45_84_106,P94_109_143,,,,,CAL,SINGLE,CASH,False,,12.0,D,12.0,,,,,CAL,SINGLE,CASH,False,,12.0,D,12.0,,CAL,SINGLE,CASH,False,,12.0,D,12.0,,,,,CAL,SINGLE,CASH,False,12.0,D,12.0,0.0,0.0,0.0,0.0,,,,,,,,,,,,,,,8760.601,337.80002,8749.601,8760.601,5244.560404,0.0,-153.0,-153.0,0.0,-62.0,11.0,0.0,0.0,11.0,,,,,,,,,,,,,,,15000.0,15000.0,15000.0,,15000.0,-10446.0,-521.0,-10446.0,-521.0,-10446.0,-521.0,,-10446.0,-521.0,a55475b1,a55475b1,a55475b1,a55475b1,P97_36_170,P10_39_147,P97_36_170,P10_39_147,a55475b1,a55475b1,a55475b1,a55475b1,a55475b1,a55475b1,False,False,MORE_ONE,OTHER,SINGLE,,EMPLOYED,1.0,5.0,5.0,SIBLING,SIBLING,False,PE,False,M,PRIMARY_MOBILE,False,False,MORE_ONE,OTHER,SINGLE,,EMPLOYED,0.0,1.0,1.0,SIBLING,SIBLING,False,False,M,ALTERNATIVE_PHONE,False,False,MORE_ONE,OTHER,SINGLE,,EMPLOYED,0.0,1.0,
1.0,EM,False,M,PRIMARY_MOBILE,,,,,5.0,,,SIBLING,,PE,,,PHONE,3.0,0.0,0.0,3.0,,,,,,,,,,,,,,,,,,,,,,,
2,57551,92,10,2,,,,,-11024.0,2.0,2.0,1.0,2.0,2.0,2fc785b2,a55475b1,a55475b1,0.0,1.0,a55475b1,a55475b1,2.0,,,,,,,,,,14,0.0,2.0,,,3610.2,0.0,0.0,0.0,0.0,0.0,0.0,2.0,,,,,,,,,,,,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,,0.0,40000.0,CAL,0.0,0.0,,,,,0.0,40000.0,GBA,0.0,,0.15,,,,0.0,,CASH,0.15,False,,,-318.0,a55475b1,,,P94_109_143,,a55475b1,a55475b1,20000.0,-318.0,P99_56_166,P94_109_143,D,,0.0,0.0,0.0,,,,0.0,0.0,,,0.0,0.0,0.0,0.0,0.0,0.0,,,,,,,1.0,,0.0,0.0,0.0,0.0,,,0.0,,,,,,,,,,,,,,,,,,,,,,0.0,0.0,0.0,False,OTHER,OTHER,,,,,,12.0,0.0,0.0,0.0,,0.0,1.0,,,0.0,0.0,,BO,AL,,0.0,5401.0,,0.0,,,60000.0,,0.0,50000.0,,,,0.0,1871.8,,0.0,,,20000.0,,0.0,22000.0,,,,0.0,1871.8,0.0,20000.0,,0.0,22000.0,,,,0.0,5401.0,,0.0,,,60000.0,,0.0,50000.0,,,0.0,3636.4,,0.0,,,40000.0,,0.0,36000.0,,,,,-318.0,,,,-1117.0,-288.0,,-790.0,,,,-1117.0,-759.0,,-318.0,,,,,-288.0,,-790.0,,,,-1117.0,-759.0,,-554.0,,,,-1117.0,-524.0,P94_109_143,a55475b1,P67_102_161,P99_56_166,P94_109_143,P94_109_143,P97_36_170,P46_145_78,P45_84_106,P94_109_143,P94_109_143,a55475b1,P67_102_161,P99_56_166,P94_109_143,P94_109_143,P97_36_170,P46_145_78,P45_84_106,P94_109_143,P94_109_143,a55475b1,P46_145_78,P99_56_166,P94_109_143,,,,,CAL,MARRIED,CASH,False,,18.0,D,18.0,,,,,CAL,MARRIED,CASH,False,,12.0,D,12.0,,CAL,,CASH,False,,12.0,D,12.0,,,,,CAL,MARRIED,CASH,False,18.0,D,18.0,1.0,0.0,0.0,1.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,24000.0,24000.0,24000.0,,24000.0,-11024.0,-630.0,-11024.0,-630.0,-11024.0,-630.0,,-11024.0,-630.0,a55475b1,a55475b1,a55475b1,a55475b1,P97_36_170,P10_39_147,P97_36_170,P10_39_147,a55475b1,a55475b1,a55475b1,a55475b1,a55475b1,a55475b1,False,False,MORE_FIVE,OTHER,MARRIED,,EMPLOYED,1.0,5.0,5.0,FRIEND,FRIEND,False,PE,False,M,PRIMARY_MOBILE,False,False,MORE_FIVE,OTHER,MARRIED,,EMPLOYED,0.0,1.0,1.0,FRIEND,FRIEND,False,False,M,PHONE,False,False,MORE_FIVE,OTHER,MARRIED,,EMPLOYED,0.0,1.0,1.0,CL,False,M,PRIMARY_MOBILE,,,,,5.0,,,FRIEND,,PE,,,PHONE,2.0,0.0,0.0,2.0,,,,,,,,,,,,,,,,,,,,,,,
3,57552,92,10,3,,,,,-26304.0,0.0,0.0,0.0,0.0,0.0,2fc785b2,a55475b1,a55475b1,0.0,0.0,a55475b1,a55475b1,0.0,,,,,,,,,,12,0.0,0.0,,,6964.4,0.0,0.0,0.0,0.0,0.0,0.0,1.0,,,,,,,,,,CA,,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,,,0.0,90000.0,CAL,0.0,0.0,,,,,0.0,90000.0,GBA,0.0,,0.45,,,,0.0,,CASH,0.45,False,,,-1316.0,a55475b1,,,P94_109_143,,a55475b1,a55475b1,50000.0,-1316.0,P45_84_106,P94_109_143,D,,0.0,0.0,0.0,,,,0.0,0.0,,,0.0,0.0,0.0,0.0,0.0,0.0,,,,,,,1.0,,0.0,0.0,0.0,0.0,,,0.0,,,,,,,,,,,,,,,,,,,,,,0.0,0.0,0.0,False,OTHER,OTHER,,,,,,18.0,0.0,0.0,0.0,,0.0,0.0,,,0.0,0.0,,FO,,,0.0,4834.2,,0.0,,,50000.0,,0.0,16000.0,,,,0.0,4834.2,,0.0,,,50000.0,,0.0,16000.0,,,,0.0,4834.2,0.0,50000.0,,0.0,16000.0,,,,0.0,4834.2,,0.0,,,50000.0,,0.0,16000.0,,,0.0,4834.2,,0.0,,,50000.0,,0.0,16000.0,,,,,-1316.0,,,,,-1288.0,,-1316.0,,,,,-1288.0,,-1316.0,,,,,-1288.0,,-1316.0,,,,,-1288.0,,-1316.0,,,,,-1288.0,P94_109_143,P97_36_170,P46_145_78,P45_84_106,P94_109_143,P94_109_143,P97_36_170,P46_145_78,P45_84_106,P94_109_143,P94_109_143,P97_36_170,P46_145_78,P45_84_106,P94_109_143,P94_109_143,P97_36_170,P46_145_78,P45_84_106,P94_109_143,P94_109_143,P97_36_170,P46_145_78,P45_84_106,P94_109_143,,1.0,,,CAL,MARRIED,CASH,False,,18.0,D,18.0,,1.0,,,CAL,MARRIED,CASH,False,,18.0,D,18.0,1.0,CAL,MARRIED,CASH,False,,18.0,D,18.0,,1.0,,,CAL,MARRIED,CASH,False,18.0,D,18.0,0.0,0.0,0.0,0.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
4,57569,92,10,2,,,,,-26395.0,0.0,0.0,0.0,0.0,0.0,2fc785b2,a55475b1,a55475b1,0.0,0.0,b6cabe76,a55475b1,0.0,,,,,,,,,,13,1.0,0.0,,,5553.4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,,,,,,,,,CA,,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,,0.0,50000.0,CAL,0.0,0.0,,,,,0.0,50000.0,GBA,0.0,,0.42,,,,0.0,,CASH,0.42,False,,,,a55475b1,,,a55475b1,,a55475b1,a55475b1,,,a55475b1,a55475b1,,,0.0,0.0,0.0,,,,0.0,0.0,,,0.0,0.0,0.0,0.0,0.0,0.0,,,,,,,1.0,,0.0,0.0,0.0,0.0,,,0.0,,,,,,,,,,,,,,,,,,,,,,0.0,0.0,0.0,,OTHER,OTHER,,,,,,11.0,0.0,0.0,0.0,,0.0,0.0,,,0.0,0.0,,FO,AL,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
5,57630,92,10,2,,,,,-22194.0,0.0,0.0,0.0,0.0,0.0,2fc785b2,a55475b1,a55475b1,0.0,0.0,a55475b1,a55475b1,0.0,,,,,,,,,,14,0.0,0.0,,,7404.8003,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,,,,,,,,,CA,,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,,,100000.0,CAL,0.0,0.0,,,,,0.0,100000.0,GBA,0.0,,0.45,,,,0.0,,CASH,0.45,False,,,,a55475b1,,,a55475b1,,a55475b1,a55475b1,,,a55475b1,a55475b1,,,,,,,,,,,,,,,,,,,,,,,,,1.0,,0.0,0.0,0.0,0.0,,,0.0,,,,,,,,,,,,,,,,,,,,,,0.0,0.0,0.0,,OTHER,OTHER,,,,,,24.0,,,,,0.0,0.0,,,0.0,0.0,,FO,AL,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
6,57631,92,10,2,,,,750875.46,-23961.0,6.0,7.0,1.0,12.0,1.0,2fc785b2,a55475b1,a55475b1,6.0,2.0,a55475b1,a55475b1,12.0,,,,,,,,,,14,6.0,0.0,,,2872.8,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,,,,,,,,,CA,,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,,,30000.0,CAL,0.0,0.0,,,,,0.0,30000.0,GBA,0.0,,0.45,,,,0.0,,CASH,0.45,False,,,,a55475b1,,,a55475b1,,a55475b1,a55475b1,,,a55475b1,a55475b1,,,,,,,,,,,,,,,,,,,,,,,,,1.0,,0.0,0.0,0.0,0.0,,,0.0,,,,,,,,,,,,,,,,,,,,,,0.0,0.0,0.0,,OTHER,OTHER,,,,,,18.0,,,,,0.0,0.0,,,0.0,0.0,,FO,AL,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
7,57632,92,10,2,,,,,-24538.0,0.0,0.0,0.0,0.0,0.0,2fc785b2,a55475b1,a55475b1,0.0,1.0,a55475b1,a55475b1,0.0,,,,,,,,,,14,0.0,1.0,,,6225.8003,0.0,0.0,1.0,0.0,0.0,0.0,1.0,,,,,,,,,,CA,,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,,,100000.0,CAL,0.0,0.0,,,,,0.0,100000.0,GBA,0.0,,0.42,,,,0.0,,CASH,0.42,False,,,14.0,a55475b1,,,P11_56_131,,a55475b1,a55475b1,,,a55475b1,a55475b1,T,,,,,,,,,,,,,,,,,,,,,,,,1.0,,0.0,0.0,0.0,0.0,,,0.0,,,,,,,,,,,,,,,,,,,,,,0.0,0.0,0.0,False,OTHER,OTHER,,,,,,24.0,,,,,1.0,1.0,,,0.0,0.0,,FO,AL,,0.0,6390.2,,0.0,,,100000.0,,0.0,100000.0,,,,0.0,6390.2,,0.0,,,100000.0,,0.0,100000.0,,,,0.0,6390.2,0.0,100000.0,,0.0,100000.0,,,,0.0,6390.2,,0.0,,,100000.0,,0.0,100000.0,,,0.0,6390.2,,0.0,,,100000.0,,0.0,100000.0,,,,,14.0,,,,,45.0,,14.0,,,,,45.0,,14.0,,,,,45.0,,14.0,,,,,45.0,,14.0,,,,,45.0,P11_56_131,a55475b1,P149_40_170,a55475b1,a55475b1,P11_56_131,a55475b1,P149_40_170,a55475b1,a55475b1,P11_56_131,a55475b1,P149_40_170,a55475b1,a55475b1,P11_56_131,a55475b1,P149_40_170,a55475b1,a55475b1,P11_56_131,a55475b1,P149_40_170,a55475b1,a55475b1,,,,,CAL,,CASH,False,,24.0,T,24.0,,,,,CAL,,CASH,False,,24.0,T,24.0,,CAL,,CASH,False,,24.0,T,24.0,,,,,CAL,,CASH,False,24.0,T,24.0,0.0,0.0,0.0,0.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
8,57633,92,10,2,,,,0.0,-11177.0,1.0,1.0,1.0,2.0,1.0,2fc785b2,6b2ae0fa,a55475b1,4.0,3.0,3439d993,a55475b1,2.0,,,,,,,,,,14,2.0,8.0,0.0,,7917.0,0.0,0.0,0.0,0.0,0.0,0.0,3.0,,,,,,,,,,,INSTANT,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,,0.0,131950.0,REL,0.0,0.0,,,,,0.0,0.0,DD,0.0,,,,,,1.0,0.0,NDF,,False,False,,-281.0,a55475b1,,,P94_109_143,,a55475b1,a55475b1,200000.0,-281.0,P45_84_106,P94_109_143,D,,0.0,0.0,0.0,,,,0.0,0.0,,,0.0,0.0,0.0,0.0,0.0,0.0,,,,,,,2.0,,0.0,0.0,1.0,0.0,,,0.0,,,,,,,,,,,,,,,,,,,,,,0.0,0.0,0.0,False,OTHER,OTHER,,,,,,,0.0,0.0,0.0,,0.0,2.0,,,0.0,0.0,,FO,AL,,0.0,7524.2,,200000.0,,,200000.0,,0.0,200000.0,,,,0.0,0.0,,0.0,,,20000.0,,0.0,30000.0,,,,0.0,0.0,200000.0,200000.0,,0.0,200000.0,,,,0.0,7524.2,,0.0,,,20000.0,,0.0,30000.0,,,0.0,4287.950075,,50000.0,,,87500.0,,0.0,105450.0,,,,,-281.0,,,,-2091.0,-735.0,,-1202.0,,,,-2091.0,-1172.0,,-281.0,,,,,,,-1202.0,,,,,-1172.0,,-813.0,,,,-2091.0,-959.0,P94_109_143,a55475b1,P46_145_78,a55475b1,a55475b1,P180_60_137,P33_146_175,P177_117_192,P45_84_106,P94_109_143,P94_109_143,a55475b1,P46_145_78,P45_84_106,P94_109_143,P94_109_143,a55475b1,P177_117_192,P99_56_166,P94_109_143,P94_109_143,a55475b1,P46_145_78,P94_109_143,P94_109_143,,,,,REL,MARRIED,POS,False,False,36.0,T,36.0,,,,,CAL,MARRIED,CASH,False,False,3.0,D,3.0,,REL,,NDF,False,False,,D,,,,,,COL,,POS,False,3.0,D,3.0,3.0,0.0,0.0,3.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
9,57634,92,10,2,,,,,,,,,,,a55475b1,a55475b1,a55475b1,,,a55475b1,a55475b1,,,,,,,,,,,14,,,,,5894.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,,,,,,,,,CA,,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,,,0.0,50000.0,CAL,0.0,0.0,,,,,0.0,50000.0,GBA,0.0,,0.45,,,,0.0,,CASH,0.45,False,,,-715.0,a55475b1,,,P94_109_143,,a55475b1,a55475b1,60000.0,-715.0,P94_109_143,P94_109_143,D,,0.0,0.0,0.0,,,,0.0,0.0,,,0.0,0.0,0.0,0.0,0.0,0.0,,,,,,,2.0,,0.0,0.0,0.0,0.0,,,0.0,,,,,,,,,,,,,,,,,,,,,,0.0,0.0,0.0,False,OTHER,OTHER,,,,,,12.0,0.0,0.0,0.0,,0.0,0.0,,,0.0,0.0,,FO,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,


In [10]:
# Empty prediction table indexed by case_id. Building it in one step keeps the
# cell idempotent: the previous column-assign + set_index(inplace=True) pair
# produced a broken (NaN) index when the cell was run a second time.
test_preds_df = pd.DataFrame(index=pd.Index(test_df["case_id"], name="case_id"))

In [11]:
# Feature matrix for model 1: drop WEEK_NUM and the model's dropped columns,
# then index the rows by case_id.
X_test = (
    test_df
    .drop(columns=["WEEK_NUM"] + drop_cols)
    .set_index("case_id")
)
print("X_test shape: ", X_test.shape)

# Score the test rows and store them under this model's prediction column.
y_pred_1 = pd.Series(
    predict_proba_in_batches(model_1, X_test),
    index=X_test.index,
)
test_preds_df[f"pred_{model_name}"] = y_pred_1

X_test shape:  (10, 513)
Processing batch: 1/1


In [12]:
# Attach model 1's saved training-set predictions as a new column.
# NOTE(review): assumes the saved predictions are in the same row order as oof_df - confirm.
oof_df[f"pred_{model_name}"] = joblib.load("/kaggle/input/home-credit-baseline-training/oof_pred.pkl")

In [13]:
# Score this model's training-set predictions with the gini stability metric.
gini_score = gini_stability(oof_df, score_col=f"pred_{model_name}")
# Record the score in the per-model summary table under the current model name.
models_score_df.loc[model_name, ["gini_score"]] = gini_score
print("gini_score:\t", gini_score)

gini_score:	 0.6497821787794288


## Model 2

In [14]:
# Key used for this model's prediction column and score row in later cells.
model_name = "model_2"
# NOTE(review): joblib.load unpickles the file, so the dataset must be trusted.
model_2 = joblib.load("/kaggle/input/home-credit-baseline-training-model-2/oof_model.pkl")
# Bare expression so the notebook displays the loaded model.
model_2

In [15]:
# Column lists saved alongside model 2: training columns, categorical columns, dropped columns.
train_cols, cat_cols, drop_cols = joblib.load(
    "/kaggle/input/home-credit-baseline-training-model-2/train_cat_columns.pkl"
)
# Report the size of each list, one per line.
print(
    *(
        f"{label} {len(cols)}"
        for label, cols in (
            ("train_cols:\t", train_cols),
            ("cat_cols:\t", cat_cols),
            ("drop_cols:\t", drop_cols),
        )
    ),
    sep="\n",
)

train_cols:	 516
cat_cols:	 138
drop_cols:	 89


In [16]:
# Feature matrix for model 2: drop WEEK_NUM and the model's dropped columns,
# then index the rows by case_id.
X_test = (
    test_df
    .drop(columns=["WEEK_NUM"] + drop_cols)
    .set_index("case_id")
)
print("X_test shape: ", X_test.shape)

# Score the test rows and store them under this model's prediction column.
y_pred_2 = pd.Series(
    predict_proba_in_batches(model_2, X_test),
    index=X_test.index,
)
test_preds_df[f"pred_{model_name}"] = y_pred_2

X_test shape:  (10, 424)
Processing batch: 1/1


In [17]:
# Attach model 2's saved training-set predictions as a new column.
# NOTE(review): assumes the saved predictions are in the same row order as oof_df - confirm.
oof_df[f"pred_{model_name}"] = joblib.load("/kaggle/input/home-credit-baseline-training-model-2/oof_pred.pkl")

In [18]:
# Score this model's training-set predictions with the gini stability metric.
gini_score = gini_stability(oof_df, score_col=f"pred_{model_name}")
# Record the score in the per-model summary table under the current model name.
models_score_df.loc[model_name, ["gini_score"]] = gini_score
print("gini_score:\t", gini_score)

gini_score:	 0.6430261760598828


## Model 3

In [19]:
!pip install --no-index -Uq --find-links=/kaggle/input/lightautoml-038-dependencies lightautoml==0.3.8

[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
cudf 23.8.0 requires cubinlinker, which is not installed.
cudf 23.8.0 requires cupy-cuda11x>=12.0.0, which is not installed.
cudf 23.8.0 requires ptxcompiler, which is not installed.
cuml 23.8.0 requires cupy-cuda11x>=12.0.0, which is not installed.
dask-cudf 23.8.0 requires cupy-cuda11x>=12.0.0, which is not installed.
beatrix-jupyterlab 2023.128.151533 requires jupyterlab~=3.6.0, but you have jupyterlab 4.0.11 which is incompatible.
cudf 23.8.0 requires cuda-python<12.0a0,>=11.7.1, but you have cuda-python 12.3.0 which is incompatible.
cudf 23.8.0 requires protobuf<5,>=4.21, but you have protobuf 3.20.3 which is incompatible.
cuml 23.8.0 requires dask==2023.7.1, but you have dask 2024.1.0 which is incompatible.
cuml 23.8.0 requires distributed==2023.7.1, but you have distributed 2024.1.0 which is 

In [20]:
from lightautoml.automl.presets.tabular_presets import TabularAutoML
from lightautoml.tasks import Task

In [21]:
# Name used for this model's prediction column (f"pred_{model_name}") and its
# row in models_score_df.
model_name = "denselight_model"
# NOTE(review): joblib.load unpickles arbitrary objects — only load artifacts
# from datasets you trust.
model_3 = joblib.load("/kaggle/input/home-credit-baseline-training-lightautoml/denselight_model.pkl")
# Show the loaded object's repr as the cell output.
model_3

<lightautoml.automl.presets.tabular_presets.TabularAutoML at 0x7abbd62f5480>

In [22]:
# Load the column lists saved with the LightAutoML model. This rebinds
# train_cols / cat_cols / drop_cols, replacing the lists loaded for model 2.
train_cols, cat_cols, drop_cols = joblib.load("/kaggle/input/home-credit-baseline-training-lightautoml/train_cat_columns.pkl")
# Print the list sizes as a quick sanity check.
print("train_cols:\t", len(train_cols))
print("cat_cols:\t", len(cat_cols))
print("drop_cols:\t", len(drop_cols))

train_cols:	 418
cat_cols:	 113
drop_cols:	 98


In [23]:
# Feature matrix for the LightAutoML model: remove WEEK_NUM plus this model's
# drop_cols and index the rows by case_id.
X_test = test_df.drop(columns=["WEEK_NUM"] + drop_cols).set_index("case_id")
print("X_test shape: ", X_test.shape)

# Predict in batches using the helper's "lightautoml" mode, then store the
# result, indexed by case_id, in the shared table of test predictions.
y_pred_3 = pd.Series(
    predict_proba_in_batches(model_3, X_test, predict_mode="lightautoml"),
    index=X_test.index,
)
test_preds_df[f"pred_{model_name}"] = y_pred_3

X_test shape:  (10, 415)
Processing batch: 1/1


In [24]:
# Attach the out-of-fold predictions saved by the LightAutoML training notebook.
# NOTE(review): presumably row-aligned with oof_df — confirm against the training notebook.
oof_df[f"pred_{model_name}"] = joblib.load("/kaggle/input/home-credit-baseline-training-lightautoml/denselight_oof_preds.pkl")

In [25]:
# Score this model's out-of-fold predictions with gini_stability (defined
# outside this section) and record the value in the per-model score table.
gini_score = gini_stability(oof_df, score_col=f"pred_{model_name}")
models_score_df.loc[model_name, ["gini_score"]] = gini_score
print("gini_score:\t", gini_score)

gini_score:	 0.6360602763237749


## Estimation Results

In [26]:
# Show the gini_score recorded for each model, for side-by-side comparison.
models_score_df

Unnamed: 0,gini_score
model_1,0.649782
model_2,0.643026
denselight_model,0.63606


In [27]:
# Inspect the out-of-fold frame, which now holds one pred_<model> column per model.
oof_df

Unnamed: 0,case_id,date_decision,MONTH,WEEK_NUM,target,pred_model_1,pred_model_2,pred_denselight_model
0,0,2019-01-03,201901,0,0,0.038914,0.036874,0.043001
1,1,2019-01-03,201901,0,0,0.060689,0.096309,0.028896
2,2,2019-01-04,201901,0,0,0.070097,0.083698,0.043408
3,3,2019-01-03,201901,0,0,0.078844,0.033708,0.056375
4,4,2019-01-04,201901,0,1,0.138104,0.120848,0.071871
...,...,...,...,...,...,...,...,...
1526654,2703450,2020-10-05,202010,91,0,0.000784,0.002075,0.000824
1526655,2703451,2020-10-05,202010,91,0,0.001888,0.002943,0.002522
1526656,2703452,2020-10-05,202010,91,0,0.041933,0.044443,0.055235
1526657,2703453,2020-10-05,202010,91,0,0.003564,0.002797,0.001453


In [28]:
# The test features are no longer needed: every model's test predictions were
# already written to test_preds_df. Drop the frame and ask the collector to
# reclaim memory.
del test_df
gc.collect()

0

# Blending

In [29]:
# Factory for the loss function that actually calls the evaluation metric.
def gini_wrapper(base_df):
    """Build a ``loss(target, scores)`` callable backed by ``gini_stability``.

    A private copy of the ``WEEK_NUM`` and ``target`` columns of ``base_df`` is
    captured by the returned closure, so the caller's frame is never modified.

    Parameters
    ----------
    base_df : pandas.DataFrame
        Frame containing at least the ``WEEK_NUM`` and ``target`` columns.

    Returns
    -------
    callable
        ``gini_wrapper_inner(target, scores)`` returning ``1 - gini_stability``
        so that a minimizer maximizes the metric. The ``target`` argument is
        accepted only for signature compatibility and is ignored; targets come
        from the captured copy of ``base_df``.
    """
    scoring_frame = base_df[["WEEK_NUM", "target"]].copy()

    def gini_wrapper_inner(target, scores):
        # Each call overwrites the "score" column of the captured frame.
        scoring_frame["score"] = scores
        return 1 - gini_stability(scoring_frame, score_col="score")

    return gini_wrapper_inner

### Hill climbing using minimize

In [30]:
# Companion to the gini_wrapper function: a class that searches for blending weights.
# find_weights() is run afterwards on the training (out-of-fold) predictions.
# For practical purposes it is enough to know that it yields reasonable weights.
class WeightsSearcher:
    """Search for per-model blending weights that minimize a loss function.

    Parameters
    ----------
    loss_fn : callable
        Called as ``loss_fn(true_targets, blended_predictions)``; must return
        the scalar value to minimize.
    bounds : sequence of (low, high) pairs, optional
        One pair per model. When omitted or empty, ``(0, 1)`` is used for every
        model, with the model count taken from the prediction matrix passed to
        ``find_weights``.
    mode : str, default "min"
        Stored on the instance but not consulted by the search, which always
        minimizes ``loss_fn``.
    method : str, default "SLSQP"
        Method name forwarded to ``scipy.optimize.minimize``.
    """

    def __init__(self, loss_fn, bounds=None, mode="min", method='SLSQP'):
        self.loss_fn = loss_fn
        # A None default avoids sharing one mutable list between instances.
        self.bounds = [] if bounds is None else bounds
        self.mode = mode
        self.method = method # Nelder-Mead - for non-smooth functions

    # Produces the function handed to the optimizer: it multiplies the
    # predictions by the candidate weights and evaluates the loss on the blend
    # (in this notebook the loss is 1 - gini_score).
    def _objective_function_wrapper(self, pred_values, true_targets, obj_fn):
        """Return ``f(weights) -> obj_fn(true_targets, weighted row sums)``."""
        def objective_function(weights):
            pred_weighted = (pred_values * weights).sum(axis=1)
            score = obj_fn(true_targets, pred_weighted)
            return score
        return objective_function

    def find_weights(self, val_preds, true_targets):
        """Optimize the blending weights and return them normalized to sum to 1.

        Parameters
        ----------
        val_preds : 2-D array-like, shape (n_samples, n_models)
            One column of predictions per model.
        true_targets : array-like
            Passed through unchanged as the first argument of ``loss_fn``.

        Returns
        -------
        numpy.ndarray
            Weights of length ``n_models`` that sum to 1.

        Raises
        ------
        ValueError
            If ``val_preds`` is not 2-D with at least one column, if the number
            of bounds differs from the number of models, or if the optimized
            weights cannot be normalized (zero or non-finite sum).
        """
        pred_shape = np.shape(val_preds)
        if len(pred_shape) != 2 or pred_shape[1] == 0:
            raise ValueError(
                f"val_preds must be 2-D with at least one model column, got shape {pred_shape}"
            )
        # The model count comes from the data, so empty bounds still work.
        n_models = pred_shape[1]
        bounds = self.bounds if len(self.bounds) > 0 else [(0, 1)] * n_models
        if len(bounds) != n_models:
            raise ValueError(
                f"got {len(bounds)} bounds for {n_models} prediction columns"
            )
        # Start from an equal-weight blend.
        initial_weights = np.full(n_models, 1.0 / n_models)
        objective_function = self._objective_function_wrapper(val_preds, true_targets, self.loss_fn)
        # Find the weights that minimize the loss (i.e. maximize gini_score here).
        result = minimize(
            objective_function,
            initial_weights,
            bounds=bounds,
            method=self.method,
        )
        optimized_weights = np.array(result.x, dtype=float)
        total = optimized_weights.sum()
        if not np.isfinite(total) or total == 0:
            raise ValueError(
                f"cannot normalize optimized weights {optimized_weights!r}: sum is {total}"
            )
        return optimized_weights / total

In [31]:
# The models to blend are those recorded in the score table; their prediction
# columns follow the f"pred_{name}" convention used when they were stored.
model_names = models_score_df.index.to_list()
pred_cols = [f"pred_{name}" for name in model_names]
model_names

['model_1', 'model_2', 'denselight_model']

In [32]:
# Constrain every blending weight to [0, 1], one bound per prediction column.
bounds = [(0, 1)] * len(pred_cols)
# Alternative loss (1 - ROC AUC); defined here but not passed to the searcher below.
roc_auc_fn = lambda y_true, y_pred: 1 - roc_auc_score(y_true, y_pred)
# Loss actually used: 1 - gini_stability, computed on oof_df's WEEK_NUM/target columns.
gini_score_fn = gini_wrapper(oof_df)
w_searcher = WeightsSearcher(gini_score_fn, bounds, method='Nelder-Mead') # other candidate losses: log_loss, roc_auc_fn
# y_train is forwarded as true_targets, but gini_score_fn ignores that argument
# and reads the targets from its own copy of oof_df.
optimized_weights = w_searcher.find_weights(
    oof_df[pred_cols].to_numpy(), 
    y_train
)
# Tabulate the weight assigned to each model and confirm the weights sum to 1.
optimized_weights_df = pd.DataFrame(zip(model_names, optimized_weights), columns=['model', 'weight'])
display(optimized_weights_df)
print("sum: ", np.sum(optimized_weights))

Unnamed: 0,model,weight
0,model_1,0.479296
1,model_2,0.266185
2,denselight_model,0.254519


sum:  1.0


In [33]:
# Blend the out-of-fold predictions with the weights found above and evaluate the blend.
oof_pred_optimized = (oof_df[pred_cols] * optimized_weights).sum(axis=1).to_numpy()
oof_df["pred_optimized"] = oof_pred_optimized # column holding the weighted blend of predictions
roc_auc_oof = roc_auc_score(y_train, oof_pred_optimized)
gini_score = gini_stability(oof_df, score_col="pred_optimized") # score the blended predictions with gini_stability
print("CV roc_auc_oof optimized:\t", roc_auc_oof)
print("CV gini_score:\t\t\t", gini_score)

CV roc_auc_oof optimized:	 0.8372383516308529
CV gini_score:			 0.6550665681333759


In [34]:
# Apply the same weights to the per-model test predictions to get the final blend.
# .to_numpy() drops the index, so y_pred_all is positional from here on.
y_pred_all = (optimized_weights * test_preds_df[pred_cols]).sum(axis=1).to_numpy()
# Preview the first ten blended scores.
y_pred_all[:10]

array([0.15268928, 0.10499393, 0.08034035, 0.04036048, 0.03656391,
       0.03883912, 0.06013255, 0.04124608, 0.0480157 , 0.05783407])

# Submission

In [35]:
# Start from the sample submission so the output has exactly the expected rows.
subm_df = pd.read_csv(data_nb.CFG.root_dir / "sample_submission.csv")
subm_df = subm_df.set_index("case_id")
# y_pred_all is a plain array in test_preds_df row order. Re-attach that index
# so pandas matches each score to its submission row by case_id, rather than
# relying on sample_submission.csv listing the cases in the same order.
# NOTE(review): relies on test_preds_df being indexed by case_id, which follows
# from the per-model Series (indexed by X_test.index) stored in it — confirm.
subm_df["score"] = pd.Series(y_pred_all, index=test_preds_df.index)
display(subm_df.head())
print("Check null: ", subm_df["score"].isnull().any())
# Index alignment yields NaN for any case_id without a prediction; fail loudly
# rather than writing an incomplete submission.
assert not subm_df["score"].isnull().any(), "some case_id rows in sample_submission.csv have no prediction"

Unnamed: 0_level_0,score
case_id,Unnamed: 1_level_1
57543,0.152689
57549,0.104994
57551,0.08034
57552,0.04036
57569,0.036564


Check null:  False


In [36]:
# Write the submission file; case_id is the index, so it is emitted as the first column.
subm_df.to_csv("submission.csv")