In [26]:
import os
import numpy as np 
import seaborn as sns
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
from catboost import Pool, cv, CatBoostClassifier
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import auc, roc_curve
from catboost.utils import get_roc_curve
import pickle

pd.set_option('display.max_columns', 1000)
np.random.seed(566)
pd.set_option('display.max_rows', 500)
pd.set_option('display.width', 1000)
pd.set_option('display.float_format', '{:20,.2f}'.format)
pd.set_option('display.max_colwidth', -1)

In [27]:
TARGET_COL = "hospital_death"
df = pd.read_csv(r"input\training_v2.csv")
test = pd.read_csv(r"input\unlabeled.csv")

In [28]:
for col in df.columns:
    if "d1" in col and "max" in col:
        field_name = (col[3:-4])
        d1_max = col
        d1_min = 'd1_' + col[3:-4] + "_min"
        h1_max = 'h1_' + col[3:-4] + "_max"
        h1_min = 'h1_' + col[3:-4] + "_min"
    
        df[field_name+'d1_diff'] = df[d1_max] - df[d1_min]
        df[field_name+'h1_diff'] = df[h1_max] - df[h1_min]
        df[field_name+'_over_d1_max_avg'] = df[d1_max] / df[d1_max].mean()
        df[field_name+'_over_d1_min_avg'] = df[d1_min] / df[d1_min].mean()
        df[field_name+'_over_h1_max_avg'] = df[h1_max] / df[h1_max].mean()
        df[field_name+'_over_h1_min_avg'] = df[h1_min] / df[h1_min].mean()
        df[field_name+'_d1_max_over_h1_max'] = df[d1_max] / df[h1_max]
        df[field_name+'_d1_min_over_h1_min'] = df[d1_min] / df[h1_min]
        df[field_name+'_diff_d1_max_h1_max'] = df[d1_max] - df[h1_max]
        df[field_name+'_diff_d1_min_h1_min'] = df[d1_min] - df[h1_min]
        
        df[field_name+'_diff_d1_max_h1_max_over_avg'] = df[d1_max]/df[d1_max].mean() - df[h1_max]/df[h1_max].mean()
        df[field_name+'_diff_d1_min_h1_min_over_avg'] = df[d1_min]/df[d1_min].mean() - df[h1_min]/df[h1_min].mean()
        df[field_name+'_diff_d1_max_d1_min_over_avg'] = df[d1_max]/df[d1_max].mean() - df[d1_min]/df[d1_min].mean()
        df[field_name+'_diff_h1_max_h1_min_over_avg'] = df[h1_max]/df[h1_max].mean() - df[h1_min]/df[h1_min].mean()
        
for col in test.columns:
    if "d1" in col and "max" in col:
        field_name = (col[3:-4])
        d1_max = col
        d1_min = 'd1_' + col[3:-4] + "_min"
        h1_max = 'h1_' + col[3:-4] + "_max"
        h1_min = 'h1_' + col[3:-4] + "_min"
    
        test[field_name+'d1_diff'] = test[d1_max] - test[d1_min]
        test[field_name+'h1_diff'] = test[h1_max] - test[h1_min]
        test[field_name+'_over_d1_max_avg'] = test[d1_max] / test[d1_max].mean()
        test[field_name+'_over_d1_min_avg'] = test[d1_min] / test[d1_min].mean()
        test[field_name+'_over_h1_max_avg'] = test[h1_max] / test[h1_max].mean()
        test[field_name+'_over_h1_min_avg'] = test[h1_min] / test[h1_min].mean()
        test[field_name+'_d1_max_over_h1_max'] = test[d1_max] / test[h1_max]
        test[field_name+'_d1_min_over_h1_min'] = test[d1_min] / test[h1_min]
        test[field_name+'_diff_d1_max_h1_max'] = test[d1_max] - test[h1_max]
        test[field_name+'_diff_d1_min_h1_min'] = test[d1_min] - test[h1_min]
        
        test[field_name+'_diff_d1_max_h1_max_over_avg'] = test[d1_max]/test[d1_max].mean() - test[h1_max]/test[h1_max].mean()
        test[field_name+'_diff_d1_min_h1_min_over_avg'] = test[d1_min]/test[d1_min].mean() - test[h1_min]/test[h1_min].mean()
        test[field_name+'_diff_d1_max_d1_min_over_avg'] = test[d1_max]/test[d1_max].mean() - test[d1_min]/test[d1_min].mean()
        test[field_name+'_diff_h1_max_h1_min_over_avg'] = test[h1_max]/test[h1_max].mean() - test[h1_min]/test[h1_min].mean()        
        
        

In [29]:
categorical_cols_text =  ['hospital_id', 'ethnicity', 'gender', 'hospital_admit_source', 
'icu_admit_source', 'icu_stay_type', 'icu_type', 'apache_3j_bodysystem', 'apache_2_bodysystem',]

                          
categorical_cols_numb =  ['icu_id','gcs_eyes_apache', 'gcs_motor_apache', 'gcs_verbal_apache', 'elective_surgery', 
'readmission_status', 'apache_post_operative', 'arf_apache', 'gcs_unable_apache', 'intubated_apache',
'ventilated_apache', 'aids', 'cirrhosis', 'diabetes_mellitus', 'hepatic_failure', 'immunosuppression',
'leukemia', 'lymphoma', 'solid_tumor_with_metastasis']
# looking at the input dictionary, any field that is Binary, Int or text is treated as categorical

In [30]:
df[categorical_cols_numb] = df[categorical_cols_numb].fillna("9999").astype('int64')
test[categorical_cols_numb] = test[categorical_cols_numb].fillna("9999").astype('int64')

In [55]:
df[categorical_cols_text] = df[categorical_cols_text].fillna("")
test[categorical_cols_text] = test[categorical_cols_text].fillna("")

display (df.dtypes)

categorical_cols =categorical_cols_numb + categorical_cols_text

encounter_id                                      int64  
patient_id                                        int64  
hospital_id                                       int64  
hospital_death                                    int64  
age                                               float64
bmi                                               float64
elective_surgery                                  int64  
ethnicity                                         object 
gender                                            object 
height                                            float64
hospital_admit_source                             object 
icu_admit_source                                  object 
icu_id                                            int64  
icu_stay_type                                     object 
icu_type                                          object 
pre_icu_los_days                                  float64
readmission_status                                int64  
weight        

In [32]:
X_train = df.drop([TARGET_COL,'encounter_id','patient_id', 'apache_4a_icu_death_prob', 'apache_4a_hospital_death_prob'  ],axis=1)
y_train = df[TARGET_COL]


In [33]:
## catBoost Pool object
train_pool = Pool(data=X_train,label = y_train,cat_features=categorical_cols,)

### OPT/TODO:  do train test split for early stopping then add that as an eval pool object : 

In [34]:
model_basic = CatBoostClassifier(verbose=False,iterations=50,)#,learning_rate=0.1, task_type="GPU" ,)
model_basic.fit(train_pool, plot=True,silent=True)
print(model_basic.get_best_score())

MetricVisualizer(layout=Layout(align_self='stretch', height='500px'))

{'learn': {'Logloss': 0.16595993284559668}}


In [35]:
df_level2 = df[[TARGET_COL, 'apache_4a_icu_death_prob', 'apache_4a_hospital_death_prob'  ]]
df.to_csv("df_level2_1.csv",index=False)

test_level2 = test[['apache_4a_icu_death_prob', 'apache_4a_hospital_death_prob']]
test.to_csv("test_level2_1.csv",index=False)

best_model_score = 0

learning_rates = [0.05, ]
depths= [3, 6,]
l2_leaf_regs=[1,]

for learning_rate in learning_rates:
    for depth in depths:
        for l2_leaf_reg in l2_leaf_regs:
            
            model = CatBoostClassifier(iterations=50, learning_rate=learning_rate, depth=depth,
                                            l2_leaf_reg=l2_leaf_reg, class_weights=[1, 10], 
                                            custom_metric='AUC', early_stopping_rounds=400) #task_type="GPU",
            model.fit(train_pool, plot=True,silent=True )
            
            model_name = 'L1_CT_LR'+str(learning_rate)+'_depth'+str(depth)+'_l2'+str(l2_leaf_reg)
            print('This model is: ' + model_name)
            print("Model Best Score: " + str(model.get_best_score()))
            
            df_level2[model_name] = model.predict(X_train,prediction_type='Probability')[:,1]
            fpr, tpr, threshold = roc_curve(df[TARGET_COL], df_level2[model_name])
            auc_score = auc(fpr, tpr)
            print('Model AUC: '+ str(auc_score))
            
            df_level2.to_csv("df_level2.csv",index=False)
            
            model.save_model(model_name+".model", pool=train_pool)
                  
            feature_importances = model.get_feature_importance(train_pool)
            feature_names = X_train.columns
                    
            test[TARGET_COL] = model.predict(test.drop([TARGET_COL,'encounter_id','patient_id',
                                                              'apache_4a_icu_death_prob', 'apache_4a_hospital_death_prob'],axis=1)
                                                   ,prediction_type='Probability')[:,1]
            
            test[["encounter_id",TARGET_COL]].to_csv("submission"+model_name+".csv",index=False)
            
            test_level2[model_name]= test[TARGET_COL]
            test_level2.to_csv("test_level2.csv",index=False)
            
            if auc_score > best_model_score :
                best_model_name = model_name
                best_model_important_features = []
                for score, name in sorted(zip(feature_importances, feature_names), reverse=True):
                    if score > 0.05:
                        best_model_important_features.append(name)
                print(best_model_important_features)
           
            
pickle.dump( best_model_important_features, open( "best_model_important_features.p", "wb" ) )                     
print('\n\n\nTraining level 1 complete')

MetricVisualizer(layout=Layout(align_self='stretch', height='500px'))

This model is: L1_CT_LR0.05_depth3_l21
Model Best Score: {'learn': {'Logloss': 0.48072372432663485}}


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy


Model AUC: 0.8683907652848123


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy


['ventilated_apache', 'gcs_motor_apache', 'apache_3j_diagnosis', 'gcs_verbal_apache', 'd1_bun_max', 'gcs_eyes_apache', 'd1_sysbp_min', 'sysbp_noninvasive_over_d1_min_avg', 'lactate_over_d1_max_avg', 'spo2_over_d1_min_avg', 'd1_spo2_min', 'bun_over_d1_max_avg', 'lactate_over_d1_min_avg', 'd1_lactate_max', 'apache_2_bodysystem', 'age', 'sysbp_over_d1_min_avg', 'd1_lactate_min', 'temp_over_d1_min_avg', 'd1_temp_min', 'heartrate_over_d1_max_avg', 'd1_sysbp_noninvasive_min', 'd1_heartrate_max', 'icu_admit_source', 'creatinine_over_d1_max_avg', 'resprate_over_d1_max_avg', 'arterial_pco2d1_diff', 'arterial_po2_over_d1_max_avg', 'arterial_po2d1_diff', 'bun_over_d1_min_avg', 'resprate_over_d1_min_avg', 'd1_bun_min', 'h1_resprate_min', 'arterial_pco2_over_d1_min_avg', 'd1_inr_max', 'd1_resprate_min', 'bun_apache', 'd1_hco3_min', 'd1_arterial_po2_min', 'h1_resprate_max', 'd1_hco3_max', 'hco3_over_d1_min_avg', 'hco3_over_d1_max_avg', 'spo2_diff_d1_max_d1_min_over_avg', 'd1_mbp_noninvasive_min', 'f

MetricVisualizer(layout=Layout(align_self='stretch', height='500px'))

This model is: L1_CT_LR0.05_depth6_l21
Model Best Score: {'learn': {'Logloss': 0.44659750835300044}}
Model AUC: 0.8845077196362936
['ventilated_apache', 'gcs_motor_apache', 'apache_3j_diagnosis', 'gcs_verbal_apache', 'age', 'd1_lactate_min', 'gcs_eyes_apache', 'sysbp_over_d1_min_avg', 'd1_temp_min', 'bun_over_d1_min_avg', 'apache_2_bodysystem', 'd1_spo2_min', 'd1_bun_max', 'd1_bun_min', 'bun_over_d1_max_avg', 'd1_sysbp_min', 'lactate_over_d1_max_avg', 'elective_surgery', 'heartrate_over_d1_max_avg', 'd1_lactate_max', 'icu_admit_source', 'spo2d1_diff', 'hco3_over_d1_max_avg', 'heartrate_over_d1_min_avg', 'spo2_over_d1_min_avg', 'd1_mbp_min', 'resprate_over_d1_max_avg', 'bun_apache', 'arterial_po2d1_diff', 'd1_resprate_min', 'spo2_diff_d1_max_d1_min_over_avg', 'platelets_over_d1_min_avg', 'd1_resprate_max', 'sysbp_noninvasive_over_d1_min_avg', 'arterial_ph_diff_d1_max_d1_min_over_avg', 'heartrated1_diff', 'h1_resprate_min', 'd1_heartrate_max', 'solid_tumor_with_metastasis', 'fio2_apache'

In [49]:
#  initializing Level 2 variables

categorical_cols_text =  ['hospital_id', 'ethnicity', 'gender', 'hospital_admit_source', 
'icu_admit_source', 'icu_stay_type', 'icu_type', 'apache_3j_bodysystem', 'apache_2_bodysystem',]

                          
categorical_cols_numb =  ['icu_id','gcs_eyes_apache', 'gcs_motor_apache', 'gcs_verbal_apache', 'elective_surgery', 
'readmission_status', 'apache_post_operative', 'arf_apache', 'gcs_unable_apache', 'intubated_apache',
'ventilated_apache', 'aids', 'cirrhosis', 'diabetes_mellitus', 'hepatic_failure', 'immunosuppression',
'leukemia', 'lymphoma', 'solid_tumor_with_metastasis']

best_model_important_features = pickle.load( open( "best_model_important_features.p", "rb" ) )

TARGET_COL = "hospital_death"

df_level2_1 = pd.read_csv("df_level2_1.csv")
df_level2 = pd.read_csv("df_level2.csv")

f_selected_df_level2_1 = pd.concat([df_level2_1[best_model_important_features],df_level2], axis=1)

test_level2_1 = pd.read_csv("test_level2_1.csv")
test_level2 = pd.read_csv("test_level2.csv")
f_selected_test_level2_1 = pd.concat([test_level2_1[best_model_important_features],test_level2], axis=1)


In [53]:
# Building pool

l2_categorical_cols_text = []
l2_categorical_cols_numb = []

for feature in best_model_important_features:
    if feature in categorical_cols_text:
        l2_categorical_cols_text.append(feature)
    elif feature in categorical_cols_numb:
        l2_categorical_cols_numb.append(feature)
        
categorical_cols =l2_categorical_cols_numb + l2_categorical_cols_text
print(categorical_cols)

f_selected_df_level2_1[l2_categorical_cols_text] = f_selected_df_level2_1[l2_categorical_cols_text].fillna("")
f_selected_test_level2_1[l2_categorical_cols_text] = f_selected_test_level2_1[l2_categorical_cols_text].fillna("")

f_selected_df_level2_1[l2_categorical_cols_numb] = f_selected_df_level2_1[l2_categorical_cols_numb].fillna("9999").astype('int64')
f_selected_test_level2_1[l2_categorical_cols_numb] = f_selected_test_level2_1[l2_categorical_cols_numb].fillna("9999").astype('int64')

X_train_l2 = f_selected_df_level2_1.drop([TARGET_COL],axis=1)
y_train_l2 = f_selected_df_level2_1[TARGET_COL]


train_pool = Pool(data=X_train_l2, label=y_train_l2, cat_features=categorical_cols,)



['ventilated_apache', 'gcs_motor_apache', 'gcs_verbal_apache', 'gcs_eyes_apache', 'elective_surgery', 'solid_tumor_with_metastasis', 'apache_2_bodysystem', 'icu_admit_source', 'apache_3j_bodysystem', 'hospital_admit_source']


Unnamed: 0,ventilated_apache,gcs_motor_apache,apache_3j_diagnosis,gcs_verbal_apache,age,d1_lactate_min,gcs_eyes_apache,sysbp_over_d1_min_avg,d1_temp_min,bun_over_d1_min_avg,apache_2_bodysystem,d1_spo2_min,d1_bun_max,d1_bun_min,bun_over_d1_max_avg,d1_sysbp_min,lactate_over_d1_max_avg,elective_surgery,heartrate_over_d1_max_avg,d1_lactate_max,icu_admit_source,spo2d1_diff,hco3_over_d1_max_avg,heartrate_over_d1_min_avg,spo2_over_d1_min_avg,d1_mbp_min,resprate_over_d1_max_avg,bun_apache,arterial_po2d1_diff,d1_resprate_min,spo2_diff_d1_max_d1_min_over_avg,platelets_over_d1_min_avg,d1_resprate_max,sysbp_noninvasive_over_d1_min_avg,arterial_ph_diff_d1_max_d1_min_over_avg,heartrated1_diff,h1_resprate_min,d1_heartrate_max,solid_tumor_with_metastasis,fio2_apache,temp_over_d1_max_avg,d1_hco3_min,creatinine_over_d1_min_avg,inr_over_d1_max_avg,mbp_over_d1_min_avg,arterial_po2_over_d1_min_avg,temp_over_d1_min_avg,d1_creatinine_min,h1_inr_max,d1_sodium_max,resprate_over_d1_min_avg,hco3_over_d1_min_avg,d1_pao2fio2ratio_min,pre_icu_los_days,d1_temp_max,albumin_diff_d1_max_d1_min_over_avg,d1_diasbp_min,d1_hematocrit_min,resprate_over_h1_max_avg,d1_inr_max,d1_sysbp_noninvasive_min,sodium_over_d1_max_avg,heart_rate_apache,d1_wbc_min,lactate_over_d1_min_avg,arterial_po2_diff_d1_max_d1_min_over_avg,mbp_noninvasive_over_d1_min_avg,pao2fio2ratiod1_diff,hematocrit_over_d1_max_avg,d1_potassium_min,h1_inr_min,resprate_apache,d1_arterial_ph_min,d1_bilirubin_max,creatinine_over_d1_max_avg,lactate_diff_d1_max_d1_min_over_avg,apache_3j_bodysystem,urineoutput_apache,wbc_over_d1_max_avg,resprate_over_h1_min_avg,hospital_admit_source,arterial_pco2_over_d1_min_avg,wbc_over_d1_min_avg,pao2fio2ratio_over_d1_max_avg,bmi,mbp_noninvasive_over_h1_min_avg,d1_hco3_max,d1_creatinine_max,arterial_pco2d1_diff,d1_platelets_min,arterial_pco2_diff_d1_max_d1_min_over_avg,mbp_diff_d1_max_d1_min_over_avg,sodium_over_d1_min_avg,h1_sysbp_noninvasive_min,weight,bilirubin_over_d1_max_avg,d1_sodium_min,arterial_ph_over_d1_max_avg,sysbp_over_d1_max_avg,sodium_apache,hemaglobin_over_d1_max_avg,resprate_diff_d1_min_h1_min_over_avg,bilirubin_apache,mbp_invasive_d1_min_over_h1_min,diasbp_invasive_over_d1_min_avg,h1_resprate_max,sysbp_invasive_diff_d1_min_h1_min,arterial_ph_diff_d1_min_h1_min,sysbp_invasive_d1_max_over_h1_max,pao2fio2ratio_diff_d1_min_h1_min_over_avg,heartrate_diff_h1_max_h1_min_over_avg,wbc_apache,hospital_death,apache_4a_icu_death_prob,apache_4a_hospital_death_prob,L1_CT_LR0.05_depth3_l21,L1_CT_LR0.05_depth6_l21
0,0,6,502.01,4,68.00,1.00,3,0.75,37.20,1.26,Cardiovascular,74.00,31.00,30.00,1.21,73.00,0.44,0,1.16,1.30,Floor,26.00,0.78,1.02,0.82,46.00,1.18,31.00,,10.00,0.19,1.18,34.00,0.75,,47.00,18.00,119.00,0,,1.07,15.00,1.63,,0.71,,1.03,2.23,,136.00,0.78,0.65,,0.54,39.90,-0.02,37.00,27.40,1.15,,73.00,0.98,118.00,14.10,0.47,,0.71,,0.79,3.40,,36.00,,0.40,1.69,-0.03,Sepsis,,1.13,1.05,Floor,,1.25,,22.73,1.07,19.00,2.51,,233.00,,0.14,0.97,115.00,73.90,0.35,134.00,,0.88,134.00,0.78,-0.27,0.40,,0.68,26.00,,,,,-0.00,14.10,0,0.05,0.10,0.65,0.67
1,1,3,203.01,1,77.00,3.50,1,0.69,35.10,0.38,Respiratory,70.00,11.00,9.00,0.43,67.00,1.20,0,1.15,3.50,Floor,30.00,1.11,1.02,0.77,38.00,1.11,9.00,0.00,12.00,0.23,2.47,32.00,0.69,-0.01,46.00,28.00,118.00,0,1.00,0.97,26.00,0.41,0.81,0.59,0.49,0.97,0.56,1.30,145.00,0.93,1.12,51.00,0.93,36.30,-0.01,31.00,36.10,1.37,1.30,67.00,1.04,120.00,12.70,1.65,-0.19,0.59,3.80,1.07,3.80,1.30,33.00,7.45,0.50,0.48,-0.45,Respiratory,,1.87,1.63,Floor,0.96,1.12,0.19,27.42,0.72,27.00,0.71,0.00,487.00,-0.14,0.56,1.05,71.00,70.20,0.44,145.00,1.01,1.07,145.00,0.99,-0.69,,,,31.00,,0.00,,0.01,0.04,12.70,0,0.29,0.47,0.90,0.92
2,0,6,703.03,5,25.00,,3,1.08,36.70,,Metabolic,91.00,,,,105.00,,0,0.93,,Accident & Emergency,7.00,,0.97,1.01,68.00,0.73,,,8.00,-0.02,,21.00,1.08,,28.00,16.00,96.00,0,,0.99,,,,1.05,,1.01,,,,0.62,,,0.00,37.00,,48.00,,0.88,,105.00,,102.00,,,,1.05,,,,,37.00,,,,,Metabolic,,,0.93,Emergency Department,,,,31.95,1.04,,,,,,-0.07,,124.00,95.30,,,,1.00,,,-0.31,,,,20.00,,,,,0.11,,0,0.00,0.00,0.12,0.08
3,1,6,1206.03,5,81.00,,4,0.87,34.80,,Cardiovascular,95.00,,,,84.00,,1,1.13,,Operating Room / Recovery,5.00,,1.31,1.05,84.00,0.80,,235.00,7.00,-0.04,0.22,23.00,0.87,0.00,24.00,11.00,116.00,0,0.60,1.02,,,1.00,1.29,0.99,0.96,,1.60,,0.54,,236.67,0.00,38.00,,42.00,25.90,0.53,1.60,84.00,,114.00,8.00,,1.05,1.29,105.83,0.98,3.50,1.10,4.00,7.34,,,,Cardiovascular,,0.72,0.64,Operating Room,0.70,0.71,1.20,22.64,,,,10.00,43.00,0.12,-0.49,,,61.70,,,1.01,1.07,,1.01,-0.09,,0.73,0.64,12.00,-28.00,0.00,1.21,-0.37,-0.06,8.00,0,0.03,0.04,0.40,0.35
4,0,9999,601.01,9999,19.00,,9999,1.24,36.70,,Trauma,96.00,,,,120.00,,0,0.86,,Accident & Emergency,4.00,,0.85,1.06,90.00,0.62,,,16.00,-0.05,,18.00,1.24,,29.00,,89.00,0,,1.00,,,,1.39,,1.01,,,,1.25,,,0.07,37.20,,57.00,,,,120.00,,60.00,,,,1.39,,,,,16.00,,,,,Trauma,,,,,,,,,1.15,,,,,,-0.39,,120.00,,,,,0.99,,,,,,,,,,,,0.06,,0,,,0.35,0.31
5,0,6,403.01,5,67.00,,4,1.10,36.60,0.55,Neurologic,91.00,13.00,13.00,0.51,107.00,,0,1.10,,Accident & Emergency,6.00,1.11,1.18,1.01,80.00,1.11,13.00,,10.00,-0.03,0.81,32.00,1.10,,30.00,12.00,113.00,0,,0.99,27.00,0.52,0.69,1.23,,1.01,0.71,1.10,137.00,0.78,1.17,,0.00,36.80,,61.00,44.20,0.53,1.10,107.00,0.98,113.00,10.90,,,1.23,,1.28,3.70,1.10,35.00,,,0.48,,Neurological,,0.87,0.70,Direct Admit,,0.96,,27.56,1.39,27.00,0.71,,159.00,,-0.02,0.99,143.00,100.00,,137.00,,1.17,137.00,1.36,0.08,,,,12.00,,,,,-0.09,10.90,0,0.02,0.05,0.20,0.15
6,1,6,203.01,5,59.00,,4,1.37,35.00,0.46,Respiratory,87.00,18.00,11.00,0.70,133.00,,0,1.09,,Accident & Emergency,13.00,1.35,1.00,0.96,97.00,1.32,18.00,0.00,16.00,0.05,1.41,38.00,1.37,-0.01,42.00,18.00,112.00,0,1.00,1.00,30.00,0.57,,1.50,3.57,0.97,0.78,,136.00,1.25,1.29,370.00,0.00,37.20,,68.00,33.50,0.80,,133.00,0.98,133.00,5.90,,-1.34,1.49,0.00,1.09,4.20,,53.00,7.42,,0.57,,Respiratory,,0.75,1.05,Operating Room,1.12,0.52,1.30,57.45,1.47,33.00,0.85,0.00,278.00,-0.17,-0.38,0.98,,156.60,,135.00,1.00,1.02,135.00,1.04,0.20,,0.73,1.39,18.00,-47.00,0.00,1.00,0.09,-0.00,5.90,0,0.05,0.10,0.47,0.40
7,1,6,501.05,5,70.00,,4,0.73,36.60,2.02,Cardiovascular,92.00,48.00,48.00,1.87,71.00,,0,1.15,,Accident & Emergency,8.00,1.19,1.22,1.02,60.00,0.97,48.00,0.00,12.00,-0.01,1.32,28.00,0.73,-0.01,32.00,26.00,118.00,0,,1.03,29.00,1.50,,0.92,0.86,1.01,2.05,,140.00,0.93,1.25,,0.00,38.50,,46.00,25.50,1.24,,71.00,1.01,120.00,12.80,,-0.32,0.92,,0.74,2.40,,28.00,7.38,,1.38,,Sepsis,,1.03,1.51,Emergency Department,1.12,1.13,,,0.75,29.00,2.05,0.00,260.00,-0.17,0.16,1.02,106.00,,,140.00,1.00,0.99,140.00,0.68,-0.58,,,,28.00,,,,,-0.08,12.80,0,0.06,0.11,0.62,0.59
8,1,6,103.01,5,45.00,5.90,4,1.01,36.90,0.63,Cardiovascular,97.00,15.00,15.00,0.58,98.00,2.02,0,0.80,5.90,Other Hospital,0.00,0.94,1.17,1.07,71.00,0.83,15.00,164.00,19.00,-0.09,1.15,24.00,1.01,0.08,0.00,19.00,82.00,0,1.00,0.99,23.00,0.85,0.75,1.09,0.89,1.02,1.16,1.20,142.00,1.48,0.99,92.00,0.01,36.90,-0.02,59.00,37.90,1.06,1.20,98.00,1.02,82.00,24.70,2.78,0.65,1.09,0.00,1.10,5.20,1.20,14.00,6.99,0.20,0.78,-0.76,Cardiovascular,,1.98,1.10,Other Hospital,0.86,2.18,0.32,,0.89,23.00,1.16,27.00,226.00,0.47,-0.21,1.03,98.00,,0.18,142.00,1.03,0.70,142.00,1.07,0.38,0.20,1.00,1.11,24.00,0.00,-0.15,1.00,0.02,-0.09,24.70,1,,,0.52,0.50
9,0,6,107.01,5,50.00,,4,0.80,36.40,0.42,Cardiovascular,96.00,10.00,10.00,0.39,78.00,,0,0.93,,Accident & Emergency,4.00,1.15,0.81,1.06,59.00,1.52,10.00,,14.00,-0.05,1.18,44.00,0.80,,39.00,17.00,96.00,0,,1.00,28.00,0.61,,0.91,,1.00,0.83,,139.00,1.09,1.21,,0.06,37.10,-0.03,48.00,37.20,1.28,,78.00,1.00,94.00,12.80,,,0.91,,1.08,3.30,,46.00,,0.40,0.56,,Cardiovascular,,1.03,0.99,Direct Admit,,1.13,,25.71,0.97,28.00,0.83,,232.00,,0.06,1.01,103.00,79.00,0.35,139.00,,0.91,139.00,1.12,0.10,0.40,0.94,1.22,29.00,-3.00,,1.00,,0.32,8.40,0,0.01,0.02,0.25,0.15


In [54]:
display (f_selected_df_level2_1.dtypes)

ventilated_apache                            int64  
gcs_motor_apache                             int64  
apache_3j_diagnosis                          float64
gcs_verbal_apache                            int64  
age                                          float64
d1_lactate_min                               float64
gcs_eyes_apache                              int64  
sysbp_over_d1_min_avg                        float64
d1_temp_min                                  float64
bun_over_d1_min_avg                          float64
apache_2_bodysystem                          object 
d1_spo2_min                                  float64
d1_bun_max                                   float64
d1_bun_min                                   float64
bun_over_d1_max_avg                          float64
d1_sysbp_min                                 float64
lactate_over_d1_max_avg                      float64
elective_surgery                             int64  
heartrate_over_d1_max_avg                    f

In [52]:
# running Level 2 - full df with additional Catboost predictions

best_model_score = 0

learning_rates = [0.05, ]
depths= [3, 6,]
l2_leaf_regs=[1,]

for learning_rate in learning_rates:
    for depth in depths:
        for l2_leaf_reg in l2_leaf_regs:
            
            model = CatBoostClassifier(iterations=50, learning_rate=learning_rate, depth=depth,
                                            l2_leaf_reg=l2_leaf_reg, class_weights=[1, 10], 
                                            custom_metric='AUC', early_stopping_rounds=400) #task_type="GPU",
            model.fit(train_pool, plot=True,silent=True )
            
            model_name = 'L2_CT_LR'+str(learning_rate)+'_depth'+str(depth)+'_l2'+str(l2_leaf_reg)
            print('This Level 2 model is: ' + model_name)
            print("Model Best Score: " + str(model.get_best_score()))
            
            predictions = model.predict(X_train,prediction_type='Probability')[:,1]
            fpr, tpr, threshold = roc_curve(df[TARGET_COL], predictions)
            auc_score = auc(fpr, tpr)
            print('Model AUC: '+ str(auc_score))
            
            feature_importances = model.get_feature_importance(train_pool)
            feature_names = X_train.columns
            for score, name in sorted(zip(feature_importances, feature_names), reverse=True):
                if score > 0.05:
                    print('{0}: {1:.2f}'.format(name, score))                    
            
            test[TARGET_COL] = model.predict(f_selected_test_level2_1, prediction_type='Probability')[:,1]
            test[["encounter_id",TARGET_COL]].to_csv("submission"+model_name+".csv",index=False)

print('Training complete')

MetricVisualizer(layout=Layout(align_self='stretch', height='500px'))

This Level 2 model is: L2_CT_LR0.05_depth3_l21
Model Best Score: {'learn': {'Logloss': 0.42513665760019287}}


CatBoostError: Invalid type for cat_feature[non-default value idx=0,feature_idx=1]=68.0 : cat_features must be integer or string, real number values and NaN values should be converted to string.