In [1]:
# import libraries

import pandas as pd
import pickle
import numpy as np
from sklearn.metrics import f1_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from xgboost import XGBClassifier
from sklearn.ensemble import StackingClassifier
from catboost import CatBoostClassifier
import os

In [2]:
# load models
#
# Reads one pickled estimator per short name in `model_list` from the project's
# "models" directory and stores it in `models_dict` under that name.
# (The names presumably stand for XGBoost, CatBoost, RandomForest and
# LogisticRegression -- confirm against the notebook that saved them.)
#
# NOTE(security): pickle.load can execute arbitrary code while unpickling;
# only load model files that come from a trusted source.

models_dict = {}
model_list = ["XGB","CBC","RF","LR"]
base_path=os.path.abspath(os.getcwd())

for model in model_list:
    # os.path.join uses the platform's separator instead of a hard-coded "\\",
    # so the notebook also runs outside Windows.
    model_path = os.path.join(base_path, "data-scientist-technical-test-main", "data",
                              "auto-insurance-fall-2017", "models", model + ".pkl")
    # The context manager closes the file handle as soon as the model is loaded.
    with open(model_path, "rb") as model_file:
        models_dict[model] = pickle.load(model_file)

In [3]:
# read data
#
# Loads the preprocessed training table, removes the extra "Unnamed: 0" column
# (presumably an index column written by an earlier to_csv -- confirm), then
# separates features from the target and makes a stratified 80/20 split.

# os.path.join keeps the path portable instead of hard-coding "\\" separators
train_path = os.path.join(base_path, "data-scientist-technical-test-main", "data",
                          "auto-insurance-fall-2017", "train_auto_preproc.csv")
train = pd.read_csv(train_path, sep=",", header=0)

# drop extra column

drop_col = ["Unnamed: 0"]
target_col = ["TARGET_FLAG"]

# reassign instead of inplace=True; `columns=` already implies the column axis
train = train.drop(columns=drop_col)

# divide X,y and split them

X = train.drop(columns=target_col)
# Selecting with a list of columns gives an (n_samples, 1) array; flatten it to
# 1-D so estimators receive the shape they expect and scikit-learn no longer
# emits the "column-vector y was passed" DataConversionWarning during fit.
y = train[target_col].values.ravel()

X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=18, stratify=y)

In [4]:
# define ensemble model

# Base learners as (name, estimator) pairs, in the order given to the stacker.
# Per scikit-learn's StackingClassifier documentation, fit() trains clones of
# these estimators rather than reusing their already-fitted state.
base_estimators = [(name, models_dict[name]) for name in ("XGB", "CBC", "LR", "RF")]

# Meta-learner: a class-balanced logistic regression on top of the base learners.
meta_learner = LogisticRegression(class_weight="balanced")

# cv=10 -> the meta-learner's training inputs come from 10-fold cross-validated
# predictions of the base learners.
stacked = StackingClassifier(estimators=base_estimators,
                             final_estimator=meta_learner,
                             cv=10)

In [5]:
# train ensemble model

# fit() trains the stacker on the training split and returns the estimator
# itself, so `ensemble` and `stacked` refer to the same fitted object.
stacked.fit(X_train, y_train)
ensemble = stacked

  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


0:	learn: 0.6143419	total: 180ms	remaining: 17.8s
1:	learn: 0.5745448	total: 183ms	remaining: 8.94s
2:	learn: 0.5350493	total: 186ms	remaining: 6.02s
3:	learn: 0.5119854	total: 189ms	remaining: 4.54s
4:	learn: 0.4989652	total: 192ms	remaining: 3.65s
5:	learn: 0.4887748	total: 196ms	remaining: 3.07s
6:	learn: 0.4757730	total: 200ms	remaining: 2.65s
7:	learn: 0.4703294	total: 203ms	remaining: 2.33s
8:	learn: 0.4653450	total: 207ms	remaining: 2.09s
9:	learn: 0.4604409	total: 210ms	remaining: 1.89s
10:	learn: 0.4548596	total: 214ms	remaining: 1.73s
11:	learn: 0.4503120	total: 217ms	remaining: 1.59s
12:	learn: 0.4470121	total: 220ms	remaining: 1.47s
13:	learn: 0.4438680	total: 223ms	remaining: 1.37s
14:	learn: 0.4404521	total: 226ms	remaining: 1.28s
15:	learn: 0.4382605	total: 232ms	remaining: 1.22s
16:	learn: 0.4359711	total: 235ms	remaining: 1.15s
17:	learn: 0.4336543	total: 238ms	remaining: 1.08s
18:	learn: 0.4310102	total: 242ms	remaining: 1.03s
19:	learn: 0.4292109	total: 247ms	remaini

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression






































0:	learn: 0.6144596	total: 2.76ms	remaining: 273ms
1:	learn: 0.5746548	total: 5.41ms	remaining: 265ms
2:	learn: 0.5362541	total: 8.16ms	remaining: 264ms
3:	learn: 0.5157029	total: 11ms	remaining: 263ms
4:	learn: 0.4959483	total: 13.7ms	remaining: 261ms
5:	learn: 0.4847529	total: 16.5ms	remaining: 259ms
6:	learn: 0.4760328	total: 19.6ms	remaining: 261ms
7:	learn: 0.4695939	total: 22.5ms	remaining: 259ms
8:	learn: 0.4615480	total: 25.5ms	remaining: 258ms
9:	learn: 0.4569074	total: 28.5ms	remaining: 257ms
10:	learn: 0.4533701	total: 31.4ms	remaining: 254ms
11:	learn: 0.4475611	total: 34.3ms	remaining: 252ms
12:	learn: 0.4449415	total: 37.3ms	remaining: 249ms
13:	learn: 0.4425298	total: 40ms	remaining: 246ms
14:	learn: 0.4395454	total: 42.8ms	remaining: 242ms
15:	learn: 0.4366911	total: 45.9ms	remaining: 241ms
16:	learn: 0.4333881	total: 49.2ms	remaining: 240ms
17:	learn: 0.4309968	total: 52ms	remaining: 237ms
18:	learn: 0.4293703	total: 54.8ms	remaining: 233ms
19:	learn: 0.4270639	total: 

73:	learn: 0.3575804	total: 196ms	remaining: 69ms
74:	learn: 0.3562889	total: 199ms	remaining: 66.3ms
75:	learn: 0.3553908	total: 202ms	remaining: 63.8ms
76:	learn: 0.3541045	total: 205ms	remaining: 61.1ms
77:	learn: 0.3529275	total: 207ms	remaining: 58.5ms
78:	learn: 0.3521069	total: 210ms	remaining: 55.8ms
79:	learn: 0.3512242	total: 212ms	remaining: 53ms
80:	learn: 0.3511754	total: 214ms	remaining: 50.3ms
81:	learn: 0.3502423	total: 217ms	remaining: 47.6ms
82:	learn: 0.3494068	total: 220ms	remaining: 45.1ms
83:	learn: 0.3484831	total: 223ms	remaining: 42.4ms
84:	learn: 0.3470368	total: 225ms	remaining: 39.7ms
85:	learn: 0.3457844	total: 227ms	remaining: 37ms
86:	learn: 0.3445970	total: 230ms	remaining: 34.3ms
87:	learn: 0.3432415	total: 232ms	remaining: 31.6ms
88:	learn: 0.3419386	total: 234ms	remaining: 29ms
89:	learn: 0.3406098	total: 237ms	remaining: 26.3ms
90:	learn: 0.3391359	total: 240ms	remaining: 23.7ms
91:	learn: 0.3378195	total: 242ms	remaining: 21ms
92:	learn: 0.3370995	t

84:	learn: 0.3451766	total: 225ms	remaining: 39.6ms
85:	learn: 0.3441453	total: 227ms	remaining: 37ms
86:	learn: 0.3428935	total: 229ms	remaining: 34.3ms
87:	learn: 0.3421198	total: 232ms	remaining: 31.7ms
88:	learn: 0.3409899	total: 235ms	remaining: 29ms
89:	learn: 0.3399592	total: 238ms	remaining: 26.4ms
90:	learn: 0.3392388	total: 241ms	remaining: 23.8ms
91:	learn: 0.3384027	total: 243ms	remaining: 21.2ms
92:	learn: 0.3373344	total: 246ms	remaining: 18.5ms
93:	learn: 0.3361596	total: 249ms	remaining: 15.9ms
94:	learn: 0.3353972	total: 251ms	remaining: 13.2ms
95:	learn: 0.3345339	total: 254ms	remaining: 10.6ms
96:	learn: 0.3338562	total: 256ms	remaining: 7.93ms
97:	learn: 0.3331261	total: 259ms	remaining: 5.28ms
98:	learn: 0.3326631	total: 261ms	remaining: 2.64ms
99:	learn: 0.3313018	total: 264ms	remaining: 0us
0:	learn: 0.6098436	total: 2.74ms	remaining: 271ms
1:	learn: 0.5727518	total: 5.44ms	remaining: 267ms
2:	learn: 0.5368546	total: 8.12ms	remaining: 263ms
3:	learn: 0.5166550	to

80:	learn: 0.3454766	total: 244ms	remaining: 57.2ms
81:	learn: 0.3449029	total: 246ms	remaining: 54ms
82:	learn: 0.3437854	total: 249ms	remaining: 50.9ms
83:	learn: 0.3428773	total: 252ms	remaining: 48ms
84:	learn: 0.3423714	total: 254ms	remaining: 44.9ms
85:	learn: 0.3413847	total: 257ms	remaining: 41.8ms
86:	learn: 0.3403352	total: 260ms	remaining: 38.8ms
87:	learn: 0.3397559	total: 263ms	remaining: 35.8ms
88:	learn: 0.3385248	total: 266ms	remaining: 32.8ms
89:	learn: 0.3372538	total: 268ms	remaining: 29.8ms
90:	learn: 0.3362276	total: 270ms	remaining: 26.8ms
91:	learn: 0.3348005	total: 273ms	remaining: 23.7ms
92:	learn: 0.3332649	total: 275ms	remaining: 20.7ms
93:	learn: 0.3323343	total: 278ms	remaining: 17.7ms
94:	learn: 0.3314315	total: 281ms	remaining: 14.8ms
95:	learn: 0.3312463	total: 283ms	remaining: 11.8ms
96:	learn: 0.3302782	total: 285ms	remaining: 8.82ms
97:	learn: 0.3295372	total: 288ms	remaining: 5.87ms
98:	learn: 0.3284840	total: 290ms	remaining: 2.93ms
99:	learn: 0.327

67:	learn: 0.3616875	total: 225ms	remaining: 106ms
68:	learn: 0.3599984	total: 227ms	remaining: 102ms
69:	learn: 0.3584617	total: 230ms	remaining: 98.5ms
70:	learn: 0.3577636	total: 233ms	remaining: 95.1ms
71:	learn: 0.3571235	total: 235ms	remaining: 91.5ms
72:	learn: 0.3561311	total: 238ms	remaining: 88ms
73:	learn: 0.3549454	total: 241ms	remaining: 84.6ms
74:	learn: 0.3536220	total: 243ms	remaining: 81ms
75:	learn: 0.3524135	total: 245ms	remaining: 77.5ms
76:	learn: 0.3514223	total: 248ms	remaining: 74ms
77:	learn: 0.3503872	total: 251ms	remaining: 70.7ms
78:	learn: 0.3492709	total: 253ms	remaining: 67.3ms
79:	learn: 0.3490225	total: 255ms	remaining: 63.8ms
80:	learn: 0.3480068	total: 258ms	remaining: 60.5ms
81:	learn: 0.3471069	total: 260ms	remaining: 57.2ms
82:	learn: 0.3457627	total: 263ms	remaining: 53.9ms
83:	learn: 0.3448108	total: 265ms	remaining: 50.5ms
84:	learn: 0.3436271	total: 268ms	remaining: 47.2ms
85:	learn: 0.3427297	total: 270ms	remaining: 44ms
86:	learn: 0.3414813	t

87:	learn: 0.3422367	total: 225ms	remaining: 30.7ms
88:	learn: 0.3415152	total: 228ms	remaining: 28.2ms
89:	learn: 0.3403341	total: 231ms	remaining: 25.6ms
90:	learn: 0.3393437	total: 233ms	remaining: 23.1ms
91:	learn: 0.3379894	total: 236ms	remaining: 20.5ms
92:	learn: 0.3370980	total: 239ms	remaining: 18ms
93:	learn: 0.3358116	total: 241ms	remaining: 15.4ms
94:	learn: 0.3348818	total: 244ms	remaining: 12.8ms
95:	learn: 0.3336785	total: 246ms	remaining: 10.3ms
96:	learn: 0.3324897	total: 250ms	remaining: 7.72ms
97:	learn: 0.3313041	total: 252ms	remaining: 5.14ms
98:	learn: 0.3301154	total: 255ms	remaining: 2.57ms
99:	learn: 0.3296061	total: 257ms	remaining: 0us


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logist

In [6]:
# make ensemble prediction

# class predictions of the fitted stacker for the held-out validation features
ensemble_pred = ensemble.predict(X_val)

In [7]:
# Evaluate on the held-out validation set (not used when fitting the ensemble)

# F1 score of the ensemble's predictions against the validation labels
f1 = f1_score(y_true=y_val, y_pred=ensemble_pred)
message = 'f1 score: {}'.format(f1)
print(message)

f1 score: 0.6038894575230297


In [8]:
# save Ensemble model
#
# Serialises the fitted stacking ensemble next to the base models as "Ensemble.pkl".

# os.path.join keeps the path portable instead of hard-coding "\\" separators
ensemble_path = os.path.join(base_path, "data-scientist-technical-test-main", "data",
                             "auto-insurance-fall-2017", "models", "Ensemble.pkl")
# The context manager flushes and closes the file even if pickling raises,
# so the handle is never left open.
with open(ensemble_path, "wb") as ensemble_file:
    pickle.dump(ensemble, ensemble_file)