In [1]:
# import libraries

import pandas as pd
import pickle
import numpy as np
from sklearn.metrics import f1_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from xgboost import XGBClassifier
from sklearn.ensemble import StackingClassifier
from catboost import CatBoostClassifier
import os
import warnings
# Ignore every warning raised from this point on (no category or module
# restriction is passed to the filter).
# NOTE(review): this also hides warnings emitted by the libraries used during
# fit/predict below -- confirm that blanket suppression is intended.
warnings.filterwarnings('ignore')

In [2]:
# load models
#
# Every base learner named in `model_list` is unpickled from
# <cwd>/data-scientist-technical-test-main/data/auto-insurance-fall-2017/models/<NAME>.pkl
# and stored in `models_dict` under its name.
# NOTE(review): pickle.load can execute arbitrary code embedded in the file;
# only load model files that come from a trusted source.

models_dict = {}
model_list = ["XGB","CBC","RF","LR"]
base_path=os.path.abspath(os.getcwd())

# Build the directory with os.path.join instead of hard-coded "\\" separators
# so the notebook resolves the same location on Windows and on POSIX hosts.
models_dir = os.path.join(base_path,
                          "data-scientist-technical-test-main",
                          "data",
                          "auto-insurance-fall-2017",
                          "models")

for model in model_list:
    # The context manager closes the file handle as soon as the model is read.
    with open(os.path.join(models_dir, model + ".pkl"), 'rb') as model_file:
        models_dict[model] = pickle.load(model_file)

In [3]:
# read data

# os.path.join keeps the path portable (the original "\\" separators only
# resolve on Windows).
train_csv = os.path.join(base_path,
                         "data-scientist-technical-test-main",
                         "data",
                         "auto-insurance-fall-2017",
                         "train_auto_preproc.csv")
train=pd.read_csv(train_csv, sep=",", header=0)

# drop extra column
# "Unnamed: 0" is presumably the index written out by an earlier to_csv call
# -- TODO confirm against the preprocessing step.

drop_col = ["Unnamed: 0"]
target_col=["TARGET_FLAG"]

# Reassign instead of mutating in place; `columns=` already selects the axis.
train = train.drop(columns=drop_col)

# divide X,y and split them

X=train.drop(columns=target_col)
# Selecting with a list yields an (n, 1) column vector; flatten it to the 1-D
# label array that scikit-learn estimators and metrics expect.
y=train[target_col].values.ravel()

# 80/20 split with a fixed seed, stratified on the target.
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=18, stratify=y)

In [4]:
# define ensemble model

# Base learners, in the order they are handed to the stacker.
base_estimators = [(name, models_dict[name]) for name in ("XGB", "CBC", "LR", "RF")]

# Meta-learner that combines the base learners' outputs.
meta_learner = LogisticRegression(class_weight="balanced")

# cv=10 asks the stacker for 10 cross-validation folds.
stacked = StackingClassifier(
    estimators=base_estimators,
    final_estimator=meta_learner,
    cv=10,
    verbose=0,
)

In [5]:
# train ensemble model

# fit() hands back the estimator it was called on, so `ensemble` is simply
# another name for the now-fitted `stacked` object.
stacked.fit(X_train, y_train)
ensemble = stacked



In [6]:
# make ensemble prediction
# Predict labels for the validation split (X_val) with the fitted ensemble;
# the result is scored against y_val in the next cell.

ensemble_pred=ensemble.predict(X_val)

In [7]:
# Score the ensemble's predictions against the validation labels (this split
# was not passed to fit above).

f1 = f1_score(y_true=y_val, y_pred=ensemble_pred)
print(f'f1 score: {f1}')

f1 score: 0.6006191950464397


In [8]:
# save Ensemble model
# The fitted ensemble is pickled next to the base learners as Ensemble.pkl.
# os.path.join keeps the path portable, and the context manager guarantees the
# file is flushed and closed once the dump finishes.
ensemble_path = os.path.join(base_path,
                             "data-scientist-technical-test-main",
                             "data",
                             "auto-insurance-fall-2017",
                             "models",
                             "Ensemble.pkl")
with open(ensemble_path, 'wb') as ensemble_file:
    pickle.dump(ensemble, ensemble_file)