## Objective: Predicting the probability of bank churn using the preprocessed data
### Models Used: 
1.  XGBoost Classifier
2.  LGBM Classifier
3.  CatBoost Classifier 
4.  RandomForest Classifier
5.  GradientBoost Classifier
6.  AdaBoost Classifier
7.  SVC
8.  StackingClassifier

In [3]:
import pandas as pd
import numpy as np 
import matplotlib.pyplot as plt
import seaborn as sns 
import os 
import joblib  


In [4]:
from xgboost import XGBClassifier
from lightgbm import LGBMClassifier
from catboost import CatBoostClassifier
from sklearn.svm import SVC
from sklearn.model_selection import train_test_split, GridSearchCV, RandomizedSearchCV
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier, AdaBoostClassifier, StackingClassifier

from sklearn.metrics import accuracy_score, f1_score, roc_auc_score



In [5]:
# Load the preprocessed training data (it contains the 'Exited' target used
# below) and the preprocessed test data from CSV files in the working directory.
df_preprocessed = pd.read_csv(filepath_or_buffer='train_preprocessed.csv')
df_test_preprocessed = pd.read_csv(filepath_or_buffer='test_preprocessed.csv')

In [6]:
# Preview call kept from the original cell; its return value is not assigned.
df_preprocessed.head()

# Feature matrix: every column except the target ('Exited') and three columns
# that are presumably row/customer identifiers rather than predictive inputs
# (TODO confirm against the preprocessing notebook).
X = df_preprocessed.drop(
    labels=['id', 'CustomerId', 'Surname', 'Exited'],
    axis='columns',
)

# Target vector: the 'Exited' column.
y = df_preprocessed['Exited']

In [7]:
# Hold out 20% of the rows for evaluation; the fixed random_state keeps the
# split repeatable between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [8]:
# Metric registries filled in by model_prediction(): each maps a model class
# name to [accuracy, f1, roc_auc] for the training and testing split respectively.
training_score_dict = dict()
testing_score_dict = dict()

In [9]:
def _positive_class_scores(model, features):
    """Return continuous positive-class scores for ``features``, or None.

    ``predict_proba`` is preferred (second column, i.e. the probability of
    the second class). If the fitted model has no ``predict_proba`` but
    exposes ``decision_function``, its 1-D output is used instead, since
    ``roc_auc_score`` only needs a ranking score for binary targets.
    Returns None when neither is available.
    """
    if hasattr(model, 'predict_proba'):
        return model.predict_proba(features)[:, 1]
    if hasattr(model, 'decision_function'):
        margins = model.decision_function(features)
        # Only a 1-D margin (the binary case) can be passed straight to
        # roc_auc_score; anything else is treated as "no score available".
        if getattr(margins, 'ndim', None) == 1:
            return margins
    return None


def _evaluate_split(model, features, target):
    """Return ``[accuracy, f1, roc_auc]`` of a fitted ``model`` on one split.

    ``roc_auc`` is None when the model provides no usable continuous score.
    """
    predictions = model.predict(features)
    scores = _positive_class_scores(model, features)
    roc_auc = roc_auc_score(target, scores) if scores is not None else None
    return [
        accuracy_score(target, predictions),
        f1_score(target, predictions),
        roc_auc,
    ]


def model_prediction(model):
    """Fit ``model`` on the training split, then score and report it.

    The model is fitted on the module-level ``X_train``/``y_train`` and
    evaluated on both the training and the ``X_test``/``y_test`` split.
    Accuracy, F1 and (when a continuous score is available) ROC AUC are
    stored as ``[accuracy, f1, roc_auc]`` in ``training_score_dict`` and
    ``testing_score_dict`` under the model's class name, and printed.

    Note: results are keyed by class name, so evaluating two instances of
    the same class overwrites the earlier entry.

    Parameters
    ----------
    model : estimator
        An unfitted estimator exposing ``fit`` and ``predict``; optionally
        ``predict_proba`` or ``decision_function`` for ROC AUC.

    Returns
    -------
    estimator
        The same ``model`` object, now fitted.
    """
    model.fit(X_train, y_train)
    model_name = type(model).__name__

    train_accuracy, train_f1, train_roc_auc = _evaluate_split(model, X_train, y_train)
    test_accuracy, test_f1, test_roc_auc = _evaluate_split(model, X_test, y_test)

    training_score_dict[model_name] = [train_accuracy, train_f1, train_roc_auc]
    testing_score_dict[model_name] = [test_accuracy, test_f1, test_roc_auc]

    print(f'Model: {model_name}')
    print(f'Training Accuracy: {train_accuracy}')
    print(f'Testing Accuracy: {test_accuracy}')
    print(f'Training F1 Score: {train_f1}')
    print(f'Testing F1 Score: {test_f1}')
    # ROC AUC lines are omitted for models without a continuous score.
    if train_roc_auc is not None:
        print(f'Training ROC_AUC Score: {train_roc_auc}')
    if test_roc_auc is not None:
        print(f'Testing ROC_AUC Score: {test_roc_auc}')
    print('---------------------------------')

    return model

### Support Vector Classifier Model

In [10]:
# Fit and score a support vector classifier. probability=True is passed so
# that the estimator offers predict_proba, which model_prediction() uses to
# compute ROC AUC.
model_svc = model_prediction(
    SVC(probability=True),
)