# Customer Churn Prediction in Telecommunication

## 1. Import required libraries

In [1]:
# Import necessary libraries
import numpy as np
import pandas as pd
from sklearn.utils import shuffle
from sklearn.decomposition import PCA
from sklearn.tree import DecisionTreeClassifier, DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor, AdaBoostClassifier, VotingClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, f1_score, mean_squared_error, precision_score, recall_score
from sklearn.gaussian_process.kernels import RBF, ConstantKernel as C
from bayes_opt import BayesianOptimization
from sklearn.naive_bayes import GaussianNB
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.preprocessing import StandardScaler
from sklearn.neural_network import MLPClassifier

# Ignore warnings
import warnings
warnings.filterwarnings("ignore")

## 2. Load and preprocess the data

#### Load the dataset into a Pandas dataframe

In [2]:
train = pd.read_csv('data/cell2celltrain.csv')
test = pd.read_csv('data/cell2cellholdout.csv')

df = pd.concat([train, test], ignore_index=True)
df = shuffle(df)

#### Understanding the dataset

In [3]:
# Display the first 5 rows of the dataframe
df.head()

Unnamed: 0,CustomerID,Churn,MonthlyRevenue,MonthlyMinutes,TotalRecurringCharge,DirectorAssistedCalls,OverageMinutes,RoamingCalls,PercChangeMinutes,PercChangeRevenues,...,ReferralsMadeBySubscriber,IncomeGroup,OwnsMotorcycle,AdjustmentsToCreditRating,HandsetPrice,MadeCallToRetentionTeam,CreditRating,PrizmCode,Occupation,MaritalStatus
48970,3384658,No,51.29,955.0,45.0,0.0,31.0,0.0,242.0,-12.3,...,0,4,No,0,Unknown,No,3-Good,Other,Other,Unknown
8816,3069482,No,83.54,338.0,67.0,0.25,58.0,0.0,42.0,25.8,...,0,5,No,0,Unknown,No,3-Good,Suburban,Other,Yes
28036,3222010,No,71.7,928.0,60.0,1.24,39.0,3.1,-130.0,-12.1,...,0,8,No,0,150,No,2-High,Suburban,Professional,No
46308,3366282,No,34.99,158.0,45.0,0.0,0.0,0.0,-36.0,0.0,...,0,0,No,0,Unknown,No,2-High,Town,Other,Unknown
40875,3325714,No,47.27,807.0,52.0,0.0,1.0,0.0,-807.0,-141.2,...,0,1,No,0,30,No,5-Low,Town,Other,No


In [4]:
# Display summary statistics for the numerical columns
df.describe()

Unnamed: 0,CustomerID,MonthlyRevenue,MonthlyMinutes,TotalRecurringCharge,DirectorAssistedCalls,OverageMinutes,RoamingCalls,PercChangeMinutes,PercChangeRevenues,DroppedCalls,...,Handsets,HandsetModels,CurrentEquipmentDays,AgeHH1,AgeHH2,RetentionCalls,RetentionOffersAccepted,ReferralsMadeBySubscriber,IncomeGroup,AdjustmentsToCreditRating
count,71047.0,70831.0,70831.0,70831.0,70831.0,70831.0,70831.0,70545.0,70545.0,71047.0,...,71046.0,71046.0,71046.0,69803.0,69803.0,71047.0,71047.0,71047.0,71047.0,71047.0
mean,3201948.0,58.852853,525.72625,46.862899,0.894817,40.08773,1.222315,-10.845304,-1.20473,6.009567,...,1.808617,1.561791,380.265631,31.375113,21.157715,0.037004,0.017918,0.050854,4.334229,0.053162
std,116796.5,44.243566,530.136184,23.915,2.197577,96.349599,9.081089,255.314034,38.770236,9.006656,...,1.33612,0.90828,254.294692,22.082195,23.917586,0.205823,0.141485,0.290444,3.137063,0.374988
min,3000002.0,-6.17,0.0,-11.0,0.0,0.0,0.0,-3875.0,-1107.7,0.0,...,1.0,1.0,-5.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
25%,3100576.0,33.64,158.0,30.0,0.0,0.0,0.0,-83.0,-7.1,0.7,...,1.0,1.0,204.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0
50%,3201478.0,48.53,366.0,45.0,0.25,2.0,0.0,-5.0,-0.3,3.0,...,1.0,1.0,330.0,36.0,0.0,0.0,0.0,0.0,5.0,0.0
75%,3304918.0,71.03,722.0,60.0,0.99,41.0,0.3,66.0,1.6,7.7,...,2.0,2.0,515.0,48.0,42.0,0.0,0.0,0.0,7.0,0.0
max,3399994.0,1223.38,7668.0,400.0,159.39,4321.0,1112.4,5192.0,2483.5,221.7,...,28.0,16.0,1823.0,99.0,99.0,4.0,4.0,35.0,9.0,25.0


In [5]:
# Display basic information about the dataframe
print(f'Shape of the dataframe: {df.shape}')
print(f'Data types of each column:\n{df.dtypes}')

Shape of the dataframe: (71047, 58)
Data types of each column:
CustomerID                     int64
Churn                         object
MonthlyRevenue               float64
MonthlyMinutes               float64
TotalRecurringCharge         float64
DirectorAssistedCalls        float64
OverageMinutes               float64
RoamingCalls                 float64
PercChangeMinutes            float64
PercChangeRevenues           float64
DroppedCalls                 float64
BlockedCalls                 float64
UnansweredCalls              float64
CustomerCareCalls            float64
ThreewayCalls                float64
ReceivedCalls                float64
OutboundCalls                float64
InboundCalls                 float64
PeakCallsInOut               float64
OffPeakCallsInOut            float64
DroppedBlockedCalls          float64
CallForwardingCalls          float64
CallWaitingCalls             float64
MonthsInService                int64
UniqueSubs                     int64
ActiveSubs  

#### Data Cleaning and Remove Null

In [6]:
# Count the number of missing values in each column of the dataframe
df.isnull().sum()

CustomerID                       0
Churn                        20000
MonthlyRevenue                 216
MonthlyMinutes                 216
TotalRecurringCharge           216
DirectorAssistedCalls          216
OverageMinutes                 216
RoamingCalls                   216
PercChangeMinutes              502
PercChangeRevenues             502
DroppedCalls                     0
BlockedCalls                     0
UnansweredCalls                  0
CustomerCareCalls                0
ThreewayCalls                    0
ReceivedCalls                    0
OutboundCalls                    0
InboundCalls                     0
PeakCallsInOut                   0
OffPeakCallsInOut                0
DroppedBlockedCalls              0
CallForwardingCalls              0
CallWaitingCalls                 0
MonthsInService                  0
UniqueSubs                       0
ActiveSubs                       0
ServiceArea                     28
Handsets                         1
HandsetModels       

In [7]:
#Drop CustomerID
df = df.drop('CustomerID', axis=1)

#drop all row containing 'Unknown' except
#df = df[df.iloc[:, ~df.columns.isin(['Homeownership'])].ne('Unknown').all(axis=1)]
df = df[df.iloc[:, ~df.columns.isin(['Homeownership', 'MaritalStatus'])].ne('Unknown').all(axis=1)]

# drop all rows containing NaNs
df = df.dropna()

#### Analyzed Cleaned dataset

In [8]:
# Display basic information about the dataframe
print(f'Shape of the dataframe: {df.shape}')
print(f'Data types of each column:\n{df.dtypes}')

Shape of the dataframe: (21489, 57)
Data types of each column:
Churn                         object
MonthlyRevenue               float64
MonthlyMinutes               float64
TotalRecurringCharge         float64
DirectorAssistedCalls        float64
OverageMinutes               float64
RoamingCalls                 float64
PercChangeMinutes            float64
PercChangeRevenues           float64
DroppedCalls                 float64
BlockedCalls                 float64
UnansweredCalls              float64
CustomerCareCalls            float64
ThreewayCalls                float64
ReceivedCalls                float64
OutboundCalls                float64
InboundCalls                 float64
PeakCallsInOut               float64
OffPeakCallsInOut            float64
DroppedBlockedCalls          float64
CallForwardingCalls          float64
CallWaitingCalls             float64
MonthsInService                int64
UniqueSubs                     int64
ActiveSubs                     int64
ServiceArea 

#### Preprocess the data

In [9]:
#get all categorical features
df_cat = df.select_dtypes(np.object)
cat_attributes = df_cat.columns
cat_attributes

Index(['Churn', 'ServiceArea', 'ChildrenInHH', 'HandsetRefurbished',
       'HandsetWebCapable', 'TruckOwner', 'RVOwner', 'Homeownership',
       'BuysViaMailOrder', 'RespondsToMailOffers', 'OptOutMailings',
       'NonUSTravel', 'OwnsComputer', 'HasCreditCard', 'NewCellphoneUser',
       'NotNewCellphoneUser', 'OwnsMotorcycle', 'HandsetPrice',
       'MadeCallToRetentionTeam', 'CreditRating', 'PrizmCode', 'Occupation',
       'MaritalStatus'],
      dtype='object')

In [10]:
#return unique value in categorical features
for feature in cat_attributes:
    print(feature ,': ',df[feature].unique())

Churn :  ['No' 'Yes']
ServiceArea :  ['BOSBOS978' 'ATHKIN423' 'CHIGRY219' 'OHICOL614' 'DETANN734' 'STLSTL314'
 'PITHOM412' 'NYCBRO917' 'LAXBEV310' 'OKCTUL918' 'DALDAL214' 'CHICHI773'
 'OHIDAY937' 'NYCMAN917' 'MINMIN612' 'NEVELC619' 'BIRBIR205' 'OKCLAW580'
 'LAXCDG310' 'APCFCH703' 'INDIND317' 'MINSTP612' 'DETPON248' 'NYCNEW201'
 'MIAWPB561' 'LAXVNY818' 'HARHAR860' 'FLNTAM813' 'KCYKCM816' 'SANSAN210'
 'APCSIL301' 'BOSBOS508' 'NEVLVS702' 'NCRCHA704' 'HARNOR203' 'ATLANE678'
 'OHICAN330' 'LAXRIV909' 'MILMIL414' 'NYCWHI914' 'AIRCHA843' 'NEVPOW619'
 'ATLNOR678' 'LAXPAS626' 'DETDET313' 'HWIHON808' 'PHIMER609' 'LAXMON323'
 'SFRSFR415' 'NYCNEW973' 'ATLKNO423' 'OHIPSV440' 'HOUHOU281' 'SFRSMO650'
 'BOSBOS781' 'BOSBOS617' 'DENGLD303' 'LAXCUL310' 'LAXPSG760' 'OMADES515'
 'SANAUS512' 'PHXSCO480' 'MIADEL561' 'SFRSCL408' 'FLNCLR813' 'PHIPHI215'
 'NYCNAS516' 'NYCQUE917' 'KCYKCK913' 'CHIRCK815' 'KCYTOP913' 'INHFTW219'
 'LAXONT909' 'MIAFTL954' 'FLNJAC904' 'OHICLE216' 'MIAMIA305' 'APCBAL410'
 'PHXPHX602' '


Summary of the analysis

1. CreditRating feature is ordinal
2. Occupation, PrizmCode, MaritalStatus need to convert to numerical using onehot encoding
3. ServiceArea will be removed as it identify the location. This info is biased and doesnt apply and support to new unknown data that potentially identify new area
3. The value of the Handset is mostly numerical value, we convert this value to integer type
4. The rest of the features is boolean with value 'Yes' and 'No', this will be need to convert to binary

In [11]:
## Convert CreditRating feature to ordinal

# Define the mapping of categories to integers
mapping = {'1-Highest': 1, '2-High': 2, '3-Good': 3, '4-Medium': 4, '5-Low': 5, '6-VeryLow': 6, '7-Lowest': 7}

# Map the categories to integers in the 'CreditRating' column
df['CreditRating'] = df['CreditRating'].replace(mapping)

In [12]:
## Convert Categorical using One Hot Encoding
cat_cols_encoding = ['PrizmCode', 'Occupation', 'MaritalStatus']
df = pd.get_dummies(df, columns=cat_cols_encoding, drop_first=True)

In [13]:
## Drop ServiceArea
#df = df.drop('ServiceArea', axis=1)

df = df.drop(['ServiceArea', 'TruckOwner', 'RVOwner'], axis=1)


In [14]:
## Convert HandsetPrice to integer
df['HandsetPrice'] = df['HandsetPrice'].astype(int)

In [15]:
## Convert Categorical to binary

# return list of features contain 'Yes' and 'No'
YesNoFeatures = cat_attributes.difference(['CreditRating','ServiceArea', 'PrizmCode', 'Occupation', 'MaritalStatus','Homeownership', 'TruckOwner', 'RVOwner'])
mapping = {'Yes': 1, 'No': 0}
df[YesNoFeatures] = df[YesNoFeatures].replace(mapping)

# convert Homeownership to binary
mapping = {'Known': 1, 'Unknown': 0}
df['Homeownership'] = df['Homeownership'].replace(mapping)

#### Split Feature and label data

In [16]:
X = df.drop('Churn', axis=1)
y = df[['Churn']]

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.4)

### 3. Prediction Model Training and Evaluation

In this section, train various machine learning model and select the best performing model. 


The model use are as below:
1. Naive Bayes
2. Decision Tree
3. Logistic Regression
4. Support Vector Machine
5. Adaboost
6. Multi-Layer Perceptron Classifier
7. Voting Classifier

#### 3.1 Naive Bayes

In [17]:
# Define the objective function
def objective_function_NB(var_smoothing):
    model = GaussianNB(var_smoothing=var_smoothing)
    
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)

    return accuracy_score(y_test, y_pred)

# Define the search space
bounds_NB = {
    'var_smoothing': (1e-10, 1),
}

# Set up the optimizer
optimizer = BayesianOptimization(f=objective_function_NB, pbounds=bounds_NB)

# Run the optimization
optimizer.maximize(init_points=10, n_iter=10)

# Get the best hyperparameters and the best score
best_params = optimizer.max["params"]
best_score = optimizer.max["target"]

# Train best model
# Train the Naive Bayes classifier using the optimal hyperparameters
optimal_var_smoothing = best_params['var_smoothing']

model_NB = GaussianNB(var_smoothing=optimal_var_smoothing)
model_NB.fit(X_train, y_train)

# Evaluate Model
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

y_pred = model_NB.predict(X_test)

accuracy = accuracy_score(y_test, y_pred)
precision = precision_score(y_test, y_pred, average='weighted')
recall = recall_score(y_test, y_pred, average='weighted')
f1 = f1_score(y_test, y_pred, average='weighted')

print(f"Accuracy: {accuracy}")
print(f"Precision: {precision}")
print(f"Recall: {recall}")
print(f"F1 Score: {f1}")

|   iter    |  target   | var_sm... |
-------------------------------------
| [0m1        [0m | [0m0.7284   [0m | [0m0.3637   [0m |
| [0m2        [0m | [0m0.7284   [0m | [0m0.8082   [0m |
| [0m3        [0m | [0m0.7284   [0m | [0m0.3423   [0m |
| [0m4        [0m | [0m0.7284   [0m | [0m0.51     [0m |
| [0m5        [0m | [0m0.7284   [0m | [0m0.5558   [0m |
| [0m6        [0m | [0m0.7284   [0m | [0m0.6491   [0m |
| [0m7        [0m | [0m0.7284   [0m | [0m0.6587   [0m |
| [0m8        [0m | [0m0.7284   [0m | [0m0.9959   [0m |
| [0m9        [0m | [0m0.7284   [0m | [0m0.6253   [0m |
| [0m10       [0m | [0m0.7257   [0m | [0m0.07237  [0m |
| [0m11       [0m | [0m0.7284   [0m | [0m0.9104   [0m |
| [0m12       [0m | [0m0.7284   [0m | [0m0.7451   [0m |
| [0m13       [0m | [0m0.7284   [0m | [0m0.4431   [0m |
| [0m14       [0m | [0m0.7284   [0m | [0m0.6525   [0m |
| [0m15       [0m | [0m0.7284   [0m | [0m0.957    

#### 3.2 Decision Tree

In [18]:
# Define the objective function
def DT_objective_function(max_depth, min_samples_split, min_samples_leaf):
    max_depth = int(max_depth)
    min_samples_split = int(min_samples_split)
    min_samples_leaf = int(min_samples_leaf)

    classifier = DecisionTreeClassifier(max_depth=max_depth, min_samples_split=min_samples_split, min_samples_leaf=min_samples_leaf)

    classifier.fit(X_train, y_train)
    y_pred = classifier.predict(X_test)

    return f1_score(y_test, y_pred)

# Define the search space
DT_bounds = {
    'max_depth': (3, 30),
    'min_samples_split': (2, 100),
    'min_samples_leaf': (1, 50),
}

# Set up the optimizer
optimizer = BayesianOptimization(f=DT_objective_function, pbounds=DT_bounds)

# Run the optimization
optimizer.maximize(init_points=50, n_iter=50)

# Train best model
best_params = optimizer.max["params"]
best_score = optimizer.max["target"]

optimal_max_depth = int(best_params['max_depth'])
optimal_min_samples_split = int(best_params['min_samples_split'])
optimal_min_samples_leaf = int(best_params['min_samples_leaf'])

DT_classifier = DecisionTreeClassifier(max_depth=optimal_max_depth, min_samples_split=optimal_min_samples_split, min_samples_leaf=optimal_min_samples_leaf)
DT_classifier.fit(X_train, y_train)

# Evaluate Model
'''
import matplotlib.pyplot as plt
from sklearn.tree import plot_tree

plt.figure(figsize=(12, 8))
x1 = list(X.columns)
y1 = list(y.columns)

plot_tree(DT_classifier,filled=True)
plt.show()
'''

y_pred = DT_classifier.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
precision = precision_score(y_test, y_pred, average='weighted')
recall = recall_score(y_test, y_pred, average='weighted')
f1 = f1_score(y_test, y_pred, average='weighted')

print(f"Accuracy: {accuracy}")
print(f"Precision: {precision}")
print(f"Recall: {recall}")
print(f"F1 Score: {f1}")

|   iter    |  target   | max_depth | min_sa... | min_sa... |
-------------------------------------------------------------
| [0m1        [0m | [0m0.2709   [0m | [0m19.14    [0m | [0m6.142    [0m | [0m60.68    [0m |
| [0m2        [0m | [0m0.2079   [0m | [0m10.16    [0m | [0m14.22    [0m | [0m68.91    [0m |
| [0m3        [0m | [0m0.2454   [0m | [0m29.27    [0m | [0m13.65    [0m | [0m89.63    [0m |
| [0m4        [0m | [0m0.1865   [0m | [0m7.09     [0m | [0m15.02    [0m | [0m75.47    [0m |
| [0m5        [0m | [0m0.2441   [0m | [0m13.27    [0m | [0m31.27    [0m | [0m88.65    [0m |
| [0m6        [0m | [0m0.2679   [0m | [0m23.74    [0m | [0m2.216    [0m | [0m67.14    [0m |
| [95m7        [0m | [95m0.2948   [0m | [95m19.91    [0m | [95m26.04    [0m | [95m6.593    [0m |
| [0m8        [0m | [0m0.2418   [0m | [0m27.99    [0m | [0m23.11    [0m | [0m98.62    [0m |
| [0m9        [0m | [0m0.2932   [0m | [0m27.43    

#### 3.3 Logistic Regression

In [19]:
# Define the objective function
def objective_function_LR(C, penalty):
    C = C
    penalty = 'l1' if penalty < 0.5 else 'l2'
    
    model = LogisticRegression(C=C, penalty=penalty, solver='liblinear')
    
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)

    return accuracy_score(y_test, y_pred)

# Define the search space
bounds_LR = {
    'C': (0.001, 10),
    'penalty': (0, 1),
}

# Set up the optimizer
optimizer = BayesianOptimization(f=objective_function_LR, pbounds=bounds_LR)

# Run the optimization
optimizer.maximize(init_points=50, n_iter=20)

# Get the best hyperparameters and the best score
best_params = optimizer.max["params"]
best_score = optimizer.max["target"]


# Train best model
optimal_C = best_params['C']
optimal_penalty = 'l1' if best_params['penalty'] < 0.5 else 'l2'

model_LR = LogisticRegression(C=optimal_C, penalty=optimal_penalty, solver='liblinear')

model_LR.fit(X_train, y_train)

# Evaluate Model
y_pred = model_LR.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
precision = precision_score(y_test, y_pred, average='weighted')
recall = recall_score(y_test, y_pred, average='weighted')
f1 = f1_score(y_test, y_pred, average='weighted')

print(f"Accuracy: {accuracy}")
print(f"Precision: {precision}")
print(f"Recall: {recall}")
print(f"F1 Score: {f1}")

|   iter    |  target   |     C     |  penalty  |
-------------------------------------------------
| [0m1        [0m | [0m0.7278   [0m | [0m6.924    [0m | [0m0.5617   [0m |
| [0m2        [0m | [0m0.7278   [0m | [0m2.332    [0m | [0m0.7037   [0m |
| [0m3        [0m | [0m0.7275   [0m | [0m2.115    [0m | [0m0.4356   [0m |
| [0m4        [0m | [0m0.7277   [0m | [0m9.003    [0m | [0m0.6195   [0m |
| [0m5        [0m | [0m0.7278   [0m | [0m4.264    [0m | [0m0.8673   [0m |
| [0m6        [0m | [0m0.7273   [0m | [0m8.513    [0m | [0m0.8641   [0m |
| [0m7        [0m | [0m0.7275   [0m | [0m0.8166   [0m | [0m0.1418   [0m |
| [0m8        [0m | [0m0.7273   [0m | [0m9.962    [0m | [0m0.438    [0m |
| [0m9        [0m | [0m0.7277   [0m | [0m1.418    [0m | [0m0.3122   [0m |
| [0m10       [0m | [0m0.7274   [0m | [0m1.345    [0m | [0m0.9247   [0m |
| [0m11       [0m | [0m0.7278   [0m | [0m0.2648   [0m | [0m0.7426   [0m 

#### 3.4 SVC

In [20]:
# Scale the features using StandardScaler
scaler = StandardScaler()
X_scaler = scaler.fit_transform(X)
X_train2, X_test2, y_train2, y_test2 = train_test_split(X_scaler, y, test_size=0.2, random_state=0)

# Define the objective function
def objective_function_SVC(C, gamma):
    C = C
    gamma = gamma
    
    model = SVC(C=C, gamma=gamma, kernel='rbf', random_state=0)    
    
    model.fit(X_train2, y_train2)
    y_pred2 = model.predict(X_test2)

    return accuracy_score(y_test2, y_pred2)

# Define the search space
bounds_SVC = {
    'C': (0.1, 100),
    'gamma': (0.001, 1),
}

# Set up the optimizer
optimizer = BayesianOptimization(f=objective_function_SVC, pbounds=bounds_SVC)

# Run the optimization
optimizer.maximize(init_points=10, n_iter=5)

# Get the best hyperparameters and the best score
best_params = optimizer.max["params"]
best_score = optimizer.max["target"]


# Train best model
optimal_C = best_params['C']
optimal_gamma = best_params['gamma']

model_svc = SVC(C=optimal_C, gamma=optimal_gamma, kernel='rbf')

model_svc.fit(X_train2, y_train2)

# Evaluate Model
y_pred2 = model_svc.predict(X_test2)
accuracy = accuracy_score(y_test2, y_pred2)
precision = precision_score(y_test2, y_pred2, average='weighted')
recall = recall_score(y_test2, y_pred2, average='weighted')
f1 = f1_score(y_test2, y_pred2, average='weighted')

print(f"Accuracy: {accuracy}")
print(f"Precision: {precision}")
print(f"Recall: {recall}")
print(f"F1 Score: {f1}")

|   iter    |  target   |     C     |   gamma   |
-------------------------------------------------
| [0m1        [0m | [0m0.7222   [0m | [0m4.583    [0m | [0m0.1271   [0m |
| [95m2        [0m | [95m0.7366   [0m | [95m6.916    [0m | [95m0.978    [0m |
| [0m3        [0m | [0m0.7336   [0m | [0m77.26    [0m | [0m0.4404   [0m |
| [0m4        [0m | [0m0.7199   [0m | [0m47.08    [0m | [0m0.1399   [0m |
| [0m5        [0m | [0m0.7343   [0m | [0m18.87    [0m | [0m0.483    [0m |
| [0m6        [0m | [0m0.7213   [0m | [0m72.17    [0m | [0m0.1472   [0m |
| [0m7        [0m | [0m0.7266   [0m | [0m96.89    [0m | [0m0.2369   [0m |
| [0m8        [0m | [0m0.7296   [0m | [0m55.82    [0m | [0m0.2652   [0m |
| [0m9        [0m | [0m0.7285   [0m | [0m13.41    [0m | [0m0.252    [0m |
| [0m10       [0m | [0m0.7345   [0m | [0m38.91    [0m | [0m0.5339   [0m |
| [0m11       [0m | [0m0.7366   [0m | [0m6.916    [0m | [0m0.9852   

#### 3.5 Adaboost Classifier

In [21]:
# Define the objective function
def objective_function_adaboost(n_estimators, learning_rate):
    n_estimators = int(n_estimators)
    learning_rate = learning_rate
    
    model = AdaBoostClassifier(n_estimators=n_estimators, learning_rate=learning_rate, random_state=0)
    
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)

    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)

    return accuracy_score(y_test, y_pred)

# Define the search space
bounds_adaboost = {
    'n_estimators': (10, 200),
    'learning_rate': (0.001, 1),
}

# Set up the optimizer
optimizer = BayesianOptimization(f=objective_function_adaboost, pbounds=bounds_adaboost)

# Run the optimization
optimizer.maximize(init_points=50, n_iter=50)

# Train best model
best_params = optimizer.max["params"]
best_score = optimizer.max["target"]

# Train the AdaBoostClassifier model using the optimal hyperparameters
optimal_n_estimators = int(best_params['n_estimators'])
optimal_learning_rate = best_params['learning_rate']

model_adaboost = AdaBoostClassifier(n_estimators=optimal_n_estimators, learning_rate=optimal_learning_rate, random_state=0)

model_adaboost.fit(X_train, y_train)

# Evaluate Model
y_pred = model_adaboost.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
precision = precision_score(y_test, y_pred, average='weighted')
recall = recall_score(y_test, y_pred, average='weighted')
f1 = f1_score(y_test, y_pred, average='weighted')

print(f"Accuracy: {accuracy}")
print(f"Precision: {precision}")
print(f"Recall: {recall}")
print(f"F1 Score: {f1}")

|   iter    |  target   | learni... | n_esti... |
-------------------------------------------------
| [0m1        [0m | [0m0.7394   [0m | [0m0.2683   [0m | [0m30.56    [0m |
| [95m2        [0m | [95m0.7438   [0m | [95m0.8766   [0m | [95m186.3    [0m |
| [0m3        [0m | [0m0.7417   [0m | [0m0.4932   [0m | [0m149.8    [0m |
| [0m4        [0m | [0m0.7394   [0m | [0m0.6696   [0m | [0m59.58    [0m |
| [0m5        [0m | [0m0.742    [0m | [0m0.6949   [0m | [0m197.5    [0m |
| [0m6        [0m | [0m0.741    [0m | [0m0.5198   [0m | [0m59.01    [0m |
| [0m7        [0m | [0m0.7438   [0m | [0m0.9302   [0m | [0m119.8    [0m |
| [0m8        [0m | [0m0.7434   [0m | [0m0.7635   [0m | [0m135.0    [0m |
| [0m9        [0m | [0m0.7417   [0m | [0m0.7559   [0m | [0m115.8    [0m |
| [0m10       [0m | [0m0.7408   [0m | [0m0.3144   [0m | [0m104.5    [0m |
| [0m11       [0m | [0m0.7399   [0m | [0m0.315    [0m | [0m90.2     

#### 3.6 Multi-Layer Perceptron Classifier

In [22]:
# Define the objective function
def objective_function_MLP(hidden_layer_size, learning_rate, alpha):
    hidden_layer_size = int(hidden_layer_size)
    learning_rate = learning_rate
    alpha = alpha
    
    model = MLPClassifier(hidden_layer_sizes=(hidden_layer_size,),
                          learning_rate_init=learning_rate,
                          alpha=alpha)
    
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)

    return accuracy_score(y_test, y_pred)

# Define the search space
bounds_MLP = {
    'hidden_layer_size': (10, 200),
    'learning_rate': (1e-5, 1e-1),
    'alpha': (1e-5, 1e-1),
}

# Set up the optimizer
optimizer = BayesianOptimization(f=objective_function_MLP, pbounds=bounds_MLP)

# Run the optimization
optimizer.maximize(init_points=60, n_iter=100)

# Train best model
best_params = optimizer.max["params"]
best_score = optimizer.max["target"]

# Train the MLPClassifier model using the optimal hyperparameters
optimal_hidden_layer_size = int(best_params['hidden_layer_size'])
optimal_learning_rate = best_params['learning_rate']
optimal_alpha = best_params['alpha']

model_MLP = MLPClassifier(hidden_layer_sizes=(optimal_hidden_layer_size,),
                      learning_rate_init=optimal_learning_rate,
                      alpha=optimal_alpha,
                      random_state=0)

model_MLP.fit(X_train, y_train)

# Evaluate Model
y_pred = model_MLP.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
precision = precision_score(y_test, y_pred, average='weighted')
recall = recall_score(y_test, y_pred, average='weighted')
f1 = f1_score(y_test, y_pred, average='weighted')

print(f"Accuracy: {accuracy}")
print(f"Precision: {precision}")
print(f"Recall: {recall}")
print(f"F1 Score: {f1}")

|   iter    |  target   |   alpha   | hidden... | learni... |
-------------------------------------------------------------
| [0m1        [0m | [0m0.7284   [0m | [0m0.03468  [0m | [0m82.36    [0m | [0m0.07805  [0m |
| [0m2        [0m | [0m0.7268   [0m | [0m0.06674  [0m | [0m117.9    [0m | [0m0.01129  [0m |
| [0m3        [0m | [0m0.7284   [0m | [0m0.07083  [0m | [0m132.2    [0m | [0m0.05427  [0m |
| [0m4        [0m | [0m0.7284   [0m | [0m0.001898 [0m | [0m138.4    [0m | [0m0.09819  [0m |
| [0m5        [0m | [0m0.7284   [0m | [0m0.04879  [0m | [0m41.85    [0m | [0m0.07248  [0m |
| [0m6        [0m | [0m0.7249   [0m | [0m0.05575  [0m | [0m144.4    [0m | [0m0.04831  [0m |
| [0m7        [0m | [0m0.7282   [0m | [0m0.00783  [0m | [0m97.25    [0m | [0m0.06828  [0m |
| [0m8        [0m | [0m0.7282   [0m | [0m0.06532  [0m | [0m34.92    [0m | [0m0.07321  [0m |
| [0m9        [0m | [0m0.7284   [0m | [0m0.02146  [0m 

#### 3.7 Voting Classifier

In [29]:
eclf1 = VotingClassifier(estimators=[('nb', model_NB), ('dt', DT_classifier),('lr', model_LR), ('adaboost', model_adaboost), ('mlp', model_MLP)], voting='soft')
eclf1.fit(X_train, y_train)
predictions = eclf1.predict(X_test)
print("Final Accuracy Score ")
print(accuracy_score(y_test, predictions))

Final Accuracy Score 
0.7269660307119591


#### Model Evaluation

In [30]:
def evaluate_model(model, X_test, y_test):
    y_pred = model.predict(X_test)
    accuracy = accuracy_score(y_test, y_pred)
    precision = precision_score(y_test, y_pred, average='weighted')
    recall = recall_score(y_test, y_pred, average='weighted')
    f1 = f1_score(y_test, y_pred, average='weighted')

    return accuracy, precision, recall, f1


model1_metrics = evaluate_model(model_NB, X_test, y_test)
model2_metrics = evaluate_model(DT_classifier, X_test, y_test)
model3_metrics = evaluate_model(model_LR, X_test, y_test)
model4_metrics = evaluate_model(model_svc, X_test2, y_test2)
model5_metrics = evaluate_model(model_adaboost, X_test, y_test)
model6_metrics = evaluate_model(model_MLP, X_test, y_test)
model7_metrics = evaluate_model(eclf1, X_test, y_test)

metrics_df = pd.DataFrame(
    data=[model1_metrics, model2_metrics, model3_metrics, model4_metrics,model5_metrics,model6_metrics,model7_metrics],
    columns=['Accuracy', 'Precision', 'Recall', 'F1 Score'],
    index=['Naive Bayes', 'Decision Tree', 'Logistic Regression', 'Support Vector Machine','Adaboost','Multi-Layer Perceptron Classifier','Voting Classifier']
)

print(metrics_df)

                                   Accuracy  Precision    Recall  F1 Score
Naive Bayes                        0.728362   0.530511  0.728362  0.613889
Decision Tree                      0.627734   0.626732  0.627734  0.627230
Logistic Regression                0.728478   0.669695  0.728478  0.624529
Support Vector Machine             0.737785   0.701915  0.737785  0.629732
Adaboost                           0.732434   0.692139  0.732434  0.643194
Multi-Layer Perceptron Classifier  0.727897   0.666471  0.727897  0.630739
Voting Classifier                  0.726966   0.669524  0.726966  0.647879


Upon a thorough analysis of the given performance metrics, it becomes evident that the Support Vector Machine (SVM) model outperforms its counterparts. Boasting the highest accuracy (0.737785016) and F1 Score (0.62973215), the SVM model demonstrates a remarkable balance between correct predictions and misclassifications.

Accuracy, a widely recognized metric for evaluating classifier performance, quantifies the proportion of correctly classified instances in relation to the total number of instances. Nevertheless, accuracy can be misleading when dealing with imbalanced class distributions. In such cases, the F1 Score emerges as a more reliable metric, as it accounts for both precision and recall. In this context, the SVM model's superior F1 Score (0.62973215) is particularly noteworthy.

Precision, which gauges the proportion of true positive instances out of instances predicted as positive, is crucial when false positives carry a high cost. The SVM model excels in this regard, with the highest precision (0.70191523) among the evaluated models. Moreover, the SVM model achieves the second-highest recall (0.737785016), only marginally lower than Logistic Regression (0.728478362). Recall is essential when minimizing false negatives is of utmost importance.

In conclusion, the Support Vector Machine model indisputably stands out among the given models, thanks to its superior accuracy, precision, and F1 Score, as well as its commendable recall. This exemplary performance establishes the SVM model as the most suitable choice for this particular problem, providing a solid foundation for accurate and reliable predictions.