In [25]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report, roc_auc_score


In [26]:
data = pd.read_csv('Churn_Modelling.csv')
data.head()

Unnamed: 0,RowNumber,CustomerId,Surname,CreditScore,Geography,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited
0,1,15634602,Hargrave,619,France,Female,42,2,0.0,1,1,1,101348.88,1
1,2,15647311,Hill,608,Spain,Female,41,1,83807.86,1,0,1,112542.58,0
2,3,15619304,Onio,502,France,Female,42,8,159660.8,3,1,0,113931.57,1
3,4,15701354,Boni,699,France,Female,39,1,0.0,2,0,0,93826.63,0
4,5,15737888,Mitchell,850,Spain,Female,43,2,125510.82,1,1,1,79084.1,0


In [27]:
label_encoder = LabelEncoder()
data['Gender'] = label_encoder.fit_transform(data['Gender'])

In [28]:
data = data.drop('Geography',axis=1)
data = data.drop(['Surname','CustomerId'],axis=1)

In [29]:
data.head()

Unnamed: 0,RowNumber,CreditScore,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited
0,1,619,0,42,2,0.0,1,1,1,101348.88,1
1,2,608,0,41,1,83807.86,1,0,1,112542.58,0
2,3,502,0,42,8,159660.8,3,1,0,113931.57,1
3,4,699,0,39,1,0.0,2,0,0,93826.63,0
4,5,850,0,43,2,125510.82,1,1,1,79084.1,0


In [30]:
X = data.drop('Exited', axis=1)
y = data['Exited']

In [31]:
X.head()

Unnamed: 0,RowNumber,CreditScore,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary
0,1,619,0,42,2,0.0,1,1,1,101348.88
1,2,608,0,41,1,83807.86,1,0,1,112542.58
2,3,502,0,42,8,159660.8,3,1,0,113931.57
3,4,699,0,39,1,0.0,2,0,0,93826.63
4,5,850,0,43,2,125510.82,1,1,1,79084.1


In [32]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

In [33]:
# Logistic Regression
log_reg = LogisticRegression()
log_reg.fit(X_train, y_train)
y_pred_log_reg = log_reg.predict(X_test)

In [34]:
# Random Forest
rf = RandomForestClassifier(n_estimators=100, random_state=42)
rf.fit(X_train, y_train)
y_pred_rf = rf.predict(X_test)

In [35]:
# Gradient Boosting
gb = GradientBoostingClassifier(n_estimators=100, learning_rate=0.1, random_state=42)
gb.fit(X_train, y_train)
y_pred_gb = gb.predict(X_test)

In [36]:
# Logistic Regression
print("Logistic Regression")
print("Accuracy:", accuracy_score(y_test, y_pred_log_reg))
print("ROC AUC:", roc_auc_score(y_test, y_pred_log_reg))
print(confusion_matrix(y_test, y_pred_log_reg))
print(classification_report(y_test, y_pred_log_reg))

# Random Forest
print("\nRandom Forest")
print("Accuracy:", accuracy_score(y_test, y_pred_rf))
print("ROC AUC:", roc_auc_score(y_test, y_pred_rf))
print(confusion_matrix(y_test, y_pred_rf))
print(classification_report(y_test, y_pred_rf))

# Gradient Boosting
print("\nGradient Boosting")
print("Accuracy:", accuracy_score(y_test, y_pred_gb))
print("ROC AUC:", roc_auc_score(y_test, y_pred_gb))
print(confusion_matrix(y_test, y_pred_gb))
print(classification_report(y_test, y_pred_gb))


Logistic Regression
Accuracy: 0.8145
ROC AUC: 0.5731627374511322
[[1560   47]
 [ 324   69]]
              precision    recall  f1-score   support

           0       0.83      0.97      0.89      1607
           1       0.59      0.18      0.27       393

    accuracy                           0.81      2000
   macro avg       0.71      0.57      0.58      2000
weighted avg       0.78      0.81      0.77      2000


Random Forest
Accuracy: 0.8595
ROC AUC: 0.7001611904660116
[[1547   60]
 [ 221  172]]
              precision    recall  f1-score   support

           0       0.88      0.96      0.92      1607
           1       0.74      0.44      0.55       393

    accuracy                           0.86      2000
   macro avg       0.81      0.70      0.73      2000
weighted avg       0.85      0.86      0.84      2000


Gradient Boosting
Accuracy: 0.865
ROC AUC: 0.7112727238180288
[[1550   57]
 [ 213  180]]
              precision    recall  f1-score   support

           0       0.8