In [1]:
import pandas as pd

train_data = pd.read_csv("./Dataset/Churn_Modelling.csv")
train_data

Unnamed: 0,RowNumber,CustomerId,Surname,CreditScore,Geography,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited
0,1,15634602,Hargrave,619,France,Female,42,2,0.00,1,1,1,101348.88,1
1,2,15647311,Hill,608,Spain,Female,41,1,83807.86,1,0,1,112542.58,0
2,3,15619304,Onio,502,France,Female,42,8,159660.80,3,1,0,113931.57,1
3,4,15701354,Boni,699,France,Female,39,1,0.00,2,0,0,93826.63,0
4,5,15737888,Mitchell,850,Spain,Female,43,2,125510.82,1,1,1,79084.10,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9995,9996,15606229,Obijiaku,771,France,Male,39,5,0.00,2,1,0,96270.64,0
9996,9997,15569892,Johnstone,516,France,Male,35,10,57369.61,1,1,1,101699.77,0
9997,9998,15584532,Liu,709,France,Female,36,7,0.00,1,0,1,42085.58,1
9998,9999,15682355,Sabbatini,772,Germany,Male,42,3,75075.31,2,1,0,92888.52,1


In [2]:
from sklearn.preprocessing import  StandardScaler


categorical_features = ['Geography', 'Gender']
train_data = pd.get_dummies(train_data, columns=categorical_features, drop_first=True)

numerical_features = ['CreditScore', 'Age', 'Balance', 'EstimatedSalary']
scaler = StandardScaler()
train_data[numerical_features] = scaler.fit_transform(train_data[numerical_features])

train_data['BalanceSalaryRatio'] = train_data['Balance'] / (train_data['EstimatedSalary'] + 1)


train_data['AgeBalanceInteraction'] = train_data['Age'] * train_data['Balance']


age_bins = [15, 30, 45, 60, 100]
age_labels = ['0-15', '16-30', '31-45', '46-100', ]
train_data['AgeGroup'] = pd.cut(train_data['Age'], bins=age_bins, labels=age_labels)

### Splitting Data

In [11]:
from sklearn.model_selection import train_test_split

In [12]:
numeric_columns = train_data.select_dtypes(include=['int64', 'float64']).columns
X = train_data[numeric_columns].drop(columns=['Exited'])
y = train_data['Exited']

# Split the dataset into training and testing sets
X_train, X_test, Y_train, Y_test = train_test_split(X, y, test_size=0.2, random_state=42)

## Random Forest

In [18]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report
from sklearn.metrics import accuracy_score

In [14]:
rfc = RandomForestClassifier()

In [15]:
rfc.fit(X_train, Y_train)

In [19]:
RF_clf_model_Predicitions = rfc.predict(X_test)
print(RF_clf_model_Predicitions)

[0 0 0 ... 1 0 0]


In [20]:
print("\n-------------> Random_Forest_Classifier Classification REPORT <-------------\n")
print(classification_report(Y_test, RF_clf_model_Predicitions))


-------------> Random_Forest_Classifier Classification REPORT <-------------

              precision    recall  f1-score   support

           0       0.88      0.96      0.92      1607
           1       0.75      0.45      0.56       393

    accuracy                           0.86      2000
   macro avg       0.81      0.71      0.74      2000
weighted avg       0.85      0.86      0.85      2000



In [21]:
RF_clf_accuracy = accuracy_score(Y_test,RF_clf_model_Predicitions)
print(f"Random_Forest_Classifier Model Accuaracy is:{int(RF_clf_accuracy*100)}%.")

Random_Forest_Classifier Model Accuaracy is:86%.


## Logistic Regression

In [22]:
from sklearn.linear_model import LogisticRegression

In [23]:
lg = LogisticRegression(multi_class='ovr')

In [24]:
lg.fit(X_train, Y_train)

In [25]:
LG_clf_model_Predicitions = lg.predict(X_test)
print(LG_clf_model_Predicitions)

[0 0 0 ... 0 0 0]


In [26]:
print("\n-------------> Logistic_Regression_Classifier Classification REPORT <-------------\n")
print(classification_report(Y_test, LG_clf_model_Predicitions))


-------------> Logistic_Regression_Classifier Classification REPORT <-------------

              precision    recall  f1-score   support

           0       0.80      1.00      0.89      1607
           1       0.00      0.00      0.00       393

    accuracy                           0.80      2000
   macro avg       0.40      0.50      0.45      2000
weighted avg       0.65      0.80      0.72      2000



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [27]:
LG_clf_accuracy = accuracy_score(Y_test,LG_clf_model_Predicitions)
print(f"Logistic_Regression_Classifier Model Accuaracy is:{int(LG_clf_accuracy*100)}%.")

Decision_Tree_Classifier Model Accuaracy is:80%.


## SVM Classifier

In [None]:
from sklearn.svm import SVC

In [30]:
svm = SVC(random_state=42)

In [31]:
svm.fit(X_train, Y_train)

In [32]:
SVM_Classifier_Prediction = svm.predict(X_test)
print(SVM_Classifier_Prediction)

[0 0 0 ... 0 0 0]


In [33]:
print("\n-------------> SVM Classification REPORT <-------------\n")
print(classification_report(Y_test, SVM_Classifier_Prediction))


-------------> SVM Classification REPORT <-------------

              precision    recall  f1-score   support

           0       0.80      1.00      0.89      1607
           1       0.00      0.00      0.00       393

    accuracy                           0.80      2000
   macro avg       0.40      0.50      0.45      2000
weighted avg       0.65      0.80      0.72      2000



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [34]:
SVM_Accuracy = accuracy_score(Y_test,SVM_Classifier_Prediction)
print(f"SVM Classifier Model Accuracy is:{int(SVM_Accuracy*100)}%.")

SVM Classifier Model Accuracy is:80%.
