In [4]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import warnings
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score

In [5]:
# Ignore every warning category for the rest of the session to keep cell output tidy.
# NOTE(review): this blanket filter also hides scikit-learn convergence and
# deprecation warnings — consider narrowing it once the notebook is stable.
warnings.simplefilter("ignore")

In [6]:
# Read the churn dataset from a CSV file into a DataFrame.
# NOTE(review): despite its name, data_url holds an absolute filesystem path
# (it looks like a Colab "/content" upload) — adjust it when running elsewhere.
data_url = "/content/Churn_Modelling.csv"
data = pd.read_csv(filepath_or_buffer=data_url)

In [7]:
# Remove the columns that are not needed for modelling.
# Re-running this cell on the already-trimmed frame raises KeyError, because the
# result is assigned back to the same name.
data = data.drop(columns=['RowNumber', 'CustomerId', 'Surname'])

In [8]:
# Replace the categorical columns with indicator (dummy) columns.
# drop_first=True omits the first level of each encoded column, so the remaining
# indicators are not redundant with one another.
data = pd.get_dummies(data=data, drop_first=True)

In [9]:
# Separate the target column ('Exited') from the predictor columns.
y = data['Exited']
X = data.drop(columns=['Exited'])

In [10]:
# Hold out 10% of the rows as a test set; the fixed random_state makes the
# shuffle, and therefore the split, repeatable between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.1,
    random_state=42,
)

In [11]:
# Standardise the features. The scaler is fitted on the training split only and
# then applied unchanged to both splits, so the test rows never influence the
# statistics used for scaling.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

In [12]:
# Candidate classifiers to compare, keyed by the name shown in the results table.
# The tree-based estimators draw random numbers while fitting, so each one is given
# a fixed random_state; without it their scores change from one run to the next and
# the notebook's reported accuracies cannot be reproduced.
models = {
    "Logistic Regression": LogisticRegression(),
    "Support Vector Machine": SVC(),
    "Random Forest": RandomForestClassifier(random_state=42),
    "Decision Tree": DecisionTreeClassifier(random_state=42),
    "K-Nearest Neighbors": KNeighborsClassifier(),
    "Gradient Boosting": GradientBoostingClassifier(random_state=42),
}

In [13]:
# Fit every candidate on the scaled training data and record its accuracy on the
# scaled test data, keyed by model name.
results = {}
for name, model in models.items():
    # fit() returns the estimator itself, so prediction can be chained onto it.
    y_pred = model.fit(X_train_scaled, y_train).predict(X_test_scaled)
    accuracy = accuracy_score(y_test, y_pred)
    results[name] = accuracy

In [14]:
# Tabulate the scores: one row per model, in the order the models were evaluated.
performance_summary = pd.DataFrame(
    {
        'Model': list(results.keys()),
        'Accuracy': list(results.values()),
    }
)

In [15]:
# Show the comparison table as the cell's result so that Jupyter renders it with
# its rich DataFrame display instead of printing it as plain text.
performance_summary

                    Model  Accuracy
0     Logistic Regression     0.809
1  Support Vector Machine     0.865
2           Random Forest     0.866
3           Decision Tree     0.794
4     K-Nearest Neighbors     0.840
5       Gradient Boosting     0.867
