In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.metrics import accuracy_score, classification_report

# Load the dataset
data = pd.read_csv("Churn_Modelling.csv")

# Remove unnecessary columns (RowNumber, CustomerId, Surname)
data = data.drop(["RowNumber", "CustomerId", "Surname"], axis=1)

# Encode categorical features (Geography, Gender)
label_encoder = LabelEncoder()
data["Geography"] = label_encoder.fit_transform(data["Geography"])
data["Gender"] = label_encoder.fit_transform(data["Gender"])

# Split data into features (X) and target (y)
X = data.drop("Exited", axis=1)
y = data["Exited"]

# Split the dataset into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Standardize numerical features
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Initialize models
logreg_model = LogisticRegression()
rf_model = RandomForestClassifier()
gb_model = GradientBoostingClassifier()

# Train models
logreg_model.fit(X_train_scaled, y_train)
rf_model.fit(X_train_scaled, y_train)
gb_model.fit(X_train_scaled, y_train)

# Evaluate models
def evaluate_model(model, X_test, y_test):
    y_pred = model.predict(X_test)
    accuracy = accuracy_score(y_test, y_pred)
    report = classification_report(y_test, y_pred)
    return accuracy, report

# Evaluate models
logreg_accuracy, logreg_report = evaluate_model(logreg_model, X_test_scaled, y_test)
rf_accuracy, rf_report = evaluate_model(rf_model, X_test_scaled, y_test)
gb_accuracy, gb_report = evaluate_model(gb_model, X_test_scaled, y_test)

# Print evaluation results
print("Logistic Regression Accuracy:", logreg_accuracy)
print(logreg_report)

print("Random Forest Accuracy:", rf_accuracy)
print(rf_report)

print("Gradient Boosting Accuracy:", gb_accuracy)
print(gb_report)


Logistic Regression Accuracy: 0.815
              precision    recall  f1-score   support

           0       0.83      0.97      0.89      1607
           1       0.60      0.18      0.28       393

    accuracy                           0.81      2000
   macro avg       0.71      0.58      0.59      2000
weighted avg       0.78      0.81      0.77      2000

Random Forest Accuracy: 0.865
              precision    recall  f1-score   support

           0       0.88      0.97      0.92      1607
           1       0.76      0.45      0.57       393

    accuracy                           0.86      2000
   macro avg       0.82      0.71      0.74      2000
weighted avg       0.86      0.86      0.85      2000

Gradient Boosting Accuracy: 0.866
              precision    recall  f1-score   support

           0       0.88      0.96      0.92      1607
           1       0.76      0.47      0.58       393

    accuracy                           0.87      2000
   macro avg       0.82     