In [1]:
# Step 1: Import libraries
import pandas as pd
import numpy as np

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder

from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.ensemble import AdaBoostClassifier

from sklearn.metrics import classification_report
from sklearn.metrics import precision_score, recall_score, f1_score


# Step 2: Load dataset (use correct Kaggle path)
# The CSV location is kept in one named constant so it is easy to spot and change.
DATA_PATH = "/kaggle/input/datasets/blastchar/telco-customer-churn/WA_Fn-UseC_-Telco-Customer-Churn.csv"
data = pd.read_csv(DATA_PATH)


# Step 3: Preprocessing
#
# Produces an all-numeric `data` frame for the models below. `data` is rebound
# to new frames instead of being mutated with `inplace=True`, so each statement
# shows its input and output explicitly.

# Remove unnecessary column
data = data.drop(columns="customerID")

# Convert TotalCharges to numeric; entries that cannot be parsed become NaN
data["TotalCharges"] = pd.to_numeric(data["TotalCharges"], errors="coerce")

# Fill missing values (this includes the NaNs produced by the coercion above)
data = data.fillna(0)

# Convert categorical columns to numeric.
# Columns are selected by "not numeric and not boolean" rather than by
# `dtype == "object"`, because text columns may be stored with pandas' dedicated
# string dtype, which does not compare equal to "object".
# Each column gets its own fitted encoder, kept in `encoders`, so the
# category -> integer mapping can be inspected or inverted later.
encoders = {}
le = LabelEncoder()  # keeps `le` bound even when no column needs encoding

for column in data.select_dtypes(exclude=["number", "bool"]).columns:
    le = LabelEncoder()
    data[column] = le.fit_transform(data[column])
    encoders[column] = le


# Step 4: Split dataset
# The target is the "Churn" column; every other column is a feature.
y = data["Churn"]
X = data.drop(columns="Churn")

# Hold out 20% of the rows for testing; the fixed seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42, test_size=0.2)


# Steps 5-7 share one routine: fit on the training split, predict on the test
# split, print a header followed by the classification report.
def _fit_predict_report(model, header):
    """Fit `model`, print `header` and its test-set report, return the test predictions.

    Reads X_train, y_train, X_test and y_test from the enclosing scope.
    """
    model.fit(X_train, y_train)
    predictions = model.predict(X_test)
    print(header)
    print(classification_report(y_test, predictions))
    return predictions


# Step 5: Random Forest
rf = RandomForestClassifier(n_estimators=100, random_state=42)
rf_pred = _fit_predict_report(rf, "===== Random Forest Classification Report =====")


# Step 6: Gradient Boosting
gb = GradientBoostingClassifier(n_estimators=100, random_state=42)
gb_pred = _fit_predict_report(gb, "===== Gradient Boosting Classification Report =====")


# Step 7: AdaBoost
ab = AdaBoostClassifier(n_estimators=100, random_state=42)
ab_pred = _fit_predict_report(ab, "===== AdaBoost Classification Report =====")


# Step 8: Comparison Table
# One row per algorithm: its name, then precision, recall and F1 computed from
# that algorithm's test-set predictions against y_test.
predictions_by_algorithm = {
    "Random Forest": rf_pred,
    "Gradient Boosting": gb_pred,
    "AdaBoost": ab_pred,
}

results = [
    [
        algorithm,
        precision_score(y_test, predicted),
        recall_score(y_test, predicted),
        f1_score(y_test, predicted),
    ]
    for algorithm, predicted in predictions_by_algorithm.items()
]

comparison = pd.DataFrame(
    results,
    columns=["Algorithm", "Precision", "Recall", "F1-Score"]
)

print("\n===== Algorithm Comparison =====")
print(comparison)


# Step 9: Best algorithm
# Find the row label with the highest F1-Score, then pull that row from the table.
top_row_label = comparison["F1-Score"].idxmax()
best = comparison.loc[top_row_label]

print("\n===== Best Algorithm Based on F1-Score =====")
print(best)


===== Random Forest Classification Report =====
              precision    recall  f1-score   support

           0       0.83      0.91      0.87      1036
           1       0.66      0.47      0.55       373

    accuracy                           0.79      1409
   macro avg       0.74      0.69      0.71      1409
weighted avg       0.78      0.79      0.78      1409

===== Gradient Boosting Classification Report =====
              precision    recall  f1-score   support

           0       0.84      0.91      0.87      1036
           1       0.68      0.53      0.60       373

    accuracy                           0.81      1409
   macro avg       0.76      0.72      0.74      1409
weighted avg       0.80      0.81      0.80      1409

===== AdaBoost Classification Report =====
              precision    recall  f1-score   support

           0       0.85      0.90      0.87      1036
           1       0.66      0.55      0.60       373

    accuracy                           