In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.impute import SimpleImputer
from sklearn.ensemble import RandomForestClassifier, BaggingClassifier, AdaBoostClassifier, GradientBoostingClassifier
from xgboost import XGBClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

In [None]:
# Load the churn dataset from a CSV file located in the working directory.
df = pd.read_csv(filepath_or_buffer="churn_data.csv")

In [None]:
# Check for missing values
print(df.isnull().sum())

# A row without a label cannot be used for supervised training or evaluation,
# and filling a missing 'churn' value with a median would fabricate a label,
# so such rows are dropped instead of imputed. `.copy()` gives an independent
# frame so the column assignment below does not write into a slice.
df = df.loc[df['churn'].notna()].copy()

# Fill missing numeric *feature* values with the column median. The target is
# excluded so its values and dtype are left exactly as loaded.
# NOTE(review): the imputer is fit on the full dataset before the train/test
# split later in this notebook, so test-set rows influence the medians —
# consider fitting it on the training split only.
numeric_cols = (
    df.select_dtypes(include=['int64', 'float64'])
    .columns
    .drop('churn', errors='ignore')
)
if len(numeric_cols) > 0:  # SimpleImputer rejects an empty column selection
    imputer = SimpleImputer(strategy='median')
    df[numeric_cols] = imputer.fit_transform(df[numeric_cols])

In [None]:
# Integer-encode every object-dtype column in place, storing each fitted
# encoder under its column name.
label_encoders = {}
object_columns = df.select_dtypes(include='object').columns
for column_name in object_columns:
    encoder = LabelEncoder()
    df[column_name] = encoder.fit_transform(df[column_name])
    label_encoders[column_name] = encoder

In [None]:
# Separate the 'churn' target column from the remaining feature columns.
y = df['churn']
X = df.drop(columns='churn')

In [None]:
# Hold out 20% of the rows for evaluation, with a fixed seed for repeatability.
# Stratifying on the target keeps the class proportions the same in the train
# and test splits, so precision/recall/F1 are not skewed by an unlucky draw of
# the minority class.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

In [None]:
# Candidate ensemble classifiers, grouped by ensemble family.
# Outer key: family name; inner key: display name; value: unfitted estimator.
# Every estimator is seeded with random_state=42.
models = {
    # Bagging-family ensembles
    'Bagging': {
        'Random Forest': RandomForestClassifier(random_state=42),
        'Bagging Classifier/Regressor': BaggingClassifier(random_state=42),
    },
    # Boosting-family ensembles
    'Boosting': {
        'XGBoost': XGBClassifier(eval_metric='logloss', random_state=42),
        'AdaBoost': AdaBoostClassifier(random_state=42),
        'Gradient Boosting': GradientBoostingClassifier(random_state=42),
    },
}

In [None]:
def _first_leaf_estimator(estimators):
    """Return the first estimator in a possibly nested container, or None if it is empty."""
    item = estimators
    # Descend through list/array levels until reaching something that looks like
    # an estimator (has ``fit``). Strings are excluded because indexing a
    # one-character string yields itself and would never terminate.
    while (
        hasattr(item, "__len__")
        and not hasattr(item, "fit")
        and not isinstance(item, str)
    ):
        if len(item) == 0:
            return None
        item = item[0]
    return item


def get_base_learner(model):
    """Return the class name of the base learner used by an ensemble model.

    Lookup order:
      1. ``estimator_`` / ``base_estimator_`` — fitted template estimator
         (both the newer and the older attribute name are probed).
      2. ``estimators_`` — first fitted member. The container may be nested
         (e.g. a 2-D array), so it is descended until an actual estimator is
         reached rather than reporting the container type.
      3. ``estimator`` / ``base_estimator`` — configured but unfitted template;
         ``None`` and string placeholders are ignored.
      4. ``XGBClassifier`` — reported as ``"DecisionTree"``.

    Parameters
    ----------
    model : object
        An ensemble estimator, fitted or not.

    Returns
    -------
    str
        The base learner's class name, or ``"Unknown"`` if none can be determined.
    """
    # 1. Template estimator stored on the fitted ensemble.
    for attr in ("estimator_", "base_estimator_"):
        fitted_template = getattr(model, attr, None)
        if fitted_template is not None:
            return type(fitted_template).__name__

    # 2. Fitted members; an empty collection yields "Unknown".
    members = getattr(model, "estimators_", None)
    if members is not None:
        first_member = _first_leaf_estimator(members)
        return type(first_member).__name__ if first_member is not None else "Unknown"

    # 3. Explicitly configured template on an unfitted ensemble.
    for attr in ("estimator", "base_estimator"):
        configured = getattr(model, attr, None)
        if configured is not None and not isinstance(configured, str):
            return type(configured).__name__

    # 4. XGBoost exposes none of the attributes above.
    if isinstance(model, XGBClassifier):
        return "DecisionTree"  # Internally used by XGBoost
    return "Unknown"

In [None]:
# Fit every candidate model on the training split, predict on the test split,
# and record one row of rounded metrics per model.
results = []
for family, members in models.items():
    for label, estimator in members.items():
        estimator.fit(X_train, y_train)
        predictions = estimator.predict(X_test)

        # Identity columns first, so they lead the resulting table.
        row = {
            "Type": family,
            "Model": label,
            "Base Learner": get_base_learner(estimator),
        }

        # Each metric is evaluated on the same predictions and rounded to 4 places.
        scorers = (
            ("Accuracy", accuracy_score),
            ("Precision", precision_score),
            ("Recall", recall_score),
            ("F1 Score", f1_score),
        )
        for metric_name, scorer in scorers:
            row[metric_name] = round(scorer(y_test, predictions), 4)

        results.append(row)

In [None]:
# Collect the per-model metric rows into one comparison table and display it.
results_df = pd.DataFrame(data=results)
results_df