In [2]:
import mlflow
import mlflow.xgboost
import mlflow.sklearn
from sklearn.metrics import roc_auc_score, classification_report, recall_score, precision_score
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.preprocessing import LabelEncoder, OneHotEncoder, StandardScaler
import xgboost as xgb
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
import pandas as pd
import numpy as np

# Set up MLflow.
# The tracking URI has to be configured first: set_experiment() looks up (or
# creates) the experiment on whichever tracking backend is active when it is
# called, so selecting the experiment before pointing at the server would
# resolve it against the default backend instead.
mlflow.set_tracking_uri("http://127.0.0.1:5000")
mlflow.set_experiment("Telco Churn Multi-Model")


# Utility functions

In [3]:
def preprocess_data(file_path):
    """
    Load the churn CSV and turn it into model-ready train/validation/test splits.

    Pipeline, in order:
      1. Read the CSV and coerce ``TotalCharges`` to numeric; values that fail
         to parse are filled with ``MonthlyCharges * tenure``.
      2. Stratified split: 20% test, then 25% of the remainder as validation.
      3. Label-encode the ``Churn`` target (encoder fitted on the training split).
      4. One-hot encode the categorical columns (encoder fitted on the training
         split, first level dropped, unknown levels ignored).
      5. Standard-scale the numeric columns (scaler fitted on the training split).
      6. Compute ``scale_pos_weight`` as (#label-0 rows) / (#label-1 rows) in
         the encoded training target.

    Parameters
    ----------
    file_path : str or path-like
        Path of the raw CSV. It must contain the categorical and numeric
        columns listed in the body, plus the ``Churn`` target column.

    Returns
    -------
    tuple
        ``(X_train, X_val, X_test, y_train_en, y_val_en, y_test_en,
        categorical_variables, numeric_variables, ohe, scaler, le,
        scale_pos_weight)``

    Raises
    ------
    ValueError
        If the encoded training target contains no rows with label 1, since
        ``scale_pos_weight`` would then be a division by zero.

    Notes
    -----
    Writes a small summary to ``data_info.txt`` in the current working
    directory. The file is only written to disk; this function does not log
    it to MLflow.
    """

    df = pd.read_csv(file_path)

    # TotalCharges may contain non-numeric entries: coerce them to NaN, then
    # fill the gaps with the implied total (monthly charge x tenure).
    df['TotalCharges'] = pd.to_numeric(df['TotalCharges'], errors='coerce')
    df['TotalCharges'] = df['TotalCharges'].fillna(df['MonthlyCharges'] * df['tenure'])

    categorical_variables = ['gender', 'SeniorCitizen', 'Partner', 'Dependents', 'PhoneService',
                             'MultipleLines', 'InternetService', 'OnlineSecurity', 'OnlineBackup',
                             'DeviceProtection', 'TechSupport', 'StreamingTV', 'StreamingMovies',
                             'Contract', 'PaperlessBilling', 'PaymentMethod']
    numeric_variables = ['tenure', 'MonthlyCharges', 'TotalCharges']

    variables = df[categorical_variables + numeric_variables]
    target = df['Churn']

    # Train-val-test split (0.2 test, then 0.25 of the rest as validation).
    X_train_val, X_test, y_train_val, y_test = train_test_split(
        variables, target, test_size=0.2, random_state=42, stratify=target
    )
    X_train, X_val, y_train, y_val = train_test_split(
        X_train_val, y_train_val, test_size=0.25, random_state=42, stratify=y_train_val
    )

    # Label encode target; the encoder only ever sees the training labels.
    le = LabelEncoder()
    y_train_en = le.fit_transform(y_train)
    y_val_en = le.transform(y_val)
    y_test_en = le.transform(y_test)

    # One-hot encode categoricals; fitted on the training split only.
    ohe = OneHotEncoder(handle_unknown='ignore', drop='first')
    ohe.fit(X_train[categorical_variables])
    ohe_columns = ohe.get_feature_names_out()

    def _encode(frame):
        """One-hot encode the categorical columns of ``frame``, keeping its row index."""
        return pd.DataFrame(
            ohe.transform(frame[categorical_variables]).toarray(),
            columns=ohe_columns, index=frame.index
        )

    def _assemble(frame):
        """Place the numeric columns next to the one-hot encoded categoricals."""
        return pd.concat([frame[numeric_variables], _encode(frame)], axis=1)

    X_train, X_val, X_test = (_assemble(part) for part in (X_train, X_val, X_test))

    # Scale numerics; statistics come from the training split only.
    scaler = StandardScaler()
    X_train[numeric_variables] = scaler.fit_transform(X_train[numeric_variables])
    X_val[numeric_variables] = scaler.transform(X_val[numeric_variables])
    X_test[numeric_variables] = scaler.transform(X_test[numeric_variables])

    # Imbalance handling: ratio of label-0 to label-1 training rows.
    # minlength=2 guarantees index 1 exists even if label 1 never occurs.
    class_counts = np.bincount(y_train_en, minlength=2)
    if class_counts[1] == 0:
        raise ValueError(
            "Training split contains no positive-class rows; cannot compute scale_pos_weight."
        )
    scale_pos_weight = class_counts[0] / class_counts[1]

    # Write a short data summary next to the notebook.
    with open("data_info.txt", "w", encoding="utf-8") as f:
        f.write(f"Dataset shape: {df.shape}\n")
        f.write(f"Class balance: {class_counts}\n")
        f.write(f"Scale pos weight: {scale_pos_weight}\n")

    return (X_train, X_val, X_test, y_train_en, y_val_en, y_test_en,
            categorical_variables, numeric_variables, ohe, scaler, le, scale_pos_weight)



In [4]:
def train_and_log_model(model_class, model_name, param_grid, X_train, y_train_en, X_val, y_val_en, X_test, y_test_en, 
                        num_vars, scale_pos_weight, ohe, scaler, le, cat_vars=None):
    """
    Grid-search one classifier, evaluate it, and record the result in one MLflow run.

    Parameters
    ----------
    model_class : type
        Estimator class to instantiate. ``xgb.XGBClassifier`` is constructed
        with ``scale_pos_weight`` and an ``aucpr`` eval metric; any other class
        is constructed with ``random_state=42`` only.
    model_name : str
        Used as the MLflow run name and as a prefix for logged hyperparameters.
    param_grid : dict
        Grid handed to ``GridSearchCV`` (5-fold, scored by ROC AUC).
    X_train, y_train_en
        Training features and encoded target used for the grid search.
    X_val, y_val_en
        Validation data; only used to report ``val_auc_roc``.
    X_test, y_test_en
        Test data used for AUC, recall, precision and the classification report.
    num_vars : sequence
        Numeric feature names; only their count is logged.
    scale_pos_weight : float
        Class-imbalance weight; logged, and passed to XGBoost models.
    ohe
        Fitted one-hot encoder. Used to derive the categorical feature count
        when ``cat_vars`` is not supplied.
    scaler
        Accepted for call-site symmetry; not used in the body.
    le
        Fitted label encoder; its ``classes_`` are logged as a parameter.
    cat_vars : sequence, optional
        Categorical feature names; only their count is logged. When omitted,
        the count is taken from ``ohe.n_features_in_`` so the function does
        not depend on a notebook-level global.

    Returns
    -------
    tuple
        ``(best_model, test_auc)``.

    Notes
    -----
    Writes ``classification_report.csv`` to the current working directory
    before logging it as an artifact.
    """
    # Resolve everything that could fail up front, before a run is opened.
    if cat_vars is not None:
        cat_vars_count = len(cat_vars)
    else:
        cat_vars_count = ohe.n_features_in_
    is_xgboost = model_class == xgb.XGBClassifier

    with mlflow.start_run(run_name=model_name):
        # Log preprocessing params (shared across models)
        mlflow.log_param("scale_pos_weight", scale_pos_weight)
        mlflow.log_param("num_features", X_train.shape[1])
        mlflow.log_param("cat_vars_count", cat_vars_count)
        mlflow.log_param("num_vars_count", len(num_vars))

        # Initialize base model
        if is_xgboost:
            base_model = model_class(objective='binary:logistic', scale_pos_weight=scale_pos_weight,
                                     random_state=42, eval_metric='aucpr')
        else:
            base_model = model_class(random_state=42)

        # Grid search
        grid_search = GridSearchCV(base_model, param_grid, cv=5, scoring='roc_auc', n_jobs=-1)
        grid_search.fit(X_train, y_train_en)

        best_model = grid_search.best_estimator_
        best_params = grid_search.best_params_

        # Log hyperparams, prefixed with the model name
        for param, value in best_params.items():
            mlflow.log_param(f"{model_name}_{param}", value)

        # Predictions and metrics; column 1 of predict_proba is the positive class
        y_val_pred_proba = best_model.predict_proba(X_val)[:, 1]
        y_test_pred_proba = best_model.predict_proba(X_test)[:, 1]
        y_test_pred = best_model.predict(X_test)

        val_auc = roc_auc_score(y_val_en, y_val_pred_proba)
        test_auc = roc_auc_score(y_test_en, y_test_pred_proba)

        # Compute recall and precision for positive class (churn = 1)
        test_recall = recall_score(y_test_en, y_test_pred, pos_label=1)
        test_precision = precision_score(y_test_en, y_test_pred, pos_label=1)

        mlflow.log_metric("val_auc_roc", val_auc)
        mlflow.log_metric("test_auc_roc", test_auc)
        mlflow.log_metric("test_recall", test_recall)
        mlflow.log_metric("test_precision", test_precision)

        # Log classification report as artifact
        report = classification_report(y_test_en, y_test_pred, output_dict=True)
        report_df = pd.DataFrame(report).transpose()
        report_df.to_csv("classification_report.csv")
        mlflow.log_artifact("classification_report.csv")

        # Log model with the flavor matching the estimator class
        if is_xgboost:
            mlflow.xgboost.log_model(best_model, "model")
        else:
            mlflow.sklearn.log_model(best_model, "model")

        # Log model details
        mlflow.log_param("model_type", model_name)
        mlflow.log_param("target_encoder_classes", list(le.classes_))

        print(f"{model_name} Test AUC-ROC: {test_auc:.3f}, Recall: {test_recall:.3f}, Precision: {test_precision:.3f}")
        return best_model, test_auc

# Workflow

In [18]:
# Run the preprocessing pipeline once. `data` keeps the full 12-item result;
# the names below expose its pieces to the later cells.
data = preprocess_data('../data/customer_churn_telecom_services.csv')

X_train, X_val, X_test = data[0:3]                # feature frames
y_train_en, y_val_en, y_test_en = data[3:6]       # label-encoded targets
cat_vars, num_vars = data[6:8]                    # column-name lists
ohe, scaler, le, scale_pos_weight = data[8:12]    # fitted transformers + class weight


In [19]:
# Three models to be trained (XGBoost, RandomForest, LogisticRegression).
# Each entry is (run name, estimator, (train X, train y), (test X, test y)).
# Every estimator is seeded with random_state=42 so that re-running the
# notebook reproduces the same fitted models and metrics.

models = [
    (
        "XGBoost",
        xgb.XGBClassifier(n_estimators = 100, max_depth = 3, learning_rate = 0.1, random_state = 42),
        (X_train, y_train_en),
        (X_test, y_test_en)
    ),
    (
        'randomforest', 
        RandomForestClassifier(n_estimators = 100, max_depth = 10, random_state = 42),
        (X_train, y_train_en),
        (X_test, y_test_en)
    ),
    (
        'LogisticRegression',
        LogisticRegression(C = 1, penalty = 'l1', solver = 'liblinear', random_state = 42),
        (X_train, y_train_en),
        (X_test, y_test_en)
    )
]

In [20]:
# Fit every configured model on its training pair and collect one
# classification report (as a dict) per model, in the same order as `models`.
reports = []

for model_name, model, train_set, test_set in models:
    X_train, y_train_en = train_set[0], train_set[1]
    X_test, y_test_en = test_set[0], test_set[1]

    # fit() returns the estimator itself, so prediction can be chained.
    y_pred = model.fit(X_train, y_train_en).predict(X_test)

    report = classification_report(y_test_en, y_pred, output_dict=True)
    reports.append(report)

In [21]:
# Report of the last model fitted in the loop above (LogisticRegression).
reports[-1]

{'0': {'precision': 0.8462946020128088,
  'recall': 0.893719806763285,
  'f1-score': 0.8693609022556391,
  'support': 1035.0},
 '1': {'precision': 0.6518987341772152,
  'recall': 0.5508021390374331,
  'f1-score': 0.5971014492753624,
  'support': 374.0},
 'accuracy': 0.8026969481902059,
 'macro avg': {'precision': 0.749096668095012,
  'recall': 0.7222609729003591,
  'f1-score': 0.7332311757655008,
  'support': 1409.0},
 'weighted avg': {'precision': 0.7946948471721331,
  'recall': 0.8026969481902059,
  'f1-score': 0.7970933114716621,
  'support': 1409.0}}

In [28]:
# Initialize MLflow.
# Point at the tracking server first, then select the experiment, so the
# experiment is looked up (or created) on that server.
mlflow.set_tracking_uri("http://127.0.0.1:5000/")
mlflow.set_experiment("Churn detection")

2025/10/25 13:02:54 INFO mlflow.tracking.fluent: Experiment with name 'Churn detection' does not exist. Creating a new experiment.


In [29]:
# Metric name -> key path inside a classification-report dict, in logging order.
metric_paths = (
    ('accuracy', ('accuracy',)),
    ('recall_class_1', ('1', 'recall')),
    ('precision_class_1', ('1', 'precision')),
    ('recall_class_0', ('0', 'recall')),
    ('precision_class_0', ('0', 'precision')),
    ('f1_score_macro', ('macro avg', 'f1-score')),
)

# One MLflow run per fitted model: its parameters, report metrics and the model itself.
for i, element in enumerate(models):
    model_name, model = element[0], element[1]
    report = reports[i]

    # The MLflow flavor is picked from the run name, not from the estimator type.
    log_model = mlflow.xgboost.log_model if "XGB" in model_name else mlflow.sklearn.log_model

    with mlflow.start_run(run_name = model_name):
        mlflow.log_param("model", model_name)
        mlflow.log_params(model.get_params())

        for metric_name, key_path in metric_paths:
            # Walk down the nested report dict to the requested value.
            metric_value = report
            for key in key_path:
                metric_value = metric_value[key]
            mlflow.log_metric(metric_name, metric_value)

        log_model(model, f'{model_name} model')

  self.get_booster().save_model(fname)


🏃 View run XGBoost at: http://127.0.0.1:5000/#/experiments/469149532379613833/runs/15be5651ad4e4752b38d2166482f254e
🧪 View experiment at: http://127.0.0.1:5000/#/experiments/469149532379613833




🏃 View run randomforest at: http://127.0.0.1:5000/#/experiments/469149532379613833/runs/8d9b3030043744f2bc7c4e56c55e4eaf
🧪 View experiment at: http://127.0.0.1:5000/#/experiments/469149532379613833




🏃 View run LogisticRegression at: http://127.0.0.1:5000/#/experiments/469149532379613833/runs/1ad7e518e1834c94817ca0728d032789
🧪 View experiment at: http://127.0.0.1:5000/#/experiments/469149532379613833
