In [1]:
import numpy as np
import pandas as pd
from sklearn.datasets import make_classification
from sklearn.dummy import DummyClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import (
    accuracy_score,
    f1_score,
    make_scorer,
    precision_score,
    recall_score,
    roc_auc_score,
)
from sklearn.model_selection import train_test_split, GridSearchCV, RandomizedSearchCV
from sklearn.model_selection import cross_validate
from sklearn.neighbors import KNeighborsClassifier
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier


def create_test_artifacts(n_samples=200, test_size=0.3, random_state=123):
    """
    Creates synthetic data and fitted models for testing ML functions.

    Parameters:
        n_samples (int): Number of synthetic rows to generate (default 200).
        test_size (float or int): Passed to train_test_split (default 0.3).
        random_state (int): Seed used for data generation, the split, and every
            seedable estimator (default 123).

    Returns:
        X_train (DataFrame): Training features
        X_test (DataFrame): Test features
        y_train (Series): Training labels (encoded as 'N', 'Y')
        y_test (Series): Test labels (encoded as 'N', 'Y')
        models (dict): Dictionary of fitted pipelines

    Example:
        To use this function, simply call:
            X_train, X_test, y_train, y_test, models = create_test_artifacts()

        Models included:
            - Dummy: Dummy Classifier
            - SVM: SVM RBF
            - KNN: KNN
            - DecisionTree: Decision Tree
            - RandomForest: Random Forest

        To select a specific model for testing, use models dictionary:
            single_model = models["RandomForest"]
    """
    n_features = 5

    # Generate synthetic data: 5 numeric features, 3 of them informative.
    X, y = make_classification(
        n_samples=n_samples,
        n_features=n_features,
        n_informative=3,
        n_redundant=0,
        random_state=random_state,
    )

    # Wrap in pandas so downstream code sees named columns and a named target.
    X_df = pd.DataFrame(X, columns=[f"feature_{i}" for i in range(n_features)])

    # Map 0/1 to 'N'/'Y' so fbeta_score(pos_label="Y") works
    y_series = pd.Series(y, name="churn").map({0: "N", 1: "Y"})

    X_train, X_test, y_train, y_test = train_test_split(
        X_df, y_series, test_size=test_size, random_state=random_state
    )

    classifiers = {
        "Dummy": DummyClassifier(strategy="most_frequent"),
        "SVM": SVC(kernel="rbf", probability=True, random_state=random_state),
        "KNN": KNeighborsClassifier(n_neighbors=3),
        "DecisionTree": DecisionTreeClassifier(max_depth=5, random_state=random_state),
        "RandomForest": RandomForestClassifier(n_estimators=10, random_state=random_state),
    }

    # Each pipeline gets its OWN StandardScaler: make_pipeline keeps the object
    # it is given, so one shared scaler would be refitted (and its statistics
    # overwritten for every pipeline) whenever any single pipeline is fitted.
    models = {
        name: make_pipeline(StandardScaler(), classifier)
        for name, classifier in classifiers.items()
    }

    # Test functions expect fitted estimators.
    for pipe in models.values():
        pipe.fit(X_train, y_train)

    return X_train, X_test, y_train, y_test, models



In [2]:
# Build the fixtures once: train/test feature frames, label series, and the dict of fitted pipelines.
X_train, X_test, y_train, y_test, models = create_test_artifacts()

In [7]:
def _roc_auc_for_pos_label(y_true, y_score, pos_label="Y"):
    """Compute ROC-AUC after binarising `y_true` to 1 where it equals `pos_label`, else 0."""
    y_true_binary = (np.asarray(y_true) == pos_label).astype(int)
    return roc_auc_score(y_true_binary, y_score)


def model_cv_metric_compare(models_dict, X, y, cv=5, pos_label="Y"):
    """
    Evaluates multiple models using Cross-Validation and returns a metric/scorer comparison DataFrame.

    Parameters
    ----------
    models_dict : dict
        Dictionary of {model_name: pipeline_object}.
        Note: Models do not need to be fitted beforehand.
    X : DataFrame
        Features (Training set or full dataset).
    y : Series
        Labels (Training set or full dataset).
    cv : int
        Number of cross-validation folds (default 5).
    pos_label : str
        Label treated as the positive class by every scorer (default "Y").

    Scorers Evaluated
    -----------------
    - accuracy
    - precision (pos_label=pos_label)
    - recall (pos_label=pos_label)
    - f1 (pos_label=pos_label)
    - roc_auc (if model supports predict_proba)

    Returns
    -------
    dataframe : pandas.DataFrame
        Indexed by "Model", one column per metric holding the mean over the
        CV folds. Models without predict_proba get NaN in the roc_auc column
        when another model contributes that column. An empty `models_dict`
        yields an empty frame with the four base metric columns.
    """
    base_metric_names = ["accuracy", "precision", "recall", "f1"]

    # Nothing to evaluate: return an empty, correctly shaped frame instead of
    # failing in set_index("Model") on a column that would not exist.
    if not models_dict:
        return pd.DataFrame(columns=base_metric_names, index=pd.Index([], name="Model"))

    # Scorers shared by every model; all of them agree on the positive class.
    base_scorers = {
        "accuracy": make_scorer(accuracy_score),
        "precision": make_scorer(precision_score, pos_label=pos_label),
        "recall": make_scorer(recall_score, pos_label=pos_label),
        "f1": make_scorer(f1_score, pos_label=pos_label),
    }

    # ROC-AUC needs probabilities rather than hard predictions. Passing
    # pos_label as a scorer kwarg forwards it to the helper and also lets
    # scikit-learn pick the predict_proba column of that same class, so the
    # scores and the binarised truth cannot disagree on which class is positive.
    roc_auc_scorer = make_scorer(
        _roc_auc_for_pos_label,
        response_method="predict_proba",
        pos_label=pos_label,
    )

    rows = []
    for name, model in models_dict.items():
        scorers = dict(base_scorers)
        # Only add ROC-AUC if the model supports predict_proba.
        if hasattr(model, "predict_proba"):
            scorers["roc_auc"] = roc_auc_scorer

        cv_results = cross_validate(
            model,
            X,
            y,
            cv=cv,
            scoring=scorers,
            n_jobs=-1,  # Use all CPU cores for speed
        )

        # cross_validate reports each scorer under the key "test_<name>";
        # collapse the per-fold scores to their mean.
        row = {"Model": name}
        row.update(
            {metric: np.mean(cv_results[f"test_{metric}"]) for metric in scorers}
        )
        rows.append(row)

    return pd.DataFrame(rows).set_index("Model")

In [8]:
# Compare every fixture model with 5-fold cross-validation on the training split; the resulting frame is the cell output.
model_cv_metric_compare(models, X_train, y_train, cv=5)

  return _ForkingPickler.loads(res)
  return _ForkingPickler.loads(res)


Unnamed: 0_level_0,accuracy,precision,recall,f1,roc_auc
Model,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Dummy,0.521429,0.521429,1.0,0.685271,0.5
SVM,0.757143,0.76304,0.809524,0.772162,0.869016
KNN,0.714286,0.716078,0.766667,0.735105,0.780513
DecisionTree,0.771429,0.804514,0.751429,0.771691,0.813294
RandomForest,0.735714,0.759034,0.754286,0.737826,0.84859
