In [15]:
import pickle
import pandas as pd
import numpy as np

from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

In [16]:
def metricas_generales(modelo,
                       x_val: pd.core.frame.DataFrame,
                       y_val: pd.core.series.Series,
                       nombre: str):
    """
    Compute standard classification metrics for a fitted model.
    ------------------------------------------------------------
    Parameters:
        modelo:
            Fitted estimator; only its ``predict`` method is used.
        x_val: pandas.core.frame.DataFrame
            Explanatory variables used to validate the model.
        y_val: pandas.core.series.Series
            True labels for the rows in ``x_val``.
        nombre: str
            Name that identifies the model when comparing results.
    ------------------------------------------------------------
    Returns:
        Dictionary with the key 'Model' (the given name) followed by
        'Accuracy', 'Precision', 'Recall' and 'F1-score', each rounded
        to 4 decimals. Every scorer is called with its scikit-learn
        default arguments.
    """
    # Predict once and reuse the predictions for every metric.
    predictions = modelo.predict(x_val)

    # (output key, scorer) pairs; the order fixes the key order of the result.
    scorers = (
        ('Accuracy', accuracy_score),
        ('Precision', precision_score),
        ('Recall', recall_score),
        ('F1-score', f1_score),
    )

    report = {'Model': nombre}
    for label, scorer in scorers:
        report[label] = np.round(scorer(y_val, predictions), 4)

    return report

In [19]:
def _load_artifact(path: str):
    """Unpickle and return one artifact stored at ``path``."""
    # SECURITY: pickle.load can execute arbitrary code while deserialising.
    # Only load artifact files that come from a trusted source.
    with open(path, 'rb') as f:
        return pickle.load(f)


def _build_model_input(features, one_hot, scaler, order_cols, num_cols):
    """Encode and scale ``features`` and return a DataFrame whose columns are ``order_cols``."""
    # Numeric columns as they appear in the incoming data.
    num_cols_t = features.select_dtypes(include=['float', 'int'], exclude='object').columns.tolist()

    encoded = one_hot.transform(features)

    # The encoder output is labelled with every entry of order_cols except the
    # last three, followed by the numeric columns detected above.
    # NOTE(review): presumably the last three entries of order_cols are the
    # numeric features — TODO confirm against the training notebook.
    cols_names = order_cols[:-3] + num_cols_t
    df = pd.DataFrame(encoded, columns=cols_names)

    # 'ChargesDaily' is dropped from the input, so it must not be requested
    # from the scaler either. A new list is built instead of mutating num_cols.
    scaler_cols = [col for col in num_cols if col != 'ChargesDaily']
    df[num_cols_t] = scaler.transform(df[scaler_cols])

    df.columns = order_cols
    return df


def run_pipeline(file: str, mode: str='production'):
    """
    Run the champion model over a JSON-lines file.
    ------------------------------------------------------------
    Parameters:
        file: str
            Path or URL passed to ``pandas.read_json(..., lines=True)``.
            The data must contain the columns 'CustomerID' and
            'ChargesDaily'; in 'monitor' mode it must also contain 'Churn'.
        mode: str
            'production' (default): predict and save per-customer results.
            'monitor': evaluate the model against the 'Churn' labels.
    ------------------------------------------------------------
    Returns:
        'production': DataFrame with the columns 'CustomerID',
            'Probabilidad Churn' (second column of ``predict_proba``) and
            'Churn' (predicted label). It is also written to
            ``pipeline_results_<timestamp>.json``.
        'monitor': dictionary produced by ``metricas_generales``. It is also
            written to ``monitor_results_<timestamp>.json``.
    ------------------------------------------------------------
    Raises:
        ValueError: if ``mode`` is neither 'production' nor 'monitor'.

    The artifacts (scaler, encoders, model and column lists) are read from
    ``*.pkl`` files in the current working directory.
    """
    from datetime import datetime

    # Fail fast, before any file or network access, instead of silently
    # returning None for an unknown mode.
    if mode not in ('production', 'monitor'):
        raise ValueError(
            f"Unsupported mode {mode!r}; expected 'production' or 'monitor'."
        )

    scaler = _load_artifact('robust_scaler.pkl')
    one_hot = _load_artifact('linear_one_hot_encoder.pkl')
    target_encoder = _load_artifact('target_encoder.pkl')
    model = _load_artifact('champion_model.pkl')
    order_cols = _load_artifact('columns_order.pkl')
    cat_cols = _load_artifact('cat_cols.pkl')  # loaded as before; not used below
    num_cols = _load_artifact('num_cols.pkl')

    data = pd.read_json(file, lines=True)

    # Timestamp that tags the output file (and the model name in monitor mode).
    log = datetime.now().strftime('%Y%m%d_%H%M%S')

    if mode == 'production':
        customers_ids = data['CustomerID']
        features = data.drop(columns=['CustomerID', 'ChargesDaily'])
        df = _build_model_input(features, one_hot, scaler, order_cols, num_cols)

        pred_proba = model.predict_proba(df)[:, 1]
        pred_labels = model.predict(df)
        results = pd.DataFrame({'CustomerID': customers_ids,
                                'Probabilidad Churn': pred_proba,
                                'Churn': pred_labels})

        results.to_json(f'pipeline_results_{log}.json')
        return results

    # mode == 'monitor'
    data = data.drop(columns=['CustomerID', 'ChargesDaily'])

    y = target_encoder.transform(data['Churn'])
    y = y.reshape(-1, 1)

    # Drop the label before detecting numeric columns, so a numeric 'Churn'
    # can never be mistaken for a feature.
    features = data.drop(columns='Churn')
    df = _build_model_input(features, one_hot, scaler, order_cols, num_cols)

    metricas = metricas_generales(model, df, y, f'Champion_{log}')
    pd.DataFrame([metricas]).to_json(f'monitor_results_{log}.json')
    return metricas

In [20]:
# Remote JSON-lines dataset used to evaluate the champion model
# (judging by the file name, artificial data that includes labels).
URL = 'https://raw.githubusercontent.com/ignaciomajo/proyecto_TelecomX_parte_2/refs/heads/main/src/datos_artificiales_labeled.json'

# Monitor mode returns the metrics dictionary, shown as this cell's output,
# and also writes it to monitor_results_<timestamp>.json.
run_pipeline(file=URL, mode='monitor')

{'Model': 'Champion_20250705_173610',
 'Accuracy': 0.7946,
 'Precision': 0.7771,
 'Recall': 0.8263,
 'F1-score': 0.8009}