# Clase 2: Seguimiento y comparación de modelos con MLflow

Dataset: Churn de clientes - Telco

In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split

# Raw string literal: the Windows path contains backslash sequences (\M, \C, \P,
# \d, \W) that are not valid escapes in a normal string literal and make Python
# emit an "invalid escape sequence" warning. The resulting path value is unchanged.
# NOTE(review): this is still an absolute, machine-specific path; consider
# resolving it relative to the project's data directory.
url = r'C:\MLOPS\Clase_MLOPs\Clase_MLOPs\Proyecto_1\data\WA_Fn-UseC_-Telco-Customer-Churn.csv'

# Load and prepare the data as a single chain, without mutating frames in place.
df = (
    pd.read_csv(url)
    # Remove the customerID column before encoding.
    .drop(columns=['customerID'])
    # Values of TotalCharges that cannot be parsed as numbers become NaN ...
    .assign(TotalCharges=lambda frame: pd.to_numeric(frame['TotalCharges'], errors='coerce'))
    # ... and every row containing a NaN is discarded here.
    .dropna()
    # One-hot encode the remaining non-numeric columns, dropping the first level of each.
    .pipe(pd.get_dummies, drop_first=True)
)

# 'Churn_Yes' is the dummy column produced for the target; everything else is a feature.
X = df.drop(columns='Churn_Yes')
y = df['Churn_Yes']

# Hold out 20% of the rows for testing; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

  url = 'C:\MLOPS\Clase_MLOPs\Clase_MLOPs\Proyecto_1\data\WA_Fn-UseC_-Telco-Customer-Churn.csv'


In [4]:
import mlflow
import mlflow.sklearn
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, f1_score, recall_score, roc_auc_score

# Select the MLflow experiment that the runs below are logged under
# (MLflow creates it when no experiment with this name exists yet).
mlflow.set_experiment(experiment_name="Churn_Telco_Comparacion")

2025/04/10 19:25:05 INFO mlflow.tracking.fluent: Experiment with name 'Churn_Telco_Comparacion' does not exist. Creating a new experiment.


<Experiment: artifact_location='file:///c:/MLOPS/Clase_MLOPs/Clase_MLOPs/Proyecto_1/notebooks/mlruns/644798121893717707', creation_time=1744331105780, experiment_id='644798121893717707', last_update_time=1744331105780, lifecycle_stage='active', name='Churn_Telco_Comparacion', tags={}>

### Entrenamiento: Logistic Regression

In [None]:
# Train a logistic regression inside its own MLflow run and record its
# test-set metrics and the fitted model.
with mlflow.start_run(run_name="Logistic Regression"):
    model = LogisticRegression(max_iter=1000)
    model.fit(X_train, y_train)

    # Hard class labels feed accuracy / F1 / recall; the second predict_proba
    # column (probability of Churn_Yes being true) feeds the AUC.
    y_pred = model.predict(X_test)
    y_proba = model.predict_proba(X_test)[:, 1]

    mlflow.log_param("modelo", "logistic")

    # Metric name -> (scoring function, predictions that function consumes).
    scorers = {
        "accuracy": (accuracy_score, y_pred),
        "f1_score": (f1_score, y_pred),
        "recall": (recall_score, y_pred),
        "auc": (roc_auc_score, y_proba),
    }
    for metric_name, (score_fn, y_hat) in scorers.items():
        mlflow.log_metric(metric_name, score_fn(y_test, y_hat))

    # Persist the fitted estimator as an artifact of this run.
    mlflow.sklearn.log_model(model, "modelo_logistico")

### Entrenamiento: Random Forest

In [None]:
# Train a random forest inside its own MLflow run and record its
# hyperparameters, test-set metrics and the fitted model.
with mlflow.start_run(run_name="Random Forest"):
    # Single source of truth for the tuned hyperparameters: the same values are
    # passed to the estimator and logged to MLflow.
    rf_params = {"n_estimators": 100, "max_depth": 6}

    model = RandomForestClassifier(**rf_params, random_state=42)
    model.fit(X_train, y_train)

    # Hard class labels feed accuracy / F1 / recall; the second predict_proba
    # column (probability of Churn_Yes being true) feeds the AUC.
    y_pred = model.predict(X_test)
    y_proba = model.predict_proba(X_test)[:, 1]

    mlflow.log_param("modelo", "random_forest")
    for param_name, param_value in rf_params.items():
        mlflow.log_param(param_name, param_value)

    # Metric name -> (scoring function, predictions that function consumes).
    scorers = {
        "accuracy": (accuracy_score, y_pred),
        "f1_score": (f1_score, y_pred),
        "recall": (recall_score, y_pred),
        "auc": (roc_auc_score, y_proba),
    }
    for metric_name, (score_fn, y_hat) in scorers.items():
        mlflow.log_metric(metric_name, score_fn(y_test, y_hat))

    # Persist the fitted estimator as an artifact of this run.
    mlflow.sklearn.log_model(model, "modelo_rf")

### Entrenamiento: Support Vector Machine (SVM)

In [5]:
# Train a support vector classifier inside its own MLflow run and record its
# test-set metrics and the fitted model.
# NOTE(review): the features are passed in unscaled; SVMs are sensitive to
# feature scale, so a scaling step may be worth evaluating. Confirm before changing.
with mlflow.start_run(run_name="Support Vector Machine"):
    # probability=True is required for predict_proba, which the AUC below needs.
    model = SVC(probability=True, random_state=42)
    model.fit(X_train, y_train)

    # Hard class labels feed accuracy / F1 / recall; the second predict_proba
    # column (probability of Churn_Yes being true) feeds the AUC.
    y_pred = model.predict(X_test)
    y_proba = model.predict_proba(X_test)[:, 1]

    mlflow.log_param("modelo", "svm")

    # Metric name -> (scoring function, predictions that function consumes).
    scorers = {
        "accuracy": (accuracy_score, y_pred),
        "f1_score": (f1_score, y_pred),
        "recall": (recall_score, y_pred),
        "auc": (roc_auc_score, y_proba),
    }
    for metric_name, (score_fn, y_hat) in scorers.items():
        mlflow.log_metric(metric_name, score_fn(y_test, y_hat))

    # Persist the fitted estimator as an artifact of this run.
    mlflow.sklearn.log_model(model, "modelo_svm")



### Visualizar Experimentos en la UI de MLflow

In [None]:
# Ejecuta en una terminal (no dentro del notebook), desde la carpeta de este
# notebook, que es donde MLflow creó el directorio `mlruns/`:
# mlflow ui
# Luego abre en el navegador: http://127.0.0.1:5000

### Cambiando los hiperparámetros con GridSearchCV

In [6]:
from sklearn.model_selection import GridSearchCV
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, f1_score, recall_score, roc_auc_score
import mlflow
import mlflow.sklearn

# Hyperparameter search space for the SVC: 4 * 3 * 2 * 2 * 2 = 96 combinations.
param_grid = {
    'C': [0.1, 1, 10, 100],  # regularization parameter C
    'kernel': ['linear', 'rbf', 'poly'],  # kernel type
    'gamma': ['scale', 'auto'],  # kernel coefficient
    'class_weight': [None, 'balanced'],  # class weights
    'shrinking': [True, False],  # shrinking heuristic on/off
}

# Base estimator for the search. Probability estimates are deliberately left
# disabled here: the search is scored on accuracy, which only needs predict(),
# whereas probability=True makes every one of the 96 * 5 = 480 fits run an extra
# internal cross-validation to calibrate probabilities. They are enabled only
# for the single winning configuration below.
model = SVC(random_state=42)

# Exhaustive grid search with 5-fold cross-validation, using all CPU cores.
grid_search = GridSearchCV(estimator=model, param_grid=param_grid,
                           cv=5, scoring='accuracy', n_jobs=-1, verbose=1)

with mlflow.start_run(run_name="SVM GridSearchCV"):
    grid_search.fit(X_train, y_train)

    # Enable probability estimates on the winning estimator and refit it once on
    # the full training set. set_params returns the same object, so
    # grid_search.best_estimator_ and best_model stay one and the same model and
    # both support predict_proba (needed for the AUC).
    best_model = grid_search.best_estimator_.set_params(probability=True)
    best_model.fit(X_train, y_train)

    # Evaluate on the held-out test set.
    y_pred = best_model.predict(X_test)
    y_proba = best_model.predict_proba(X_test)[:, 1]

    # Record the selected hyperparameters under the "best_<name>" keys.
    mlflow.log_param("modelo", "svm_grid_search")
    best_params = grid_search.best_params_
    for param_name in ('C', 'kernel', 'gamma', 'class_weight', 'shrinking'):
        mlflow.log_param(f"best_{param_name}", best_params[param_name])

    # Record the test-set metrics.
    mlflow.log_metric("accuracy", accuracy_score(y_test, y_pred))
    mlflow.log_metric("f1_score", f1_score(y_test, y_pred))
    mlflow.log_metric("recall", recall_score(y_test, y_pred))
    mlflow.log_metric("auc", roc_auc_score(y_test, y_proba))

    # Persist the tuned estimator as an artifact of this run.
    mlflow.sklearn.log_model(best_model, "modelo_svm_grid_search")

Fitting 5 folds for each of 96 candidates, totalling 480 fits


KeyboardInterrupt: 