# MLflow Experiment Tracking
# Seguimiento de Experimentos con MLflow

Este notebook demuestra cómo usar MLflow para el seguimiento de experimentos de machine learning con los datos del proyecto *Practical Statistics for Data Scientists*.

## Objetivos:
- Configurar MLflow para seguimiento de experimentos
- Entrenar múltiples modelos de clasificación
- Registrar parámetros, métricas y artefactos
- Comparar diferentes modelos y configuraciones
- Usar MLflow UI para visualizar resultados

## Instalación de MLflow

Si no tienes MLflow instalado, descomenta y ejecuta la siguiente celda:

In [None]:
# !pip install mlflow

## Importar librerías necesarias

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from pathlib import Path
import warnings
warnings.filterwarnings('ignore')

# MLflow
import mlflow
import mlflow.sklearn
from mlflow.tracking import MlflowClient

# Scikit-learn
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.naive_bayes import GaussianNB
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score
from sklearn.metrics import confusion_matrix, classification_report

# Configure the plotting style
plt.style.use('seaborn-v0_8')
# IPython magic (not plain Python): show matplotlib figures inline in the notebook
%matplotlib inline

# Confirm the imports worked and report the MLflow version in use
print("📚 Librerías importadas exitosamente")
print(f"🔬 MLflow versión: {mlflow.__version__}")

## Configuración de MLflow

In [None]:
# Select the experiment every following run is recorded under
experiment_name = "Loan Default Classification"
mlflow.set_experiment(experiment_name)

# Look the experiment up again to report its name, id and artifact location
experiment = mlflow.get_experiment_by_name(experiment_name)
experiment_summary = (
    f"🔬 Experimento: {experiment.name}",
    f"📁 Experiment ID: {experiment.experiment_id}",
    f"📂 Artifact Location: {experiment.artifact_location}",
)
print("\n".join(experiment_summary))

## Carga y preparación de datos

In [None]:
# Locate the data directory: use the project's `common` helper when it can be
# imported, otherwise fall back to the `data` folder two levels above the
# current working directory.
try:
    import common
    DATA = common.dataDirectory()
except ImportError:
    DATA = Path().resolve().parent.parent / 'data'

# Read the loan dataset
loan_csv_path = DATA / 'loan3000.csv'
loan_data = pd.read_csv(loan_csv_path)

n_rows, n_cols = loan_data.shape
outcome_counts = loan_data['outcome'].value_counts().to_dict()
print(f"📊 Dataset cargado: {n_rows} filas, {n_cols} columnas")
print(f"🎯 Variables objetivo: {outcome_counts}")

# Last expression of the cell, so the notebook renders the first rows
loan_data.head()

In [None]:
# Quick look at the data.
# DataFrame.info() writes its summary to stdout itself and returns None, so it
# is called directly; wrapping it in print() appended a stray "None" line.
print("📈 Información del dataset:")
loan_data.info()
print("\n📊 Estadísticas descriptivas:")
print(loan_data.describe())

## Preparación de datos para ML

In [None]:
# Model inputs: two numeric predictors, with the loan outcome as the label
features = ['borrower_score', 'payment_inc_ratio']
target = 'outcome'
X, y = loan_data[features], loan_data[target]

# Encode the outcome labels as integers; `le` keeps the original class names
le = LabelEncoder()
y_encoded = le.fit_transform(y)

# Stratified 80/20 train/test split with a fixed seed
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y_encoded,
    test_size=0.2,
    stratify=y_encoded,
    random_state=42,
)

# Standardize the features; the scaler is fitted on the training split only
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

train_rows, test_rows = X_train.shape[0], X_test.shape[0]
print(f"📊 Datos de entrenamiento: {train_rows} muestras")
print(f"📊 Datos de prueba: {test_rows} muestras")
print(f"🏷️ Clases: {le.classes_}")

## Función para entrenar y registrar modelos

In [None]:
def train_and_log_model(model, model_name, X_train, X_test, y_train, y_test, params=None,
                        class_names=None):
    """
    Train a classifier, evaluate it and record the results in one MLflow run.

    The run stores descriptive parameters (model type, sample counts, feature
    names and every entry of ``params``), test-set metrics (accuracy and
    weighted precision/recall/F1, plus AUC when the model has
    ``predict_proba``), the mean and standard deviation of 5-fold
    cross-validated accuracy on the training data, the fitted model, a
    confusion-matrix image and a text classification report.

    Args:
        model: Estimator exposing ``fit``/``predict``; it is fitted in place.
        model_name: Run name; also logged as ``model_type`` and used in the
            artifact file names (spaces replaced by underscores).
        X_train, X_test: Training and test features. When ``X_train`` has a
            ``columns`` attribute its column names are logged as features.
        y_train, y_test: Training and test labels. AUC is computed from
            column 1 of ``predict_proba``, so a binary target is assumed.
        params: Optional mapping of extra parameters to log. They are only
            logged here; they are not applied to ``model``.
        class_names: Optional labels for the confusion matrix and the report.
            Defaults to the module-level ``le.classes_``.

    Returns:
        The fitted ``model``.
    """
    # Local imports: only needed for the temporary artifact files below.
    import os
    import tempfile

    if class_names is None:
        class_names = le.classes_
    file_tag = model_name.replace(' ', '_')

    with mlflow.start_run(run_name=model_name):
        # Descriptive parameters of the run
        mlflow.log_param("model_type", model_name)
        mlflow.log_param("train_samples", len(X_train))
        mlflow.log_param("test_samples", len(X_test))
        mlflow.log_param(
            "features",
            list(X_train.columns) if hasattr(X_train, 'columns') else "scaled_features",
        )
        for key, value in (params or {}).items():
            mlflow.log_param(key, value)

        # Fit and predict on the held-out split
        model.fit(X_train, y_train)
        y_pred = model.predict(X_test)
        y_pred_proba = model.predict_proba(X_test)[:, 1] if hasattr(model, 'predict_proba') else None

        # Test-set metrics
        accuracy = accuracy_score(y_test, y_pred)
        precision = precision_score(y_test, y_pred, average='weighted')
        recall = recall_score(y_test, y_pred, average='weighted')
        f1 = f1_score(y_test, y_pred, average='weighted')

        mlflow.log_metric("accuracy", accuracy)
        mlflow.log_metric("precision", precision)
        mlflow.log_metric("recall", recall)
        mlflow.log_metric("f1_score", f1)

        # AUC only when the model produces probabilities
        auc = None
        if y_pred_proba is not None:
            auc = roc_auc_score(y_test, y_pred_proba)
            mlflow.log_metric("auc", auc)

        # 5-fold cross-validated accuracy on the training data
        cv_scores = cross_val_score(model, X_train, y_train, cv=5, scoring='accuracy')
        mlflow.log_metric("cv_mean", cv_scores.mean())
        mlflow.log_metric("cv_std", cv_scores.std())

        mlflow.sklearn.log_model(model, "model")

        # Confusion matrix figure
        cm = confusion_matrix(y_test, y_pred)
        fig = plt.figure(figsize=(8, 6))
        sns.heatmap(cm, annot=True, fmt='d', cmap='Blues',
                    xticklabels=class_names, yticklabels=class_names)
        plt.title(f'Matriz de Confusión - {model_name}')
        plt.xlabel('Predicción')
        plt.ylabel('Actual')
        plt.tight_layout()

        # Write the artifacts to a throw-away directory so nothing is left in
        # (or overwritten inside) the working directory; the artifact names
        # stored in MLflow are the same file names as before.
        with tempfile.TemporaryDirectory() as artifact_dir:
            cm_path = os.path.join(artifact_dir, f"confusion_matrix_{file_tag}.png")
            fig.savefig(cm_path)
            mlflow.log_artifact(cm_path)

            report_path = os.path.join(artifact_dir, f"classification_report_{file_tag}.txt")
            with open(report_path, "w", encoding="utf-8") as f:
                f.write(classification_report(y_test, y_pred, target_names=class_names))
            mlflow.log_artifact(report_path)

        plt.show()
        # Release the figure explicitly; repeated calls otherwise accumulate
        # open figures outside the inline notebook backend.
        plt.close(fig)

        print(f"✅ {model_name} entrenado y registrado en MLflow")
        print(f"📊 Accuracy: {accuracy:.4f}")
        print(f"📊 F1-Score: {f1:.4f}")
        if auc is not None:
            print(f"📊 AUC: {auc:.4f}")
        print("-" * 50)

        return model

## Entrenar múltiples modelos

In [None]:
# Hyperparameters of each candidate model. Every estimator below is built
# from the same dict that is logged to MLflow, so the logged values are the
# ones actually used for training (as the Random Forest hyperparameter cell
# does). Previously the dicts were only logged: Logistic Regression was
# recorded with solver="liblinear" while being trained with the default solver.
logreg_params = {"C": 1.0, "solver": "liblinear"}
forest_params = {"n_estimators": 100, "max_depth": None}
svm_params = {"C": 1.0, "kernel": "rbf"}
naive_bayes_params = {}
tree_params = {"max_depth": None, "min_samples_split": 2}

# (estimator, run name, logged hyperparameters)
models_to_test = [
    (LogisticRegression(random_state=42, **logreg_params), "Logistic Regression", logreg_params),
    (RandomForestClassifier(random_state=42, **forest_params), "Random Forest", forest_params),
    # probability=True is required so the SVM exposes predict_proba for AUC
    (SVC(random_state=42, probability=True, **svm_params), "Support Vector Machine", svm_params),
    (GaussianNB(**naive_bayes_params), "Naive Bayes", naive_bayes_params),
    (DecisionTreeClassifier(random_state=42, **tree_params), "Decision Tree", tree_params),
]

print("🚀 Iniciando entrenamiento de modelos con MLflow...\n")

# Fitted models keyed by run name
trained_models = {}

for model, name, params in models_to_test:
    print(f"🔄 Entrenando {name}...")
    trained_model = train_and_log_model(
        model, name,
        pd.DataFrame(X_train_scaled, columns=features),
        pd.DataFrame(X_test_scaled, columns=features),
        y_train, y_test, params
    )
    trained_models[name] = trained_model

print("\n🎉 ¡Todos los modelos han sido entrenados y registrados!")

## Experimento con hiperparámetros

In [None]:
# Try several Random Forest settings, one MLflow run per configuration
print("🔬 Experimentando con hiperparámetros de Random Forest...\n")

rf_params = [
    dict(n_estimators=50, max_depth=5),
    dict(n_estimators=100, max_depth=10),
    dict(n_estimators=200, max_depth=15),
    dict(n_estimators=150, max_depth=None),
]

# The scaled splits are the same for every configuration, so wrap them once
rf_train_frame = pd.DataFrame(X_train_scaled, columns=features)
rf_test_frame = pd.DataFrame(X_test_scaled, columns=features)

for config_number, params in enumerate(rf_params, start=1):
    model_name = f"Random Forest - Config {config_number}"
    model = RandomForestClassifier(random_state=42, **params)

    print(f"🌳 Probando {model_name} con parámetros: {params}")
    train_and_log_model(
        model, model_name,
        rf_train_frame, rf_test_frame,
        y_train, y_test, params,
    )

print("\n✅ Experimentos con hiperparámetros completados!")

## Consultar experimentos registrados

In [None]:
# Fetch every run recorded for the loan experiment
client = MlflowClient()
experiment = mlflow.get_experiment_by_name(experiment_name)
runs = client.search_runs([experiment.experiment_id])

print(f"📊 Total de runs en el experimento: {len(runs)}")
print("\n🏆 Resumen de resultados:")
print("-" * 80)

# Build one row per model run. Runs without an 'accuracy' metric (the
# "Model Comparison Summary" run this notebook logs into the same experiment)
# are skipped; otherwise, when the notebook is executed again, they show up as
# model "Unknown" with accuracy 0.
results_data = []
for run in runs:
    metrics = run.data.metrics
    params = run.data.params

    if 'accuracy' not in metrics:
        continue

    results_data.append({
        'run_id': run.info.run_id[:8],
        'model_type': params.get('model_type', 'Unknown'),
        'accuracy': metrics.get('accuracy', 0),
        'f1_score': metrics.get('f1_score', 0),
        # Models without predict_proba log no AUC; 0 marks "not available"
        'auc': metrics.get('auc', 0),
        'cv_mean': metrics.get('cv_mean', 0),
        'start_time': run.info.start_time
    })

# Fail with a clear message instead of a KeyError from sorting an empty frame
if not results_data:
    raise RuntimeError(
        "No se encontraron runs de modelos con la métrica 'accuracy' en el experimento"
    )

results_df = pd.DataFrame(results_data)
results_df = results_df.sort_values('accuracy', ascending=False)

print(results_df.to_string(index=False))

# Best model = first row after sorting by accuracy (descending)
best_model = results_df.iloc[0]
print(f"\n🥇 Mejor modelo: {best_model['model_type']}")
print(f"📊 Accuracy: {best_model['accuracy']:.4f}")
print(f"📊 F1-Score: {best_model['f1_score']:.4f}")
print(f"📊 AUC: {best_model['auc']:.4f}")

## Visualización de resultados

In [None]:
# Model comparison chart: one horizontal-bar panel per metric on a 2x2 grid
plt.figure(figsize=(12, 8))

# The AUC panel only shows rows whose auc value is greater than 0
auc_data = results_df[results_df['auc'] > 0]

# (metric column, x-axis label, panel title, rows to plot), in grid order
metric_panels = [
    ('accuracy', 'Accuracy', 'Comparación de Accuracy por Modelo', results_df),
    ('f1_score', 'F1-Score', 'Comparación de F1-Score por Modelo', results_df),
    ('auc', 'AUC', 'Comparación de AUC por Modelo', auc_data),
    ('cv_mean', 'CV Mean Accuracy', 'Comparación de CV Mean por Modelo', results_df),
]

for panel_number, (metric, axis_label, panel_title, panel_rows) in enumerate(metric_panels, start=1):
    plt.subplot(2, 2, panel_number)
    plt.barh(panel_rows['model_type'], panel_rows[metric])
    plt.xlabel(axis_label)
    plt.title(panel_title)
    plt.xlim(0, 1)

plt.tight_layout()
plt.savefig('model_comparison.png', dpi=300, bbox_inches='tight')
plt.show()

# Store the chart and the headline numbers in a dedicated summary run
with mlflow.start_run(run_name="Model Comparison Summary"):
    mlflow.log_artifact('model_comparison.png')
    mlflow.log_metric("total_models_tested", len(results_df))
    mlflow.log_metric("best_accuracy", results_df['accuracy'].max())
    mlflow.log_param("best_model_type", best_model['model_type'])

## Instrucciones para usar MLflow UI

## 🚀 Cómo usar MLflow UI

Para visualizar todos los experimentos en la interfaz web de MLflow:

### 1. Abrir terminal/PowerShell
```bash
# En la carpeta del proyecto
mlflow ui
```

### 2. Abrir navegador
Ve a: http://localhost:5000

### 3. Explorar experimentos
- 📊 Ver todos los runs y métricas
- 📈 Comparar modelos lado a lado
- 📁 Descargar artefactos (gráficos, modelos)
- 🔍 Filtrar y ordenar resultados
- 📝 Agregar notas y tags

### 4. Funcionalidades útiles
- **Compare runs**: Seleccionar múltiples runs para comparar
- **Parallel coordinates**: Visualizar relaciones entre parámetros y métricas
- **Scatter plots**: Gráficos de dispersión de métricas
- **Model registry**: Registrar los mejores modelos

### 5. En Docker
```powershell
# Si usas Docker
.\docker-helper.ps1 shell
# Dentro del contenedor:
mlflow ui --host 0.0.0.0 --port 5000
```

## Próximos pasos

### 🎯 Sugerencias para experimentar más:

1. **Más datasets**: Probar con `loan_data.csv` completo
2. **Feature engineering**: Crear nuevas características
3. **Más modelos**: XGBoost, LightGBM, Neural Networks
4. **Hyperparameter tuning**: Grid search, Random search
5. **Ensemble methods**: Voting, Stacking
6. **Cross-validation**: Más estrategias de validación
7. **Model registry**: Registrar modelos en producción
8. **A/B testing**: Comparar modelos en producción

### 📚 Recursos adicionales:
- [MLflow Documentation](https://mlflow.org/docs/latest/index.html)
- [MLflow Examples](https://github.com/mlflow/mlflow/tree/master/examples)
- [MLflow Tutorials](https://mlflow.org/docs/latest/tutorials-and-examples/index.html)

In [None]:
# Remove the temporary files produced by this notebook
import os
import glob

# Only delete files this notebook itself writes. A blanket "*.png" / "*.txt"
# glob would also remove unrelated files that happen to live in the working
# directory. The iris_* entries cover files that may remain from a previous
# execution of the later Iris cells.
temp_file_patterns = [
    "confusion_matrix_*.png",
    "classification_report_*.txt",
    "model_comparison.png",
    "iris_confusion_matrix_*.png",
    "iris_classification_report_*.txt",
    "iris_model_comparison.png",
    "iris_knn_analysis.png",
]

for pattern in temp_file_patterns:
    for file in glob.glob(pattern):
        try:
            os.remove(file)
        except OSError as error:
            # Cleanup stays best-effort, but a failure is reported, not hidden
            print(f"⚠️ No se pudo eliminar {file}: {error}")

print("🧹 Archivos temporales limpiados")
print("\n✅ ¡Notebook completado!")
print("🚀 Ejecuta 'mlflow ui' en terminal para explorar los resultados")

# Ejemplo clásico: Dataset Iris

Ahora vamos a demostrar MLflow con el famoso dataset Iris, un ejemplo clásico de clasificación multiclase en machine learning.

## Sobre el dataset Iris:
- **150 muestras** de flores iris
- **4 características**: longitud y ancho de sépalo y pétalo
- **3 clases**: Iris-setosa, Iris-versicolor, Iris-virginica
- **Problema**: Clasificación multiclase
- **Objetivo**: Predecir la especie de iris basándose en las medidas florales

In [None]:
# Load the Iris dataset bundled with scikit-learn
from sklearn.datasets import load_iris
from sklearn.preprocessing import StandardScaler

iris = load_iris()
X_iris, y_iris = iris.data, iris.target

# DataFrame view for display: features, numeric species code and species name
species_labels = {0: 'setosa', 1: 'versicolor', 2: 'virginica'}
iris_df = pd.DataFrame(X_iris, columns=iris.feature_names)
iris_df['species'] = iris.target
iris_df['species_name'] = iris_df['species'].map(species_labels)

class_distribution = pd.Series(iris.target).value_counts().sort_index().to_dict()
print("🌸 Dataset Iris cargado exitosamente")
print(f"📊 Forma del dataset: {iris_df.shape}")
print(f"🏷️ Características: {list(iris.feature_names)}")
print(f"🎯 Clases: {list(iris.target_names)}")
print(f"📈 Distribución de clases: {class_distribution}")

# Last expression of the cell, so the notebook renders the first rows
print("\n📋 Primeras 5 filas del dataset:")
iris_df.head()

In [None]:
# Exploratory figure for the Iris dataset: a 2x2 grid of panels
fig, axes = plt.subplots(2, 2, figsize=(15, 12))
fig.suptitle('🌸 Análisis Exploratorio del Dataset Iris', fontsize=16, fontweight='bold')

# 1. Feature distributions: reshape to long format (one row per
#    species/feature/value) so a single boxplot can group by feature and species
iris_df_melted = iris_df.melt(id_vars=['species_name'], 
                              value_vars=iris.feature_names,
                              var_name='feature', value_name='value')

axes[0, 0].set_title('Distribución de Características por Especie')
sns.boxplot(data=iris_df_melted, x='feature', y='value', hue='species_name', ax=axes[0, 0])
axes[0, 0].tick_params(axis='x', rotation=45)
axes[0, 0].legend(title='Especie')

# 2. Correlation matrix of the four features
correlation_matrix = iris_df[iris.feature_names].corr()
sns.heatmap(correlation_matrix, annot=True, cmap='coolwarm', center=0, ax=axes[0, 1])
axes[0, 1].set_title('Matriz de Correlación de Características')

# 3. Scatter plot of sepal length vs sepal width, colored by species code
scatter = axes[1, 0].scatter(iris_df['sepal length (cm)'], iris_df['sepal width (cm)'], 
                           c=iris_df['species'], cmap='viridis', alpha=0.7)
axes[1, 0].set_xlabel('Longitud del Sépalo (cm)')
axes[1, 0].set_ylabel('Ancho del Sépalo (cm)')
axes[1, 0].set_title('Sépalo: Longitud vs Ancho')
plt.colorbar(scatter, ax=axes[1, 0])

# 4. Class distribution as a pie chart
species_counts = iris_df['species_name'].value_counts()
axes[1, 1].pie(species_counts.values, labels=species_counts.index, autopct='%1.1f%%', startangle=90)
axes[1, 1].set_title('Distribución de Especies')

plt.tight_layout()
plt.show()

# Descriptive statistics of every feature, grouped by species
print("\n📊 Estadísticas descriptivas por especie:")
print(iris_df.groupby('species_name')[iris.feature_names].describe().round(2))

## Preparación de datos Iris para MLflow

In [None]:
# Features and labels straight from the loaded Iris data
X_iris, y_iris = iris.data, iris.target

# Stratified 80/20 train/test split with a fixed seed
X_train_iris, X_test_iris, y_train_iris, y_test_iris = train_test_split(
    X_iris,
    y_iris,
    test_size=0.2,
    stratify=y_iris,
    random_state=42,
)

# Standardize the features; the scaler is fitted on the training split only
scaler_iris = StandardScaler()
X_train_iris_scaled = scaler_iris.fit_transform(X_train_iris)
X_test_iris_scaled = scaler_iris.transform(X_test_iris)

iris_train_rows, iris_test_rows = X_train_iris.shape[0], X_test_iris.shape[0]
print("🌸 Datos Iris preparados:")
print(f"📊 Entrenamiento: {iris_train_rows} muestras")
print(f"📊 Prueba: {iris_test_rows} muestras")
print(f"🏷️ Características: {iris.feature_names}")
print(f"🎯 Clases: {iris.target_names}")

# Switch MLflow to a separate experiment for the Iris runs
iris_experiment_name = "Iris Species Classification"
mlflow.set_experiment(iris_experiment_name)
iris_experiment = mlflow.get_experiment_by_name(iris_experiment_name)

print(f"\n🔬 Experimento Iris creado: {iris_experiment.name}")
print(f"📁 Experiment ID: {iris_experiment.experiment_id}")

In [None]:
def train_and_log_iris_model(model, model_name, X_train, X_test, y_train, y_test, params=None,
                             class_names=None):
    """
    Train a multiclass classifier and record the results in one MLflow run.

    The run (named ``"Iris - <model_name>"``) stores descriptive parameters,
    test-set accuracy, weighted and macro precision/recall/F1, one-vs-rest and
    one-vs-one AUC when the model has ``predict_proba``, the mean and standard
    deviation of 5-fold cross-validated accuracy on the training data, the
    fitted model, a confusion-matrix image and a text classification report.

    Args:
        model: Estimator exposing ``fit``/``predict``; it is fitted in place.
        model_name: Logged as ``model_type`` and used in the run name and the
            artifact file names (spaces replaced by underscores).
        X_train, X_test: Training and test features; ``X_train.shape[1]`` is
            logged as the number of features.
        y_train, y_test: Training and test labels.
        params: Optional mapping of extra parameters to log. They are only
            logged here; they are not applied to ``model``.
        class_names: Optional labels for the confusion matrix and the report.
            Defaults to the module-level ``iris.target_names``.

    Returns:
        The fitted ``model``.
    """
    # Local imports: only needed for the temporary artifact files below.
    import os
    import tempfile

    if class_names is None:
        class_names = iris.target_names
    file_tag = model_name.replace(' ', '_')

    with mlflow.start_run(run_name=f"Iris - {model_name}"):
        # Descriptive parameters of the run
        mlflow.log_param("model_type", model_name)
        mlflow.log_param("dataset", "Iris")
        mlflow.log_param("problem_type", "multiclass_classification")
        # Counted from the training labels instead of being hard-coded to 3
        mlflow.log_param("n_classes", len(np.unique(y_train)))
        mlflow.log_param("train_samples", len(X_train))
        mlflow.log_param("test_samples", len(X_test))
        mlflow.log_param("n_features", X_train.shape[1])
        for key, value in (params or {}).items():
            mlflow.log_param(key, value)

        # Fit and predict on the held-out split
        model.fit(X_train, y_train)
        y_pred = model.predict(X_test)
        y_pred_proba = model.predict_proba(X_test) if hasattr(model, 'predict_proba') else None

        # Support-weighted averages
        accuracy = accuracy_score(y_test, y_pred)
        precision = precision_score(y_test, y_pred, average='weighted')
        recall = recall_score(y_test, y_pred, average='weighted')
        f1 = f1_score(y_test, y_pred, average='weighted')

        # Macro averages (every class counts equally)
        precision_macro = precision_score(y_test, y_pred, average='macro')
        recall_macro = recall_score(y_test, y_pred, average='macro')
        f1_macro = f1_score(y_test, y_pred, average='macro')

        mlflow.log_metric("accuracy", accuracy)
        mlflow.log_metric("precision_weighted", precision)
        mlflow.log_metric("recall_weighted", recall)
        mlflow.log_metric("f1_weighted", f1)
        mlflow.log_metric("precision_macro", precision_macro)
        mlflow.log_metric("recall_macro", recall_macro)
        mlflow.log_metric("f1_macro", f1_macro)

        # Multiclass AUC (one-vs-rest and one-vs-one) needs class probabilities
        auc_ovr = auc_ovo = None
        if y_pred_proba is not None:
            auc_ovr = roc_auc_score(y_test, y_pred_proba, multi_class='ovr')
            auc_ovo = roc_auc_score(y_test, y_pred_proba, multi_class='ovo')
            mlflow.log_metric("auc_ovr", auc_ovr)
            mlflow.log_metric("auc_ovo", auc_ovo)

        # 5-fold cross-validated accuracy on the training data
        cv_scores = cross_val_score(model, X_train, y_train, cv=5, scoring='accuracy')
        mlflow.log_metric("cv_mean", cv_scores.mean())
        mlflow.log_metric("cv_std", cv_scores.std())

        mlflow.sklearn.log_model(model, "model")

        # Confusion matrix figure
        cm = confusion_matrix(y_test, y_pred)
        fig = plt.figure(figsize=(8, 6))
        sns.heatmap(cm, annot=True, fmt='d', cmap='Blues',
                    xticklabels=class_names, yticklabels=class_names)
        plt.title(f'Matriz de Confusión - {model_name} (Iris)')
        plt.xlabel('Predicción')
        plt.ylabel('Actual')
        plt.tight_layout()

        # Write the artifacts to a throw-away directory so nothing is left in
        # (or overwritten inside) the working directory; the artifact names
        # stored in MLflow are the same file names as before.
        with tempfile.TemporaryDirectory() as artifact_dir:
            confusion_file = os.path.join(artifact_dir, f"iris_confusion_matrix_{file_tag}.png")
            fig.savefig(confusion_file)
            mlflow.log_artifact(confusion_file)

            report = classification_report(y_test, y_pred, target_names=class_names)
            report_file = os.path.join(artifact_dir, f"iris_classification_report_{file_tag}.txt")
            with open(report_file, "w", encoding="utf-8") as f:
                f.write(report)
            mlflow.log_artifact(report_file)

        plt.show()
        # Release the figure explicitly; repeated calls otherwise accumulate
        # open figures outside the inline notebook backend.
        plt.close(fig)

        print(f"🌸 {model_name} entrenado en dataset Iris")
        print(f"📊 Accuracy: {accuracy:.4f}")
        print(f"📊 F1-Score (weighted): {f1:.4f}")
        print(f"📊 F1-Score (macro): {f1_macro:.4f}")
        if y_pred_proba is not None:
            print(f"📊 AUC (OvR): {auc_ovr:.4f}")
            print(f"📊 AUC (OvO): {auc_ovo:.4f}")
        print(f"📊 CV Score: {cv_scores.mean():.4f} (+/- {cv_scores.std() * 2:.4f})")
        print("-" * 60)

        return model

## Entrenar múltiples modelos con Iris

Vamos a probar diferentes algoritmos de machine learning con el dataset Iris para comparar su rendimiento en clasificación multiclase.

In [None]:
# Extra estimators used only in the Iris comparison
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.linear_model import RidgeClassifier

# Hyperparameters of each candidate model. Every estimator below is built
# from the same dict that is logged to MLflow, so the logged values always
# match what is trained.
# Logistic Regression used to be logged as solver="liblinear",
# multi_class="ovr" although the estimator was created with its defaults.
# The logged solver now names the scikit-learn default ("lbfgs"), which keeps
# the trained model unchanged.
# NOTE(review): liblinear/ovr was not applied instead because recent
# scikit-learn releases deprecate it for multiclass targets — confirm against
# the installed version if that configuration is really wanted.
iris_logreg_params = {"C": 1.0, "solver": "lbfgs"}
iris_forest_params = {"n_estimators": 100, "max_depth": None, "min_samples_split": 2}
iris_svm_params = {"C": 1.0, "kernel": "rbf", "gamma": "scale"}
iris_naive_bayes_params = {"var_smoothing": 1e-9}
iris_tree_params = {"max_depth": None, "min_samples_split": 2, "criterion": "gini"}
iris_knn_params = {"n_neighbors": 5, "weights": "uniform", "algorithm": "auto"}
iris_boosting_params = {"n_estimators": 100, "learning_rate": 0.1, "max_depth": 3}
iris_ridge_params = {"alpha": 1.0, "solver": "auto"}

# (estimator, run name, logged hyperparameters)
iris_models = [
    (LogisticRegression(random_state=42, max_iter=1000, **iris_logreg_params),
     "Logistic Regression", iris_logreg_params),

    (RandomForestClassifier(random_state=42, **iris_forest_params),
     "Random Forest", iris_forest_params),

    # probability=True is required so the SVM exposes predict_proba for AUC
    (SVC(random_state=42, probability=True, **iris_svm_params),
     "Support Vector Machine", iris_svm_params),

    (GaussianNB(**iris_naive_bayes_params), "Naive Bayes", iris_naive_bayes_params),

    (DecisionTreeClassifier(random_state=42, **iris_tree_params),
     "Decision Tree", iris_tree_params),

    (KNeighborsClassifier(**iris_knn_params), "K-Nearest Neighbors", iris_knn_params),

    (GradientBoostingClassifier(random_state=42, **iris_boosting_params),
     "Gradient Boosting", iris_boosting_params),

    (RidgeClassifier(random_state=42, **iris_ridge_params),
     "Ridge Classifier", iris_ridge_params),
]

print("🚀 Iniciando entrenamiento de modelos con dataset Iris...\n")

# Fitted models keyed by run name
iris_trained_models = {}

for model, name, params in iris_models:
    print(f"🔄 Entrenando {name} con Iris...")
    trained_model = train_and_log_iris_model(
        model, name,
        X_train_iris_scaled, X_test_iris_scaled,
        y_train_iris, y_test_iris, params
    )
    iris_trained_models[name] = trained_model

print("\n🎉 ¡Todos los modelos Iris han sido entrenados y registrados!")
print(f"📊 Total de modelos entrenados: {len(iris_trained_models)}")
print(f"🔬 Experimento: {iris_experiment_name}")
print(f"🌐 Ver resultados en MLflow UI: http://localhost:5000")

In [None]:
# Analysis of the Iris experiment results
print("🔍 Analizando resultados del experimento Iris...\n")

# Fetch every run recorded for the Iris experiment
iris_client = MlflowClient()
iris_runs = iris_client.search_runs([iris_experiment.experiment_id])

print(f"📊 Total de runs en experimento Iris: {len(iris_runs)}")
print("\n🏆 Resumen de resultados Iris:")
print("=" * 100)

# Build one row per model run. Runs without an 'accuracy' metric (the
# comparison/KNN summary runs this notebook logs into the same experiment)
# are skipped; otherwise, when the notebook is executed again, they show up as
# model "Unknown" with accuracy 0 and distort the statistics below.
iris_results_data = []
for run in iris_runs:
    metrics = run.data.metrics
    params = run.data.params

    if 'accuracy' not in metrics:
        continue

    iris_results_data.append({
        'run_id': run.info.run_id[:8],
        'model_type': params.get('model_type', 'Unknown'),
        'accuracy': metrics.get('accuracy', 0),
        'f1_weighted': metrics.get('f1_weighted', 0),
        'f1_macro': metrics.get('f1_macro', 0),
        # Models without predict_proba log no AUC; 0 marks "not available"
        'auc_ovr': metrics.get('auc_ovr', 0),
        'auc_ovo': metrics.get('auc_ovo', 0),
        'cv_mean': metrics.get('cv_mean', 0),
        'cv_std': metrics.get('cv_std', 0)
    })

# Fail with a clear message instead of a KeyError from sorting an empty frame
if not iris_results_data:
    raise RuntimeError(
        "No se encontraron runs de modelos con la métrica 'accuracy' en el experimento Iris"
    )

iris_results_df = pd.DataFrame(iris_results_data)
iris_results_df = iris_results_df.sort_values('accuracy', ascending=False)

# Results table
print(iris_results_df.round(4).to_string(index=False))

# Best model = first row after sorting by accuracy (descending)
best_iris_model = iris_results_df.iloc[0]
print(f"\n🥇 Mejor modelo Iris: {best_iris_model['model_type']}")
print(f"📊 Accuracy: {best_iris_model['accuracy']:.4f}")
print(f"📊 F1-Score (weighted): {best_iris_model['f1_weighted']:.4f}")
print(f"📊 F1-Score (macro): {best_iris_model['f1_macro']:.4f}")
print(f"📊 AUC (OvR): {best_iris_model['auc_ovr']:.4f}")
print(f"📊 CV Score: {best_iris_model['cv_mean']:.4f} (+/- {best_iris_model['cv_std'] * 2:.4f})")

# Accuracy statistics across the model runs
print(f"\n📈 Estadísticas del experimento Iris:")
print(f"📊 Accuracy promedio: {iris_results_df['accuracy'].mean():.4f}")
print(f"📊 Accuracy máxima: {iris_results_df['accuracy'].max():.4f}")
print(f"📊 Accuracy mínima: {iris_results_df['accuracy'].min():.4f}")
print(f"📊 Desviación estándar: {iris_results_df['accuracy'].std():.4f}")

# Models that classified every test sample correctly
perfect_models = iris_results_df[iris_results_df['accuracy'] == 1.0]['model_type'].tolist()
if perfect_models:
    print(f"\n🎯 Modelos con accuracy perfecta (100%): {', '.join(perfect_models)}")
else:
    print(f"\n⚠️ Ningún modelo alcanzó accuracy perfecta")

In [None]:
# Comparison figure for the Iris models: a 2x3 grid of panels
fig, axes = plt.subplots(2, 3, figsize=(18, 12))
fig.suptitle('🌸 Comparación de Modelos - Dataset Iris', fontsize=16, fontweight='bold')

# Plain horizontal-bar panels: (axes, metric column, color, x label, title)
bar_panels = [
    (axes[0, 0], 'accuracy', 'skyblue', 'Accuracy', 'Accuracy por Modelo'),
    (axes[0, 1], 'f1_weighted', 'lightgreen', 'F1-Score (Weighted)', 'F1-Score Weighted por Modelo'),
    (axes[0, 2], 'f1_macro', 'lightcoral', 'F1-Score (Macro)', 'F1-Score Macro por Modelo'),
    (axes[1, 1], 'cv_mean', 'mediumpurple', 'CV Mean Accuracy', 'Cross-Validation Mean por Modelo'),
]
for panel_ax, metric, bar_color, axis_label, panel_title in bar_panels:
    panel_ax.barh(iris_results_df['model_type'], iris_results_df[metric], color=bar_color)
    panel_ax.set_xlabel(axis_label)
    panel_ax.set_title(panel_title)
    panel_ax.set_xlim(0, 1)
    panel_ax.grid(axis='x', alpha=0.3)

# AUC (one-vs-rest) panel: only rows with auc_ovr > 0, or a placeholder text
auc_ax = axes[1, 0]
auc_ovr_data = iris_results_df[iris_results_df['auc_ovr'] > 0]
if auc_ovr_data.empty:
    auc_ax.text(0.5, 0.5, 'No hay datos AUC', ha='center', va='center')
    auc_ax.set_title('AUC OvR por Modelo')
else:
    auc_ax.barh(auc_ovr_data['model_type'], auc_ovr_data['auc_ovr'], color='gold')
    auc_ax.set_xlabel('AUC (One-vs-Rest)')
    auc_ax.set_title('AUC OvR por Modelo')
    auc_ax.set_xlim(0, 1)
    auc_ax.grid(axis='x', alpha=0.3)

# Accuracy vs weighted F1 scatter, one point per run
scatter_ax = axes[1, 2]
scatter = scatter_ax.scatter(
    iris_results_df['accuracy'],
    iris_results_df['f1_weighted'],
    s=100,
    alpha=0.7,
    c=range(len(iris_results_df)),
    cmap='viridis',
)
scatter_ax.set_xlabel('Accuracy')
scatter_ax.set_ylabel('F1-Score (Weighted)')
scatter_ax.set_title('Accuracy vs F1-Score')
scatter_ax.grid(True, alpha=0.3)

# Label every point with the first three letters of the model name
point_rows = zip(
    iris_results_df['model_type'],
    iris_results_df['accuracy'],
    iris_results_df['f1_weighted'],
)
for model_label, accuracy_value, f1_value in point_rows:
    scatter_ax.annotate(
        model_label.split()[0][:3],
        (accuracy_value, f1_value),
        xytext=(5, 5),
        textcoords='offset points',
        fontsize=8,
    )

plt.tight_layout()
plt.savefig('iris_model_comparison.png', dpi=300, bbox_inches='tight')
plt.show()

# Store the figure and the headline numbers in a dedicated summary run
with mlflow.start_run(run_name="Iris Model Comparison Summary"):
    mlflow.log_artifact('iris_model_comparison.png')
    mlflow.log_metric("total_iris_models", len(iris_results_df))
    mlflow.log_metric("best_iris_accuracy", iris_results_df['accuracy'].max())
    mlflow.log_metric("avg_iris_accuracy", iris_results_df['accuracy'].mean())
    mlflow.log_param("best_iris_model", best_iris_model['model_type'])
    mlflow.log_param("dataset", "Iris")
    mlflow.log_param("problem_type", "multiclass_classification")

print("📊 Visualizaciones guardadas en MLflow")
print("🎯 Resumen registrado como run separado")

## Experimento con hiperparámetros - K-Nearest Neighbors

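KNN es particularmente interesante con Iris: *setosa* queda claramente separada en el espacio de características, mientras que *versicolor* y *virginica* se solapan parcialmente, por lo que la elección de K y del esquema de pesos puede cambiar el resultado. Vamos a experimentar con diferentes valores de K y con pesos `uniform` y `distance`.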

In [None]:
# KNN sweep on Iris: one MLflow run per (n_neighbors, weights) combination
print("🔬 Experimentando con K-Nearest Neighbors en Iris...\n")

# Combinations to try, in the order they are trained and later plotted
knn_grid = [
    (1, "uniform"),
    (3, "uniform"),
    (5, "uniform"),
    (7, "uniform"),
    (9, "uniform"),
    (3, "distance"),
    (5, "distance"),
    (7, "distance"),
    (11, "uniform"),
    (15, "uniform"),
]
knn_configs = [
    {"n_neighbors": n_neighbors, "weights": weights}
    for n_neighbors, weights in knn_grid
]

knn_results = []

for config in knn_configs:
    model_name = f"KNN - K={config['n_neighbors']}, weights={config['weights']}"
    model = KNeighborsClassifier(**config)

    print(f"🔄 Probando {model_name}...")

    # Train the model and record the run in MLflow
    trained_model = train_and_log_iris_model(
        model, model_name,
        X_train_iris_scaled, X_test_iris_scaled,
        y_train_iris, y_test_iris, config,
    )

    # Keep the test accuracy locally for the comparison table and plots
    accuracy = accuracy_score(y_test_iris, trained_model.predict(X_test_iris_scaled))
    knn_results.append(dict(
        k=config['n_neighbors'],
        weights=config['weights'],
        accuracy=accuracy,
        model_name=model_name,
    ))

print("\n✅ Experimentos KNN completados!")

# Results table, best accuracy first
knn_df = pd.DataFrame(knn_results)
knn_ranking = knn_df.sort_values('accuracy', ascending=False)
print("\n📊 Resultados de experimentos KNN:")
print(knn_ranking.to_string(index=False))

# Best configuration: the row with the highest test accuracy
best_knn = knn_df.loc[knn_df['accuracy'].idxmax()]
print("\n🥇 Mejor configuración KNN:")
print(f"📊 K = {best_knn['k']}, Weights = {best_knn['weights']}")
print(f"📊 Accuracy = {best_knn['accuracy']:.4f}")

# KNN results figure: four panels drawn with the pyplot state machine, so the
# order of the calls below determines which panel each one applies to
plt.figure(figsize=(12, 8))

# Panel 1: accuracy as a function of K, one line per weighting scheme
plt.subplot(2, 2, 1)
uniform_data = knn_df[knn_df['weights'] == 'uniform']
distance_data = knn_df[knn_df['weights'] == 'distance']

plt.plot(uniform_data['k'], uniform_data['accuracy'], 'o-', label='uniform', linewidth=2, markersize=8)
plt.plot(distance_data['k'], distance_data['accuracy'], 's-', label='distance', linewidth=2, markersize=8)
plt.xlabel('K (número de vecinos)')
plt.ylabel('Accuracy')
plt.title('KNN: Accuracy vs K')
plt.legend()
plt.grid(True, alpha=0.3)

# Panel 2: one bar per configuration (in knn_df row order), colored by weights
plt.subplot(2, 2, 2)
colors = ['lightblue' if w == 'uniform' else 'lightcoral' for w in knn_df['weights']]
bars = plt.bar(range(len(knn_df)), knn_df['accuracy'], color=colors)
plt.xlabel('Configuración')
plt.ylabel('Accuracy')
plt.title('Accuracy por Configuración KNN')
plt.xticks(range(len(knn_df)), [f"K={row['k']}\n{row['weights']}" for _, row in knn_df.iterrows()], 
           rotation=45, ha='right')
plt.grid(axis='y', alpha=0.3)

# Manual legend explaining the bar colors
from matplotlib.patches import Patch
legend_elements = [Patch(facecolor='lightblue', label='uniform'),
                  Patch(facecolor='lightcoral', label='distance')]
plt.legend(handles=legend_elements, title='Weights')

# Panel 3: accuracy heatmap with weights as rows and K as columns
# (combinations that were not tried have no value in the pivot table)
plt.subplot(2, 2, 3)
pivot_data = knn_df.pivot(index='weights', columns='k', values='accuracy')
sns.heatmap(pivot_data, annot=True, cmap='YlOrRd', fmt='.3f', cbar_kws={'label': 'Accuracy'})
plt.title('Heatmap: Accuracy por K y Weights')
plt.xlabel('K (número de vecinos)')
plt.ylabel('Weights')

# Panel 4: box plot comparing the accuracy spread of both weighting schemes
# NOTE(review): newer Matplotlib releases appear to rename boxplot's `labels`
# argument to `tick_labels` — confirm against the installed version
plt.subplot(2, 2, 4)
uniform_acc = knn_df[knn_df['weights'] == 'uniform']['accuracy']
distance_acc = knn_df[knn_df['weights'] == 'distance']['accuracy']
plt.boxplot([uniform_acc, distance_acc], labels=['uniform', 'distance'])
plt.ylabel('Accuracy')
plt.title('Distribución de Accuracy por Weights')
plt.grid(axis='y', alpha=0.3)

plt.tight_layout()
plt.savefig('iris_knn_analysis.png', dpi=300, bbox_inches='tight')
plt.show()

# Record the KNN analysis as its own MLflow run: the saved figure as an
# artifact, plus summary metrics and the best configuration as parameters.
knn_summary_metrics = {
    "best_knn_accuracy": best_knn['accuracy'],
    "knn_accuracy_mean": knn_df['accuracy'].mean(),
    "knn_accuracy_std": knn_df['accuracy'].std(),
}
knn_summary_params = {
    "best_k": best_knn['k'],
    "best_weights": best_knn['weights'],
    "total_knn_configs": len(knn_configs),
}

with mlflow.start_run(run_name="Iris KNN Hyperparameter Analysis"):
    mlflow.log_artifact('iris_knn_analysis.png')
    for metric_name, metric_value in knn_summary_metrics.items():
        mlflow.log_metric(metric_name, metric_value)
    for param_name, param_value in knn_summary_params.items():
        mlflow.log_param(param_name, param_value)

print("📊 Análisis KNN guardado en MLflow")

## Comparación final: Préstamos vs Iris

Comparemos los resultados obtenidos en ambos datasets para entender las diferencias entre problemas de clasificación binaria y multiclase.

In [None]:
# Compare experiments: Loans vs Iris
print("🔄 Recopilando resultados de ambos experimentos...")


def _collect_model_rows(runs, dataset, problem_type, f1_metric, excluded_name_parts):
    """Convert MLflow runs into comparison rows, one dict per model run.

    Args:
        runs: Iterable of MLflow run objects; each must expose ``data.tags``,
            ``data.params`` and ``data.metrics`` as dict-like mappings.
        dataset: Label stored in the ``dataset`` field of every row.
        problem_type: Label stored in the ``problem_type`` field of every row.
        f1_metric: Name of the metric copied into ``f1_score``.
        excluded_name_parts: Substrings; a run whose ``mlflow.runName`` tag
            contains any of them is skipped.

    Returns:
        A list of dicts with the keys ``dataset``, ``model_type``,
        ``accuracy``, ``f1_score`` and ``problem_type``.
    """
    rows = []
    for run in runs:
        run_name = run.data.tags.get('mlflow.runName', '')
        if any(part in run_name for part in excluded_name_parts):
            continue

        metrics = run.data.metrics
        # Runs that never logged 'accuracy' (e.g. analysis or comparison runs
        # that only store aggregates) are not model runs. Recording them as
        # accuracy 0 would drag down every mean, minimum and box plot.
        if 'accuracy' not in metrics:
            continue

        rows.append({
            'dataset': dataset,
            'model_type': run.data.params.get('model_type', 'Unknown'),
            'accuracy': metrics['accuracy'],
            # 0 when the run did not log this metric
            'f1_score': metrics.get(f1_metric, 0),
            'problem_type': problem_type,
        })
    return rows


# Fetch the runs of the loan experiment
loan_experiment = mlflow.get_experiment_by_name("Loan Default Classification")
loan_runs = client.search_runs(loan_experiment.experiment_id)

# Loan rows (summary runs excluded)
loan_data = _collect_model_rows(
    loan_runs,
    dataset='Préstamos',
    problem_type='Binary Classification',
    f1_metric='f1_score',
    excluded_name_parts=("Summary",),
)

# Iris rows (summary runs and the per-configuration KNN runs excluded);
# the weighted F1 is used because Iris is a multiclass problem.
iris_data = _collect_model_rows(
    iris_runs,
    dataset='Iris',
    problem_type='Multiclass Classification',
    f1_metric='f1_weighted',
    excluded_name_parts=("Summary", "KNN -"),
)

# Combine both datasets. Explicit columns keep the frame well-formed (and the
# later group-bys working) even when no run qualified.
all_data = loan_data + iris_data
comparison_df = pd.DataFrame(
    all_data,
    columns=['dataset', 'model_type', 'accuracy', 'f1_score', 'problem_type'],
)

print("📊 Datos recopilados:")
print(f"   - Préstamos: {len(loan_data)} modelos")
print(f"   - Iris: {len(iris_data)} modelos")
print(f"   - Total: {len(all_data)} modelos")

# Summary statistics per dataset
print("\n📈 Estadísticas por dataset:")
dataset_summary_spec = {
    'accuracy': ['mean', 'std', 'min', 'max'],
    'f1_score': ['mean', 'std', 'min', 'max'],
}
grouped_by_dataset = comparison_df.groupby('dataset')
stats_by_dataset = grouped_by_dataset.agg(dataset_summary_spec).round(4)

print(stats_by_dataset)

# Summary statistics per model type (across both datasets)
print("\n🤖 Estadísticas por tipo de modelo:")
model_summary_spec = {
    'accuracy': ['mean', 'std', 'count'],
    'f1_score': ['mean', 'std'],
}
grouped_by_model = comparison_df.groupby('model_type')
stats_by_model = grouped_by_model.agg(model_summary_spec).round(4)

print(stats_by_model)

# Comparative visualization: a 2x2 figure saved as a PNG.
fig, axes = plt.subplots(2, 2, figsize=(15, 12))
fig.suptitle('📊 Comparación: Préstamos vs Iris', fontsize=16, fontweight='bold')

# Filter each dataset once and reuse the subsets in every panel.
loan_subset = comparison_df[comparison_df['dataset'] == 'Préstamos']
iris_subset = comparison_df[comparison_df['dataset'] == 'Iris']

# 1 & 2. Box plots of accuracy and F1-score per dataset
dataset_tick_labels = ['Préstamos\n(Binary)', 'Iris\n(Multiclass)']
box_panels = (
    (axes[0, 0], 'accuracy', 'Accuracy'),
    (axes[0, 1], 'f1_score', 'F1-Score'),
)
for ax, column, axis_label in box_panels:
    ax.boxplot([loan_subset[column], iris_subset[column]])
    # Set the tick labels explicitly instead of using boxplot's `labels=`
    # keyword, which Matplotlib 3.9 deprecated in favour of `tick_labels=`.
    # Boxes sit at positions 1..N by default.
    ax.set_xticks([1, 2])
    ax.set_xticklabels(dataset_tick_labels)
    ax.set_ylabel(axis_label)
    ax.set_title(f'Distribución de {axis_label} por Dataset')
    ax.grid(axis='y', alpha=0.3)

# 3. Scatter plot: accuracy vs. F1-score, coloured by dataset
axes[1, 0].scatter(loan_subset['accuracy'], loan_subset['f1_score'],
                   alpha=0.7, s=100, label='Préstamos', color='lightcoral')
axes[1, 0].scatter(iris_subset['accuracy'], iris_subset['f1_score'],
                   alpha=0.7, s=100, label='Iris', color='lightblue')
axes[1, 0].set_xlabel('Accuracy')
axes[1, 0].set_ylabel('F1-Score')
axes[1, 0].set_title('Accuracy vs F1-Score')
axes[1, 0].legend()
axes[1, 0].grid(True, alpha=0.3)

# 4. Horizontal bar plot of mean accuracy per model type
model_accuracy = comparison_df.groupby('model_type')['accuracy'].mean().sort_values(ascending=True)
# The first keyword found in the model name decides the colour; names that
# match none of them fall back to 'mediumpurple'.
model_color_rules = [
    ('Logistic', 'lightcoral'),
    ('Random', 'lightblue'),
    ('SVM', 'lightgreen'),
    ('Naive', 'gold'),
]
colors = [
    next((color for keyword, color in model_color_rules if keyword in model), 'mediumpurple')
    for model in model_accuracy.index
]

axes[1, 1].barh(model_accuracy.index, model_accuracy.values, color=colors)
axes[1, 1].set_xlabel('Accuracy Promedio')
axes[1, 1].set_title('Accuracy Promedio por Tipo de Modelo')
axes[1, 1].grid(axis='x', alpha=0.3)

plt.tight_layout()
plt.savefig('loans_vs_iris_comparison.png', dpi=300, bbox_inches='tight')
plt.show()

# Analysis of the differences between the two datasets
print("\n🔍 Análisis de diferencias:")
loan_rows = comparison_df[comparison_df['dataset'] == 'Préstamos']
iris_rows = comparison_df[comparison_df['dataset'] == 'Iris']

loan_avg_acc = loan_rows['accuracy'].mean()
iris_avg_acc = iris_rows['accuracy'].mean()
loan_avg_f1 = loan_rows['f1_score'].mean()
iris_avg_f1 = iris_rows['f1_score'].mean()

accuracy_gap = abs(iris_avg_acc - loan_avg_acc)
f1_gap = abs(iris_avg_f1 - loan_avg_f1)

print(f"📊 Accuracy promedio - Préstamos: {loan_avg_acc:.4f}, Iris: {iris_avg_acc:.4f}")
print(f"📊 F1-Score promedio - Préstamos: {loan_avg_f1:.4f}, Iris: {iris_avg_f1:.4f}")
print(f"📊 Diferencia de Accuracy: {accuracy_gap:.4f}")
print(f"📊 Diferencia de F1-Score: {f1_gap:.4f}")

# Report which dataset has the higher mean accuracy.
higher_accuracy_message = (
    "🎯 Iris tiende a tener mayor accuracy (dataset más 'fácil')"
    if iris_avg_acc > loan_avg_acc
    else "🎯 Préstamos tiende a tener mayor accuracy"
)
print(higher_accuracy_message)

# Best model per dataset: the row with the highest accuracy in each subset
best_loan_model = loan_rows.loc[loan_rows['accuracy'].idxmax()]
best_iris_model = iris_rows.loc[iris_rows['accuracy'].idxmax()]

print(f"\n🥇 Mejor modelo para Préstamos: {best_loan_model['model_type']} ({best_loan_model['accuracy']:.4f})")
print(f"🥇 Mejor modelo para Iris: {best_iris_model['model_type']} ({best_iris_model['accuracy']:.4f})")

# Record the dataset comparison as its own MLflow run: the saved figure as an
# artifact, the averages as metrics and the winning models as parameters.
comparison_metrics = {
    "loan_avg_accuracy": loan_avg_acc,
    "iris_avg_accuracy": iris_avg_acc,
    "loan_avg_f1": loan_avg_f1,
    "iris_avg_f1": iris_avg_f1,
    "accuracy_difference": abs(iris_avg_acc - loan_avg_acc),
}
comparison_params = {
    "total_models_compared": len(comparison_df),
    "best_loan_model": best_loan_model['model_type'],
    "best_iris_model": best_iris_model['model_type'],
}

with mlflow.start_run(run_name="Dataset Comparison: Loans vs Iris"):
    mlflow.log_artifact('loans_vs_iris_comparison.png')
    for metric_name, metric_value in comparison_metrics.items():
        mlflow.log_metric(metric_name, metric_value)
    for param_name, param_value in comparison_params.items():
        mlflow.log_param(param_name, param_value)

print("\n📊 Comparación guardada en MLflow")
print("🎉 ¡Análisis completo finalizado!")