In [17]:
# Standard library
import os

# Import libraries
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np

# Import MLflow
import mlflow
import mlflow.sklearn
from mlflow.models import infer_signature

# Import model building blocks
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.metrics import accuracy_score, f1_score, confusion_matrix, roc_auc_score, roc_curve

In [14]:
# One-time setup: uncomment to install MLflow into the kernel's environment.
# %pip install mlflow

In [15]:
# Where runs are tracked and where registered models are stored.
# Both locations can be overridden without editing the notebook by setting
# MLflow's standard environment variables (for example
# MLFLOW_TRACKING_URI=http://0.0.0.0:1234 when the server listens elsewhere).
# When the variables are unset, the defaults below are used.
track_uri = os.environ.get("MLFLOW_TRACKING_URI", "http://localhost:8080/")
registry_uri = os.environ.get("MLFLOW_REGISTRY_URI", "sqlite:////tmp/registry.db")

mlflow.set_tracking_uri(track_uri)
mlflow.set_registry_uri(registry_uri)

In [16]:
# Create the experiment, or load it if it already exists. set_experiment()
# also makes it the active experiment and returns its Experiment entity.
experiment_name = "Proyecto_Dengue_AAA"
experiment = mlflow.set_experiment(experiment_name)

# Low-level tracking client. It is not needed for the ID lookup below; it is
# kept because code outside this cell may reference it.
client = mlflow.tracking.MlflowClient()

# Take the ID from the entity returned above instead of querying the server
# again by name (that lookup can return None, which would fail on attribute access).
experiment_id = experiment.experiment_id

# Sanity check: show which server and experiment subsequent runs will log to
print(f"MLflow Version: {mlflow.__version__}")
print(f"Tracking URI: {mlflow.tracking.get_tracking_uri()}")
print(f"Nombre del experimento: {experiment_name}")
print(f"ID del experimento: {experiment_id}")

2024/05/26 06:19:21 INFO mlflow.tracking.fluent: Experiment with name 'Proyecto_Dengue_AAA' does not exist. Creating a new experiment.


MLflow Version: 2.13.0
Tracking URI: http://localhost:8080/
Nombre del experimento: Proyecto_Dengue_AAA
ID del experimento: 999845863419534997


## Cargar datos

## Ejecución de modelos

In [None]:


# Load the example dataset
iris = load_iris()
X = iris.data
y = iris.target

# Hold out 20% of the samples for evaluation (seeded so the split is repeatable)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Candidate models. The stochastic estimators are seeded so that re-running the
# cell logs the same metrics. SVC needs probability=True to expose predict_proba.
models = {
    "Logistic Regression": LogisticRegression(),
    "SVM": SVC(probability=True, random_state=42),
    "Random Forest": RandomForestClassifier(random_state=42),
    "Gradient Boosting": GradientBoostingClassifier(random_state=42),
}

# Train and evaluate every model inside a single MLflow run. Metric names and
# artifact file names are prefixed with the model name to keep them apart.
with mlflow.start_run():
    for name, model in models.items():
        model.fit(X_train, y_train)
        y_pred = model.predict(X_test)
        # Class probabilities, computed once and shared by the ROC AUC metric
        # and the ROC plot below. Columns follow the order of model.classes_.
        y_proba = model.predict_proba(X_test)

        # Compute metrics
        accuracy = accuracy_score(y_test, y_pred)
        f1 = f1_score(y_test, y_pred, average='weighted')
        roc_auc = roc_auc_score(y_test, y_proba, multi_class='ovr')

        # Log metrics to MLflow
        mlflow.log_metric(f'{name} Accuracy', accuracy)
        mlflow.log_metric(f'{name} F1 Score', f1)
        mlflow.log_metric(f'{name} ROC AUC', roc_auc)

        # Confusion matrix heatmap. labels= pins the row/column order so the
        # matrix always lines up with the tick labels, even if a class were
        # absent from the test split.
        cm = confusion_matrix(y_test, y_pred, labels=model.classes_)
        fig, ax = plt.subplots(figsize=(6, 4))
        sns.heatmap(
            cm,
            annot=True,
            cmap='Blues',
            fmt='g',
            xticklabels=iris.target_names,
            yticklabels=iris.target_names,
            ax=ax,
        )
        ax.set_title(f'{name} Confusion Matrix')
        ax.set_xlabel('Predicted')
        ax.set_ylabel('Actual')
        # log_figure uploads only this figure. Logging the whole working
        # directory would also upload the notebook and any other local files.
        mlflow.log_figure(fig, f'{name}_confusion_matrix.png')
        plt.close(fig)

        # ROC curves: one one-vs-rest curve per class. Every model here exposes
        # predict_proba, so no model is skipped.
        fig, ax = plt.subplots()
        for column, (label, class_name) in enumerate(zip(model.classes_, iris.target_names)):
            # pos_label marks the current class as positive and all others as negative
            fpr, tpr, _ = roc_curve(y_test, y_proba[:, column], pos_label=label)
            ax.plot(fpr, tpr, lw=2, label=f'{class_name} vs rest')
        # Diagonal reference line: performance of a random classifier
        ax.plot([0, 1], [0, 1], color='navy', lw=2, linestyle='--')
        ax.set_xlim([0.0, 1.0])
        ax.set_ylim([0.0, 1.05])
        ax.set_xlabel('False Positive Rate')
        ax.set_ylabel('True Positive Rate')
        ax.set_title(f'{name} ROC Curve')
        ax.legend(loc='lower right')
        mlflow.log_figure(fig, f'{name}_roc_curve.png')
        plt.close(fig)
