In [0]:
import mlflow
import mlflow.sklearn
from sklearn.datasets import load_diabetes
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score
import pandas as pd
import matplotlib.pyplot as plt

# Load the diabetes dataset once and keep the feature names next to the arrays,
# rather than calling load_diabetes() again each time the names are needed.
diabetes = load_diabetes()
X, y = diabetes.data, diabetes.target
feature_names = diabetes.feature_names

X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42
)

# Named-column views of both splits. The model is fitted and scored on these so
# that the estimator, the logged signature, the input example and the
# mlflow.evaluate() frame all agree on column names. Fitting on the bare arrays
# makes scikit-learn warn ("X has feature names, but ... was fitted without
# feature names") whenever the logged model later receives a DataFrame.
X_train_df = pd.DataFrame(X_train, columns=feature_names)
X_test_df = pd.DataFrame(X_test, columns=feature_names)

input_example = X_test_df.head(5)
signature = mlflow.models.infer_signature(X_train_df, pd.Series(y_train))

with mlflow.start_run() as run:
    # Hyperparameters (set fit_intercept to False to compare a no-intercept fit)
    params = {"fit_intercept": True}
    mlflow.log_params(params)

    # Training
    model = LinearRegression(**params)
    model.fit(X_train_df, y_train)

    # Prediction on the held-out split
    y_pred = model.predict(X_test_df)

    # Metrics
    mse = mean_squared_error(y_test, y_pred)
    r2 = r2_score(y_test, y_pred)
    mlflow.log_metric("mse", mse)
    mlflow.log_metric("r2", r2)

    # Artifact: predicted-vs-actual scatter plot. The figure is closed as soon
    # as it is on disk so it is released even if the artifact upload fails.
    fig, ax = plt.subplots()
    ax.scatter(y_test, y_pred)
    ax.set_xlabel("Real")
    ax.set_ylabel("Predicción")
    ax.set_title("Predicción vs Real")
    fig.savefig("/tmp/pred_vs_real.png")
    plt.close(fig)
    mlflow.log_artifact("/tmp/pred_vs_real.png")

    # Log the model with its signature and register it in the Model Registry.
    # NOTE(review): later cells load "models:/bestmodel_2@champion", not
    # "bestmodel_1" -- confirm which registered model is the intended one.
    mlflow.sklearn.log_model(
        model,
        "model",
        signature=signature,
        input_example=input_example,
        registered_model_name="bestmodel_1"
    )

    # Save the predicted-vs-actual table as CSV and log it as an artifact
    pred_vs_real_df = pd.DataFrame({
        "Real": y_test,
        "Predicción": y_pred
    })
    pred_vs_real_df.to_csv("/tmp/pred_vs_real.csv", index=False)
    mlflow.log_artifact("/tmp/pred_vs_real.csv")

    # Built-in MLflow evaluation of the model that was just logged in this run
    model_uri = f"runs:/{run.info.run_id}/model"
    eval_results = mlflow.evaluate(
        model=model_uri,
        data=X_test_df.assign(target=y_test),
        targets="target",
        model_type="regressor",
        evaluators=["default"]
    )

In [0]:
import mlflow
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.datasets import load_diabetes
from sklearn.model_selection import train_test_split
import pandas as pd

# Workspace-absolute experiment path, defined once and passed to set_experiment
# (the variable previously held a relative name and was never used).
experiment_name = "/Users/ghenrion@itba.edu.ar/hyperparameter_tuning_example"

mlflow.set_experiment(experiment_name=experiment_name)

# Load the data once; the feature names come from the same Bunch as the arrays.
diabetes = load_diabetes()
X, y = diabetes.data, diabetes.target
feature_names = diabetes.feature_names

X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Named-column views so every fitted model carries the same feature names as
# the logged signature and input example.
X_train_df = pd.DataFrame(X_train, columns=feature_names)
X_test_df = pd.DataFrame(X_test, columns=feature_names)

input_example = X_test_df.head(5)
signature = mlflow.models.infer_signature(X_train_df, pd.Series(y_train))

param_grid = [
    {"fit_intercept": True},
    {"fit_intercept": False}
]

# One parent run holding a nested child run per parameter combination.
with mlflow.start_run() as parent_run:
    best_mse = float("inf")
    best_params = None
    results = []
    for i, params in enumerate(param_grid):
        with mlflow.start_run(run_name=f"child_run_{i}", nested=True):
            model = LinearRegression(**params)
            model.fit(X_train_df, y_train)
            y_pred = model.predict(X_test_df)
            mse = mean_squared_error(y_test, y_pred)
            r2 = r2_score(y_test, y_pred)
            mlflow.log_params(params)
            mlflow.log_metric("mse", mse)
            mlflow.log_metric("r2", r2)
            results.append({"params": params, "mse": mse, "r2": r2})
            # The best combination is chosen by MSE on the held-out split
            # (X_test), i.e. the same data the metrics are reported on.
            if mse < best_mse:
                best_mse = mse
                best_params = params
            # Log the fitted model, with signature, inside each child run
            mlflow.sklearn.log_model(
                model,
                f"model_RL_{i}",
                signature=signature,
                input_example=input_example
            )
    # Summary of the winning combination, logged on the parent run
    mlflow.log_metric("best_mse", best_mse)
    mlflow.log_params({"best_fit_intercept": best_params["fit_intercept"]})
    display(pd.DataFrame(results))

In [0]:
import mlflow
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error, r2_score, roc_curve, roc_auc_score
from sklearn.datasets import load_diabetes
from sklearn.model_selection import train_test_split
import pandas as pd
import matplotlib.pyplot as plt

# Experiment that receives the parent run and its nested child runs
mlflow.set_experiment("/Users/ghenrion@itba.edu.ar/random_forest_nested_example")

# Data
X, y = load_diabetes(return_X_y=True)
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Binarize the target for the ROC analysis: 1 when the value is above the mean
# of the full target vector, 0 otherwise.
threshold = y.mean()
y_bin = (y > threshold).astype(int)
y_train_bin = (y_train > threshold).astype(int)
y_test_bin = (y_test > threshold).astype(int)

# Hyperparameter grid
param_grid = [
    {"n_estimators": 50, "max_depth": None},
    {"n_estimators": 100, "max_depth": None},
    {"n_estimators": 100, "max_depth": 5},
    {"n_estimators": 200, "max_depth": 5},
]

with mlflow.start_run(run_name="parent_run") as parent_run:
    best_mse = float("inf")
    best_params = None
    results = []

    for i, params in enumerate(param_grid):
        with mlflow.start_run(run_name=f"child_run_{i}", nested=True) as child_run:
            # Fit the regressor for this parameter combination
            model = RandomForestRegressor(random_state=42, **params)
            model.fit(X_train, y_train)

            # Regression metrics on the held-out split
            y_pred = model.predict(X_test)
            mse = mean_squared_error(y_test, y_pred)
            r2 = r2_score(y_test, y_pred)

            # ROC for the regressor: the continuous predictions are used as
            # scores against the binarized test target.
            fpr, tpr, _ = roc_curve(y_test_bin, y_pred)
            auc = roc_auc_score(y_test_bin, y_pred)

            fig, ax = plt.subplots()
            ax.plot(fpr, tpr, label=f'ROC Curve (AUC = {auc:.3f})')
            ax.plot([0, 1], [0, 1], "k--", label="Random Classifier")
            ax.set_xlabel("False Positive Rate")
            ax.set_ylabel("True Positive Rate")
            ax.set_title("ROC Curve")
            ax.legend()
            ax.grid(True)

            # One image per child run, uploaded as an artifact of that run
            roc_path = f"/tmp/roc_curve_{i}.png"
            fig.savefig(roc_path, dpi=300, bbox_inches="tight")
            mlflow.log_artifact(roc_path)
            plt.close(fig)
            mlflow.log_metric("roc_auc", auc)

            # Parameters and regression metrics for this child run
            mlflow.log_params(params)
            mlflow.log_metric("mse", mse)
            mlflow.log_metric("r2", r2)

            # Keep a local record for the summary table
            results.append(dict(params=params, mse=mse, r2=r2, roc_auc=auc))

            # Track the combination with the lowest test MSE
            if mse < best_mse:
                best_mse, best_params = mse, params

    # Summary of the best child, logged on the parent run
    mlflow.log_metric("best_mse", best_mse)
    mlflow.log_params({"best_params": best_params})

    # Show the results table
    results_df = pd.DataFrame(results)
    display(results_df)

In [0]:
import mlflow
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import roc_curve, roc_auc_score
from sklearn.datasets import load_diabetes
from sklearn.model_selection import train_test_split
import pandas as pd
import matplotlib.pyplot as plt

# Load the data and turn the target into a binary label (above the mean -> 1)
X, y = load_diabetes(return_X_y=True)
y_bin = (y > y.mean()).astype(int)

X_train, X_test, y_train, y_test = train_test_split(
    X, y_bin, test_size=0.2, random_state=42
)

mlflow.set_experiment("/Users/ghenrion@itba.edu.ar/random_forest_roc_example")

with mlflow.start_run():
    params = {"n_estimators": 100, "max_depth": 5, "random_state": 42}

    # fit() returns the estimator itself, so construction and fitting chain
    model = RandomForestClassifier(**params).fit(X_train, y_train)

    # Probability of the positive class, used as the ROC score
    y_pred_proba = model.predict_proba(X_test)[:, 1]
    auc = roc_auc_score(y_test, y_pred_proba)
    mlflow.log_params(params)
    mlflow.log_metric("roc_auc", auc)

    # ROC curve, saved to disk and logged as a run artifact
    fpr, tpr, _ = roc_curve(y_test, y_pred_proba)
    fig, ax = plt.subplots()
    ax.plot(fpr, tpr, label=f'ROC Curve (AUC = {auc:.3f})')
    ax.plot([0, 1], [0, 1], "k--", label="Random Classifier")
    ax.set_xlabel("False Positive Rate")
    ax.set_ylabel("True Positive Rate")
    ax.set_title("ROC Curve")
    ax.legend()
    ax.grid(True)
    fig.savefig("/tmp/roc_curve.png", dpi=300, bbox_inches="tight")
    mlflow.log_artifact("/tmp/roc_curve.png")
    plt.close(fig)

    # Table of the points that make up the curve
    roc_points = pd.DataFrame({"fpr": fpr, "tpr": tpr})
    display(roc_points)

In [0]:
import mlflow
from sklearn.datasets import load_diabetes
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, r2_score
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.ensemble import GradientBoostingRegressor

# Load the diabetes dataset once and reuse its feature names, rather than
# calling load_diabetes() again each time the names are needed.
diabetes = load_diabetes()
X, y = diabetes.data, diabetes.target
feature_names = diabetes.feature_names

X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Named-column views of both splits, so the fitted estimator carries the same
# feature names as the logged signature and input example. Fitting on the bare
# arrays makes scikit-learn warn when the logged model later gets a DataFrame.
X_train_df = pd.DataFrame(X_train, columns=feature_names)
X_test_df = pd.DataFrame(X_test, columns=feature_names)

input_example = X_test_df.head(5)
signature = mlflow.models.infer_signature(X_train_df, pd.Series(y_train))

# NOTE(review): this cell never calls mlflow.set_experiment, so the run is
# recorded under whichever experiment is active in the session -- confirm that
# this is intended.
with mlflow.start_run() as run:
    params = {"n_estimators": 100, "max_depth": 5, "random_state": 42}
    mlflow.log_params(params)

    # Training
    model = GradientBoostingRegressor(**params)
    model.fit(X_train_df, y_train)

    # Prediction on the held-out split
    y_pred = model.predict(X_test_df)

    # Metrics
    mse = mean_squared_error(y_test, y_pred)
    r2 = r2_score(y_test, y_pred)
    mlflow.log_metric("mse", mse)
    mlflow.log_metric("r2", r2)

    # Artifact: predicted-vs-actual scatter plot. The figure is closed as soon
    # as it is on disk so it is released even if the artifact upload fails.
    fig, ax = plt.subplots()
    ax.scatter(y_test, y_pred)
    ax.set_xlabel("Real")
    ax.set_ylabel("Predicción")
    ax.set_title("Predicción vs Real (GBM)")
    fig.savefig("/tmp/pred_vs_real_gbm.png")
    plt.close(fig)
    mlflow.log_artifact("/tmp/pred_vs_real_gbm.png")

    # Log the fitted model together with its signature and input example
    mlflow.sklearn.log_model(
        model,
        "model_gbm",
        signature=signature,
        input_example=input_example
    )

    # Save the predicted-vs-actual table as CSV and log it as an artifact
    pred_vs_real_df = pd.DataFrame({
        "Real": y_test,
        "Predicción": y_pred
    })
    pred_vs_real_df.to_csv("/tmp/pred_vs_real_gbm.csv", index=False)
    mlflow.log_artifact("/tmp/pred_vs_real_gbm.csv")

    display(pred_vs_real_df)

In [0]:
import mlflow
import pandas as pd


# Load the registered model from the Model Registry through its alias.
# NOTE(review): the training cell in this notebook registers "bestmodel_1";
# this URI targets "bestmodel_2" with alias "champion" -- confirm that this
# model and alias exist in the registry.
model_uri = "models:/bestmodel_2@champion"
model = mlflow.sklearn.load_model(model_uri)

# Feature values for a single new example, keyed by feature name
valores_caso = {
    "age": 0.05,
    "sex": -0.04464164,
    "bmi": 0.06061839,
    "bp": -0.02129594,
    "s1": -0.03482076,
    "s2": -0.04340085,
    "s3": -0.0025928,
    "s4": 0.01990749,
    "s5": -0.01764613,
    "s6": -0.03530685,
}

# One-row frame built from that record
nuevo_caso = pd.DataFrame([valores_caso])

# Run the prediction and show it as a one-column table
prediccion = model.predict(nuevo_caso)
resultado = pd.DataFrame({"Predicción": prediccion})
display(resultado)

In [0]:
import mlflow
import pandas as pd
from sklearn.datasets import load_diabetes
from sklearn.metrics import mean_squared_error, r2_score

# Load the registered model from the Model Registry through its alias.
# NOTE(review): the training cell in this notebook registers "bestmodel_1";
# this URI targets "bestmodel_2" with alias "champion" -- confirm that this
# model and alias exist in the registry.
model_uri = "models:/bestmodel_2@champion"
model = mlflow.sklearn.load_model(model_uri)

# Data used for monitoring. The dataset is loaded once and its feature names
# are reused. Despite the *_test names, this is the complete dataset (no
# train/test split is applied here), so the metrics below are computed over
# every sample.
diabetes = load_diabetes()
X, y = diabetes.data, diabetes.target
X_test = pd.DataFrame(X, columns=diabetes.feature_names)
y_test = y

# Predictions
y_pred = model.predict(X_test)

# Monitoring metrics
mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)

# Show the actual-vs-predicted comparison and the metrics
monitor_df = pd.DataFrame({
    "Real": y_test,
    "Predicción": y_pred
})
display(monitor_df)
print(f"MSE: {mse:.4f}, R2: {r2:.4f}")