# 01 - Development Playground (Databricks)

Este notebook se usa como espacio de desarrollo rápido:  
- Carga de datos  
- Pruebas interactivas  
- Entrenamiento exploratorio  
- Logging a MLflow  
- Llamado al código productivo en `src/`  

---


In [0]:
import mlflow
import pandas as pd
import sys, os
import yaml
from sklearn.ensemble import RandomForestRegressor

# Make the project's src/ directory importable from this notebook.
# NOTE(review): repo_root is hard-coded to one user's workspace checkout; anyone
# else running this notebook must point it at their own copy of the repo.
repo_root = "/Workspace/Users/alexanderdiaz21795@gmail.com/github-actions-lab"
src_dir = os.path.join(repo_root, "src")
# Notebook cells get re-run often; only append when the entry is missing so
# sys.path does not accumulate duplicates.
if src_dir not in sys.path:
    sys.path.append(src_dir)

from ingest import load_data
from prep import prepare_data
from utils.metrics import eval_metrics


# 2. Cargar los datos

In [0]:
# Parse the YAML configuration; later cells read its "model", "training" and
# "mlflow" sections.
# NOTE(review): the path is relative, so this depends on the notebook's working
# directory containing configs/ — confirm, or build the path from repo_root.
# The encoding is explicit because open() otherwise uses the platform locale,
# which can mis-decode non-ASCII characters in the file.
with open("configs/config.yaml", "r", encoding="utf-8") as f:
    cfg = yaml.safe_load(f)


In [0]:
# Display the parsed configuration so it can be inspected before use.
cfg

In [0]:
# Load the dataset through the project's load_data helper, driven by the config.
df = load_data(cfg)
# Preview the first rows (assumes df is a pandas DataFrame — TODO confirm).
df.head()

In [0]:
# Summary statistics of the loaded data (assumes a pandas DataFrame — TODO confirm).
df.describe()

In [0]:
# Share of missing values: the mean of the boolean isna() mask
# (per column, assuming df is a pandas DataFrame).
df.isna().mean()

# 3. Preparar los datos

In [0]:
# Unpack the four values returned by the project's prepare_data helper into
# train/test features and targets; later cells fit on the train pair and
# evaluate on the test pair.
X_train, X_test, y_train, y_test = prepare_data(cfg, df)

# Show the shapes of both feature sets as a quick sanity check of the split.
X_train.shape, X_test.shape

# 4. Entrenamiento rápido (experimento manual)

In [0]:
# Hyperparameters come from the "model.params" section of the config; the seed
# comes from the "training" section.
rf_params = cfg["model"]["params"]
model = RandomForestRegressor(
    n_estimators=rf_params["n_estimators"],
    max_depth=rf_params["max_depth"],
    random_state=cfg["training"]["random_state"],
)

# Fit on the training split, then predict on the held-out split.
model.fit(X_train, y_train)
preds = model.predict(X_test)

# Peek at the first ten predictions.
preds[:10]


# 5. Evaluación preliminar

In [0]:
# Score the held-out predictions with the project's eval_metrics helper.
# The result is indexed with "rmse" when it is logged to MLflow further down.
metrics = eval_metrics(y_test, preds)
metrics

# 6. Logging con MLflow

In [0]:
# Show the MLflow experiment name that the run below is logged under.
cfg["mlflow"]["experiment_name"]

In [0]:
import mlflow.sklearn
from mlflow.tracking import MlflowClient

# Log this manual experiment under the experiment named in the config.
mlflow.set_experiment(cfg["mlflow"]["experiment_name"])

with mlflow.start_run(run_name="manual_dev_train"):
    # Record the evaluation metric and the hyperparameters used for training.
    mlflow.log_metric("rmse", metrics["rmse"])
    for param_key in ("n_estimators", "max_depth"):
        mlflow.log_param(param_key, cfg["model"]["params"][param_key])

    # A few test rows serve as the input example stored with the model.
    input_example = X_test.iloc[:3]
    name = cfg["mlflow"]["registered_model_name"]

    # Log the model and register it in one step; the returned ModelInfo
    # carries the registered version.
    model_info = mlflow.sklearn.log_model(
        sk_model=model,
        artifact_path="model",
        input_example=input_example,
        registered_model_name=name,
    )
    version = model_info.registered_model_version

    # Point the "current" alias at the version that was just registered.
    client = MlflowClient()
    client.set_registered_model_alias(name=name, alias="current", version=version)

    print(f"Modelo registrado como {name} v{version}")



# 7. Cargar el modelo desde MLflow

In [0]:
# Alias and registered name identifying which model version to load.
model_alias = "current"
model_name = cfg["mlflow"]["registered_model_name"]

# Model Registry URI in the alias form: models:/<name>@<alias>
model_uri = f"models:/{model_name}@{model_alias}"

In [0]:
# Load the registered model from model_uri using MLflow's sklearn flavor,
# then display the loaded object.
loaded_model = mlflow.sklearn.load_model(model_uri)
loaded_model

# 8. Testear predicción con modelo cargado

In [0]:
# Sanity-check the registry round trip: predict on the first five test rows.
# Rows are taken positionally with .iloc, matching how the input example for
# the logged model was sliced from X_test, instead of relying on implicit
# row slicing through [].
pred = loaded_model.predict(X_test.iloc[:5])
pred
