# 01 - Development Playground (Databricks)

Este notebook se usa como espacio de desarrollo rápido:  
- Carga de datos  
- Pruebas interactivas  
- Entrenamiento exploratorio  
- Logging a MLflow  
- Llamado al código productivo en `src/`  

---


In [0]:
import os
import sys

import mlflow
import pandas as pd

# Make the project's src/ directory importable from this notebook.
# NOTE: replace <your-user> with your own Databricks Repos folder before running.
repo_root = "/Workspace/Repos/<your-user>/my-ml-project"
src_path = os.path.join(repo_root, "src")
# Guard the append so that re-running this cell does not stack duplicate
# entries in sys.path.
if src_path not in sys.path:
    sys.path.append(src_path)

# Project-local modules; these resolve only after src/ is on sys.path.
from ingest import load_data
from prep import prepare_data
from utils.metrics import eval_metrics


# 2. Cargar los datos

In [0]:
# Load the dataset through the project's ingest module and preview the first rows.
# NOTE(review): load_data() is called with no arguments; presumably it returns a
# pandas DataFrame — confirm against src/ingest.
df = load_data()
df.head()

In [0]:
# Summary statistics of df (assumes df is a pandas DataFrame — TODO confirm).
df.describe()

In [0]:
# Mean of the missing-value mask, i.e. the fraction of missing entries
# (per column if df is a pandas DataFrame — TODO confirm).
df.isna().mean()

# 3. Preparar los datos

In [0]:
# Split df into train/test features and targets using the project's prep module.
# NOTE(review): the split ratio and any preprocessing live inside prepare_data
# and are not visible from this notebook.
X_train, X_test, y_train, y_test = prepare_data(df)

# Display the shapes of both feature sets as a quick sanity check.
X_train.shape, X_test.shape

# 4. Entrenamiento rápido (experimento manual)

In [0]:
from sklearn.ensemble import RandomForestClassifier

# Quick exploratory baseline. random_state is pinned so that a
# Restart & Run All reproduces the same forest, predictions and metrics;
# without it every run of this cell trains a different model.
model = RandomForestClassifier(n_estimators=200, max_depth=6, random_state=42)
model.fit(X_train, y_train)

# Predict on the test features returned by prepare_data and peek at the
# first ten predictions.
preds = model.predict(X_test)
preds[:10]


# 5. Evaluación preliminar

In [0]:
# Score the test-set predictions with the project's metric helper.
# NOTE(review): the result is later passed to mlflow.log_metrics, so it is
# presumably a dict of metric name -> numeric value — confirm against utils.metrics.
metrics = eval_metrics(y_test, preds)
metrics

# 6. Logging con MLflow

In [0]:
import mlflow.sklearn

mlflow.set_experiment("/Users/<your-user>/my-ml-project/experiments")

with mlflow.start_run(run_name="manual_dev_train"):
    # Read the hyperparameters back from the estimator instead of retyping
    # the literals, so the logged values can never drift from the model
    # that was actually trained. random_state is recorded as well so the
    # run documents whether (and how) it was seeded.
    mlflow.log_params({
        "n_estimators": model.n_estimators,
        "max_depth": model.max_depth,
        "random_state": model.random_state,
    })

    mlflow.log_metrics(metrics)

    # Log the fitted model under the "model" artifact path; the two-row
    # input example is stored alongside it.
    mlflow.sklearn.log_model(
        model,
        artifact_path="model",
        input_example=X_train.iloc[:2]
    )



# 7. Cargar el modelo desde MLflow

In [0]:
# The training run was closed when its `with mlflow.start_run(...)` block
# exited. Calling mlflow.get_artifact_uri() here would therefore start a
# brand-new, empty run and return a URI that contains no model. Resolve the
# URI from the most recent run of this session instead.
last_run = mlflow.last_active_run()
if last_run is None:
    raise RuntimeError(
        "No MLflow run found in this session; run the logging cell first."
    )

logged_model = f"runs:/{last_run.info.run_id}/model"
logged_model

# 8. Testear predicción con modelo cargado

In [0]:
# Take the first five rows of the test features as a smoke-test sample.
test_df = X_test.iloc[:5]

# Reload the model from the `logged_model` URI and predict on the sample
# to confirm the logged artifact round-trips.
loaded_model = mlflow.sklearn.load_model(logged_model)
loaded_model.predict(test_df)
