## Evaluación del modelo con nuevos datos

In [1]:
# Bootstrap: this cell is meant to be run from notebooks/, so the parent of
# the current working directory is taken as the project root.
import sys
from pathlib import Path

ROOT = Path.cwd().parent
# Raw yellow-taxi trip data for February 2020 (parquet), used as the
# evaluation set in this cell.
URL_FEB = "https://d37ci6vzurychx.cloudfront.net/trip-data/yellow_tripdata_2020-02.parquet"

# Make the project's src/ directory importable so the local packages
# (`models`, `data`) imported below can be resolved.
sys.path.append(str(ROOT.joinpath("src")))

from models.predict_model import (
    evaluate_binary,
    load_model,
    make_dataset_from_raw,
    predict_labels_from_proba,
    predict_proba,
    save_metrics_json,
)
from data.dataset import load_data

# 1) Load the persisted model from <ROOT>/models/random_forest.joblib.
model = load_model(ROOT.joinpath("models", "random_forest.joblib"))

# 2) Load the raw February 2020 data and preprocess it.
#    NOTE(review): df_proc is presumably the processed DataFrame that
#    X_test / y_test are derived from -- confirm in make_dataset_from_raw.
df_raw = load_data(URL_FEB, filetype="parquet")
X_test, y_test, df_proc = make_dataset_from_raw(
    df_raw,
    target_col="high_tip",
)

# 3) Predict probabilities, turn them into labels at a 0.5 threshold, and
#    compute the evaluation metrics.
proba = predict_proba(model, X_test)
y_pred = predict_labels_from_proba(proba, threshold=0.5)
metrics = evaluate_binary(
    y_true=y_test,
    y_pred=y_pred,
    y_proba=proba,
)
print(metrics["classification_report"])

# 4) Save the metrics to <ROOT>/reports/metrics_eval_feb2020.json.
save_metrics_json(
    metrics,
    ROOT.joinpath("reports", "metrics_eval_feb2020.json"),
)


              precision    recall  f1-score   support

           0      0.689     0.130     0.219   2676852
           1      0.596     0.956     0.735   3600002

    accuracy                          0.604   6276854
   macro avg      0.643     0.543     0.477   6276854
weighted avg      0.636     0.604     0.515   6276854

