---

In [None]:
import sys
sys.path.append("../../../")
from ML.utils.mlflow_flow import set_tracking, quick_log_and_register
from ML.utils.utils_yose import load_data, make_features

from ensemble import EnsembleModel
import numpy as np
import pandas as pd

import mlflow
import os

from warnings import filterwarnings

# Name under which the model is registered in (and later loaded from) the
# MLflow model registry.
MODEL_NAME = "elnet_lgbm"

# MLflow tracking server hosted on DagsHub for this project.
TRACKING_URI = "https://dagshub.com/Yosesotomayor/retoCasas_v2.mlflow"

# Silence all Python warnings for the rest of the notebook session.
filterwarnings("ignore")
set_tracking(TRACKING_URI)

---

In [2]:
# Seed handed to the model in the next cell.
rstate = 42

df_train, df_test = load_data()

# Target: SalePrice on the log1p scale, stored as float.
y = np.log1p(df_train["SalePrice"]).astype(float)

# Features: drop the target/identifier columns, then run the shared
# feature-engineering helper on train and test alike.
X = make_features(df_train.drop(columns=["SalePrice", "Id"]))
X_test = make_features(df_test.drop(columns=["Id"]))

In [3]:
# Instantiate the project's EnsembleModel, passing the notebook seed as `rstate`.
model = EnsembleModel(rstate=rstate)
# Fit on the engineered features against the log1p-transformed SalePrice target.
# Kept as the cell's last bare expression, so the value returned by fit() is
# what the notebook displays for this cell.
# NOTE(review): the recorded output reports "CV RMSE std: 0.0000" and a first
# LightGBM fit on 1314 of 1460 rows; this looks like a single hold-out split
# rather than k-fold CV. Confirm inside EnsembleModel.
model.fit(X, y)

[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.002296 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 4472
[LightGBM] [Info] Number of data points in the train set: 1314, number of used features: 191
[LightGBM] [Info] Start training from score 12.022444
CV RMSE mean: 0.0768
CV RMSE std: 0.0000
CV R2 mean: 0.9549
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.002289 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 4573
[LightGBM] [Info] Number of data points in the train set: 1460, number of used features: 196
[LightGBM] [Info] Start training from score 12.024057


<ensemble.EnsembleModel at 0x1419a3c50>

---

In [None]:
import dagshub

# Configure MLflow to log to the DagsHub repository for this project.
dagshub.init(repo_owner="Yosesotomayor", repo_name="retoCasas_v2", mlflow=True)

# Training-set metrics logged with the run. Both are computed on the same data
# the model was fit on, so they are optimistic.
# `model.score` is logged under "r2", as before; evaluate it once and reuse it.
train_r2 = model.score(X, y)

# RMSE must come from the residuals. The previous value, sqrt(score), is not an
# error metric at all and becomes NaN whenever the score is negative.
# NOTE(review): assumes EnsembleModel exposes predict(X) and that it returns
# values on the same (log1p) scale as `y` -- confirm against ensemble.py.
train_residuals = np.ravel(y) - np.ravel(model.predict(X))
train_rmse = float(np.sqrt(np.mean(train_residuals ** 2)))

# Last expression of the cell: its return value is displayed as the cell output.
quick_log_and_register(
    experiment="elnet_lgbm",
    run_name="elnet_lgbm",
    model=model,
    X=X, y=y,
    model_name=MODEL_NAME,
    set_challenger=True,
    X_test=X_test,
    params=model.get_params(),
    metrics={"r2": train_r2, "rmse": train_rmse},
)

Subiendo modelo...
🏃 View run elnet_lgbm at: https://dagshub.com/Yosesotomayor/retoCasas_v2.mlflow/#/experiments/0/runs/2ebebb7b98284b9293e2c6b91f845d4a
🧪 View experiment at: https://dagshub.com/Yosesotomayor/retoCasas_v2.mlflow/#/experiments/0


Successfully registered model 'elnet_lgbm'.
2025/08/28 10:02:07 INFO mlflow.store.model_registry.abstract_store: Waiting up to 300 seconds for model version to finish creation. Model name: elnet_lgbm, version 1
Created version '1' of model 'elnet_lgbm'.


Modelo subido.


('runs:/2ebebb7b98284b9293e2c6b91f845d4a/model', '1')

---

In [5]:
# Load, through the generic pyfunc interface, the registered version of
# MODEL_NAME that currently carries the "challenger" alias.
challenger_uri = f"models:/{MODEL_NAME}@challenger"
model_r = mlflow.pyfunc.load_model(challenger_uri)

# NOTE(review): the model was fit on log1p(SalePrice). The name `preds_orig`
# suggests the loaded model already returns prices on the original scale, but
# nothing in this notebook shows an expm1 step -- confirm before submitting.
preds_orig = model_r.predict(X_test)

# Destination for the submission file; the directory is created on demand.
# NOTE(review): the folder and file use "elnet_lgbm2" while MODEL_NAME is
# "elnet_lgbm" -- presumably intentional, verify.
sub_dir = "../../../data/housing_submissions/elnet_lgbm2"
os.makedirs(sub_dir, exist_ok=True)
submission_path = os.path.join(sub_dir, "submission_elnet_lgbm2.csv")

# Two-column submission: test-set Id next to the predicted SalePrice.
df_sub = pd.DataFrame(
    {
        "Id": df_test["Id"],
        "SalePrice": preds_orig,
    }
)
df_sub.to_csv(submission_path, index=False)

Downloading artifacts: 100%|██████████| 7/7 [00:07<00:00,  1.01s/it]


---