---

In [None]:
import sys
sys.path.append("../../../")
from ML.utils.mlflow_flow import set_tracking, quick_log_and_register
from ML.utils.utils_yose import load_data, make_features

from ensemble import EnsembleModel
import numpy as np
import pandas as pd

import dagshub
import mlflow
import os

from warnings import filterwarnings

# Silence all Python warnings for the rest of the session.
filterwarnings("ignore")

# Notebook-wide constants: the registry name of the model and the seed
# handed to the ensemble in the training cell.
MODEL_NAME = "elnet_lgbm"
rstate = 42

# Point MLflow at the DagsHub-hosted tracking server for this project.
set_tracking("https://dagshub.com/Yosesotomayor/retoCasas_v2.mlflow")

# Raw train / test frames as returned by the project loader.
df_train, df_test = load_data()

# Target is modelled as log1p(SalePrice).
y = np.log1p(df_train["SalePrice"]).astype(float)

# Drop the identifier (and, for train, the target) before feature
# engineering; train is processed before test, as in the original cell.
X = make_features(df_train.drop(columns=["SalePrice", "Id"]))
X_test = make_features(df_test.drop(columns=["Id"]))

---

In [8]:
# Build the ensemble with the notebook-level seed `rstate` and train it on
# the engineered features `X` against the log1p-transformed target `y`.
model = EnsembleModel(rstate=rstate)
# Kept as the cell's final expression, so the notebook displays whatever
# `fit` returns (an EnsembleModel repr in the recorded run).
# NOTE(review): the recorded output shows LightGBM training on 1314 rows,
# then CV metrics, then training on 1460 rows — presumably a validation fit
# followed by a full refit; "CV RMSE std: 0.0000" hints at a single split.
# Confirm both against ensemble.py.
model.fit(X, y)

[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.002564 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 4472
[LightGBM] [Info] Number of data points in the train set: 1314, number of used features: 191
[LightGBM] [Info] Start training from score 12.022444
CV RMSE mean: 0.0768
CV RMSE std: 0.0000
CV R2 mean: 0.9549
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.001951 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 4573
[LightGBM] [Info] Number of data points in the train set: 1460, number of used features: 196
[LightGBM] [Info] Start training from score 12.024057


<ensemble.EnsembleModel at 0x14f7f7650>

---

In [10]:


# Connect this session to the DagsHub repository with MLflow tracking enabled.
dagshub.init(repo_owner="Yosesotomayor", repo_name="retoCasas_v2", mlflow=True)

# Collect everything the project logging helper needs: experiment / run
# labels, the fitted model with its training data, the registry name, the
# test features, and the model's own reported params and metrics.
# NOTE(review): `set_challenger=True` presumably assigns the "challenger"
# registry alias that the submission cell loads — confirm in mlflow_flow.
registration_kwargs = dict(
    experiment="Housing_Competition",
    run_name="elnet_lgbm",
    model=model,
    X=X,
    y=y,
    model_name=MODEL_NAME,
    set_challenger=True,
    X_test=X_test,
    params=model.get_params(),
    metrics=model.get_metrics(),
)

# Final expression of the cell, so the helper's return value is displayed.
quick_log_and_register(**registration_kwargs)

INFO:httpx:HTTP Request: GET https://dagshub.com/api/v1/repos/Yosesotomayor/retoCasas_v2 "HTTP/1.1 200 OK"


INFO:dagshub:Initialized MLflow to track repo "Yosesotomayor/retoCasas_v2"


INFO:dagshub:Repository Yosesotomayor/retoCasas_v2 initialized!


Subiendo modelo...


2025/08/28 10:12:55 INFO mlflow.tracking.fluent: Experiment with name 'Housing_Competition' does not exist. Creating a new experiment.


🏃 View run elnet_lgbm at: https://dagshub.com/Yosesotomayor/retoCasas_v2.mlflow/#/experiments/1/runs/ceb1d62c3a24458296732a6d57a1d48f
🧪 View experiment at: https://dagshub.com/Yosesotomayor/retoCasas_v2.mlflow/#/experiments/1


Successfully registered model 'elnet_lgbm'.
2025/08/28 10:13:13 INFO mlflow.store.model_registry.abstract_store: Waiting up to 300 seconds for model version to finish creation. Model name: elnet_lgbm, version 1
Created version '1' of model 'elnet_lgbm'.


Modelo subido.


('runs:/ceb1d62c3a24458296732a6d57a1d48f/model', '1')

---

In [4]:
# Resolve the registered model through its "challenger" alias and score the
# engineered test features with it.
challenger_uri = f"models:/{MODEL_NAME}@challenger"
model_r = mlflow.pyfunc.load_model(challenger_uri)
# NOTE(review): the target was trained as log1p(SalePrice); the name
# `preds_orig` suggests the logged model already returns prices on the
# original scale — confirm before submitting.
preds_orig = model_r.predict(X_test)

# Destination of the submission file; the directory is created on demand.
# NOTE(review): the "elnet_lgbm2" suffix differs from MODEL_NAME — verify
# that this is intentional.
sub_dir = "../../../data/housing_submissions/elnet_lgbm2"
submission_path = os.path.join(sub_dir, "submission_elnet_lgbm2.csv")
os.makedirs(sub_dir, exist_ok=True)

# Two-column table (Id, SalePrice), written without the index column.
submission_columns = {"Id": df_test["Id"], "SalePrice": preds_orig}
df_sub = pd.DataFrame(submission_columns)
df_sub.to_csv(submission_path, index=False)

Downloading artifacts: 100%|██████████| 7/7 [00:03<00:00,  1.85it/s]


---