---

In [1]:
import sys
sys.path.append("../../../")
from ML.utils.mlflow_flow import set_tracking, quick_log_and_register
from ML.utils.utils_yose import load_data, make_features 

from ensemble import EnsembleModel
import numpy as np
import pandas as pd

import dagshub
import mlflow
import os

from warnings import filterwarnings
from dotenv import load_dotenv

dagshub.init(repo_owner='Yosesotomayor', repo_name='retoCasas_v2', mlflow=True)

load_dotenv()
ENDPOINT_URL = os.getenv("MLFLOW_TRACKING_URI")
os.getenv("MLFLOW_TRACKING_URI")
os.getenv("MLFLOW_REGISTRY_URI")
os.getenv("DAGSHUB_TOKEN")

os.environ['MLFLOW_TRACKING_TOKEN'] = os.getenv("DAGSHUB_TOKEN")

filterwarnings("ignore")
set_tracking(ENDPOINT_URL)

MODEL_NAME = "elnet_lgbm"

sub_dir = "../../../data/housing_data/"
df_train, df_test = load_data(sub_dir = sub_dir)

y = np.log1p(df_train["SalePrice"]).astype(float)
X = df_train.drop(["SalePrice", "Id"], axis=1)
X = make_features(X)

rstate = 42

X_test = make_features(df_test.drop(["Id"], axis=1))

---

In [2]:
model = EnsembleModel(rstate=rstate)
model.fit_full(X, y)

[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.002287 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 4573
[LightGBM] [Info] Number of data points in the train set: 1460, number of used features: 196
[LightGBM] [Info] Start training from score 2.566329


<ensemble.EnsembleModel at 0x14dea1d60>

---

In [3]:
dagshub.init(repo_owner = "Yosesotomayor", repo_name = "retoCasas_v2", mlflow=True)

quick_log_and_register(
    experiment="Housing_Competition",
    run_name="elnet_lgbm_kaggle",
    model=model,
    X=X, y=y,
    model_name=MODEL_NAME,
    set_challenger=True,
    X_test=X_test,
    params=model.get_params(),
    metrics=model.get_metrics(),
)

Got status code 500 when getting repository info.


RuntimeError: Got status code 500 when getting repository info.

In [5]:
import os, mlflow
from mlflow.tracking import MlflowClient

URI = "https://dagshub.com/<USER>/<REPO>.mlflow"  # <-- pon tu repo correcto

os.environ["MLFLOW_TRACKING_URI"] = URI
mlflow.set_tracking_uri(URI)
try:
    mlflow.set_registry_uri(URI)
except Exception:
    pass

print("mlflow version:", mlflow.__version__)
print("tracking:", mlflow.get_tracking_uri())
print("registry:", mlflow.get_registry_uri())
print("has token:", bool(os.environ.get("DAGSHUB_TOKEN") or os.environ.get("MLFLOW_TRACKING_TOKEN")))

# 1) Experimentos (API alto nivel)
print("\n=== Experimentos ===")
for e in mlflow.list_experiments():
    print(" -", e.name, e.experiment_id)

# 2) Registry (API client)
client = MlflowClient()
print("\n=== Model Registry ===")
try:
    rms = client.search_registered_models(max_results=5)
    print("OK, models:", [m.name for m in rms])
except Exception as e:
    print("Registry falla:", type(e).__name__, str(e)[:200])

# 3) Carga directa por runs (descarta problemas de Registry)
try:
    m = mlflow.pyfunc.load_model("runs:/ceb1d62c3a24458296732a6d57a1d48f/model")
    print("\nCarga por runs:/ --> OK")
except Exception as e:
    print("\nCarga por runs:/ falla:", type(e).__name__, str(e)[:200])

mlflow version: 2.22.0
tracking: https://dagshub.com/<USER>/<REPO>.mlflow
registry: https://dagshub.com/<USER>/<REPO>.mlflow
has token: True

=== Experimentos ===


AttributeError: module 'mlflow' has no attribute 'list_experiments'

---

In [None]:
model_r = mlflow.pyfunc.load_model("runs:/ceb1d62c3a24458296732a6d57a1d48f/model")

Downloading artifacts: 100%|██████████| 7/7 [00:01<00:00,  4.45it/s]


In [None]:
#model_r = mlflow.pyfunc.load_model(f"models:/{MODEL_NAME}@challenger")
preds_orig = model_r.predict(X_test)


sub_dir = "../../../data/housing_submissions/elnet_lgbm2-kaggle"
os.makedirs(sub_dir, exist_ok=True)
submission_path = os.path.join(sub_dir, "submission_elnet_lgbm-kaggle.csv")


df_sub = pd.DataFrame({"Id": df_test["Id"], "SalePrice": preds_orig})
df_sub.to_csv(submission_path, index=False)

---