---

In [9]:

import sys
sys.path.append("../../../")
from utils.mlflow_flow import set_tracking, quick_log_and_register
from utils.utils_yose import load_data, make_features, build_preprocessor

from ensemble import EnsembleModel
import numpy as np

import joblib

import dagshub
import mlflow
import os

from mlflow.tracking import MlflowClient

from warnings import filterwarnings
from dotenv import load_dotenv

# Point the MLflow client at the DagsHub-hosted tracking server for this repo.
dagshub.init(repo_owner='Yosesotomayor', repo_name='retoCasas_v2', mlflow=True)

# Load settings from a local .env file into the process environment, then read
# the values the rest of the notebook relies on.
load_dotenv()
ENDPOINT_URL = os.getenv("MLFLOW_TRACKING_URI")
ALIAS = os.getenv("MODEL_ALIAS")
MODEL_NAME = os.getenv("MODEL_NAME")

# Mirror the DagsHub token into MLFLOW_TRACKING_TOKEN. os.environ only accepts
# strings, so an unset DAGSHUB_TOKEN would otherwise fail with an opaque
# "str expected, not NoneType" TypeError; fail with an actionable message instead.
_dagshub_token = os.getenv("DAGSHUB_TOKEN")
if _dagshub_token is None:
    raise RuntimeError(
        "DAGSHUB_TOKEN is not set; define it in the environment or in the .env "
        "file before running this notebook."
    )
os.environ['MLFLOW_TRACKING_TOKEN'] = _dagshub_token

# Only applied when the caller has not already chosen values for these.
os.environ.setdefault("MLFLOW_HTTP_REQUEST_TIMEOUT", "60")
os.environ.setdefault("MLFLOW_HTTP_REQUEST_MAX_RETRIES", "0")

filterwarnings("ignore")
set_tracking(ENDPOINT_URL)

# Directory holding the housing data, relative to this notebook.
sub_dir = "../../../data/housing_data/"
df_train, df_test = load_data(sub_dir=sub_dir)

# Value handed to EnsembleModel as `rstate` in a later cell
# (presumably the random seed -- confirm against ensemble.py).
rstate = 42

# Target: log1p-transformed SalePrice, cast to float.
y = np.log1p(df_train["SalePrice"]).astype(float)

# Feature matrices: drop the identifier (and the target for the training set),
# then run the shared feature-engineering helper.
X = make_features(df_train.drop(columns=["SalePrice", "Id"]))
X_test = make_features(df_test.drop(columns=["Id"]))

---

In [2]:
# Build the ensemble with the shared `rstate` value defined in the setup cell.
model = EnsembleModel(rstate=rstate)
# Fit on the full training features/target. Kept as the cell's last expression,
# so the notebook displays whatever fit_full returns (an EnsembleModel per the
# recorded output).
model.fit_full(X, y)

[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.002932 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 4573
[LightGBM] [Info] Number of data points in the train set: 1460, number of used features: 196
[LightGBM] [Info] Start training from score 2.566329


<ensemble.EnsembleModel at 0x110a71b20>

---

In [3]:
# Re-run the DagsHub initialisation so this cell logs to the DagsHub-configured
# MLflow backend even if the tracking settings were changed after the setup cell.
dagshub.init(repo_owner="Yosesotomayor", repo_name="retoCasas_v2", mlflow=True)

# rmse_std is forced to 0.0 before the metrics are collected
# (NOTE(review): presumably because fit_full has no CV folds -- confirm).
model.rmse_std = 0.0
metrics = model.get_metrics()

# Single source of truth for the pickled sub-models: the same paths are used
# for writing the files and for attaching them to the run as artifacts.
artifact_paths = {
    "elnet": "./model/elasticnet.pkl",
    "lgbm": "./model/lgbm.pkl",
}

# joblib.dump does not create parent directories; make sure ./model exists so
# the cell also works on a fresh checkout.
os.makedirs("./model", exist_ok=True)
joblib.dump(model.elasticnet, artifact_paths["elnet"])
joblib.dump(model.lgbm, artifact_paths["lgbm"])

# Log the run and register the model; left as the last expression so the
# notebook displays the helper's return value.
quick_log_and_register(
    experiment="Housing_Competition",
    run_name="elnet_lgbm_kaggle",
    model=model,
    X=X, y=y,
    model_name=MODEL_NAME,
    set_challenger=True,
    X_test=X_test,
    params=model.get_params(),
    metrics=metrics,  # reuse the dict computed above instead of recomputing it
    tags={"model": "elnet_lgbm",
          'rmse': model.rmse
          },
    artifacts=artifact_paths,
)

Subiendo modelo...
Modelo subido.


('runs:/c5d7f7da87664b67ad1595f33557c4cc/model', '3')

---

In [5]:
# Connection diagnostics: show where MLflow is pointed and whether credentials
# are present. Tokens are reduced to booleans so the secrets never reach the
# notebook output.
print("[URIs]")
connection_report = [
    ("Tracking URI :", mlflow.get_tracking_uri()),
    ("Registry URI :", mlflow.get_registry_uri()),
    ("MLFLOW_TRACKING_TOKEN:", bool(os.getenv("MLFLOW_TRACKING_TOKEN"))),
    ("DAGSHUB_TOKEN        :", bool(os.getenv("DAGSHUB_TOKEN"))),
    ("MLFLOW_TRACKING_USERNAME:", os.getenv("MLFLOW_TRACKING_USERNAME")),
]
for label, value in connection_report:
    print(label, value)
print()

client = MlflowClient()

# Resolve the registered model version that the configured alias points to.
mv = client.get_model_version_by_alias(MODEL_NAME, ALIAS)
run_id = mv.run_id
print(f"[Modelo] {MODEL_NAME}@{ALIAS} -> version={mv.version}")
for label, value in (
    ("source           :", mv.source),
    # storage_location may be missing on the returned object, hence the getattr default.
    ("storage_location :", getattr(mv, "storage_location", None)),
):
    print(label, value)

[URIs]
Tracking URI : https://dagshub.com/Yosesotomayor/retoCasas_v2.mlflow
Registry URI : https://dagshub.com/Yosesotomayor/retoCasas_v2.mlflow
MLFLOW_TRACKING_TOKEN: True
DAGSHUB_TOKEN        : True
MLFLOW_TRACKING_USERNAME: Yosesotomayor

[Modelo] elnet_lgbm@challenger -> version=3
source           : mlflow-artifacts:/2e5f80440ff845c0b3b48f44c88470bd/c5d7f7da87664b67ad1595f33557c4cc/artifacts/model
storage_location : None


---