In [1]:
import mlflow
from mlflow import MlflowClient
from dotenv import dotenv_values

In [2]:
import sys

# Read key/value pairs from the project's .env file into a plain dict.
env_vars = dotenv_values("../.env")

# Put WORKING_DIR on the import path so that `utils` can be imported
# (presumably `utils` lives under WORKING_DIR -- confirm against the repo layout).
working_dir = env_vars.get("WORKING_DIR")
if not working_dir:
    # Without this check a valueless key would be formatted as the literal
    # string "None" and silently added to sys.path.
    raise KeyError("WORKING_DIR is not set in ../.env")
if working_dir not in sys.path:
    # Guarded so re-running this cell does not keep appending duplicates.
    sys.path.append(working_dir)
from utils import load_config

# Project configuration; later cells read config["model_name"].
config = load_config("../config/development/config.yaml")

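Before running the cells below, start the MLflow tracking server from a terminal (its address should match `MLFLOW_TRACKING_URI` in `../.env`):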
```
mlflow server --host 127.0.0.1 --port 5000
```

In [3]:
# Use one tracking URI for both the explicit client and the module-level
# (fluent) MLflow API. `mlflow.search_runs` below, and the `runs:/` model load
# later in the notebook, resolve against the global tracking URI rather than
# the one handed to MlflowClient, so it has to be set explicitly.
tracking_uri = env_vars["MLFLOW_TRACKING_URI"]
mlflow.set_tracking_uri(tracking_uri)
client = MlflowClient(tracking_uri=tracking_uri)

experiment_name = "Enefit DataV1 lightgbm HpoConfigV1"
experiment = client.get_experiment_by_name(experiment_name)
if experiment is None:
    # get_experiment_by_name returns None for an unknown name; fail with a
    # readable message instead of an AttributeError on the next line.
    raise ValueError(
        f"MLflow experiment {experiment_name!r} was not found at {tracking_uri!r}"
    )
experiment_id = experiment.experiment_id

# One row per run, with metrics.*, params.* and tags.* columns.
runs_df = mlflow.search_runs([experiment_id])
runs_df.head()

Unnamed: 0,run_id,experiment_id,status,artifact_uri,start_time,end_time,metrics.test_mae,metrics.training_duration,metrics.train_mae,params.max_depth,params.feature_fraction,params.num_trees,params.num_leaves,params.learning_rate,tags.mlflow.user,tags.mlflow.runName,tags.mlflow.source.name,tags.mlflow.log-model.history,tags.mlflow.source.git.commit,tags.mlflow.source.type
0,54560700f31343be81d3065110ba6d87,215806994480999889,FINISHED,mlflow-artifacts:/215806994480999889/54560700f...,2025-05-03 06:53:28.191000+00:00,2025-05-03 06:53:35.628000+00:00,276.514219,0.631784,267.450256,6.0,0.3962723889353149,8.0,24.0,0.099063089087863,gabriel,TrialNumber35-20250503T065327UTC,train.py,"[{""run_id"": ""54560700f31343be81d3065110ba6d87""...",da8acbc0b8c1c3f7b3fbae9a652bfbd61a8ef941,LOCAL
1,8c1446a5f1424e06a1febd5163a17330,215806994480999889,FINISHED,mlflow-artifacts:/215806994480999889/8c1446a5f...,2025-05-03 05:51:55.345000+00:00,2025-05-03 05:52:06.041000+00:00,103.473742,1.569685,63.773141,,,,,,gabriel,DefaultParameters-20250503T055155UTC,train.py,"[{""run_id"": ""8c1446a5f1424e06a1febd5163a17330""...",da8acbc0b8c1c3f7b3fbae9a652bfbd61a8ef941,LOCAL
2,6ce5f9152766473186623e9ad218164b,215806994480999889,FINISHED,mlflow-artifacts:/215806994480999889/6ce5f9152...,2025-04-24 14:24:36.886000+00:00,2025-04-24 14:24:42.042000+00:00,277.246492,0.709642,267.962692,6.0,0.3962723889353149,8.0,24.0,0.099063089087863,gabriel,TrialNumber35-20250424T142435UTC,train.py,"[{""run_id"": ""6ce5f9152766473186623e9ad218164b""...",b3988f18ead5caa415f3d9f2930d395ff5f61fef,LOCAL


In [4]:
# Rank the runs from lowest to highest test MAE so the best run comes first.
sorted_runs_df = runs_df.sort_values("metrics.test_mae")
sorted_runs_df.head()

Unnamed: 0,run_id,experiment_id,status,artifact_uri,start_time,end_time,metrics.test_mae,metrics.training_duration,metrics.train_mae,params.max_depth,params.feature_fraction,params.num_trees,params.num_leaves,params.learning_rate,tags.mlflow.user,tags.mlflow.runName,tags.mlflow.source.name,tags.mlflow.log-model.history,tags.mlflow.source.git.commit,tags.mlflow.source.type
1,8c1446a5f1424e06a1febd5163a17330,215806994480999889,FINISHED,mlflow-artifacts:/215806994480999889/8c1446a5f...,2025-05-03 05:51:55.345000+00:00,2025-05-03 05:52:06.041000+00:00,103.473742,1.569685,63.773141,,,,,,gabriel,DefaultParameters-20250503T055155UTC,train.py,"[{""run_id"": ""8c1446a5f1424e06a1febd5163a17330""...",da8acbc0b8c1c3f7b3fbae9a652bfbd61a8ef941,LOCAL
0,54560700f31343be81d3065110ba6d87,215806994480999889,FINISHED,mlflow-artifacts:/215806994480999889/54560700f...,2025-05-03 06:53:28.191000+00:00,2025-05-03 06:53:35.628000+00:00,276.514219,0.631784,267.450256,6.0,0.3962723889353149,8.0,24.0,0.099063089087863,gabriel,TrialNumber35-20250503T065327UTC,train.py,"[{""run_id"": ""54560700f31343be81d3065110ba6d87""...",da8acbc0b8c1c3f7b3fbae9a652bfbd61a8ef941,LOCAL
2,6ce5f9152766473186623e9ad218164b,215806994480999889,FINISHED,mlflow-artifacts:/215806994480999889/6ce5f9152...,2025-04-24 14:24:36.886000+00:00,2025-04-24 14:24:42.042000+00:00,277.246492,0.709642,267.962692,6.0,0.3962723889353149,8.0,24.0,0.099063089087863,gabriel,TrialNumber35-20250424T142435UTC,train.py,"[{""run_id"": ""6ce5f9152766473186623e9ad218164b""...",b3988f18ead5caa415f3d9f2930d395ff5f61fef,LOCAL


In [5]:
# The frame is sorted by ascending test MAE, so its first row is the best run.
if sorted_runs_df.empty:
    # Fail with a clear message rather than an out-of-bounds IndexError.
    raise ValueError("No MLflow runs available to select a best run from")
best_run = sorted_runs_df.iloc[0]
best_run_id = best_run["run_id"]
best_run_id

'8c1446a5f1424e06a1febd5163a17330'

In [6]:
# `mlflow` is already imported in the first cell, so it is not re-imported here.

# URI of the "lightgbm" artifact logged by the best run.
logged_model = f"runs:/{best_run_id}/lightgbm"

# Load the artifact as a generic PyFuncModel wrapper. Predictions can then be
# made on a pandas DataFrame via `loaded_model.predict(...)`.
loaded_model = mlflow.pyfunc.load_model(logged_model)

Downloading artifacts:   0%|          | 0/7 [00:00<?, ?it/s]

In [7]:
# Show the pyfunc wrapper's summary (artifact path, flavor and run id).
loaded_model

mlflow.pyfunc.loaded_model:
  artifact_path: lightgbm
  flavor: mlflow.lightgbm
  run_id: 8c1446a5f1424e06a1febd5163a17330

In [8]:
# Unwrap the pyfunc wrapper to inspect the underlying flavor-native model
# (presumably a LightGBM model, given the `mlflow.lightgbm` flavor shown above).
loaded_model.get_raw_model()

In [9]:
import joblib
from utils import create_dir

# Make sure the output directory is there before writing into it.
create_dir("models")

# Persist the loaded pyfunc wrapper under the configured model name.
model_path = f"models/{config['model_name']}.joblib"
with open(model_path, "wb") as model_file:
    joblib.dump(loaded_model, model_file)

Directory 'models' already exists.


In [10]:
# Read the persisted model back from disk, rebinding `loaded_model` to the
# restored object.
model_path = f"models/{config['model_name']}.joblib"
with open(model_path, "rb") as model_file:
    loaded_model = joblib.load(model_file)

In [11]:
# Show the summary of the object restored from disk, for comparison with the
# one displayed before saving.
loaded_model

mlflow.pyfunc.loaded_model:
  artifact_path: lightgbm
  flavor: mlflow.lightgbm
  run_id: 8c1446a5f1424e06a1febd5163a17330

In [12]:
import json

# Serialise the best run's row (ids, timings, metrics, params, tags) to
# indented JSON text and store it next to the saved model.
metadata_path = f"models/{config['model_name']}_metadata.json"
metadata = best_run.to_json(indent=4)
with open(metadata_path, "w") as metadata_file:
    metadata_file.write(metadata)

In [13]:
# Read the metadata file back and parse it into a dict to check the round trip.
metadata_path = f"models/{config['model_name']}_metadata.json"
with open(metadata_path, "r") as metadata_file:
    metadata = json.loads(metadata_file.read())
metadata

{'run_id': '8c1446a5f1424e06a1febd5163a17330',
 'experiment_id': '215806994480999889',
 'status': 'FINISHED',
 'artifact_uri': 'mlflow-artifacts:/215806994480999889/8c1446a5f1424e06a1febd5163a17330/artifacts',
 'start_time': 1746251515345,
 'end_time': 1746251526041,
 'metrics.test_mae': 103.4737424393,
 'metrics.training_duration': 1.5696849823,
 'metrics.train_mae': 63.7731407043,
 'params.max_depth': None,
 'params.feature_fraction': None,
 'params.num_trees': None,
 'params.num_leaves': None,
 'params.learning_rate': None,
 'tags.mlflow.user': 'gabriel',
 'tags.mlflow.runName': 'DefaultParameters-20250503T055155UTC',
 'tags.mlflow.source.name': 'train.py',
 'tags.mlflow.log-model.history': '[{"run_id": "8c1446a5f1424e06a1febd5163a17330", "artifact_path": "lightgbm", "utc_time_created": "2025-05-03 05:52:00.433893", "model_uuid": "ddbd0a354b8640859eef85d3addb30e7", "flavors": {"python_function": {"loader_module": "mlflow.lightgbm", "python_version": "3.10.11", "data": "model.pkl", "