In [1]:
import mlflow
from mlflow import MlflowClient
from dotenv import dotenv_values

In [2]:
import sys

# Read the key/value pairs declared in the project's .env file.
env_vars = dotenv_values("../.env")

# Make the project's working directory importable so that the local `utils`
# module can be resolved from inside the notebooks folder.
working_dir = env_vars["WORKING_DIR"]
sys.path.append(str(working_dir))
from utils import load_config, save_config

# Development configuration used throughout this notebook.
config = load_config("../config/development/config.yaml")

# Retrieve runs

From a terminal, in the directory that contains your `mlruns` directory, start the MLflow tracking server:
```
mlflow server --host 127.0.0.1 --port 5000
```

In [3]:
# Point BOTH the low-level client and the fluent `mlflow.*` API at the same
# tracking server. Without `set_tracking_uri`, `mlflow.search_runs` (and later
# `mlflow.pyfunc.load_model`) fall back to the default tracking URI, which may
# not be the server the client is talking to.
tracking_uri = env_vars["MLFLOW_TRACKING_URI"]
mlflow.set_tracking_uri(tracking_uri)
client = MlflowClient(tracking_uri=tracking_uri)

experiment_name = "Enefit DataV1 lightgbm HpoConfigV1"
experiment = client.get_experiment_by_name(experiment_name)
if experiment is None:
    # Fail with an actionable message instead of an AttributeError on `None`.
    raise ValueError(
        f"MLflow experiment {experiment_name!r} was not found at {tracking_uri!r}. "
        "Check the experiment name and that the tracking server is running."
    )
experiment_id = experiment.experiment_id

# One row per run, with metrics/params/tags flattened into columns.
runs_df = mlflow.search_runs(experiment_ids=[experiment_id])
runs_df.head()

Unnamed: 0,run_id,experiment_id,status,artifact_uri,start_time,end_time,metrics.train_mae,metrics.test_mae,metrics.training_duration,params.num_trees,params.feature_fraction,params.learning_rate,params.max_depth,params.num_leaves,tags.mlflow.user,tags.mlflow.source.name,tags.mlflow.source.type,tags.mlflow.source.git.commit,tags.mlflow.runName,tags.mlflow.log-model.history
0,37cff3aa004b4ce39824c97d3f957fb5,255253755991586970,FINISHED,mlflow-artifacts:/255253755991586970/37cff3aa0...,2025-05-04 06:40:02.945000+00:00,2025-05-04 06:40:13.082000+00:00,267.450256,276.514219,0.651888,8,0.3962723889353149,0.099063089087863,6,24,gabriel,train.py,LOCAL,66e2bf008efc5aa702f8830dd6245d59c076c6ee,TrialNumber35-20250504T064002UTC,"[{""run_id"": ""37cff3aa004b4ce39824c97d3f957fb5""..."
1,1ffaae870c4041edabeb7a2041cfe926,255253755991586970,FINISHED,mlflow-artifacts:/255253755991586970/1ffaae870...,2025-05-04 06:32:59.722000+00:00,2025-05-04 06:33:09.335000+00:00,267.450256,276.514219,0.644157,8,0.3962723889353149,0.099063089087863,6,24,gabriel,train.py,LOCAL,66e2bf008efc5aa702f8830dd6245d59c076c6ee,TrialNumber35-20250504T063258UTC,"[{""run_id"": ""1ffaae870c4041edabeb7a2041cfe926""..."


In [4]:
# Rank runs from lowest to highest test MAE (ascending is the default order),
# so the best candidate ends up in the first row.
sorted_runs_df = runs_df.sort_values("metrics.test_mae")
sorted_runs_df.head()

Unnamed: 0,run_id,experiment_id,status,artifact_uri,start_time,end_time,metrics.train_mae,metrics.test_mae,metrics.training_duration,params.num_trees,params.feature_fraction,params.learning_rate,params.max_depth,params.num_leaves,tags.mlflow.user,tags.mlflow.source.name,tags.mlflow.source.type,tags.mlflow.source.git.commit,tags.mlflow.runName,tags.mlflow.log-model.history
0,37cff3aa004b4ce39824c97d3f957fb5,255253755991586970,FINISHED,mlflow-artifacts:/255253755991586970/37cff3aa0...,2025-05-04 06:40:02.945000+00:00,2025-05-04 06:40:13.082000+00:00,267.450256,276.514219,0.651888,8,0.3962723889353149,0.099063089087863,6,24,gabriel,train.py,LOCAL,66e2bf008efc5aa702f8830dd6245d59c076c6ee,TrialNumber35-20250504T064002UTC,"[{""run_id"": ""37cff3aa004b4ce39824c97d3f957fb5""..."
1,1ffaae870c4041edabeb7a2041cfe926,255253755991586970,FINISHED,mlflow-artifacts:/255253755991586970/1ffaae870...,2025-05-04 06:32:59.722000+00:00,2025-05-04 06:33:09.335000+00:00,267.450256,276.514219,0.644157,8,0.3962723889353149,0.099063089087863,6,24,gabriel,train.py,LOCAL,66e2bf008efc5aa702f8830dd6245d59c076c6ee,TrialNumber35-20250504T063258UTC,"[{""run_id"": ""1ffaae870c4041edabeb7a2041cfe926""..."


In [5]:
# The frame is sorted ascending by test MAE, so the first row is the best run.
best_run = sorted_runs_df.iloc[0]
best_run_id = best_run.loc["run_id"]
best_run_id

'37cff3aa004b4ce39824c97d3f957fb5'

In [6]:
import mlflow

# `runs:/` URIs are resolved through the fluent API's tracking URI, so make
# sure it targets the same server the runs were queried from. Calling this
# again with the same value is harmless.
mlflow.set_tracking_uri(env_vars["MLFLOW_TRACKING_URI"])

# URI of the model logged under the "lightgbm" artifact path of the best run.
logged_model = f"runs:/{best_run_id}/lightgbm"

# Load the model as a generic PyFuncModel.
# Usage note: `loaded_model.predict(df)` accepts a pandas DataFrame.
loaded_model = mlflow.pyfunc.load_model(logged_model)

Downloading artifacts:   0%|          | 0/7 [00:00<?, ?it/s]

In [7]:
# Display the PyFunc wrapper summary (artifact path, flavor, run id) to confirm
# which model was loaded.
loaded_model

mlflow.pyfunc.loaded_model:
  artifact_path: lightgbm
  flavor: mlflow.lightgbm
  run_id: 37cff3aa004b4ce39824c97d3f957fb5

In [8]:
# Display whatever the PyFunc wrapper returns from get_raw_model()
# (presumably the underlying LightGBM model, given the mlflow.lightgbm flavor — confirm).
loaded_model.get_raw_model()

# Save the model

In [9]:
import joblib
from utils import create_dir

# Make sure the output folder is present before writing into it.
create_dir("models")

# Serialize the loaded PyFunc model to models/<model_name>.joblib.
model_path = f"models/{config['model_name']}.joblib"
with open(model_path, "wb") as model_file:
    joblib.dump(loaded_model, model_file)

Directory 'models' already exists.


In [None]:
# Read the serialized model back from disk, rebinding `loaded_model`.
# NOTE: joblib.load unpickles the file, so only use it on files you trust
# (here, the file written by this notebook).
model_path = f"models/{config['model_name']}.joblib"
with open(model_path, "rb") as model_file:
    loaded_model = joblib.load(model_file)

In [11]:
# Display the model reloaded from disk to check it round-tripped through joblib.
loaded_model

mlflow.pyfunc.loaded_model:
  artifact_path: lightgbm
  flavor: mlflow.lightgbm
  run_id: 37cff3aa004b4ce39824c97d3f957fb5

# Save and load metadata

In [12]:
import json

# Serialize the best run's row (ids, metrics, params, tags) as pretty-printed
# JSON and store it next to the saved model.
metadata_path = f"models/{config['model_name']}_metadata.json"
metadata = best_run.to_json(indent=4)
with open(metadata_path, "w") as metadata_file:
    metadata_file.write(metadata)

In [13]:
# Read the metadata file back and parse it into a dict for inspection.
metadata_path = f"models/{config['model_name']}_metadata.json"
with open(metadata_path) as metadata_file:
    metadata = json.load(metadata_file)
metadata

{'run_id': '37cff3aa004b4ce39824c97d3f957fb5',
 'experiment_id': '255253755991586970',
 'status': 'FINISHED',
 'artifact_uri': 'mlflow-artifacts:/255253755991586970/37cff3aa004b4ce39824c97d3f957fb5/artifacts',
 'start_time': 1746340802945,
 'end_time': 1746340813082,
 'metrics.train_mae': 267.4502562879,
 'metrics.test_mae': 276.5142190107,
 'metrics.training_duration': 0.6518881321,
 'params.num_trees': '8',
 'params.feature_fraction': '0.3962723889353149',
 'params.learning_rate': '0.09906308908786306',
 'params.max_depth': '6',
 'params.num_leaves': '24',
 'tags.mlflow.user': 'gabriel',
 'tags.mlflow.source.name': 'train.py',
 'tags.mlflow.source.type': 'LOCAL',
 'tags.mlflow.source.git.commit': '66e2bf008efc5aa702f8830dd6245d59c076c6ee',
 'tags.mlflow.runName': 'TrialNumber35-20250504T064002UTC',
 'tags.mlflow.log-model.history': '[{"run_id": "37cff3aa004b4ce39824c97d3f957fb5", "artifact_path": "lightgbm", "utc_time_created": "2025-05-04 06:40:06.358640", "model_uuid": "299063be051

In [14]:
# Record which MLflow experiment and run the saved model came from.
config.update(
    {
        "EXPERIMENT_ID": experiment_id,
        "RUN_ID": best_run_id,
    }
)

In [15]:
# Inspect the configuration, which now includes the EXPERIMENT_ID and RUN_ID entries.
config

{'splitting_datetime': datetime.datetime(2023, 1, 26, 5, 0),
 'freq': 'H',
 'id_col': 'prediction_unit_id',
 'time_col': 'datetime',
 'target_col': 'target',
 'forecast_horizon': 48,
 'n_lags': 4,
 'rolling_mean_window_size': 4,
 'date_features': ['month', 'dayofweek', 'hour'],
 'static_features': ['county',
  'is_business',
  'product_type',
  'is_consumption',
  'prediction_unit_id'],
 'n_lag_transforms': 4,
 'model_name': 'lightgbm',
 'hpo_config_version': 1,
 'random_state': 0,
 'n_splits': 5,
 'scoring': 'neg_mean_absolute_error',
 'n_trials': 3,
 'studies_dir': './optuna_studies',
 's3_bucket': None,
 'EXPERIMENT_ID': '255253755991586970',
 'RUN_ID': '37cff3aa004b4ce39824c97d3f957fb5'}

In [None]:
# Persist the updated configuration back to the development config file.
config_path = "../config/development/config.yaml"
save_config(config, config_path)

Configuration saved to ../config/development/config.yaml


In [None]:
# Reload the file that was just written and check that it matches the
# in-memory configuration.
config_path = "../config/development/config.yaml"
loaded_config = load_config(config_path)
config == loaded_config

True