In [None]:
import pandas as pd
import mlflow
import json
from preprocessing.preprocessing import *
# Selects which `preprocessing_<N>` function is applied to the raw data further down.
# NOTE(review): the same value is assigned again in the configuration cell below — keep the two in sync.
preprocessing_version = 2

In [101]:
# Display the installed MLflow version so the environment used for this run is recorded in the notebook.
mlflow.__version__

'2.20.0'

In [2]:
# Model identifier; it is interpolated into the MLflow experiment name in the deployment section.
model_name = "lightgbm"
# Numeric suffix of the `preprocessing_<N>` function used to build the features and target.
preprocessing_version = 2

# Load and test the model

From a terminal, start the MLflow tracking server:
```
mlflow server --host 127.0.0.1 --port 5000
```

In [3]:
# Point the MLflow client at the local tracking server; host and port must match the
# `mlflow server` command shown above.
mlflow.set_tracking_uri("http://127.0.0.1:5000")

In [4]:
# Read the raw consumption data and convert the timestamp column to datetimes.
df = pd.read_csv("./data/consumption.csv").assign(
    datetime=lambda frame: pd.to_datetime(frame["datetime"])
)

# Resolve the preprocessing function by name from the notebook namespace
# (presumably provided by the star-import at the top) and build features / target.
print(f"Using preprocessing version {preprocessing_version}")
preprocessing_name = f"preprocessing_{preprocessing_version}"
preprocessing = vars()[preprocessing_name]
X, y = preprocessing(df)

# Report the dimensions of the feature matrix and the target.
for label, values in (("X", X), ("y", y)):
    print(f"{label} shape : {values.shape}")

Using preprocessing version 2


  df[feat_name] = feat_vals[restore_idxs]
  df[feat_name] = feat_vals[restore_idxs]


X shape : (944799, 99)
y shape : (944799,)


## Validate the model before deployment

Run the following code to validate that model inference works with the example input data and the logged model dependencies before deploying the model to a serving endpoint.

In [None]:
# BUG: the validation code below is NOT WORKING, so it is kept commented out. Better to switch back to mlflow 2.17.

# model_uri = 'runs:/963a54cea853490a9b21a465e6e99f94/lightgbm'
# # This is the input example logged with the model
# pyfunc_model = mlflow.pyfunc.load_model(model_uri)
# input_data = pyfunc_model.input_example

# # Verify the model with the provided input data using the logged dependencies.
# # For more details, refer to:
# # https://mlflow.org/docs/latest/models.html#validate-models-before-deployment
# mlflow.models.predict(
#     model_uri=model_uri,
#     input_data=input_data,
#     # env_manager="uv",
# )

In [11]:
# URI of the logged model: run ID followed by the artifact path inside that run.
logged_model = "runs:/963a54cea853490a9b21a465e6e99f94/lightgbm"

# Load it through the generic pyfunc flavour and smoke-test it on the first feature row.
loaded_model = mlflow.pyfunc.load_model(model_uri=logged_model)
first_row = X[:1]
loaded_model.predict(first_row)

Downloading artifacts:   0%|          | 0/7 [00:00<?, ?it/s]



array([254.6388213])

# Deploy the model

In [12]:
from mlflow.tracking import MlflowClient

# Look up the experiment that holds the runs for this model / preprocessing combination.
client = MlflowClient()
experiment_name = f"Enefit {model_name} Preprocessing {preprocessing_version}"
experiment = client.get_experiment_by_name(experiment_name)

# `get_experiment_by_name` returns None for an unknown name; fail here with a clear
# message instead of an AttributeError on the next line.
if experiment is None:
    raise ValueError(f"MLflow experiment not found: {experiment_name!r}")

experiment_id = experiment.experiment_id
# This value is the experiment ID (the run ID is resolved in the next cell).
print(f"Experiment ID : {experiment_id}")

Run ID : 882091402080986748


In [13]:
# Fetch the most recent runs of the experiment, newest first.
runs = client.search_runs([experiment_id], order_by=["start_time DESC"], max_results=10)
# runs[0].to_dictionary()["data"]["metrics"]#["test_mae"]

# Fail loudly when the experiment has no runs: otherwise `latest_run_id` would stay
# undefined (or keep a stale value from an earlier execution) and the serve command
# printed below would be broken or point at the wrong model.
if not runs:
    raise RuntimeError(f"No runs found in experiment {experiment_id}")

latest_run_id = runs[0].info.run_id
print(f"Latest Run ID: {latest_run_id}")

Latest Run ID: 963a54cea853490a9b21a465e6e99f94


In [14]:
# Port the local model server will listen on; reused by the request cells below.
PORT = 1234

# Location of the logged model on disk, relative to the directory the command is run from.
model_path = f"./mlartifacts/{experiment_id}/{latest_run_id}/artifacts/lightgbm"
serve_command = f"mlflow models serve -m {model_path} -p {PORT}"

# Print the instructions together with the ready-to-paste command.
print(
    "Run the below command in a new window. You must be in the same repo as your mlruns directory and have mlflow installed :\n"
    "\n"
    f"    {serve_command}"
)

Run the below command in a new window. You must be in the same repo as your mlruns directory and have mlflow installed :

    mlflow models serve -m ./mlartifacts/882091402080986748/963a54cea853490a9b21a465e6e99f94/artifacts/lightgbm -p 1234


# Call the deployed model

In [None]:
import requests
import json

# Host of the model server to query; the active value targets a model served on this machine.
address = "127.0.0.1"
# Alternative hosts (presumably remote deployments — confirm they are still valid before use):
# address = "3.252.192.81"
# address = "ec2-54-228-144-127.eu-west-1.compute.amazonaws.com"


In [None]:
# Check if the model responds
url = f"http://{address}:{PORT}/ping"
# Without a timeout `requests` waits indefinitely, so the cell would hang forever
# when the model server has not been started or is unreachable.
r = requests.get(url, timeout=10)
print(r)
print(r.text)

<Response [200]>




In [None]:
# See https://mlflow.org/docs/latest/deployment/deploy-model-locally.html#json-input
# Preview of the request body: first row, first five feature columns only.
preview_split = json.loads(X.iloc[:1, :5].to_json(orient="split"))
# Drop the row index so only "columns" and "data" remain in the payload.
preview_split.pop("index", None)
json_data = json.dumps({"dataframe_split": preview_split}, indent=4)
print(json_data)

{
    "dataframe_split": {
        "columns": [
            "lag48",
            "lag49",
            "lag50",
            "lag51",
            "lag52"
        ],
        "data": [
            [
                120.54,
                134.986,
                150.412,
                152.763,
                136.13
            ]
        ]
    }
}


In [97]:
# Full request body: the first row with every feature column, in the "dataframe_split" layout.
full_split = json.loads(X.iloc[:1].to_json(orient="split"))
# Drop the row index so only "columns" and "data" remain in the payload.
full_split.pop("index", None)
json_data = {"dataframe_split": full_split}
# Show the serialised form; `json_data` itself stays a dict.
json.dumps(json_data)

'{"dataframe_split": {"columns": ["lag48", "lag49", "lag50", "lag51", "lag52", "lag53", "lag54", "lag55", "lag56", "lag57", "lag58", "lag59", "lag60", "lag61", "lag62", "lag63", "lag64", "lag65", "lag66", "lag67", "lag68", "lag69", "lag70", "lag71", "lag72", "lag73", "lag74", "lag75", "lag76", "lag77", "lag78", "lag79", "lag80", "lag81", "lag82", "lag83", "lag84", "lag85", "lag86", "lag87", "lag88", "lag89", "lag90", "lag91", "lag92", "lag93", "lag94", "lag95", "expanding_mean_lag48", "rolling_mean_lag48_window_size24", "expanding_mean_lag49", "rolling_mean_lag49_window_size24", "expanding_mean_lag50", "rolling_mean_lag50_window_size24", "expanding_mean_lag51", "rolling_mean_lag51_window_size24", "expanding_mean_lag52", "rolling_mean_lag52_window_size24", "expanding_mean_lag53", "rolling_mean_lag53_window_size24", "expanding_mean_lag54", "rolling_mean_lag54_window_size24", "expanding_mean_lag55", "rolling_mean_lag55_window_size24", "expanding_mean_lag56", "rolling_mean_lag56_window_siz

In [None]:
# Get predictions from the served model.
url = f"http://{address}:{PORT}/invocations"

# `json=` serialises its argument itself, so it must receive the payload as a dict.
# The preview cell above rebinds `json_data` to an already-serialised string; if that
# cell ran last, decode it first so the body is not JSON-encoded twice.
payload = json.loads(json_data) if isinstance(json_data, str) else json_data

# The timeout keeps the cell from hanging forever when the server is unreachable.
r = requests.post(
    url,
    headers={"Content-Type": "application/json"},
    json=payload,
    timeout=30,
)
r

<Response [200]>