In [18]:
import pandas as pd
import mlflow
from dotenv import dotenv_values

In [19]:
import sys

# Read key/value pairs from the repo-level .env file and put the configured
# working directory on the import path (the `utils` import below relies on it).
env_vars = dotenv_values("../.env")
working_dir = str(env_vars["WORKING_DIR"])
sys.path.append(working_dir)

from utils import load_config

# Development configuration; later cells read EXPERIMENT_ID, RUN_ID and
# model_name from it.
CONFIG_PATH = "../config/development/config.yaml"
config = load_config(CONFIG_PATH)

# Load the Data

In [20]:
from lgbm_hpo import load_data

# Locations of the processed train / test splits.
TRAIN_CSV = "../data/processed/consumption_train.csv"
TEST_CSV = "../data/processed/consumption_test.csv"

# Each call yields a (features, target) pair; the train split is read first.
X_train, y_train = load_data(TRAIN_CSV)
X_test, y_test = load_data(TEST_CSV)

X shape : (757248, 20)
y shape : (757248,)
X shape : (186732, 20)
y shape : (186732,)


# Load and test the model

From a terminal, run:
```
mlflow server --host 127.0.0.1 --port 5000
```

In [21]:
# Point the MLflow client at the local tracking server started from the
# terminal with the command shown above.
TRACKING_URI = "http://127.0.0.1:5000"
mlflow.set_tracking_uri(TRACKING_URI)

## Validate the model before deployment

Run the following cells to check that the logged model loads with its recorded dependencies and can run inference on a sample row before it is deployed to a serving endpoint.

In [None]:
# Identifiers of the MLflow experiment and run whose model is validated and
# served in the cells below; both come from the loaded configuration.
EXPERIMENT_ID, RUN_ID = (config[key] for key in ("EXPERIMENT_ID", "RUN_ID"))

'37cff3aa004b4ce39824c97d3f957fb5'

In [30]:
# Resolve the "lightgbm" artifact logged under the configured run and load it
# through MLflow's generic pyfunc interface.
logged_model = f"runs:/{RUN_ID}/lightgbm"
loaded_model = mlflow.pyfunc.load_model(logged_model)

# Smoke test: run inference on the first training row only.
first_row = X_train[:1]
loaded_model.predict(first_row)

Downloading artifacts:   0%|          | 0/7 [00:00<?, ?it/s]



array([222.22662235])

# Deploy the model with MLflow

In [31]:
# Port the MLflow scoring server should listen on.
PORT = 5050

# Shell command that serves the logged model straight from the local artifact store.
serve_command = f"mlflow models serve -m ./mlartifacts/{EXPERIMENT_ID}/{RUN_ID}/artifacts/lightgbm -p {PORT}"

# The command is only printed here; it has to be run manually in another terminal.
print(
    "Run the below command in a new window. You must be in the same repo as your mlruns directory and have mlflow installed :\n"
    "\n"
    f"    {serve_command}"
)

Run the below command in a new window. You must be in the same repo as your mlruns directory and have mlflow installed :

    mlflow models serve -m ./mlartifacts/255253755991586970/37cff3aa004b4ce39824c97d3f957fb5/artifacts/lightgbm -p 5050


In [3]:
import requests
import json

# Client-side settings for talking to the model served by `mlflow models serve`.
address = "127.0.0.1"
# Must match the port passed to `mlflow models serve -p` in the command printed
# above (5050); otherwise the requests below target a port nothing listens on.
PORT = 5050
# Remote hosts (uncomment one to target it instead of the local server):
# address = "3.252.192.81"
# address = "ec2-54-228-144-127.eu-west-1.compute.amazonaws.com"

In [4]:
# Check if the model responds: GET /ping is the scoring server's health check.
url = f"http://{address}:{PORT}/ping"
# The timeout (seconds) stops the cell from hanging indefinitely when the
# server has not been started or the host is unreachable.
r = requests.get(url, timeout=10)
print(r)
print(r.text)

<Response [200]>




In [9]:
# See https://mlflow.org/docs/latest/deployment/deploy-model-locally.html#json-input
# Preview of the request format only: the first row restricted to the first
# five columns so the printout stays short. The preview lives under its own
# names so that `json_data` is never left holding a truncated, already
# serialised string that a later `requests.post(json=...)` would encode again.
preview_split = json.loads(X_train.iloc[:1, :5].to_json(orient="split"))
preview_split.pop("index", None)  # drop the row index from the request body
preview_payload = {"dataframe_split": preview_split}
print(json.dumps(preview_payload, indent=4))

{
    "dataframe_split": {
        "columns": [
            "county",
            "is_business",
            "product_type",
            "is_consumption",
            "prediction_unit_id"
        ],
        "data": [
            [
                11,
                0,
                1,
                1,
                43
            ]
        ]
    }
}


In [None]:
# See https://mlflow.org/docs/latest/deployment/deploy-model-locally.html#json-input
# Full request payload: the first training row with every feature column,
# serialised by pandas in "split" orientation and parsed back into a dict.
first_row_split = json.loads(X_train.iloc[:1].to_json(orient="split"))
first_row_split.pop("index", None)
json_data = {"dataframe_split": first_row_split}
# Kept as a dict: the request cell hands it to `requests.post(json=...)`.
json_data

{'dataframe_split': {'columns': ['county',
   'is_business',
   'product_type',
   'is_consumption',
   'prediction_unit_id',
   'lag48',
   'lag49',
   'lag50',
   'lag51',
   'expanding_mean_lag48',
   'rolling_mean_lag48_window_size4',
   'expanding_mean_lag49',
   'rolling_mean_lag49_window_size4',
   'expanding_mean_lag50',
   'rolling_mean_lag50_window_size4',
   'expanding_mean_lag51',
   'rolling_mean_lag51_window_size4',
   'month',
   'dayofweek',
   'hour'],
  'data': [[11,
    0,
    1,
    1,
    43,
    24.152,
    23.939,
    20.023,
    20.48,
    21.7837142857,
    22.1485,
    21.389,
    21.77725,
    20.879,
    20.824,
    21.093,
    21.093,
    9,
    4,
    6]]}}

In [16]:
# get predictions: POST the split-format payload to the scoring endpoint.
url = f"http://{address}:{PORT}/invocations"
r = requests.post(
    url,
    headers={"Content-Type": "application/json"},
    json=json_data,
    # Bound the wait (seconds) so an unreachable server cannot block the kernel.
    timeout=30,
)
r

<Response [200]>

In [17]:
# Confirm the payload is still a dict (not a JSON string); it is passed to
# `requests.post(json=...)` in the request cell.
type(json_data)

dict

# Deploy the model with Flask

Start the Flask API by running the `deployment.py` script located in the repo's root directory, then run the cells below.

In [32]:
import requests
import json

# Host and port of the Flask prediction API; these rebind the values used for
# the MLflow scoring server earlier in the notebook.
address, PORT = "127.0.0.1", 8080
# Remote hosts (uncomment one to target it instead of the local server):
# address = "3.252.192.81"
# address = "ec2-54-228-144-127.eu-west-1.compute.amazonaws.com"

In [37]:
# Check if the model responds: GET the API's root route.
url = f"http://{address}:{PORT}/"
# The timeout (seconds) stops the cell from hanging indefinitely when the
# Flask app has not been started or the host is unreachable.
r = requests.get(url, timeout=10)
print(r)
print(r.text)

<Response [200]>
<h1>Welcome to the prediction API!</h1>


In [38]:
# URI of the "lightgbm" artifact logged under the configured run.
model_uri = f"runs:/{RUN_ID}/lightgbm"
# The model is logged with an input example; load it through pyfunc to read
# that example back.
pyfunc_model = mlflow.pyfunc.load_model(model_uri)
# NOTE(review): assumes input_example is DataFrame-like (the next cell calls
# .to_json(orient="split") on it) — confirm it is not None for this run.
input_data = pyfunc_model.input_example

Downloading artifacts:   0%|          | 0/7 [00:00<?, ?it/s]

In [95]:
# Turn the logged input example into the {"dataframe_split": {...}} request
# body: serialise in "split" orientation, parse back to a dict, drop the index.
example_split = json.loads(input_data.to_json(orient="split"))
example_split.pop("index", None)
json_data = {"dataframe_split": example_split}

In [80]:
import joblib

# NOTE(review): joblib.load unpickles the file — only load artifacts you trust.
# NOTE(review): this path is relative to the notebook's working directory,
# unlike the "../" paths used by earlier cells — confirm it resolves.
model_path = f"models/{config['model_name']}.joblib"
with open(model_path, "rb") as model_file:
    model = joblib.load(model_file)

In [89]:
# Inspect the top-level fields of the split-format payload.
json_data["dataframe_split"].keys()

dict_keys(['columns', 'data'])

In [None]:
# Rebuild a DataFrame from the split-format payload, using its "data" rows and
# "columns" labels, to check the payload round-trips to the expected table.
split_payload = json_data["dataframe_split"]
pd.DataFrame(data=split_payload["data"], columns=split_payload["columns"])

Unnamed: 0,county,is_business,product_type,is_consumption,prediction_unit_id,lag48,lag49,lag50,lag51,expanding_mean_lag48,rolling_mean_lag48_window_size4,expanding_mean_lag49,rolling_mean_lag49_window_size4,expanding_mean_lag50,rolling_mean_lag50_window_size4,expanding_mean_lag51,rolling_mean_lag51_window_size4,month,dayofweek,hour
0,11,0,1,1,43,24.152,23.939,20.023,20.48,21.783714,22.1485,21.389,21.77725,20.879,20.824,21.093,21.093,9,4,6


In [92]:
# get predictions: POST the split-format payload to the Flask /predict route.
url = f"http://{address}:{PORT}/predict"
r = requests.post(
    url,
    headers={"Content-Type": "application/json"},
    json=json_data,
    # Bound the wait (seconds) so an unreachable server cannot block the kernel.
    timeout=30,
)
r

<Response [200]>

In [94]:
# Show the decoded JSON body returned by the /predict endpoint.
# NOTE(review): in the recorded output the prediction is a string
# ('[np.float64(...)]') rather than a number — presumably stringified on the
# server side; confirm in deployment.py before consuming it numerically.
print(r.json())

{'prediction': '[np.float64(222.22662234600898)]'}
