In [1]:
import json

import mlflow
import numpy as np
import pandas as pd
import requests
from mlflow.tracking import MlflowClient

from train_rf import CATEGORICAL_FEATURES, NUMERICAL_FEATURES


In [2]:
# Look up the "loan" experiment. get_experiment_by_name returns None when no
# experiment with that name exists, so fail early with a readable message
# instead of letting dict(None) raise an opaque TypeError.
experiment = mlflow.get_experiment_by_name("loan")
if experiment is None:
    raise ValueError(
        "MLflow experiment 'loan' was not found; check the tracking URI and experiment name"
    )
current_experiment = dict(experiment)
experiment_id = current_experiment['experiment_id']

# ID of the parent run whose child runs are compared below.
# Get this from UI or CLI
rf_parent_run = "03046a89d08346a5bda301cc7c745885"

## Find the best model

In [3]:
# To access MLFlow stuff we need to work with MlflowClient
client = MlflowClient()

# Fetch the child runs of the parent run, ordered by test_PR_AUC (best first)
runs = client.search_runs(
    [experiment_id],
    f"tags.mlflow.parentRunId = '{rf_parent_run}'",
    order_by=["metrics.test_PR_AUC DESC"],
)
if not runs:
    # np.argmax on an empty list raises an unhelpful ValueError; be explicit
    raise ValueError(f"No child runs found for parent run '{rf_parent_run}'")

# A child run that never logged test_PR_AUC gets -inf so it can never be
# selected, instead of aborting the whole cell with a KeyError.
pr_auc_scores = [
    run.data.metrics.get('test_PR_AUC', float('-inf')) for run in runs
]

# Select the best run according to test_PR_AUC metric.
# best_run is an index into `runs`, not a run ID.
best_run = np.argmax(pr_auc_scores)
if pr_auc_scores[best_run] == float('-inf'):
    raise ValueError("None of the child runs logged the 'test_PR_AUC' metric")
best_pr_auc = np.round(pr_auc_scores[best_run], 4)

print(f"Experiment had {len(runs)} HP tuning round")
print(f"Best run - {best_run} with PR AUC of {best_pr_auc}")

Experiment had 10 HP tuning round
Best run - 0 with PR AUC of 0.104


In [4]:
# The 'mlflow.log-model.history' tag is a JSON-encoded string; decode it and
# keep the first entry it contains
best_run_tags = runs[best_run].data.tags
logged_models = json.loads(best_run_tags['mlflow.log-model.history'])
log_model_info = logged_models[0]

# Assemble the model URI in the form runs:/<run_id>/<artifact_path>
model_uri = f"runs:/{log_model_info['run_id']}/{log_model_info['artifact_path']}"
print(f"Best model URI - {model_uri}")


Best model URI - runs:/1d2537d89cb04760b3b9bc501ee0854f/sklearn_models


## Load the best model

In [16]:
# Read only the first row of the raw training file; it serves as the sample
# input for the prediction calls below
sample_path = "./data/raw/train.csv"
data = pd.read_csv(sample_path, nrows=1)

In [17]:
# Load the best run's model through the generic pyfunc interface, then score
# the sample row (the prediction is shown as the cell output)
sklearn_pyfunc = mlflow.pyfunc.load_model(model_uri)
sklearn_pyfunc.predict(data)

 - category-encoders (current: 2.6.0, required: category-encoders==2.3.0)
To fix the mismatches, call `mlflow.pyfunc.get_model_dependencies(model_uri)` to fetch the model's environment and install dependencies using the resulting environment file.


array([[0.4980769, 0.5019231]])

## Register and Promote

In [19]:
model_name = 'loan_model'

# Register model. Each call creates a NEW version under `model_name`, so take
# the version number from the returned ModelVersion instead of hard-coding 1;
# otherwise re-running this cell would promote a stale version.
registered_version = mlflow.register_model(model_uri, model_name)
model_version = int(registered_version.version)

# Promote the version that was just registered to Production
logs = client.transition_model_version_stage(
    name=model_name,
    version=model_version,
    stage="Production",
)

Successfully registered model 'loan_model'.
2023/02/14 11:51:20 INFO mlflow.tracking._model_registry.client: Waiting up to 300 seconds for model version to finish creation.                     Model name: loan_model, version 1
Created version '1' of model 'loan_model'.


## Load from Production Model Registry

In [20]:
# A registered model can be addressed by name and stage, so the run-specific
# model URI is not needed here
stage = 'Production'
model_registry_path = 'models:/' + model_name + '/' + stage
production_model = mlflow.pyfunc.load_model(model_uri=model_registry_path)

# Score the same sample row with the model loaded from the registry
production_model.predict(data)

 - category-encoders (current: 2.6.0, required: category-encoders==2.3.0)
To fix the mismatches, call `mlflow.pyfunc.get_model_dependencies(model_uri)` to fetch the model's environment and install dependencies using the resulting environment file.


array([[0.4980769, 0.5019231]])

## Serve models

Run this command in the terminal: `mlflow models serve --model-uri models:/loan_model/Production -p 5001`

### Call the model server

In [22]:
# Build the request payload: keep the feature columns (numerical first, then
# categorical) and convert the frame to pandas' "split" dictionary layout
feature_columns = NUMERICAL_FEATURES + CATEGORICAL_FEATURES
example = data[feature_columns]
to_send = example.to_dict('split')

# Remove the row index from the payload; the popped value is the cell output
to_send.pop("index", None)

In [21]:
# Prediction endpoint of the model server started with `mlflow models serve`
url = 'http://127.0.0.1:5001/invocations'

# Send the example to the server. Without a timeout requests waits
# indefinitely, so the cell would hang if the server is not running.
response = requests.post(
    url=url,
    data=json.dumps({"dataframe_split": to_send}),
    headers={"Content-type": "application/json"},
    timeout=30,  # seconds
)

# Fail loudly on an HTTP error, including the server's message, instead of
# parsing an error body as if it were a prediction
if not response.ok:
    raise RuntimeError(
        f"Model server returned HTTP {response.status_code}: {response.text}"
    )

# Load the response
response_json = response.json()
print(response_json)