In [1]:
import mlflow
import numpy as np
from mlflow.tracking import MlflowClient

In [48]:
import pandas as pd

# Raw dataset location, relative to the notebook's working directory.
raw_csv_path = "./data/raw/Base.csv"
data = pd.read_csv(raw_csv_path)

## Find the best model

In [20]:
# Identifiers of the experiment and of the parent run whose child runs are compared.
experiment_ids = ['308963504018611854']
parent_run_id = "99f16c2251dd4dcebf69f816261660cb"

# Restrict the search to runs tagged as children of the parent run.
child_run_filter = "tags.mlflow.parentRunId = '{run_id}' ".format(run_id=parent_run_id)

client = MlflowClient()
runs = client.search_runs(experiment_ids, child_run_filter)

In [21]:
# Gather the 'test_PR_AUC' metric of every run, then keep the position of the largest value.
pr_auc_scores = [run.data.metrics['test_PR_AUC'] for run in runs]
best_run = np.argmax(pr_auc_scores)
best_run


0

In [22]:
import json

# The 'mlflow.log-model.history' tag of the best run is parsed as JSON and its
# first entry is kept; 'run_id' and 'artifact_path' are read from it later.
best_run_tags = runs[best_run].data.tags
model_history = json.loads(best_run_tags['mlflow.log-model.history'])
log_model_info = model_history[0]


In [24]:
# Build a `runs:/<run_id>/<artifact_path>` URI pointing at the logged model.
logged_run_id = log_model_info['run_id']
logged_artifact_path = log_model_info['artifact_path']
model_uri = f'runs:/{logged_run_id}/{logged_artifact_path}'
model_uri

'runs:/15607b43eb5f41f6aac1ab869ae6ea1f/sklearn_models'

## Load the best model

In [29]:
# Load the model through the pyfunc flavor and score five randomly sampled rows.
sklearn_pyfunc = mlflow.pyfunc.load_model(model_uri=model_uri)
sample_rows = data.sample(5)
sklearn_pyfunc.predict(sample_rows)

 - category-encoders (current: 2.3.0, required: category-encoders==2.6.0)
 - cloudpickle (current: 2.0.0, required: cloudpickle==2.2.1)
 - scikit-learn (current: 1.0.2, required: scikit-learn==1.2.1)
To fix the mismatches, call `mlflow.pyfunc.get_model_dependencies(model_uri)` to fetch the model's environment and install dependencies using the resulting environment file.
https://scikit-learn.org/stable/modules/model_persistence.html#security-maintainability-limitations
https://scikit-learn.org/stable/modules/model_persistence.html#security-maintainability-limitations
https://scikit-learn.org/stable/modules/model_persistence.html#security-maintainability-limitations
https://scikit-learn.org/stable/modules/model_persistence.html#security-maintainability-limitations


array([[0.97773829, 0.02226171],
       [0.98196164, 0.01803836],
       [0.93843352, 0.06156648],
       [0.99164771, 0.00835229],
       [0.99837131, 0.00162869]])

## Register Model

In [30]:
# Register the model behind `model_uri` under the registry name "best_rf".
# Left as the last expression so the resulting ModelVersion is displayed.
mlflow.register_model(
    model_uri=model_uri,
    name="best_rf",
)

Successfully registered model 'best_rf'.
2023/02/10 17:11:59 INFO mlflow.tracking._model_registry.client: Waiting up to 300 seconds for model version to finish creation.                     Model name: best_rf, version 1
Created version '1' of model 'best_rf'.


<ModelVersion: creation_timestamp=1676049119439, current_stage='None', description=None, last_updated_timestamp=1676049119439, name='best_rf', run_id='15607b43eb5f41f6aac1ab869ae6ea1f', run_link=None, source='file:///Users/antonsruberts/personal/tutorials/mlflow_models/mlruns/308963504018611854/15607b43eb5f41f6aac1ab869ae6ea1f/artifacts/sklearn_models', status='READY', status_message=None, tags={}, user_id=None, version=1>

In [31]:
# Load the registered model by explicit version number (`models:/<name>/<version>`).
model_name = 'best_rf'
model_version = 1

model_registry_path = 'models:/{}/{}'.format(model_name, model_version)
loaded_model = mlflow.pyfunc.load_model(model_registry_path)


 - category-encoders (current: 2.3.0, required: category-encoders==2.6.0)
 - cloudpickle (current: 2.0.0, required: cloudpickle==2.2.1)
 - scikit-learn (current: 1.0.2, required: scikit-learn==1.2.1)
To fix the mismatches, call `mlflow.pyfunc.get_model_dependencies(model_uri)` to fetch the model's environment and install dependencies using the resulting environment file.
https://scikit-learn.org/stable/modules/model_persistence.html#security-maintainability-limitations
https://scikit-learn.org/stable/modules/model_persistence.html#security-maintainability-limitations
https://scikit-learn.org/stable/modules/model_persistence.html#security-maintainability-limitations
https://scikit-learn.org/stable/modules/model_persistence.html#security-maintainability-limitations


## Transition to production

In [35]:
# Move version 1 of "best_rf" to the "Production" stage; the call's result is kept in `logs`.
logs = client.transition_model_version_stage(
    name="best_rf",
    version=1,
    stage="Production",
)

In [39]:
# Load the registered model by stage (`models:/<name>/<stage>`) instead of by version.
model_name = 'best_rf'
stage = 'Production'

model_registry_path = 'models:/{}/{}'.format(model_name, stage)
loaded_model = mlflow.pyfunc.load_model(model_registry_path)

 - category-encoders (current: 2.3.0, required: category-encoders==2.6.0)
 - cloudpickle (current: 2.0.0, required: cloudpickle==2.2.1)
 - scikit-learn (current: 1.0.2, required: scikit-learn==1.2.1)
To fix the mismatches, call `mlflow.pyfunc.get_model_dependencies(model_uri)` to fetch the model's environment and install dependencies using the resulting environment file.
https://scikit-learn.org/stable/modules/model_persistence.html#security-maintainability-limitations
https://scikit-learn.org/stable/modules/model_persistence.html#security-maintainability-limitations
https://scikit-learn.org/stable/modules/model_persistence.html#security-maintainability-limitations
https://scikit-learn.org/stable/modules/model_persistence.html#security-maintainability-limitations


## Serve models

Run the following in a terminal to serve the Production model locally on port 1234:

`mlflow models serve --model-uri models:/best_rf/Production -p 1234`

## Call the served model

In [45]:
# Peek at the raw values of one randomly drawn row.
data.sample(n=1).values

array([[1, 0.4, 0.1495204741631974, -1, 229, 50, 21.91362792263565,
        -1.1686546721418976, 'AC', 915, 5549.085941877825,
        3349.941674034092, 3076.396147162043, 0, 4, 'CA', 223, 1, 'BA',
        0, 1, -1, 0, 500.0, 0, 'INTERNET', 8.980315500464021, 'windows',
        1, 1, 0, 7]], dtype=object)

In [65]:
from train_rf import NUMERICAL_FEATURES, CATEGORICAL_FEATURES

# Columns sent to the model: numerical features first, then categorical ones.
feature_columns = NUMERICAL_FEATURES + CATEGORICAL_FEATURES

# Draw five rows flagged as fraud and keep only the feature columns.
fraud_rows = data[data['fraud_bool'] == 1]
fraud_example = fraud_rows.sample(5)[feature_columns]

# Convert to the split-oriented dict layout and drop its "index" entry;
# `pop` stays the last expression, so the removed index list is displayed.
to_send = fraud_example.to_dict(orient='split')
to_send.pop("index", None)

[620804, 753575, 911, 248862, 782]

In [66]:
import requests

# Scoring endpoint on localhost port 1234 (the port passed to `mlflow models serve -p 1234`).
url = 'http://127.0.0.1:1234/invocations'

# The split-oriented frame is wrapped under the "dataframe_split" key and sent as JSON.
response = requests.post(
    url=url,
    data=json.dumps({"dataframe_split": to_send}),
    headers={"Content-type": "application/json"},
    timeout=30,  # without a timeout the cell hangs indefinitely if the server is not running
)

# Fail loudly on 4xx/5xx instead of parsing an error body as if it were predictions.
response.raise_for_status()

response_json = response.json()
print(response_json)

{'predictions': [[0.9304447625984755, 0.06955523740152446], [0.9384335185438405, 0.061566481456159276], [0.9949696766422231, 0.005030323357777284], [0.9384335185438405, 0.061566481456159276], [0.8997133100873695, 0.10028668991263089]]}


In [None]:
# Boolean mask aligned with NUMERICAL_FEATURES + CATEGORICAL_FEATURES:
# False for every numerical feature, True for every categorical one.
[False] * len(NUMERICAL_FEATURES) + [True] * len(CATEGORICAL_FEATURES)