In [4]:
%reload_ext autoreload
%autoreload 2
%matplotlib inline

In [2]:
# Base name shared by the generated loader script (written to {NAME}.py) and
# the label prefix of each row appended to the benchmark result log.
NAME = 'linear_sleep_mlflow'

# Load data & train model (not used, just a placeholder)

In [3]:
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split
from sklearn import datasets

# One seed for both the split and the forest, so a Restart & Run All reproduces
# the same placeholder model and the same printed metrics.
SEED = 7

iris = datasets.load_iris()
x = iris.data[:, 2:]  # drop the first two feature columns
y = iris.target
X_train, X_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=SEED)

# add parameters for tuning
num_estimators = 100

# train the model (random_state pins the bootstrap / feature sampling)
rf = RandomForestRegressor(n_estimators=num_estimators, random_state=SEED)
rf.fit(X_train, y_train)
predictions = rf.predict(X_test)
print('predictions: ', predictions)

# log model performance
mse = mean_squared_error(y_test, predictions)
print("  mse: %f" % mse)

predictions:  [2.   1.   0.   1.   1.63 0.   1.2  1.   0.   1.   1.99 1.   0.   2.
 0.   1.84 2.   2.   0.   0.   1.   2.   1.   1.28 1.52 1.84 1.   1.
 2.   2.  ]
  mse: 0.087900


In [5]:
import os
import pickle
# Scratch directory shared with the generated script and the serve command.
tmpdir = 'mlflow_tmp'
# Create it up front: open(..., 'wb') does not create missing parent
# directories, so a fresh checkout would otherwise fail with FileNotFoundError.
os.makedirs(tmpdir, exist_ok=True)

# Pickle the fitted placeholder estimator; the generated script hands this
# file to mlflow.pyfunc.save_model as `data_path`.
data_path = os.path.join(tmpdir, 'skmodel.pkl')
with open(data_path, 'wb') as of:
    pickle.dump(rf, of)

In [6]:
%%writefile {NAME}.py
from __future__ import print_function

import os
import pickle

import pandas as pd
import numpy as np
import pytest
import six
import time


import mlflow
import mlflow.pyfunc
import mlflow.pyfunc.model
from mlflow.models import Model


def _load_pyfunc(path):
    with open(path, 'rb') as of:
        data_model = pickle.load(of)
    class Model:
        def predict(self, inputs):
            a, b = inputs.to_numpy()[0]
            x = inputs.shape[0]
            time.sleep(a * x + b)
            return inputs
    return Model()
        


if __name__ == '__main__':
    # Same scratch layout the notebook uses when it pickles the placeholder
    # estimator: <tmpdir>/skmodel.pkl in, <tmpdir>/model out.
    tmpdir = 'mlflow_tmp'
    data_path = os.path.join(tmpdir, 'skmodel.pkl')
    model_path = os.path.join(tmpdir, "model")

    # Module name of this script: its file name minus the 3-character ".py"
    # suffix. The saved model records it as the module providing _load_pyfunc.
    loader_module_name = os.path.basename(__file__)[:-3]

    # Save the pickled artifact plus this script as a pyfunc model directory.
    mlflow.pyfunc.save_model(
        path=model_path,
        data_path=data_path,
        loader_module=loader_module_name,
        code_path=[__file__],
        mlflow_model=Model(run_id="test"),
    )


Writing linear_sleep_mlflow.py


In [7]:
from bentoml.utils import detect_free_port
PORT = detect_free_port()
server_url = f'http://127.0.0.1:{PORT}/invocations'
print(server_url)

!rm -r {tmpdir}/model
!python {NAME}.py

print('\n**********************')
print("Run this command to launch mlflow:")
print(f"mlflow models serve -m {tmpdir}/model --port {PORT}")
# !mlflow models serve -m {tmpdir}/model --port {PORT}

http://127.0.0.1:35279/invocations
  import imp

**********************
Run this command to launch mlflow:
mlflow models serve -m mlflow_tmp/model --port 35279


# Test with requests

In [8]:
import json
import requests
import pandas as pd
import numpy as np

# Single smoke-test request: one row [a, b] = [0, 2], serialised as a
# split-oriented DataFrame JSON document with string column names.
headers = {"content-type": "application/json"}
raw_data = np.array([[0, 2]])
column_names = [str(i) for i in range(raw_data.shape[1])]
data = pd.DataFrame(raw_data, columns=column_names).to_json(orient='split')

# POST it to the scoring endpoint and show both the status and the decoded body.
json_response = requests.post(server_url, headers=headers, data=data)
print(json_response)
print(json_response.json())

<Response [200]>
[{'0': 0, '1': 2}]


# Benchmark

In [11]:
import random

def get_request_producer(A, B, url=None):
    """Return a zero-argument factory that builds one benchmark request per call.

    Each call of the returned function produces a request whose single-row
    payload is ``[A, B]`` with a small random offset added to each value, so
    successive payloads differ.

    Args:
        A: Base value for the first payload column.
        B: Base value for the second payload column.
        url: Endpoint to target. When ``None`` (the default) the notebook-level
            ``server_url`` is looked up at the moment each request is built,
            which is the original behaviour.

    Returns:
        A callable returning ``(url, 'POST', headers, body)`` where ``body`` is
        a split-oriented DataFrame JSON string with columns ``"0"`` and ``"1"``.
        The same ``headers`` dict object is returned on every call.
    """
    headers = {"content-type": "application/json"}

    def _gen_data():
        # Offsets are below 1e-5 and 1e-4 respectively.
        _A = A + random.random() / 100000.0
        _B = B + random.random() / 10000.0

        raw_data = np.array([[_A, _B]])
        column_names = [str(i) for i in range(raw_data.shape[1])]
        data = pd.DataFrame(raw_data, columns=column_names).to_json(orient='split')

        # Resolve the global lazily so re-running the port cell is picked up.
        target_url = server_url if url is None else url
        return target_url, 'POST', headers, data

    return _gen_data

get_request_producer(0.1, 0.1)()

('http://127.0.0.1:35279/invocations',
 'POST',
 {'content-type': 'application/json'},
 '{"columns":["0","1"],"index":[0],"data":[[0.1000078584,0.100074214]]}')

In [None]:
# Coefficients sent (with a small random offset) as the single row of every
# benchmark request. The served model's predict sleeps
# A * <number of rows in the request> + B seconds before echoing the input,
# so these two values set the simulated per-request latency.
A = 0.001
B = 0.001

In [25]:
# Benchmark with a user count of three times the theoretical capacity
# (3 / (A + B)), clamped to the range [1, 900].
theoretical_user_num = int(3 / (A + B))
proper_user_num = min(max(theoretical_user_num, 1), 900)
print("test user amount: ", proper_user_num)

from bentoml.utils.benchmark import BenchmarkClient
request_producer = get_request_producer(A=A, B=B)
b = BenchmarkClient(request_producer, lambda: 1, timeout=11)
b.start_session(60, proper_user_num, proper_user_num)

test user amount:  900

╒══════════╤═════════╤══════════╤═════════════════╤═══════════════════╕
│ Result   │   Total │   Reqs/s │   Resp Time Avg │ Client Health %   │
╞══════════╪═════════╪══════════╪═════════════════╪═══════════════════╡
│ succ     │       0 │        0 │             nan │ 100.0             │
├──────────┼─────────┼──────────┼─────────────────┼───────────────────┤
│ fail     │       0 │        0 │             nan │                   │
╘══════════╧═════════╧══════════╧═════════════════╧═══════════════════╛
------ 900 users spawned ------

╒══════════╤═════════╤══════════╤═════════════════╤═══════════════════╕
│ Result   │   Total │   Reqs/s │   Resp Time Avg │ Client Health %   │
╞══════════╪═════════╪══════════╪═════════════════╪═══════════════════╡
│ succ     │     350 │      165 │        0.592955 │ 100.0             │
├──────────┼─────────┼──────────┼─────────────────┼───────────────────┤
│ fail     │       0 │        0 │      nan        │                   │
╘══════

In [26]:
def log_result(b, name, file_name):
    """Append a one-row summary of a benchmark run to a CSV file.

    Columns, in order: ``name``, success count, success count / session time,
    ``sum(succ_times)`` / success count, failure count, failure count /
    session time, ``sum(exec_times)`` / failure count. Every divisor is
    clamped to at least 1, so an empty run logs zeros instead of raising
    ZeroDivisionError.

    Args:
        b: Benchmark client whose ``stat`` attribute exposes ``success``,
            ``fail``, ``sess_time``, ``succ_times`` and ``exec_times``.
        name: Label for the row. It is CSV-quoted when it contains a comma,
            quote or newline, so it can never shift the other columns.
        file_name: Path of the CSV file; created if missing, appended to
            otherwise.
    """
    import csv  # local import keeps this cell runnable on its own

    stat = b.stat
    sess_time = max(stat.sess_time, 1)

    row = [
        name,
        stat.success,
        stat.success / sess_time,
        sum(stat.succ_times) / max(stat.success, 1),
        stat.fail,
        stat.fail / sess_time,
        sum(stat.exec_times) / max(stat.fail, 1),
    ]

    with open(file_name, "a") as lf:
        # str() keeps the exact text the previous hand-joined format produced;
        # lineterminator='\n' keeps the same single newline per row.
        csv.writer(lf, lineterminator='\n').writerow([str(value) for value in row])


log_result(b, f'{NAME}_{A}_{B}', 'benchmark_result.csv')