# sklearn IRIS
![Impression](https://www.google-analytics.com/collect?v=1&tid=UA-112879361-3&cid=555&t=event&ec=benchmark&ea=bentoml-benchmark&dt=sklearn-iris-mlflow)

In [1]:
# Load (or reload) IPython's autoreload extension and set it to mode 2,
# so edited modules are picked up without restarting the kernel.
%reload_ext autoreload
%autoreload 2
# Render matplotlib figures inline in the notebook output.
%matplotlib inline

In [1]:
# Base name of the generated loader script (`{NAME}.py`); also used as the
# model-name label when benchmark results are logged.
NAME = 'sklearn_iris_mlflow'

# Load data & train model

In [3]:
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split
from sklearn import datasets

# One seed for both the train/test split and the forest, so a
# Restart & Run All reproduces the same model, predictions and mse.
SEED = 7

# Use only the feature columns from index 2 onward as model inputs.
iris = datasets.load_iris()
x = iris.data[:, 2:]
y = iris.target
X_train, X_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=SEED)

# add parameters for tuning
num_estimators = 100

# train the model (seeded: without random_state every run fits a different forest)
rf = RandomForestRegressor(n_estimators=num_estimators, random_state=SEED)
rf.fit(X_train, y_train)
predictions = rf.predict(X_test)
print('predictions: ', predictions)

# log model performance
mse = mean_squared_error(y_test, predictions)
print("  mse: %f" % mse)

predictions:  [2.   1.   0.   1.   1.62 0.   1.16 1.   0.   1.   2.   1.   0.   2.
 0.   1.91 2.   2.   0.   0.   1.   2.   1.   1.28 1.48 1.91 1.   1.
 2.   2.  ]
  mse: 0.093167


In [5]:
import os
import pickle

# Scratch directory; the generated loader script reads the pickle from the
# same 'mlflow_tmp' location.
tmpdir = 'mlflow_tmp'
# Pure-Python mkdir that is safe to re-run when the directory already exists.
os.makedirs(tmpdir, exist_ok=True)

# Serialize the trained forest so the generated script can hand it to mlflow.
data_path = os.path.join(tmpdir, 'skmodel.pkl')
with open(data_path, 'wb') as of:
    pickle.dump(rf, of)

In [6]:
%%writefile {NAME}.py
from __future__ import print_function

import os
import pickle

import pandas as pd
import numpy as np
import pytest
import six

import tensorflow as tf

import mlflow
import mlflow.pyfunc
import mlflow.pyfunc.model
from mlflow.models import Model


def _load_pyfunc(path):
    with open(path, 'rb') as of:
        data_model = pickle.load(of)
    class Model:
        def predict(self, inputs):
            inputs = inputs.to_numpy()
            outputs = data_model.predict(inputs)
            return outputs
    return Model()
        


if __name__ == '__main__':
    # Export this file as an mlflow pyfunc model: the pickled estimator is the
    # model data and this module (by file name, minus ".py") is the loader.
    tmpdir = 'mlflow_tmp'
    pickled_model = os.path.join(tmpdir, 'skmodel.pkl')
    export_dir = os.path.join(str(tmpdir), "model")

    model_config = Model(run_id="test")
    this_module = os.path.basename(__file__)[:-3]

    mlflow.pyfunc.save_model(
        path=export_dir,
        data_path=pickled_model,
        loader_module=this_module,
        code_path=[__file__],
        mlflow_model=model_config,
    )


Writing sklearn_iris_mlflow.py


In [7]:
from bentoml.utils import detect_free_port
PORT = detect_free_port()
server_url = f'http://127.0.0.1:{PORT}/invocations'
print(server_url)

!rm -r {tmpdir}/model
!python {NAME}.py

print("Run this command to launch mlflow:")
print(f"mlflow models serve -m {tmpdir}/model --port {PORT}")
# !mlflow models serve -m {tmpdir}/model --port {PORT}

http://127.0.0.1:50737/invocations
rm: cannot remove ‘mlflow_tmp/model’: No such file or directory
Run this command to launch mlflow:
mlflow models serve -m mlflow_tmp/model --port 50737


# Test with requests

In [2]:
# NOTE(review): hard-coded endpoint; the port must match the one the mlflow
# server was launched on (it equals the port printed by an earlier run).
# Update it if the server is restarted on a different port.
server_url = 'http://127.0.0.1:50737/invocations'
import json
import requests
import pandas as pd

from sklearn import datasets
from sklearn.model_selection import train_test_split

# Rebuild the same split as the training cell (same columns, test_size and seed).
iris = datasets.load_iris()
x = iris.data[:, 2:]
y = iris.target
X_train, X_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=7)

headers = {"content-type": "application/json"}
# Send only the first test row, serialized as a pandas "split"-oriented JSON
# document with stringified column indices as column names.
raw_data = X_test[:1]
data = pd.DataFrame(raw_data,
                    columns=map(str, range(raw_data.shape[1]))).to_json(orient='split')

# Show the payload, the HTTP response, and the decoded JSON body.
print(data)
json_response = requests.post(server_url, data=data, headers=headers)
print(json_response)
print(json_response.json())

{"columns":["0","1"],"index":[0],"data":[[5.1,1.8]]}
<Response [200]>
[2.0]


# Benchmark

In [3]:
import pandas as pd
import json
import copy
import random


def get_request_producer():
    """Return a zero-argument callable that builds one benchmark request.

    Each call of the returned function yields ``(url, method, headers, data)``
    where ``data`` is a "split"-oriented JSON document holding a single iris
    row whose two values are nudged by a small random amount. ``url`` is the
    value of the global ``server_url`` at the time this factory is called.
    """
    from sklearn import datasets
    from sklearn.model_selection import train_test_split

    iris = datasets.load_iris()
    features, labels = iris.data[:, 2:], iris.target
    _, X_test, _, _ = train_test_split(features, labels, test_size=0.1, random_state=7)
    # First held-out row, as a nested list, is the template for every request.
    seed_row = X_test[:1].tolist()

    endpoint = server_url
    verb = "POST"
    request_headers = {"content-type": "application/json"}

    def _gen_data():
        # Copy the template so the jitter never accumulates across calls.
        sample = copy.deepcopy(seed_row)
        for col in (0, 1):
            sample[0][col] += random.random() / 10000
        payload = pd.DataFrame(sample).to_json(orient='split')
        return endpoint, verb, request_headers, payload

    return _gen_data

# Build a producer and display one generated (url, method, headers, data)
# tuple as a quick sanity check.
get_request_producer()()

('http://127.0.0.1:50737/invocations',
 'POST',
 {'content-type': 'application/json'},
 '{"columns":[0,1],"index":[0],"data":[[5.1000292703,1.8000596995]]}')

In [4]:
import os


def init_file(file_name):
    """Create ``file_name`` with the CSV header row unless it already exists.

    Args:
        file_name: Path of the benchmark results CSV.

    The existence check uses ``file_name`` itself rather than the global
    ``LOG_FILE``, so a path other than the default still gets its header
    and the function works before ``LOG_FILE`` is defined.

    NOTE(review): the header names eight columns, while ``log_result`` writes
    nine values per row (the last one has no header). Confirm the intended
    column name before extending it.
    """
    if os.path.exists(file_name):
        return
    with open(file_name, "a") as lf:
        lf.write('"model name","test_users","total succ","succ/sec","avg resp time",'
                 '"total fail","fail/sec","avg fail resp time"')
        lf.write('\n')


def log_result(b, name, file_name, test_user):
    """Append one comma-separated result row for a finished benchmark run.

    Args:
        b: Benchmark client whose ``stat`` attribute carries the counters
            read below.
        name: Label written in the first column.
        file_name: CSV file to append to (its header is created on demand).
        test_user: Number of simulated users for this run.
    """
    init_file(file_name)
    stat = b.stat

    # Denominators are clamped to 1 to avoid division by zero.
    session_secs = max(stat.sess_time, 1)
    row = [
        name,
        test_user,
        stat.success,
        stat.success / session_secs,
        sum(stat.succ_times) / max(stat.success, 1),
        stat.fail,
        stat.fail / session_secs,
        sum(stat.exec_times) / max(stat.fail, 1),
        (1 - stat.client_busy / max(stat.req_total, 1)),
    ]

    line = ','.join(map(str, row))
    with open(file_name, "a") as lf:
        lf.write(line)
        lf.write('\n')

        
# CSV file that benchmark result rows are appended to.
LOG_FILE = 'benchmark_result.csv'

In [6]:
# NOTE(review): this initial value is overwritten by the loop variable below.
TEST_USER = 600
from bentoml.utils.benchmark import BenchmarkClient

import asyncio

# Sweep the simulated user count from 600 to 1500 in steps of 100, running one
# benchmark session per setting and appending its result row to LOG_FILE.
for TEST_USER in range(600, 1600, 100):
    # NOTE(review): the second argument and timeout=0.32 are passed through
    # to BenchmarkClient as-is; confirm their meaning against the
    # BenchmarkClient API.
    b = BenchmarkClient(get_request_producer(), lambda: 1, timeout=0.32)
    # Presumably (duration in seconds, total users, spawn rate) — TODO confirm;
    # `_start_session` is a private method of the client.
    await b._start_session(60, TEST_USER, TEST_USER)
    log_result(b, f'{NAME}', LOG_FILE, TEST_USER)
    b.killall()
    # Pause between runs before starting the next session.
    await asyncio.sleep(15)


╒══════════╤═════════╤══════════╤═════════════════╤═══════════════════╕
│ Result   │   Total │   Reqs/s │   Resp Time Avg │ Client Health %   │
╞══════════╪═════════╪══════════╪═════════════════╪═══════════════════╡
│ succ     │       0 │        0 │             nan │ 100.0             │
├──────────┼─────────┼──────────┼─────────────────┼───────────────────┤
│ fail     │       0 │        0 │             nan │                   │
╘══════════╧═════════╧══════════╧═════════════════╧═══════════════════╛
------ 600 users spawned ------

╒══════════╤═════════╤══════════╤═════════════════╤═══════════════════╕
│ Result   │   Total │   Reqs/s │   Resp Time Avg │ Client Health %   │
╞══════════╪═════════╪══════════╪═════════════════╪═══════════════════╡
│ succ     │     107 │       49 │        0.431618 │ 100.0             │
├──────────┼─────────┼──────────┼─────────────────┼───────────────────┤
│ fail     │     204 │      102 │        0.665935 │                   │
╘══════════╧═════════╧════════