# ML Modeling with Iris Dataset and Sklearn

### Goals

* Train an ML model
* Create a Python wrapper with a predict function
* Build a Docker image (s2i) with the wrapper and model


In [46]:
import sklearn
from sklearn.ensemble import RandomForestClassifier
from sklearn import datasets
import numpy as np
import joblib
from joblib import dump, load
import seldon_core

In [40]:
# scikit-learn version used to train the model. model.joblib is a pickle-based
# artifact, so the serving image should install this same version.
sklearn.__version__

'0.24.2'

In [45]:
# joblib version used to write model.joblib; keep it aligned with the version
# installed in the serving image.
joblib.__version__

'1.0.1'

In [47]:
# seldon-core version installed in this notebook environment.
# NOTE(review): the `make build` step in this notebook uses the
# seldonio/seldon-core-s2i-python3:1.7.0 base image; confirm the version
# difference from the value shown here is intended.
seldon_core.__version__

'1.10.0'

In [4]:
# Load the iris dataset bundled with scikit-learn; it is indexed by key
# ('data', 'target') in the cells that follow.
iris = datasets.load_iris()

In [7]:
# Dimensions of the feature matrix.
iris['data'].shape

(150, 4)

In [8]:
# Dimensions of the label array.
iris['target'].shape

(150,)

In [13]:
# Distinct class labels present in the target array.
np.unique(iris['target'])

array([0, 1, 2])

In [32]:
# Features (X) and class labels (y) used for training, taken directly from
# the loaded dataset.
X = iris['data']
y = iris['target']

In [34]:
# Peek at the first sample's feature vector.
X[0]

array([5.1, 3.5, 1.4, 0.2])

In [35]:
# Random forest with default hyperparameters; random_state is fixed so that
# repeated fits give the same model.
clf = RandomForestClassifier(random_state=42)

In [36]:
# Fit on the full X, y (no held-out split is made in the cells shown here).
clf.fit(X, y)

RandomForestClassifier(random_state=42)

In [38]:
# NOTE: scored on the same X, y that the classifier was fitted on, so this is
# training accuracy, not an estimate of performance on unseen data.
clf.score(X, y)

1.0

In [39]:
# Serialize the fitted classifier to model.joblib, the filename that
# Model.py loads in its constructor.
dump(clf, "model.joblib")

['model.joblib']

## Testing Deployment Python Wrapper

In [7]:
cat Model.py

# predict function modified from https://docs.seldon.io/projects/seldon-core/en/latest/workflow/github-readme.html

from joblib import dump, load
import logging

logging.basicConfig(format='%(asctime)s (%(levelname)s) %(message)s',
                    level=logging.DEBUG,
                    datefmt='%d.%m.%Y %H:%M:%S')

class Model:
    """Seldon-style Python wrapper around a joblib-serialized estimator.

    The wrapped object only needs to expose ``predict(X)``; every call is
    logged together with its result.
    """

    def __init__(self, model_path="model.joblib"):
        """Load the serialized model.

        Args:
            model_path: Path of the joblib file to load. Defaults to
                ``"model.joblib"``, so constructing ``Model()`` without
                arguments behaves exactly as before.
        """
        # joblib.load unpickles the file, which can execute arbitrary code:
        # only point model_path at artifacts from a trusted source.
        self._model = load(model_path)

    def predict(self, X, features_names=None):
        """Return the wrapped model's predictions for ``X``.

        Args:
            X: Samples, passed unchanged to the wrapped model's ``predict``.
            features_names: Accepted as part of the wrapper signature; not
                used by this implementation.

        Returns:
            Whatever the wrapped model's ``predict`` returns for ``X``.
        """

        logging.info(f'received X {X}')

        output = self._model.predict(X)

        logging.info(f"model output {output}")

        return output

In [3]:
from Model import Model

In [4]:
# Smoke-test the wrapper locally with a single four-feature sample before
# building the image.
Model().predict([[1,2,3,4]])

01.09.2021 09:24:06 (INFO) received X [[1, 2, 3, 4]]
01.09.2021 09:24:06 (INFO) model output [2]


array([2])

### Building Image

In [8]:
! make build

#s2i build . -E environment seldonio/seldon-core-s2i-python3:0.18 sklearn_iris:0.3
s2i build . -E environment seldonio/seldon-core-s2i-python3:1.7.0 sklearn_iris:0.3
---> Installing application source...
---> Installing dependencies ...
Looking in links: /whl
Collecting scikit-learn==0.24.2
Downloading scikit_learn-0.24.2-cp37-cp37m-manylinux2010_x86_64.whl (22.3 MB)
Collecting joblib==1.0.1
Downloading joblib-1.0.1-py3-none-any.whl (303 kB)
Collecting threadpoolctl>=2.0.0
Downloading threadpoolctl-2.2.0-py3-none-any.whl (12 kB)
Collecting scipy>=0.19.1
Downloading scipy-1.7.1-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.whl (28.5 MB)
Installing collected packages: threadpoolctl, joblib, scipy, scikit-learn
Successfully installed joblib-1.0.1 scikit-learn-0.24.2 scipy-1.7.1 threadpoolctl-2.2.0
You should consider upgrading via the '/opt/conda/bin/python -m pip install --upgrade pip' command.
Collecting pip-licenses
Downloading pip_licenses-3.5.2-py3-none-any.whl (17 kB)
Collecting

## Run docker locally and Test API access

In [26]:
! docker run --rm -d -p 9000:9000/tcp sklearn_iris:0.3

83ea4ebbb9fda6d498e096cdeb6986f3b8e1c1375f5543871aba3199288c5311


In [27]:
! cat api_access_curl.sh; ./api_access_curl.sh

# port 9000 exposed through the sklearn container
curl -s -X POST http://localhost:9000/api/v1.0/predictions \
    -H 'Content-Type: application/json' \
    -d '{ "data": { "ndarray": [[1,2,3,4]] } }'

# port 8000 exposed through seldon-container-engine container
# Swagger API doc at http://localhost:8000/api/v1.0/doc/

{"data":{"names":[],"ndarray":[2]},"meta":{}}


## Push to Registry

Push to the registry of your choice (content omitted)