🗒️ Documentation
- [exporting model artifacts](https://cloud.google.com/vertex-ai/docs/training/exporting-model-artifacts#scikit-learn)
- [sklearn - iris dataset](https://scikit-learn.org/stable/modules/generated/sklearn.datasets.load_iris.html)

# train & export model

In [1]:
import pickle

import numpy as np
import sklearn
from sklearn import datasets
from sklearn.ensemble import RandomForestClassifier

In [2]:
# File name of the pickled classifier; used below as the local output path.
# NOTE(review): the Vertex AI export guide linked at the top of this notebook
# appears to require this exact name for pickle artifacts — confirm before
# renaming.
ARTIFACT_FILENAME = "model.pkl"

In [3]:
# Load the bundled iris dataset and train a random forest on all of it.
# There is no train/test split here: every sample is used for fitting.
iris = datasets.load_iris()

# Default hyperparameters; no random_state is passed, so the fitted trees may
# differ from run to run.
classifier = RandomForestClassifier()
classifier.fit(X=iris.data, y=iris.target)

RandomForestClassifier()

In [4]:
# Serialize the trained classifier into the current working directory.
# The original author flags this location as non-persistent — presumably the
# notebook runs on ephemeral storage; copy the file elsewhere to keep it.
local_path = ARTIFACT_FILENAME
with open(local_path, mode="wb") as artifact:
    pickle.dump(classifier, file=artifact)

# sklearn version

In [5]:
# Show the scikit-learn version used for training. Pickled estimators are not
# guaranteed to load under a different scikit-learn version, so record this
# alongside the artifact.
sklearn.__version__

'1.0.2'

# dataset info

In [6]:
# Print the dataset's bundled description (attributes, classes, statistics).
print(iris.DESCR)

.. _iris_dataset:

Iris plants dataset
--------------------

**Data Set Characteristics:**

    :Number of Instances: 150 (50 in each of three classes)
    :Number of Attributes: 4 numeric, predictive attributes and the class
    :Attribute Information:
        - sepal length in cm
        - sepal width in cm
        - petal length in cm
        - petal width in cm
        - class:
                - Iris-Setosa
                - Iris-Versicolour
                - Iris-Virginica
                
    :Summary Statistics:

                    Min  Max   Mean    SD   Class Correlation
    sepal length:   4.3  7.9   5.84   0.83    0.7826
    sepal width:    2.0  4.4   3.05   0.43   -0.4194
    petal length:   1.0  6.9   3.76   1.76    0.9490  (high!)
    petal width:    0.1  2.5   1.20   0.76    0.9565  (high!)

    :Missing Attribute Values: None
    :Class Distribution: 33.3% for each of 3 classes.
    :Creator: R.A. Fisher
    :Donor: Michael Marshall (MARSHALL%PLU@io.arc.nasa.gov)
    :

In [7]:
# Feature matrix: one row per sample, four measurements (in cm) per row.
iris.data

array([[5.1, 3.5, 1.4, 0.2],
       [4.9, 3. , 1.4, 0.2],
       [4.7, 3.2, 1.3, 0.2],
       [4.6, 3.1, 1.5, 0.2],
       [5. , 3.6, 1.4, 0.2],
       [5.4, 3.9, 1.7, 0.4],
       [4.6, 3.4, 1.4, 0.3],
       [5. , 3.4, 1.5, 0.2],
       [4.4, 2.9, 1.4, 0.2],
       [4.9, 3.1, 1.5, 0.1],
       [5.4, 3.7, 1.5, 0.2],
       [4.8, 3.4, 1.6, 0.2],
       [4.8, 3. , 1.4, 0.1],
       [4.3, 3. , 1.1, 0.1],
       [5.8, 4. , 1.2, 0.2],
       [5.7, 4.4, 1.5, 0.4],
       [5.4, 3.9, 1.3, 0.4],
       [5.1, 3.5, 1.4, 0.3],
       [5.7, 3.8, 1.7, 0.3],
       [5.1, 3.8, 1.5, 0.3],
       [5.4, 3.4, 1.7, 0.2],
       [5.1, 3.7, 1.5, 0.4],
       [4.6, 3.6, 1. , 0.2],
       [5.1, 3.3, 1.7, 0.5],
       [4.8, 3.4, 1.9, 0.2],
       [5. , 3. , 1.6, 0.2],
       [5. , 3.4, 1.6, 0.4],
       [5.2, 3.5, 1.5, 0.2],
       [5.2, 3.4, 1.4, 0.2],
       [4.7, 3.2, 1.6, 0.2],
       [4.8, 3.1, 1.6, 0.2],
       [5.4, 3.4, 1.5, 0.4],
       [5.2, 4.1, 1.5, 0.1],
       [5.5, 4.2, 1.4, 0.2],
       [4.9, 3

In [8]:
# Integer class label for each sample (0, 1 or 2).
iris.target

array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
       2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
       2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2])

In [9]:
# Names of the four feature columns.
iris.feature_names

['sepal length (cm)',
 'sepal width (cm)',
 'petal length (cm)',
 'petal width (cm)']

In [10]:
# Human-readable names of the three classes.
# Presumably position i names label i in iris.target — confirm.
iris.target_names

array(['setosa', 'versicolor', 'virginica'], dtype='<U10')

# use model

In [11]:
# Pick three known rows (one per class) to sanity-check the classifier.
# These rows were part of the training data, so this is a smoke test of the
# predict call, not an accuracy evaluation.
indices = [120, 60, 20]
x_test = iris.data[indices, :]
x_test

array([[6.9, 3.2, 5.7, 2.3],
       [5. , 2. , 3.5, 1. ],
       [5.4, 3.4, 1.7, 0.2]])

In [12]:
# Ground-truth labels for the same rows, to compare against the predictions.
y_test = iris.target[indices]
y_test

array([2, 1, 0])

In [13]:
# Predict with the in-memory classifier; the result should equal y_test.
classifier.predict(x_test)

array([2, 1, 0])

# use model from pickle

In [14]:
# Reload the classifier from the pickle file written earlier in this notebook.
# The context manager closes the file handle deterministically; passing a bare
# open() call to pickle.load leaves the handle open until garbage collection.
# NOTE: pickle.load can execute arbitrary code from the file — only load
# artifacts you produced yourself or otherwise trust.
with open("model.pkl", "rb") as model_file:
    loaded_model = pickle.load(model_file)

In [15]:
# Three samples as plain nested lists, one row per sample, in feature order
# (sepal length, sepal width, petal length, petal width).
data = [
    [6.9, 3.2, 5.7, 2.3],
    [5.0, 2.0, 3.5, 1.0],
    [5.4, 3.4, 1.7, 0.2],
]

In [16]:
# Predict with the reloaded model, feeding it plain nested Python lists.
predictions = loaded_model.predict(data)
predictions

array([2, 1, 0])

In [17]:
# The same three samples, this time as a 2-D NumPy array (one row per sample).
data = np.array(
    [
        [6.9, 3.2, 5.7, 2.3],
        [5.0, 2.0, 3.5, 1.0],
        [5.4, 3.4, 1.7, 0.2],
    ]
)

In [18]:
# Repeat the prediction with the NumPy array input defined in the cell above.
predictions = loaded_model.predict(data)
predictions

array([2, 1, 0])