In [4]:
import numpy as np
from sklearn import datasets

## Load Data

Adapted from [sklearn docs](http://scikit-learn.org/stable/tutorial/statistical_inference/supervised_learning.html)

In [12]:
# Load the iris dataset bundled with scikit-learn; its .data and .target
# attributes are pulled out in the next cell.
iris = datasets.load_iris()

In [13]:
# Feature matrix and class-label vector, unpacked in one assignment.
iris_X, iris_y = iris.data, iris.target

In [24]:
# Peek at the first five feature rows (four measurements per sample).
iris_X[:5]

array([[ 5.1,  3.5,  1.4,  0.2],
       [ 4.9,  3. ,  1.4,  0.2],
       [ 4.7,  3.2,  1.3,  0.2],
       [ 4.6,  3.1,  1.5,  0.2],
       [ 5. ,  3.6,  1.4,  0.2]])

In [25]:
# Peek at the first five class labels.
iris_y[:5]

array([0, 0, 0, 0, 0])

In [14]:
# List the distinct class labels present in the target vector.
np.unique(iris_y)

array([0, 1, 2])

## Split iris data into train and test

In [16]:
# Shuffle the row indices reproducibly (fixed seed), then hold out the
# last 10 shuffled rows as the test set and train on everything else.
np.random.seed(42)
indices = np.random.permutation(len(iris_X))
train_idx, test_idx = indices[:-10], indices[-10:]
iris_X_train, iris_y_train = iris_X[train_idx], iris_y[train_idx]
iris_X_test, iris_y_test = iris_X[test_idx], iris_y[test_idx]

## Fit nearest neighbor classifier

In [17]:
from sklearn.neighbors import KNeighborsClassifier

In [18]:
# Build the classifier with library defaults; the repr recorded in this
# notebook shows n_neighbors=5, weights='uniform', metric='minkowski', p=2.
knn = KNeighborsClassifier()

In [19]:
# Fit on the training split only; the 10 held-out rows stay unseen.
knn.fit(iris_X_train, iris_y_train) 

KNeighborsClassifier(algorithm='auto', leaf_size=30, metric='minkowski',
           metric_params=None, n_jobs=1, n_neighbors=5, p=2,
           weights='uniform')

In [20]:
# Sanity check: predict labels for the 10 held-out samples, to be compared
# by eye against the true labels (iris_y_test) shown in the next cell.
knn.predict(iris_X_test)

array([1, 1, 2, 2, 0, 1, 1, 0, 1, 2])

In [21]:
# True labels for the held-out samples; in the recorded output, 9 of the
# 10 predictions from the previous cell agree with these.
iris_y_test

array([1, 1, 2, 2, 0, 1, 2, 0, 1, 2])

## Pickle model to use in REST API

Adapted from [sklearn docs](http://scikit-learn.org/stable/modules/model_persistence.html)

In [29]:
# sklearn.externals.joblib was deprecated in scikit-learn 0.21 and removed
# in 0.23, so import the standalone joblib package first and only fall back
# to the vendored copy on old scikit-learn installs.
try:
    import joblib
except ImportError:
    from sklearn.externals import joblib

In [31]:
# Serialize the fitted classifier to disk (relative to the notebook's working
# directory); the call returns the list of file names it wrote.
joblib.dump(knn, 'iris_knn_model.pkl') 

['iris_knn_model.pkl']

## Load pickled model and use it to predict

In [32]:
# Restore the classifier from the file written above.
# NOTE: joblib files are pickle-based, and unpickling can execute arbitrary
# code. Only load model files from a trusted source (e.g. ones this
# notebook produced), especially once this runs behind a REST API.
knn_from_pkl = joblib.load('iris_knn_model.pkl')

In [33]:
# Display the restored estimator to confirm its parameters match the original.
knn_from_pkl

KNeighborsClassifier(algorithm='auto', leaf_size=30, metric='minkowski',
           metric_params=None, n_jobs=1, n_neighbors=5, p=2,
           weights='uniform')

In [34]:
# Take the first held-out sample. Slicing with [:1] (rather than indexing
# with [0]) keeps it as a one-row 2-D array, as the output below shows.
test_case = iris_X_test[:1]

In [40]:
# Columns are, in order: sepal length, sepal width, petal length, petal width.
test_case

array([[ 6.3,  2.3,  4.4,  1.3]])

In [36]:
# True label for the same sample, sliced the same way so it lines up with test_case.
test_target = iris_y_test[:1]

In [37]:
# Show the expected class for the single test case.
test_target

array([1])

In [38]:
# Predict with the reloaded model; the result should equal test_target.
knn_from_pkl.predict(test_case)

array([1])

In [39]:
# Check the container type of the model input. Presumably the REST API will
# need to build the same kind of object from request data (TODO confirm).
type(test_case)

numpy.ndarray