In [1]:
# Import Supporting Libraries
from sklearn import datasets
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.metrics import classification_report
import pickle
import requests
import json
import pandas as pd
from requests.auth import HTTPProxyAuth
import os

In [2]:
# Load iris dataset
iris = datasets.load_iris()

In [4]:
# Print iris description 
print(iris.DESCR)

.. _iris_dataset:

Iris plants dataset
--------------------

**Data Set Characteristics:**

    :Number of Instances: 150 (50 in each of three classes)
    :Number of Attributes: 4 numeric, predictive attributes and the class
    :Attribute Information:
        - sepal length in cm
        - sepal width in cm
        - petal length in cm
        - petal width in cm
        - class:
                - Iris-Setosa
                - Iris-Versicolour
                - Iris-Virginica
                
    :Summary Statistics:

                    Min  Max   Mean    SD   Class Correlation
    sepal length:   4.3  7.9   5.84   0.83    0.7826
    sepal width:    2.0  4.4   3.05   0.43   -0.4194
    petal length:   1.0  6.9   3.76   1.76    0.9490  (high!)
    petal width:    0.1  2.5   1.20   0.76    0.9565  (high!)

    :Missing Attribute Values: None
    :Class Distribution: 33.3% for each of 3 classes.
    :Creator: R.A. Fisher
    :Donor: Michael Marshall (MARSHALL%PLU@io.arc.nasa.gov)
    :

In [5]:
# Assign independent features "data" to X and dependent feature "target" to y
X = iris.data
y = iris.target

In [6]:
# Print target names
print(iris['target_names'])

['setosa' 'versicolor' 'virginica']


In [None]:
# Print Iris array X-variables, indepedent features
print(iris.data)

In [7]:
print(type(iris.data))

<class 'numpy.ndarray'>


In [9]:
# Print Iris array Y-variable, dependent feature
print(iris.target)

[0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 2 2 2 2 2 2 2 2 2 2
 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2
 2 2]


In [11]:
# Perform train, test split with 70% X_train, X_test, y_train, y_test
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.3)

In [12]:
# Instantiate Random Forest Classifier with 100 n_estimators and 2 n_jobs
rfc = RandomForestClassifier(n_estimators = 100, n_jobs = 2)

In [13]:
# Fit Random Forest Classifier
rfc.fit(X_train, y_train)

RandomForestClassifier(bootstrap=True, class_weight=None, criterion='gini',
            max_depth=None, max_features='auto', max_leaf_nodes=None,
            min_impurity_decrease=0.0, min_impurity_split=None,
            min_samples_leaf=1, min_samples_split=2,
            min_weight_fraction_leaf=0.0, n_estimators=100, n_jobs=2,
            oob_score=False, random_state=None, verbose=0,
            warm_start=False)

In [14]:
# Print accuracy score
# Print classification report
print("Accuracy = %0.2f" % accuracy_score (y_test, rfc.predict(X_test)))
print(classification_report(y_test, rfc.predict(X_test)))

Accuracy = 0.98
              precision    recall  f1-score   support

           0       1.00      1.00      1.00         9
           1       0.94      1.00      0.97        17
           2       1.00      0.95      0.97        19

   micro avg       0.98      0.98      0.98        45
   macro avg       0.98      0.98      0.98        45
weighted avg       0.98      0.98      0.98        45



In [15]:
# Generate predicted values (y_pred)
y_pred = rfc.predict(X_test)

## Model Serialization

In [14]:
# Pickle dump rfc model
pickle.dump(rfc, open("iris_rfc.pkl", "wb"))

In [15]:
# Pickle load rfc model
my_random_forest = pickle.load(open("iris_rfc.pkl", 'rb'))

In [16]:
# View Random Forest
my_random_forest

RandomForestClassifier(bootstrap=True, class_weight=None, criterion='gini',
            max_depth=None, max_features='auto', max_leaf_nodes=None,
            min_impurity_decrease=0.0, min_impurity_split=None,
            min_samples_leaf=1, min_samples_split=2,
            min_weight_fraction_leaf=0.0, n_estimators=100, n_jobs=2,
            oob_score=False, random_state=None, verbose=0,
            warm_start=False)

In [16]:
# Print classification report again
print(classification_report(y_test, y_pred))

              precision    recall  f1-score   support

           0       1.00      1.00      1.00         9
           1       0.94      1.00      0.97        17
           2       1.00      0.95      0.97        19

   micro avg       0.98      0.98      0.98        45
   macro avg       0.98      0.98      0.98        45
weighted avg       0.98      0.98      0.98        45



### Flask web dev endpoint!

In [23]:
# Set the environment variable NO_PROXY for localhost if behind proxy
os.environ['NO_PROXY'] = '127.0.0.1'
url = "http://127.0.0.1:9000/ml_api"
versicolor_data = json.dumps({'sl':5.24,'sw':3.0,'pl':3.75,'pw':1.1})
r = requests.post(url, versicolor_data)
print(r.json())

{'results': {'y_hat': '[1]'}}


In [24]:
# Set the environment variable NO_PROXY for localhost if behind proxy
os.environ['NO_PROXY'] = '127.0.0.1'
url = "http://127.0.0.1:9000/ml_api"
setosa_data = json.dumps({'sl':5.11,'sw':3.4,'pl':1.6,'pw':0.3})
r = requests.post(url, setosa_data)
print(r.json())

{'results': {'y_hat': '[0]'}}


In [25]:
# Set the environment variable NO_PROXY for localhost if behind proxy
os.environ['NO_PROXY'] = '127.0.0.1'
url = "http://127.0.0.1:9000/ml_api"
virginica_data = json.dumps({'sl':6.1,'sw':3.4,'pl':5.6,'pw':2.1})
r = requests.post(url, virginica_data)
print(r.json())

{'results': {'y_hat': '[2]'}}
