# Deploying ML Tutorial

In [1]:
import pandas as pd
from sklearn.datasets import load_wine

from sklearn.model_selection import GridSearchCV
from sklearn.pipeline import Pipeline, FeatureUnion

from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from sklearn.feature_selection import SelectKBest

from sklearn.ensemble import RandomForestClassifier

import pickle

#### Loading Data

In [2]:
# Fetch the wine dataset bundle, then wrap its feature matrix in a DataFrame
# whose columns carry the dataset's feature names.
data = load_wine()
df = pd.DataFrame(data['data'], columns=data['feature_names'])
y = data['target']
# Preview the first rows (rendered as the cell output).
df.head()

Unnamed: 0,alcohol,malic_acid,ash,alcalinity_of_ash,magnesium,total_phenols,flavanoids,nonflavanoid_phenols,proanthocyanins,color_intensity,hue,od280/od315_of_diluted_wines,proline
0,14.23,1.71,2.43,15.6,127.0,2.8,3.06,0.28,2.29,5.64,1.04,3.92,1065.0
1,13.2,1.78,2.14,11.2,100.0,2.65,2.76,0.26,1.28,4.38,1.05,3.4,1050.0
2,13.16,2.36,2.67,18.6,101.0,2.8,3.24,0.3,2.81,5.68,1.03,3.17,1185.0
3,14.37,1.95,2.5,16.8,113.0,3.85,3.49,0.24,2.18,7.8,0.86,3.45,1480.0
4,13.24,2.59,2.87,21.0,118.0,2.8,2.69,0.39,1.82,4.32,1.04,2.93,735.0


#### Defining a class that we can use in a pipeline

In [5]:
class RawFeats:
    """Column-selecting transformer that can be used as a pipeline step.

    The transformer learns nothing from the data; it simply indexes the
    input with the labels given at construction time (``X[feats]``).
    """

    def __init__(self, feats):
        """Remember which columns to keep.

        Parameters
        ----------
        feats : list
            Labels used to index the input in :meth:`transform`.
        """
        self.feats = feats

    def fit(self, X, y=None):
        """No-op fit; there is no state to learn.

        Returns
        -------
        RawFeats
            ``self``, so that calls can be chained
            (``RawFeats(...).fit(X).transform(X)``).
        """
        return self

    def transform(self, X, y=None):
        """Return ``X`` restricted to the configured columns.

        Parameters
        ----------
        X : indexable
            Object supporting ``X[self.feats]``.
        y : ignored
            Accepted only for signature compatibility.
        """
        return X[self.feats]

    def fit_transform(self, X, y=None):
        """Fit (a no-op) and return the selected columns of ``X``."""
        return self.fit(X, y).transform(X)

In [6]:
# Column names routed into the scaling + PCA branch of the pipeline.
feats = [
    'alcohol',
    'malic_acid',
    'ash',
    'alcalinity_of_ash',
    'magnesium',
    'total_phenols',
    'flavanoids',
    'nonflavanoid_phenols',
]
# Selector step that keeps exactly the columns listed above.
raw_feats = RawFeats(feats)

In [8]:
# Building blocks shared by the pipelines defined below.
sc = StandardScaler()
pca = PCA(n_components=2)
selection = SelectKBest(k=4)
# Seed the forest so that re-running the notebook on a fresh kernel gives the
# same fitted model and the same grid-search winner.
rf = RandomForestClassifier(random_state=42)

#### Pipelining

We build two small feature pipelines (a PCA branch and a `SelectKBest` branch), merge them with a `FeatureUnion`, and chain the result with the classifier into one main pipeline. Grid search is then run on that final pipeline.

In [11]:
# PCA branch: pick the raw columns, standardize them, then project with PCA.
PCA_pipeline = Pipeline(
    steps=[
        ("raw_feats", raw_feats),
        ("scaler", sc),
        ("pca", pca),
    ]
)

In [12]:
# Univariate-selection branch: a single SelectKBest step wrapped in a pipeline.
kbest_pipeline = Pipeline(
    steps=[
        ("kbest", selection),
    ]
)

In [13]:
# Concatenate the outputs of both feature branches side by side.
all_features = FeatureUnion(
    transformer_list=[
        ("pca_pipeline", PCA_pipeline),
        ("kbest_pipeline", kbest_pipeline),
    ]
)

In [14]:
# End-to-end estimator: combined features feeding the random forest.
main_pipeline = Pipeline(
    steps=[
        ("features", all_features),
        ("rf", rf),
    ]
)

#### Grid search

In [15]:
# Search space, keyed by "<step>__<nested step>__<parameter>" paths into main_pipeline.
param_grid = {
    # feature-extraction branches
    "features__pca_pipeline__pca__n_components": [1, 2, 3],
    "features__kbest_pipeline__kbest__k": [1, 2, 3],
    # random forest
    "rf__n_estimators": [2, 5, 10],
    "rf__max_depth": [2, 4, 6],
}

In [16]:
# Set up the search over param_grid; n_jobs=-1 asks for all processors and
# refit=True refits the best candidate once the search is done.
grid_search = GridSearchCV(
    estimator=main_pipeline,
    param_grid=param_grid,
    n_jobs=-1,
    verbose=10,
    refit=True,
)

# Run the search on the untransformed frame; the pipeline does the preprocessing.
grid_search.fit(df, y)

Fitting 5 folds for each of 81 candidates, totalling 405 fits


GridSearchCV(estimator=Pipeline(steps=[('features',
                                        FeatureUnion(transformer_list=[('pca_pipeline',
                                                                        Pipeline(steps=[('raw_feats',
                                                                                         <__main__.RawFeats object at 0x7faf593d2b20>),
                                                                                        ('scaler',
                                                                                         StandardScaler()),
                                                                                        ('pca',
                                                                                         PCA(n_components=2))])),
                                                                       ('kbest_pipeline',
                                                                        Pipeline(steps=[('kbest',
                 

> **All this pipelining lets us call the final pipeline directly on the original dataset, without applying any transformations by hand first.**

In [17]:
# Show the hyperparameter combination selected by the grid search.
grid_search.best_params_

{'features__kbest_pipeline__kbest__k': 3,
 'features__pca_pipeline__pca__n_components': 1,
 'rf__max_depth': 4,
 'rf__n_estimators': 10}

In [19]:
# Persist the fitted search object. The context manager makes sure the file
# handle is flushed and closed, even if pickling raises.
# NOTE(review): RawFeats is defined in this notebook, so the process that
# unpickles model.p presumably needs a compatible RawFeats definition — confirm.
with open("model.p", "wb") as model_file:
    pickle.dump(grid_search, model_file)

# Sending Data to the model that we just deployed

We send the features of the wine sample that we want our model to classify.   
We send them in JSON format.

In [20]:
# One wine sample, keyed by feature name; this is the payload posted to the scoring service.
json_data = {
    'alcohol': 14.23,
    'malic_acid': 1.71,
    'ash': 2.43,
    'alcalinity_of_ash': 15.6,
    'magnesium': 127.0,
    'total_phenols': 2.8,
    'flavanoids': 3.06,
    'nonflavanoid_phenols': 0.28,
    'proanthocyanins': 2.29,
    'color_intensity': 5.64,
    'hue': 1.04,
    'od280/od315_of_diluted_wines': 3.92,
    'proline': 1065.0,
}

In [21]:
import requests

In [22]:
URL = "http://127.0.0.1:5000/scoring"
# POST the sample as a JSON body and keep the response object.
# Without a timeout, requests waits indefinitely if the server never answers.
r = requests.post(url=URL, json=json_data, timeout=10)
# Surface HTTP errors (4xx/5xx) here instead of as a confusing JSON decode
# failure when the body is read later.
r.raise_for_status()

In [25]:
# Decode the JSON body of the scoring response and display it.
r.json()

[[0.9742369129578432, 0.025763087042156806, 0.0]]