In [233]:
import pandas as pd
from sklearn.datasets import load_wine
import json

from sklearn.model_selection import GridSearchCV
from sklearn.pipeline import Pipeline, FeatureUnion

from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from sklearn.feature_selection import SelectKBest

import sklearn
from sklearn.ensemble import RandomForestClassifier

import pickle
import joblib

# DATA

In [194]:
data = load_wine()
df = pd.DataFrame(data['data'])
df.columns = data['feature_names']
y = data['target']
df.head()

Unnamed: 0,alcohol,malic_acid,ash,alcalinity_of_ash,magnesium,total_phenols,flavanoids,nonflavanoid_phenols,proanthocyanins,color_intensity,hue,od280/od315_of_diluted_wines,proline
0,14.23,1.71,2.43,15.6,127.0,2.8,3.06,0.28,2.29,5.64,1.04,3.92,1065.0
1,13.2,1.78,2.14,11.2,100.0,2.65,2.76,0.26,1.28,4.38,1.05,3.4,1050.0
2,13.16,2.36,2.67,18.6,101.0,2.8,3.24,0.3,2.81,5.68,1.03,3.17,1185.0
3,14.37,1.95,2.5,16.8,113.0,3.85,3.49,0.24,2.18,7.8,0.86,3.45,1480.0
4,13.24,2.59,2.87,21.0,118.0,2.8,2.69,0.39,1.82,4.32,1.04,2.93,735.0


### CLASS

In [195]:
class RawFeats:
    def __init__(self, feats):
        self.feats = feats

    def fit(self, X, y=None):
        pass


    def transform(self, X, y=None):
        return X[self.feats]

    def fit_transform(self, X, y=None):
        self.fit(X)
        return self.transform(X)


# features we want to keep for PCA
feats = ['alcohol','malic_acid','ash','alcalinity_of_ash','magnesium',
         'total_phenols','flavanoids','nonflavanoid_phenols']
# creating class object with indexes we want to keep.
raw_feats = RawFeats(feats)

### PCA

In [196]:
sc = StandardScaler()
pca = PCA(n_components=2)

### SELECTKBEST

In [197]:
selection = SelectKBest(k=4)

In [198]:
rf = RandomForestClassifier()

In [199]:
PCA_pipeline = Pipeline([
    ("rawFeats", raw_feats),
    ("scaler", sc),
    ("pca", pca)
])

kbest_pipeline = Pipeline([("kBest", selection)])

In [200]:
all_features = FeatureUnion([
    ("pcaPipeline", PCA_pipeline), 
    ("kBestPipeline", kbest_pipeline)
])

In [201]:
main_pipeline = Pipeline([
    ("features", all_features),
    ("rf", rf)
])

In [202]:
# set up our parameters grid
param_grid = {"features__pcaPipeline__pca__n_components": [1, 2, 3],
                  "features__kBestPipeline__kBest__k": [1, 2, 3],
                  "rf__n_estimators":[2, 5, 10]
             }

# create a Grid Search object
grid_search = GridSearchCV(main_pipeline, param_grid, n_jobs = -1, verbose=10, refit=True)    

# fit the model and tune parameters
grid_search.fit(df, y)

Fitting 5 folds for each of 27 candidates, totalling 135 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 6 concurrent workers.
[Parallel(n_jobs=-1)]: Done   1 tasks      | elapsed:    1.4s
[Parallel(n_jobs=-1)]: Done   6 tasks      | elapsed:    1.5s
[Parallel(n_jobs=-1)]: Done  13 tasks      | elapsed:    1.5s
[Parallel(n_jobs=-1)]: Done  20 tasks      | elapsed:    1.5s
[Parallel(n_jobs=-1)]: Batch computation too fast (0.1752s.) Setting batch_size=2.
[Parallel(n_jobs=-1)]: Done  29 tasks      | elapsed:    1.6s
[Parallel(n_jobs=-1)]: Batch computation too fast (0.0613s.) Setting batch_size=4.
[Parallel(n_jobs=-1)]: Done  40 tasks      | elapsed:    1.7s
[Parallel(n_jobs=-1)]: Done  64 tasks      | elapsed:    1.8s
[Parallel(n_jobs=-1)]: Batch computation too fast (0.0959s.) Setting batch_size=8.
[Parallel(n_jobs=-1)]: Done 102 tasks      | elapsed:    1.9s
[Parallel(n_jobs=-1)]: Done 121 tasks      | elapsed:    2.0s
[Parallel(n_jobs=-1)]: Done 135 out of 135 | elapsed:    2.0s finished


GridSearchCV(estimator=Pipeline(steps=[('features',
                                        FeatureUnion(transformer_list=[('pcaPipeline',
                                                                        Pipeline(steps=[('rawFeats',
                                                                                         <__main__.RawFeats object at 0x7ff51654f790>),
                                                                                        ('scaler',
                                                                                         StandardScaler()),
                                                                                        ('pca',
                                                                                         PCA(n_components=2))])),
                                                                       ('kBestPipeline',
                                                                        Pipeline(steps=[('kBest',
                    

In [234]:
pickle.dump( grid_search, open( "model.p", "wb" ) )
joblib.dump(grid_search, open( "model_joblib.p", "wb" ))

In [235]:
joblib.load(open("model_joblib.p", 'rb'))

GridSearchCV(estimator=Pipeline(steps=[('features',
                                        FeatureUnion(transformer_list=[('pcaPipeline',
                                                                        Pipeline(steps=[('rawFeats',
                                                                                         <__main__.RawFeats object at 0x7ff5164e6a00>),
                                                                                        ('scaler',
                                                                                         StandardScaler()),
                                                                                        ('pca',
                                                                                         PCA(n_components=2))])),
                                                                       ('kBestPipeline',
                                                                        Pipeline(steps=[('kBest',
                    

In [204]:
pickle.load(open("model.p", 'rb'))

GridSearchCV(estimator=Pipeline(steps=[('features',
                                        FeatureUnion(transformer_list=[('pcaPipeline',
                                                                        Pipeline(steps=[('rawFeats',
                                                                                         <__main__.RawFeats object at 0x7ff51654fc10>),
                                                                                        ('scaler',
                                                                                         StandardScaler()),
                                                                                        ('pca',
                                                                                         PCA(n_components=2))])),
                                                                       ('kBestPipeline',
                                                                        Pipeline(steps=[('kBest',
                    

# OTHERS

In [208]:
import requests
import json

data = [[140.34, 1.68, 2.7, 0, 98.0, 2.8, 1.31, 5.53, 2.7, 130.0, 4.57, 1.96, 60.0]]
j_data = json.dumps(data)
j_data

'[[140.34, 1.68, 2.7, 0, 98.0, 2.8, 1.31, 5.53, 2.7, 130.0, 4.57, 1.96, 60.0]]'

In [191]:
json_data = {'alcohol': 14.23, 'malic_acid':1.71, 'ash':2.43, 'alcalinity_of_ash':15.6, 'magnesium':127.0, 'total_phenols':2.8, 'flavanoids':3.06, 'nonflavanoid_phenols': 0.28, 'proanthocyanins': 2.29, 'color_intensity': 5.64, 'hue': 1.04, 'od280/od315_of_diluted_wines':3.92, 'proline': 1065.0}


In [236]:
import requests
URL = "http://localhost:5000/api/"
# sending get request and saving the response as response object 
r = requests.get(url = URL, json = json_data)

In [237]:
r

<Response [405]>

In [232]:
r.url

'http://127.0.0.1:5000/api/'

In [None]:
url = 'http://localhost:5000/api/'

data = [[140.34, 1.68, 2.7, 0, 98.0, 2.8, 1.31, 5.53, 2.7, 130.0, 4.57, 1.96, 60.0]]
j_data = json.dumps(data)
headers = {'content-type': 'application/json', 'Accept-Charset': 'UTF-8'}
r = requests.post(url, data=j_data, headers=headers)
print(r)
print("Your wine belongs to class: " + r.text)