In [7]:
import numpy as np 
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.datasets import load_wine
from sklearn.model_selection import train_test_split
%matplotlib inline

In [8]:
data = load_wine()

In [148]:
X_train, X_test, y_train, y_test = train_test_split(data.data, data.target, test_size = 0.2)

### Making a Pipeline and using GridSearch

In [149]:
from sklearn import decomposition, pipeline
from sklearn.svm import SVC
from sklearn.model_selection import GridSearchCV
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import StandardScaler
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis

In [150]:
#making instance of all objects 
ss = StandardScaler()
lda = LinearDiscriminantAnalysis()
pca = decomposition.PCA()
kpca = decomposition.KernelPCA()
svd = decomposition.TruncatedSVD()
lr = LogisticRegression()
svm = SVC()
n_components = [2, 4, 5, 7]

In [151]:
pipe = pipeline.Pipeline(steps = [('scalar', ss),('reduction', pca), ('classify', lr)])

In [163]:
param_pca = {
    'reduction':[pca],
    'reduction__n_components': n_components,
    'reduction__svd_solver': ['randomized','auto', 'full']
}

param_lda = {
    'reduction':[lda], 
    'reduction__n_components':[2,3]
}

param_kpca = {
    'reduction':[kpca],
    'reduction__n_components': [2,3],
    'reduction__kernel': ['poly','rbf']
}

param_lr = {
    'classify':[lr],
    'classify__C':[0.1,1,10], 
}

param_svm = {
    'classify':[svm],
    'classify__C':[0.1,1,10,100]
}

param_PCA_LR = {**param_pca, **param_lr}
param_KPCA_LR = {**param_kpca, **param_lr}
param_LDA_LR = {**param_lda, **param_lr}
param_LDA_SVM = {**param_lda, **param_svm}

In [164]:
grd = GridSearchCV(pipe, param_grid=[param_LDA_SVM,param_LDA_LR], cv = 5, verbose = 1)

In [165]:
grd.fit(X_train, y_train)

Fitting 5 folds for each of 26 candidates, totalling 130 fits


[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.






[Parallel(n_jobs=1)]: Done 130 out of 130 | elapsed:    1.1s finished


GridSearchCV(cv=5, error_score='raise-deprecating',
       estimator=Pipeline(memory=None,
     steps=[('scalar', StandardScaler(copy=True, with_mean=True, with_std=True)), ('reduction', PCA(copy=True, iterated_power='auto', n_components=None, random_state=None,
  svd_solver='auto', tol=0.0, whiten=False)), ('classify', LogisticRegression(C=10, class_weight...y='l2', random_state=None, solver='liblinear',
          tol=0.0001, verbose=0, warm_start=False))]),
       fit_params=None, iid='warn', n_jobs=None,
       param_grid=[{'reduction': [LinearDiscriminantAnalysis(n_components=2, priors=None, shrinkage=None,
              solver='svd', store_covariance=False, tol=0.0001)], 'reduction__n_components': [1, 2], 'classify': [SVC(C=0.1, cache_size=200, class_weight=None, coef0=0.0,
  decision_function_shape='ovr...lty': ['l2'], 'classify__C': [0.1, 1, 10], 'classify__solver': ['newton-cg', 'lbfgs', 'liblinear']}],
       pre_dispatch='2*n_jobs', refit=True, return_train_score='warn',
    

In [166]:
grd.best_params_

{'classify': SVC(C=0.1, cache_size=200, class_weight=None, coef0=0.0,
   decision_function_shape='ovr', degree=3, gamma='auto_deprecated',
   kernel='rbf', max_iter=-1, probability=False, random_state=None,
   shrinking=True, tol=0.001, verbose=False),
 'classify__C': 0.1,
 'reduction': LinearDiscriminantAnalysis(n_components=2, priors=None, shrinkage=None,
               solver='svd', store_covariance=False, tol=0.0001),
 'reduction__n_components': 2}

In [183]:
grd.score

1.0

In [172]:
from sklearn.metrics import classification_report, confusion_matrix
print(classification_report(y_test, grd.predict(X_test)))

              precision    recall  f1-score   support

           0       1.00      1.00      1.00        11
           1       1.00      1.00      1.00        17
           2       1.00      1.00      1.00         8

   micro avg       1.00      1.00      1.00        36
   macro avg       1.00      1.00      1.00        36
weighted avg       1.00      1.00      1.00        36



In [173]:
confusion_matrix(y_train, grd.predict(X_train))

array([[48,  0,  0],
       [ 0, 54,  0],
       [ 0,  0, 40]], dtype=int64)

In [174]:
confusion_matrix(y_test, grd.predict(X_test))

array([[11,  0,  0],
       [ 0, 17,  0],
       [ 0,  0,  8]], dtype=int64)