# Using GridSearch to find the optimal set of hyperparameters for an SVM

In [66]:
from sklearn.model_selection import GridSearchCV
from sklearn.datasets import load_digits
import multiprocessing

In [67]:
# Number of CPUs reported by the machine; reused later as the parallel job
# count for the grid search.
cpus = multiprocessing.cpu_count()
print(
    'No.of CPUs are: {}'.format(cpus)
)

No.of CPUs are: 4


In [68]:
# Load scikit-learn's bundled digits dataset and split it into the feature
# matrix (X) and the label vector (Y).
digits = load_digits()
X, Y = digits.data, digits.target
print(X.shape, Y.shape)

(1797, 64) (1797,)


In [160]:
# Search space for GridSearchCV: every kernel is combined with every C value.
# The C list includes the values below 1.0 that appear in the recorded
# GridSearchCV output; without them the reported best estimator (C=0.1)
# cannot be reproduced when the notebook is re-run from a fresh kernel.
params=[
    {
        'kernel': ['linear','rbf','poly','sigmoid'],
        'C':[0.1,0.5,0.8,1.0,2.0,5.0]
    }
]

## Using Logistic Regression for Classification

In [150]:
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import cross_val_score

In [151]:
# Baseline classifier: logistic regression scored by mean accuracy over
# 5 cross-validation folds. max_iter is raised to 3000 iterations.
lr = LogisticRegression(solver='lbfgs', multi_class='auto', max_iter=3000)
lr_fold_scores = cross_val_score(lr, X, Y, cv=5, scoring='accuracy')
acc = lr_fold_scores.mean()
print(acc)

0.9165528146334582


In [152]:
# Logistic Regression gives an accuracy of around 92% on scikit-learn's digits dataset (the 8x8-pixel images from load_digits, not the full MNIST dataset)

## Using SVM without GridSearch for HyperParameters on the same dataset

In [137]:
from sklearn import svm
import pandas as pd

In [138]:
# Untuned SVM: only gamma is set explicitly, every other hyperparameter is
# left at the SVC default. Scored by mean accuracy over 5 cross-validation folds.
svc = svm.SVC(gamma='auto')
svc_fold_scores = cross_val_score(svc, X, Y, cv=5, scoring='accuracy')
acc = svc_fold_scores.mean()
print(acc)

0.44878680061604637


In [139]:
# Clearly, the untuned SVM performs poorly on this dataset (about 45% accuracy)

## Let's try setting up the optimal HyperParameters for the SVM

In [157]:
# Exhaustive search over every kernel/C combination in `params`, each scored
# by 5-fold cross-validated accuracy, using `cpus` parallel jobs.
gs = GridSearchCV(
    estimator=svm.SVC(gamma='auto'),
    param_grid=params,
    cv=5,
    scoring='accuracy',
    n_jobs=cpus,
)
gs.fit(X, Y)

GridSearchCV(cv=5, error_score='raise-deprecating',
             estimator=SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0,
                           decision_function_shape='ovr', degree=3,
                           gamma='auto', kernel='rbf', max_iter=-1,
                           probability=False, random_state=None, shrinking=True,
                           tol=0.001, verbose=False),
             iid='warn', n_jobs=4,
             param_grid=[{'C': [0.1, 0.5, 0.8, 1.0, 2.0, 5.0],
                          'kernel': ['linear', 'rbf', 'poly', 'sigmoid']}],
             pre_dispatch='2*n_jobs', refit=True, return_train_score=False,
             scoring='accuracy', verbose=0)

In [158]:
# Show the SVC configuration that the grid search selected as best.
gs.best_estimator_

SVC(C=0.1, cache_size=200, class_weight=None, coef0=0.0,
    decision_function_shape='ovr', degree=3, gamma='auto', kernel='poly',
    max_iter=-1, probability=False, random_state=None, shrinking=True,
    tol=0.001, verbose=False)

In [161]:
# Show the accuracy score associated with the selected configuration.
gs.best_score_

0.9699499165275459

## SVM with Polynomial Kernel, C=0.1 gives 97% accuracy on the digit dataset

In [132]:
# Tabulate the per-combination cross-validation results of the grid search.
table = pd.DataFrame(gs.cv_results_)
print(table)

In [104]:
# IPython help lookup: opens the GridSearchCV docstring in the notebook pager.
# NOTE(review): looks like leftover interactive exploration; consider removing
# it from the finished notebook.
GridSearchCV?