In [1]:
# @itsvenu_
# Grid-search

In [12]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import GridSearchCV

In [4]:
# Read the ads dataset and discard its first two columns in one step;
# the preview below shows what remains (Age, EstimatedSalary, Purchased).
dat = pd.read_csv('../../data/Social_Network_Ads.csv').iloc[:, 2:]
dat.head()

Unnamed: 0,Age,EstimatedSalary,Purchased
0,19,19000,0
1,35,20000,0
2,26,43000,0
3,27,57000,0
4,19,76000,0


In [5]:
# Features are every column except the last; the target is the last column.
# Both are extracted as NumPy arrays.
X, Y = dat.iloc[:, :-1].values, dat.iloc[:, -1].values

In [7]:
# Hold out 20% of the rows for testing; the fixed seed keeps the split reproducible.
X_train, X_test, Y_train, Y_test = train_test_split(
    X, Y,
    test_size=0.2,
    random_state=42,
)

In [8]:
# Learn the scaling statistics from the training split only, then apply the
# same transformation to both splits so no test-set information leaks in.
sc = StandardScaler().fit(X_train)
X_train, X_test = sc.transform(X_train), sc.transform(X_test)

In [9]:
# Baseline model: RBF-kernel SVM with every other hyperparameter left at its default.
svmClassifier = SVC(
    kernel='rbf',
    random_state=42,
)

In [10]:
# Train the baseline SVM on the scaled training split.
svmClassifier.fit(X=X_train, y=Y_train)

SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0,
    decision_function_shape='ovr', degree=3, gamma='auto_deprecated',
    kernel='rbf', max_iter=-1, probability=False, random_state=42,
    shrinking=True, tol=0.001, verbose=False)

In [11]:
# Accuracy of the baseline SVM on the held-out test split.
accuracy_score(
    y_true=Y_test,
    y_pred=svmClassifier.predict(X_test),
)

0.925

In [13]:
# 10-fold cross-validation of the baseline SVM on the training split.
# cross_val_score returns one score per fold; the original cell stored that
# array without ever showing it, so summarise it as (mean, std) to make the
# cell actually report the cross-validated performance and its spread.
accuracies = cross_val_score(estimator=svmClassifier,
                             X=X_train, y=Y_train, cv=10)
accuracies.mean(), accuracies.std()



In [17]:
# Grid search for optimal parameters.
# Two sub-grids: a linear kernel tuned over C only, and an RBF kernel tuned
# over C together with gamma in steps of 0.1 from 0.1 to 0.9.

parameters = [
    {
        'C': [0.25, 0.50, 0.75, 1],
        'kernel': ['linear'],
    },
    {
        'C': [0.25, 0.50, 0.75, 1],
        'kernel': ['rbf'],
        # g / 10 yields exactly the same floats as the literals 0.1 ... 0.9
        'gamma': [g / 10 for g in range(1, 10)],
    },
]

parameters

[{'C': [0.25, 0.5, 0.75, 1], 'kernel': ['linear']},
 {'C': [0.25, 0.5, 0.75, 1],
  'kernel': ['rbf'],
  'gamma': [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9]}]

In [18]:
# Exhaustive search over every combination in `parameters`, scoring each
# candidate by accuracy under 10-fold cross-validation. n_jobs=-1 asks
# scikit-learn to run the fits in parallel on all available processors.
grid_search = GridSearchCV(
    svmClassifier,
    parameters,
    scoring='accuracy',
    cv=10,
    n_jobs=-1,
)

In [19]:
# Run the search on the scaled training split only; the test split stays unseen.
grid_search.fit(X=X_train, y=Y_train)

GridSearchCV(cv=10, error_score='raise-deprecating',
             estimator=SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0,
                           decision_function_shape='ovr', degree=3,
                           gamma='auto_deprecated', kernel='rbf', max_iter=-1,
                           probability=False, random_state=42, shrinking=True,
                           tol=0.001, verbose=False),
             iid='warn', n_jobs=-1,
             param_grid=[{'C': [0.25, 0.5, 0.75, 1], 'kernel': ['linear']},
                         {'C': [0.25, 0.5, 0.75, 1],
                          'gamma': [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8,
                                    0.9],
                          'kernel': ['rbf']}],
             pre_dispatch='2*n_jobs', refit=True, return_train_score=False,
             scoring='accuracy', verbose=0)

In [20]:
# Best score found by the fitted search (accuracy, since scoring='accuracy');
# per the scikit-learn docs this is the mean cross-validated score of the winner.
grid_search.best_score_

0.909375

In [21]:
# Hyperparameter combination from `parameters` that achieved the best score.
grid_search.best_params_

{'C': 0.5, 'gamma': 0.7, 'kernel': 'rbf'}

In [26]:
# We identified the optimal parameters using grid search with cross-validation.
# This is especially useful for complex models.

In [24]:
# Updated classifier: build it directly from the grid-search winner rather
# than hand-copying the values from the output above, so the model stays in
# sync if the grid, the data, or the split ever change on a re-run.
# With the results shown above this expands to SVC(C=0.5, gamma=0.7, kernel='rbf').

newClassifier = SVC(**grid_search.best_params_)

In [25]:
# Train the tuned SVM on the same scaled training split as the baseline.
newClassifier.fit(X=X_train, y=Y_train)

SVC(C=0.5, cache_size=200, class_weight=None, coef0=0.0,
    decision_function_shape='ovr', degree=3, gamma=0.7, kernel='rbf',
    max_iter=-1, probability=False, random_state=None, shrinking=True,
    tol=0.001, verbose=False)

In [27]:
# Accuracy of the tuned SVM on the held-out test split.
accuracy_score(
    y_true=Y_test,
    y_pred=newClassifier.predict(X_test),
)

0.9375

In [None]:
# The tuned model reaches 0.9375 accuracy on the test set, slightly better
# than the 0.925 of the model without hyperparameter tuning.