In [None]:
## Grid Search - finding the optimal hyperparameter values to achieve higher accuracy

In [3]:
# Importing the libraries

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

In [4]:
# Load the dataset and separate it into a feature matrix and a target vector

dataset = pd.read_csv('Social_Network_Ads.csv')
x = dataset.iloc[:, :-1].to_numpy()   # every column except the last one
y = dataset.iloc[:, -1].to_numpy()    # the last column only

In [5]:
# Hold out 25% of the samples as the test set; the fixed random_state keeps the split reproducible

from sklearn.model_selection import train_test_split

x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.25,
    random_state=0,
)

In [6]:
# Feature scaling: the scaler is fitted on the training set only and the
# same fitted transformation is then applied to both the training and test sets

from sklearn.preprocessing import StandardScaler

scaler = StandardScaler()
scaler.fit(x_train)
x_train = scaler.transform(x_train)
x_test = scaler.transform(x_test)

In [9]:
# Fit an SVM with an RBF kernel on the (scaled) training set

from sklearn.svm import SVC

classifier = SVC(kernel='rbf', random_state=0).fit(x_train, y_train)
classifier   # last expression of the cell -> displays the fitted estimator

SVC(random_state=0)

In [10]:
# Use the fitted Kernel SVM classifier to predict labels for the test set

y_pred = classifier.predict(x_test)

In [11]:
# Evaluate the test-set predictions: confusion matrix and overall accuracy

from sklearn.metrics import confusion_matrix, accuracy_score

cm = confusion_matrix(y_true=y_test, y_pred=y_pred)
accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)
print(cm, accuracy, sep='\n')   # one value per line: matrix first, then accuracy

[[64  4]
 [ 3 29]]
0.93


In [12]:
# k-Fold Cross Validation
#
# The model is evaluated on k different validation folds so that the estimate
# does not depend on one (possibly lucky) test split.
# The reported accuracy is the average of the k fold accuracies.
#
# NOTE(review): x_train was already standardized on the whole training set in the
# feature-scaling cell, so the scaling statistics include each validation fold -
# consider wrapping the scaler and the classifier in a Pipeline; TODO confirm impact.

from sklearn.model_selection import cross_val_score

accuracies = cross_val_score(classifier, x_train, y_train, cv=10)

# The k-fold mean is lower than the single test-set accuracy - we got a bit lucky
# with that split -> the relevant accuracy is 90.33%
print(accuracies.mean())

0.9033333333333333


In [13]:
# Grid search over the SVM hyperparameters

from sklearn.model_selection import GridSearchCV

# Two sub-grids, one per kernel: gamma is only searched together with the rbf kernel.
# C is the regularization parameter (0.25 - strong regularization, 1 - weaker regularization).
c_values = [0.25, 0.5, 0.75, 1]
gamma_values = [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9]

parameters = [
    {'C': c_values, 'kernel': ['linear']},
    {'C': c_values, 'kernel': ['rbf'], 'gamma': gamma_values},
]

grid_search = GridSearchCV(
    estimator=classifier,
    param_grid=parameters,
    scoring='accuracy',
    cv=10,
    n_jobs=-1,   # -1 -> use all available processors to speed up the search
)
grid_search.fit(x_train, y_train)

# Best mean cross-validated accuracy and the parameter combination that achieved it
best_accuracy, best_parameters = grid_search.best_score_, grid_search.best_params_
print(best_accuracy)
print(best_parameters)

0.9066666666666666
{'C': 0.5, 'gamma': 0.6, 'kernel': 'rbf'}
