**References** :
1. GridSearch CV :  https://scikit-learn.org/stable/modules/generated/sklearn.model_selection.GridSearchCV.html
2. SVM Classiifer : https://scikit-learn.org/stable/modules/svm.html#svm-classification


In [49]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
from sklearn.metrics import confusion_matrix
from sklearn.metrics import accuracy_score
from sklearn.model_selection import GridSearchCV

In [50]:
#path = '/content/drive/My Drive/Colab Notebooks/ML Practice/data/Advertising_data.csv'
path = '/Data/Advertising_data.csv'
df = pd.read_csv(path)
df.head()

Unnamed: 0,User ID,Gender,Age,EstimatedSalary,Purchased
0,15624510,Male,19.0,19000.0,0
1,15810944,Male,35.0,20000.0,0
2,15668575,Female,26.0,43000.0,0
3,15603246,Female,27.0,57000.0,0
4,15804002,Male,19.0,76000.0,0


In [None]:
x = df.iloc[: , [2,3]].values  # Age and EstimatedSalary
y = df.iloc[: , 4].values      # Purchased

# SVM Without Hyperparameter tuning

In [None]:
## Splitting data - train and test set
X_train, X_test, y_train, y_test = train_test_split(x, y, test_size = 0.25, random_state = 5)
X_train
X_test

In [None]:
## Feature scaling - Standard Normal distribution
Sc = StandardScaler()
X_train = Sc.fit_transform(X_train)
X_test = Sc.fit_transform(X_test)
X_train
X_test

In [46]:
## SVM Classifier with deafult hyperparameters
classifier = SVC(kernel = 'linear' , random_state= 0 )
classifier.fit(X_train , y_train)

SVC(C=1.0, break_ties=False, cache_size=200, class_weight=None, coef0=0.0,
    decision_function_shape='ovr', degree=3, gamma='scale', kernel='linear',
    max_iter=-1, probability=False, random_state=0, shrinking=True, tol=0.001,
    verbose=False)

In [47]:
## Predict on testdata
y_pred = classifier.predict(X_test)

In [48]:
## accuracy and confusion matrix
Cm = confusion_matrix(y_test , y_pred)
acc = accuracy_score(y_test , y_pred)
print("accuracy : " ,acc)
print("Confusion matrix : \n " , Cm)

accuracy :  0.85
Confusion matrix : 
  [[60  6]
 [ 9 25]]


# Hyperparameter tuning on SVM using Grid search CV

In [54]:
## Parameter dictionary (to pass into gridsearch CV)
# C : regularisation parameter
# Kenrel : type of kernel
# Gamma : Kernel coefficient for ‘rbf’, ‘poly’ and ‘sigmoid’.
Para_dict = [
              {'C' : [1,10,100,1000] , 'kernel' : ['linear']} ,
              {'C' : [1,10,100,1000] , 'kernel' : ['rbf'] , 'gamma': [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9]}
            ]


In [59]:
## Grid search CV
# estimator : Which type : regrssion(linear , logistic) , classiifer(SVM, KNN) etc.
# param_grid : dictionary of parameters on which gridsearch CV is performed
# Scoring : type of score like accuracy 
# cv : says about k fold cross validation where k = cv value
# n_jobs : Number of jobs to run in parallel.

grid_search = GridSearchCV( estimator = classifier , 
                            param_grid = Para_dict,
                            scoring = 'accuracy',
                            cv = 10,
                            n_jobs = -1
                          )
GS = grid_search.fit(X_train , y_train)

In [None]:
## Grid search ouptut
GS.cv_results_

In [82]:
## Grid search accuracy outputted by Grid serach
GS_accuracy = GS.best_score_
GS_accuracy


0.9100000000000001

In [83]:
## Best hyperparameters by gridsearch
best_hyperparameters = GS.best_params_
print( " best_hyperparameters : " , best_hyperparameters )

 best_hyperparameters :  {'C': 10, 'gamma': 0.3, 'kernel': 'rbf'}


# Testing our Hyperparameters returned by GridSearch CV

In [77]:
Final_classifier = SVC(kernel = 'rbf' , gamma = 0.3 , C = 10)
Final_classifier.fit(X_train , y_train)

SVC(C=10, break_ties=False, cache_size=200, class_weight=None, coef0=0.0,
    decision_function_shape='ovr', degree=3, gamma=0.3, kernel='rbf',
    max_iter=-1, probability=False, random_state=None, shrinking=True,
    tol=0.001, verbose=False)

In [80]:
## Predict on testdata
y_pred_final = Final_classifier.predict(X_test)

In [81]:
## accuracy and confusion matrix
final_Cm = confusion_matrix(y_test , y_pred_final)
final_acc = accuracy_score(y_test , y_pred_final)
print("Final accuracy : " ,final_acc)
print("Final Confusion matrix : \n " , final_Cm)

Final accuracy :  0.94
Final Confusion matrix : 
  [[61  5]
 [ 1 33]]
