# **Use GridSearchCV to select the best hyperparameters for a Support Vector Machine**

## **GridSearchCV works well on small data sets**

In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.model_selection import GridSearchCV
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import confusion_matrix,accuracy_score
from sklearn.svm import SVC
import warnings

In [None]:
warnings.filterwarnings(action='ignore')

In [None]:
# Absolute path of the advertising CSV (looks like a Colab-mounted Drive folder).
# NOTE: the "Hyper Parameter tuning " directory name really does end with a space.
csv_path = "/content/drive/MyDrive/Machine Learning/Supervised Learning/Some Important Topic for Machine Learning/Hyper Parameter tuning /Advertising_data.csv"
df = pd.read_csv(csv_path)
df

Unnamed: 0,User ID,Gender,Age,EstimatedSalary,Purchased
0,15624510,Male,19.0,19000.0,0
1,15810944,Male,35.0,20000.0,0
2,15668575,Female,26.0,43000.0,0
3,15603246,Female,27.0,57000.0,0
4,15804002,Male,19.0,76000.0,0
...,...,...,...,...,...
395,15691863,Female,46.0,41000.0,1
396,15706071,Male,51.0,23000.0,1
397,15654296,Female,50.0,20000.0,1
398,15755018,Male,36.0,33000.0,0


In [None]:
# This notebook is about how to use GridSearchCV, so only two predictor columns
# are used. Columns are selected by name rather than by position so the cell
# keeps selecting the right data even if the CSV's column order changes.
features = df[["Age", "EstimatedSalary"]].to_numpy()
level = df["Purchased"].to_numpy()  # target labels (last column of the frame)

In [None]:
# Hold out 25% of the rows as a test set; the fixed seed keeps the split reproducible.
x_train, x_test, y_train, y_test = train_test_split(
    features,
    level,
    test_size=0.25,
    random_state=5,
)

In [None]:
# Feature scaling: the scaler is fitted on the training split only, and that
# same fitted transform is then applied to both the training and test splits.
sc = StandardScaler()
sc.fit(x_train)
x_train, x_test = sc.transform(x_train), sc.transform(x_test)

In [None]:
# Baseline model: a linear-kernel SVM fitted on the scaled training split.
classifier = SVC(kernel="linear", random_state=0)
classifier.fit(x_train, y_train)

SVC(C=1.0, break_ties=False, cache_size=200, class_weight=None, coef0=0.0,
    decision_function_shape='ovr', degree=3, gamma='scale', kernel='linear',
    max_iter=-1, probability=False, random_state=0, shrinking=True, tol=0.001,
    verbose=False)

In [None]:
# Baseline predictions for the held-out test split.
y_pred = classifier.predict(x_test)

In [None]:
# Confusion matrix of the baseline model (rows: true class, columns: predicted class).
cm = confusion_matrix(y_test, y_pred)
print(cm)

[[60  6]
 [ 9 25]]


In [None]:
# Test-set accuracy of the baseline linear SVM.
accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)
accuracy

0.85

# **Now apply Grid Search to find the best model and the best parameters**

In [None]:
# Candidate hyperparameters: one grid for the linear kernel (C only) and one
# for the RBF kernel (C combined with gamma).
c_values = [1, 10, 100, 1000]
gamma_values = [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9]
parameters = [
    {"C": c_values, "kernel": ["linear"]},
    {"C": c_values, "kernel": ["rbf"], "gamma": gamma_values},
]

# 10-fold cross-validated search, scored on accuracy, run on all available cores.
grid_search = GridSearchCV(
    estimator=classifier,
    param_grid=parameters,
    scoring="accuracy",
    cv=10,
    n_jobs=-1,
    return_train_score=False,
)
grid_search.fit(x_train, y_train)

GridSearchCV(cv=10, error_score=nan,
             estimator=SVC(C=1.0, break_ties=False, cache_size=200,
                           class_weight=None, coef0=0.0,
                           decision_function_shape='ovr', degree=3,
                           gamma='scale', kernel='linear', max_iter=-1,
                           probability=False, random_state=0, shrinking=True,
                           tol=0.001, verbose=False),
             iid='deprecated', n_jobs=-1,
             param_grid=[{'C': [1, 10, 100, 1000], 'kernel': ['linear']},
                         {'C': [1, 10, 100, 1000],
                          'gamma': [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8,
                                    0.9],
                          'kernel': ['rbf']}],
             pre_dispatch='2*n_jobs', refit=True, return_train_score=False,
             scoring='accuracy', verbose=0)

In [None]:
# Best mean cross-validated score found by the search (scoring is accuracy).
accuracy2 = grid_search.best_score_
accuracy2

0.9100000000000001

In [None]:
# Hyperparameter combination that achieved the best cross-validated score.
grid_search.best_params_

{'C': 10, 'gamma': 0.3, 'kernel': 'rbf'}

In [None]:
# The SVC configured with the winning hyperparameter combination.
grid_search.best_estimator_

SVC(C=10, break_ties=False, cache_size=200, class_weight=None, coef0=0.0,
    decision_function_shape='ovr', degree=3, gamma=0.3, kernel='rbf',
    max_iter=-1, probability=False, random_state=0, shrinking=True, tol=0.001,
    verbose=False)

In [None]:
# Train a fresh SVC with the best hyperparameters found by the grid search.
# The values are read from grid_search.best_params_ instead of being copied by
# hand from the printed output, so this cell cannot go stale when the data,
# the split, or the search grid changes.
classifier2 = SVC(**grid_search.best_params_)
classifier2.fit(x_train, y_train)

SVC(C=10, break_ties=False, cache_size=200, class_weight=None, coef0=0.0,
    decision_function_shape='ovr', degree=3, gamma=0.3, kernel='rbf',
    max_iter=-1, probability=False, random_state=None, shrinking=True,
    tol=0.001, verbose=False)

In [None]:
# Predictions of the tuned model for the held-out test split.
y_pred2 = classifier2.predict(X=x_test)

In [None]:
# Confusion matrix of the tuned model (rows: true class, columns: predicted class).
cm2 = confusion_matrix(y_true=y_test, y_pred=y_pred2)
print(cm2)

[[61  5]
 [ 2 32]]


In [None]:
# Test-set accuracy of the tuned SVM.
score = accuracy_score(y_true=y_test, y_pred=y_pred2)
score

0.93