## GridSearchCV vs Cross_Val_score

In [3]:
from sklearn import datasets
from sklearn.model_selection import cross_val_score ,GridSearchCV


In [4]:
iris = datasets.load_iris()
cancer= datasets.load_breast_cancer()

In [5]:

from sklearn.svm import SVC
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, accuracy_score, confusion_matrix

In [6]:
X_train, X_test, y_train, y_test = train_test_split(iris.data, iris.target, test_size=0.33, random_state=42, stratify =iris.target)

In [41]:
clf = SVC()

In [42]:
clf.fit(X_train,y_train)



SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0,
  decision_function_shape='ovr', degree=3, gamma='auto_deprecated',
  kernel='rbf', max_iter=-1, probability=False, random_state=None,
  shrinking=True, tol=0.001, verbose=False)

In [43]:
y_pred=clf.predict(X_test)

In [45]:
confusion_matrix(y_test, y_pred)

array([[16,  0,  0],
       [ 0, 17,  0],
       [ 0,  0, 17]], dtype=int64)

In [46]:
accuracy_score(y_test, y_pred)

1.0

In [49]:
print(classification_report(y_test,y_pred))

              precision    recall  f1-score   support

           0       1.00      1.00      1.00        16
           1       1.00      1.00      1.00        17
           2       1.00      1.00      1.00        17

   micro avg       1.00      1.00      1.00        50
   macro avg       1.00      1.00      1.00        50
weighted avg       1.00      1.00      1.00        50



In [7]:
X_train, X_test, y_train, y_test = train_test_split(cancer.data, cancer.target, test_size=0.33, random_state=42, stratify =cancer.target)

In [80]:
clf_cancer=SVC(kernel='linear',C=20, gamma='auto')

In [81]:
clf_cancer.fit(X_train,y_train)

SVC(C=20, cache_size=200, class_weight=None, coef0=0.0,
  decision_function_shape='ovr', degree=3, gamma='auto', kernel='linear',
  max_iter=-1, probability=False, random_state=None, shrinking=True,
  tol=0.001, verbose=False)

In [82]:
y_pred=clf_cancer.predict(X_test)

In [83]:
accuracy_score(y_test,y_pred)

0.9574468085106383

#### Accuracy is comparitively less, so will apply below technique one by one so that it improves the efficiency.

- cross_val_score
- GridSearchCV

# Cross_val_score

In [84]:
cross_val_score(SVC(kernel='linear',C=10, gamma='auto'), cancer.data, cancer.target, cv= 5)

array([0.93913043, 0.93913043, 0.97345133, 0.94690265, 0.96460177])

In [85]:
cross_val_score(SVC(kernel='rbf',C=10, gamma='auto'), cancer.data, cancer.target, cv= 5)

array([0.62608696, 0.62608696, 0.62831858, 0.62831858, 0.62831858])

In [86]:
cross_val_score(SVC(kernel='linear',C=20, gamma='auto'), cancer.data, cancer.target, cv= 5)

array([0.93913043, 0.93043478, 0.95575221, 0.9380531 , 0.96460177])

In [8]:
import numpy as np

In [9]:

c = [1,10,20]
kernel= ['linear','rbf',]
score= {}

for c in c:
    for k in kernel:
        cv = cross_val_score(SVC(kernel=k,C=c, gamma='auto'), cancer.data, cancer.target, cv= 5)
        score[f'{c}_{k}']= np.average(cv)
score

{'1_linear': 0.9455636783378223,
 '1_rbf': 0.6274259330511736,
 '10_linear': 0.9526433243555215,
 '10_rbf': 0.6274259330511736,
 '20_linear': 0.9455944594074644,
 '20_rbf': 0.6274259330511736}

## GridSearchCV

In [10]:
from sklearn.model_selection import GridSearchCV


In [57]:
X_train, X_test, y_train, y_test = train_test_split(cancer.data, cancer.target, test_size=0.33, random_state=42, stratify =cancer.target)

In [73]:
parameters = {'kernel': ('linear', 'rbf'), 'C': [1, 2, 4]}

clf = GridSearchCV(SVC(gamma='auto'), param_grid=parameters, cv = 5)

clf.fit(X_test,y_test)



GridSearchCV(cv=5, error_score='raise-deprecating',
       estimator=SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0,
  decision_function_shape='ovr', degree=3, gamma='auto', kernel='rbf',
  max_iter=-1, probability=False, random_state=None, shrinking=True,
  tol=0.001, verbose=False),
       fit_params=None, iid='warn', n_jobs=None,
       param_grid={'kernel': ('linear', 'rbf'), 'C': [1, 2, 4]},
       pre_dispatch='2*n_jobs', refit=True, return_train_score='warn',
       scoring=None, verbose=0)

In [74]:
clf.best_estimator_

SVC(C=1, cache_size=200, class_weight=None, coef0=0.0,
  decision_function_shape='ovr', degree=3, gamma='auto', kernel='linear',
  max_iter=-1, probability=False, random_state=None, shrinking=True,
  tol=0.001, verbose=False)

In [75]:
clf.best_params_

{'C': 1, 'kernel': 'linear'}

In [76]:
print('on train set')
scores = cross_val_score(clf.best_estimator_, X_train, y_train, cv=5, scoring='accuracy')
print(scores.mean(), scores)
print('on test set')
scores = cross_val_score(clf.best_estimator_, X_test, y_test, cv=5, scoring='accuracy')
print(scores.mean(), scores) 

on train set
0.9658560036454773 [0.98701299 0.96103896 0.97368421 0.93421053 0.97333333]
on test set
0.9523470839260313 [0.92105263 0.92105263 0.97368421 1.         0.94594595]


In [77]:
y_pred=clf.predict(X_test)

In [78]:
accuracy_score(y_test,y_pred)

0.9680851063829787

In [79]:
confusion_matrix(y_test,y_pred)

array([[ 66,   4],
       [  2, 116]], dtype=int64)

## hurray !! bravo.. gridsearchcv is done