# Grid search

technique used for model tuning that tries several hyperparameters combinations and reports the best based on a metric

In [1]:
%matplotlib inline
from sklearn import svm
from sklearn.model_selection import GridSearchCV, train_test_split
from sklearn.metrics import confusion_matrix, classification_report
from sklearn.datasets import make_multilabel_classification
import pandas as pd
import numpy as np
import category_encoders as ce
import multiprocessing

random_state = 42
n_cpu = multiprocessing.cpu_count()
n_cpu

16

In [2]:
def get_data():
    return make_multilabel_classification(n_samples=1_000, n_features=15, n_classes=5, n_labels=2, allow_unlabeled=False, sparse=False, return_distributions=False, random_state=random_state)

In [3]:
data = get_data()

In [4]:
X = data[0]
X[:5,:]

array([[ 3.,  5.,  5.,  1.,  4.,  1.,  4.,  5.,  2.,  4.,  1.,  6.,  2.,
         4.,  2.],
       [ 0.,  5.,  3.,  2.,  6.,  1.,  6.,  0.,  2.,  8.,  4.,  3.,  1.,
         3.,  8.],
       [ 2., 10.,  3.,  2.,  2.,  1.,  4.,  3.,  3., 13.,  1.,  2.,  1.,
         2., 12.],
       [ 5.,  5.,  3.,  3.,  1.,  2.,  3.,  4.,  6.,  8.,  0.,  3.,  1.,
         0.,  3.],
       [ 0.,  0.,  1.,  3.,  3.,  0.,  1.,  2.,  5.,  5.,  7.,  0.,  0.,
         3.,  3.]])

In [5]:
y = np.argmax(data[1], axis=1)
y[:20]

array([1, 0, 1, 1, 0, 1, 0, 1, 0, 1, 0, 3, 3, 2, 1, 0, 2, 1, 1, 0])

In [6]:
X_train, X_test, y_train, y_test = train_test_split(X, y)

In [7]:
svc = svm.SVC()
svc.fit(X_train, y_train)

SVC()

In [8]:
preds = svc.predict(X_test)

In [9]:
print(classification_report(y_test, preds))

              precision    recall  f1-score   support

           0       0.74      0.74      0.74        97
           1       0.74      0.82      0.78        92
           2       0.80      0.71      0.75        34
           3       0.67      0.67      0.67        18
           4       0.75      0.33      0.46         9

    accuracy                           0.74       250
   macro avg       0.74      0.65      0.68       250
weighted avg       0.75      0.74      0.74       250



## Try grid search

In [10]:
parameters = {
    'kernel': ['linear', 'rbf', 'poly', 'sigmoid'],
    'C': [0.1, 0.5, 1.0, 10],
    'degree': [1, 3, 5]
}
gs = GridSearchCV(svc, parameters, n_jobs=n_cpu)
gs.fit(X_train, y_train)

GridSearchCV(estimator=SVC(), n_jobs=16,
             param_grid={'C': [0.1, 0.5, 1.0, 10], 'degree': [1, 3, 5],
                         'kernel': ['linear', 'rbf', 'poly', 'sigmoid']})

In [11]:
gs.best_estimator_

SVC(degree=1, kernel='poly')

In [12]:
gs.best_score_

0.812

In [13]:
gs.best_params_

{'C': 1.0, 'degree': 1, 'kernel': 'poly'}

In [14]:
svc = svm.SVC(kernel='poly', degree=1)
svc.fit(X_train, y_train)

SVC(degree=1, kernel='poly')

In [15]:
preds = svc.predict(X_test)

In [16]:
print(classification_report(y_test, preds))

              precision    recall  f1-score   support

           0       0.73      0.79      0.76        97
           1       0.77      0.79      0.78        92
           2       0.89      0.71      0.79        34
           3       0.72      0.72      0.72        18
           4       0.75      0.33      0.46         9

    accuracy                           0.76       250
   macro avg       0.77      0.67      0.70       250
weighted avg       0.76      0.76      0.76       250

