# Grid Search

#### Load the packages, import the data, pre-process the data, and run the model
Example using SVM

In [34]:
import pandas as pd
import numpy as np

# Read the sample file, then pull the target column and the five predictor
# columns out of it.
data = pd.read_csv("./Data Files/Log_Reg_Sample_File.csv")
y = data["Clicked on Ad"]
X = data[[
    'Daily Time Spent on Site',
    'Age',
    'Area Income',
    'Daily Internet Usage',
    'Gender',
]]

# Dummy-encode Gender (first level dropped), append the resulting indicator
# column(s) to the predictors and discard the original Gender column.
X_dummies = pd.get_dummies(X["Gender"], drop_first = True)
X = pd.concat([X, X_dummies], axis = 1).drop(["Gender"], axis = 1)

# Dummy-encode the target the same way and keep the first remaining
# indicator column as a Series.
y_dummies = pd.get_dummies(y, drop_first = True)
y = y_dummies.iloc[:, 0]

from sklearn.model_selection import train_test_split
# Hold out 30% of the rows for testing; the fixed random_state makes the
# split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size = 0.3, random_state = 1111
)

# Standardise every float64/int64 column with the mean and standard deviation
# of the TRAINING split, and apply that same shift/scale to the test split.
# Columns of any other dtype are carried over unchanged from the copies.
X_train_scaled, X_test_scaled = X_train.copy(), X_test.copy()
for column in X_train.columns:
    if X_train[column].dtypes not in ["float64", "int64"]:
        continue
    train_mean = X_train[column].mean()
    train_std = X_train[column].std()
    X_train_scaled[column] = (X_train[column] - train_mean) / train_std
    X_test_scaled[column] = (X_test[column] - train_mean) / train_std

from sklearn.svm import SVC
# Baseline: an SVC with default hyper-parameters (only the seed is set),
# trained on the scaled training split. fit() returns the estimator itself,
# so construction and training can be chained.
svm_model = SVC(random_state = 1111).fit(X_train_scaled, y_train)

# Predictions for the scaled held-out rows; evaluated in the next cell.
y_pred = svm_model.predict(X_test_scaled)

#### Evaluate the base SVM Model

In [35]:
from sklearn.metrics import confusion_matrix, accuracy_score, classification_report

# Report test-set performance of the baseline SVM.
print("Base SVM Model", "\n")

# Confusion matrix wrapped in a DataFrame for a tabular printout, plus the
# accuracy rounded to three decimals.
base_confusion = pd.DataFrame(confusion_matrix(y_test, y_pred))
base_accuracy = round(accuracy_score(y_test, y_pred), 3)

# Both go through one print call, so the accuracy trails the last matrix row.
print(base_confusion, "      Accuracy:", base_accuracy, "\n")
print(classification_report(y_test, y_pred))

Base SVM Model 

     0    1
0  144    6
1    6  144       Accuracy: 0.96 

             precision    recall  f1-score   support

          0       0.96      0.96      0.96       150
          1       0.96      0.96      0.96       150

avg / total       0.96      0.96      0.96       300



#### Optimize the Parameters using Grid Search

In [37]:
from sklearn.model_selection import GridSearchCV

# Values to try. The linear kernel is searched over C only (4 candidates);
# the RBF kernel over every C x gamma pair (4 * 5 = 20 candidates).
c_values = [1, 10, 100, 1000]
gamma_values = [1, .1, .01, .001, .0001]
parameters = [
    {"C": c_values, "kernel": ["linear"]},
    {"C": c_values, "kernel": ["rbf"], "gamma": gamma_values},
]

# 10-fold cross-validated search, scored with F1 rather than accuracy.
# n_jobs = -1 runs the fits in parallel; verbose = 2 logs each individual fit.
grid_model = GridSearchCV(
    SVC(),
    param_grid = parameters,
    cv = 10,
    verbose = 2,
    n_jobs = -1,
    scoring = "f1",
)

# Kept as the final expression so the notebook displays the fitted search.
grid_model.fit(X_train_scaled, y_train)

Fitting 10 folds for each of 24 candidates, totalling 240 fits
[CV] C=1, kernel=linear ..............................................
[CV] C=1, kernel=linear ..............................................
[CV] C=1, kernel=linear ..............................................
[CV] C=1, kernel=linear ..............................................
[CV] ............................... C=1, kernel=linear, total=   0.0s
[CV] ............................... C=1, kernel=linear, total=   0.0s
[CV] C=1, kernel=linear ..............................................
[CV] C=1, kernel=linear ..............................................
[CV] ............................... C=1, kernel=linear, total=   0.0s
[CV] ............................... C=1, kernel=linear, total=   0.0s
[CV] C=1, kernel=linear ..............................................
[CV] C=1, kernel=linear ..............................................
[CV] ............................... C=1, kernel=linear, total=   0.0s
[CV] .........

[Parallel(n_jobs=-1)]: Done 158 tasks      | elapsed:    2.5s


[CV] C=1000, gamma=1, kernel=rbf .....................................
[CV] .................. C=100, gamma=0.0001, kernel=rbf, total=   0.0s
[CV] C=1000, gamma=0.1, kernel=rbf ...................................
[CV] ...................... C=1000, gamma=1, kernel=rbf, total=   0.0s
[CV] .................... C=1000, gamma=0.1, kernel=rbf, total=   0.0s
[CV] C=1000, gamma=1, kernel=rbf .....................................
[CV] C=1000, gamma=0.1, kernel=rbf ...................................
[CV] .................... C=1000, gamma=0.1, kernel=rbf, total=   0.0s
[CV] ...................... C=1000, gamma=1, kernel=rbf, total=   0.0s
[CV] C=1000, gamma=0.1, kernel=rbf ...................................
[CV] C=1000, gamma=1, kernel=rbf .....................................
[CV] .................... C=1000, gamma=0.1, kernel=rbf, total=   0.0s
[CV] C=1000, gamma=0.1, kernel=rbf ...................................
[CV] ...................... C=1000, gamma=1, kernel=rbf, total=   0.0s
[CV] C

[Parallel(n_jobs=-1)]: Done 240 out of 240 | elapsed:    3.2s finished


GridSearchCV(cv=10, error_score='raise',
       estimator=SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0,
  decision_function_shape=None, degree=3, gamma='auto', kernel='rbf',
  max_iter=-1, probability=False, random_state=None, shrinking=True,
  tol=0.001, verbose=False),
       fit_params={}, iid=True, n_jobs=-1,
       param_grid=[{'C': [1, 10, 100, 1000], 'kernel': ['linear']}, {'C': [1, 10, 100, 1000], 'kernel': ['rbf'], 'gamma': [1, 0.1, 0.01, 0.001, 0.0001]}],
       pre_dispatch='2*n_jobs', refit=True, return_train_score=True,
       scoring='f1', verbose=2)

#### Display best parameters and cross-validated F1 score
Iterate again with a refined grid if necessary

In [38]:
# best_params_ is the winning hyper-parameter combination from the search.
print("Best parameters:", grid_model.best_params_)

# best_score_ is the mean cross-validated value of whatever metric was passed
# as `scoring` to GridSearchCV (F1 in this notebook), not accuracy. The label
# is built from the search object so it cannot drift from the metric used.
print("Best mean cross-validated score (scoring = {!r}):".format(grid_model.scoring),
      grid_model.best_score_)

Best parameters: {'C': 10, 'kernel': 'linear'}
Best accuracy: 0.966595754389


#### Predict on test data using refined model parameters

In [39]:
# Predict the held-out rows through the fitted grid search.
# NOTE: this rebinds y_pred, replacing the baseline model's predictions.
y_pred = grid_model.predict(X_test_scaled)

print("Refined SVM Model Using Grid Search Parameters", "\n")

# Same report layout as for the baseline model: confusion matrix as a
# DataFrame, with the 3-decimal accuracy trailing it in the same print call.
tuned_confusion = pd.DataFrame(confusion_matrix(y_test, y_pred))
tuned_accuracy = round(accuracy_score(y_test, y_pred), 3)
print(tuned_confusion, "      Accuracy:", tuned_accuracy, "\n")
print(classification_report(y_test, y_pred))

Refined SVM Model Using Grid Search Parameters 

     0    1
0  147    3
1    5  145       Accuracy: 0.973 

             precision    recall  f1-score   support

          0       0.97      0.98      0.97       150
          1       0.98      0.97      0.97       150

avg / total       0.97      0.97      0.97       300

