# GridSearch using SkLearn

In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import GridSearchCV
from sklearn.svm import SVC

In [3]:
# Load the Titanic passenger data; the CSV is expected in the working directory.
df = pd.read_csv("Titanic_Dataset.csv")

# Mean of the known ages, used as the fill value for the missing ones.
age_mean = np.mean(df["Age"])

# Assign the filled column back instead of calling
# `df["Age"].fillna(age_mean, inplace=True)`: the in-place form operates on a
# chained selection, which raises a FutureWarning on pandas 2.x and silently
# leaves `df` unchanged once Copy-on-Write is enabled (default in pandas 3.0).
df["Age"] = df["Age"].fillna(age_mean)

**Taking only the first 100 samples to reduce computation time**

In [7]:
# Restrict the demo to the first 100 rows, then split them into the
# feature matrix (x) and the target vector (y).
first_rows = df.head(100)
feature_columns = ["Age", "SibSp", "Parch"]

x = np.array(first_rows[feature_columns])
y = np.array(first_rows["Survived"])

**Creating required parameters**

In [8]:
# Estimator to tune; every setting not listed in the grid keeps its SVC default.
model_object = SVC()

# Search space: 3 values of C x 2 kernels = 6 candidate combinations.
hyperparameters = dict(
    C=[1, 10, 20],
    kernel=["rbf", "linear"],
)

# 5-fold cross-validation for each candidate; train scores are not recorded.
grid = GridSearchCV(
    estimator=model_object,
    param_grid=hyperparameters,
    cv=5,
    return_train_score=False,
)
grid.fit(x, y)

# Raw per-candidate results (timings, per-split scores, ranks) as a dict of arrays.
gridSearch_result = grid.cv_results_

gridSearch_result

{'mean_fit_time': array([0.00279179, 0.01116853, 0.00259275, 0.70810509, 0.00179448,
        0.91894135]),
 'std_fit_time': array([3.96927012e-04, 8.51944269e-03, 7.97665256e-04, 1.19663524e+00,
        4.00257764e-04, 1.42535960e+00]),
 'mean_score_time': array([0.00139709, 0.00099831, 0.0009973 , 0.00079908, 0.00059776,
        0.00059934]),
 'std_score_time': array([4.88269199e-04, 2.78041453e-07, 6.30073087e-04, 3.99543292e-04,
        4.88074578e-04, 4.89356498e-04]),
 'param_C': masked_array(data=[1, 1, 10, 10, 20, 20],
              mask=[False, False, False, False, False, False],
        fill_value='?',
             dtype=object),
 'param_kernel': masked_array(data=['rbf', 'linear', 'rbf', 'linear', 'rbf', 'linear'],
              mask=[False, False, False, False, False, False],
        fill_value='?',
             dtype=object),
 'params': [{'C': 1, 'kernel': 'rbf'},
  {'C': 1, 'kernel': 'linear'},
  {'C': 10, 'kernel': 'rbf'},
  {'C': 10, 'kernel': 'linear'},
  {'C': 20, 'ker

**The results are hard to read in this form, so convert them to a DataFrame.**

In [9]:
# One row per hyperparameter combination, one column per cv_results_ key.
df_grid = pd.DataFrame(data=gridSearch_result)
df_grid

Unnamed: 0,mean_fit_time,std_fit_time,mean_score_time,std_score_time,param_C,param_kernel,params,split0_test_score,split1_test_score,split2_test_score,split3_test_score,split4_test_score,mean_test_score,std_test_score,rank_test_score
0,0.002792,0.000397,0.001397,0.0004882692,1,rbf,"{'C': 1, 'kernel': 'rbf'}",0.6,0.6,0.6,0.6,0.55,0.59,0.02,4
1,0.011169,0.008519,0.000998,2.780415e-07,1,linear,"{'C': 1, 'kernel': 'linear'}",0.65,0.6,0.6,0.65,0.55,0.61,0.037417,1
2,0.002593,0.000798,0.000997,0.0006300731,10,rbf,"{'C': 10, 'kernel': 'rbf'}",0.5,0.6,0.6,0.6,0.55,0.57,0.04,6
3,0.708105,1.196635,0.000799,0.0003995433,10,linear,"{'C': 10, 'kernel': 'linear'}",0.65,0.6,0.6,0.65,0.55,0.61,0.037417,1
4,0.001794,0.0004,0.000598,0.0004880746,20,rbf,"{'C': 20, 'kernel': 'rbf'}",0.5,0.6,0.65,0.6,0.6,0.59,0.04899,4
5,0.918941,1.42536,0.000599,0.0004893565,20,linear,"{'C': 20, 'kernel': 'linear'}",0.65,0.6,0.6,0.65,0.55,0.61,0.037417,1


**Fetching only required columns.**

In [10]:
# Keep only the two tuned hyperparameters and the mean cross-validated score.
columns_of_interest = ["param_C", "param_kernel", "mean_test_score"]
df_grid = df_grid[columns_of_interest]
df_grid

Unnamed: 0,param_C,param_kernel,mean_test_score
0,1,rbf,0.59
1,1,linear,0.61
2,10,rbf,0.57
3,10,linear,0.61
4,20,rbf,0.59
5,20,linear,0.61


**You can inspect the table manually, or use the `best_params_` attribute of the fitted grid search to get the best values of your hyperparameters.**

In [11]:
# Read the winning hyperparameter combination from the fitted grid search.
# NOTE(review): the results table shows three 'linear' candidates tied at
# rank 1 (mean_test_score 0.61); only one of them is reported here.
best_hyperparameters = grid.best_params_
best_hyperparameters

{'C': 1, 'kernel': 'linear'}

**Note: In this case we used SVC, which did not give very good accuracy; hence it is recommended to repeat the same procedure with different models and choose the best-performing one.**