# Randomized hyperparameter search (RandomizedSearchCV) using scikit-learn

In [12]:
import pandas as pd
import numpy as np
from sklearn.model_selection import RandomizedSearchCV
from sklearn.svm import SVC

In [13]:
# Load the Titanic passenger data from the CSV next to the notebook.
df = pd.read_csv("Titanic_Dataset.csv")

# Mean of the known ages (missing values are skipped by Series.mean()).
age_mean = df["Age"].mean()

# Impute missing ages with the mean. The result is assigned back to the column
# instead of calling fillna(..., inplace=True) on df["Age"]: an in-place call on
# a column selection is chained assignment, which warns on recent pandas and
# does not modify `df` at all once Copy-on-Write is enabled.
df["Age"] = df["Age"].fillna(age_mean)

**Taking only first 100 samples to reduce computation time**

In [14]:
# Feature matrix: Age, SibSp and Parch for the first 100 rows, as a copied ndarray.
x = df.loc[:, ["Age", "SibSp", "Parch"]].iloc[:100].to_numpy(copy=True)
# Target vector: the Survived column for the same 100 rows.
y = df["Survived"].iloc[:100].to_numpy(copy=True)

**Creating required parameters**

In [15]:
# Base estimator whose hyperparameters are being tuned.
model_object = SVC()

# Candidate values: 3 values of C x 2 kernels = 6 possible combinations.
hyperparameters = {
    "C": [1, 10, 20],
    "kernel": ["rbf", "linear"],
}

# Randomized search evaluates only n_iter of those combinations, each scored
# with 5-fold cross-validation. random_state fixes which combinations are
# drawn, so a fresh "Restart & Run All" reproduces the same results table and
# the same best_params_ instead of a different random subset on every run.
grid = RandomizedSearchCV(
    model_object,
    hyperparameters,
    n_iter=3,
    cv=5,
    return_train_score=False,
    random_state=42,
)
grid.fit(x, y)

# Per-candidate timings, parameters and cross-validation scores.
gridSearch_result = grid.cv_results_

gridSearch_result

{'mean_fit_time': array([1.29658599, 0.00240664, 0.00409069]),
 'std_fit_time': array([2.04992307e+00, 1.96506037e-03, 1.65274304e-04]),
 'mean_score_time': array([0.        , 0.00079985, 0.00181203]),
 'std_score_time': array([0.        , 0.00159969, 0.00183904]),
 'param_kernel': masked_array(data=['linear', 'rbf', 'rbf'],
              mask=[False, False, False],
        fill_value='?',
             dtype=object),
 'param_C': masked_array(data=[20, 10, 20],
              mask=[False, False, False],
        fill_value='?',
             dtype=object),
 'params': [{'kernel': 'linear', 'C': 20},
  {'kernel': 'rbf', 'C': 10},
  {'kernel': 'rbf', 'C': 20}],
 'split0_test_score': array([0.65, 0.5 , 0.5 ]),
 'split1_test_score': array([0.6, 0.6, 0.6]),
 'split2_test_score': array([0.6 , 0.6 , 0.65]),
 'split3_test_score': array([0.65, 0.6 , 0.6 ]),
 'split4_test_score': array([0.55, 0.55, 0.6 ]),
 'mean_test_score': array([0.61, 0.57, 0.59]),
 'std_test_score': array([0.03741657, 0.04      

**The raw `cv_results_` dictionary is hard to read in this form, so convert it to a DataFrame.**

In [16]:
# Tabulate the search results: one row per sampled hyperparameter combination,
# one column per key of the cv_results_ dictionary.
df_grid = pd.DataFrame(data=gridSearch_result)
df_grid

Unnamed: 0,mean_fit_time,std_fit_time,mean_score_time,std_score_time,param_kernel,param_C,params,split0_test_score,split1_test_score,split2_test_score,split3_test_score,split4_test_score,mean_test_score,std_test_score,rank_test_score
0,1.296586,2.049923,0.0,0.0,linear,20,"{'kernel': 'linear', 'C': 20}",0.65,0.6,0.6,0.65,0.55,0.61,0.037417,1
1,0.002407,0.001965,0.0008,0.0016,rbf,10,"{'kernel': 'rbf', 'C': 10}",0.5,0.6,0.6,0.6,0.55,0.57,0.04,3
2,0.004091,0.000165,0.001812,0.001839,rbf,20,"{'kernel': 'rbf', 'C': 20}",0.5,0.6,0.65,0.6,0.6,0.59,0.04899,2


**Fetching only the required columns. Since we passed `n_iter=3`, only 3 randomly sampled combinations of the hyperparameters were tried.**

In [17]:
# Narrow the table to the sampled parameter values and their mean CV score.
df_grid = df_grid.loc[:, ["param_C", "param_kernel", "mean_test_score"]]
df_grid

Unnamed: 0,param_C,param_kernel,mean_test_score
0,20,linear,0.61
1,10,rbf,0.57
2,20,rbf,0.59


**You can inspect the table manually or use the `best_params_` attribute to get the best values of your hyperparameters.**

In [18]:
# Read the best hyperparameter combination from the fitted search object;
# the value is a dict keyed by hyperparameter name.
best_hyperparameters = grid.best_params_
best_hyperparameters

{'kernel': 'linear', 'C': 20}

**Note: In this case we used SVC, which did not give very good accuracy, so it is recommended to repeat the same search with different models and choose the best-performing one.**