# HyperParameter Tuning

1. GridSearchCV

2. RandomizedSearchCV

In [74]:
#importing the dependencies

import numpy as np
import pandas as pd
import sklearn.datasets
from sklearn.svm import SVC
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import RandomizedSearchCV

We will be working with the breast cancer dataset that ships with scikit-learn.

# Data Preprocessing

In [75]:
# Load the breast cancer dataset that ships with scikit-learn.

b_cancer_dataset=sklearn.datasets.load_breast_cancer()

In [76]:
# Print the raw loader result; per the output it contains at least 'data' and 'target' arrays.
print(b_cancer_dataset)

{'data': array([[1.799e+01, 1.038e+01, 1.228e+02, ..., 2.654e-01, 4.601e-01,
        1.189e-01],
       [2.057e+01, 1.777e+01, 1.329e+02, ..., 1.860e-01, 2.750e-01,
        8.902e-02],
       [1.969e+01, 2.125e+01, 1.300e+02, ..., 2.430e-01, 3.613e-01,
        8.758e-02],
       ...,
       [1.660e+01, 2.808e+01, 1.083e+02, ..., 1.418e-01, 2.218e-01,
        7.820e-02],
       [2.060e+01, 2.933e+01, 1.401e+02, ..., 2.650e-01, 4.087e-01,
        1.240e-01],
       [7.760e+00, 2.454e+01, 4.792e+01, ..., 0.000e+00, 2.871e-01,
        7.039e-02]]), 'target': array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0,
       0, 0, 1, 0, 1, 1, 1, 1, 1, 0, 0, 1, 0, 0, 1, 1, 1, 1, 0, 1, 0, 0,
       1, 1, 1, 1, 0, 1, 0, 0, 1, 0, 1, 0, 0, 1, 1, 1, 0, 0, 1, 0, 0, 0,
       1, 1, 1, 0, 1, 1, 0, 0, 1, 1, 1, 0, 0, 1, 1, 1, 1, 0, 1, 1, 0, 1,
       1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0, 0, 1, 1, 1, 0, 0, 1, 0, 1, 0,
 

In [77]:
# Build a DataFrame of the measurements, labelling the columns with the dataset's feature names.

cancer = pd.DataFrame(
    data=b_cancer_dataset.data,
    columns=b_cancer_dataset.feature_names,
)

In [78]:
# Preview the first five rows of the feature table.
cancer.head()

Unnamed: 0,mean radius,mean texture,mean perimeter,mean area,mean smoothness,mean compactness,mean concavity,mean concave points,mean symmetry,mean fractal dimension,...,worst radius,worst texture,worst perimeter,worst area,worst smoothness,worst compactness,worst concavity,worst concave points,worst symmetry,worst fractal dimension
0,17.99,10.38,122.8,1001.0,0.1184,0.2776,0.3001,0.1471,0.2419,0.07871,...,25.38,17.33,184.6,2019.0,0.1622,0.6656,0.7119,0.2654,0.4601,0.1189
1,20.57,17.77,132.9,1326.0,0.08474,0.07864,0.0869,0.07017,0.1812,0.05667,...,24.99,23.41,158.8,1956.0,0.1238,0.1866,0.2416,0.186,0.275,0.08902
2,19.69,21.25,130.0,1203.0,0.1096,0.1599,0.1974,0.1279,0.2069,0.05999,...,23.57,25.53,152.5,1709.0,0.1444,0.4245,0.4504,0.243,0.3613,0.08758
3,11.42,20.38,77.58,386.1,0.1425,0.2839,0.2414,0.1052,0.2597,0.09744,...,14.91,26.5,98.87,567.7,0.2098,0.8663,0.6869,0.2575,0.6638,0.173
4,20.29,14.34,135.1,1297.0,0.1003,0.1328,0.198,0.1043,0.1809,0.05883,...,22.54,16.67,152.2,1575.0,0.1374,0.205,0.4,0.1625,0.2364,0.07678


In [79]:
# Append the dataset's target values as a 'label' column (modifies `cancer` in place).
cancer['label']=b_cancer_dataset.target

In [80]:
# Show the first 5 rows again; the frame now ends with the 'label' column.
cancer.head()

Unnamed: 0,mean radius,mean texture,mean perimeter,mean area,mean smoothness,mean compactness,mean concavity,mean concave points,mean symmetry,mean fractal dimension,...,worst texture,worst perimeter,worst area,worst smoothness,worst compactness,worst concavity,worst concave points,worst symmetry,worst fractal dimension,label
0,17.99,10.38,122.8,1001.0,0.1184,0.2776,0.3001,0.1471,0.2419,0.07871,...,17.33,184.6,2019.0,0.1622,0.6656,0.7119,0.2654,0.4601,0.1189,0
1,20.57,17.77,132.9,1326.0,0.08474,0.07864,0.0869,0.07017,0.1812,0.05667,...,23.41,158.8,1956.0,0.1238,0.1866,0.2416,0.186,0.275,0.08902,0
2,19.69,21.25,130.0,1203.0,0.1096,0.1599,0.1974,0.1279,0.2069,0.05999,...,25.53,152.5,1709.0,0.1444,0.4245,0.4504,0.243,0.3613,0.08758,0
3,11.42,20.38,77.58,386.1,0.1425,0.2839,0.2414,0.1052,0.2597,0.09744,...,26.5,98.87,567.7,0.2098,0.8663,0.6869,0.2575,0.6638,0.173,0
4,20.29,14.34,135.1,1297.0,0.1003,0.1328,0.198,0.1043,0.1809,0.05883,...,16.67,152.2,1575.0,0.1374,0.205,0.4,0.1625,0.2364,0.07678,0


In [81]:
# Number of rows and columns in the DataFrame (feature columns plus 'label').
cancer.shape

(569, 31)

In [82]:
# Count the missing values in each column.

cancer.isnull().sum()

Unnamed: 0,0
mean radius,0
mean texture,0
mean perimeter,0
mean area,0
mean smoothness,0
mean compactness,0
mean concavity,0
mean concave points,0
mean symmetry,0
mean fractal dimension,0


In [83]:
# Class balance: number of rows for each value of the 'label' column.

cancer['label'].value_counts()

Unnamed: 0_level_0,count
label,Unnamed: 1_level_1
1,357
0,212


1 --> Benign

0 --> Malignant

Separating the features and target

In [84]:
# X -> feature columns, Y -> target column

Y = cancer['label']
X = cancer.drop(columns=['label'])

In [85]:
# Plain-text dump of the feature table (everything except 'label').
print(X)

     mean radius  mean texture  mean perimeter  mean area  mean smoothness  \
0          17.99         10.38          122.80     1001.0          0.11840   
1          20.57         17.77          132.90     1326.0          0.08474   
2          19.69         21.25          130.00     1203.0          0.10960   
3          11.42         20.38           77.58      386.1          0.14250   
4          20.29         14.34          135.10     1297.0          0.10030   
..           ...           ...             ...        ...              ...   
564        21.56         22.39          142.00     1479.0          0.11100   
565        20.13         28.25          131.20     1261.0          0.09780   
566        16.60         28.08          108.30      858.1          0.08455   
567        20.60         29.33          140.10     1265.0          0.11780   
568         7.76         24.54           47.92      181.0          0.05263   

     mean compactness  mean concavity  mean concave points  mea

In [86]:
# Plain-text dump of the target column.
print(Y)

0      0
1      0
2      0
3      0
4      0
      ..
564    0
565    0
566    0
567    0
568    1
Name: label, Length: 569, dtype: int64


In [87]:
# Convert the feature table and the target column to NumPy arrays.

X, Y = np.asarray(X), np.asarray(Y)

# **GridSearchCV**

GridSearchCV evaluates every combination of the hyperparameter values we list, using cross-validation, and reports the combination that scores best for our model.

In [88]:
# Load the SVC model with default settings; the search below varies its kernel and C.
model=SVC()

In [89]:
# Hyperparameter grid: the SVC kernels and the values of C to try.

parameters = dict(
    kernel=['linear', 'poly', 'rbf', 'sigmoid'],
    C=[1, 5, 10, 20],
)


In [90]:
# Grid search: try every kernel/C combination in `parameters` with 5-fold cross-validation.

classifier = GridSearchCV(estimator=model, param_grid=parameters, cv=5)

In [91]:
# Run the grid search on the full feature array X and target array Y.
# NOTE(review): in cv_results_ the linear-kernel fits take seconds while the other
# kernels take milliseconds; the features are not scaled here — consider standardising them.

classifier.fit(X,Y)

In [92]:
# Raw per-candidate cross-validation results (fit/score timings and more) as a dict of arrays.
classifier.cv_results_

{'mean_fit_time': array([1.66034932e+00, 4.97212410e-03, 5.43136597e-03, 1.48624420e-02,
        3.06420560e+00, 4.14166451e-03, 4.50825691e-03, 1.36764526e-02,
        4.80991731e+00, 4.72574234e-03, 4.36863899e-03, 1.36538029e-02,
        7.55636973e+00, 7.17916489e-03, 4.17218208e-03, 1.39746666e-02]),
 'std_fit_time': array([8.76319038e-01, 9.87980743e-04, 4.34813092e-04, 3.24452407e-04,
        8.34246566e-01, 3.34482407e-04, 3.18872813e-04, 6.90677389e-04,
        1.09024210e+00, 6.05739052e-04, 3.96490817e-04, 7.01668388e-04,
        2.53322762e+00, 1.50724002e-03, 2.13247346e-04, 7.72157415e-04]),
 'mean_score_time': array([0.0018023 , 0.00171762, 0.00248208, 0.00412769, 0.00136557,
        0.00138903, 0.00185714, 0.00367398, 0.00137501, 0.00135369,
        0.00193086, 0.00365505, 0.00156393, 0.00193243, 0.00167055,
        0.00366511]),
 'std_score_time': array([5.55347442e-04, 1.73474603e-04, 6.33469175e-04, 1.84423600e-04,
        1.01508496e-04, 3.37622933e-05, 1.67570103e-

In [93]:
# Best parameter combination found by the grid search.
best_parameter=classifier.best_params_
print(best_parameter)

{'C': 10, 'kernel': 'linear'}


In [94]:
# Highest accuracy: the mean cross-validated score of the best parameter combination.
# NOTE(review): this name is capitalised, unlike `highest_accuracy` in the
# RandomizedSearchCV section — left unchanged in case other cells use it.

Highest_accuracy=classifier.best_score_
print(Highest_accuracy)

0.9525694767893185


In [95]:
# Load the grid-search results into a pandas DataFrame (one row per parameter combination).

results=pd.DataFrame(classifier.cv_results_)

In [97]:
# Preview the first five candidates with their timings, per-split scores and rank.
results.head()

Unnamed: 0,mean_fit_time,std_fit_time,mean_score_time,std_score_time,param_C,param_kernel,params,split0_test_score,split1_test_score,split2_test_score,split3_test_score,split4_test_score,mean_test_score,std_test_score,rank_test_score
0,1.660349,0.876319,0.001802,0.000555,1,linear,"{'C': 1, 'kernel': 'linear'}",0.947368,0.929825,0.973684,0.921053,0.955752,0.945536,0.018689,4
1,0.004972,0.000988,0.001718,0.000173,1,poly,"{'C': 1, 'kernel': 'poly'}",0.842105,0.885965,0.929825,0.947368,0.938053,0.908663,0.039382,12
2,0.005431,0.000435,0.002482,0.000633,1,rbf,"{'C': 1, 'kernel': 'rbf'}",0.850877,0.894737,0.929825,0.947368,0.938053,0.912172,0.035444,11
3,0.014862,0.000324,0.004128,0.000184,1,sigmoid,"{'C': 1, 'kernel': 'sigmoid'}",0.54386,0.45614,0.464912,0.385965,0.451327,0.460441,0.050253,13
4,3.064206,0.834247,0.001366,0.000102,5,linear,"{'C': 5, 'kernel': 'linear'}",0.947368,0.938596,0.973684,0.929825,0.964602,0.950815,0.016216,2


In [98]:
# Keep only the searched parameters and the mean cross-validated score of each candidate.
grid_search_result = results.loc[:, ['param_C', 'param_kernel', 'mean_test_score']]

In [99]:
# Display the C / kernel / mean score summary of the grid search.
grid_search_result

Unnamed: 0,param_C,param_kernel,mean_test_score
0,1,linear,0.945536
1,1,poly,0.908663
2,1,rbf,0.912172
3,1,sigmoid,0.460441
4,5,linear,0.950815
5,5,poly,0.922729
6,5,rbf,0.931501
7,5,sigmoid,0.411178
8,10,linear,0.952569
9,10,poly,0.920975


# **RandomizedSearchCV**

In [112]:
# Load a fresh SVC model with default settings for the randomized search.

model=SVC()

In [111]:
# Hyperparameter candidates: the SVC kernels and the values of C to sample from.

parameters = dict(
    kernel=['linear', 'poly', 'rbf', 'sigmoid'],
    C=[1, 5, 10, 20],
)


In [113]:
# RandomizedSearchCV: cross-validates (5 folds) a random sample of the kernel/C
# combinations in `parameters` rather than all of them.
# random_state is fixed so the same candidates are sampled on every run; without it
# the reported best parameters and score can change between Restart & Run All.
classifier = RandomizedSearchCV(model, parameters, cv=5, random_state=42)

In [114]:
# Run the randomized search on the full feature array X and target array Y.

classifier.fit(X,Y)

In [115]:
# Raw cross-validation results for the sampled candidates, as a dict of arrays.
classifier.cv_results_

{'mean_fit_time': array([7.76538849e-03, 9.28382874e-03, 1.44521642e+00, 7.66149426e+00,
        1.38904095e-02, 4.83922958e-03, 1.58074856e-02, 4.05778885e-03,
        4.46453094e-03, 1.43278599e-02]),
 'std_fit_time': array([1.43277524e-03, 1.70735962e-03, 5.10069126e-01, 2.56968520e+00,
        6.63690567e-04, 1.39852640e-04, 1.47926440e-03, 1.03234595e-04,
        1.26035611e-04, 1.20302588e-03]),
 'mean_score_time': array([0.00337405, 0.00413876, 0.00153012, 0.00143976, 0.00376539,
        0.00219002, 0.00449238, 0.00149021, 0.00179358, 0.00370073]),
 'std_score_time': array([4.99617846e-04, 1.75943349e-03, 3.12200309e-04, 2.69379968e-04,
        1.19684930e-04, 2.25893550e-04, 7.88139624e-04, 2.29286789e-05,
        7.59521517e-05, 1.30216624e-04]),
 'param_kernel': masked_array(data=['rbf', 'rbf', 'linear', 'linear', 'sigmoid', 'rbf',
                    'sigmoid', 'poly', 'rbf', 'sigmoid'],
              mask=[False, False, False, False, False, False, False, False,
            

In [116]:
# Best parameter combination among the candidates sampled by the randomized search.
best_parameter=classifier.best_params_
print(best_parameter)

{'kernel': 'linear', 'C': 20}


In [117]:
# Highest accuracy: the mean cross-validated score of the best sampled candidate.
highest_accuracy=classifier.best_score_
print(highest_accuracy)

0.9490607048594939


In [118]:
# Load the randomized-search results into a pandas DataFrame (one row per sampled candidate).
result2=pd.DataFrame(classifier.cv_results_)

In [119]:
# Preview the first five sampled candidates with their timings, per-split scores and rank.
result2.head()

Unnamed: 0,mean_fit_time,std_fit_time,mean_score_time,std_score_time,param_kernel,param_C,params,split0_test_score,split1_test_score,split2_test_score,split3_test_score,split4_test_score,mean_test_score,std_test_score,rank_test_score
0,0.007765,0.001433,0.003374,0.0005,rbf,10,"{'kernel': 'rbf', 'C': 10}",0.877193,0.921053,0.912281,0.95614,0.946903,0.922714,0.027879,4
1,0.009284,0.001707,0.004139,0.001759,rbf,5,"{'kernel': 'rbf', 'C': 5}",0.885965,0.929825,0.938596,0.947368,0.955752,0.931501,0.024358,3
2,1.445216,0.510069,0.00153,0.000312,linear,1,"{'kernel': 'linear', 'C': 1}",0.947368,0.929825,0.973684,0.921053,0.955752,0.945536,0.018689,2
3,7.661494,2.569685,0.00144,0.000269,linear,20,"{'kernel': 'linear', 'C': 20}",0.929825,0.95614,0.95614,0.938596,0.964602,0.949061,0.012816,1
4,0.01389,0.000664,0.003765,0.00012,sigmoid,10,"{'kernel': 'sigmoid', 'C': 10}",0.482456,0.403509,0.421053,0.342105,0.362832,0.402391,0.048906,9


In [120]:
# Keep only the sampled parameters and the mean cross-validated score of each candidate.
randomized_search_result = result2.loc[:, ['param_C', 'param_kernel', 'mean_test_score']]

In [None]:
# Display the C / kernel / mean score summary of the randomized search.
randomized_search_result