<a href="https://colab.research.google.com/github/Vanshaj-cs/Machine-Learning/blob/main/Hypertuning_Implementation.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [2]:
import numpy as np
import pandas as pd
import sklearn.datasets
from sklearn.svm import SVC
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import RandomizedSearchCV

In [3]:
# Load scikit-learn's breast-cancer dataset. Later cells read its .data,
# .feature_names and .target attributes.
breast_cancer_dataset = sklearn.datasets.load_breast_cancer()

In [4]:
# Wrap the raw feature matrix in a DataFrame whose columns carry the dataset's feature names.
cancer_data = pd.DataFrame(
    data=breast_cancer_dataset.data,
    columns=breast_cancer_dataset.feature_names,
)

In [5]:
# Preview the first five rows to eyeball the feature columns.
cancer_data.head(n=5)

Unnamed: 0,mean radius,mean texture,mean perimeter,mean area,mean smoothness,mean compactness,mean concavity,mean concave points,mean symmetry,mean fractal dimension,...,worst radius,worst texture,worst perimeter,worst area,worst smoothness,worst compactness,worst concavity,worst concave points,worst symmetry,worst fractal dimension
0,17.99,10.38,122.8,1001.0,0.1184,0.2776,0.3001,0.1471,0.2419,0.07871,...,25.38,17.33,184.6,2019.0,0.1622,0.6656,0.7119,0.2654,0.4601,0.1189
1,20.57,17.77,132.9,1326.0,0.08474,0.07864,0.0869,0.07017,0.1812,0.05667,...,24.99,23.41,158.8,1956.0,0.1238,0.1866,0.2416,0.186,0.275,0.08902
2,19.69,21.25,130.0,1203.0,0.1096,0.1599,0.1974,0.1279,0.2069,0.05999,...,23.57,25.53,152.5,1709.0,0.1444,0.4245,0.4504,0.243,0.3613,0.08758
3,11.42,20.38,77.58,386.1,0.1425,0.2839,0.2414,0.1052,0.2597,0.09744,...,14.91,26.5,98.87,567.7,0.2098,0.8663,0.6869,0.2575,0.6638,0.173
4,20.29,14.34,135.1,1297.0,0.1003,0.1328,0.198,0.1043,0.1809,0.05883,...,22.54,16.67,152.2,1575.0,0.1374,0.205,0.4,0.1625,0.2364,0.07678


In [6]:
# Check the frame's dimensions as (rows, columns) before the label column is added.
cancer_data.shape

(569, 30)

In [7]:
# Append the dataset's target vector as a 'label' column.
# NOTE: this mutates cancer_data in place, so the frame has one more column
# once this cell has run.
cancer_data['label'] = breast_cancer_dataset.target

In [8]:
# Count missing values per column; all zeros means no imputation is needed.
cancer_data.isna().sum()

Unnamed: 0,0
mean radius,0
mean texture,0
mean perimeter,0
mean area,0
mean smoothness,0
mean compactness,0
mean concavity,0
mean concave points,0
mean symmetry,0
mean fractal dimension,0


In [9]:
# Count the samples in each class to see how balanced the labels are.
cancer_data['label'].value_counts()

Unnamed: 0_level_0,count
label,Unnamed: 1_level_1
1,357
0,212


In [10]:
# Separate the target column from the features: Y is the 'label' column,
# X is every other column of the frame.
Y = cancer_data['label']
X = cancer_data.drop(columns=['label'])

In [11]:
 # Convert features and labels to plain NumPy arrays before passing them to scikit-learn.
 X, Y = np.asarray(X), np.asarray(Y)

## GridSearchCV

Exhaustively evaluate every kernel/C combination for an SVC using 5-fold cross-validation.

In [13]:
# Base estimator for the grid search: an SVC constructed with no arguments.
# The kernel and C values to try are supplied by the search's parameter grid.
model = SVC()

In [14]:
# Hyperparameter grid for the SVC: four kernels crossed with four values of C
# (16 combinations in total).
parameters = dict(
    kernel=['linear', 'poly', 'rbf', 'sigmoid'],
    C=[1, 5, 10, 20],
)

In [15]:
# Exhaustive search: every kernel/C pair in `parameters` is scored with 5-fold cross-validation.
classifier = GridSearchCV(estimator=model, param_grid=parameters, cv=5)

In [16]:
# Run the cross-validated grid search on the full feature matrix and labels.
# NOTE(review): the features are passed in unscaled; presumably that is why the
# linear-kernel candidates take seconds to fit while the other kernels take
# milliseconds (see mean_fit_time in cv_results_) — consider standardising first.
classifier.fit(X, Y)

In [17]:
# Inspect the raw search results: a dict of per-candidate arrays
# (fit/score timings, per-split test scores, ranks, ...).
classifier.cv_results_

{'mean_fit_time': array([1.56210022e+00, 3.66845131e-03, 4.37364578e-03, 1.41518593e-02,
        3.19254398e+00, 6.22286797e-03, 7.02219009e-03, 2.03637600e-02,
        4.45312519e+00, 4.11448479e-03, 3.93905640e-03, 1.37445927e-02,
        7.42095137e+00, 4.23760414e-03, 4.36997414e-03, 1.32356644e-02]),
 'std_fit_time': array([5.76549303e-01, 1.11908063e-04, 9.30732634e-05, 3.50793201e-04,
        8.25177502e-01, 7.47437471e-04, 7.20478751e-04, 1.85889938e-03,
        1.08822402e+00, 9.68020067e-05, 1.01209358e-04, 7.00368234e-04,
        2.36738640e+00, 1.31550756e-04, 8.63275294e-04, 7.41207796e-04]),
 'mean_score_time': array([0.00162201, 0.00124216, 0.00179734, 0.0038249 , 0.00134168,
        0.00211201, 0.00301542, 0.0061872 , 0.00118694, 0.00118361,
        0.00160751, 0.00385499, 0.00136471, 0.00118475, 0.00155139,
        0.00348887]),
 'std_score_time': array([3.20135251e-04, 1.39445318e-05, 2.34100336e-05, 7.02673229e-05,
        2.82547467e-04, 5.47089550e-05, 1.41325927e-

In [18]:
# Hyperparameter combination that achieved the best mean cross-validated score.
best_parameters = classifier.best_params_
print(best_parameters)

{'C': 10, 'kernel': 'linear'}


In [19]:
# Mean cross-validated test score of the best combination
# (the same value as that candidate's mean_test_score in cv_results_).
highest_accuracy = classifier.best_score_
print(highest_accuracy)

0.9525694767893185


In [21]:
# Tabulate the per-candidate grid-search results so they are easier to read than the raw dict.
result = pd.DataFrame(data=classifier.cv_results_)
result.head(n=5)

Unnamed: 0,mean_fit_time,std_fit_time,mean_score_time,std_score_time,param_C,param_kernel,params,split0_test_score,split1_test_score,split2_test_score,split3_test_score,split4_test_score,mean_test_score,std_test_score,rank_test_score
0,1.5621,0.576549,0.001622,0.00032,1,linear,"{'C': 1, 'kernel': 'linear'}",0.947368,0.929825,0.973684,0.921053,0.955752,0.945536,0.018689,4
1,0.003668,0.000112,0.001242,1.4e-05,1,poly,"{'C': 1, 'kernel': 'poly'}",0.842105,0.885965,0.929825,0.947368,0.938053,0.908663,0.039382,12
2,0.004374,9.3e-05,0.001797,2.3e-05,1,rbf,"{'C': 1, 'kernel': 'rbf'}",0.850877,0.894737,0.929825,0.947368,0.938053,0.912172,0.035444,11
3,0.014152,0.000351,0.003825,7e-05,1,sigmoid,"{'C': 1, 'kernel': 'sigmoid'}",0.54386,0.45614,0.464912,0.385965,0.451327,0.460441,0.050253,13
4,3.192544,0.825178,0.001342,0.000283,5,linear,"{'C': 5, 'kernel': 'linear'}",0.947368,0.938596,0.973684,0.929825,0.964602,0.950815,0.016216,2


In [22]:
# Keep only the searched hyperparameters and the mean CV score for a compact comparison table.
grid_search_result = result.loc[:, ['param_C', 'param_kernel', 'mean_test_score']]
grid_search_result

Unnamed: 0,param_C,param_kernel,mean_test_score
0,1,linear,0.945536
1,1,poly,0.908663
2,1,rbf,0.912172
3,1,sigmoid,0.460441
4,5,linear,0.950815
5,5,poly,0.922729
6,5,rbf,0.931501
7,5,sigmoid,0.411178
8,10,linear,0.952569
9,10,poly,0.920975


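## RandomizedSearchCV

Evaluate a random sample of the same kernel/C combinations instead of all of them, again with 5-fold cross-validation.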

In [23]:
# Fresh SVC, constructed with no arguments, to serve as the estimator for the randomized search.
model = SVC()

In [24]:
# Candidate values the randomized search samples from: the same four kernels
# and four C values used for the exhaustive grid search.
parameters = dict(
    kernel=['linear', 'poly', 'rbf', 'sigmoid'],
    C=[1, 5, 10, 20],
)

In [25]:
# Randomly sample candidates from the kernel/C grid instead of trying all of
# them (n_iter is left at its default), scoring each with 5-fold CV.
# random_state is fixed so the same candidates are drawn on every run; without
# it the sampled subset, and therefore best_params_ / best_score_, can change
# between Restart & Run All executions.
classifier = RandomizedSearchCV(model, parameters, cv=5, random_state=42)

In [26]:
# Fit the randomized search on the same feature matrix and labels used for the grid search.
classifier.fit(X, Y)

In [27]:
# Raw results for the sampled candidates only; each array has one entry per
# sampled combination rather than one per grid point.
classifier.cv_results_

{'mean_fit_time': array([5.03559575e+00, 4.04682159e-03, 3.02165036e+00, 1.51967525e-02,
        1.32737637e-02, 1.32018089e-02, 1.57436371e-02, 4.69903946e-03,
        4.22854424e-03, 1.57700481e+00]),
 'std_fit_time': array([1.33449479e+00, 8.77635079e-05, 8.22403419e-01, 1.30826843e-03,
        5.44876135e-04, 6.48944708e-04, 2.15244263e-03, 4.22147047e-04,
        1.46836535e-04, 6.19591791e-01]),
 'mean_score_time': array([0.00126019, 0.00121503, 0.00148959, 0.00397654, 0.00358925,
        0.00354371, 0.00435839, 0.00197754, 0.00120068, 0.00152435]),
 'std_score_time': array([4.56805518e-05, 2.70490685e-05, 3.31094874e-04, 4.06139314e-05,
        6.92459828e-05, 1.00055685e-04, 7.19448352e-04, 1.69596747e-04,
        2.98634855e-05, 3.30153405e-04]),
 'param_kernel': masked_array(data=['linear', 'poly', 'linear', 'sigmoid', 'sigmoid',
                    'sigmoid', 'sigmoid', 'rbf', 'poly', 'linear'],
              mask=[False, False, False, False, False, False, False, False,
    

In [28]:
# Best combination among the randomly sampled candidates.
# `classifier` now refers to the randomized search, not the earlier grid search.
best_parameters = classifier.best_params_
print(best_parameters)

{'kernel': 'linear', 'C': 10}


In [29]:
# Mean cross-validated test score of the best sampled combination.
highest_accuracy = classifier.best_score_
print(highest_accuracy)

0.9525694767893185


In [30]:
# Convert the randomized-search results dict into a DataFrame and preview the first rows.
result = pd.DataFrame(data=classifier.cv_results_)
result.head(n=5)

Unnamed: 0,mean_fit_time,std_fit_time,mean_score_time,std_score_time,param_kernel,param_C,params,split0_test_score,split1_test_score,split2_test_score,split3_test_score,split4_test_score,mean_test_score,std_test_score,rank_test_score
0,5.035596,1.334495,0.00126,4.6e-05,linear,10,"{'kernel': 'linear', 'C': 10}",0.938596,0.938596,0.973684,0.947368,0.964602,0.952569,0.0142,1
1,0.004047,8.8e-05,0.001215,2.7e-05,poly,10,"{'kernel': 'poly', 'C': 10}",0.885965,0.921053,0.903509,0.938596,0.955752,0.920975,0.024701,4
2,3.02165,0.822403,0.00149,0.000331,linear,5,"{'kernel': 'linear', 'C': 5}",0.947368,0.938596,0.973684,0.929825,0.964602,0.950815,0.016216,2
3,0.015197,0.001308,0.003977,4.1e-05,sigmoid,1,"{'kernel': 'sigmoid', 'C': 1}",0.54386,0.45614,0.464912,0.385965,0.451327,0.460441,0.050253,7
4,0.013274,0.000545,0.003589,6.9e-05,sigmoid,10,"{'kernel': 'sigmoid', 'C': 10}",0.482456,0.403509,0.421053,0.342105,0.362832,0.402391,0.048906,9


In [31]:
# Reduce the results to the sampled hyperparameters and their mean CV score.
randomized_search_result = result.loc[:, ['param_C', 'param_kernel', 'mean_test_score']]
randomized_search_result

Unnamed: 0,param_C,param_kernel,mean_test_score
0,10,linear,0.952569
1,10,poly,0.920975
2,5,linear,0.950815
3,1,sigmoid,0.460441
4,10,sigmoid,0.402391
5,20,sigmoid,0.398867
6,5,sigmoid,0.411178
7,1,rbf,0.912172
8,20,poly,0.919221
9,1,linear,0.945536
