In [1]:
import numpy as np
import pandas as pd
import sklearn.datasets
from sklearn.svm import SVC
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import RandomizedSearchCV

In [7]:
breast_cancer_dataset = sklearn.datasets.load_breast_cancer()

In [8]:
df = pd.DataFrame(breast_cancer_dataset.data, columns = breast_cancer_dataset.feature_names)

In [10]:
df.head()

Unnamed: 0,mean radius,mean texture,mean perimeter,mean area,mean smoothness,mean compactness,mean concavity,mean concave points,mean symmetry,mean fractal dimension,...,worst radius,worst texture,worst perimeter,worst area,worst smoothness,worst compactness,worst concavity,worst concave points,worst symmetry,worst fractal dimension
0,17.99,10.38,122.8,1001.0,0.1184,0.2776,0.3001,0.1471,0.2419,0.07871,...,25.38,17.33,184.6,2019.0,0.1622,0.6656,0.7119,0.2654,0.4601,0.1189
1,20.57,17.77,132.9,1326.0,0.08474,0.07864,0.0869,0.07017,0.1812,0.05667,...,24.99,23.41,158.8,1956.0,0.1238,0.1866,0.2416,0.186,0.275,0.08902
2,19.69,21.25,130.0,1203.0,0.1096,0.1599,0.1974,0.1279,0.2069,0.05999,...,23.57,25.53,152.5,1709.0,0.1444,0.4245,0.4504,0.243,0.3613,0.08758
3,11.42,20.38,77.58,386.1,0.1425,0.2839,0.2414,0.1052,0.2597,0.09744,...,14.91,26.5,98.87,567.7,0.2098,0.8663,0.6869,0.2575,0.6638,0.173
4,20.29,14.34,135.1,1297.0,0.1003,0.1328,0.198,0.1043,0.1809,0.05883,...,22.54,16.67,152.2,1575.0,0.1374,0.205,0.4,0.1625,0.2364,0.07678


In [12]:
df.columns

Index(['mean radius', 'mean texture', 'mean perimeter', 'mean area',
       'mean smoothness', 'mean compactness', 'mean concavity',
       'mean concave points', 'mean symmetry', 'mean fractal dimension',
       'radius error', 'texture error', 'perimeter error', 'area error',
       'smoothness error', 'compactness error', 'concavity error',
       'concave points error', 'symmetry error', 'fractal dimension error',
       'worst radius', 'worst texture', 'worst perimeter', 'worst area',
       'worst smoothness', 'worst compactness', 'worst concavity',
       'worst concave points', 'worst symmetry', 'worst fractal dimension'],
      dtype='object')

In [13]:
df.shape

(569, 30)

In [14]:
df.isnull().sum()

mean radius                0
mean texture               0
mean perimeter             0
mean area                  0
mean smoothness            0
mean compactness           0
mean concavity             0
mean concave points        0
mean symmetry              0
mean fractal dimension     0
radius error               0
texture error              0
perimeter error            0
area error                 0
smoothness error           0
compactness error          0
concavity error            0
concave points error       0
symmetry error             0
fractal dimension error    0
worst radius               0
worst texture              0
worst perimeter            0
worst area                 0
worst smoothness           0
worst compactness          0
worst concavity            0
worst concave points       0
worst symmetry             0
worst fractal dimension    0
dtype: int64

In [17]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 569 entries, 0 to 568
Data columns (total 30 columns):
 #   Column                   Non-Null Count  Dtype  
---  ------                   --------------  -----  
 0   mean radius              569 non-null    float64
 1   mean texture             569 non-null    float64
 2   mean perimeter           569 non-null    float64
 3   mean area                569 non-null    float64
 4   mean smoothness          569 non-null    float64
 5   mean compactness         569 non-null    float64
 6   mean concavity           569 non-null    float64
 7   mean concave points      569 non-null    float64
 8   mean symmetry            569 non-null    float64
 9   mean fractal dimension   569 non-null    float64
 10  radius error             569 non-null    float64
 11  texture error            569 non-null    float64
 12  perimeter error          569 non-null    float64
 13  area error               569 non-null    float64
 14  smoothness error         5

In [23]:
target = breast_cancer_dataset.target_names

In [24]:
target

array(['malignant', 'benign'], dtype='<U9')

In [25]:
df['target'] = breast_cancer_dataset.target

In [26]:
df.head()

Unnamed: 0,mean radius,mean texture,mean perimeter,mean area,mean smoothness,mean compactness,mean concavity,mean concave points,mean symmetry,mean fractal dimension,...,worst texture,worst perimeter,worst area,worst smoothness,worst compactness,worst concavity,worst concave points,worst symmetry,worst fractal dimension,target
0,17.99,10.38,122.8,1001.0,0.1184,0.2776,0.3001,0.1471,0.2419,0.07871,...,17.33,184.6,2019.0,0.1622,0.6656,0.7119,0.2654,0.4601,0.1189,0
1,20.57,17.77,132.9,1326.0,0.08474,0.07864,0.0869,0.07017,0.1812,0.05667,...,23.41,158.8,1956.0,0.1238,0.1866,0.2416,0.186,0.275,0.08902,0
2,19.69,21.25,130.0,1203.0,0.1096,0.1599,0.1974,0.1279,0.2069,0.05999,...,25.53,152.5,1709.0,0.1444,0.4245,0.4504,0.243,0.3613,0.08758,0
3,11.42,20.38,77.58,386.1,0.1425,0.2839,0.2414,0.1052,0.2597,0.09744,...,26.5,98.87,567.7,0.2098,0.8663,0.6869,0.2575,0.6638,0.173,0
4,20.29,14.34,135.1,1297.0,0.1003,0.1328,0.198,0.1043,0.1809,0.05883,...,16.67,152.2,1575.0,0.1374,0.205,0.4,0.1625,0.2364,0.07678,0


In [27]:
df.shape

(569, 31)

In [28]:
target = breast_cancer_dataset.target_names

In [29]:
target

array(['malignant', 'benign'], dtype='<U9')

In [30]:
df.tail()

Unnamed: 0,mean radius,mean texture,mean perimeter,mean area,mean smoothness,mean compactness,mean concavity,mean concave points,mean symmetry,mean fractal dimension,...,worst texture,worst perimeter,worst area,worst smoothness,worst compactness,worst concavity,worst concave points,worst symmetry,worst fractal dimension,target
564,21.56,22.39,142.0,1479.0,0.111,0.1159,0.2439,0.1389,0.1726,0.05623,...,26.4,166.1,2027.0,0.141,0.2113,0.4107,0.2216,0.206,0.07115,0
565,20.13,28.25,131.2,1261.0,0.0978,0.1034,0.144,0.09791,0.1752,0.05533,...,38.25,155.0,1731.0,0.1166,0.1922,0.3215,0.1628,0.2572,0.06637,0
566,16.6,28.08,108.3,858.1,0.08455,0.1023,0.09251,0.05302,0.159,0.05648,...,34.12,126.7,1124.0,0.1139,0.3094,0.3403,0.1418,0.2218,0.0782,0
567,20.6,29.33,140.1,1265.0,0.1178,0.277,0.3514,0.152,0.2397,0.07016,...,39.42,184.6,1821.0,0.165,0.8681,0.9387,0.265,0.4087,0.124,0
568,7.76,24.54,47.92,181.0,0.05263,0.04362,0.0,0.0,0.1587,0.05884,...,30.37,59.16,268.6,0.08996,0.06444,0.0,0.0,0.2871,0.07039,1


In [31]:
df["target"].value_counts()

target
1    357
0    212
Name: count, dtype: int64

In [32]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 569 entries, 0 to 568
Data columns (total 31 columns):
 #   Column                   Non-Null Count  Dtype  
---  ------                   --------------  -----  
 0   mean radius              569 non-null    float64
 1   mean texture             569 non-null    float64
 2   mean perimeter           569 non-null    float64
 3   mean area                569 non-null    float64
 4   mean smoothness          569 non-null    float64
 5   mean compactness         569 non-null    float64
 6   mean concavity           569 non-null    float64
 7   mean concave points      569 non-null    float64
 8   mean symmetry            569 non-null    float64
 9   mean fractal dimension   569 non-null    float64
 10  radius error             569 non-null    float64
 11  texture error            569 non-null    float64
 12  perimeter error          569 non-null    float64
 13  area error               569 non-null    float64
 14  smoothness error         5

In [33]:
X = df.drop(columns="target", axis=1)
y = df["target"]

In [34]:
X

Unnamed: 0,mean radius,mean texture,mean perimeter,mean area,mean smoothness,mean compactness,mean concavity,mean concave points,mean symmetry,mean fractal dimension,...,worst radius,worst texture,worst perimeter,worst area,worst smoothness,worst compactness,worst concavity,worst concave points,worst symmetry,worst fractal dimension
0,17.99,10.38,122.80,1001.0,0.11840,0.27760,0.30010,0.14710,0.2419,0.07871,...,25.380,17.33,184.60,2019.0,0.16220,0.66560,0.7119,0.2654,0.4601,0.11890
1,20.57,17.77,132.90,1326.0,0.08474,0.07864,0.08690,0.07017,0.1812,0.05667,...,24.990,23.41,158.80,1956.0,0.12380,0.18660,0.2416,0.1860,0.2750,0.08902
2,19.69,21.25,130.00,1203.0,0.10960,0.15990,0.19740,0.12790,0.2069,0.05999,...,23.570,25.53,152.50,1709.0,0.14440,0.42450,0.4504,0.2430,0.3613,0.08758
3,11.42,20.38,77.58,386.1,0.14250,0.28390,0.24140,0.10520,0.2597,0.09744,...,14.910,26.50,98.87,567.7,0.20980,0.86630,0.6869,0.2575,0.6638,0.17300
4,20.29,14.34,135.10,1297.0,0.10030,0.13280,0.19800,0.10430,0.1809,0.05883,...,22.540,16.67,152.20,1575.0,0.13740,0.20500,0.4000,0.1625,0.2364,0.07678
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
564,21.56,22.39,142.00,1479.0,0.11100,0.11590,0.24390,0.13890,0.1726,0.05623,...,25.450,26.40,166.10,2027.0,0.14100,0.21130,0.4107,0.2216,0.2060,0.07115
565,20.13,28.25,131.20,1261.0,0.09780,0.10340,0.14400,0.09791,0.1752,0.05533,...,23.690,38.25,155.00,1731.0,0.11660,0.19220,0.3215,0.1628,0.2572,0.06637
566,16.60,28.08,108.30,858.1,0.08455,0.10230,0.09251,0.05302,0.1590,0.05648,...,18.980,34.12,126.70,1124.0,0.11390,0.30940,0.3403,0.1418,0.2218,0.07820
567,20.60,29.33,140.10,1265.0,0.11780,0.27700,0.35140,0.15200,0.2397,0.07016,...,25.740,39.42,184.60,1821.0,0.16500,0.86810,0.9387,0.2650,0.4087,0.12400


In [35]:
y

0      0
1      0
2      0
3      0
4      0
      ..
564    0
565    0
566    0
567    0
568    1
Name: target, Length: 569, dtype: int64

In [36]:
X = np.asarray(X)
y = np.asarray(y)

In [43]:
#GridSearchCV: is used for determining the best parameters for our model
model = SVC()
parameters = {
    "kernel": ["linear", "poly", "rbf", "sigmoid"],
    "C": [1, 5, 10, 20]
}


In [44]:
classifier = GridSearchCV(model, parameters, cv=5)

In [45]:
classifier.fit(X, y)

In [46]:
classifier.cv_results_

{'mean_fit_time': array([1.16732464e+00, 3.17091942e-03, 3.64022255e-03, 1.14465714e-02,
        2.44665732e+00, 3.29298973e-03, 3.25388908e-03, 1.08855247e-02,
        3.53255291e+00, 3.29322815e-03, 3.32331657e-03, 1.04407310e-02,
        5.77370315e+00, 3.93753052e-03, 3.35307121e-03, 1.03771687e-02]),
 'std_fit_time': array([4.84514070e-01, 2.45785869e-04, 2.22074609e-04, 8.11450180e-04,
        5.56768959e-01, 9.08060901e-04, 2.48115671e-04, 5.87166585e-04,
        7.32719625e-01, 2.48489099e-04, 3.81275217e-04, 6.57156250e-04,
        1.87973790e+00, 8.00912296e-04, 4.25688008e-04, 8.42956229e-04]),
 'mean_score_time': array([0.00087323, 0.00108423, 0.0015192 , 0.00294895, 0.00079846,
        0.00088053, 0.00141191, 0.00291772, 0.00081325, 0.00091791,
        0.00132275, 0.00258856, 0.00076685, 0.00103345, 0.00126886,
        0.00288806]),
 'std_score_time': array([1.54861088e-04, 1.12989742e-04, 2.61617870e-04, 1.35096929e-04,
        8.67986405e-05, 7.60703087e-05, 2.65194562e-

In [47]:
best_parameters = classifier.best_params_

In [48]:
best_parameters

{'C': 10, 'kernel': 'linear'}

In [53]:
highest_accuracy = classifier.best_score_
print(round(highest_accuracy, 2)*100)

95.0


In [49]:
result = pd.DataFrame(classifier.cv_results_)

In [50]:
result

Unnamed: 0,mean_fit_time,std_fit_time,mean_score_time,std_score_time,param_C,param_kernel,params,split0_test_score,split1_test_score,split2_test_score,split3_test_score,split4_test_score,mean_test_score,std_test_score,rank_test_score
0,1.167325,0.484514,0.000873,0.000155,1,linear,"{'C': 1, 'kernel': 'linear'}",0.947368,0.929825,0.973684,0.921053,0.955752,0.945536,0.018689,4
1,0.003171,0.000246,0.001084,0.000113,1,poly,"{'C': 1, 'kernel': 'poly'}",0.842105,0.885965,0.929825,0.947368,0.938053,0.908663,0.039382,12
2,0.00364,0.000222,0.001519,0.000262,1,rbf,"{'C': 1, 'kernel': 'rbf'}",0.850877,0.894737,0.929825,0.947368,0.938053,0.912172,0.035444,11
3,0.011447,0.000811,0.002949,0.000135,1,sigmoid,"{'C': 1, 'kernel': 'sigmoid'}",0.54386,0.45614,0.464912,0.385965,0.451327,0.460441,0.050253,13
4,2.446657,0.556769,0.000798,8.7e-05,5,linear,"{'C': 5, 'kernel': 'linear'}",0.947368,0.938596,0.973684,0.929825,0.964602,0.950815,0.016216,2
5,0.003293,0.000908,0.000881,7.6e-05,5,poly,"{'C': 5, 'kernel': 'poly'}",0.885965,0.912281,0.921053,0.938596,0.955752,0.922729,0.023689,6
6,0.003254,0.000248,0.001412,0.000265,5,rbf,"{'C': 5, 'kernel': 'rbf'}",0.885965,0.929825,0.938596,0.947368,0.955752,0.931501,0.024358,5
7,0.010886,0.000587,0.002918,0.000758,5,sigmoid,"{'C': 5, 'kernel': 'sigmoid'}",0.491228,0.421053,0.421053,0.350877,0.371681,0.411178,0.048578,14
8,3.532553,0.73272,0.000813,0.000101,10,linear,"{'C': 10, 'kernel': 'linear'}",0.938596,0.938596,0.973684,0.947368,0.964602,0.952569,0.0142,1
9,0.003293,0.000248,0.000918,0.000109,10,poly,"{'C': 10, 'kernel': 'poly'}",0.885965,0.921053,0.903509,0.938596,0.955752,0.920975,0.024701,8


In [54]:
grid_search_result = result[["param_C", "param_kernel", "mean_test_score"]]

In [55]:
grid_search_result

Unnamed: 0,param_C,param_kernel,mean_test_score
0,1,linear,0.945536
1,1,poly,0.908663
2,1,rbf,0.912172
3,1,sigmoid,0.460441
4,5,linear,0.950815
5,5,poly,0.922729
6,5,rbf,0.931501
7,5,sigmoid,0.411178
8,10,linear,0.952569
9,10,poly,0.920975


In [56]:
#RandomizedSearchCV: 
model = SVC()
parameters = {
    "kernel":["linear", "poly", "rbf", "sigmoid"],
    "C":[1, 5, 10, 20]
}

In [57]:
classifier = RandomizedSearchCV(model, parameters, cv=5)

In [58]:
classifier.fit(X,y)

  _data = np.array(data, dtype=dtype, copy=copy,


In [60]:
classifier.cv_results_

{'mean_fit_time': array([5.21240234e-03, 1.04739642e+00, 2.89416313e-03, 2.61244774e-03,
        2.95963287e-03, 3.14669609e-03, 2.81400681e-03, 1.03525639e-02,
        2.90093422e-03, 5.50828843e+00]),
 'std_fit_time': array([1.92126204e-03, 4.26843528e-01, 5.59531376e-05, 1.61128744e-04,
        2.14127792e-04, 2.53945644e-04, 2.84607136e-04, 3.00078606e-04,
        7.88514223e-05, 1.77901303e+00]),
 'mean_score_time': array([0.00174398, 0.00079374, 0.00105877, 0.00081158, 0.00101786,
        0.00074987, 0.00083637, 0.00272479, 0.00077701, 0.00082045]),
 'std_score_time': array([4.05730295e-04, 5.89246876e-05, 5.11622939e-05, 2.65650085e-05,
        4.24321845e-05, 3.16963670e-05, 5.54840554e-05, 1.46588817e-04,
        2.11964710e-05, 1.82898934e-04]),
 'param_kernel': masked_array(data=['rbf', 'linear', 'rbf', 'poly', 'rbf', 'poly', 'poly',
                    'sigmoid', 'poly', 'linear'],
              mask=[False, False, False, False, False, False, False, False,
                 

In [61]:
best_parameters = classifier.best_params_
print(best_parameters)

{'kernel': 'linear', 'C': 20}


In [64]:
highest_accuracy = classifier.best_score_
print(round(highest_accuracy, 2)*100)

95.0


In [65]:
result = pd.DataFrame(classifier.cv_results_)

In [66]:
result.head()

Unnamed: 0,mean_fit_time,std_fit_time,mean_score_time,std_score_time,param_kernel,param_C,params,split0_test_score,split1_test_score,split2_test_score,split3_test_score,split4_test_score,mean_test_score,std_test_score,rank_test_score
0,0.005212,0.001921,0.001744,0.000406,rbf,5,"{'kernel': 'rbf', 'C': 5}",0.885965,0.929825,0.938596,0.947368,0.955752,0.931501,0.024358,3
1,1.047396,0.426844,0.000794,5.9e-05,linear,1,"{'kernel': 'linear', 'C': 1}",0.947368,0.929825,0.973684,0.921053,0.955752,0.945536,0.018689,2
2,0.002894,5.6e-05,0.001059,5.1e-05,rbf,10,"{'kernel': 'rbf', 'C': 10}",0.877193,0.921053,0.912281,0.95614,0.946903,0.922714,0.027879,5
3,0.002612,0.000161,0.000812,2.7e-05,poly,1,"{'kernel': 'poly', 'C': 1}",0.842105,0.885965,0.929825,0.947368,0.938053,0.908663,0.039382,9
4,0.00296,0.000214,0.001018,4.2e-05,rbf,20,"{'kernel': 'rbf', 'C': 20}",0.877193,0.921053,0.921053,0.947368,0.938053,0.920944,0.024105,7


In [67]:
randomized_search_result = result[["param_C", "param_kernel", "mean_test_score"]]

In [68]:
randomized_search_result

Unnamed: 0,param_C,param_kernel,mean_test_score
0,5,rbf,0.931501
1,1,linear,0.945536
2,10,rbf,0.922714
3,1,poly,0.908663
4,20,rbf,0.920944
5,20,poly,0.919221
6,5,poly,0.922729
7,1,sigmoid,0.460441
8,10,poly,0.920975
9,20,linear,0.949061
