In [1]:
import pandas as pd

import sklearn.metrics as mt
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC

<p style="background-image: linear-gradient(to right, #0aa98f, #68dab2)"> &nbsp;</p>

In [2]:
data = pd.read_csv('data/01_logistic regression.csv')

data.drop(columns=['id', 'Unnamed: 32'], inplace=True)

data['diagnosis'] = (data['diagnosis']=='M').astype(int)
data.head(3)

Unnamed: 0,diagnosis,radius_mean,texture_mean,perimeter_mean,area_mean,smoothness_mean,compactness_mean,concavity_mean,concave points_mean,symmetry_mean,...,radius_worst,texture_worst,perimeter_worst,area_worst,smoothness_worst,compactness_worst,concavity_worst,concave points_worst,symmetry_worst,fractal_dimension_worst
0,1,17.99,10.38,122.8,1001.0,0.1184,0.2776,0.3001,0.1471,0.2419,...,25.38,17.33,184.6,2019.0,0.1622,0.6656,0.7119,0.2654,0.4601,0.1189
1,1,20.57,17.77,132.9,1326.0,0.08474,0.07864,0.0869,0.07017,0.1812,...,24.99,23.41,158.8,1956.0,0.1238,0.1866,0.2416,0.186,0.275,0.08902
2,1,19.69,21.25,130.0,1203.0,0.1096,0.1599,0.1974,0.1279,0.2069,...,23.57,25.53,152.5,1709.0,0.1444,0.4245,0.4504,0.243,0.3613,0.08758


<p style="background-image: linear-gradient(#0aa98f, #FFFFFF 10%)"> &nbsp; </p>

In [3]:
y = data['diagnosis']
X = data.drop(columns='diagnosis')

X_train, X_test , y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)

scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

In [4]:
model = SVC()
model.fit(X_train, y_train)
predictions = model.predict(X_test)

confusion_matrix = mt.confusion_matrix(y_test, predictions)
print('\nConfusion Matrix:\n', confusion_matrix)

accuracy = mt.accuracy_score(y_test, predictions)
print('\nAccuracy:', accuracy)


Confusion Matrix:
 [[67  0]
 [ 2 45]]

Accuracy: 0.9824561403508771


<p style="background-image: linear-gradient(#0aa98f, #FFFFFF 10%)"> &nbsp; </p>

In [5]:
params = {
    'C': range(1, 20),
    'kernel': ['linear', 'poly', 'rbf'],
    'degree': [2,3,4]
}

grid = GridSearchCV(estimator=model, param_grid=params, cv=10, n_jobs=-1)
grid.fit(X_train, y_train)

print(grid.best_params_)
print(grid.best_score_)
print(grid.best_estimator_)

{'C': 2, 'degree': 2, 'kernel': 'rbf'}
0.9780676328502416
SVC(C=2, degree=2)


In [6]:
model = grid.best_estimator_
model.fit(X_train, y_train)
predictions = model.predict(X_test)

confusion_matrix = mt.confusion_matrix(y_test, predictions)
print('\nConfusion Matrix:\n', confusion_matrix)

accuracy = mt.accuracy_score(y_test, predictions)
print('\nAccuracy:', accuracy)


Confusion Matrix:
 [[66  1]
 [ 1 46]]

Accuracy: 0.9824561403508771


<p style="background-image: linear-gradient(to right, #0aa98f, #68dab2)"> &nbsp;</p>