In [12]:
import pandas as pd
import numpy as  np
import scipy

from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split, GridSearchCV, RandomizedSearchCV
from sklearn.svm import SVC
import utils as meu

In [3]:
df = load_breast_cancer()
X = df.data
y = df.target

In [6]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=.3, random_state=42)

def_svc = SVC(random_state=42)
def_svc.fit(X_train, y_train)

def_y_pred = def_svc.predict(X_test)
print("Default Model Stats:")
meu.display_model_performance_metrics(true_labels=y_test, predicted_labels=def_y_pred, classes=[0,1])

Default Model Stats:
Model Performance metrics:
------------------------------
Accuracy: 0.9357
Precision: 0.9416
Recall: 0.9357
F1 Score: 0.9342

Model Classification report:
------------------------------
              precision    recall  f1-score   support

           0       1.00      0.83      0.90        63
           1       0.91      1.00      0.95       108

    accuracy                           0.94       171
   macro avg       0.95      0.91      0.93       171
weighted avg       0.94      0.94      0.93       171



In [9]:
# setting the parameter grid
grid_paramaters = {
    "kernel": ["linear", "rbf"],
    "gamma": [1e-3, 1e-4],
    "C": [1, 10, 50, 100]
}

# perform hyperparameter tuning
print("# Tuning hyper-parameters for accuracy")
clf = GridSearchCV(SVC(random_state=42), grid_paramaters, cv=5, scoring="accuracy")
clf.fit(X_train, y_train)
# view accuracy scores for all the models
print("Grid scores for all models based on CV: ")
means = clf.cv_results_["mean_test_score"]
stds = clf.cv_results_["std_test_score"]

for mean, std, params in zip(means, stds, clf.cv_results_["params"]):
    print("%0.5f (+/-%0.05f) for %r" % (mean, std * 2, params))
print("Best parameters set found on development set:", clf.best_params_)
print("Best model validation accuracy:", clf.best_score_)

# Tuning hyper-parameters for accuracy
Grid scores for all models based on CV: 
0.95218 (+/-0.06256) for {'C': 1, 'gamma': 0.001, 'kernel': 'linear'}
0.91193 (+/-0.04607) for {'C': 1, 'gamma': 0.001, 'kernel': 'rbf'}
0.95218 (+/-0.06256) for {'C': 1, 'gamma': 0.0001, 'kernel': 'linear'}
0.92459 (+/-0.02287) for {'C': 1, 'gamma': 0.0001, 'kernel': 'rbf'}
0.96228 (+/-0.04244) for {'C': 10, 'gamma': 0.001, 'kernel': 'linear'}
0.90187 (+/-0.04735) for {'C': 10, 'gamma': 0.001, 'kernel': 'rbf'}
0.96228 (+/-0.04244) for {'C': 10, 'gamma': 0.0001, 'kernel': 'linear'}
0.92959 (+/-0.03449) for {'C': 10, 'gamma': 0.0001, 'kernel': 'rbf'}
0.95718 (+/-0.05923) for {'C': 50, 'gamma': 0.001, 'kernel': 'linear'}
0.90187 (+/-0.04735) for {'C': 50, 'gamma': 0.001, 'kernel': 'rbf'}
0.95718 (+/-0.05923) for {'C': 50, 'gamma': 0.0001, 'kernel': 'linear'}
0.93462 (+/-0.02969) for {'C': 50, 'gamma': 0.0001, 'kernel': 'rbf'}
0.95468 (+/-0.05703) for {'C': 100, 'gamma': 0.001, 'kernel': 'linear'}
0.90187 (+/-

In [11]:
gs_best = clf.best_estimator_
tuned_y_pred = gs_best.predict(X_test)

print("Tuned model stats: ")
meu.display_model_performance_metrics(true_labels=y_test, predicted_labels=tuned_y_pred, classes=[0,1])

Tuned model stats: 
Model Performance metrics:
------------------------------
Accuracy: 0.9708
Precision: 0.9709
Recall: 0.9708
F1 Score: 0.9708

Model Classification report:
------------------------------
              precision    recall  f1-score   support

           0       0.95      0.97      0.96        63
           1       0.98      0.97      0.98       108

    accuracy                           0.97       171
   macro avg       0.97      0.97      0.97       171
weighted avg       0.97      0.97      0.97       171



 # Randomized Search

In [13]:
param_grid = {
    "C": scipy.stats.expon(scale=10),
    "gamma": scipy.stats.expon(scale=.1),
    "kernel": ["rbf", "linear"]
}
random_search = RandomizedSearchCV(SVC(random_state=42), param_distributions=param_grid, n_iter=50, cv=5)
random_search.fit(X_train, y_train)
print("Best Parameters set found on development set: ")
random_search.best_params_

Best Parameters set found on development set: 


{'C': 9.172777197466246, 'gamma': 0.15866888534384951, 'kernel': 'linear'}

In [16]:
rs_best = random_search.best_estimator_
rs_y_pred = rs_best.predict(X_test)
meu.get_metrics(true_labels=y_test, predicted_labels=rs_y_pred)

Accuracy: 0.9649
Precision: 0.9649
Recall: 0.9649
F1 Score: 0.9649
