In [1]:
from IPython.lib.deepreload import reload
%load_ext autoreload
%autoreload 2

In [2]:
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
import numpy as np

In [3]:
# Load the breast-cancer dataset shipped with scikit-learn and unpack its
# `data` and `target` attributes into the sample matrix and label vector.
data = load_breast_cancer()
X, y = data.data, data.target

In [4]:
# Hold out 15% of the samples as the test split; the fixed random_state keeps
# the split identical across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.15,
    random_state=0,
)

In [5]:
import sys

# Add the parent directory to the import path so the `src` package resolves
# (assumes the notebook is run from a sub-directory of the project — confirm).
sys.path.append("..")
from src.SVM.SVM import SVM

# Project-local SVM implementation, configured with explicit minimizer settings.
svm = SVM(
    lambd=0.05,
    minimizer_params={
        "beta": 0.01,
        "min_epsilon": 1e-12,
        "max_steps": 10000,
    },
)
svm.fit(X_train, y_train)

In [12]:
# Labels predicted by the fitted project-local SVM for the held-out test samples.
svm.predict(X_test)

array([0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 0, 0, 0,
       1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 0, 1, 0, 1,
       1, 0, 1, 1, 1, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1, 0, 1,
       0, 0, 0, 1, 0, 0, 1, 1, 0, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0, 1])

In [13]:
# Test-set accuracy: the fraction of predictions that equal the true label.
np.mean(svm.predict(X_test) == y_test)

0.9186046511627907

In [14]:
# Candidate values for the SVM `lambd` hyper-parameter, in ascending order
# (presumably the regularisation strength — confirm in src/SVM/SVM.py).
lambdas = [0.0001, 0.0005, 0.001, 0.005, 0.01, 0.05, 0.1, 0.5, 1, 5]


def validate_model(training_set_x, training_set_y, validating_set_x, validating_set_y,
                   lambda_grid=None):
    """Fit one SVM per candidate lambda and return the best-scoring model.

    Every candidate is trained on the training set and scored by accuracy
    (fraction of predictions equal to the label) on the validating set. One
    progress line is printed per candidate, followed by a summary line.

    Ties are resolved in favour of the candidate that appears later in the
    grid, so with an ascending grid the largest lambda among equally good
    candidates wins.

    Parameters
    ----------
    training_set_x, training_set_y
        Samples and labels handed to ``SVM.fit``.
    validating_set_x, validating_set_y
        Samples handed to ``SVM.predict`` and the labels the predictions are
        compared against.
    lambda_grid : iterable, optional
        Values tried for the ``lambd`` argument of ``SVM``. When omitted, the
        module-level ``lambdas`` list is used.

    Returns
    -------
    The fitted model with the highest validation accuracy, or ``None`` when
    the grid is empty.

    Raises
    ------
    ValueError
        If the validating set is empty, or if the number of predictions does
        not match the number of validating labels.
    """
    if lambda_grid is None:
        lambda_grid = lambdas

    expected = np.ravel(validating_set_y)
    # Fail before any (potentially slow) training instead of dividing by zero later.
    if expected.size == 0:
        raise ValueError("validating set must contain at least one sample")

    best_model = None
    best_lambda = None
    best_score = -np.inf

    for param_lambda in lambda_grid:
        model0 = SVM(lambd=param_lambda)
        model0.fit(training_set_x, training_set_y)

        # Flatten so that column-vector predictions compare element-wise with the labels.
        predicted = np.ravel(model0.predict(validating_set_x))
        if predicted.shape != expected.shape:
            raise ValueError(
                f"got {predicted.size} predictions for {expected.size} validating labels"
            )

        score = np.count_nonzero(predicted == expected) / expected.size
        print(f"Validating model with lambda: {param_lambda} gave score: {score}")

        # `>=` on purpose: as long as the new score is not worse than the
        # current best, the later (larger) lambda is preferred.
        if score >= best_score:
            best_score = score
            best_lambda = param_lambda
            best_model = model0

    print(f"Best lambda for this validation equals: {best_lambda} with score: {best_score}")
    return best_model

In [15]:
# Try every candidate lambda and keep the best-scoring model.
# NOTE(review): X_test / y_test are passed as the validating set, i.e. the same
# split that the accuracy figures in the other cells are computed on, so the
# selected model's score is optimistic. Consider carving a separate validation
# split out of X_train for model selection.
model = validate_model(X_train, y_train, X_test, y_test)

Validating model with lambda: 0.0001 gave score: 0.9069767441860465
Validating model with lambda: 0.0005 gave score: 0.872093023255814
Validating model with lambda: 0.001 gave score: 0.872093023255814
Validating model with lambda: 0.005 gave score: 0.9418604651162791
Validating model with lambda: 0.01 gave score: 0.9069767441860465
Validating model with lambda: 0.05 gave score: 0.9186046511627907
Validating model with lambda: 0.1 gave score: 0.6744186046511628
Validating model with lambda: 0.5 gave score: 0.6627906976744186
Validating model with lambda: 1 gave score: 0.4186046511627907
Validating model with lambda: 5 gave score: 0.5813953488372093
Best lambda for this validation equals: 0.005 with score: 0.9418604651162791


In [16]:
# Raw values returned by the selected model's private `_f` for the test samples
# (presumably the decision-function scores — confirm in src/SVM/SVM.py).
model._f(X_test)

array([[ -384748.53759219],
       [  355313.56093596],
       [  443776.43959173],
       [    6863.9923769 ],
       [  681331.18835657],
       [  408321.56605417],
       [  713857.24005333],
       [  538410.02236267],
       [  308845.58360978],
       [  606203.837421  ],
       [  118551.88753637],
       [   83768.30812388],
       [  645352.33724432],
       [  -73489.1144613 ],
       [  101193.29657763],
       [ -794653.7607268 ],
       [  516695.43727499],
       [-2100895.57186478],
       [ -210387.45694964],
       [-2921354.82419721],
       [-1037677.04978529],
       [  -98051.93798427],
       [  401341.70322015],
       [  298577.51681136],
       [ -144513.90353483],
       [  179093.88663599],
       [  330291.43270289],
       [  447676.49965809],
       [  359108.12728383],
       [-3020885.55558324],
       [  500508.17883209],
       [-2504338.97400011],
       [  167870.06179765],
       [ -861078.45955353],
       [  533816.48149184],
       [ -864943.116

Metoda referencyjna

In [17]:
from sklearn.svm import SVC

# Reference model: scikit-learn's linear-kernel SVC fitted on the same training split.
# Note that this rebinds `svm`, which earlier held the project-local SVM instance.
svm = SVC(kernel="linear", C=0.05)
svm.fit(X_train, y_train)

# Test-set accuracy: the fraction of predictions that equal the true label.
np.mean(svm.predict(X_test) == y_test)

0.9302325581395349