# Support Vector Machine

## 1. Lib

In [63]:
import copy

import pandas as pd
import numpy as np
import math
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split

## 2. Dataset

In [64]:
data = load_breast_cancer()
X = data.data
y = data.target

In [65]:
print(X.shape)
print(y.shape)

(569, 30)
(569,)


## 3. Problem definition
### 3.1. Ogólnie
Zadanie polega na znalezieniu funkcji $$ f(x)={w^{T}x-b}$$, która tworzy hiperpłaszczyznę zapewniającą klasyfikację (dopuszczającą pomyłki) z użyciem maszyny wektorów nośnych SVM. Otrzymana funkcja powinna zapewniać jak najmniejszą liczbę pomyłek przy klasyfikowaniu elementów zbioru BREAST CANCER do odpowiedniej klasy.

Klasyfikacja odbywa się poprzez zwrócenie dla danego zestawu cech $x$ grupy $y(x) = -1 \lor y(x) = 1$, do której należy za pomocą funkcji:

$$
y(x) =
{
\left\{
\begin{array}{ll}
-1 & \textrm{, $f(x) \leq 0$}\\
1 & \textrm{, $f(x) > 0$}
\end{array}
\right
}
$$


Aby otrzymać funkcję $f(x)$ należy znaleźć parametry $w$ i $b$, które minimalizują funkcję straty:
$$ J(w,b)=\Sigma_i(max(1-f(x_i)y_i, 0)) + \lambda*||w||^2 $$

Aby zoptymalizować owe parametry, zastosowana zostanie metoda gradientu prostego, w tym celu potrzebny będzie gradient funkcji $J(w,b)$:
$$
\nabla J =
\begin{bmatrix}
    \partial J \over \partial w_1 \\
    \vdots \\
    \partial J \over \partial w_n \\
    \partial J \over \partial b \\
\end{bmatrix}
$$



Natomiast pochodne cząstkowe te prezentują się następująco:
$$
{\partial J \over \partial w_i}=
{\lambda*2*w_i} + \Sigma_k(1 \cdot
{\left\{ \begin{array}{ll}
0 & \textrm{, $ 1-f(x_k)y_k \leq 0$ }\\
-y_k \cdot x_{k[i]} & \textrm{, $ 1-f(x_k)y_k > 0$}
\end{array}\right })
$$


$$
{\partial J \over \partial b}=
\Sigma_k (1 * {
\left\{ \begin{array}{ll}
0 & \textrm{, $ 1-f(x_k) \cdot y_k \leq 0$ }\\
y_k & \textrm{, $ 1-f(x_k) \cdot y_k > 0$}
\end{array}\right
}
)
$$



### 3.2. Functions to train:
+ funkcja **f(x)**:

In [66]:
def f(x, f_params):
    b = f_params[-1]
    w = f_params[:-1]
    w = np.matrix(w)
    x_mat = np.matrix(x)

    if w.shape[0] != 1:  # w = horizontal vector
        w = w.transpose()
    if x_mat.shape[0] == 1: # x = vertical vector
        x_mat = x_mat.transpose()

    return (w*x_mat).item(0) - b

+ gradient **J(w, b)**:

In [67]:
def grad_j(params, set_xs, set_ys, lambd, function_f):
    """
    param params: vector of parameters ([w1,...,wn,b])
    param set_xs: collection of parameteres of data for classification
    param set_ys: collection of results of data for classification
    param lambd: lambda used for SVM (λ)
    param function_f: function of which the gradient is
    """
    result = np.zeros(len(params))              # result is a numpy array of partial derivatives
    # counting gradients for w1, w2, ..., wn
    for iterator_w in range(len(params) - 1):   # partial derivatives for w1,..., wn
        summ = 2*lambd*params[iterator_w]       # sum = 2 * λ * wi

        for index, x in enumerate(set_xs):
            y = set_ys[index]
            distance = 1 - y* function_f(x, params)
            if distance > 0:                    # if (1-f(xk)yk) > 0
                summ -= y*x[iterator_w]         # sum = sum - yk*xk[i]
        result[iterator_w] = summ

# counting gradient for b
    summ = 0                                    # sum = 0
    for index, x in enumerate(set_xs):
        y =set_ys[index]
        distance = 1 - y* function_f(x, params)
        if distance >0:                        # if (1-f(xk)yk) > 0
            summ += y                          # sum += yk
    result[-1] = summ
    return result


In [68]:
# def grad_j(params, set_xs, set_ys, lambd, function_f):
#     """
#     param params: vector of parameters ([w1,...,wn,b])
#     param set_xs: collection of parameteres of data for classification
#     param set_ys: collection of results of data for classification
#     param lambd: lambda used for SVM (λ)
#     param function_f: function of which the gradient is
#     """
#     result_params = np.zeros_like(params)  # result is a numpy array of partial derivatives
#     # counting gradients for w1, w2, ..., wn
#     summs = params * 2 * lambd # sum = 2 * λ * wi
#
#
#
#     for index, x in enumerate(set_xs):
#     #             y = set_ys[index]
#     #             distance = 1 - y* function_f(x, params)
#     #             if distance > 0:                    # if (1-f(xk)yk) > 0
#     #                 summ -= y*x[iterator_w]         # sum = sum - yk*xk[i]
#
#     distances = 1 - set_ys * function_f(set_xs, params)
#     summs[np.where(distances>0)] -= set_ys*set_xs # if (1-f(xk)yk) > 0: sum = sum - yk*xk[i]
#     result_params[:-1] = summs
#
#
#     summ = 0                                    # sum = 0
#     for index, x in enumerate(set_xs):
#         y =set_ys[index]
#         distance = 1 - y* function_f(x, params)
#         if distance >0:                        # if (1-f(xk)yk) > 0
#             summ += y                          # sum += yk
#     result[-1] = summ
#     return result
#

+ algorytm realizujący metodę gradientu prostego:

In [69]:
def gradient_descent(function_f, gradient_f, params, beta, set_xs, set_ys, lambd, max_steps=1000, min_epsilon = 1e-10):
        """
        param function_f: function that is optimized
        param gradient_f: gradient of function_f
        param params: parameters to optimize
        param beta: parameter beta used in gradient_descent
        param set_xs: collection of parameters of data to classify
        param set_ys: collection of targets of data to classify
        param lambd: parameter used in SVM (λ)
        params max_steps and min_epsilon: parameters for STOP CRITERIUM in gradient_descent
        """
        new_param = params
        act_step = 0
        while 1:
            act_gradient = gradient_f(new_param, set_xs, set_ys, lambd, function_f)
            if np.linalg.norm(act_gradient) < min_epsilon or act_step > max_steps:
                return new_param
            new_param = new_param - beta * act_gradient
            act_step += 1

### 3.2. Functions to evaluate:
+ funkcja **y(x)**:

In [70]:
def classify_y(x, function_f, params):
    if function_f(x, params) <=0:
        return -1
    else:
        return 1

## 4. Train & test

In [71]:
def change_to_correct_targets(arr):
    new_arr = copy.copy(arr)
    new_arr[np.where(arr>0)]=1
    new_arr[np.where(arr<=0)]=-1
    return new_arr

In [72]:
y = change_to_correct_targets(y)

In [73]:
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.15, random_state=42
)

+ dodatkowo zostaną zdefiniowane funkcje: trenujące model (**train_model()**) oraz wykonujące walidacje dla hiperparametru lambda(**validate_model()**)
+ a także zostanie zdefiniowany zbiór możliwych lambd **lambdas = [0.0001, 0.0005, 0.001, 0.005, 0.01, 0.05, 0.1, 0.5, 1, 5]**:

In [74]:
lambdas = [0.0001, 0.0005, 0.001, 0.005, 0.01, 0.05, 0.1, 0.5, 1, 5]

def train_model(model0, training_set_x, training_set_y, param_lambda):
    return gradient_descent(f, grad_j, model0, 0.001, training_set_x, training_set_y, param_lambda)

def validate_model(training_set_x, training_set_y, validating_set_x, validating_set_y):
    best_model = None
    best_lambda = None
    best_score = - math.inf
    for param_lambda in lambdas:
        model0 = [0 for _ in range(31)]
        actual_model = train_model(model0, training_set_x, training_set_y, param_lambda)
        results_validating = np.zeros(len(validating_set_y), dtype='int')
        for ind, x in enumerate(validating_set_x):
            results_validating[ind]=int(classify_y(x, f, actual_model))
        n_of_successes = 0
        for x, y in zip(results_validating, validating_set_y):
            if x == y:
                n_of_successes += 1
        print(f"Validating model with lambda: {param_lambda} gave score: {n_of_successes / len(results_validating)}")
        # as long as new score is not worse than actual best, lambda should be maximized
        if (n_of_successes / len(results_validating) >= best_score):      
            best_score = n_of_successes / len(results_validating)
            best_lambda = param_lambda
            best_model = actual_model
    print(f"Best lambda for this validation equals: {best_lambda} with score: {best_score}")
    return best_model

In [75]:
model = validate_model(X_train, y_train, X_test, y_test)

Validating model with lambda: 0.0001 gave score: 0.9651162790697675


KeyboardInterrupt: 

In [25]:
# def get_success_percent(model_results, official_results):
#     sum = 0
#     for x, y in zip(model_results, official_results):
#         if x==y:
#             sum+=1
#     fraction = sum / len(model_results)
#     print(f"Success percent: {100*fraction}%")
#
# get_success_percent(results_testing_41, testing_setosa_versicolor_y)

Success percent: 100.0%
