In [20]:
""" this note book implements two nonlinear classification algorithms: 
    support vector machines with soft margin and it dual version with linear kernel
    computes optimal separating hyperplanes on load breast cancer dataset and displays pearson correlation 
    between classifiers """

import numpy as np
from sklearn.datasets import load_breast_cancer

# shift: when True, the {0,1} labels are mapped to {-1,+1} before training
shift = True
# scale: when True, each input variable is divided by its maximum absolute value
scale = True

In [21]:
# load the data: with return_X_y=True the loader returns the pair (inputs, labels)
# instead of a dataset object; X holds one row per sample, Y one label per sample
X, Y = load_breast_cancer(return_X_y=True)

In [22]:
# to convert the {0,1} output into {-1,+1}
# Sign-based mapping instead of 2*Y - 1 so that re-running this cell is harmless:
# labels that are already -1/+1 are left unchanged rather than becoming -3/+1.
if shift:
    Y = np.where(Y > 0, 1, -1)

In [23]:
# number of samples (rows of X) and number of input variables (columns of X)
mdata, ndim = X.shape

# show the shapes of the inputs and of the labels, one per line
print(X.shape, Y.shape, sep="\n")

(569, 30)
(569,)


In [24]:
# scale each of the input variables to have the maximum absolute value equal to 1
if scale:
    # per-column maximum absolute value; broadcasting divides every row by it,
    # so no explicit (samples x variables) matrix of repeated maxima is needed
    max_abs_per_column = np.max(np.abs(X), axis=0)
    # an all-zero column would give 0/0 = NaN; dividing by 1 leaves it untouched
    max_abs_per_column[max_abs_per_column == 0] = 1.0
    X /= max_abs_per_column

In [41]:
# stochastic gradient descent algorithm for soft-margin support vector machines
def stochastic_gradient_descent_sm(X, Y, lambda_ = 0.01, eta_ = 0.1, epochs = 10):
    """Fit a linear soft-margin SVM (no bias term) with per-sample subgradient steps.

    For every sample the weight vector takes one step of size ``eta_`` along the
    negative subgradient of ``max(0, 1 - y_i * <w, x_i>) + lambda_/2 * ||w||^2``.
    Samples are visited in row order on every epoch (no shuffling), so the
    result is deterministic.

    Parameters
    ----------
    X : array-like of shape (n_samples, n_features)
        Input samples, one per row.
    Y : array-like of shape (n_samples,)
        Labels; the margin test assumes they are encoded as -1 / +1.
    lambda_ : float
        Weight of the L2 regulariser.
    eta_ : float
        Constant step size.
    epochs : int
        Number of full passes over the samples.

    Returns
    -------
    numpy.ndarray of shape (n_features,)
        The learned weight vector.
    """
    X = np.asarray(X, dtype=float)
    Y = np.asarray(Y)
    # take the problem size from the arguments, not from notebook-level globals
    n_samples, n_features = X.shape
    w = np.zeros(n_features)

    for _ in range(epochs):
        for i in range(n_samples):
            if np.dot(X[i], w) * Y[i] < 1:
                # sample is inside the margin (or misclassified): hinge term is active
                gradient = -Y[i] * X[i] + lambda_ * w
            else:
                # margin satisfied: only the regulariser contributes
                gradient = lambda_ * w
            w = w - eta_ * gradient

    return w

In [74]:
# linear kernel
def kernel(u, v):
    """Linear kernel: return ``np.dot(u, v)`` for the two inputs."""
    inner_product = np.dot(u, v)
    return inner_product

# Gram matrix of the linear kernel: entry [i, j] is the dot product of rows i and j of X
K = X.dot(X.T)

In [75]:
# stochastic dual coordinate ascent algorithm for support vector machines
def stochastic_dual_coordinate_ascent(X, Y, C=1000, epochs = 10, K=None):
    """Solve the box-constrained SVM dual (no bias term) by coordinate ascent.

    Each inner step maximises
    ``sum_i alpha_i - 1/2 * sum_ij alpha_i alpha_j y_i y_j K_ij``
    over a single ``alpha_i`` with all other coordinates fixed, then clips the
    result to ``[0, C / n_samples]``. Coordinates are swept in index order on
    every epoch (no random sampling), so the result is deterministic.

    Parameters
    ----------
    X : array-like of shape (n_samples, n_features)
        Input samples, one per row.
    Y : array-like of shape (n_samples,)
        Labels; the closed-form update assumes they are encoded as -1 / +1.
    C : float
        Penalty constant; every alpha_i is bounded above by ``C / n_samples``.
    epochs : int
        Number of full sweeps over the coordinates.
    K : array-like of shape (n_samples, n_samples), optional
        Precomputed kernel (Gram) matrix. When omitted, the linear kernel
        ``X @ X.T`` is computed from ``X``.

    Returns
    -------
    numpy.ndarray of shape (n_samples,)
        The dual coefficients alpha.
    """
    X = np.asarray(X, dtype=float)
    Y = np.asarray(Y)
    # take the problem size and the kernel from the arguments, not from globals
    n_samples = X.shape[0]
    alpha = np.zeros(n_samples)
    if n_samples == 0:
        return alpha

    K = np.dot(X, X.T) if K is None else np.asarray(K)
    upper = C / n_samples

    for _ in range(epochs):
        for i in range(n_samples):
            # sum over j != i of alpha_j * y_j * K_ij (full sum minus the i-th term)
            others = np.sum(K[i] * Y * alpha) - alpha[i] * Y[i] * K[i, i]
            # derivative of the dual objective w.r.t. alpha_i evaluated at alpha_i = 0
            slope = 1 - Y[i] * others
            if K[i, i] > 0:
                candidate = slope / K[i, i]
            else:
                # zero diagonal: the objective is linear in alpha_i, so the
                # maximiser sits on a bound; avoids a division by zero
                candidate = upper if slope > 0 else 0.0
            alpha[i] = min(upper, max(0, candidate))
    return alpha

In [76]:
# run the primal soft-margin SGD solver for 10 epochs and print the weight vector it returns
weights = stochastic_gradient_descent_sm(X, Y, epochs=10)
print(weights)

[ 0.1577998  -0.07338511  0.05409096 -0.74451105  1.04258952 -0.39120249
 -1.52575812 -1.68575933  0.93783183  1.90232994 -0.68649229  0.13980422
 -0.54062695 -0.60711913  0.11643549  0.13389808  0.12972715  0.07995812
  0.34689575  0.31577184 -0.34043639 -0.38152709 -0.36428237 -0.9781163
  0.46601817 -0.67709888 -1.14472687 -1.65195119  0.21446617  0.55636185]


In [77]:
# run the dual coordinate ascent solver for 10 epochs; alpha holds one coefficient per sample
alpha = stochastic_dual_coordinate_ascent(X, Y, epochs=10)

In [78]:
# recover the primal weight vector from the dual solution:
# w = sum_i alpha_i * y_i * x_i, written as one vector-matrix product
dual_weights = np.dot(alpha * Y, X)
print(dual_weights)

[ 1.15447157 -1.23787938  0.93494548 -1.10063657  1.84027672 -0.50318653
 -2.45999222 -3.32013741  1.41272459  4.47158206 -1.92444264  0.37357132
 -1.26700269 -1.4852953  -0.60187186  1.20391973  1.08144463  0.84023283
  0.41254023  0.53459172 -0.39161541 -1.81432296 -0.38131352 -2.05273578
  0.27112611 -1.16668102 -1.24719523 -2.60446236 -0.69437082  0.98602932]


In [79]:
# correlation coefficient matrix between the primal (SGD) and dual weight vectors;
# the off-diagonal entry is the correlation between the two
print(np.corrcoef(weights, dual_weights))

[[1.         0.92227312]
 [0.92227312 1.        ]]


In [48]:
# Euclidean (2-norm) distance between the primal and dual weight vectors
np.linalg.norm(weights - dual_weights)

5.076522602387895