In [1]:
import numpy as np
import pylab as pl

%matplotlib inline

## 1. (0.5)

Consider the following model inspired in a support vector machine. 

$$ f_w(x) = wx + w_0$$

where 

$$ w=\sum_{x_{i}\in X}\alpha_{i}\phi(x_{i}) $$

The loss function corresponds to the 'hinge' loss function that makes it equivalent to the traditional primal optimization formulation (without the margin or regularization term):

$$E(w, x, y) = \max(0, 1 - y f_w(x))$$

Implement this model modifying the following functions:

In [2]:
def predict_ksvm(alpha, X, kernel, x):
    w = 0
    for i in range (len(alpha)):
        w += alpha[i] * kernel(X[i], x)
    return w

def loss_ksvm(alpha, X, kernel, x, y):
    r = predict_ksvm(alpha, X, kernel, x)
    return max(0, 1 - y * r)


In [3]:
def k1(x, y):
    return np.dot(x, y)

def k2(x, y):
    return (np.dot(x, y) + 1) ** 2

def test_loss_ksvm():
    X = [[-2, -1],
         [-1, 3],
         [2.5, -1.5],
         [4, 2]]
    Y = [-1, 1, 1, -1]
    epsilon = 0.0001
    test1 = [0 , 0 , 6.3 , 3.3]
    test2 = [25.775 , 0 , 11.9 , 113.0]
    alpha = [0.1, 0.4, -0.5, 0.3]
    for i, x_i in enumerate(X):
        if abs(loss_ksvm(alpha, X, k1, x_i, Y[i]) - test1[i]) > epsilon:
            raise Exception("loss_ksvm test failed!")
    for i, x_i in enumerate(X):
        if abs(loss_ksvm(alpha, X, k2, x_i, Y[i]) - test2[i]) > epsilon:
            raise Exception("loss_ksvm test failed!")

test_loss_ksvm()

## 2. (1.5)

Write a function that calculates the gradient of the loss with respect to the alpha parameter:

$$ \frac{\partial E}{\partial \alpha} $$

In [36]:
def de_dalpha(alpha, X, kernel, x, y):
    delta = np.zeros(len(alpha))
    #w = predict_ksvm(alpha, X, kernel, x)
    #tmp = 0
    #if (w * y > 1):
    #    tmp = - kernel(X[i], x)
    #tmp = max(0, 1 - y * r)

    for i in range(len(alpha)):
        if (alpha[i] * kernel(X[i], x) * y < 1):
            delta[i] = -y * kernel(X[i], x)
    return delta

In [37]:
def num_de_dalpha(alpha, X, kernel, x, y, epsilon):
    deltas = np.identity(len(alpha)) * epsilon
    de = np.zeros(len(alpha))
    for i in range(len(alpha)):
        de[i] = (loss_ksvm(alpha + deltas[i, :], X, kernel, x, y) - 
                 loss_ksvm(alpha - deltas[i, :], X, kernel, x, y)) / (2 * epsilon)
    return de

def test_de_dalpha(kernel):
    test_loss_ksvm()
    num_tests = 100
    epsilon = 0.0001
    X = [[-2, -1],
         [-1, 3],
         [2.5, -1.5],
         [4, 2]]
    for i in range(num_tests):
        talpha = np.random.randn(len(X))
        tx = np.random.randn(2)
        ty = np.random.randn(1)
        #print(de_dalpha(talpha, X, kernel, tx, ty))
        #print(num_de_dalpha(talpha, X, kernel, tx, ty, epsilon))
        #print(np.linalg.norm(de_dalpha(talpha, X, kernel, tx, ty) - num_de_dalpha(talpha, X, kernel, tx, ty, epsilon)))
        if np.linalg.norm(de_dalpha(talpha, X, kernel, tx, ty) - 
                          num_de_dalpha(talpha, X, kernel, tx, ty, epsilon)) > epsilon:
            raise Exception("de_dalpha test failed!")

test_de_dalpha(k1)
test_de_dalpha(k2)

Exception: de_dalpha test failed!