In [2]:
import numpy as np
import pandas as pd

Minimize the soft-margin SVM objective (regularizer plus hinge loss):
$$
\frac{1}{2} \|w\|^2 + C \sum_{i=1}^{l} \max\left(0,\ 1 - y_i (w^T x_i + b)\right)
$$



In [None]:
class SVM:
    """Linear soft-margin SVM trained by per-sample SGD with momentum.

    For every sample the update follows the sub-gradient of
    ``regularization / 2 * ||w||^2 + max(0, 1 - y * (w . x + b))``,
    smoothed by an exponential moving average of past gradients.

    Labels are expected to be -1 or +1.
    """

    def __init__(self, learning_rate = 0.003, regularization = 0.2, n_iterations = 1000):
        """Store the hyper-parameters; the model itself is created by ``fit``.

        Args:
            learning_rate: step size applied to the smoothed gradient.
            regularization: coefficient of the L2 penalty on the weights.
            n_iterations: number of full passes over the training data.
        """
        self.learning_rate = learning_rate
        self.regularization = regularization
        self.n_iterations = n_iterations
        self.weights = None
        self.bias = None

    def fit(self, X, y, batch_size = 32, beta = 0.9):
        """Learn ``weights`` and ``bias`` from the training data.

        Parameters are updated after every single sample (SGD). Batch and
        mini-batch gradient descent would instead accumulate gradients over
        several samples before updating; that is not implemented here.

        Args:
            X: array-like of shape (n_samples, n_features); lists and
                DataFrames are converted to a float ndarray.
            y: array-like of n_samples labels, each -1 or +1.
            batch_size: currently unused, accepted only so existing calls
                keep working.
            beta: decay factor of the moving average of gradients.

        Raises:
            ValueError: if X is not 2-D or X and y disagree on n_samples.
        """
        # Convert up front: iterating a DataFrame yields column labels, and a
        # plain list has no ``.shape``.
        X = np.asarray(X, dtype=float)
        y = np.asarray(y, dtype=float).ravel()
        if X.ndim != 2:
            raise ValueError(f"X must be 2-D (n_samples, n_features), got shape {X.shape}")
        n_samples, n_features = X.shape
        if y.shape[0] != n_samples:
            # zip() would otherwise silently drop the surplus rows/labels.
            raise ValueError(
                f"X has {n_samples} samples but y has {y.shape[0]} labels"
            )

        self.weights = np.zeros(n_features)
        self.bias = 0.0

        # Velocity terms: moving averages of the weight and bias gradients.
        v_w = np.zeros(n_features)
        v_b = 0.0

        for _ in range(self.n_iterations):
            for x_i, y_i in zip(X, y):
                margin = y_i * (np.dot(self.weights, x_i) + self.bias)
                if margin >= 1:
                    # Outside the margin: only the L2 penalty contributes.
                    dw = self.regularization * self.weights
                    db = 0.0
                else:
                    # Inside the margin or misclassified: add the hinge term.
                    dw = self.regularization * self.weights - y_i * x_i
                    db = -y_i

                # Moving average of gradients with exponential decay.
                v_w = beta * v_w + (1 - beta) * dw
                v_b = beta * v_b + (1 - beta) * db

                self.weights -= self.learning_rate * v_w
                self.bias -= self.learning_rate * v_b

    def predict(self, X):
        """Return the predicted label (+1.0 or -1.0) for each row of X.

        Points lying exactly on the decision boundary are assigned +1 so the
        result never contains 0, which is not a valid class label.
        """
        X = np.asarray(X, dtype=float)
        output = np.dot(X, self.weights) + self.bias
        return np.where(output >= 0, 1.0, -1.0)

    def accuracy(self, y_true, y_pred):
        """Return the fraction of positions where y_pred equals y_true.

        Raises:
            ValueError: if the two label sequences differ in length.
        """
        # Compare as arrays: ``==`` on two Python lists yields a single bool.
        y_true = np.asarray(y_true).ravel()
        y_pred = np.asarray(y_pred).ravel()
        if y_true.shape != y_pred.shape:
            raise ValueError(
                f"y_true has {y_true.shape[0]} labels but y_pred has {y_pred.shape[0]}"
            )
        return np.mean(y_true == y_pred)



Momentum dampens oscillations: without it, SGD oscillates across the slopes of a ravine.
> Instead of updating the parameters using only the current gradient, momentum accumulates an exponentially decaying moving average of past gradients.
> This smooths out noise via velocity vectors.

In [39]:
# Generate synthetic training data: two Gaussian blobs, one shifted by +2
# and one by -2 in every feature.
n_samples = 100
n_features = 2

# Dedicated seeded generator so the data set is identical on every
# Restart & Run All.
train_rng = np.random.default_rng(42)

X_pos = train_rng.standard_normal((n_samples // 2, n_features)) + 2
X_neg = train_rng.standard_normal((n_samples // 2, n_features)) - 2
X = np.vstack((X_pos, X_neg))

# Labels follow the stacking order: first half +1, second half -1.
y = np.hstack((np.ones(n_samples // 2), -1 * np.ones(n_samples // 2)))
# Ground-truth labels for the test points built in the next cell
# (10 of class +1 followed by 10 of class -1).
y_test = np.hstack((np.ones(10), -1 * np.ones(10)))

In [40]:
# Dedicated seeded generator so the held-out points are identical on every
# Restart & Run All and independent of how the training data was drawn.
test_rng = np.random.default_rng(7)

# Generate test samples for class +1
X_pos_test = test_rng.standard_normal((10, n_features)) + 2

# Generate test samples for class -1
X_neg_test = test_rng.standard_normal((10, n_features)) - 2

# Stack test inputs: the +1 samples first, then the -1 samples
X_test = np.vstack((X_pos_test, X_neg_test))


In [41]:
# Train with the default hyper-parameters on (X, y), then label X_test.
svm = SVM()
svm.fit(X,y)
y_pred = svm.predict(X_test)

In [42]:
# Share of test labels that match the predictions, printed as a percentage.
acc = svm.accuracy(y_test, y_pred)
print(f"Test Accuracy: {acc * 100:.2f}%")

Test Accuracy: 100.00%


Hand calculation

In [22]:
# Small fixed example for working the computation by hand.
# NOTE: this rebinds the names X and y used by the synthetic-data cells above.
# X has shape (3, 5); the last row is all ones — presumably a bias row with
# one sample per column (TODO confirm against the calculation that follows).
X = np.array([
    [-0.8, -0.2, 0.65, 0.11, -0.12],
    [0.87, 0.5, -0.05, -0.76, 0.43],
    [1, 1, 1, 1, 1]
])

# Five labels, each -1 or +1 — assumed to be one per column of X.
y = np.array([1, -1, -1, -1, 1])

# Three-component weight vector; presumably the last entry pairs with the
# row of ones (i.e. acts as the bias) — verify.
w = np.array([10, 10, 0])