Linear Support Vector Machine (SVM) classifier from scratch
using stochastic gradient descent (SGD)  



**SVM:**   
$\frac{1}{n} \sum_{i=1}^n \left[ 1 - y_i(x_i^t \beta + \alpha) \right]_+ + \frac{\lambda}{2} \|\beta\|^2$    
Hinge loss + $L_2$ penalty, where $[u]_+ = \max(0, u)$.  

**Pegasos Algorithm:**  
1. Initialize $\beta_1 = 0_{p \times 1}$, $\alpha_1 = 0$, and $t = 0$

2. For epoch $= 1, 2, \ldots, T$ do
    - For $i = 1, 2, \ldots, n$ do
        - $t = t + 1$, $\eta_t = \frac{1}{\lambda t}$
        - Update $\beta_{t+1} \gets \beta_t - \eta_t \Delta_t$
        - Update $\alpha_{t+1} \gets \alpha_t - \eta_t \delta_t$

Here $\eta_t$ is the learning rate, and $\Delta_t$ and $\delta_t$ are the (sub)gradients of $J_i(\beta, \alpha)$ with respect to $\beta$ and $\alpha$, respectively, evaluated at $\beta = \beta_t$ and $\alpha = \alpha_t$:

$J_i(\beta, \alpha) = \frac{\lambda}{2} \|\beta\|^2 + \left[ 1 - y_i(x_i^t \beta + \alpha) \right]_+$

$\Delta_t = \begin{cases}
    \lambda \beta_t - y_i x_i & \text{if } y_i (x_i^t \beta_t + \alpha_t) < 1 \\
    \lambda \beta_t & \text{otherwise}
\end{cases}$

$\delta_t = \begin{cases}
    -y_i & \text{if } y_i (x_i^t \beta_t + \alpha_t) < 1 \\
    0 & \text{otherwise}
\end{cases}$


In [None]:
import numpy as np
import pandas as pd
from sklearn.metrics import confusion_matrix, accuracy_score

def _features_and_labels(frame):
    """Split a frame into a float feature matrix and +1/-1 labels.

    Every column except the last is treated as a feature; the last column
    is the raw class, mapped to +1 where it equals 5 and to -1 otherwise.
    """
    features = frame.iloc[:, :-1].astype(float).values
    raw_labels = frame.iloc[:, -1].astype(int).values
    return features, np.where(raw_labels == 5, 1, -1)


# Read both CSV files first, then derive the arrays used for fitting
# and evaluation.
url = "train.csv"
train_data = pd.read_csv(url)
url = "test.csv"
test_data = pd.read_csv(url)

X_train, y_train = _features_and_labels(train_data)
X_test, y_test = _features_and_labels(test_data)


def pegasos(X, y, lambda_reg=0.01, epochs=20, seed=42):
    """Fit a linear SVM with the Pegasos stochastic sub-gradient method.

    Each epoch visits every sample once, in a freshly shuffled order. At
    global step ``t`` the step size is ``1 / (lambda_reg * t)`` and the
    parameters move along the sub-gradient of
    ``lambda_reg / 2 * ||beta||^2 + max(0, 1 - y_i * (x_i . beta + alpha))``.

    Args:
        X: Array-like feature matrix of shape (n, d).
        y: Array-like of n labels; the hinge loss assumes values -1 / +1.
        lambda_reg: Regularization strength; must be strictly positive
            because the step size divides by it.
        epochs: Number of full passes over the data.
        seed: Seed of the private random generator that shuffles samples.

    Returns:
        Tuple ``(beta, alpha)``: the weight vector of shape (d,) and the
        intercept.

    Raises:
        ValueError: If ``lambda_reg`` is not positive, ``X`` is not 2-D,
            or ``X`` and ``y`` disagree on the number of samples.
    """
    if lambda_reg <= 0:
        raise ValueError(f"lambda_reg must be positive, got {lambda_reg!r}")

    X = np.asarray(X, dtype=float)
    y = np.asarray(y)
    if X.ndim != 2:
        raise ValueError(f"X must be 2-dimensional, got shape {X.shape}")
    n, d = X.shape
    if len(y) != n:
        raise ValueError(f"X has {n} samples but y has {len(y)}")

    # A private legacy generator yields the same shuffles as seeding the
    # global one, without clobbering the caller's global random state.
    rng = np.random.RandomState(seed)

    beta = np.zeros(d)
    alpha = 0.0  # float from the start so epochs=0 also returns a float
    t = 0

    for _ in range(epochs):
        for i in rng.permutation(n):
            t += 1
            eta_t = 1 / (t * lambda_reg)
            x_i, y_i = X[i], y[i]

            # Decide on the margin using the parameters *before* this step.
            violates_margin = y_i * (np.dot(x_i, beta) + alpha) < 1

            # The penalty term always shrinks beta; the hinge term only
            # contributes when the sample is inside the margin.
            beta = (1 - eta_t * lambda_reg) * beta
            if violates_margin:
                beta = beta + eta_t * y_i * x_i
                alpha += eta_t * y_i

    return beta, alpha

# Hyperparameters for this run: regularization strength and number of
# passes over the training data.
lambda_reg, epochs = 0.01, 20

# Fit the classifier on the training split (default seed).
beta, alpha = pegasos(X_train, y_train, lambda_reg=lambda_reg, epochs=epochs)

def predict(X, beta, alpha):
    """Classify the rows of X with the linear rule sign(X . beta + alpha).

    Returns an array holding +1 where the score is strictly positive and
    -1 everywhere else (a score of exactly zero maps to -1).
    """
    scores = np.dot(X, beta) + alpha
    is_positive = scores > 0
    return np.where(is_positive, 1, -1)

# Label both splits with the fitted hyperplane.
y_train_pred, y_test_pred = (
    predict(features, beta, alpha) for features in (X_train, X_test)
)

# Report one confusion matrix per split; scikit-learn puts true labels on
# the rows and predicted labels on the columns.
for heading, y_true, y_pred in (
    ("Training Data Confusion Matrix:", y_train, y_train_pred),
    ("\nTest Data Confusion Matrix:", y_test, y_test_pred),
):
    print(heading)
    print(confusion_matrix(y_true, y_pred))

# Misclassification rate on the test split is the complement of accuracy.
test_accuracy = accuracy_score(y_test, y_test_pred)
test_error = 1 - test_accuracy
print(f"Test Error: {test_error:.4f}")

Training Data Confusion Matrix:
[[100   0]
 [  1  99]]

Test Data Confusion Matrix:
[[281  19]
 [ 13 287]]
Test Error: 0.0533
