# Hard margin SVM from scratch


Implementing

$$
\max_{\lambda} \left( \sum_{i=1}^{n} \lambda_i - \frac{1}{2} \sum_{i=1}^{n} \sum_{j=1}^{n} \lambda_i \lambda_j y_i y_j \langle x_i, x_j \rangle \right),
$$

subject to:

$$
\lambda_i \geq 0 \;\ \forall i, \quad \text{and} \;\ \sum_{i=1}^{n} \lambda_i y_i = 0,
$$


In [5]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler
import cvxpy as cp


In [4]:
# preprocessing
df = pd.read_csv("iris.csv")
df.loc[df['variety'] == 'Setosa', 'target'] = 1
df.loc[df['variety'] != 'Setosa', 'target'] = -1

df = df.sample(frac=1)
idx66 = int(len(df) * 0.66)
df_train = df[:idx66]
df_test = df[idx66:]
len(df_train), len(df_test)

df_X_train = df_train[['sepal.length', 'sepal.width', 'petal.length', 'petal.width']]
df_y_train = df_train[['target']]

df_X_test = df_test[['sepal.length', 'sepal.width', 'petal.length', 'petal.width']]
df_y_test = df_test[['target']]

# scale
scaler = StandardScaler()
df_X_train = scaler.fit_transform(df_X_train)
df_X_test = scaler.transform(df_X_test)

In [6]:
n = len(df_train)
l = cp.Variable(n)
vals = (df_y_train.values * df_X_train) @ (df_y_train.T.values * df_X_train.T)

# regularize
epsilon = 1e-5
vals = vals + epsilon * np.eye(vals.shape[0])
term = cp.quad_form(l, vals)
objective = cp.Maximize(cp.sum(l) - 0.5 * term)
constraints = [l >= 0, cp.sum(cp.multiply(l, df_y_train.values.flatten())) == 0]
prob = cp.Problem(objective, constraints)

result = prob.solve()

In [7]:
# get weights
lambdas = constraints[0].dual_value
weights = np.sum(lambdas[:, np.newaxis] * df_y_train.values * df_X_train, axis=0)

# get bias  - from support idx
df_support = lambdas[:, np.newaxis] * df_y_train.values * df_X_train

support_idx = np.all(df_support != 0, axis=1)
bias = np.mean(df_y_train[support_idx].values.flatten() - np.dot(df_X_train[support_idx], weights))

In [8]:
from sklearn.metrics import roc_auc_score, confusion_matrix

def pred(X, weights, bias):
    preds = weights @ X.T + bias
    res = np.where(preds > 0, 1, -1)
    return res 


preds = pred(df_X_test, weights, bias)

cm = confusion_matrix(df_y_test, preds)
print(cm)

roc_auc = roc_auc_score(df_y_test, preds)
print(f"ROC AUC Score: {roc_auc}")

[[26  6]
 [ 0 19]]
ROC AUC Score: 0.90625
