In [6]:
import numpy as np

# Load data
X = np.loadtxt('cm-sheet5-X.txt')  # shape (2000, 137)
y = np.loadtxt('cm-sheet5-y.txt')  # shape (2000,)

# Exponential loss, gradient, Hessian

def loss(w, X, y):
    z = -y * (X @ w)
    expz = np.exp(z)
    return np.sum(expz)


def loss(w, X, y):
    z = -y * (X @ w)
    expz = np.exp(z)
    return np.sum(expz)


def grad(w, X, y):
    z = -y * (X @ w)
    expz = np.exp(z)
    return - (expz * y) @ X

def hessian(w, X, y):
    # Compute Hessian: H = X^T diag(exp(-y_i w^T x_i)) X
    z = -y * (X @ w)
    expz = np.exp(z)
    return X.T @ (expz[:, None] * X)


# 1. Newton's Method


In [7]:

def optimize_newton(w0, X, y, tol=1e-6, maxiter=50):
    w = w0.copy()
    for i in range(maxiter):
        g = grad(w, X, y)
        H = hessian(w, X, y)
        delta = np.linalg.solve(H, g)
        w -= delta
        if np.linalg.norm(g) < tol:
            break
    return w


# 2. Gradient Descent

In [None]:
def optimize_gd(w0, X, y, lr=1.0, tol=1e-5, maxiter=10000):
    w = w0.copy()
    for i in range(maxiter):
        g = grad(w, X, y)
        # backtracking to find a safe α
        α = lr
        c = 1e-4
        L0 = loss(w, X, y)
        while True:
            w_new = w - α*g
            L1 = loss(w_new, X, y)
            if not (L1 > L0 - c*α*(g@g)): 
                break
            α *= 0.5
            if α < 1e-16: 
                break
        w = w_new
        if np.linalg.norm(g) < tol:
            break
    return w


In [9]:

def accuracy(w, X, y):
    preds = np.sign(X @ w)
    return np.mean(preds == y)

In [10]:
w0 = np.zeros(X.shape[1])
methods = {
        'newton': optimize_newton,
        # 'bfgs': optimize_bfgs,
        # 'lbfgs': optimize_lbfgs,
        'gd': optimize_gd,
        # 'cg': optimize_cg,
        # 'sgd': optimize_sgd,
        # 'minibatch': optimize_minibatch,
    }

In [11]:
for name, fn in methods.items():
    w_opt= fn(w0, X, y)
    acc = accuracy(w_opt, X, y)
    print(f"{name}: accuracy={acc:.4f}")

newton: accuracy=0.9480


  expz = np.exp(z)
  return ufunc.reduce(obj, axis, dtype, out, **passkwargs)


gd: accuracy=0.9240


In [14]:
w_opt = optimize_gd(w0, X, y, tol=1e-5, maxiter = 10000)
accuracy(w_opt, X, y)

  expz = np.exp(z)
  return ufunc.reduce(obj, axis, dtype, out, **passkwargs)


0.946