In [190]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt

In [192]:
df = pd.read_csv('wdbc.data', header=None,sep=',')
df = df.drop(columns=0)

In [194]:
df[1].unique()
df[1] = df[1].replace({'M':1,'B':0})

In [196]:
df.head()

Unnamed: 0,1,2,3,4,5,6,7,8,9,10,...,22,23,24,25,26,27,28,29,30,31
0,1,17.99,10.38,122.8,1001.0,0.1184,0.2776,0.3001,0.1471,0.2419,...,25.38,17.33,184.6,2019.0,0.1622,0.6656,0.7119,0.2654,0.4601,0.1189
1,1,20.57,17.77,132.9,1326.0,0.08474,0.07864,0.0869,0.07017,0.1812,...,24.99,23.41,158.8,1956.0,0.1238,0.1866,0.2416,0.186,0.275,0.08902
2,1,19.69,21.25,130.0,1203.0,0.1096,0.1599,0.1974,0.1279,0.2069,...,23.57,25.53,152.5,1709.0,0.1444,0.4245,0.4504,0.243,0.3613,0.08758
3,1,11.42,20.38,77.58,386.1,0.1425,0.2839,0.2414,0.1052,0.2597,...,14.91,26.5,98.87,567.7,0.2098,0.8663,0.6869,0.2575,0.6638,0.173
4,1,20.29,14.34,135.1,1297.0,0.1003,0.1328,0.198,0.1043,0.1809,...,22.54,16.67,152.2,1575.0,0.1374,0.205,0.4,0.1625,0.2364,0.07678


In [198]:
X = df.drop(columns=1)
y = df[1]

#train/test split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=10)

#need to scale data
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

In [300]:
def sigmoid(z):
    return 1 / (1 + np.exp(-z))

def logistic_loss(w, X, y):
    z = X @ w
    return np.mean(np.log(1 + np.exp(-y * z)))

def gradient(w, X, y):
    z = X @ w
    return -X.T @ (y * (1 - sigmoid(y * z))) / len(y)

def line_search(f, x, p, nabla, X, y): 
    a = 1
    c1 = 1e-4 
    c2 = 0.9 
    fx = f(x)
    x_new = x + a * p 
    nabla_new = gradient(x_new, X, y)

    while f(x_new) >= fx + (c1 * a * nabla.T @ p) or nabla_new.T @ p <= c2 * nabla.T @ p:
        a *= 0.5
        x_new = x + a * p 
        nabla_new = gradient(x_new, X, y)

    return a

In [243]:
def bfgs(X, y, epsilon=1e-5, max_iter=100):
    n_features = X.shape[1]
    w = np.zeros(n_features)
    H = np.eye(n_features)
    k = 0

    grad = gradient(w, X, y)

    while np.linalg.norm(grad) > epsilon and k < max_iter:
        p = -H @ grad

        alpha = line_search(
            lambda w_: logistic_loss(w_, X, y),
            w,p,grad,X,y)

        w_new = w + alpha * p

        s = w_new - w
        grad_new = gradient(w_new, X, y)
        yk = grad_new - grad
        rho = 1.0 / (yk @ s)

        I = np.eye(n_features)
        H = (I - rho * np.outer(s, yk)) @ H @ (I - rho * np.outer(yk, s)) + rho * np.outer(s, s)

        w = w_new
        grad = grad_new
        k += 1

    return w, k, H

In [255]:
def l_bfgs(X, y, epsilon=1e-5, max_iter=100, m=10):
    n_features = X.shape[1]
    w = np.zeros(n_features)
    k = 0

    grad = gradient(w, X, y)
    history = []

    while np.linalg.norm(grad) > epsilon and k < max_iter:
        # 2 loop recursion
        q = grad.copy()
        alphas = []
        rhos = []

        for s_i, y_i in reversed(history):
            rho_i = 1.0 / (y_i @ s_i)
            rhos.append(rho_i)
            alpha_i = rho_i * (s_i @ q)
            alphas.append(alpha_i)
            q = q - alpha_i * y_i

        r = q

        for i, (s_i, y_i) in enumerate(history):
            rho_i = rhos[-(i+1)]
            beta = rho_i * (y_i @ r)
            r = r + s_i * (alphas[-(i+1)] - beta)

        p = -r

        alpha = line_search(
            lambda w_: logistic_loss(w_, X, y),
            w,p,grad,X,y)

        w_new = w + alpha * p

        s = w_new - w
        grad_new = gradient(w_new, X, y)
        yk = grad_new - grad

        # want m pairs
        if len(history) == m:
            history.pop(0)
        history.append((s, yk))

        w = w_new
        grad = grad_new
        k += 1

    return w, k

In [302]:
w_final, k_final, H_final = bfgs(X_train, y_train)

y_pred_prob = sigmoid(X_test @ w_final)
y_pred = (y_pred_prob >= 0.5).astype(int)

print("Accuracy:", accuracy_score(y_test, y_pred))
print(f"BFGS converged in {k_final} iterations")

Accuracy: 0.8859649122807017
BFGS converged in 58 iterations


  return 1 / (1 + np.exp(-z))


In [304]:
w_final, k_final = l_bfgs(X_train, y_train,m=15)

y_pred_prob = sigmoid(X_test @ w_final)
y_pred = (y_pred_prob >= 0.5).astype(int)

print("Accuracy:", accuracy_score(y_test, y_pred))
print(f"L-BFGS converged in {k_final} iterations")

Accuracy: 0.8771929824561403
L-BFGS converged in 59 iterations
