In [1]:
import numpy as np
import pandas as pd
# Print numpy arrays with 4 decimal places and without scientific notation for
# small values, so coefficient vectors are easy to compare between iterations.
np.set_printoptions(precision=4, suppress=True)

In [2]:
# Load the heart-disease data set and build the design matrix X and response y.
df = pd.read_csv("../data/South African Heart Disease.txt")
names = ['sbp', 'tobacco', 'ldl', 'famhist', 'obesity', 'alcohol', 'age']
# Encode family history as a 0.0/1.0 indicator of the 'Present' level.
# An explicit float encoding is used because pd.get_dummies returns boolean
# columns in pandas >= 2.0; mixing those with numeric columns would make the
# extracted array object-typed and break the in-place arithmetic in the fit.
df['famhist'] = (df['famhist'] == 'Present').astype(float)
# X has one row per observation; y is kept as an (N, 1) column of chd values.
X = df[names].to_numpy(dtype=float)
y = df[['chd']].to_numpy()
# Prepend a column of ones so the first coefficient acts as the intercept.
X = np.insert(X, 0, values=1, axis=1)
N, p = X.shape

In [3]:
# Fit the logistic regression by Newton-Raphson, starting from b_hat = 0.
# Each iteration evaluates the log-likelihood, its gradient and its Hessian at
# the current b_hat, takes a full Newton step, and stops once the squared
# length of that step falls below `tol`.
tol = 0.000000001   # convergence threshold on the squared step length
max_iter = 100      # safety cap so a non-converging fit cannot loop forever

# Work on plain float arrays so the linear algebra below is well defined even
# if X or y were extracted with a non-float dtype.
X_mat = np.asarray(X, dtype=float)         # design matrix, intercept in column 0
y_vec = np.asarray(y, dtype=float)[:, 0]   # responses flattened from (N, 1) to (N,)

b_hat = np.zeros(shape=(p))
delta = np.inf
n_iter = 0
while delta > tol and n_iter < max_iter:
    eta = X_mat @ b_hat                    # linear predictor x_i^T b for every row
    log1pexp = np.logaddexp(0., eta)       # log(1 + exp(eta)) without overflow
    prob = np.exp(eta - log1pexp)          # exp(eta) / (1 + exp(eta)), overflow-safe
    # Log-likelihood at the b_hat used for this step (i.e. before the update).
    loss = np.sum(y_vec * eta - log1pexp)
    # Gradient kept as a (1, p) row and Hessian as a (p, p) negative-definite
    # matrix, matching the shapes and signs of the accumulating-loop version.
    grad = ((y_vec - prob) @ X_mat).reshape(1, p)
    hess = -(X_mat.T * (prob * (1 - prob))) @ X_mat
    # Newton step: solve hess @ step = grad rather than forming the inverse.
    delta = np.linalg.solve(hess, grad[0])
    b_hat -= delta
    delta = delta @ delta                  # squared step length drives the stop test
    n_iter += 1
    # Note: `loss` is the value before this update, `b_hat` the value after it.
    print(loss, b_hat)
if delta > tol:
    raise RuntimeError(
        f"Newton-Raphson did not converge within {max_iter} iterations"
    )

-320.2339974186954 [-2.8943  0.005   0.0681  0.1436  0.7224 -0.0297 -0.0004  0.0267]
-245.7972636266484 [-3.8984  0.0057  0.0781  0.1792  0.9061 -0.0348  0.0004  0.0395]
-241.70241294159862 [-4.1209  0.0058  0.0795  0.1846  0.9381 -0.0346  0.0006  0.0424]
-241.5871635441954 [-4.1296  0.0058  0.0795  0.1848  0.9392 -0.0345  0.0006  0.0425]
-241.5870161826394 [-4.1296  0.0058  0.0795  0.1848  0.9392 -0.0345  0.0006  0.0425]


In [4]:
# Classify every training observation with the fitted model and count how many
# predicted labels agree with the observed chd values.
# The class-1 probability exp(s) / (1 + exp(s)) exceeds the class-0 probability
# exactly when the linear score s is positive, so thresholding s at 0 gives the
# same labels as an argmax over the two probabilities (ties go to class 0)
# while avoiding the exponential and any overflow for large scores.
scores = np.asarray(X, dtype=float) @ b_hat
# Keep y_hat as a float array with the same (N, 1) shape as y so the
# element-wise comparison below lines up row by row.
y_hat = (scores > 0).astype(float).reshape(y.shape)
np.sum(y == y_hat)

337