In [None]:
import numpy as np
import matplotlib.pyplot as plt

from tqdm import tqdm
import time

In [None]:
def sigmoid(x):
    """Numerically stable logistic function ``1 / (1 + exp(-x))``.

    The textbook formula evaluates ``np.exp(-x)``, which overflows (and emits
    a RuntimeWarning) for large negative ``x``. Here the exponential is only
    ever taken of a non-positive number, so it cannot overflow.

    Parameters
    ----------
    x : scalar or array_like
        Input value(s).

    Returns
    -------
    numpy.float64 or numpy.ndarray
        Element-wise sigmoid: a NumPy scalar for scalar input, otherwise an
        array with the same shape as ``x``.
    """
    x = np.asarray(x, dtype=float)
    # exp(-|x|) lies in [0, 1], so neither branch below can overflow.
    e = np.exp(-np.abs(x))
    out = np.where(x >= 0, 1.0 / (1.0 + e), e / (1.0 + e))
    # ``[()]`` unwraps a 0-d result to a NumPy scalar; it is a no-op for n-d arrays.
    return out[()]


def generate_logistic_data(theta_true, n_samples=1000, rng=None):
    """Simulate a logistic-regression data set with a known parameter vector.

    Features are i.i.d. standard normal; a constant column of ones is
    prepended so that ``theta_true[0]`` acts as the intercept. Labels are
    Bernoulli draws with success probability ``sigmoid(X @ theta_true)``.

    Parameters
    ----------
    theta_true : array_like, shape (d,)
        True parameters, intercept first.
    n_samples : int, default 1000
        Number of rows to generate.
    rng : numpy.random.Generator or numpy.random.RandomState, optional
        Source of randomness. When omitted, NumPy's global random state is
        used, exactly as before this parameter existed.

    Returns
    -------
    X : numpy.ndarray, shape (n_samples, d)
        Design matrix whose first column is all ones.
    y : numpy.ndarray, shape (n_samples,)
        Binary labels in {0, 1}.
    """
    # The ``np.random`` module exposes the same ``standard_normal`` and
    # ``binomial`` callables as a Generator, so one code path serves both.
    sampler = np.random if rng is None else rng
    n_features = len(theta_true) - 1
    features = sampler.standard_normal(size=(n_samples, n_features))
    # Add a bias term
    X = np.hstack((np.ones((n_samples, 1)), features))
    p = sigmoid(np.dot(X, theta_true))
    y = sampler.binomial(1, p, n_samples)
    return X, y

In [None]:
# Hyperparameters

# Ground-truth parameter vector. generate_logistic_data prepends a column of
# ones to X, so the first entry multiplies that constant (intercept) column.
theta_true = np.array([0, 3, -9, 4, -9, 15, 0, -7, 1, 0])
# Dimension is derived from theta_true so the two can never drift apart.
d = len(theta_true)
# Number of simulated samples.
n = 10_000

In [None]:
# Simulate the data set and report the shapes of the design matrix and labels.
X, y = generate_logistic_data(theta_true, n_samples=n)
print(X.shape, y.shape, sep="\n")

In [None]:
def logistic_loss(theta, X, y):
    """Per-sample negative log-likelihood of a logistic regression model.

    Parameters
    ----------
    theta : array_like, shape (d,)
        Model parameters.
    X : array_like, shape (d,) or (n, d)
        One feature vector or a matrix with one sample per row.
    y : scalar or array_like, shape (n,)
        Labels in {0, 1} matching ``X``.

    Returns
    -------
    numpy.float64 or numpy.ndarray
        Loss for each sample (not averaged), with the shape of ``X @ theta``.
    """
    z = np.dot(X, theta)
    # With p = sigmoid(z):  -y*log(p) - (1-y)*log(1-p) == log(1 + exp(z)) - y*z.
    # The right-hand side never forms log(0), so saturated predictions give a
    # finite loss instead of inf (or nan from 0 * -inf).
    return np.logaddexp(0, z) - y * z


def logistic_grad(theta, X, y):
    """Per-sample gradient of the logistic loss with respect to ``theta``.

    Works for a single sample as well as for a batch.

    Parameters
    ----------
    theta : array_like, shape (d,)
        Model parameters.
    X : numpy.ndarray, shape (d,) or (n, d)
        One feature vector or a matrix with one sample per row.
    y : scalar or array_like, shape (n,)
        Label(s) in {0, 1} matching ``X``.

    Returns
    -------
    numpy.ndarray
        ``(sigmoid(x . theta) - y) * x`` for each sample: shape (d,) for a
        single sample, (n, d) for a batch (one gradient per row, not summed).
    """
    z = np.dot(X, theta)
    residual = np.asarray(sigmoid(z) - y)
    # A trailing axis lets the residual broadcast along the feature axis:
    # () -> (1,) against (d,), and (n,) -> (n, 1) against (n, d).
    return residual[..., None] * X

In [None]:
# Online SGD, only one pass on data
def SGD_logistic(theta, theta_true, X, y, lr=0.01):
    """
    Single-pass (online) stochastic gradient descent for logistic regression.

    theta: initial estimate of theta (left unmodified; a float copy is updated)
    theta_true: true theta, used only to track the estimation error
    X: features, 2-D array with one sample per row
    y: labels, one per row of X
    lr: learning rate (constant step size)

    Returns (theta, errors): the final estimate, and a list holding the
    squared Euclidean distance to theta_true for the initial estimate followed
    by one entry after each processed sample.
    """
    # A float copy keeps the caller's array intact and makes the in-place
    # update below valid even when an integer array is passed in.
    theta = np.array(theta, dtype=float)
    n = X.shape[0]

    diff = theta - theta_true
    errors = [diff.dot(diff)]
    for i in tqdm(range(n)):
        theta -= lr * logistic_grad(theta, X[i], y[i])
        diff = theta - theta_true
        errors.append(diff.dot(diff))
    return theta, errors

In [None]:
# Online Newton algorithm, only one pass on data
def SNA_logistic(theta, theta_true, X, y, lr=0.01, burn_in=1000):
    """
    Single-pass (online) stochastic Newton algorithm for logistic regression.

    Each sample updates a running mean of the per-sample Hessians
    p * (1 - p) * x x^T; once more than ``burn_in`` iterations have passed,
    the gradient step is preconditioned by the inverse of that mean.

    theta: initial estimate of theta (left unmodified; a float copy is updated)
    theta_true: true theta, used only to track the estimation error
    X: features, 2-D array with one sample per row
    y: labels, one per row of X
    lr: learning rate (constant step size)
    burn_in: the Hessian estimate is inverted only for iterations i > burn_in;
        until then the identity is used, i.e. plain SGD steps

    Returns (theta, hess, hess_inv, errors): the final estimate, the final
    running-mean Hessian, the last preconditioner used, and a list holding the
    squared Euclidean distance to theta_true for the initial estimate followed
    by one entry after each processed sample.
    """
    # A float copy keeps the caller's array intact and makes the in-place
    # update below valid even when an integer array is passed in.
    theta = np.array(theta, dtype=float)
    n, d = X.shape
    hess = np.eye(d)
    hess_inv = np.eye(d)
    diff = theta - theta_true
    errors = [diff.dot(diff)]

    for i in tqdm(range(n)):
        x_i = X[i]
        p = sigmoid(np.dot(x_i, theta))
        grad = (p - y[i]) * x_i
        # Incremental mean of the per-sample Hessians. At i == 0 the divisor
        # is 1, so the identity initialisation is replaced by the first term.
        hess += (p * (1 - p) * np.outer(x_i, x_i) - hess) / (i + 1)
        if i > burn_in:
            try:
                hess_inv = np.linalg.inv(hess)
            except np.linalg.LinAlgError:
                # Singular estimate: report it and fall back to an SGD step.
                print(f"iteration {i}: hess not invertible")
                hess_inv = np.eye(d)
        theta -= lr * np.dot(hess_inv, grad)
        diff = theta - theta_true
        errors.append(diff.dot(diff))
    return theta, hess, hess_inv, errors

In [None]:
# Initial estimate: the true parameters perturbed by standard-normal noise.
theta = theta_true + np.random.normal(0, 1, size=theta_true.shape)
print(theta)
print(theta_true)
# Both estimators copy `theta` internally, so they start from the same point
# and see the same data in the same order.
theta_SGD, errors_SGD = SGD_logistic(theta, theta_true, X, y)
theta_SNA, hess, hess_inv, errors_SNA = SNA_logistic(theta, theta_true, X, y)
print(theta_SNA)

In [None]:
def plot_param_errors(errors_SGD, errors_SNA):
    """Draw both error sequences on the current pyplot figure and show it.

    Each sequence is plotted against its index; the curves are labelled
    "SGD" and "SNA" in the legend.
    """
    for trace, name in ((errors_SGD, "SGD"), (errors_SNA, "SNA")):
        plt.plot(trace, label=name)

    plt.xlabel("Sample size")
    plt.ylabel("Error")
    plt.title("Parameter Estimation Error over Sample Size")
    plt.legend()
    plt.show()

In [None]:
# Plot the error traces returned by SGD_logistic and SNA_logistic side by side.
plot_param_errors(errors_SGD, errors_SNA)