# Test of logistic regression estimation by stochastic gradient MCMC (SG-MCMC)

In [9]:
import numpy as np
import scipy.special as ssp

In [5]:
# Problem size: number of observations and number of regression weights.
n_sample = 100
M = 10

# Seed the global NumPy RNG so the synthetic data set is reproducible.
data_seed = 20240802
np.random.seed(data_seed)

# The order of the draws matters for reproducibility: first the weights,
# then the design matrix, and finally the uniforms used to sample the labels.
true_w = np.random.normal(size=M)
train_X = np.random.normal(size=(n_sample, M))

# Success probability of each row under the true weights (logistic link).
true_p = ssp.expit(train_X @ true_w)

# A label is 1 where its uniform draw falls below the probability, else 0.
train_Y = np.less(np.random.uniform(size=n_sample), true_p).astype(int)

# Learning

In [33]:
def dlogp(train_X: np.ndarray, train_Y: np.ndarray, est_w: np.ndarray, pri_beta: float) -> np.ndarray:
    """Return the gradient of the log posterior with respect to the weights.

    The value is the logistic-regression log-likelihood gradient,
    ``(train_Y - expit(train_X @ est_w)) @ train_X``, minus the term
    ``pri_beta * est_w`` (the gradient of a zero-mean Gaussian log prior
    whose precision is ``pri_beta``).

    Args:
        train_X: Design matrix; used as ``train_X @ est_w``, so its last
            axis must match the length of ``est_w``.
        train_Y: Observed labels, one per row of ``train_X``.
        est_w: Current weight vector at which the gradient is evaluated.
        pri_beta: Coefficient of the weight-decay (prior) term.

    Returns:
        Gradient array with the same shape as ``est_w``.
    """
    # Residual between the observed labels and the predicted probabilities.
    residual = train_Y - ssp.expit(train_X @ est_w)
    likelihood_grad = residual @ train_X
    prior_grad = pri_beta * est_w
    return likelihood_grad - prior_grad

In [49]:
# Number of independent chains (L) and number of update steps per chain (K).
L = 1000
K = 100

# Step size of each Langevin update.
time_step = 0.1

# Coefficient of the prior (weight-decay) term passed to dlogp.
pri_beta = 0.001

# Seed for the random draws made during sampling.
ln_seed = 20230802

In [50]:
# Re-seed the global NumPy RNG so the sampling run is reproducible
# independently of the draws consumed while generating the data.
np.random.seed(ln_seed)

In [51]:
# Unadjusted Langevin sampling of the weights: run L independent chains of
# K steps each and keep only the final state of every chain.
noise_scale = np.sqrt(2 * time_step)  # std-dev of the injected Gaussian noise

post_w = []
for _chain in range(L):
    # A fresh array per chain, so the in-place updates below never alias a
    # draw that has already been stored in post_w.
    est_w = np.random.normal(size=M)
    for _step in range(K):
        # Drift along the log-posterior gradient, plus Gaussian diffusion.
        drift = time_step * dlogp(train_X, train_Y, est_w, pri_beta)
        est_w += drift + noise_scale * np.random.normal(size=M)
    post_w.append(est_w)

In [54]:
# Posterior-mean estimate: average the retained draws over the chain axis.
np.mean(np.array(post_w), axis=0)

array([-3.75981342,  1.51049736, -2.11663796, -1.21848046, -1.87539804,
        0.40798306,  1.09147724, -4.16534617,  2.13457883, -1.70833484])

In [55]:
# Display the weights used to generate the data, for comparison with the
# estimate above.
true_w

array([-2.02588083,  0.62254118, -1.33538204, -0.1008201 , -1.82274825,
        0.86196226, -0.03320596, -2.65577799,  1.04576374, -1.24776485])

In [42]:
# Display est_w, which holds the final state of the last chain that was run.
# NOTE(review): this cell's execution count (42) is lower than the sampling
# cell's (51), so the output shown below may come from an earlier run - confirm.
est_w

array([-4.4893283 ,  0.56070702, -1.86528631, -0.66822893, -2.09065585,
        0.97869774,  1.06782702, -5.14621586,  1.49147957, -1.33249236])