In [23]:
import numpy as np
import math
import cancer

In [3]:
X, y = cancer.get_train()

In [53]:
def soft_threshold(x, delta):
    return np.sign(x) * np.max(np.abs(x) - delta, 0)    

In [54]:
def fit(X, y, delta):
    N, D = X.shape
    cs = np.unique(y)
    C = len(cs)
    sse = np.zeros(D)
    mus = []
    prior = []
    xbar = np.mean(X, axis = 0)

    for c in cs:
        i = y == c
        X_c = X[i]
        prior.append(np.mean(i))
        mu_c = np.mean(X_c, axis = 0)
        mus.append(mu_c)
        sse += np.sum((X_c - mu_c) ** 2, axis = 0)
        
    sigma2 = sse / (1.0 * N - C)
    sigma = np.sqrt(sigma2)
    s0 = np.median(sigma)
    
    m = np.zeros(C)
    d = np.zeros((C, D))
    
    for i in range(len(cs)):
        m[i] = math.sqrt(1.0 / sum(y == c) - 1.0 / N)
        d[i] = (mus[i] - xbar) / (m[i] * (sigma + s0))
        d[i] = soft_threshold(d[i], delta)
    return cs, np.log(prior), mus, sigma2
        
def predict(model, X):
    N, D = X.shape
    cs, log_prior, mus, sigma2 = model
    
    def loglik(i):
        Z = 0.5 * (X - mus[i]) ** 2 / sigma2
        return log_prior[i] - np.sum([Z[:, j] for j in range(D)], axis = 0)
    
    return cs[np.argmax([loglik(i) for i in range(len(cs))], axis = 0)]

In [55]:
X = np.array([
    [2, 3, 4, 5],
    [2.2, 3.1, 4.2, 5.3],
    [2.1, 3.2, 4.1, 5.2],
])

y = np.array([1, 1, 2])

In [52]:
model = fit(X, y, 4.3)
np.mean(predict(model, X) == y)

1.0