In [3]:
import numpy as np
from numpy.linalg import inv
from sklearn import lda
import htwt

In [4]:
X, y = htwt.get()

In [8]:
def fit(X, y):
    Sigma = np.cov(X, rowvar = False)
    InvSigma = inv(Sigma)

    def theta(c):
        i = y == c
        prior = np.mean(i)
        mean = np.mean(X[i], axis = 0)
        gamma = -mean.dot(InvSigma).dot(mean) / 2.0 + np.log(prior)
        Beta = InvSigma.dot(mean)
        return c, gamma, Beta

    return [theta(c) for c in np.unique(y)]

def predict(model, X):
    p = []
    cs = []
        
    for theta in model:
        c, gamma, Beta = theta
        cs.append(c)
        p.append(X.dot(Beta) + gamma)
    return np.array([cs[i] for i in np.argmax(p, axis = 0)])

In [9]:
model = fit(X, y)
np.mean(predict(model, X) == y)

0.87142857142857144

In [10]:
clf = lda.LDA()
model = clf.fit(X, y)
np.mean(clf.predict(X) == y)

0.88095238095238093