In [1]:
import numpy as np
from numpy.linalg import inv
from sklearn import lda
import htwt

In [2]:
X, y = htwt.get()

In [3]:
def fit(X, y):
    cs = np.unique(y)
    Betas = []
    gammas = []
    Sigma = np.cov(X, rowvar = False)
    InvSigma = inv(Sigma)

    for c in cs:
        i = y == c
        prior = np.mean(i)
        mean = np.mean(X[i], axis = 0)
        gammas.append(-mean.dot(InvSigma).dot(mean) / 2.0 + np.log(prior))
        Betas.append(InvSigma.dot(mean))
        
    return cs, Betas, gammas

def predict(model, X):
    cs, Betas, gammas = model
    N, D = X.shape
    C = len(cs)
    loglik = np.empty((N, C))
    
    for i in range(C):
        loglik[:, i] = X.dot(Betas[i]) + gammas[i]
    
    return cs[np.argmax(loglik, axis = 1)]

In [4]:
model = fit(X, y)
np.mean(predict(model, X) == y)

0.87142857142857144

In [5]:
clf = lda.LDA()
clf.fit(X, y)
np.mean(clf.predict(X) == y)

0.88095238095238093