In [2]:
import numpy as np
from numpy.linalg import inv
from scipy.special import expit
import datasets

In [4]:
X, y = datasets.htwt()

In [14]:
class Classifier:
    '''
    iteratively reweighted least squares
    murphy p. 253
    '''        
    def fit(self, X, y):
        N, D = X.shape
        w = np.zeros(D)
        ybar = np.mean(y)
        w0 = np.log(ybar / (1 - ybar))

        for k in range(20):
            eta = w0 + X.dot(w)
            mu = expit(eta)
            s = mu * (1 - mu)
            z = eta + (y - mu) / s
            S = np.diag(s)
            w = inv(X.T.dot(S).dot(X)).dot(X.T).dot(S).dot(z)    
        self.theta = w0, w

    def predict_proba(self, X):
        w0, w = self.theta
        return expit(w0 + X.dot(w))

    def predict(self, X):
        return (self.predict_proba(X) > 0.5) * 1

In [15]:
clf = Classifier()
clf.fit(X, y)
np.mean(clf.predict(X) != y)

0.20476190476190476