In [21]:
import numpy as np
from numpy.linalg import inv
from scipy.special import expit
from scipy.optimize import minimize
import statsmodels.api as sm
from scipy import stats
from sklearn.metrics import log_loss
import datasets
from scipy.special import erf

In [8]:
X, y = datasets.htwt()
N, D = X.shape
Xaug = np.hstack([np.ones((N, 1)), X])

In [9]:
model = sm.GLM(y, Xaug, family = sm.families.Binomial()).fit()
np.mean((model.predict(Xaug) > 0.5) != y)

0.11904761904761904

In [None]:
# https://github.com/rvtonge/darpa-graphs/blob/master/thirdparty/pmtk3-master/
# pmtksupportCopy/markSchmidt-9march2011/markSchmidt/lossFuncs/ProbitLoss.m

function [nll,g,H] = ProbitLoss(w,X,y)
% w(feature,1)
% X(instance,feature)
% y(instance,1)

yXw = y.*(X*w)/sqrt(2);
erf_yXw = erf(full(yXw));
probit_yXw = (1/2)*(1+erf_yXw)+eps;
nll = -sum(log(probit_yXw));

if nargout > 1
        norm_yXw = (1/sqrt(2*pi))*exp(-yXw.^2);
        g = -X'*(y.*norm_yXw./probit_yXw);
end

if nargout > 2
    H = X'*diag(sparse(norm_yXw.*norm_yXw./probit_yXw.^2 + norm_yXw.*yXw.*sqrt(2)./probit_yXw))*X;
end

In [27]:
# probit regression
y_tilde = 2 * y - 1

loss = lambda w: -sum(y_tilde * Xaug.dot(w))
loss = lambda w: -sum(np.log(0.5 * (1 + erf(y_tilde * Xaug.dot(w) / np.sqrt(2))) + eps))

def grad(w):
    eta = Xaug.dot(w)
    phi_y = stats.norm.pdf(eta) * y_tilde
    Phi = stats.norm.cdf(y_tilde * eta)
    return sum([Xaug[i].dot(phi_y[i]) / Phi[i] for i in xrange(N)])

def hess(w):
    H = np.zeros((D + 1, D + 1))
    eta = Xaug.dot(w)
    for i in xrange(N):
        yeta = y_tilde[i] * eta[i]
        Phi = stats.norm.cdf(yeta)
        phi = stats.norm.pdf(eta[i])
        H -= np.outer(Xaug[i], Xaug[i]) * ((phi / Phi) ** 2 + (yeta * phi / Phi))
    return H

w = minimize(loss, np.zeros(D + 1), tol = 1e-6).x
print w
w = minimize(loss, np.zeros(D + 1), method = 'Newton-CG', jac = grad, hess = hess, tol = 1e-6).x
print w
np.mean((Xaug.dot(w) > 0) != y)

[-21.90976476   0.26285725   0.02405287]
[ 0.  0.  0.]


0.34761904761904761

In [10]:
# iteratively reweighted least squares -- murphy p. 253
w = np.zeros(D + 1)

for _ in xrange(5):
    eta = Xaug.dot(w)
    mu = expit(eta)
    S = np.diag(mu * (1 - mu))
    z = eta + inv(S).dot(y - mu)
    w = inv(Xaug.T.dot(S).dot(Xaug)).dot(Xaug.T).dot(S).dot(z)

np.mean((expit(Xaug.dot(w)) > 0.5) != y)

0.11904761904761904

In [8]:
def fun(w):
    eta = Xaug.dot(w)
    A = np.log(1 + np.exp(eta))
    return -sum(eta * y - A)
        
w = minimize(fun, np.zeros(D + 1)).x

np.mean((expit(Xaug.dot(w)) > 0.5) != y)

0.11904761904761904