In [1]:
import numpy as np
import pandas as pd
from scipy.io import loadmat

In [2]:
data = loadmat('faces.mat')

In [41]:
training_data = np.concatenate((data['train_faces'], data['train_nonfaces']), 
                               axis=0)
training_data = np.reshape(training_data, (361, 4858))

test_data = np.concatenate((data['test_faces'], data['test_nonfaces']),
                           axis=0)
test_data = np.reshape(test_data, (361, 944))

# using 1 to indicate 'face' and 0 to indicate 'not face'
training_labels = np.concatenate((np.ones(len(data['train_faces'])),
                                  np.zeros(len(data['train_nonfaces']))))
test_labels = np.concatenate((np.ones(len(data['test_faces'])),
                              np.zeros(len(data['test_nonfaces']))))

In [86]:
def logistic_regression(X, Y, max_iter=0):
    [m, n] = X.shape
    B_old = np.zeros(m)
    W = np.diag(np.ones(n))
    dist = 1
    
    # While above tolerance level
    while dist > 0.01:
        btx = np.dot(B_old.T, X)
        
        # the value of the p.d.f and its derivative
        P = np.exp(btx)/(1 + np.exp(btx))
        np.fill_diagonal(W, P*(1 - P))
        
        Winv = np.linalg.inv(W)
        Z = np.dot(X.T, B_old) + np.dot(Winv, Y - P)
        
        XWXt = np.dot(np.dot(X, W), X.T)
        XWZ = np.dot(np.dot(X, W), Z)
        B_new = np.dot(np.linalg.inv(XWXt), XWZ)
        
        dist = (np.dot((B_new - B_old).T, (B_new - B_old))) ** 0.5
        B_old = B_new
        
    return B_old

betas = logistic_regression(training_data, training_labels)

In [87]:
# Calculating the errors
def logit_error(X, Y):
    btx = np.dot(betas.T, X)
    n = len(btx)
    predictions = np.ones(n)
    wrong = 0.0
    
    for i in range(n):
        if btx[i] < 0:
            predictions[i] = 0
        
        if Y[i] != predictions[i]:
            wrong = wrong + 1.0
    
    return 100.0*wrong/n

training_err = logit_error(training_data, training_labels)
test_err = logit_error(test_data, test_labels)

print '''The training error is %.4f%% and 
    the test error is %.4f%%.''' % (training_err, test_err)

The training error is 0.0000% and 
    the test error is 50.1059%.


In [88]:
print("The first five components of beta are {}.".format(betas[0:5]))

The first five components of beta are [ 0.00117913 -0.00338817 -0.00048985 -0.00138774  0.00050729].
