In [1]:
import numpy as np
from scipy import optimize
from __future__ import division

In [86]:
class SigmoidActivationFunction:
    
    @staticmethod
    def value(z):
        return 1 / (1 + np.exp(-z))
    
    @staticmethod
    def derivative(z):
        sig = SigmoidActivationFunction.value(z)
        return sig * (1 - sig)
    
    
class CrossEntropyCostFunction:

    @staticmethod
    def cost(actual, predicted, numberOfExamples):
        return np.sum(-actual * np.log(predicted).T - (1 - actual) * np.log(1 - predicted).T) / numberOfExamples    
    
    @staticmethod
    def regulazation(weightsList, lambdaFactor, numberOfelements):
        cost = 0
        if lambdaFactor == 0:
            for w in weightsList:
                cost +=np.dot(w, w)
            return (lambdaFactor/(2*numberOfelements))*cost
        return cost
    
    @staticmethod
    def regulazation(actual, predicted, activationFunction):
    
    
    
class NN_1HL:
    #opti_method='TNC' BFGS
    def __init__(self, reg_lambda=0, epsilon_init=0.12, 
                 hidden_layer_size=25, opti_method='TNC', maxiter=500,
                 activationFunction = SigmoidActivationFunction, costFunction = CrossEntropyCostFunction):
        self.reg_lambda = reg_lambda
        self.epsilon_init = epsilon_init
        self.hidden_layer_size = hidden_layer_size
        self.activation_func = activationFunction.value
        self.activation_func_prime = activationFunction.derivative
        self.method = opti_method
        self.maxiter = maxiter
        self._costFunction = costFunction
    
    def rand_init(self, l_in, l_out):
        return np.random.randn(l_out, l_in + 1) * 2 * self.epsilon_init - self.epsilon_init
    
    def pack_thetas(self, t1, t2):
        return np.concatenate((t1.reshape(-1), t2.reshape(-1)))
    
    def unpack_thetas(self, thetas, input_layer_size, hidden_layer_size, num_labels):
        t1_start = 0
        t1_end = hidden_layer_size * (input_layer_size + 1)
        t1 = thetas[t1_start:t1_end].reshape((hidden_layer_size, input_layer_size + 1))
        t2 = thetas[t1_end:].reshape((num_labels, hidden_layer_size + 1))
        return t1, t2
    
    def _forward(self, X, t1, t2):
        m = X.shape[0]
        ones = None
        if len(X.shape) == 1:
            ones = np.array(1).reshape(1,)
        else:
            ones = np.ones(m).reshape(m,1)
        
        # Input layer
        a1 = np.hstack((ones, X))
        
        # Hidden Layer
        z2 = np.dot(t1, a1.T)
        a2 = self.activation_func(z2)
        a2 = np.hstack((ones, a2.T))
        
        # Output layer
        z3 = np.dot(t2, a2.T)
        a3 = self.activation_func(z3)
        return a1, z2, a2, z3, a3
    
    def variableSetup(self, thetas, input_layer_size, hidden_layer_size, num_labels, X, y):
        t1, t2 = self.unpack_thetas(thetas, input_layer_size, hidden_layer_size, num_labels)
        m = X.shape[0]
        Y = np.eye(num_labels)[y]
        return (t1, t2, m, Y)
        
    def removeBiasesFromWeightMatrices(self, listOfWeights):
        return [w[:, 1:] for w in listOfWeights]
    
    
    #TODO: combine function and function_prime for minimization
    def function(self, thetas, input_layer_size, hidden_layer_size, num_labels, X, y, reg_lambda):
        t1, t2, m, Y = self.variableSetup(thetas, input_layer_size, hidden_layer_size, num_labels, X, y)
        
        _, _, _, _, h = self._forward(X, t1, t2)
        
        J = self._costFunction.cost(Y, h, m)
        reg = self._costFunction.regulazation(self.removeBiasesFromWeightMatrices([t1, t2]), self.reg_lambda, m)
        return J + reg
    
    def addOnes(self, x):
        ys,xs = x.shape
        z = np.ones((ys,1))
        return np.concatenate((z, x), axis=1)
        
    def function_prime(self, thetas, input_layer_size, hidden_layer_size, num_labels, X, y, reg_lambda):
        t1, t2, m, Y = self.variableSetup(thetas, input_layer_size, hidden_layer_size, num_labels, X, y)

        t1f = t1[:, 1:]
        t2f = t2[:, 1:]

        a1, z2, a2, z3, a3 = self._forward(X, t1, t2)
        
        si3 = a3.T-Y
        si2 = (np.dot(si3, t2) * self.activation_func_prime(self.addOnes(z2.T)))[:, 1:]

        d1_ = np.dot(si2.T,a1);
        d2_ = np.dot(si3.T,a2);  
        
        Theta1_grad = d1_ / m
        Theta2_grad = d2_ / m
        
        if reg_lambda != 0:
            Theta1_grad[:, 1:] += (reg_lambda / m) * t1f
            Theta2_grad[:, 1:] += (reg_lambda / m) * t2f
        
        return self.pack_thetas(Theta1_grad, Theta2_grad)
    
    def fit(self, X, y):
        num_features = X.shape[0]
        input_layer_size = X.shape[1]
        num_labels = len(set(y))
        
        theta1_0 = self.rand_init(input_layer_size, self.hidden_layer_size)
        theta2_0 = self.rand_init(self.hidden_layer_size, num_labels)
        
        thetas0 = self.pack_thetas(theta1_0, theta2_0)
        
        options = {'maxiter': self.maxiter}
        _res = optimize.minimize(self.function, thetas0, jac=self.function_prime, method=self.method, 
                                 args=(input_layer_size, self.hidden_layer_size, num_labels, X, y, self.reg_lambda), options=options)
        
        self.t1, self.t2 = self.unpack_thetas(_res.x, input_layer_size, self.hidden_layer_size, num_labels)
    
    def predict(self, X):
        return self.predict_proba(X).argmax(0)
    
    def predict_proba(self, X):
        _, _, _, _, h = self._forward(X, self.t1, self.t2)
        return h
    
    
    

In [11]:
import sklearn.datasets as datasets
from sklearn import cross_validation
from sklearn.metrics import accuracy_score

In [89]:
from scipy.io import loadmat
data = loadmat('ex3data1.mat')
np.random.seed(40)
X, y = data['X'], data['y']
y = y.reshape(X.shape[0])
y = y - 1  # Fix notation # TODO: Automaticlly fix that on the class

X_train, X_test, y_train, y_test = cross_validation.train_test_split(X, y, test_size=0.3)

nn = NN_1HL(maxiter=300, reg_lambda=3)



nn.fit(X_train, y_train)


accuracy_score(y_test, nn.predict(X_test))

0.92733333333333334

In [10]:
0.76600000000000001

0.766