In [1]:
import numpy as np
from scipy import optimize
from __future__ import division

In [41]:
class SigmoidActivationFunction:
    
    @staticmethod
    def value(z):
        return 1 / (1 + np.exp(-z))
    
    @staticmethod
    def derivative(z):
        sig = SigmoidActivationFunction.value(z)
        return sig * (1 - sig)
    
class WeightPacking:
    @staticmethod
    def pack(weights, biases):
        return np.concatenate([np.concatenate([w.ravel(), b.ravel()]) for w,b in zip(weights, biases)])
    
    @staticmethod
    def unpack(thetas, layers):
        start = 0
        weights = []
        biases = []
        for x in range(len(layers)-1):
            y,x = layers[x], layers[x+1]
            weights.append(thetas[start:start+y*x].reshape(y,x))
            start += y*x
            biases.append(thetas[start:start+x].reshape(1, x))
            start += x
        return (weights, biases)

In [81]:
class NN_1HL(object):
    
    def __init__(self, reg_lambda=0, epsilon_init=0.12,
                 hidden_layer_size=25, opti_method='TNC', maxiter=500, ActivationFunction = SigmoidActivationFunction):
        self.reg_lambda = reg_lambda
        self.epsilon_init = epsilon_init
        self.hidden_layer_size = hidden_layer_size
        self.activation_func = ActivationFunction.value
        self.activation_func_prime = ActivationFunction.derivative
        self.method = opti_method
        self.maxiter = maxiter
    
    def rand_init(self, l_in, l_out):
        return np.random.rand(l_out, l_in + 1) * 2 * self.epsilon_init - self.epsilon_init
    
    def pack_thetas(self, t1, t2):
        return np.concatenate((t1.reshape(-1), t2.reshape(-1)))
    
    def unpack_thetas(self, thetas, layers):
        
        input_layer_size, hidden_layer_size, num_labels = layers[0], layers[1], layers[2]
        
        t1_start = 0
        t1_end = hidden_layer_size * (input_layer_size + 1)
        t1 = thetas[t1_start:t1_end].reshape((hidden_layer_size, input_layer_size + 1))
        t2 = thetas[t1_end:].reshape((num_labels, hidden_layer_size + 1))
        return t1, t2
    

    def costCalc(self, actual, predicted):
        costPositive = -actual * np.log(predicted)
        costNegative = (1 - actual) * np.log(1 - predicted)
        cost = costPositive - costNegative
        return np.sum(cost)
    
    def function(self, thetas, layers, X, y, reg_lambda):
        
        t1, t2 = self.unpack_thetas(thetas, layers)
        
        m = X.shape[0]
        Y = np.eye(layers[-1])[y]
        
        _, _, _, _, h = self._forward(X, t1, t2)

        J = self.costCalc(Y, h) / m
        
        if reg_lambda != 0:
            t1f = t1[:, 1:]
            t2f = t2[:, 1:]
            J += self.reg_lambda / (2 * m) * (np.sum(t1f**2) + np.sum(t2f**2))
        return J
        
    
    def fit(self, X, y):
        num_features = X.shape[0]
        input_layer_size = X.shape[1]
        num_labels = len(set(y))
        
        theta1_0 = self.rand_init(input_layer_size, self.hidden_layer_size)
        theta2_0 = self.rand_init(self.hidden_layer_size, num_labels)
        
        
        t1w = theta1_0[:, 1:].T
        t1b = theta1_0[:, 0][np.newaxis]

        t2w = theta2_0[:, 1:].T
        t2b = theta2_0[:, 0][np.newaxis]
        
        thetas0 = WeightPacking.pack([t1w, t2w], [t1b, t2b])
        #thetas0 = self.pack_thetas(theta1_0, theta2_0)
        
        
        
        layers = [input_layer_size, self.hidden_layer_size, num_labels]
        
        options = {'maxiter': self.maxiter}
        _res = optimize.minimize(self.function, thetas0, jac=self.function_prime, method=self.method, 
                                 args=(layers, X, y, self.reg_lambda), options=options)
        
        self.t1, self.t2 = self.unpack_thetas(_res.x, [input_layer_size, self.hidden_layer_size, num_labels])
    
    def predict(self, X):
        return self.predict_proba(X).argmax(0)
    
    def predict_proba(self, X):
        _, _, _, _, h = self._forward(X, self.t1, self.t2)
        return h.T
    
    def _forward(self, X, t1, t2):
        t1w = t1[:, 1:].T
        t1b = t1[:, 0][np.newaxis]

        t2w = t2[:, 1:].T
        t2b = t2[:, 0][np.newaxis]
        
        
        a1new = X
        z1new = np.dot(X, t1w) + t1b
        a2new = self.activation_func(z1new)
        z2new = np.dot(a2new, t2w) + t2b
        a3new = self.activation_func(z2new)
        
        yd, xd = z1new.T.shape
        if xd == 1:
            z1new =  z1new.ravel()
            z2new =  z2new.ravel()
        
        return a1new, z1new, a2new, z2new, a3new
    
    
    
    def function_prime(self, thetas, layers, X, y, reg_lambda):

        t1, t2 = self.unpack_thetas(thetas, layers)
        m = X.shape[0]

        t1w = t1[:, 1:].T
        t1b = t1[:, 0][np.newaxis]

        t2w = t2[:, 1:].T
        t2b = t2[:, 0][np.newaxis]
        
        
        Y = np.eye(layers[-1])[y]

        b1 = np.zeros(layers[1])
        b2 = np.zeros(layers[2])
        
        Delta1, Delta2 = np.zeros(t1w.shape), np.zeros(t2w.shape)

        for i, row in enumerate(X):
            a1, z2, a2, z3, a3 = self._forward(row, t1, t2)

            # Backprop
            d3 = (a3 - Y[i, :]).T
            d2 = np.dot(t2w, d3).ravel() * self.activation_func_prime(z2)
            
            b2+=d3.ravel()
            b1+=d2.ravel()
            
            Delta2 += np.dot(d3, a2).T
            Delta1 += np.dot(d2[np.newaxis].T, a1[np.newaxis]).T
            
            
        Theta1_grad = np.zeros(t1.shape)
        Theta1_grad[:, 1:] = Delta1.T/m
        
        Theta2_grad = np.zeros(t2.shape)
        Theta2_grad[:, 1:] = Delta2.T/m
        
        
        Theta1_grad[:, 0] = b1/m
        Theta2_grad[:, 0] = b2/m
        
        
        if reg_lambda != 0:
            Theta1_grad[:, 1:] += (reg_lambda / m) * t1w.T
            Theta2_grad[:, 1:] += (reg_lambda / m) * t2w.T
        
        return self.pack_thetas(Theta1_grad, Theta2_grad)

In [79]:
np.random.seed(40)
import sklearn.datasets as datasets
from sklearn import cross_validation

iris = datasets.load_iris()
X = iris.data
y = iris.target

X_train, X_test, y_train, y_test = cross_validation.train_test_split(X, y, test_size=0.4)

nn = NN_1HL(reg_lambda = 2.1)
nn.fit(X_train, y_train)


from sklearn.metrics import accuracy_score

accuracy_score(y_test, nn.predict(X_test))

0.96666666666666667

In [None]:
0.96666666666666667

In [35]:
np.random.seed(40)
from scipy.io import loadmat
data = loadmat('ex3data1.mat')
X, y = data['X'], data['y']
y = y.reshape(X.shape[0], )
y = y - 1  # Fix notation # TODO: Automaticlly fix that on the class

X_train, X_test, y_train, y_test = cross_validation.train_test_split(X, y, test_size=0.4)

nn = NN_1HL(maxiter=50, reg_lambda = 2.1)
nn.fit(X_train, y_train)

accuracy_score(y_test, nn.predict(X_test))



0.876

In [358]:
0.876

0.876

In [None]:
#class NeuralNet:
#    def __init__(self, reg_lambda=0, epsilon_init=0.12, hiddenLayerSizes = [25],  opti_method='TNC', maxiter=500):
#        self.reg_lambda = reg_lambda
#        self.epsilon_init = epsilon_init
#        self.method = opti_method
#        self.maxiter = maxiter
#        self._hiddenLayerSizes = hiddenLayerSizes
#        
#    def weightsInit(self, layers):
#        weights = []
#        biases = []
#        for x in range(len(layers)-1):
#            weights.append(np.random.randn(layers[x], layers[x+1]))
#            biases.append(np.random.randn(1, layers[x+1]))
#        return weights, biases
#        
#    def fit(self, X, y):
#        #print("yxdfs", X.shape)
#        numFeatures = X.shape[0]
#        inputLayerSize = X.shape[1]
#        numLabels = len(set(y))
#        
#        layers = [inputLayerSize] + self._hiddenLayerSizes + [numLabels]
#        weights, biases = self.weightsInit(layers)
#        
#        
#        #for w, b in zip(weights, biases):
#        #    print(w.shape, b.shape)
#        
#        p = self.pack(weights, biases)
#        #wt, bt = self.unpack(p, layers)
#        #
#        #for w, b in zip(wt, bt):
#        #    print(w.shape, b.shape)
#        
#        options = {'maxiter': 500}
#        _res = optimize.minimize(self.optimFunction, p, jac=True, method='TNC', 
#                                 args=(layers, X, y, 0), options=options)
#        
#        
#        
#        
#        self.optimFunction(_res.x, layers, X, y, 0)
#        
#        self._weights, self._biases = weights, biases
#        

#            
#        

#
#
#        
#    def optimFunction(self, thetas, layers,  X, y, reg_lambda):
#        weights, biases = self.unpack(thetas, layers)
#        
#        m = X.shape[0]
#        wt, bt = self.unpack(thetas, layers)
#        Y = np.eye(layers[-1])[y]
#        
#
#        predicted = self._forward(X, weights, biases)
#        
#        #J = np.sum(np.nan_to_num(-Y * np.log(predicted) - (1 - Y) * np.log(1 - predicted))) / m
#        #print(J)
#        
#        nabla_b = [np.zeros(b.shape) for b in biases]
#        nabla_w = [np.zeros(w.shape) for w in weights]
#        
#        for i, row in enumerate(X):
#            
#            
#            #Forward
#            inputActivations = [X[i,:]]
#            outputActivations = []
#            for w,b in zip(weights, biases):
#                z = np.dot(inputActivations[-1], w)+b
#                outputActivations.append(z)
#                inputActivations.append(self.sigmoid(z))
#            predicted = inputActivations[-1]
#            
#            #Back
#            ds = [predicted - Y[i, :]]
#            for x in range(len(weights)-1):
#                ds.append(np.dot(ds[-1], weights[1].T) * self.sigmoid_prime(outputActivations[0]))
#               
#            
#            
#            nabla_b = [d+b for b, d in zip(reversed(ds), nabla_b)]
#            
#            #print(inputActivations[0].shape)
#            
#            Delta2 = np.dot(inputActivations[1].T, ds[0])
#            Delta1 = np.dot(inputActivations[0][np.newaxis].T, ds[1])
#            
#            nabla_w = [nabla_w[0] + Delta1, nabla_w[1] + Delta2]
#            
#            
#        Thetagrad = [w/m for w in nabla_w]
#        Thetagradbias = [b/m for b in  nabla_b]
#        
#        return (J, self.pack(Thetagrad, Thetagradbias))
#            
#        
#        
#        
#        
#    def sigmoid(self, X):
#        return 1 / (1 + np.exp(-X))
#    
#    def sigmoid_prime(self, z):
#        sig = self.sigmoid(z)
#        return sig * (1 - sig)
#        
#    def _forward(self, X, weights, biases):        
#        for w, b in zip(weights, biases):
#            X = self.sigmoid(np.dot(X, w) + b)
#        return X
#    
#    def predict(self, X):
#        return self._forward(X, self._weights, self._biases).argmax(1)

In [384]:
#np.random.seed(40)
#import sklearn.datasets as datasets
#from sklearn import cross_validation
#
#iris = datasets.load_iris()
#X = iris.data
#y = iris.target
#
#X_train, X_test, y_train, y_test = cross_validation.train_test_split(X, y, test_size=0.4)
#
#nn = NeuralNet(reg_lambda = 2.1)
#nn.fit(X_train, y_train)
#
#
#from sklearn.metrics import accuracy_score
#
#accuracy_score(y_test, nn.predict(X_test))

array([[ 0.82353265,  0.10169697,  0.37523681],
       [ 0.64959551,  0.99375116,  0.39973218],
       [ 0.31309426,  0.84499566,  0.61286451],
       [ 0.85020265,  0.69461428,  0.16775967],
       [ 0.33532488,  0.8425862 ,  0.10078412],
       [ 0.69056047,  0.48535535,  0.76482521],
       [ 0.15156521,  0.39489849,  0.40561274],
       [ 0.00675989,  0.69183383,  0.32111048],
       [ 0.84364198,  0.25563488,  0.76862291],
       [ 0.13310176,  0.77821092,  0.31205836]])