In [1]:
import numpy as np
from scipy import optimize
from __future__ import division

In [134]:
class SigmoidActivationFunction:
    
    @staticmethod
    def value(z):
        return 1 / (1 + np.exp(-z))
    
    @staticmethod
    def derivative(z):
        sig = SigmoidActivationFunction.value(z)
        return sig * (1 - sig)
    
class WeightPacking:
    @staticmethod
    def pack(weights, biases):
        return np.concatenate([np.concatenate([w.ravel(), b.ravel()]) for w,b in zip(weights, biases)])
    
    @staticmethod
    def unpack(thetas, layers):
        start = 0
        weights = []
        biases = []
        for h in range(len(layers)-1):
            y,x = layers[h], layers[h+1]
            weights.append(thetas[start:start+y*x].reshape(y,x))
            start += y*x
            biases.append(thetas[start:start+x].reshape(1, x))
            start += x
        return (weights, biases)

In [137]:
class NN_1HL(object):
    
    def __init__(self, reg_lambda=0, epsilon_init=0.12,
                 hidden_layer_size=25, opti_method='TNC', maxiter=500, ActivationFunction = SigmoidActivationFunction):
        self.reg_lambda = reg_lambda
        self.epsilon_init = epsilon_init
        self.hidden_layer_size = hidden_layer_size
        self.activation_func = ActivationFunction.value
        self.activation_func_prime = ActivationFunction.derivative
        self.method = opti_method
        self.maxiter = maxiter
    
    def rand_init(self, l_in, l_out):
        return np.random.rand(l_out, l_in + 1) * 2 * self.epsilon_init - self.epsilon_init
    
    def pack_thetas(self, t1, t2):
        return np.concatenate((t1.reshape(-1), t2.reshape(-1)))
    
    def unpack_thetas(self, thetas, layers):
        
        input_layer_size, hidden_layer_size, num_labels = layers[0], layers[1], layers[2]
        
        t1_start = 0
        t1_end = hidden_layer_size * (input_layer_size + 1)
        t1 = thetas[t1_start:t1_end].reshape((hidden_layer_size, input_layer_size + 1))
        t2 = thetas[t1_end:].reshape((num_labels, hidden_layer_size + 1))
        return t1, t2
    

    def costCalc(self, actual, predicted):
        costPositive = -actual * np.log(predicted)
        costNegative = (1 - actual) * np.log(1 - predicted)
        cost = costPositive - costNegative
        return np.sum(cost)
    
    def predict(self, X):
        return self.predict_proba(X).argmax(0)
    
    def predict_proba(self, X):
        
        t1w = self.t1[:, 1:].T
        t1b = self.t1[:, 0][np.newaxis]

        t2w = self.t2[:, 1:].T
        t2b = self.t2[:, 0][np.newaxis]
        
        _, _, _, _, h = self._forward(X, [t1w, t2w], [t1b, t2b])
        return h.T
    
    def _forward(self, X, weights, biases):
       
        a1new = X
        z1new = np.dot(X, weights[0]) + biases[0]
        a2new = self.activation_func(z1new)
        z2new = np.dot(a2new, weights[1]) + biases[1]
        a3new = self.activation_func(z2new)
        
        yd, xd = z1new.T.shape
        if xd == 1:
            z1new =  z1new.ravel()
            z2new =  z2new.ravel()
        
        return a1new, z1new, a2new, z2new, a3new    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    def fit(self, X, y):
        num_features = X.shape[0]
        input_layer_size = X.shape[1]
        num_labels = len(set(y))
        
        theta1_0 = self.rand_init(input_layer_size, self.hidden_layer_size)
        theta2_0 = self.rand_init(self.hidden_layer_size, num_labels)
        
        
        layers = [input_layer_size, self.hidden_layer_size, num_labels]
        
        t1w = theta1_0[:, 1:].T
        t1b = theta1_0[:, 0][np.newaxis]

        t2w = theta2_0[:, 1:].T
        t2b = theta2_0[:, 0][np.newaxis]
        
        print("initw", t1w)
        print("initb", t1b)

        
        thetas0 = WeightPacking.pack([t1w, t2w], [t1b, t2b])
        #print(thetas0)
        options = {'maxiter': self.maxiter}
        _res = optimize.minimize(self.function, thetas0, jac=True, method=self.method, 
                                 args=(layers, X, y, self.reg_lambda), options=options)
        
        self.t1, self.t2 = self.unpack_thetas(_res.x, [input_layer_size, self.hidden_layer_size, num_labels])
    
    
    def function(self, thetas, layers, X, y, reg_lambda):

        print("1 vector", thetas)
        
        t1, t2 = self.unpack_thetas(thetas, layers)
        m = X.shape[0]

        wk,bk = WeightPacking.unpack(thetas, layers)

        #print(t1 == wk[0])
        
        t1w = t1[:, 1:].T
        t1b = t1[:, 0][np.newaxis]

        t2w = t2[:, 1:].T
        t2b = t2[:, 0][np.newaxis]
        
        print("inf1", t1w)
        print("inf2", wk[0])
        print("inf2b", bk[0])
        
        Y = np.eye(layers[-1])[y]
        

        _, _, _, _, h = self._forward(X, [t1w, t2w], [t1b, t2b])

        J = self.costCalc(Y, h) / m
        
        if reg_lambda != 0:
            t1f = t1[:, 1:]
            t2f = t2[:, 1:]
            J += self.reg_lambda / (2 * m) * (np.sum(t1f**2) + np.sum(t2f**2))
        

        b1 = np.zeros(layers[1])
        b2 = np.zeros(layers[2])
        
        Delta1, Delta2 = np.zeros(t1w.shape), np.zeros(t2w.shape)

        for i, row in enumerate(X):
            
            a1, z2, a2, z3, a3 = self._forward(row, [t1w, t2w], [t1b, t2b])

            # Backprop
            d3 = (a3 - Y[i, :]).T
            d2 = np.dot(t2w, d3).ravel() * self.activation_func_prime(z2)
            
            b2+=d3.ravel()
            b1+=d2.ravel()
            
            Delta2 += np.dot(d3, a2).T
            Delta1 += np.dot(d2[np.newaxis].T, a1[np.newaxis]).T
            
            
        Theta1_grad = np.zeros(t1.shape)
        Theta1_grad[:, 1:] = Delta1.T/m
        
        Theta2_grad = np.zeros(t2.shape)
        Theta2_grad[:, 1:] = Delta2.T/m
        
        
        Theta1_grad[:, 0] = b1/m
        Theta2_grad[:, 0] = b2/m
        
        
        if reg_lambda != 0:
            Theta1_grad[:, 1:] += (reg_lambda / m) * t1w.T
            Theta2_grad[:, 1:] += (reg_lambda / m) * t2w.T
        #return J,  WeightPacking.pack([Theta1_grad[:, 1:].T, Theta2_grad[:, 1:].T], [Theta1_grad[:, 0], Theta2_grad[:, 0]])
        return (J, self.pack_thetas(Theta1_grad, Theta2_grad))

In [138]:
np.random.seed(40)
import sklearn.datasets as datasets
from sklearn import cross_validation

iris = datasets.load_iris()
X = iris.data
y = iris.target

X_train, X_test, y_train, y_test = cross_validation.train_test_split(X, y, test_size=0.4)

nn = NN_1HL(reg_lambda = 2.1, maxiter=0)
nn.fit(X_train, y_train)


from sklearn.metrics import accuracy_score

accuracy_score(y_test, nn.predict(X_test))

initw [[  7.38225654e-02   2.37581880e-02  -1.19897599e-01  -1.83089681e-02
   -1.18595110e-01  -9.19972694e-02   5.61109879e-02  -2.06488327e-02
   -7.78579096e-03   4.57200015e-02   3.87457960e-02  -5.00563207e-02
    6.02613223e-03   6.48767632e-03   7.07117426e-02   4.24365587e-02
   -9.89764294e-02  -5.84824875e-02  -1.00530324e-01   9.20848883e-02
    7.76363847e-02   4.54247214e-02  -1.31462299e-02   8.38567137e-02
   -5.68046740e-02]
 [  1.12890583e-01  -5.79564105e-02  -7.06541164e-02  -5.00154553e-02
   -1.00125214e-01   7.51773483e-02  -1.07830517e-01  -5.59744100e-02
   -2.59859136e-02  -9.04573505e-02  -6.90088422e-02   1.14735601e-01
    9.43104685e-02   6.09670170e-02   5.70721206e-02   9.90260589e-02
    4.97579875e-02  -4.75046717e-02  -1.12650885e-01  -3.74048895e-02
    1.57641153e-02   2.81984826e-02   1.18767329e-01   4.20378353e-02
    7.83467370e-02]
 [  6.95360574e-02  -2.83411234e-02  -2.96864952e-02   4.21914269e-02
    4.04663327e-03   1.00650895e-01  -5.1086

0.33333333333333331

In [5]:
0.96666666666666667

0.9666666666666667

In [49]:
np.random.seed(40)
from scipy.io import loadmat
data = loadmat('ex3data1.mat')
X, y = data['X'], data['y']
y = y.reshape(X.shape[0], )
y = y - 1  # Fix notation # TODO: Automaticlly fix that on the class

X_train, X_test, y_train, y_test = cross_validation.train_test_split(X, y, test_size=0.4)


nn = NN_1HL(maxiter=50, reg_lambda = 2.1)
nn.fit(X_train, y_train)

accuracy_score(y_test, nn.predict(X_test))



initw [[ 0.10016364 -0.04572237 -0.07647824 ..., -0.09294501 -0.1027607
   0.01316554]
 [-0.03847699 -0.07536525  0.03132587 ..., -0.07037281  0.11270158
  -0.10070193]
 [-0.06391403 -0.01410597 -0.05743196 ...,  0.01684863  0.10433352
   0.10846168]
 ..., 
 [ 0.04227835 -0.03933255  0.11128212 ..., -0.10972296 -0.05346367
  -0.1055018 ]
 [-0.10586833 -0.01503455 -0.04825858 ..., -0.00073086  0.01250652
   0.06834749]
 [-0.04589032  0.10956645 -0.00678537 ..., -0.11422528  0.01886324
   0.09208523]]
initb [[ 0.05055243  0.06222448 -0.11862853 -0.0596204  -0.01544234 -0.11404066
   0.02283608 -0.07185854  0.0884596   0.07929232  0.0893194   0.07857383
  -0.06593219 -0.02185787  0.07096474  0.02023183 -0.11116568  0.01329271
   0.06076685 -0.0133218  -0.0678304   0.08921589 -0.01858226  0.01051906
  -0.08535013]]
[ 0.10016364 -0.04572237 -0.07647824 ..., -0.11804889  0.05393864
  0.07469002]
[ 0.10016364 -0.04572237 -0.07647824 ..., -0.11804889  0.05393864
  0.07469002]
[ 0.10016364 -0.0

0.10299999999999999

In [7]:
0.876

0.876

In [8]:
#class NeuralNet:
#    def __init__(self, reg_lambda=0, epsilon_init=0.12, hiddenLayerSizes = [25],  opti_method='TNC', maxiter=500):
#        self.reg_lambda = reg_lambda
#        self.epsilon_init = epsilon_init
#        self.method = opti_method
#        self.maxiter = maxiter
#        self._hiddenLayerSizes = hiddenLayerSizes
#        
#    def weightsInit(self, layers):
#        weights = []
#        biases = []
#        for x in range(len(layers)-1):
#            weights.append(np.random.randn(layers[x], layers[x+1]))
#            biases.append(np.random.randn(1, layers[x+1]))
#        return weights, biases
#        
#    def fit(self, X, y):
#        #print("yxdfs", X.shape)
#        numFeatures = X.shape[0]
#        inputLayerSize = X.shape[1]
#        numLabels = len(set(y))
#        
#        layers = [inputLayerSize] + self._hiddenLayerSizes + [numLabels]
#        weights, biases = self.weightsInit(layers)
#        
#        
#        #for w, b in zip(weights, biases):
#        #    print(w.shape, b.shape)
#        
#        p = self.pack(weights, biases)
#        #wt, bt = self.unpack(p, layers)
#        #
#        #for w, b in zip(wt, bt):
#        #    print(w.shape, b.shape)
#        
#        options = {'maxiter': 500}
#        _res = optimize.minimize(self.optimFunction, p, jac=True, method='TNC', 
#                                 args=(layers, X, y, 0), options=options)
#        
#        
#        
#        
#        self.optimFunction(_res.x, layers, X, y, 0)
#        
#        self._weights, self._biases = weights, biases
#        

#            
#        

#
#
#        
#    def optimFunction(self, thetas, layers,  X, y, reg_lambda):
#        weights, biases = self.unpack(thetas, layers)
#        
#        m = X.shape[0]
#        wt, bt = self.unpack(thetas, layers)
#        Y = np.eye(layers[-1])[y]
#        
#
#        predicted = self._forward(X, weights, biases)
#        
#        #J = np.sum(np.nan_to_num(-Y * np.log(predicted) - (1 - Y) * np.log(1 - predicted))) / m
#        #print(J)
#        
#        nabla_b = [np.zeros(b.shape) for b in biases]
#        nabla_w = [np.zeros(w.shape) for w in weights]
#        
#        for i, row in enumerate(X):
#            
#            
#            #Forward
#            inputActivations = [X[i,:]]
#            outputActivations = []
#            for w,b in zip(weights, biases):
#                z = np.dot(inputActivations[-1], w)+b
#                outputActivations.append(z)
#                inputActivations.append(self.sigmoid(z))
#            predicted = inputActivations[-1]
#            
#            #Back
#            ds = [predicted - Y[i, :]]
#            for x in range(len(weights)-1):
#                ds.append(np.dot(ds[-1], weights[1].T) * self.sigmoid_prime(outputActivations[0]))
#               
#            
#            
#            nabla_b = [d+b for b, d in zip(reversed(ds), nabla_b)]
#            
#            #print(inputActivations[0].shape)
#            
#            Delta2 = np.dot(inputActivations[1].T, ds[0])
#            Delta1 = np.dot(inputActivations[0][np.newaxis].T, ds[1])
#            
#            nabla_w = [nabla_w[0] + Delta1, nabla_w[1] + Delta2]
#            
#            
#        Thetagrad = [w/m for w in nabla_w]
#        Thetagradbias = [b/m for b in  nabla_b]
#        
#        return (J, self.pack(Thetagrad, Thetagradbias))
#            
#        
#        
#        
#        
#    def sigmoid(self, X):
#        return 1 / (1 + np.exp(-X))
#    
#    def sigmoid_prime(self, z):
#        sig = self.sigmoid(z)
#        return sig * (1 - sig)
#        
#    def _forward(self, X, weights, biases):        
#        for w, b in zip(weights, biases):
#            X = self.sigmoid(np.dot(X, w) + b)
#        return X
#    
#    def predict(self, X):
#        return self._forward(X, self._weights, self._biases).argmax(1)

In [9]:
#np.random.seed(40)
#import sklearn.datasets as datasets
#from sklearn import cross_validation
#
#iris = datasets.load_iris()
#X = iris.data
#y = iris.target
#
#X_train, X_test, y_train, y_test = cross_validation.train_test_split(X, y, test_size=0.4)
#
#nn = NeuralNet(reg_lambda = 2.1)
#nn.fit(X_train, y_train)
#
#
#from sklearn.metrics import accuracy_score
#
#accuracy_score(y_test, nn.predict(X_test))

In [10]:
np.random.rand(3,4)

array([[ 0.47764338,  0.78715049,  0.46901537,  0.64170794],
       [ 0.15625387,  0.93352675,  0.73788127,  0.78484598],
       [ 0.41162111,  0.98616717,  0.80951491,  0.08323786]])