In [122]:
import numpy as np
from scipy import optimize
from sklearn import cross_validation
from sklearn.metrics import accuracy_score
import sklearn.datasets as datasets

In [278]:
class SigmoidActivationFunction:
    
    @staticmethod
    def value(z):
        return 1 / (1 + np.exp(-z))
    
    @staticmethod
    def derivative(z):
        sig = SigmoidActivationFunction.value(z)
        return sig * (1 - sig)
    
    
class CrossEntropyCostFunction:

    @staticmethod
    def cost(actual, predicted, numberOfExamples):
        return np.sum(np.nan_to_num(-actual * np.log(predicted).T - (1 - actual) * np.log(1 - predicted).T)) / numberOfExamples    
    
    @staticmethod
    def regulazation(weightsList, lambdaFactor, numberOfelements):
        cost = 0
        for w in weightsList:
            cost +=np.sum(w**2)
        return (lambdaFactor/(2*numberOfelements))*cost

    
    @staticmethod
    def delta(actual, predicted, activationFunction):
        return predicted-actual
    
    
class WeightPacking:
    @staticmethod
    def unpack(weights, layerSizes):
        requredLen = sum([y*x for y,x in layerSizes])
        if requredLen == len(weights):
            start = 0
            returnList = []
            for y,x in layerSizes:
                returnList.append(weights[start:start+y*x].reshape((y,x)))
                start +=y*x
            return returnList
        else:
            raise ValueError("Weights sizes mismatch,", requredLen, "weights requred,",len(weights) , "recived")
    
    @staticmethod
    def pack(weightsList):
        return np.concatenate([w.ravel() for w in weightsList])
    
    

In [285]:
class NN_1HL:
    #opti_method='TNC' BFGS
    def __init__(self, layerSizes = [50, 25], reg_lambda=0, epsilon_init=0.12, 
                 opti_method='TNC', maxiter=500,
                 activationFunction = SigmoidActivationFunction,
                 costFunction = CrossEntropyCostFunction, weightPacking=WeightPacking):
        
        self.epsilon_init = epsilon_init
        self._method = opti_method
        self._maxiter = maxiter
        self._reg_lambda = reg_lambda
        self._costFunction = costFunction
        self._activationFunction = activationFunction
        self._layerSizes = layerSizes
        self._weightPacking = weightPacking
    
    def rand_init(self, l_in, l_out):
        return np.random.randn(l_out, l_in + 1) * 2 * self.epsilon_init - self.epsilon_init
           
        
    def unpackWeights(self, thetas, layers):
        sizes = []
        for x in range(len(layers)-1):
            sizes.append((layers[x+1], layers[x] + 1))
        ls = self._weightPacking.unpack(thetas, sizes)
        return ls
    
    
    def _forward(self, X, weights):
        aList = [self.addOnes(X)]
        zList = []
        lastLayer = None
        
        for w in weights:
            
            z = np.dot(w, aList[-1].T)
            a = self._activationFunction.value(z)
            
            aList.append(self.addOnes(a.T))
            zList.append(z)
            lastLayer = a
            
        return aList[0:-1], zList, lastLayer
    
    
    def variableSetup(self, thetas, layers,  X, y):
        weights = self.unpackWeights(thetas, layers)
        m = X.shape[0]
        Y = np.eye(layers[-1])[y]
        return (weights, m, Y)
        
        
    def removeBiasesFromWeightMatrices(self, listOfWeights):
        return [w[:, 1:] for w in listOfWeights]
    
    
    def function(self, thetas, layers,  X, y, reg_lambda):
        #initial values setup
        weights, m, Y = self.variableSetup(thetas, layers, X, y)
        
        #feedforward
        aList, zList, prediction = self._forward(X, weights)

        #error calculation
        si = [self._costFunction.delta(Y, prediction.T, self._activationFunction)]
        
        #backpropagation
        for x in range(len(zList)-1, 0, -1):
            si.append((np.dot(si[-1], weights[x]) * self._activationFunction.derivative(self.addOnes(zList[x-1].T)))[:, 1:])

        #delta calculation
        Deltas = [np.dot(siN.T, aN) for siN, aN in zip(reversed(si), aList)]
        
        #gradient calculation
        Theta_grads = [d/m for d in Deltas]
        
        tfs = self.removeBiasesFromWeightMatrices(weights)
        
        #cost
        J = self._costFunction.cost(Y, prediction, m)
        reg = 0 
        
        #regulazation calculation
        if reg_lambda != 0:
            reg = self._costFunction.regulazation(tfs, self._reg_lambda, m)
            for x in range(len(Theta_grads)):
                Theta_grads[x][:, 1:] += (reg_lambda / m) * tfs[x]

        return (J + reg, Theta_grads)
    
    
    def functionOpt(self, thetas, layers, X, y, reg_lambda):
        c, dw = self.function(thetas, layers, X, y, reg_lambda)
        return (c, self._weightPacking.pack(dw))
    
    
    def addOnes(self, x):
        ys,xs = x.shape
        z = np.ones((ys,1))
        return np.concatenate((z, x), axis=1)
    
    
    def generateWeights(self, inputLayerSize, layerSizes, numLabels):
        ls = [self.rand_init(inputLayerSize, layerSizes[0])]
        for x in range(0, len(layerSizes)-1):
            ls.append(self.rand_init(layerSizes[x], layerSizes[x+1]))
        ls.append(self.rand_init(layerSizes[-1], numLabels))
        return ls
    
    
    def fit(self, X, y):
        num_features = X.shape[0]
        input_layer_size = X.shape[1]
        num_labels = len(set(y))
        
        #return list of weights
        weights = self.generateWeights(input_layer_size, self._layerSizes, num_labels)

        
        layers = [input_layer_size] + self._layerSizes + [num_labels]
        
        args = (layers, X, y, self._reg_lambda)
        
        options = {'maxiter': self._maxiter}
        _res = optimize.minimize(self.functionOpt, self._weightPacking.pack(weights), jac=True, method=self._method, 
                                 args=args, options=options)
        
        self.weights = self.unpackWeights(_res.x, layers)
    
    
    def predict(self, X):
        return self.predict_proba(X).argmax(0)
    
    
    def predict_proba(self, X):
        _, _, h = self._forward(X, self.weights)
        return h
    
    def getWeights(self):
        return self.weights
    
    
    

In [291]:
from scipy.io import loadmat
data = loadmat('ex3data1.mat')
np.random.seed(40)
X, y = data['X'], data['y']
y = y.reshape(X.shape[0])
y = y - 1  # Fix notation # TODO: Automaticlly fix that on the class

X_train, X_test, y_train, y_test = cross_validation.train_test_split(X, y, test_size=0.3)

nn = NN_1HL(layerSizes=[50, 20], maxiter=100, reg_lambda=1)
nn.fit(X_train, y_train)

accuracy_score(y_test, nn.predict(X_test))
print(X.shape)
for x in nn.getWeights():
    print(x.shape)

(5000, 400)
(50, 401)
(20, 51)
(10, 21)


In [None]:
0.85733333333333328

In [158]:
iris = datasets.load_iris()
X = iris.data
y = iris.target
X_train, X_test, y_train, y_test = cross_validation.train_test_split(X, y, test_size=0.4)
nn = NN_1HL(layerSizes=[50, 30], maxiter=100, reg_lambda=1)
nn.fit(X_train, y_train)
accuracy_score(y_test, nn.predict(X_test))

0.96666666666666667

In [None]:
0.96666666666666667

In [6]:
w1 = np.random.rand(3,2)
w2 = np.random.rand(2,4)
w3 = np.random.rand(5,2)
print(w1)
print("")
print(w2)
print("")
print(w3)
print("")
x = np.concatenate([w1.ravel(),w2.ravel(),w3.ravel()])
print(x)

sizes = [(3,2),(2,4),(5,2)]
print("")
def neki(sizes, weights):
    if sum([y*x for y,x in sizes]) == len(weights):
        start = 0
        for y,x in sizes:
            print(weights[start:start+y*x].reshape((y,x)))
            start +=y*x
    else:
        print("error")
    

neki(sizes, x)


[[ 0.52817684  0.32312065]
 [ 0.80975388  0.06100429]
 [ 0.94629727  0.25635086]]

[[ 0.95200163  0.35435641  0.22840884  0.16440801]
 [ 0.86332623  0.69000625  0.90245113  0.33116605]]

[[ 0.53626987  0.46399028]
 [ 0.47600067  0.11168056]
 [ 0.41924697  0.14158498]
 [ 0.95637804  0.03635139]
 [ 0.58220501  0.811917  ]]

[ 0.52817684  0.32312065  0.80975388  0.06100429  0.94629727  0.25635086
  0.95200163  0.35435641  0.22840884  0.16440801  0.86332623  0.69000625
  0.90245113  0.33116605  0.53626987  0.46399028  0.47600067  0.11168056
  0.41924697  0.14158498  0.95637804  0.03635139  0.58220501  0.811917  ]

[[ 0.52817684  0.32312065]
 [ 0.80975388  0.06100429]
 [ 0.94629727  0.25635086]]
[[ 0.95200163  0.35435641  0.22840884  0.16440801]
 [ 0.86332623  0.69000625  0.90245113  0.33116605]]
[[ 0.53626987  0.46399028]
 [ 0.47600067  0.11168056]
 [ 0.41924697  0.14158498]
 [ 0.95637804  0.03635139]
 [ 0.58220501  0.811917  ]]


In [7]:
def f(x):
    return x**2+15*x

def fd(x):
    return (x**2+15*x, 2*x+15)


optimize.minimize(fd, 100, jac = True).x

array([-7.5])