# Neural Net

In [1]:
import numpy as np
from sklearn.utils import shuffle

In [2]:
def cost(ytest, ypred):
    ytest = np.array(ytest)
    ypred = np.array(ypred)
    return 0.5 * np.sum((ytest - ypred) ** 2)

In [3]:
cost([10,11,12], [150, 160, 160])

31852.5

In [3]:
def sigmoid(z, deriv=False):
    if deriv == False:
        return 1.0/(1.0+np.exp(-z))
    else:
        return sigmoid(z) * (1.0 - sigmoid(z))

In [9]:
def MSE(ypred, ytest, deriv=False):
    if deriv:
        return ypred - ytest
    else:
        return 0.5 * np.mean(np.sum((ypred - ytest) ** 2))

In [10]:
MSE(np.array([10,11,12]), np.array([150, 160, 160]))

31852.5

In [11]:
def neighborhood(iterable):
    iterator = iter(iterable)
    prev_item = None
    current_item = next(iterator)  # throws StopIteration if empty.
    for next_item in iterator:
        yield (prev_item, current_item, next_item)
        prev_item = current_item
        current_item = next_item
    yield (prev_item, current_item, None)

In [171]:
class Layer():
    def __init__(self, n_inputs, n_neurons = 20, bias=0.0,\
                 activation_function=sigmoid):
        self.W = np.random.uniform(-0.1, 0.1, size=(n_inputs, n_neurons))            
        self.b = bias
        self.act_func = activation_function
        self.signal = 0.0
        self.act_res = 0.0
        self.deriv = 0.0
        
    def __len__(self):
        return self.W.shape

In [210]:
class NN():
    def __init__(self, n_inputs=784, n_outputs=10, layers=None, cost_function = MSE, n_epochs = 20, \
                 learning_rate = 0.1, batch_size = 20, debug = False):
        self.n_inputs = n_inputs
        self.n_outputs = n_outputs
        self.debug = debug
        self.batch_size = batch_size
        self.n_epochs = n_epochs
        self.cost_func = cost_function
        self.eta = learning_rate
        if layers is None:
            print "Layers wasn't initialized"
            layers = [] 
            layers.append(Layer(n_inputs, 4))
            layers.append(Layer(4, 6))
            layers.append(Layer(6, n_outputs))
            self.layers = layers
            for i in self.layers:
                print i.W.shape
        else:
            self.layers = layers
   
    def forward(self, x):
        self.layers[0].signal = np.dot(x, self.layers[0].W) + self.layers[0].b
        self.layers[0].act_res = self.layers[0].act_func(self.layers[0].signal)
        self.layers[0].deriv = self.layers[0].act_func(self.layers[0].signal, deriv=True)
        for prev, curr, nxt in neighborhood(self.layers[:]):
            if prev is None:
                continue
            curr.signal = np.dot(prev.act_res, curr.W) + curr.b
            curr.act_res = curr.act_func(curr.signal)
            curr.deriv = curr.act_func(curr.signal, deriv=True)
            output = curr.act_res
        return output
    

    def fit(self, x, y=None):
        for epoch in xrange(self.n_epochs):
            x_sh, y_sh = shuffle(x, y)
            n_batches = x.shape[0] / self.batch_size
            for i in xrange(n_batches):
                batch_x = x_sh[i*self.batch_size:(i+1)*self.batch_size]
                batch_y = y_sh[i*self.batch_size:(i+1)*self.batch_size]
                output = self.forward(batch_x)

                cost_deriv = self.cost_func(output, batch_y, deriv=True)
                delta = cost_deriv * self.layers[-1].deriv
#                 print delta.shape
                print
                self.backward(delta)
            if self.debug and epoch % 100 == 0:
                print self.cost_func(self.forward(x), y)
            
        
    def predict(self, x):
        return self.forward(x)
    
    def backward(self, delta):
        for prev, curr, nxt in neighborhood(self.layers[::-1]):
            print
            print delta.shape, curr.signal.shape
            dX = (np.dot(delta, curr.signal) / self.batch_size).T
            dW = - self.eta * dX
            if nxt is not None:
                print curr.W.shape, delta.shape
                weight_delta = np.dot(curr.W, delta)
                print weight_delta.shape, nxt.deriv.shape
                delta = weight_delta * nxt.deriv
            curr.W += dW
    

In [209]:
x = np.array([[0,0], [0, 1], [1, 0], [1, 1]])
y = np.array([0, 1, 1, 1])
nn = NN(n_inputs=2, batch_size=1, n_outputs=1, debug=True, n_epochs=1000, learning_rate=0.1)
nn.fit(x, y)
print nn.predict(np.array([[0,0]]))
print nn.predict(np.array([[0,1]]))
print nn.predict(np.array([[1,0]]))
print nn.predict(np.array([[1,1]]))

Layers wasn't initialized
(2, 4)
(4, 6)
(6, 1)


(1, 1) (1, 1)
(6, 1) (1, 1)
(6, 1) (1, 6)

(6, 1) (1, 6)


ValueError: shapes (6,1) and (6,1) not aligned: 1 (dim 1) != 6 (dim 0)

In [152]:
x = np.array([ [0,0,1],[0,1,1],[1,0,1],[1,1,1] ])
y = np.array([0,1,1,0])
nn = NN(n_inputs=3, batch_size=1, n_outputs=1, debug=True, n_epochs=100, learning_rate=0.1)
nn.fit(x, y)
nn.predict(np.array([[0,0,1]]))

Layers wasn't initialized


ValueError: operands could not be broadcast together with shapes (3,2) (2,2) (3,2) 

In [39]:
for p,c,n in neighborhood(nn.layers[1:]):
    print p

None


Layers wasn't initialized


NameError: global name 'l' is not defined

In [14]:
l1 = Layer(784, 20)
print l1.W
print l1.b
print l1.act_func

[[ 0.07309525  0.04600237  0.04270979 ..., -0.09919984  0.00621015
  -0.07381296]
 [ 0.04967612 -0.06545312  0.00520215 ..., -0.05365605 -0.09853688
   0.01422717]
 [ 0.08512657 -0.09153191  0.06124884 ...,  0.06299205  0.07060972
  -0.00845894]
 ..., 
 [-0.06520541  0.02059706 -0.01356862 ..., -0.08985891  0.00255746
  -0.08460494]
 [ 0.04594625  0.09654212  0.02775311 ..., -0.05416095 -0.06233646
   0.01707149]
 [-0.01650306  0.02114456 -0.07046178 ..., -0.08010814 -0.04892466
  -0.09020029]]
0.01
<function sigmoid at 0x7f8a7b131de8>


[[0 0]
 [0 1]
 [1 0]
 [1 1]] [0 1 1 1]
