<a href="https://colab.research.google.com/github/jjennings955/Neural-Network-Notebooks/blob/master/Backpropagation_Object_Oriented.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# https://pastebin.com/Yc7RBFpT

In [0]:
import numpy as np

class Layer(object):
    def __init__(self, parent_layer, *args, **kwargs):
        self.parent = parent_layer
        self.name = kwargs.pop('name', 'unnamed')
        if self.parent:
            self.parent.child = self
        self.child = None
        
      
# Dense/fully connected layer/Affine layer
class Dense(Layer):
    def __init__(self, num_inputs, num_outputs, parent_layer=None, name="Dense", *args, **kwargs):
        super(Dense, self).__init__(parent_layer)
        self.W = np.random.randn(num_outputs, num_inputs)/np.sqrt(num_outputs)
        self.b = np.random.randn(num_outputs, 1)
        
        
    def forward(self, x):
        self.x = x
        return np.matmul(self.W, x) + self.b

    def backward(self, g):
        self.dW = g.dot(self.x.T)
        dX = self.W.T.dot(g)
        self.db = g
        return dX
    def __repr__(self):
        return "Dense(num_inputs={}, num_outputs={}, name={})".format(self.W.shape[0], self.W.shape[0], self.name)
       

class Sigmoid(Layer):
    def __init__(self, parent_layer, *args, **kwargs):
        super(Sigmoid, self).__init__(parent_layer, *args, **kwargs)
        
    def forward(self, x):
        self.out = 1/(1 + np.exp(-x))
        return self.out

    def backward(self, g):
        sigmoid_gradient = self.out*(1 - self.out)
        return sigmoid_gradient*g
    def __repr__(self):
        return "Sigmoid(name={})".format(self.name)
      
class Softmax(Layer):
    def __init__(self, parent_layer, *args, **kwargs):
        super(Softmax, self).__init__(parent_layer, *args, **kwargs)
        
    def forward(self, x):
        e_x = np.exp(x)
        denominator = np.sum(e_x)
        self.output = e_x/denominator
        return self.output

    def backward(self, g):
        jacobian = np.diagflat(self.output) - np.matmul(self.output, self.output.T)
        return np.matmul(jacobian.T, g)
      
    def __repr__(self):
        return "Softmax(name={})".format(self.name)
      
class CrossEntropy(Layer):
    def __init__(self, parent_layer, labels, *args, **kwargs):
        super(CrossEntropy, self).__init__(parent_layer)
        self.labels = labels # We really should have figured out a better way to implement this.
        
    def forward(self, y_hat):
        self.y_hat = y_hat
        self.y = self.labels
        xent = -np.sum(self.y * np.log(self.y_hat + 1e-8))
        return xent
        
    def backward(self, g):
        self.gradient = self.y / self.y_hat
        return self.gradient
      
    def __repr__(self):
        return "CrossEntropy(name={})".format(self.name) 

class Network(Layer):
    def __init__(self, parent_layer):
        super(Network, self).__init__(parent_layer)

    def _root(self):
        obj = self.parent
        while obj.parent != None:
            obj = obj.parent
            self.root = obj
        return self.root

    def forward(self, input):
        obj = self._root()
        current_input = input
        while obj.child:
            out = obj.forward(current_input)
            current_input = out
            print('call', obj, '.forward(X) with output from previous layer,\noutput={}'.format(out))
            print('-----------------------------')
            obj = obj.child

    def backward(self):
        g = 1
        obj = self.parent
    
        while obj:
            g = obj.backward(g)
            print('call', obj, '.backward(g) with g flowing backwards from child layer\ng={}'.format(g))
            print('-----------------------------')
            obj = obj.parent
    
    def __repr__(self):
        output = []
        obj = self._root()
        while obj.child:
          output.append(repr(obj))
          obj = obj.child
        return ' -> '.join(output)
      
      
      
      


# z_1 = Sigmoid(net_1)
# net_2 = Dense(3,3, z_1)
# z_2 = Softmax(net_2)
# loss = CrossEntropy(z_2)



# Testing Dense layer

In [107]:
np.random.seed(1234)
net_1 = Dense(num_inputs=2, num_outputs=3)
print(net_1.W)
print(net_1.b)

# 0.2, -0.3

x = np.float32([[0.2, -0.3]]).T
g = np.float32([[1, 1, 1]]).T
print(net_1.forward(x))
print(net_1.backward(g))

[[ 0.27218322 -0.68761014]
 [ 0.82717375 -0.18050966]
 [-0.4160321   0.51220376]]
[[ 0.85958841]
 [-0.6365235 ]
 [ 0.01569637]]
[[ 1.12030811]
 [-0.41693585]
 [-0.22117118]]
[[ 0.68332487]
 [-0.35591603]]


# Testing Sigmoid layer

In [108]:
x = np.float32([[0.0, -5.0, 5.0]]).T
s = Sigmoid(parent_layer=None)
g = np.float32([[1, 1, 1]]).T
print(s.forward(x))
print(s.backward(g))


[[0.5       ]
 [0.00669285]
 [0.9933072 ]]
[[0.25      ]
 [0.00664806]
 [0.00664803]]


# Testing Softmax Layer

In [109]:
x = np.float32([[1, 1, 1]]).T
g = np.float32([[-1, 1, 0]]).T
soft = Softmax(None)
print(soft.forward(x))
print(soft.backward(g))

[[0.33333334]
 [0.33333334]
 [0.33333334]]
[[-0.33333334]
 [ 0.33333334]
 [ 0.        ]]


# Testing Cross Entropy layer 

In [110]:
y = np.float32([[1, 0, 0]]).T
x_ent = CrossEntropy(None, labels=y)
y_hat = np.float32([[0.25, 0.4, 0.35]]).T
print(x_ent.forward(y_hat))
print(x_ent.backward(1))


1.3862944
[[4.]
 [0.]
 [0.]]


# Creating a network and connecting it all-together using the Network class

In [111]:
y = np.float32([[1, 0, 0]]).T

net_1 = Dense(2, 10, name='dense_1')
z_1 = Sigmoid(net_1, name='sigmoid_1')
net_2 = Dense(10,3, z_1, name='dense_2')
z_2 = Softmax(net_2, name='softmax_outut')
loss = CrossEntropy(z_2, labels=y, name='xent')
network = Network(loss)
print(network)

Dense(num_inputs=10, num_outputs=10, name=unnamed) -> Sigmoid(name=sigmoid_1) -> Dense(num_inputs=3, num_outputs=3, name=unnamed) -> Softmax(name=softmax_outut) -> CrossEntropy(name=unnamed)


# Forward Propagation

In [112]:
x = np.float32([[0.2, -0.3]]).T
network.forward(x)

call Dense(num_inputs=10, num_outputs=10, name=unnamed) .forward(X) with output from previous layer,
output=[[ 0.80802736]
 [-0.42554432]
 [ 0.24129564]
 [ 1.00924786]
 [ 0.93888596]
 [ 0.78510711]
 [-0.18192828]
 [ 0.03466466]
 [-0.41656497]
 [ 0.74412702]]
-----------------------------
call Sigmoid(name=sigmoid_1) .forward(X) with output from previous layer,
output=[[0.69168899]
 [0.39519081]
 [0.56003291]
 [0.73287293]
 [0.71887457]
 [0.68677977]
 [0.45464296]
 [0.5086653 ]
 [0.39733901]
 [0.67789766]]
-----------------------------
call Dense(num_inputs=3, num_outputs=3, name=unnamed) .forward(X) with output from previous layer,
output=[[ 0.7703048 ]
 [-1.54826007]
 [ 1.07502848]]
-----------------------------
call Softmax(name=softmax_outut) .forward(X) with output from previous layer,
output=[[0.40738758]
 [0.04009294]
 [0.55251948]]
-----------------------------
call CrossEntropy(name=unnamed) .forward(X) with output from previous layer,
output=0.8979902309932645
----------------

# Compute the backward pass for a single input

In [113]:
network.backward()

call CrossEntropy(name=unnamed) .backward(g) with g flowing backwards from child layer
g=[[2.4546649]
 [0.       ]
 [0.       ]]
-----------------------------
call Softmax(name=softmax_outut) .backward(g) with g flowing backwards from child layer
g=[[ 0.59261242]
 [-0.04009294]
 [-0.55251948]]
-----------------------------
call Dense(num_inputs=3, num_outputs=3, name=unnamed) .backward(g) with g flowing backwards from child layer
g=[[ 0.32001113]
 [ 0.31738264]
 [-0.13789965]
 [-0.05342303]
 [-0.64109653]
 [-0.23230823]
 [ 0.46212811]
 [-0.70275542]
 [ 0.37157586]
 [ 0.17777533]]
-----------------------------
call Sigmoid(name=sigmoid_1) .backward(g) with g flowing backwards from child layer
g=[[ 0.06824408]
 [ 0.07585922]
 [-0.03397793]
 [-0.01045864]
 [-0.12956171]
 [-0.04997259]
 [ 0.11458131]
 [-0.17563609]
 [ 0.08897782]
 [ 0.03881767]]
-----------------------------
call Dense(num_inputs=10, num_outputs=10, name=unnamed) .backward(g) with g flowing backwards from child layer
g=[[-

# Inspecting the gradient of our loss wrt our weights and biases

In [7]:
net_1.dW

array([[-0.00260669,  0.00391003],
       [ 0.02271754, -0.03407631],
       [ 0.0074605 , -0.01119076],
       [-0.05267   ,  0.079005  ],
       [ 0.05481178, -0.08221768],
       [-0.00380882,  0.00571323],
       [-0.02256565,  0.03384847],
       [-0.01880307,  0.0282046 ],
       [-0.03405327,  0.0510799 ],
       [-0.05774484,  0.08661726]])

In [8]:
net_1.db

array([[-0.01303345],
       [ 0.11358768],
       [ 0.03730252],
       [-0.26335   ],
       [ 0.27405891],
       [-0.01904409],
       [-0.11282824],
       [-0.09401534],
       [-0.17026633],
       [-0.28872418]])

In [12]:
print(net_2.W)
print(net_2.b)

[[-0.68035984  0.82786177 -0.07628352 -0.96442492  0.76733849  0.07731785
  -0.04997971  0.39442053 -0.57251963 -0.90886088]
 [-0.58768083  0.29608092 -0.32040171  0.59669215 -0.33152285  0.17731508
   0.45339487  0.85775959  0.28980691  0.40411043]
 [-0.69919073  0.02501047 -0.01541989 -0.09658222 -0.96685814  0.02008536
   0.31291023  0.1272609  -1.21771491 -0.17660551]]
[[-0.49618386]
 [ 1.43266477]
 [ 0.30094553]]
