In [4]:
import numpy as np

In [128]:
# Bare bones neural network, just for fun


# Backprop rule of thumb - for any operation, the gradient of the loss with respect to
# its inputs is the product of two terms:
#   - the gradient of the loss with respect to the output (how the loss changes with the output)
#   - the gradient of the output with respect to the inputs (how the output changes with the inputs)

# Nice videos for vectorizing code
# https://www.youtube.com/watch?v=-l9BdZsopYI
# 

class Linear:
    """Fully connected layer computing ``z = W x + b`` on column-vector inputs.

    Shape conventions (d = input_dim, k = output_dim, n = number of examples):

        W, gradW : (k, d)
        b, gradb : (k, 1)
        x        : (d, n)  -- one example per column
        z        : (k, n)
    """

    def __init__(self, input_dim, output_dim):
        """Create a layer mapping ``input_dim`` features to ``output_dim`` outputs.

        Weights and bias are drawn from a standard normal distribution.
        Gradient buffers start at zero and have exactly the same shapes as the
        parameters they belong to.
        """
        self.W = np.random.randn(output_dim, input_dim)  # (k, d)
        self.b = np.random.randn(output_dim, 1)          # (k, 1)
        self.gradW = np.zeros_like(self.W)               # (k, d)
        self.gradb = np.zeros_like(self.b)               # (k, 1)

    def forward(self, x):
        """Return ``W x + b``.

        Args:
            x: array of shape (d, n), one example per column.

        Returns:
            Array of shape (k, n); the (k, 1) bias is broadcast over the columns.
        """
        return np.dot(self.W, x) + self.b

    def backward(self, x, prevGrad):
        """Back-propagate through ``z = W x + b``.

        Stores dL/dW in ``self.gradW`` (k, d) and dL/db in ``self.gradb`` (k, 1),
        both summed over the n examples, and returns dL/dx.

        Args:
            x: the input that was given to ``forward``, shape (d, n). A 1-D
                array of length d is treated as a single column.
            prevGrad: dL/dz, shape (k, n). A scalar is broadcast to every
                output and example; a 1-D array is reshaped to (k, -1).

        Returns:
            dL/dx, shape (d, n).
        """
        x = np.asarray(x)
        if x.ndim == 1:
            x = x.reshape(-1, 1)

        k = self.W.shape[0]
        grad = np.asarray(prevGrad, dtype=float)
        if grad.ndim == 0:
            # A scalar upstream gradient carries no shape of its own; expand it
            # so the matrix products below always see a (k, n) array.
            grad = np.full((k, x.shape[1]), grad)
        elif grad.ndim == 1:
            grad = grad.reshape(k, -1)

        # dL/dW = dL/dz . x^T : (k, n) @ (n, d) -> (k, d)
        self.gradW = np.dot(grad, x.T)

        # dL/db = sum of dL/dz over the examples -> (k, 1).
        # sum() allocates a new array, so gradb never aliases prevGrad.
        self.gradb = grad.sum(axis=1, keepdims=True)

        # dL/dx = W^T . dL/dz : (d, k) @ (k, n) -> (d, n); passed to the previous layer.
        return np.dot(self.W.T, grad)
        
        
class Sigmoid:
    """Element-wise logistic activation, sigma(x) = 1 / (1 + exp(-x))."""

    def __init__(self):
        """Stateless layer: there is nothing to initialise."""

    def forward(self, x):
        """Return sigma(x), computed element-wise; the result has the shape of ``x``."""
        return 1 / (1 + np.exp(-x))

    def backward(self, x, prevGrad):
        """Return dL/dx given the forward input ``x`` and the upstream gradient.

        The local derivative is sigma(x) * (1 - sigma(x)); it is multiplied
        element-wise with ``prevGrad`` (dL/d sigma).
        """
        activation = self.forward(x)
        local_slope = np.multiply(activation, 1 - activation)
        return np.multiply(local_slope, prevGrad)
        
class MSE:
    """Mean squared error loss against a fixed set of labels."""

    def __init__(self, labels):
        """Remember the target values that predictions are compared against."""
        self.labels = labels

    def forward(self, x):
        """Return the scalar loss ``mean((x - labels) ** 2)`` over all elements."""
        return np.mean(np.square(x - self.labels))

    def backward(self, x):
        """Return dL/dx for ``L = mean((x - labels) ** 2)``.

        The gradient is ``2 * (x - labels) / N`` with N the number of elements
        averaged in ``forward``. It is returned element-wise, with the same
        shape as ``x - labels``, so each prediction receives its own gradient.
        Averaging the residuals into a single scalar is only correct when
        there is exactly one element.
        """
        residual = np.asarray(x - self.labels, dtype=float)
        return 2.0 * residual / residual.size
        
        

In [129]:
# Fixed seed so that Restart & Run All reproduces the same numbers.
np.random.seed(0)

n = 1  # number of examples (columns); keep at 1 unless every layer's backward pass handles batches
d = 2  # input features
h = 3  # hidden units
o = 1  # outputs

# One example per column: X is (d, n) and Y is (o, n), so Y lines up with yhat
# without relying on broadcasting.
X = np.random.randn(d, n)
Y = np.random.randn(o, n)

# Two-layer network: Linear(d -> h) -> Sigmoid -> Linear(h -> o), scored with MSE against Y.
lin1 = Linear(d, h)
sig = Sigmoid()
lin2 = Linear(h, o)
mse = MSE(Y)

# Forward pass; the intermediates are kept because the backward pass needs them.
z = lin1.forward(X)       # (h, n) pre-activation
a = sig.forward(z)        # (h, n) hidden activation
yhat = lin2.forward(a)    # (o, n) prediction
loss = mse.forward(yhat)  # scalar

In [138]:
# Backward pass: walk the layers in reverse order of the forward pass, handing each
# layer the input it saw in forward plus the gradient returned by the layer after it.
grad1 = mse.backward(yhat)  # gradient of the loss w.r.t. the prediction yhat
grad2 = lin2.backward(a, grad1)  # stores lin2.gradW / lin2.gradb, returns gradient w.r.t. a
grad3 = sig.backward(z, grad2)  # gradient w.r.t. the pre-activation z
grad4 = lin1.backward(X, grad3)  # stores lin1.gradW / lin1.gradb, returns gradient w.r.t. X
# Displays lin1's bias; backward() only writes gradW/gradb, so this value is unchanged by the pass.
lin1.b

array([[-0.27029735],
       [-0.48222409],
       [ 2.27783732]])

In [75]:
# Scratch check: for a column vector X, X.T @ X is a 1x1 matrix holding the sum of its squared entries.
np.dot(X.T,X)

array([[ 1.94131599]])

In [103]:
# Scratch arrays for checking how numpy adds a column vector to a matrix.
dd = np.random.randn(3,2) # kxd matrix (k=3, d=2)
bb = np.random.randn(3,1) # kx1 column (k=3), not 1x1


In [104]:
# Inspect the 3x2 matrix.
dd

array([[ 2.2331605 ,  0.60344817],
       [ 1.54753237, -1.6356448 ],
       [ 0.8241335 , -0.06775162]])

In [105]:
# Inspect the 3x1 column.
bb

array([[-0.11224995],
       [ 0.66822787],
       [-0.4767358 ]])

In [107]:
# (3,2) + (3,1): bb is broadcast across the columns, so bb[i] is added to every entry of row i of dd.
dd+bb

array([[ 2.12091054,  0.49119821],
       [ 2.21576024, -0.96741693],
       [ 0.34739771, -0.54448741]])