In [1]:
import ann
import layers
nn = ann.ann_by_layers(layers = [layers.fc(20), layers.relu(), layers.fc(10)])

In [2]:
nn.layers

[<layers.fc at 0x4e8c048>,
 <layers.relu at 0x4e8c080>,
 <layers.fc at 0x4e8c3c8>]

In [3]:
import numpy as np
x = np.arange(1,6)
x = x[:,None]
y = np.concatenate((2*x+1, 3-x))
y

array([[ 3],
       [ 5],
       [ 7],
       [ 9],
       [11],
       [ 2],
       [ 1],
       [ 0],
       [-1],
       [-2]])

In [4]:
def initialize_weight(sz):
    w = np.random.normal(size = sz)
    return w

# nn.layers[0].get_training_parameters()
# for this to be useful for initializing weights, I need to know the size given input data. 
# i.e. size information must be forwarded through the network

nn.layers[0].W = initialize_weight((20,5))
nn.layers[0].b = initialize_weight((20,1))
nn.layers[2].W = initialize_weight((10,20))
nn.layers[2].b = initialize_weight((10,1))

In [5]:
z = nn.layers[0].forward(x)
z.shape
z2 = nn.layers[2].forward(z)
z3 = nn.forward(x)
z3.shape

(10, 1)

In [6]:
loss = layers.mse()
loss.forward(z3, y)

5324.3339953099185

In [7]:
params = []
for layer in nn.layers:
    params.append(layer.get_training_parameters(x))    

In [8]:
dLdZ = loss.x_gradient(nn.forward(x),y)

In [9]:
getattr(nn.layers[0], params[0]['Weights']['Name'])
params

[{'Weights': {'Name': 'W', 'Gradient': array([[[1., 2., 3., 4., 5.],
           [0., 0., 0., 0., 0.],
           [0., 0., 0., 0., 0.],
           ...,
           [0., 0., 0., 0., 0.],
           [0., 0., 0., 0., 0.],
           [0., 0., 0., 0., 0.]],
   
          [[0., 0., 0., 0., 0.],
           [1., 2., 3., 4., 5.],
           [0., 0., 0., 0., 0.],
           ...,
           [0., 0., 0., 0., 0.],
           [0., 0., 0., 0., 0.],
           [0., 0., 0., 0., 0.]],
   
          [[0., 0., 0., 0., 0.],
           [0., 0., 0., 0., 0.],
           [1., 2., 3., 4., 5.],
           ...,
           [0., 0., 0., 0., 0.],
           [0., 0., 0., 0., 0.],
           [0., 0., 0., 0., 0.]],
   
          ...,
   
          [[0., 0., 0., 0., 0.],
           [0., 0., 0., 0., 0.],
           [0., 0., 0., 0., 0.],
           ...,
           [1., 2., 3., 4., 5.],
           [0., 0., 0., 0., 0.],
           [0., 0., 0., 0., 0.]],
   
          [[0., 0., 0., 0., 0.],
           [0., 0., 0., 0., 0.],
   

In [37]:
# How to compute the backward for W in first fc layer.
# Essentially : use tensordot, and whatever your convention for where y is.
# I used the first dimension arbitrarily.

grad = params[0]['Weights']['Gradient']
print(grad.shape)
gradYDim = np.reshape(grad[:,0,0],(-1,1))
print(gradYDim.shape)
print(dLdZ.shape)
x1 = nn.layers[0].forward(x)
gradx1 = nn.layers[1].x_gradient(x1)
print(gradx1.shape)
gradx2 = nn.layers[2].x_gradient(nn.layers[1].forward(x1))
print(gradx2.shape)

firstProd = np.tensordot(gradx1,grad,(1,0))
secondProd = np.matmul(gradx2, firstProd)
backward = np.tensordot(dLdZ, secondProd, (0,1))
print(backward.shape)

# backward = np.tensordot(dLdZ, np.matmul(gradx2,np.tensordot(gradx1,grad,(2,1))),1
# print(backward.shape)

(20, 20, 5)
(20, 1)
(10, 1)
(20, 20)
(10, 20)
(1, 20, 5)


In [54]:
# Get updates via back prop

# Difficulties:
# 1: need to forward prop to the layer - cache this in an initial forward prop loop?
# 2: too many nested loops
# 3. getting gradients is a bit confusing with current set up. Using indexing to identify layers seems not great.
# 4. need to multiply over right dimensions in Weights derivative?

updates = []
step = 1e-3
for i,layer_params in zip(range(len(params)),params):
    layer_updates = {}
    for param in layer_params:
        dLdW=np.eye(nn.layers[2].NumHidden)
        for j in range(len(nn.layers)-1, i, -1):
            print(j)
            xhat = x
            for k in range(j):
                xhat = nn.layers[k].forward(xhat)
            dLdW = np.matmul(dLdW, nn.layers[j].x_gradient(xhat))
        layer_updates[param] = {}
        layer_updates[param]['Update'] = step*(np.matmul(dLdW, layer_params[param]['Gradient']))
        layer_updates[param]['Name'] = layer_params[param]['Name']
    updates.append(layer_updates)
    
for i in range(len(nn.layers)):
    update = updates[i]
    layer = nn.layers[i]
    for param in update:
        current = getattr(layer, update[param]['Name'])
        new = current - update[param]['Update']
        setattr(layer, update[param]['Name'], new)

2
1
2
1


ValueError: operands could not be broadcast together with shapes (20,5) (20,10,5) 

In [55]:
print(layer_updates)

{'Weights': {'Update': array([[[0.001, 0.002, 0.003, 0.004, 0.005],
        [0.   , 0.   , 0.   , 0.   , 0.   ],
        [0.   , 0.   , 0.   , 0.   , 0.   ],
        [0.   , 0.   , 0.   , 0.   , 0.   ],
        [0.   , 0.   , 0.   , 0.   , 0.   ],
        [0.   , 0.   , 0.   , 0.   , 0.   ],
        [0.   , 0.   , 0.   , 0.   , 0.   ],
        [0.   , 0.   , 0.   , 0.   , 0.   ],
        [0.   , 0.   , 0.   , 0.   , 0.   ],
        [0.   , 0.   , 0.   , 0.   , 0.   ]],

       [[0.   , 0.   , 0.   , 0.   , 0.   ],
        [0.001, 0.002, 0.003, 0.004, 0.005],
        [0.   , 0.   , 0.   , 0.   , 0.   ],
        [0.   , 0.   , 0.   , 0.   , 0.   ],
        [0.   , 0.   , 0.   , 0.   , 0.   ],
        [0.   , 0.   , 0.   , 0.   , 0.   ],
        [0.   , 0.   , 0.   , 0.   , 0.   ],
        [0.   , 0.   , 0.   , 0.   , 0.   ],
        [0.   , 0.   , 0.   , 0.   , 0.   ],
        [0.   , 0.   , 0.   , 0.   , 0.   ]],

       [[0.   , 0.   , 0.   , 0.   , 0.   ],
        [0.   , 0.   , 0.   