In [43]:
%pylab inline
import numpy as np

Populating the interactive namespace from numpy and matplotlib


In [44]:
def relu(s):
    """Rectified linear unit.

    Returns the element-wise maximum of 0 and `s`, i.e. negative entries
    are replaced by 0 and everything else passes through unchanged.
    """
    floor = 0
    return np.maximum(floor, s)

def l2_loss(y, y_pred, N):
    """Half of the mean squared error between targets and predictions.

    Computes (1/N) * 0.5 * r.r^T with r = y - y_pred.

    Parameters
    ----------
    y : targets.
    y_pred : predictions, broadcast against `y` when subtracting.
    N : divisor used for averaging.

    Returns
    -------
    The product r.dot(r.T) scaled by 0.5/N. When the residual is a
    (1, n) row vector this is a (1, 1) array; for a 1-D residual it is a
    scalar.
    """
    residual = y - y_pred
    sum_of_squares = residual.dot(residual.T)
    scale = (1/N) * 0.5
    return scale * sum_of_squares

def print_latex(X):
    r"""Print a 2-D collection as the rows of a LaTeX matrix body.

    Every row of `X` becomes one output line: each entry is formatted with
    four decimals and preceded by a space, entries are separated by ' &',
    and the line ends with ' \\ ' followed by a newline.

    Parameters
    ----------
    X : iterable of iterables of numbers (e.g. a 2-D NumPy array).

    Notes
    -----
    Rows are assembled with ``str.join`` so that a row without entries
    simply yields an empty line terminated by ' \\ '; it can no longer eat
    the newline of the preceding row, which is what unconditionally
    chopping the last character of the accumulated text would do.
    """
    rendered_rows = []
    for row in X:
        cells = ' &'.join(' {:.4f}'.format(value) for value in row)
        rendered_rows.append(cells + ' \\\\ \n')

    print(''.join(rendered_rows))

In [45]:
# Init: seed the RNG, then define the toy data set and the starting weights.

np.random.seed(42)

# Number of training samples.
N = 4

# The four samples are written one per row and then transposed, so X has
# shape (2, 4): one column per sample.
X = np.transpose(np.array([
    [0.75, 0.80],
    [0.20, 0.05],
    [-0.75, 0.80],
    [0.20, -0.05],
]))

# One target per sample.
y = np.array([1, 1, -1, -1])

# First-layer weights: written as (2, 3) and transposed to (3, 2).
W_1 = np.transpose(np.array([
    [0.6, 0.7, 0.0],
    [0.01, 0.43, 0.88],
]))

# Output-layer weights, shape (1, 3).
W_2 = np.array([[0.02, 0.03, 0.09]])

# Show every array under its own heading.
for _heading, _array in (('X\n', X), ('y\n', y), ('W_1\n', W_1), ('W_2\n', W_2)):
    print(_heading, _array)

X
 [[ 0.75  0.2  -0.75  0.2 ]
 [ 0.8   0.05  0.8  -0.05]]
y
 [ 1  1 -1 -1]
W_1
 [[ 0.6   0.01]
 [ 0.7   0.43]
 [ 0.    0.88]]
W_2
 [[ 0.02  0.03  0.09]]


In [46]:
# \[
# M=
#   \begin{bmatrix}
#     1 & 2 & 3 & 4 & 5 \\
#     3 & 4 & 5 & 6 & 7
#   \end{bmatrix}
# \]

## Forward

In [54]:
# Hidden layer: affine map followed by the ReLU non-linearity.
s = np.dot(W_1, X)
z = relu(s)

print('s = W.X', s.shape, '\n', s)
print('z=relu(s)', z.shape, '\n', z)

# Output layer: affine map with a linear (identity) activation, so the
# pre-activation, the activation and the prediction are the same array.
s_out = np.dot(W_2, z)
z_out = s_out
y_out = z_out

print('s_out = W_2.z', s_out.shape, '\n', s_out)
print('z_out=linear(s_out)', z_out.shape, '\n', z_out)

# Half mean squared error of the predictions against the targets.
loss = l2_loss(y, y_out, N=N)
print('Loss', loss.shape, '\n', loss)

s = W.X (3, 4) 
 [[ 0.4609029   0.12093892 -0.44161944  0.1197337 ]
 [ 0.87335435  0.16215838 -0.18042917  0.11885056]
 [ 0.7173808   0.04771543  0.68917703 -0.04019443]]
z=relu(s) (3, 4) 
 [[ 0.4609029   0.12093892  0.          0.1197337 ]
 [ 0.87335435  0.16215838  0.          0.11885056]
 [ 0.7173808   0.04771543  0.68917703  0.        ]]
s_out = W_2.z (1, 4) 
 [[ 0.20731924  0.03408093  0.05595202  0.02436202]]
z_out=linear(s_out) (1, 4) 
 [[ 0.20731924  0.03408093  0.05595202  0.02436202]]
Loss (1, 1) 
 [[ 0.46571183]]


## Backward

In [48]:
# Gradient of the loss with respect to the network output.
dL_dYout = (1/N) * (y_out - y)

# The output activation is the identity, so the error signal at the output
# pre-activation is the very same array.
dL_dSout = dL_dYout
delta_out = dL_dSout

# Gradient for the output-layer weights.
dL_dW2 = np.dot(delta_out, z.T)

# ReLU derivative: 1.0 where the hidden pre-activation is positive, else 0.0.
# (Despite its name, dL_dZ holds this mask, i.e. dz/ds.)
dL_dZ = (s > 0).astype(float)

# Push the output error back through W_2 and gate it with the ReLU mask.
dL_dS = np.dot(W_2.T, delta_out) * dL_dZ
delta_1 = dL_dS

# Gradient for the first-layer weights.
dL_dW1 = np.dot(delta_1, X.T)

In [53]:
# Print W_2 through print_latex: four decimals per entry, entries joined by '&'.
# NOTE(review): the recorded output (0.0715, 0.1330, 0.0812) matches W_2 after
# the gradient step rather than its initial values, so this cell appears to
# have been executed after the weight update - confirm the intended order.
print_latex(W_2)

 0.0715 & 0.1330 & 0.0812 \\ 



In [50]:
# Dump every intermediate quantity of the backward pass under its own label.
for _label, _quantity in (
    ('dL_dYout \n', dL_dYout),
    ('delta_out \n', delta_out),
    ('dL_dW2 \n', dL_dW2),
    ('delta_1 \n', delta_1),
    ('dL_dW1 \n', dL_dW1),
):
    print(_label, _quantity)

dL_dYout 
 [[-0.2253525  -0.24719625  0.26584     0.25148625]]
delta_out 
 [[-0.2253525  -0.24719625  0.26584     0.25148625]]
dL_dW2 
 [[-0.10294599 -0.2059524   0.01762657]]
delta_1 
 [[-0.00450705 -0.00494393  0.          0.00502973]
 [-0.00676058 -0.00741589  0.          0.00754459]
 [-0.02028173 -0.02224766  0.0239256   0.        ]]
dL_dW1 
 [[-0.00336313 -0.00410432]
 [-0.00504469 -0.00615648]
 [-0.03760503  0.00180272]]


In [51]:
# Step size of the gradient-descent update, shared by both layers.
LEARNING_RATE = 0.5

# One plain gradient-descent step. The update is applied in place, so W_1 and
# W_2 are modified for every cell that reads them afterwards, and re-running
# this cell applies a further step with the same gradients.
W_2 -= LEARNING_RATE * dL_dW2
W_1 -= LEARNING_RATE * dL_dW1

print('W_2 \n',W_2)
print('W_1 \n',W_1)

W_2 
 [[ 0.07147299  0.1329762   0.08118672]]
W_1 
 [[ 0.60168156  0.01205216]
 [ 0.70252235  0.43307824]
 [ 0.01880251  0.87909864]]
