In [1]:
%matplotlib inline


Warm-up: numpy
--------------

A fully-connected ReLU network with one hidden layer and no biases, trained to
predict y from x using Euclidean error.

This implementation uses numpy to manually compute the forward pass, loss, and
backward pass.

A numpy array is a generic n-dimensional array; it does not know anything about
deep learning or gradients or computational graphs, and is just a way to perform
generic numeric computations.



In [3]:
import numpy as np

# N is batch size; D_in is input dimension;
# H is hidden dimension; D_out is output dimension.
N, D_in, H, D_out = 64, 1000, 100, 10

# Create random input and output data
x = np.random.randn(N, D_in)
y = np.random.randn(N, D_out)

# Randomly initialize weights
w1 = np.random.randn(D_in, H)
w2 = np.random.randn(H, D_out)

learning_rate = 1e-6
for t in range(2):
    # Forward pass: compute predicted y
    h = x.dot(w1)
    h_relu = np.maximum(h, 0)
    y_pred = h_relu.dot(w2)

    # Compute and print loss
    loss = np.square(y_pred - y).sum()
    print(t, loss)

    # Backprop to compute gradients of w1 and w2 with respect to loss
    grad_y_pred = 2.0 * (y_pred - y)
    grad_w2 = h_relu.T.dot(grad_y_pred)
    grad_h_relu = grad_y_pred.dot(w2.T)
    grad_h = grad_h_relu.copy()
    grad_h[h < 0] = 0
    grad_w1 = x.T.dot(grad_h)

    # Update weights
    w1 -= learning_rate * grad_w1
    w2 -= learning_rate * grad_w2

0 26745629.198
1 20126949.3262


In [8]:
import numpy as np

# N is batch size; D_in is input dimension;
# H is hidden dimension; D_out is output dimension.
N, D_in, H, D_out = 2, 10, 10, 2

# Create random input and output data
x = np.random.randn(N, D_in)
print(x)
y = np.random.randn(N, D_out)
print(y)
# Randomly initialize weights
w1 = np.random.randn(D_in, H)
print(w1)
w2 = np.random.randn(H, D_out)

learning_rate = 1e-6
for t in range(2):
    # Forward pass: compute predicted y
    h = x.dot(w1)
    print(x.shape,w1.shape,h.shape)
    h_relu = np.maximum(h, 0)
    y_pred = h_relu.dot(w2)

    # Compute and print loss
    loss = np.square(y_pred - y).sum()
    print(t, loss)

    # Backprop to compute gradients of w1 and w2 with respect to loss
    grad_y_pred = 2.0 * (y_pred - y)
    grad_w2 = h_relu.T.dot(grad_y_pred)
    grad_h_relu = grad_y_pred.dot(w2.T)
    grad_h = grad_h_relu.copy()
    grad_h[h < 0] = 0
    grad_w1 = x.T.dot(grad_h)

    # Update weights
    w1 -= learning_rate * grad_w1
    w2 -= learning_rate * grad_w2

[[ 0.11775788  0.23780472 -1.02606852  1.00063258  1.00039596  0.31268485
  -1.75148429  0.69984025  0.88554264 -0.6793946 ]
 [-0.34425306 -1.08798413  2.23359899 -1.7940647  -0.4537034   0.48559513
   0.76120268  0.24791524 -0.62999207  0.05294884]]
[[-0.89098639 -0.26546819]
 [-1.27465242 -0.34272358]]
[[-1.14892994  0.23121612  1.08752472  1.01325244  0.1914233  -1.20131496
  -1.23316885 -1.04501256  0.51408315  0.65159334]
 [-0.37446678  0.85676305 -1.33048715  0.61041294 -1.41323425  0.24104083
  -1.50678513 -0.63667013  2.34743816  0.07828749]
 [ 1.32360918  1.24167997 -0.6078787   1.42931597 -1.40036564 -1.35517548
   0.30962952 -0.50656    -0.70602446  0.52550044]
 [-1.52214646  1.03204344 -1.01298744 -0.78511719  0.08670349 -0.26090849
   0.20108174  1.9696811  -0.12814443 -0.12927743]
 [-0.01302479 -0.22216839 -0.94538482  0.61198803 -1.06512162  0.83342853
   0.60841652  1.03044665 -1.30749622  0.18137457]
 [-1.75438103  0.11069258 -1.85254437  0.01231894  0.1912688   1.5930

In [16]:
x = np.random.randn(2,3)
w = np.random.randn(3,3)
print(x)
print(w)
y = np.zeros((2,3))
print(y)
for i in range(2):
    for j in range(3):
        for k in range(3):
            y[i,j] += x[i,k]*w[k,j]
print(y)
print(x.dot(w))
        

[[ 0.32542228 -0.67337973  0.73552769]
 [ 1.08543382  1.9117296  -0.3103033 ]]
[[ 0.63589227  0.62559243  0.42424098]
 [ 0.52668423 -0.44175642  1.49488721]
 [-1.61439249 -1.11604034 -1.81831939]]
[[ 0.  0.  0.]
 [ 0.  0.  0.]]
[[-1.33515536 -0.31982704 -2.20599355]
 [ 2.19804812  0.18083135  3.88253613]]
[[-1.33515536 -0.31982704 -2.20599355]
 [ 2.19804812  0.18083135  3.88253613]]
