In [None]:
%matplotlib inline


TensorFlow: Static Graphs
-------------------------

A fully-connected ReLU network with one hidden layer and no biases, trained to
predict y from x by minimizing squared Euclidean distance.

This implementation uses basic TensorFlow operations to set up a computational
graph, then executes the graph many times to actually train the network.

One of the main differences between TensorFlow and PyTorch is that TensorFlow
uses static computational graphs while PyTorch uses dynamic computational
graphs.

In TensorFlow we first set up the computational graph, then execute the same
graph many times.



In [4]:
import tensorflow as tf
import numpy as np

# First we set up the computational graph:

# N is batch size; D_in is input dimension;
# H is hidden dimension; D_out is output dimension.

N, D_in, H, D_out = 64, 1000, 100, 10

# Create placeholders for the input and target data; these will be filled with real data when we execute the graph.
x = tf.placeholder(tf.float32, shape=(None, D_in))
y = tf.placeholder(tf.float32, shape=(None, D_out))

# Create Variables for the weights and initialize them with random data.
# A tensorflow Variable persists its value across executions of the graph.
w1 = tf.Variable(tf.random_normal((D_in, H)))
w2 = tf.Variable(tf.random_normal((H, D_out)))

# Forward pass: Compute the predicted y using operations on Tensorflow Tensors.
# Note that this code does not actually perform any numeric operations; 
# it merely sets up the computational graph that we will execute later.
h = tf.matmul(x, w1)
h_relu = tf.maximum(h, tf.zeros(1))
y_pred = tf.matmul(h_relu, w2)

# Compute loss using operations on Tensorflow Tensors
loss = tf.reduce_sum((y - y_pred) ** 2.0)

# Compute gradient of the loss with respect to w1 and w2
grad_w1, grad_w2 = tf.gradients(loss, [w1, w2])

# Update the weights using gradient descent. To actually update the weights we need to evaluate new_w1 and new_w2
# when executing the graph. Note that in Tensorflow the act of updating the value of the weights is part of the 
# computational graph; in Pytorch this happens outside the computational graph.
learning_rate = 1e-6
new_w1 = w1.assign(w1 - learning_rate * grad_w1)
new_w2 = w2.assign(w2 - learning_rate * grad_w2)

# Now we have built our computational graph, so we enter a Tensorflow session to actually execute the graph.

# Create numpy arrays holding the actual data for the inputs x and targets y
x_value = np.random.randn(N, D_in)
y_value = np.random.randn(N, D_out)

with tf.Session() as sess:
    # Run the graph once to initialize the Variables w1 and w2.
    sess.run(tf.global_variables_initializer())
    

    for t in range(500):
        # Execute the graph many times. Each time it executes we want to bind x_value and y_value to x and y
        # specified with the feed_dict argument. 
        # Each time we execute the graph we want to compute the values for loss, new_w1 and new_w2; 
        # the values of these Tensors are returned as numpy arrays.
        loss_value, _, _ = sess.run([loss, new_w1, new_w2], 
                                    feed_dict = {x: x_value, y: y_value})
        
        print(t, loss_value)

0 36774172.0
1 39029410.0
2 47016332.0
3 49462404.0
4 38736780.0
5 20464478.0
6 8282062.5
7 3392531.0
8 1814877.0
9 1244795.1
10 969695.5
11 794617.5
12 665141.0
13 563195.6
14 480492.03
15 412444.56
16 355949.78
17 308658.25
18 268781.9
19 235008.31
20 206199.83
21 181530.31
22 160297.56
23 141962.05
24 126079.57
25 112270.58
26 100202.39
27 89624.04
28 80322.875
29 72129.94
30 64864.1
31 58425.246
32 52713.766
33 47637.016
34 43106.62
35 39058.215
36 35436.05
37 32187.445
38 29269.957
39 26656.014
40 24301.195
41 22176.883
42 20258.234
43 18524.008
44 16952.121
45 15527.13
46 14233.115
47 13057.084
48 11988.27
49 11014.54
50 10126.842
51 9316.969
52 8577.85
53 7902.2495
54 7284.559
55 6719.069
56 6201.0557
57 5725.954
58 5290.3086
59 4890.1367
60 4522.7
61 4184.9697
62 3874.0835
63 3587.981
64 3324.74
65 3082.044
66 2858.2432
67 2651.858
68 2461.3345
69 2285.47
70 2122.8684
71 1972.55
72 1833.5527
73 1705.0416
74 1586.0264
75 1475.7561
76 1373.6702
77 1279.1185
78 1191.3738
79 1109.9