# Lecture 8: Deep Learning Software

## NumPy

In [None]:
# Computational Graphs
import numpy as np

# Fix the global NumPy RNG so the sampled inputs are reproducible.
np.random.seed(0)
N, D = 3, 4

# Draw the three (N, D) inputs in x, y, z order so the seeded random stream
# is consumed exactly once per input, in that order.
x, y, z = (np.random.randn(N, D) for _ in range(3))

# ---- Forward pass: c = sum(x * y + z) ----
a = np.multiply(x, y)
b = np.add(a, z)
c = b.sum()

# ---- Backward pass: apply the chain rule node by node, output to inputs ----
grad_c = 1.0
# sum() passes the upstream gradient unchanged to every element of b
grad_b = np.full((N, D), grad_c)
# b = a + z: addition forwards the gradient to both operands (independent copies)
grad_a = np.array(grad_b)
grad_z = np.array(grad_b)
# a = x * y: each operand's gradient is the upstream gradient times the other operand
grad_x = y * grad_a
grad_y = x * grad_a

## TensorFlow

In [None]:
# numpy is imported and the global RNG is seeded again in this cell.
import numpy as np
np.random.seed(0)
import tensorflow as tf

# Problem size referenced by the quoted example below.
N, D = 3000, 4000
# Everything between the triple quotes is a plain string literal: it is NOT
# executed, it is only the value of the cell's last expression.
# The quoted text is a graph-mode example labelled "Tensorflow 1.6 version":
#   * placeholders x, y, z are combined as c = reduce_sum(x * y + z) inside a
#     tf.device('/gpu:0') scope,
#   * tf.gradients builds the gradient ops of c with respect to x, y and z,
#   * a tf.Session feeds random (N, D) numpy arrays and fetches c plus the
#     three gradients in one sess.run call.
# NOTE(review): presumably quoted out because the installed TensorFlow no
# longer exposes tf.placeholder / tf.Session at top level — confirm.
''' 
# Tensorflow 1.6 version
with tf.device('/gpu:0'):
  x = tf.placeholder(tf.float32)
  y = tf.placeholder(tf.float32)
  z = tf.placeholder(tf.float32)

  a = x * y
  b = a + z
  c = tf.reduce_sum(b)

grad_x, grad_y, grad_z = tf.gradients(c, [x, y, z])

with tf.Session() as sess:
  values = {
      x: np.random.randn(N, D),
      y: np.random.randn(N, D),
      z: np.random.randn(N, D)
  }
  out = sess.run([c, grad_x, grad_y, grad_z],
             feed_dict = values)
  c_val, grad_x_val, grad_y_val, grad_z_val = out
'''

" \n# Tensorflow 1.6 version\nwith tf.device('/gpu:0'):\n  x = tf.placeholder(tf.float32)\n  y = tf.placeholder(tf.float32)\n  z = tf.placeholder(tf.float32)\n\n  a = x * y\n  b = a + z\n  c = tf.reduce_sum(b)\n\ngrad_x, grad_y, grad_z = tf.gradients(c, [x, y, z])\n\nwith tf.Session() as sess:\n  values = {\n      x: np.random.randn(N, D),\n      y: np.random.randn(N, D),\n      z: np.random.randn(N, D)\n  }\n  out = sess.run([c, grad_x, grad_y, grad_z],\n             feed_dict = values)\n  c_val, grad_x_val, grad_y_val, grad_z_val = out\n"

## PyTorch

In [None]:
import torch
from torch.autograd import Variable

# Seed torch's RNG so the random inputs (and therefore the printed gradients)
# are reproducible across runs, like the seeded NumPy cells.
torch.manual_seed(0)

N, D = 3, 4

# Leaf tensors that record gradients directly. Since PyTorch 0.4 tensors and
# Variables are merged, so the deprecated Variable wrapper is not needed.
x = torch.randn(N, D, requires_grad=True)
y = torch.randn(N, D, requires_grad=True)
z = torch.randn(N, D, requires_grad=True)

# Forward pass: c = sum(x * y + z)
a = x * y
b = a + z
c = torch.sum(b)

# computes all gradients
c.backward()

# Expected: dc/dx == y, dc/dy == x, dc/dz == ones.
# .grad is already a plain tensor, so the legacy .data indirection is dropped.
print(x.grad)
print(y.grad)
print(z.grad)

tensor([[-0.9877,  0.0456,  0.3350,  0.5279],
        [ 0.8590,  1.6820, -1.4755, -0.9407],
        [ 0.9549, -0.3822,  0.4878, -0.2568]])
tensor([[ 0.9742,  0.2109,  0.6860,  1.9508],
        [-0.9324, -1.4801, -0.7895,  0.5620],
        [-1.7452,  3.0810, -1.1070,  1.1661]])
tensor([[1., 1., 1., 1.],
        [1., 1., 1., 1.],
        [1., 1., 1., 1.]])


## TensorFlow: Neural Net

In [None]:
# Train a two-layer ReLU network on random data with L2 loss
#
# The example below uses the TensorFlow 1.x graph/session API and is kept
# inside a string literal, so it is not executed when the cell runs.
# Fixes applied to the quoted code so it is valid once un-quoted under TF 1.x:
#   * `shape(N, D)` -> `shape=(N, D)` (shape is a keyword argument),
#   * w1 / w2 are tf.Variable objects: placeholders have no `.assign` and are
#     not touched by global_variables_initializer,
#   * the undefined name `d` is gone (weights are no longer fed),
#   * the assign ops are grouped and fetched inside a loop, so the weights are
#     actually updated on every step ("run the graph many times").

'''
# Define computational graph
N, D, H = 64, 1000, 100
x = tf.placeholder(tf.float32, shape=(N, D))
y = tf.placeholder(tf.float32, shape=(N, D))
# Weights live inside the graph and persist between sess.run calls
w1 = tf.Variable(tf.random_normal((D, H)))
w2 = tf.Variable(tf.random_normal((H, D)))

# Forward pass
h = tf.maximum(tf.matmul(x, w1), 0)
y_pred = tf.matmul(h, w2)
diff = y_pred - y
# L2 Euclidean Loss
loss = tf.reduce_mean(tf.reduce_sum(diff ** 2, axis = 1))

grad_w1, grad_w2 = tf.gradients(loss, [w1, w2])

# add assign operations to update w1 and w2 as part of the graph
learning_rate = 1e-5
new_w1 = w1.assign(w1 - learning_rate * grad_w1)
new_w2 = w2.assign(w2 - learning_rate * grad_w2)
# group both assigns so that fetching one op triggers both updates
updates = tf.group(new_w1, new_w2)

# Run the graph many times
with tf.Session() as sess:
  sess.run(tf.global_variables_initializer())
  values = {
      x: np.random.randn(N, D),
      y: np.random.randn(N, D) }

  for t in range(50):
    loss_val, _ = sess.run([loss, updates],
                           feed_dict = values)
'''

'\n# Define computational graph\nN, D, H = 64, 1000, 100\nx = tf.placeholder(tf.float32, shape(N, D))\ny = tf.placeholder(tf.float32, shape(N, D))\nw1 = tf.placeholder(tf.float32, shape(D, H))\nw2 = tf.placeholder(tf.float32, shape(H, D))\n\n# Forward pass\nh = tf.maximum(tf.matmul(x, w1), 0)\ny_pred = tf.matmul(h, w2)\ndiff = y_pred - y\n#L2 Euclidean Loss\nloss = tf.reduce_mean(tf.reduce_sum(diff ** 2, axis = 1))\n\ngrad_w1, grad_w2 = tf.gradients(loss, [w1, w2])\n\n# add assign operations to update w1 and w2 as part of the graph\nlearning_rate = 1e-5\nnew_w1 = w1.assign(w1 - learning_rate * grad_w1)\nnew_w2 = w2.assign(w2 - learning_rate * grad_w2)\n# Run the graph many times  \nwith tf.Session() as sess:\n  sess.run(tf.global_variables_initializer())\n  values = {\n      x: np.random.randn(N, D),\n      w1: np.random.randn(D, H),\n      w2: np.random.randn(H, d),\n      y: np.random.randn(N, D) }\n  \n  out= sess.run([loss, grad_w1, grad_w2],\n                feed_dict = values)\n 

In [None]:
# TensorFlow : Optimizer
#
# Same two-layer network, but the gradient computation and weight updates are
# delegated to tf.train.GradientDescentOptimizer. The example uses the
# TensorFlow 1.x API and is kept inside a string literal, so it is not executed.
# Fixes applied to the quoted code:
#   * `shape(N, D)` -> `shape=(N, D)`,
#   * w1 / w2 are tf.Variable objects, so minimize() has trainable variables
#     to update (as placeholders they were never fed and never trained),
#   * the sess.run call is indented into the `for` loop, whose body was empty,
#   * each step's loss is recorded in `losses`, which was created but unused.
'''
# Define computational graph
N, D, H = 64, 1000, 100
x = tf.placeholder(tf.float32, shape=(N, D))
y = tf.placeholder(tf.float32, shape=(N, D))
w1 = tf.Variable(tf.random_normal((D, H)))
w2 = tf.Variable(tf.random_normal((H, D)))

# Forward pass
h = tf.maximum(tf.matmul(x, w1), 0)
y_pred = tf.matmul(h, w2)
diff = y_pred - y
# L2 Euclidean Loss
loss = tf.reduce_mean(tf.reduce_sum(diff ** 2, axis = 1))

# minimize() adds the gradient and update ops for every trainable variable
optimizer = tf.train.GradientDescentOptimizer(1e-5)
updates = optimizer.minimize(loss)

with tf.Session() as sess:
  sess.run(tf.global_variables_initializer())
  values = {
      x: np.random.randn(N, D),
      y: np.random.randn(N, D) }
  losses = []
  for t in range(50):
    loss_val, _ = sess.run([loss, updates],
                           feed_dict = values)
    losses.append(loss_val)
'''

In [None]:
# TensorFlow : tf.layers
#
# Same network built with tf.layers.dense, which creates and initializes the
# weight variables itself. The example uses the TensorFlow 1.x API and is kept
# inside a string literal, so it is not executed.
# Fixes applied to the quoted code:
#   * `shape(N, D)` -> `shape=(N, D)`,
#   * the training loop is indented consistently with the rest of the `with`
#     block (it was over-indented, which is an IndentationError),
#   * mean_squared_error is called with explicit labels / predictions keywords.
'''
N, D, H = 64, 1000, 100
x = tf.placeholder(tf.float32, shape=(N, D))
y = tf.placeholder(tf.float32, shape=(N, D))
init = tf.contrib.layers.xavier_initializer() # use Xavier initializer
h = tf.layers.dense(inputs = x, units = H, activation = tf.nn.relu, kernel_initializer = init)
y_pred = tf.layers.dense(inputs = h, units = D, kernel_initializer = init)

loss = tf.losses.mean_squared_error(labels = y, predictions = y_pred)

optimizer = tf.train.GradientDescentOptimizer(1e0)
updates = optimizer.minimize(loss)

with tf.Session() as sess:
  sess.run(tf.global_variables_initializer())
  values = { x: np.random.randn(N, D),
             y: np.random.randn(N, D)}
  for t in range(50):
    loss_val, _ = sess.run([loss, updates],
                           feed_dict = values)

'''

In [None]:
# Keras : High-Level-wrapper
# The three imports below are live code; the model example that follows is
# wrapped in a string literal, so none of it runs when the cell executes.
from keras.models import Sequential
from keras.layers.core import Dense, Activation
from keras.optimizers import SGD
# The quoted example:
#   * stacks Dense(D -> H), a ReLU activation and Dense(H -> D) in a
#     Sequential model,
#   * compiles it with an SGD optimizer and 'mean_squared_error' loss,
#   * fits it on random (N, D) inputs and targets with batch_size = N,
#     keeping the returned history object.
# It relies on `np` (numpy) being imported by an earlier cell.
# NOTE(review): `output_dim`, `nb_epoch` and `lr` look like older Keras
# argument names — confirm against the installed Keras version before
# un-quoting this code.
'''
N, D, H = 64, 1000, 100

model = Sequential()
model.add(Dense(input_dim = D, output_dim = H))
model.add(Activation('relu'))
model.add(Dense(input_dim = H, output_dim = D))

optimizer = SGD(lr = 1e0)
model.compile(loss = 'mean_squared_error', optimizer = optimizer)

x = np.random.randn(N, D)
y = np.random.randn(N, D)
history = model.fit(x, y, nb_epoch = 50,
                    batch_size = N, verbose = 0)

'''

"\nN, D, H = 64, 1000, 100\n\nmodel = Sequential()\nmodel.add(Dense(input_dim = D, output_dim = H))\nmodel.add(Activation('relu'))\nmodel.add(Dense(input_dim = H, output_dim = D))\n\noptimizer = SGD(lr = 1e0)\nmodel.compile(loss = 'mean_squared_error', optimizer = optimizer)\n\nx = np.random.randn(N, D)\ny = np.random.randn(N, D)\nhistory = model.fit(x, y, nb_epoch = 50,\n                    batch_size = N, verbose = 0)\n\n"