# NN, ReLU, Xavier, Dropout, and Adam
## TensorFlow

In [1]:
import tensorflow as tf
import random
from tensorflow.examples.tutorials.mnist import input_data

  from ._conv import register_converters as _register_converters


In [2]:
mnist = input_data.read_data_sets('MNIST_data/', one_hot = True)

learning_rate = 0.001
training_epochs = 15
batch_size = 100

X = tf.placeholder(tf.float32, [None, 784])
Y = tf.placeholder(tf.float32, [None, 10])

# dropout (keep_prob) rate  0.7 on training, but should be 1 for testing
keep_prob = tf.placeholder(tf.float32)

W1 = tf.get_variable("W1", shape=[784, 512],
                     initializer=tf.contrib.layers.xavier_initializer())
b1 = tf.Variable(tf.random_normal([512]))
L1 = tf.nn.relu(tf.matmul(X, W1) + b1)
L1 = tf.nn.dropout(L1, keep_prob=keep_prob)

W2 = tf.get_variable("W2", shape=[512, 512],
                     initializer=tf.contrib.layers.xavier_initializer())
b2 = tf.Variable(tf.random_normal([512]))
L2 = tf.nn.relu(tf.matmul(L1, W2) + b2)
L2 = tf.nn.dropout(L2, keep_prob=keep_prob)

W3 = tf.get_variable("W3", shape=[512, 512],
                     initializer=tf.contrib.layers.xavier_initializer())
b3 = tf.Variable(tf.random_normal([512]))
L3 = tf.nn.relu(tf.matmul(L2, W3) + b3)
L3 = tf.nn.dropout(L3, keep_prob=keep_prob)

W4 = tf.get_variable("W4", shape=[512, 512],
                     initializer=tf.contrib.layers.xavier_initializer())
b4 = tf.Variable(tf.random_normal([512]))
L4 = tf.nn.relu(tf.matmul(L3, W4) + b4)
L4 = tf.nn.dropout(L4, keep_prob=keep_prob)

W5 = tf.get_variable("W5", shape=[512, 10],
                     initializer=tf.contrib.layers.xavier_initializer())
b5 = tf.Variable(tf.random_normal([10]))
hypothesis = tf.matmul(L4, W5) + b5

Instructions for updating:
Please use alternatives such as official/mnist/dataset.py from tensorflow/models.
Instructions for updating:
Please write your own downloading logic.
Instructions for updating:
Please use tf.data to implement this functionality.
Extracting MNIST_data/train-images-idx3-ubyte.gz
Instructions for updating:
Please use tf.data to implement this functionality.
Extracting MNIST_data/train-labels-idx1-ubyte.gz
Instructions for updating:
Please use tf.one_hot on tensors.
Extracting MNIST_data/t10k-images-idx3-ubyte.gz
Extracting MNIST_data/t10k-labels-idx1-ubyte.gz
Instructions for updating:
Please use alternatives such as official/mnist/dataset.py from tensorflow/models.


In [3]:
cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits_v2(\
                                                                 logits=hypothesis, labels=Y))
optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(cost)

sess = tf.Session()
sess.run(tf.global_variables_initializer())

for epoch in range(training_epochs):
    avg_cost = 0
    total_batch = int(mnist.train.num_examples / batch_size)

    for i in range(total_batch):
        batch_xs, batch_ys = mnist.train.next_batch(batch_size)
        c, _ = sess.run([cost, optimizer], feed_dict={X: batch_xs, Y: batch_ys, keep_prob: 0.7})
        avg_cost += c / total_batch

    print('Epoch:', '%04d' % (epoch + 1), 'cost =', '{:.9f}'.format(avg_cost))

print('Learning Finished!')

correct_prediction = tf.equal(tf.argmax(hypothesis, 1), tf.argmax(Y, 1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
print('Accuracy:', sess.run(accuracy, feed_dict={X: mnist.test.images, Y: mnist.test.labels, keep_prob: 1}))

r = random.randint(0, mnist.test.num_examples - 1)
print("Label: ", sess.run(tf.argmax(mnist.test.labels[r:r + 1], 1)))
print("Prediction: ", sess.run(
    tf.argmax(hypothesis, 1), feed_dict={X: mnist.test.images[r:r + 1], keep_prob: 1}))

Epoch: 0001 cost = 0.455545127
Epoch: 0002 cost = 0.170053109
Epoch: 0003 cost = 0.128156056
Epoch: 0004 cost = 0.106163415
Epoch: 0005 cost = 0.094012907
Epoch: 0006 cost = 0.083202525
Epoch: 0007 cost = 0.076992806
Epoch: 0008 cost = 0.066918303
Epoch: 0009 cost = 0.061193185
Epoch: 0010 cost = 0.059472976
Epoch: 0011 cost = 0.054911419
Epoch: 0012 cost = 0.054906902
Epoch: 0013 cost = 0.051169861
Epoch: 0014 cost = 0.047243623
Epoch: 0015 cost = 0.044883233
Learning Finished!
Accuracy: 0.9815
Label:  [2]
Prediction:  [2]


## PyTorch

In [1]:
from torchvision import datasets, transforms
import torch
import torch.nn as nn
import torch.nn.functional as F

In [2]:
batch_size = 100
learning_rate = 0.001
num_epochs = 15

# MNIST Dataset
train_dataset = datasets.MNIST(root = './mnist_data/', train = True, 
                               transform = transforms.ToTensor(), download = True)
test_dataset = datasets.MNIST(root = './mnist_data/', train = False,
                             transform = transforms.ToTensor())

train_loader = torch.utils.data.DataLoader(dataset = train_dataset, batch_size = batch_size,
                                          shuffle = True)
test_loader = torch.utils.data.DataLoader(dataset = test_dataset, batch_size = batch_size,
                                         shuffle = False)

In [5]:
class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.l1 = nn.Linear(784, 512)
        self.l2 = nn.Linear(512, 512)
        self.l3 = nn.Linear(512, 512)
        self.l4 = nn.Linear(512, 512)
        self.l5 = nn.Linear(512, 10)
        self.relu = nn.ReLU()
        self.dropout = nn.Dropout(0.3)
        self.sigmoid = torch.sigmoid
        
        nn.init.xavier_uniform_(self.l1.weight)
        nn.init.xavier_uniform_(self.l2.weight)
        nn.init.xavier_uniform_(self.l3.weight)
        nn.init.xavier_uniform_(self.l4.weight)
        nn.init.xavier_uniform_(self.l5.weight)
    
    def forward(self, x):
        out1 = self.dropout(self.relu(self.l1(x)))
        out2 = self.dropout(self.relu(self.l2(out1)))
        out3 = self.dropout(self.relu(self.l3(out2)))
        out4 = self.dropout(self.relu(self.l4(out3)))
        out5 = self.l5(out4)
        return out5

In [6]:
model = Net()
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr = learning_rate)

model.train()
for epoch in range(num_epochs):
    avg_cost = 0
    for i, batch in enumerate(train_loader):
        batch_xs, batch_ys = batch
        y_pred = model(batch_xs.reshape(100, -1))
        loss = criterion(y_pred, batch_ys)
        avg_cost+= loss / len(train_loader)
        
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
    print(epoch, '\tCost: ', avg_cost)

0 	Cost:  tensor(0.3104, grad_fn=<ThAddBackward>)
1 	Cost:  tensor(0.1460, grad_fn=<ThAddBackward>)
2 	Cost:  tensor(0.1123, grad_fn=<ThAddBackward>)
3 	Cost:  tensor(0.0970, grad_fn=<ThAddBackward>)
4 	Cost:  tensor(0.0836, grad_fn=<ThAddBackward>)
5 	Cost:  tensor(0.0775, grad_fn=<ThAddBackward>)
6 	Cost:  tensor(0.0660, grad_fn=<ThAddBackward>)
7 	Cost:  tensor(0.0643, grad_fn=<ThAddBackward>)
8 	Cost:  tensor(0.0579, grad_fn=<ThAddBackward>)
9 	Cost:  tensor(0.0531, grad_fn=<ThAddBackward>)
10 	Cost:  tensor(0.0518, grad_fn=<ThAddBackward>)
11 	Cost:  tensor(0.0504, grad_fn=<ThAddBackward>)
12 	Cost:  tensor(0.0496, grad_fn=<ThAddBackward>)
13 	Cost:  tensor(0.0453, grad_fn=<ThAddBackward>)
14 	Cost:  tensor(0.0433, grad_fn=<ThAddBackward>)


In [9]:
correct, total = 0, 0
model.eval()
for images, labels in test_loader:
    images = images.view(-1, 28*28)
    outputs = model(images)
    _, predicted = torch.max(outputs.data, 1)
    total += labels.size(0)
    correct += (predicted == labels).sum()
print('Accuracy: %d %%' % (100 * correct / total))

Accuracy: 98 %
