# Lab 10 - NN, ReLU, Xavier, Dropout and Adam

In [2]:
import gzip
import os
import random
import tempfile
import urllib
import urllib.request

import matplotlib.pyplot as plt
import numpy

# Base URL of the MNIST archives; download() appends a file name to it.
SOURCE_URL = 'http://yann.lecun.com/exdb/mnist/'
# Names of the four gzipped archives (training/test images and labels) that
# are fetched with download() and decoded with extract_images()/extract_labels().
TRAIN_IMAGES = 'train-images-idx3-ubyte.gz'
TRAIN_LABELS = 'train-labels-idx1-ubyte.gz'
TEST_IMAGES = 't10k-images-idx3-ubyte.gz'
TEST_LABELS = 't10k-labels-idx1-ubyte.gz'

# Number of leading training examples that are split off as the validation set.
VALIDATION_SIZE = 5000

def _read32(bytestream):
    dt = numpy.dtype(numpy.uint32).newbyteorder('>')
    return numpy.frombuffer(bytestream.read(4), dtype=dt)[0]

def download(filename, workdir):
    """Download one file from SOURCE_URL into ``workdir``.

    Args:
        filename: Name of the remote file. It is appended to SOURCE_URL to
            build the URL and joined to ``workdir`` to build the local path.
        workdir: Destination directory; created (including missing parents)
            when it does not exist yet.

    Returns:
        The local path of the downloaded file.
    """
    # exist_ok=True replaces the racy "check, then mkdir" sequence and also
    # copes with a workdir whose parent directories are missing.
    os.makedirs(workdir, exist_ok=True)

    filepath = os.path.join(workdir, filename)
    url = SOURCE_URL + filename

    print('Downloading', url)
    urllib.request.urlretrieve(url, filename=filepath)
    return filepath

def extract_images(filepath):
    """Decode a gzip-compressed MNIST image file into a 2-D array.

    The header is read as four 32-bit fields via ``_read32`` (magic number,
    image count, rows, columns); the remaining bytes are one unsigned byte
    per pixel.

    Args:
        filepath: Path of the gzipped image file.

    Returns:
        Array of shape ``(num_images, rows * cols)`` holding the pixel bytes
        converted to float32 and multiplied by 1/255.

    Raises:
        ValueError: If the magic number read from the file is not 2051.
    """
    print('Extracting', filepath)
    with gzip.open(filepath) as stream:
        header_magic = _read32(stream)
        if header_magic != 2051:
            raise ValueError('Invalid magic number %d in MNIST image file: %s' % (header_magic, filepath))

        num_images = _read32(stream)
        rows = _read32(stream)
        cols = _read32(stream)

        raw_pixels = stream.read(rows * cols * num_images)
        images = numpy.frombuffer(raw_pixels, dtype=numpy.uint8).reshape(num_images, rows, cols, 1)

        # Flatten every image into a single row so it can feed a dense layer.
        count, height, width = images.shape[:3]
        flattened = images.reshape(count, height * width).astype(numpy.float32)
        return numpy.multiply(flattened, 1. / 255.)

def extract_labels(filepath):
    """Decode a gzip-compressed MNIST label file into a one-hot matrix.

    The header is read as two 32-bit fields via ``_read32`` (magic number and
    item count); the remaining bytes are one unsigned byte per label.

    Args:
        filepath: Path of the gzipped label file.

    Returns:
        Array of shape ``(num_labels, 10)`` that is 1 in the column given by
        each label and 0 elsewhere.

    Raises:
        ValueError: If the magic number read from the file is not 2049.
        IndexError: If a label value falls outside 0..9.
    """
    print('Extracting', filepath)
    with gzip.open(filepath) as bytestream:
        magic = _read32(bytestream)
        if magic != 2049:
            # This is the label file, so the message must say so.
            raise ValueError('Invalid magic number %d in MNIST label file: %s' % (magic, filepath))
        num_items = _read32(bytestream)
        buf = bytestream.read(num_items)
        labels = numpy.frombuffer(buf, dtype=numpy.uint8)

    num_labels = labels.shape[0]
    num_classes = 10    # 0..9
    labels_one_hot = numpy.zeros((num_labels, num_classes))
    # Index (row i, column labels[i]) directly. Unlike flat-offset arithmetic,
    # an out-of-range label raises instead of silently marking another row.
    labels_one_hot[numpy.arange(num_labels), labels] = 1
    return labels_one_hot

# Fetch the four MNIST archives into a fresh temporary directory and decode them.
workdir = tempfile.mkdtemp()
train_images = extract_images(download(TRAIN_IMAGES, workdir))
train_labels = extract_labels(download(TRAIN_LABELS, workdir))
test_images = extract_images(download(TEST_IMAGES, workdir))
test_labels = extract_labels(download(TEST_LABELS, workdir))

# Hold out the first VALIDATION_SIZE training examples as the validation split;
# the remainder stays in the training split.
validation_images, train_images = train_images[:VALIDATION_SIZE], train_images[VALIDATION_SIZE:]
validation_labels, train_labels = train_labels[:VALIDATION_SIZE], train_labels[VALIDATION_SIZE:]

# Every split must have exactly one label per image.
assert len(train_images) == len(train_labels)
assert len(validation_images) == len(validation_labels)
assert len(test_images) == len(test_labels)

# Split sizes used by the training cells.
num_train_data, num_validation_data, num_test_data = (
    len(train_labels),
    len(validation_labels),
    len(test_labels),
)


Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz
Extracting /var/folders/3z/zvzt19xs7k955z3mjvlnm7sc0000gn/T/tmpgy2ui0l9/train-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz
Extracting /var/folders/3z/zvzt19xs7k955z3mjvlnm7sc0000gn/T/tmpgy2ui0l9/train-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz
Extracting /var/folders/3z/zvzt19xs7k955z3mjvlnm7sc0000gn/T/tmpgy2ui0l9/t10k-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz
Extracting /var/folders/3z/zvzt19xs7k955z3mjvlnm7sc0000gn/T/tmpgy2ui0l9/t10k-labels-idx1-ubyte.gz


In [3]:
import tensorflow as tf

# Start from an empty default graph so that re-running this cell does not pile
# a second copy of every op and variable onto the graph of an earlier run.
tf.reset_default_graph()

# Hyperparameters.
learning_rate = .001
training_epochs = 15
batch_size = 100


X = tf.placeholder(tf.float32, shape=[None, 784])    # image of shape 28 x 28 = 784
Y = tf.placeholder(tf.float32, shape=[None, 10])     # 0..9 digits

# Baseline model: a single linear layer producing 10 logits, with weights and
# bias drawn from tf.random_normal.
W = tf.Variable(tf.random_normal([784, 10]))
b = tf.Variable(tf.random_normal([10]))
hypothesis = tf.matmul(X, W) + b

cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=hypothesis, labels=Y))
optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(cost)

# Accuracy: fraction of examples whose arg-max logit matches the one-hot label.
correct_prediction = tf.equal(tf.argmax(hypothesis, 1), tf.argmax(Y, 1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

# Number of full mini-batches per epoch (integer division, loop-invariant).
total_batch = num_train_data // batch_size

# The context manager closes the session even if training raises.
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())

    for epoch in range(training_epochs):
        avg_cost = 0

        for i in range(total_batch):
            batch_xs = train_images[i*batch_size:(i+1)*batch_size]
            batch_ys = train_labels[i*batch_size:(i+1)*batch_size]
            c, _ = sess.run([cost, optimizer], feed_dict={X: batch_xs, Y: batch_ys})
            avg_cost += c / total_batch

        print('Epoch:', '{:04d}'.format(epoch + 1), ', cost = ', '{:.9f}'.format(avg_cost))

    print('Learning finished')

    print('Accuracy: ', sess.run(accuracy, feed_dict={X: test_images, Y: test_labels}))

Epoch: 0001 , cost =  4.849802413
Epoch: 0002 , cost =  1.626679888
Epoch: 0003 , cost =  1.096894575
Epoch: 0004 , cost =  0.883146428
Epoch: 0005 , cost =  0.762786742
Epoch: 0006 , cost =  0.683292284
Epoch: 0007 , cost =  0.625944211
Epoch: 0008 , cost =  0.582150302
Epoch: 0009 , cost =  0.547327469
Epoch: 0010 , cost =  0.518842767
Epoch: 0011 , cost =  0.495057617
Epoch: 0012 , cost =  0.474860831
Epoch: 0013 , cost =  0.457458310
Epoch: 0014 , cost =  0.442271223
Epoch: 0015 , cost =  0.428871944
Learning finished
Accuracy:  0.9009


In [4]:
# Start from an empty default graph so the layers below are not added on top of
# the ops left behind by a previous cell or a previous run of this cell.
tf.reset_default_graph()

# NOTE: learning_rate, training_epochs and batch_size are not set in this cell;
# they must already be defined by an earlier cell.

X = tf.placeholder(tf.float32, shape=[None, 784])    # image of shape 28 x 28 = 784
Y = tf.placeholder(tf.float32, shape=[None, 10])     # 0..9 digits

# Two hidden ReLU layers of 256 units, all parameters drawn from tf.random_normal.
W1 = tf.Variable(tf.random_normal([784, 256]))
b1 = tf.Variable(tf.random_normal([256]))
L1 = tf.nn.relu(tf.matmul(X, W1) + b1)

W2 = tf.Variable(tf.random_normal([256, 256]))
b2 = tf.Variable(tf.random_normal([256]))
L2 = tf.nn.relu(tf.matmul(L1, W2) + b2)

# Output layer: 10 logits, no activation (softmax is applied inside the loss).
W3 = tf.Variable(tf.random_normal([256, 10]))
b3 = tf.Variable(tf.random_normal([10]))
hypothesis = tf.matmul(L2, W3) + b3

cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=hypothesis, labels=Y))
optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(cost)

# Accuracy: fraction of examples whose arg-max logit matches the one-hot label.
correct_prediction = tf.equal(tf.argmax(hypothesis, 1), tf.argmax(Y, 1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

# Number of full mini-batches per epoch (integer division, loop-invariant).
total_batch = num_train_data // batch_size

# The context manager closes the session even if training raises.
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())

    for epoch in range(training_epochs):
        avg_cost = 0

        for i in range(total_batch):
            batch_xs = train_images[i*batch_size:(i+1)*batch_size]
            batch_ys = train_labels[i*batch_size:(i+1)*batch_size]
            c, _ = sess.run([cost, optimizer], feed_dict={X: batch_xs, Y: batch_ys})
            avg_cost += c / total_batch

        print('Epoch:', '{:04d}'.format(epoch + 1), ', cost = ', '{:.9f}'.format(avg_cost))

    print('Learning finished')

    print('Accuracy: ', sess.run(accuracy, feed_dict={X: test_images, Y: test_labels}))

Epoch: 0001 , cost =  166.226272164
Epoch: 0002 , cost =  41.136390598
Epoch: 0003 , cost =  26.349806695
Epoch: 0004 , cost =  18.620440508
Epoch: 0005 , cost =  13.632632707
Epoch: 0006 , cost =  10.225308105
Epoch: 0007 , cost =  7.779223825
Epoch: 0008 , cost =  5.863761199
Epoch: 0009 , cost =  4.535676005
Epoch: 0010 , cost =  3.435842434
Epoch: 0011 , cost =  2.604721716
Epoch: 0012 , cost =  1.997740422
Epoch: 0013 , cost =  1.537656595
Epoch: 0014 , cost =  1.135337872
Epoch: 0015 , cost =  0.955357635
Learning finished
Accuracy:  0.9433


In [7]:
import tensorflow as tf
# tf.get_variable refuses to create a name that already exists, so the graph is
# cleared first to keep this cell re-runnable.
tf.reset_default_graph()

# Hyperparameters.
learning_rate = .001
training_epochs = 15
batch_size = 100

X = tf.placeholder(tf.float32, shape=[None, 784])    # image of shape 28 x 28 = 784
Y = tf.placeholder(tf.float32, shape=[None, 10])     # 0..9 digits

# Two hidden ReLU layers of 256 units. Weights use the Xavier initializer;
# biases are still drawn from tf.random_normal.
W1 = tf.get_variable("W1", shape=[784,256], initializer=tf.contrib.layers.xavier_initializer())
b1 = tf.Variable(tf.random_normal([256]))
L1 = tf.nn.relu(tf.matmul(X, W1) + b1)

W2 = tf.get_variable("W2", shape=[256,256], initializer=tf.contrib.layers.xavier_initializer())
b2 = tf.Variable(tf.random_normal([256]))
L2 = tf.nn.relu(tf.matmul(L1, W2) + b2)

# Output layer: 10 logits, no activation (softmax is applied inside the loss).
W3 = tf.get_variable("W3", shape=[256,10], initializer=tf.contrib.layers.xavier_initializer())
b3 = tf.Variable(tf.random_normal([10]))
hypothesis = tf.matmul(L2, W3) + b3

cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=hypothesis, labels=Y))
optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(cost)

# Accuracy: fraction of examples whose arg-max logit matches the one-hot label.
correct_prediction = tf.equal(tf.argmax(hypothesis, 1), tf.argmax(Y, 1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

# Number of full mini-batches per epoch (integer division, loop-invariant).
total_batch = num_train_data // batch_size

# The context manager closes the session even if training raises.
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())

    for epoch in range(training_epochs):
        avg_cost = 0

        for i in range(total_batch):
            batch_xs = train_images[i*batch_size:(i+1)*batch_size]
            batch_ys = train_labels[i*batch_size:(i+1)*batch_size]
            c, _ = sess.run([cost, optimizer], feed_dict={X: batch_xs, Y: batch_ys})
            avg_cost += c / total_batch

        print('Epoch:', '{:04d}'.format(epoch + 1), ', cost = ', '{:.9f}'.format(avg_cost))

    print('Learning finished')

    print('Accuracy: ', sess.run(accuracy, feed_dict={X: test_images, Y: test_labels}))

Epoch: 0001 , cost =  0.336163099
Epoch: 0002 , cost =  0.134684066
Epoch: 0003 , cost =  0.086793265
Epoch: 0004 , cost =  0.059878554
Epoch: 0005 , cost =  0.042300234
Epoch: 0006 , cost =  0.032108150
Epoch: 0007 , cost =  0.024569017
Epoch: 0008 , cost =  0.021572553
Epoch: 0009 , cost =  0.018631316
Epoch: 0010 , cost =  0.016060304
Epoch: 0011 , cost =  0.013056713
Epoch: 0012 , cost =  0.011068924
Epoch: 0013 , cost =  0.010429467
Epoch: 0014 , cost =  0.013630596
Epoch: 0015 , cost =  0.011559048
Learning finished
Accuracy:  0.9732


## Dropout for MNIST
Dropout is applied to the hidden layers during training to reduce overfitting.

### `keep_prob`
* Training: 0.5 ~ 0.7 (the cell below uses 0.7)
* Testing: always 1, so that no units are dropped during evaluation

In [13]:
# tf.get_variable refuses to create a name that already exists, so the graph is
# cleared first to keep this cell re-runnable.
tf.reset_default_graph()

# Probability of keeping a unit in the dropout layers; fed at run time so that
# training and evaluation can use different values.
keep_prob = tf.placeholder(tf.float32)

# Hyperparameters.
learning_rate = .001
training_epochs = 15
batch_size = 100

X = tf.placeholder(tf.float32, shape=[None, 784])    # image of shape 28 x 28 = 784
Y = tf.placeholder(tf.float32, shape=[None, 10])     # 0..9 digits

# Three hidden ReLU layers of 512 units, each followed by dropout. Weights use
# the Xavier initializer; biases are drawn from tf.random_normal.
W1 = tf.get_variable('W1', shape=[784, 512], initializer=tf.contrib.layers.xavier_initializer())
b1 = tf.Variable(tf.random_normal([512]))
L1 = tf.nn.relu(tf.matmul(X, W1) + b1)
L1 = tf.nn.dropout(L1, keep_prob=keep_prob)

W2 = tf.get_variable('W2', shape=[512, 512], initializer=tf.contrib.layers.xavier_initializer())
b2 = tf.Variable(tf.random_normal([512]))
L2 = tf.nn.relu(tf.matmul(L1, W2) + b2)
L2 = tf.nn.dropout(L2, keep_prob=keep_prob)

W3 = tf.get_variable('W3', shape=[512, 512], initializer=tf.contrib.layers.xavier_initializer())
b3 = tf.Variable(tf.random_normal([512]))
L3 = tf.nn.relu(tf.matmul(L2, W3) + b3)
L3 = tf.nn.dropout(L3, keep_prob=keep_prob)

# Output layer: 10 logits, no activation and no dropout.
W4 = tf.get_variable('W4', shape=[512, 10], initializer=tf.contrib.layers.xavier_initializer())
b4 = tf.Variable(tf.random_normal([10]))
hypothesis = tf.matmul(L3, W4) + b4

cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=hypothesis, labels=Y))
optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(cost)

# Accuracy: fraction of examples whose arg-max logit matches the one-hot label.
correct_prediction = tf.equal(tf.argmax(hypothesis, 1), tf.argmax(Y, 1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

# Number of full mini-batches per epoch (integer division, loop-invariant).
total_batch = num_train_data // batch_size

# The context manager closes the session even if training raises.
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())

    for epoch in range(training_epochs):
        avg_cost = 0

        for i in range(total_batch):
            batch_xs = train_images[i*batch_size:(i+1)*batch_size]
            batch_ys = train_labels[i*batch_size:(i+1)*batch_size]
            # Training keeps each unit with probability 0.7.
            c, _ = sess.run([cost, optimizer], feed_dict={X: batch_xs, Y: batch_ys, keep_prob: 0.7})
            avg_cost += c / total_batch

        print('Epoch:', '{:04d}'.format(epoch + 1), ', cost = ', '{:.9f}'.format(avg_cost))

    print('Learning finished')

    # Evaluation feeds keep_prob = 1 so that nothing is dropped.
    print('Accuracy: ', sess.run(accuracy, feed_dict={X: test_images, Y: test_labels, keep_prob: 1}))

Epoch: 0001 , cost =  0.426693615
Epoch: 0002 , cost =  0.161900063
Epoch: 0003 , cost =  0.121908065
Epoch: 0004 , cost =  0.100050659
Epoch: 0005 , cost =  0.082410858
Epoch: 0006 , cost =  0.073014813
Epoch: 0007 , cost =  0.066355426
Epoch: 0008 , cost =  0.058996734
Epoch: 0009 , cost =  0.057059867
Epoch: 0010 , cost =  0.049955109
Epoch: 0011 , cost =  0.047349964
Epoch: 0012 , cost =  0.047509635
Epoch: 0013 , cost =  0.040289384
Epoch: 0014 , cost =  0.040143691
Epoch: 0015 , cost =  0.038205221
Learning finished
Accuracy:  0.9807


## Optimizers

List of optimizers: https://www.tensorflow.org/api_guides/python/train

* Adam ("A Method for Stochastic Optimization"): the most commonly recommended default choice