# Mnist with Fully Convolutional Networks

In [1]:
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt

In [2]:
from tensorflow.examples.tutorials.mnist import input_data
mnist = input_data.read_data_sets('../MNIST_data', validation_size=0, one_hot=True)

Extracting ../MNIST_data/train-images-idx3-ubyte.gz
Extracting ../MNIST_data/train-labels-idx1-ubyte.gz
Extracting ../MNIST_data/t10k-images-idx3-ubyte.gz
Extracting ../MNIST_data/t10k-labels-idx1-ubyte.gz


### FCN architecture

1. input: [batch_size, 28, 28, 1] images
2. layer1: conv layer, 32 filters, kernel_size=3, padding=same, activation=relu, strides=2, max_pooling
3. layer2: conv layer, 64 filters, kernel_size=3, padding=same, activation=relu, strides=2, max_pooling

In [30]:
def define_inputs():
    x = tf.placeholder(dtype=tf.float32, shape=[None, 28, 28, 1])
    y_ = tf.placeholder(dtype=tf.float32, shape=[None, 1, 1, 10])
    is_train = tf.placeholder(dtype=tf.bool)
    return x, y_, is_train

In [31]:
def define_model(x, is_train):
    layer1 = tf.layers.conv2d(x, filters=64, kernel_size=3, padding="same", activation=tf.nn.relu)
    layer1 = tf.layers.max_pooling2d(layer1, pool_size=2, strides=2, padding="valid")
    # now 14x14x64
    
    layer2 = tf.layers.conv2d(layer1, filters=128, kernel_size=5, padding="valid", activation=tf.nn.relu)
    # now 10x10x128
    layer2 = tf.layers.max_pooling2d(layer2, pool_size=2, strides=2, padding="valid")
    # now 5x5x128
    
    layer3 = tf.layers.conv2d(layer2, filters=1024, kernel_size=5, padding="valid", activation=tf.nn.relu)
    # now 1x1x1024
    
    # dropout 
    layer4 = tf.layers.dropout(layer3, rate=0.5, training=is_train)
    
    logits = tf.layers.conv2d(layer4, filters=10, kernel_size=1, padding="valid", activation=None)
    # now 1x1x10
    
    return logits

In [32]:
def define_loss(logits, y_):
    loss = tf.nn.sigmoid_cross_entropy_with_logits(labels=y_, logits=logits)
    loss = tf.reduce_mean(loss)
    return loss 

In [33]:
def define_opt(loss, learn_rate=0.001, beta=0.9):
    opt = tf.train.AdamOptimizer(learning_rate=learn_rate, beta1=beta).minimize(loss)
    return opt

In [34]:
def make_networks(learn_rate=0.001, beta=0.9):
    x, y_, is_train = define_inputs()
    logits = define_model(x, is_train)
    loss = define_loss(logits, y_)
    opt = define_opt(loss, learn_rate=learn_rate, beta=beta)
    return logits, opt

In [65]:
def train(mnist, batch_size, steps, learn_rate=0.001, beta=0.9):
    # first, define model 
    tf.reset_default_graph()
    x, y_, is_train=define_inputs()
    logits = define_model(x, is_train)
    loss = define_loss(logits, y_)
    opt = define_opt(loss, learn_rate=learn_rate, beta=beta)
    
    # second, calc accuracy 
    correct_pred = tf.equal(tf.argmax(logits, 3), tf.argmax(y_, 3))
    accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))
    
    # start training
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        for step in range(steps):
            batch = mnist.train.next_batch(batch_size)
            imgs = batch[0].reshape(batch_size, 28, 28, 1)
            labels = batch[1].reshape(batch_size, 1, 1, 10)
            
            if step%20 == 0:
                train_acc = sess.run(accuracy, feed_dict={x: imgs, y_:labels, is_train:False})
                print("step {0}, acc: {1}".format(step, train_acc))
            
            sess.run(opt, feed_dict={x:imgs, y_:labels, is_train:True})
        
        # test model after training is finished 
        batch_size_test = 1000
        batch_test = mnist.test.next_batch(batch_size_test)
        imgs_test = batch_test[0].reshape(batch_size_test, 28, 28, 1)
        labels_test = batch_test[1].reshape(batch_size_test, 1, 1, 10)

        test_acc = sess.run(accuracy, feed_dict={x: imgs, y_:labels, is_train:False})
        print("testing, acc: {0}".format(test_acc))
            
    
    

In [66]:
train(mnist, batch_size=50, steps=400)

step 0, acc: 0.03999999910593033
step 20, acc: 0.6800000071525574
step 40, acc: 0.8999999761581421
step 60, acc: 0.8799999952316284
step 80, acc: 0.9399999976158142
step 100, acc: 0.9399999976158142
step 120, acc: 0.9599999785423279
step 140, acc: 1.0
step 160, acc: 0.9599999785423279
step 180, acc: 0.9800000190734863
step 200, acc: 0.9800000190734863
step 220, acc: 0.9800000190734863
step 240, acc: 0.9800000190734863
step 260, acc: 1.0
step 280, acc: 0.9599999785423279
step 300, acc: 0.9599999785423279
step 320, acc: 1.0
step 340, acc: 0.9800000190734863
step 360, acc: 1.0
step 380, acc: 0.9399999976158142
testing, acc: 1.0
