In [25]:
import tensorflow as tf
import numpy as np
from utils import mnist, MNIST, NN

In [27]:
class CNN(MNIST, NN):
    '''AlexNet-style convolutional classifier for flattened, square, single-channel images.

    Layout: conv(11x11, 32) -> pool -> conv(5x5, 64) -> pool -> conv(3x3, 128)
    -> conv(3x3, 128) -> conv(3x3, 64) -> pool -> fc(625) -> dropout
    -> fc(625) -> dropout -> linear output of size ``n_output``.

    The image side length is derived from ``n_input`` (28 for ``n_input == 28*28``),
    so the flattened size feeding the first dense layer always matches the
    feature map produced by the three pooling layers.

    ``dropout_rate`` is a scalar placeholder without a default value: it has to
    be fed whenever a tensor downstream of the dropout layers is evaluated
    (e.g. 0.5 while training, 0.0 while evaluating).
    '''

    # Number of stride-2 'SAME' max-pool layers applied in build_model.
    _N_POOLS = 3
    # Side length used by build_model if __init__ of this class was bypassed.
    _DEFAULT_IMAGE_SIDE = 28

    def __init__(self, n_input, n_output, ckpt_dir='ckpt_cnn'):
        '''Create the dropout placeholder, then let the base classes finish construction.

        Args:
            n_input: number of values per flattened image; must be a perfect square.
            n_output: size of the output layer.
            ckpt_dir: forwarded unchanged to the base-class constructor.

        Raises:
            ValueError: if ``n_input`` is not a positive perfect square.
        '''
        # Validate before any graph op is created so a bad size fails fast.
        self._cnn_image_side = self._side_from_input(n_input)
        # Must exist before super().__init__ runs: build_model reads it, and the
        # base constructor presumably builds the graph (confirm in utils.NN).
        self.dropout_rate = tf.placeholder(tf.float32)
        super().__init__(n_input, n_output, ckpt_dir)

    @staticmethod
    def _side_from_input(n_input):
        '''Return the side length of a square image holding ``n_input`` values.'''
        if n_input < 1:
            raise ValueError('n_input must be a positive perfect square, got %r' % (n_input,))
        side = int(round(n_input ** 0.5))
        if side * side != n_input:
            raise ValueError('n_input must be a positive perfect square, got %r' % (n_input,))
        return side

    @staticmethod
    def _pooled_side(side, n_pools):
        '''Return the side length left after ``n_pools`` stride-2 'SAME' poolings.'''
        for _ in range(n_pools):
            # 'SAME' padding with stride 2 yields ceil(side / 2).
            side = -(-side // 2)
        return side

    @staticmethod
    def conv2d(x, W, b, stride=1, name=None):
        '''Apply a 'SAME'-padded 2-D convolution, add the bias, then ReLU.

        ``name`` is given to the final ReLU op.
        '''
        x = tf.nn.conv2d(x, W, strides=[1, stride, stride, 1], padding='SAME')
        x = tf.nn.bias_add(x, b)
        return tf.nn.relu(x, name=name)

    @staticmethod
    def maxpool(x, ksize=2, stride=2, name=None):
        '''Apply a 'SAME'-padded max-pool with a square window and square stride.'''
        return tf.nn.max_pool(x, ksize=[1, ksize, ksize, 1], strides=[1, stride, stride, 1], padding='SAME', name=name)

    @staticmethod
    def init_var(shape, name=None):
        '''Create a variable of ``shape`` initialised from N(0, 0.01**2).'''
        return tf.Variable(tf.random_normal(shape, stddev=0.01), name=name)

    def init_weights(self, n_input, n_output):
        '''Create all trainable variables.

        Args:
            n_input: number of values per flattened image; must be a perfect square.
            n_output: size of the output layer.

        Returns:
            tuple: (weights, biases), two dicts of variables keyed by layer.

        Raises:
            ValueError: if ``n_input`` is not a positive perfect square.
        '''
        # Side of the feature map after the pooling layers (4 for 28x28 input),
        # times the 64 channels produced by the last convolution.
        pooled = self._pooled_side(self._side_from_input(n_input), self._N_POOLS)
        n_flat = pooled * pooled * 64

        # NOTE: creation order and names are kept exactly as they were. The output
        # bias reuses the name 'out', which TensorFlow de-duplicates at creation
        # time (to 'out_1' in graph mode); renaming or reordering would break
        # restoring checkpoints written by earlier runs.
        weights = {
            'wc1': self.init_var([11, 11, 1, 32], name='wc1'),
            'wc2': self.init_var([5, 5, 32, 64], name='wc2'),
            'wc3': self.init_var([3, 3, 64, 128], name='wc3'),
            'wc4': self.init_var([3, 3, 128, 128], name='wc4'),
            'wc5': self.init_var([3, 3, 128, 64], name='wc5'),
            'wd1': self.init_var([n_flat, 625], name='wd1'),
            'wd2': self.init_var([625, 625], name='wd2'),
            'out': self.init_var([625, n_output], name='out')
        }
        biases = {
            'bc1': self.init_var([32], name='bc1'),
            'bc2': self.init_var([64], name='bc2'),
            'bc3': self.init_var([128], name='bc3'),
            'bc4': self.init_var([128], name='bc4'),
            'bc5': self.init_var([64], name='bc5'),
            'bd1': self.init_var([625], name='bd1'),
            'bd2': self.init_var([625], name='bd2'),
            'out': self.init_var([n_output], name='out')
        }
        return weights, biases

    def build_model(self, weights, biases):
        '''Build the AlexNet-style forward pass and return the output-layer tensor.

        Reads ``self.sample_input`` (not defined in this class; presumably the
        input placeholder created by a base class) and ``self.dropout_rate``.
        No softmax is applied to the returned tensor.
        '''
        # reshape the flat input into NHWC images for the convolutions
        side = getattr(self, '_cnn_image_side', self._DEFAULT_IMAGE_SIDE)
        sample_input = tf.reshape(self.sample_input, shape=[-1, side, side, 1])

        # convolution 1
        conv1 = self.conv2d(sample_input, weights['wc1'], biases['bc1'], stride=1, name='conv1')
        pool1 = self.maxpool(conv1, ksize=2, stride=2, name='pool1')

        # convolution 2
        conv2 = self.conv2d(pool1, weights['wc2'], biases['bc2'], stride=1, name='conv2')
        pool2 = self.maxpool(conv2, ksize=2, stride=2, name='pool2')

        # convolutions 3-5 (no pooling between them)
        conv3 = self.conv2d(pool2, weights['wc3'], biases['bc3'], stride=1, name='conv3')
        conv4 = self.conv2d(conv3, weights['wc4'], biases['bc4'], stride=1, name='conv4')
        conv5 = self.conv2d(conv4, weights['wc5'], biases['bc5'], stride=1, name='conv5')
        pool5 = self.maxpool(conv5, ksize=2, stride=2, name='pool5')

        # full layer 1: flatten to the row count of wd1, then dense + ReLU + dropout
        fc1 = tf.reshape(pool5, [-1, weights['wd1'].get_shape().as_list()[0]])
        fc1 = tf.add(tf.matmul(fc1, weights['wd1']), biases['bd1'])
        fc1 = tf.nn.relu(fc1)
        fc1 = tf.nn.dropout(fc1, rate=self.dropout_rate)

        # full layer 2
        fc2 = tf.add(tf.matmul(fc1, weights['wd2']), biases['bd2'])
        fc2 = tf.nn.relu(fc2)
        fc2 = tf.nn.dropout(fc2, rate=self.dropout_rate)

        # output layer (linear)
        out = tf.add(tf.matmul(fc2, weights['out']), biases['out'])

        return out

    def build_optimizer(self, target):
        '''Return an RMSProp training op (learning rate 0.001, decay 0.9) minimising ``target``.'''
        return tf.train.RMSPropOptimizer(learning_rate=0.001, decay=0.9).minimize(target)

In [30]:
# Dropout rates fed to network.dropout_rate: units are dropped only while training.
TRAIN_DROPOUT_RATE = 0.5
EVAL_DROPOUT_RATE = 0.0

# 28x28 flattened inputs, 10 output classes, checkpoints under 'ckpt_cnn'.
network = CNN(28*28, 10, 'ckpt_cnn')

# initial accuracy (disabled; as written it does not feed network.dropout_rate,
# which the final test call below does)
# network.test(mnist.test.images, mnist.test.labels)

# train
# NOTE(review): with save_model=False, save_interval presumably has no effect;
# restore_model=True matches the recorded "Restoring parameters" log line.
# Confirm both against utils.NN.train.
network.train(
    mnist.train.next_batch,
    batch_size=200,
    epochs=10,
    display_interval=1,
    save_model=False,
    restore_model=True,
    save_interval=500,
    train_feed_dict={network.dropout_rate: TRAIN_DROPOUT_RATE},
    test_feed_dict={network.dropout_rate: EVAL_DROPOUT_RATE},
)

# final accuracy (kept as the last expression so the cell displays the result)
network.test(
    mnist.test.images,
    mnist.test.labels,
    feed_dict={network.dropout_rate: EVAL_DROPOUT_RATE},
)

INFO:tensorflow:Restoring parameters from ckpt_cnn\model-1000
Epoch      1, Iter      200: loss = 0.0054   training accuracy = 100.0000%
Epoch      2, Iter      400: loss = 0.0020   training accuracy = 100.0000%
Epoch      3, Iter      600: loss = 0.0029   training accuracy = 100.0000%
Epoch      4, Iter      800: loss = 0.0010   training accuracy = 100.0000%
Epoch      5, Iter     1000: loss = 0.0608   training accuracy = 98.5000%
Epoch      6, Iter     1200: loss = 0.0311   training accuracy = 98.5000%
Epoch      7, Iter     1400: loss = 0.0401   training accuracy = 99.0000%
Epoch      8, Iter     1600: loss = 0.0107   training accuracy = 99.5000%
Epoch      9, Iter     1800: loss = 0.0165   training accuracy = 99.5000%
Epoch     10, Iter     2000: loss = 0.0300   training accuracy = 99.0000%


0.983