## Example two-layer classifier models

Below example code is given for creating instances of the CIFAR-10 and CIFAR-100 data provider objects and using them to train simple two-layer feedforward network models with rectified linear activations in TensorFlow. You may wish to use this code as a starting point for your own experiments.

In [1]:
import os
import tensorflow as tf
import numpy as np
from mlp.data_providers import CIFAR10DataProvider, CIFAR100DataProvider
import matplotlib.pyplot as plt
%matplotlib inline



### CIFAR-10

In [2]:
train_data = CIFAR10DataProvider('train', batch_size=50)
valid_data = CIFAR10DataProvider('valid', batch_size=50)

In [35]:
def fully_connected_layer(inputs, input_dim, output_dim, nonlinearity=tf.nn.relu, name='fc-layer'):
    with tf.name_scope(name):
        weights = tf.Variable(
            tf.truncated_normal([input_dim, output_dim], stddev=2. / (input_dim + output_dim)**0.5), 
            name='weights'
        )
        biases = tf.Variable(tf.zeros([output_dim]), name='biases')
        outputs = nonlinearity(tf.matmul(inputs, weights) + biases)
        return outputs
    
def err_acc(outputs, targets):
    with tf.name_scope('error'):
        error = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(outputs, targets))
    with tf.name_scope('accuracy'):
        accuracy = tf.reduce_mean(tf.cast(tf.equal(tf.argmax(outputs, 1), tf.argmax(targets, 1)), tf.float32))
    return error, accuracy

In [48]:
def get_optimizer(optimizer):
    func_dict = {
        'adam': tf.train.AdamOptimizer,
        'gd': tf.train.GradientDescentOptimizer,
        'adagrad': tf.train.AdagradOptimizer,
        'momentum': tf.train.MomentumOptimizer,
        'rms': tf.train.RMSPropOptimizer
    }
    return func_dict.get(optimizer, tf.train.AdamOptimizer)

In [52]:
def build_model(num_layers=2, num_hidden=200, optimizer='gd', learning_rate=0.01):
    inputs = tf.placeholder(tf.float32, [None, train_data.inputs.shape[1]], 'inputs')
    targets = tf.placeholder(tf.float32, [None, train_data.num_classes], 'targets')
    
    lay = dict()
    
#     with tf.name_scope('fc-layer-1'):
#         lay['fc-layer-1'] = fully_connected_layer(inputs, train_data.inputs.shape[1], num_hidden)
    
#     for layer in range(num_layers):
#         with tf.name_scope('fc-layer-{}'.format(layer+2)):
#             lay['fc-layer-{}'.format(layer+2)] = fully_connected_layer(lay['fc-layer-{}'.format(layer+1)], num_hidden, num_hidden)
        
#     with tf.name_scope('output-layer'):
#         outputs = fully_connected_layer(lay['fc-layer-{}'.format(num_layers+1)], num_hidden, train_data.num_classes, tf.identity)

    with tf.name_scope('fc-layer-1'):
        hidden_1 = fully_connected_layer(inputs, train_data.inputs.shape[1], num_hidden)
    with tf.name_scope('fc-layer-2'):
        hidden_2 = fully_connected_layer(hidden_1, num_hidden, num_hidden)
    with tf.name_scope('fc-layer-3'):
        hidden_3 = fully_connected_layer(hidden_2, num_hidden, num_hidden)
    with tf.name_scope('output-layer'):
        outputs = fully_connected_layer(hidden_3, num_hidden, train_data.num_classes, tf.identity)
        
    with tf.name_scope('error'):
        error = tf.reduce_mean(
            tf.nn.softmax_cross_entropy_with_logits(outputs, targets))
    with tf.name_scope('accuracy'):
        accuracy = tf.reduce_mean(tf.cast(
                tf.equal(tf.argmax(outputs, 1), tf.argmax(targets, 1)), 
                tf.float32))
        
    global_step = tf.Variable(0, trainable=False)
    starter_learning_rate = learning_rate
    learning_rate = tf.train.exponential_decay(starter_learning_rate, global_step, 1000, 0.96)

    with tf.name_scope('train'):
        train_step = get_optimizer(optimizer)(learning_rate).minimize(error, global_step=global_step)

    init = tf.global_variables_initializer()
    
    with tf.Session() as sess:
        sess.run(init)
        for e in range(20):
            running_error = 0.
            running_accuracy = 0.

            for input_batch, target_batch in train_data:
                _, batch_error, batch_acc = sess.run(
                    [train_step, error, accuracy], 
                    feed_dict={inputs: input_batch, targets: target_batch})

                running_error += batch_error
                running_accuracy += batch_acc

            running_error /= train_data.num_batches
            running_accuracy /= train_data.num_batches
            print('End of epoch {0:02d}: err(train)={1:.2f} acc(train)={2:.2f}'
                  .format(e + 1, running_error, running_accuracy))

            if (e + 1) % 5 == 0:
                valid_error = 0.
                valid_accuracy = 0.
                for input_batch, target_batch in valid_data:
                    batch_error, batch_acc = sess.run(
                        [error, accuracy], 
                        feed_dict={inputs: input_batch, targets: target_batch})
                    valid_error += batch_error
                    valid_accuracy += batch_acc

                valid_error /= valid_data.num_batches
                valid_accuracy /= valid_data.num_batches
                print('                 err(valid)={0:.2f} acc(valid)={1:.2f}'
                       .format(valid_error, valid_accuracy))

In [53]:
build_model(optimizer='gd', learning_rate=0.1)

End of epoch 01: err(train)=2.01 acc(train)=0.27
End of epoch 02: err(train)=1.78 acc(train)=0.36
End of epoch 03: err(train)=1.69 acc(train)=0.39
End of epoch 04: err(train)=1.63 acc(train)=0.42
End of epoch 05: err(train)=1.58 acc(train)=0.43
                 err(valid)=1.61 acc(valid)=0.43
End of epoch 06: err(train)=1.54 acc(train)=0.45
End of epoch 07: err(train)=1.50 acc(train)=0.46
End of epoch 08: err(train)=1.47 acc(train)=0.47
End of epoch 09: err(train)=1.44 acc(train)=0.48
End of epoch 10: err(train)=1.41 acc(train)=0.49
                 err(valid)=1.53 acc(valid)=0.47
End of epoch 11: err(train)=1.39 acc(train)=0.50
End of epoch 12: err(train)=1.37 acc(train)=0.51
End of epoch 13: err(train)=1.35 acc(train)=0.52
End of epoch 14: err(train)=1.32 acc(train)=0.53
End of epoch 15: err(train)=1.30 acc(train)=0.53
                 err(valid)=1.49 acc(valid)=0.48
End of epoch 16: err(train)=1.28 acc(train)=0.54
End of epoch 17: err(train)=1.25 acc(train)=0.55
End of epoch 18: err

In [51]:
build_model(4, optimizer='gd', learning_rate=0.1)

End of epoch 01: err(train)=2.26 acc(train)=0.14
End of epoch 02: err(train)=1.99 acc(train)=0.26
End of epoch 03: err(train)=1.84 acc(train)=0.33
End of epoch 04: err(train)=1.77 acc(train)=0.37
End of epoch 05: err(train)=1.71 acc(train)=0.39
                 err(valid)=1.75 acc(valid)=0.39
End of epoch 06: err(train)=1.66 acc(train)=0.40
End of epoch 07: err(train)=1.62 acc(train)=0.42
End of epoch 08: err(train)=1.59 acc(train)=0.43
End of epoch 09: err(train)=1.56 acc(train)=0.44
End of epoch 10: err(train)=1.53 acc(train)=0.45
                 err(valid)=1.59 acc(valid)=0.43
End of epoch 11: err(train)=1.51 acc(train)=0.46
End of epoch 12: err(train)=1.49 acc(train)=0.46
End of epoch 13: err(train)=1.47 acc(train)=0.47
End of epoch 14: err(train)=1.46 acc(train)=0.48
End of epoch 15: err(train)=1.44 acc(train)=0.48
                 err(valid)=1.55 acc(valid)=0.45
End of epoch 16: err(train)=1.43 acc(train)=0.49
End of epoch 17: err(train)=1.41 acc(train)=0.50
End of epoch 18: err

In [4]:
inputs = tf.placeholder(tf.float32, [None, train_data.inputs.shape[1]], 'inputs')
targets = tf.placeholder(tf.float32, [None, train_data.num_classes], 'targets')
num_hidden = 200

with tf.name_scope('fc-layer-1'):
    hidden_1 = fully_connected_layer(inputs, train_data.inputs.shape[1], num_hidden)
with tf.name_scope('fc-layer-2'):
    hidden_2 = fully_connected_layer(hidden_1, num_hidden, num_hidden)
with tf.name_scope('output-layer'):
    outputs = fully_connected_layer(hidden_2, num_hidden, train_data.num_classes, tf.identity)

with tf.name_scope('error'):
    error = tf.reduce_mean(
        tf.nn.softmax_cross_entropy_with_logits(outputs, targets))
with tf.name_scope('accuracy'):
    accuracy = tf.reduce_mean(tf.cast(
            tf.equal(tf.argmax(outputs, 1), tf.argmax(targets, 1)), 
            tf.float32))

with tf.name_scope('train'):
    train_step = tf.train.AdamOptimizer().minimize(error)
    
init = tf.global_variables_initializer()

In [7]:
def run_sess(init):
    with tf.Session() as sess:
        sess.run(init)
        for e in range(100):
            running_error = 0.
            running_accuracy = 0.

            for input_batch, target_batch in train_data:
                _, batch_error, batch_acc = sess.run(
                    [train_step, error, accuracy], 
                    feed_dict={inputs: input_batch, targets: target_batch})

                running_error += batch_error
                running_accuracy += batch_acc

            running_error /= train_data.num_batches
            running_accuracy /= train_data.num_batches
            print('End of epoch {0:02d}: err(train)={1:.2f} acc(train)={2:.2f}'
                  .format(e + 1, running_error, running_accuracy))

            if (e + 1) % 5 == 0:
                valid_error = 0.
                valid_accuracy = 0.
                for input_batch, target_batch in valid_data:
                    batch_error, batch_acc = sess.run(
                        [error, accuracy], 
                        feed_dict={inputs: input_batch, targets: target_batch})
                    valid_error += batch_error
                    valid_accuracy += batch_acc

                valid_error /= valid_data.num_batches
                valid_accuracy /= valid_data.num_batches
                print('                 err(valid)={0:.2f} acc(valid)={1:.2f}'
                       .format(valid_error, valid_accuracy))

In [None]:
run_sess(init)

# Activation Functions

In [54]:
import os
def build_graph(name='graph', num_layers=2, num_hidden=200, activation=tf.nn.relu):
    graph = tf.Graph()
    
    with graph.as_default():
        inputs = tf.placeholder(tf.float32, [None, train_data.inputs.shape[1]], 'inputs')
        targets = tf.placeholder(tf.float32, [None, train_data.num_classes], 'targets')
        
        lay = dict()
    
        lay['fc-layer-1'] = fully_connected_layer(inputs, train_data.inputs.shape[1], num_hidden, activation, 'fc-layer-1')
        
        for layer in range(num_layers):
            lay['fc-layer-{}'.format(layer+2)] = fully_connected_layer(
                lay['fc-layer-{}'.format(layer+1)],
                num_hidden, num_hidden,
                activation,
                'fc-layer-{}'.format(layer+2)
            )

        outputs = fully_connected_layer(
            lay['fc-layer-{}'.format(num_layers+1)],
            num_hidden, train_data.num_classes,
            tf.identity,
            'output-layer'
        )

#         hidden_1 = fully_connected_layer(inputs, train_data.inputs.shape[1], num_hidden, activation, 'fc-layer-1')
#         hidden_2 = fully_connected_layer(hidden_1, num_hidden, num_hidden, activation, 'fc-layer-2')
#         outputs = fully_connected_layer(hidden_2, num_hidden, train_data.num_classes, tf.identity, 'output-layer')

        error, accuracy = err_acc(outputs, targets)

        with tf.name_scope('train'):
            train_step = tf.train.AdamOptimizer().minimize(error)
            
        tf.summary.scalar('error', error)
        tf.summary.scalar('accuracy', accuracy)
        summary_op = tf.summary.merge_all()
        
        train_writer = tf.summary.FileWriter(os.path.join('tf-log', name, 'train'), graph=graph)
        valid_writer = tf.summary.FileWriter(os.path.join('tf-log', name, 'valid'), graph=graph)
        
        init = tf.global_variables_initializer()

    sess = tf.InteractiveSession(graph=graph)
    num_epoch = 40
    valid_inputs = valid_data.inputs
    valid_targets = valid_data.to_one_of_k(valid_data.targets)
    sess.run(init)
    for e in range(num_epoch):
        print('Epoch {}'.format(e))
        for b, (input_batch, target_batch) in enumerate(train_data):
            _, summary = sess.run(
                [train_step, summary_op],
                feed_dict={inputs: input_batch, targets: target_batch})
            train_writer.add_summary(summary, e * train_data.num_batches + b)
            if b % 100 == 0:
                valid_summary = sess.run(
                    summary_op, feed_dict={inputs: valid_inputs, targets: valid_targets})
                valid_writer.add_summary(valid_summary, e * train_data.num_batches + b)

In [38]:
build_graph('ac/relu')

Epoch 0
Epoch 1
Epoch 2
Epoch 3
Epoch 4
Epoch 5
Epoch 6
Epoch 7
Epoch 8
Epoch 9
Epoch 10
Epoch 11
Epoch 12
Epoch 13
Epoch 14
Epoch 15
Epoch 16
Epoch 17
Epoch 18
Epoch 19
Epoch 20
Epoch 21
Epoch 22
Epoch 23
Epoch 24
Epoch 25
Epoch 26
Epoch 27
Epoch 28
Epoch 29
Epoch 30
Epoch 31
Epoch 32
Epoch 33
Epoch 34
Epoch 35
Epoch 36
Epoch 37
Epoch 38
Epoch 39


In [40]:
build_graph('ac/tanh', tf.tanh)

Epoch 0
.
.
.
.
.
.
.
.
Epoch 1
.
.
.
.
.
.
.
.
Epoch 2
.
.
.
.
.
.
.
.
Epoch 3
.
.
.
.
.
.
.
.
Epoch 4
.
.
.
.
.
.
.
.
Epoch 5
.
.
.
.
.
.
.
.
Epoch 6
.
.
.
.
.
.
.
.
Epoch 7
.
.
.
.
.
.
.
.
Epoch 8
.
.
.
.
.
.
.
.
Epoch 9
.
.
.
.
.
.
.
.
Epoch 10
.
.
.
.
.
.
.
.
Epoch 11
.
.
.
.
.
.
.
.
Epoch 12
.
.
.
.
.
.
.
.
Epoch 13
.
.
.
.
.
.
.
.
Epoch 14
.
.
.
.
.
.
.
.
Epoch 15
.
.
.
.
.
.
.
.
Epoch 16
.
.
.
.
.
.
.
.
Epoch 17
.
.
.
.
.
.
.
.
Epoch 18
.
.
.
.
.
.
.
.
Epoch 19
.
.
.
.
.
.
.
.
Epoch 20
.
.
.
.
.
.
.
.
Epoch 21
.
.
.
.
.
.
.
.
Epoch 22
.
.
.
.
.
.
.
.
Epoch 23
.
.
.
.
.
.
.
.
Epoch 24
.
.
.
.
.
.
.
.
Epoch 25
.
.
.
.
.
.
.
.
Epoch 26
.
.
.
.
.
.
.
.
Epoch 27
.
.
.
.
.
.
.
.
Epoch 28
.
.
.
.
.
.
.
.
Epoch 29
.
.
.
.
.
.
.
.
Epoch 30
.
.
.
.
.
.
.
.
Epoch 31
.
.
.
.
.
.
.
.
Epoch 32
.
.
.
.
.
.
.
.
Epoch 33
.
.
.
.
.
.
.
.
Epoch 34
.
.
.
.
.
.
.
.
Epoch 35
.
.
.
.
.
.
.
.
Epoch 36
.
.
.
.
.
.
.
.
Epoch 37
.
.
.
.
.
.
.
.
Epoch 38
.
.
.
.
.
.
.
.
Epoch 39
.
.
.
.
.
.
.
.


In [43]:
build_graph('ac/elu', tf.nn.elu)

Epoch 0
Epoch 1
Epoch 2
Epoch 3
Epoch 4
Epoch 5
Epoch 6
Epoch 7
Epoch 8
Epoch 9
Epoch 10
Epoch 11
Epoch 12
Epoch 13
Epoch 14
Epoch 15
Epoch 16
Epoch 17
Epoch 18
Epoch 19
Epoch 20
Epoch 21
Epoch 22
Epoch 23
Epoch 24
Epoch 25
Epoch 26
Epoch 27
Epoch 28
Epoch 29
Epoch 30
Epoch 31
Epoch 32
Epoch 33
Epoch 34
Epoch 35
Epoch 36
Epoch 37
Epoch 38
Epoch 39


In [44]:
build_graph('ac/sigmoid', tf.sigmoid)

Epoch 0
Epoch 1
Epoch 2
Epoch 3
Epoch 4
Epoch 5
Epoch 6
Epoch 7
Epoch 8
Epoch 9
Epoch 10
Epoch 11
Epoch 12
Epoch 13
Epoch 14
Epoch 15
Epoch 16
Epoch 17
Epoch 18
Epoch 19
Epoch 20
Epoch 21
Epoch 22
Epoch 23
Epoch 24
Epoch 25
Epoch 26
Epoch 27
Epoch 28
Epoch 29
Epoch 30
Epoch 31
Epoch 32
Epoch 33
Epoch 34
Epoch 35
Epoch 36
Epoch 37
Epoch 38
Epoch 39


In [50]:
build_graph('test/sigmoid-2', tf.sigmoid)

Epoch 0
Epoch 1
Epoch 2
Epoch 3
Epoch 4


# Model layers

In [53]:
build_graph(
    name='layers/lay=2,hidden=50',
    num_layers=2,
    num_hidden=50
)

Epoch 0
Epoch 1
Epoch 2
Epoch 3
Epoch 4
Epoch 5
Epoch 6
Epoch 7
Epoch 8
Epoch 9
Epoch 10
Epoch 11
Epoch 12
Epoch 13
Epoch 14
Epoch 15
Epoch 16
Epoch 17
Epoch 18
Epoch 19
Epoch 20
Epoch 21
Epoch 22
Epoch 23
Epoch 24
Epoch 25
Epoch 26
Epoch 27
Epoch 28
Epoch 29
Epoch 30
Epoch 31
Epoch 32
Epoch 33
Epoch 34
Epoch 35
Epoch 36
Epoch 37
Epoch 38
Epoch 39


In [52]:
build_graph(
    name='layers/lay=2,hidden=100',
    num_layers=2,
    num_hidden=100
)

Epoch 0
Epoch 1
Epoch 2
Epoch 3
Epoch 4
Epoch 5
Epoch 6
Epoch 7
Epoch 8
Epoch 9
Epoch 10
Epoch 11
Epoch 12
Epoch 13
Epoch 14
Epoch 15
Epoch 16
Epoch 17
Epoch 18
Epoch 19
Epoch 20
Epoch 21
Epoch 22
Epoch 23
Epoch 24
Epoch 25
Epoch 26
Epoch 27
Epoch 28
Epoch 29
Epoch 30
Epoch 31
Epoch 32
Epoch 33
Epoch 34
Epoch 35
Epoch 36
Epoch 37
Epoch 38
Epoch 39


In [None]:
build_graph(
    name='layers/lay=2,hidden=100',
    num_layers=2,
    num_hidden=200
)

In [None]:
build_graph(
    name='layers/lay=2,hidden=100',
    num_layers=2,
    num_hidden=400
)