In [3]:
import tensorflow as tf
import numpy as np
from tensorflow.python.platform import tf_logging as logging

# Show which TensorFlow build the notebook runs against, then enable INFO-level TensorFlow logging.
version_banner = "Tensorflow version " + tf.__version__
print(version_banner)
logging.set_verbosity(logging.INFO)

Tensorflow version 1.1.0


In [4]:
# Constants describing the data set.

# Two classes: "go" and "stop".
NUM_CLASSES = 2

# Images are square; both sides are given in pixels.
WIDTH = HEIGHT = 72

# Colour channels per image: red, green and blue.
NUM_CHANNELS = 3

In [5]:
# Function to read a single image from input file
def get_image(filename, name="get_image"):
    """Build the graph ops that read one (label, image) example from a TFRecord file.

    The file name is fed through a string input producer (``num_epochs=None``), one
    serialized example is read and parsed, its JPEG payload is decoded to a float32
    image and its integer class label is turned into a one-hot vector.

    Args:
        filename: path of the TFRecord file to read examples from.
        name: name scope under which all ops are created.

    Returns:
        A ``(label, image)`` tuple of tensors. ``label`` is a one-hot vector of length
        ``NUM_CLASSES``; ``image`` is a float32 tensor whose static shape is set to
        ``[HEIGHT, WIDTH, NUM_CHANNELS]``.
    """
    with tf.name_scope(name):
        # convert filename to a queue for an input pipeline.
        filename_queue = tf.train.string_input_producer([filename], num_epochs=None)

        # object to read records
        reader = tf.TFRecordReader()

        # read the full set of features for a single example (the record key is not needed)
        _, example = reader.read(filename_queue)

        # parse the full example into its component features.
        features = tf.parse_single_example(
            example,
            features={
                'image/height': tf.FixedLenFeature([], tf.int64),
                'image/width': tf.FixedLenFeature([], tf.int64),
                'image/colorspace': tf.FixedLenFeature([], dtype=tf.string, default_value=''),
                'image/channels': tf.FixedLenFeature([], tf.int64),
                'image/class/label': tf.FixedLenFeature([], tf.int64),
                'image/class/text': tf.FixedLenFeature([], dtype=tf.string, default_value=''),
                'image/format': tf.FixedLenFeature([], dtype=tf.string, default_value=''),
                'image/filename': tf.FixedLenFeature([], dtype=tf.string, default_value=''),
                'image/encoded': tf.FixedLenFeature([], dtype=tf.string, default_value='')
            })

        # only the label and the encoded image are used from here on
        label = features['image/class/label']
        image_buffer = features['image/encoded']

        # Decode the jpeg.
        # The input tensor is passed as `values`; the second positional argument of
        # tf.name_scope is `default_name`, not the list of input tensors.
        with tf.name_scope('decode_jpeg', values=[image_buffer]):
            # decode turns a 0-D string tensor holding the JPEG encoded image
            # into a 3-D uint8 tensor with shape [height, width, channels]
            image = tf.image.decode_jpeg(image_buffer, channels=NUM_CHANNELS, name="decode")
            image = tf.image.convert_image_dtype(image, dtype=tf.float32, name="convert_dtype")

        # The decoder cannot infer the image size statically, so declare it here.
        # NOTE(review): this assumes every stored image already is HEIGHT x WIDTH - no resize is done.
        image.set_shape([HEIGHT, WIDTH, NUM_CHANNELS])

        # re-define label as a "one-hot" vector
        # it will be [1,0] (label 1) or [0,1] (label 2) here; the stored labels start at 1,
        # hence the "- 1". This approach can easily be extended to more classes.
        label = tf.one_hot(label - 1, NUM_CLASSES, name="one_hot")

        return label, image


In [6]:
# "label" and "image" are associated with corresponding feature from a single example in the training data file
# at this point label is one hot vector. If label = 1 then [1,0]... if label = 2 then [0,1]
# (and yes that's opposite to binary!)
# single-example reader for the training file ...
label, image = get_image("../../dataset/traffic_sign/train-00000-of-00001")

# ... and one for the validation file
vlabel, vimage = get_image("../../dataset/traffic_sign/validation-00000-of-00001")

# Turn the single-example tensors into randomly shuffled mini-batches.
# Each tf.train.shuffle_batch call adds to the current graph:
#   - a shuffling queue that the given tensors are enqueued into,
#   - a dequeue_many operation that pulls a batch out of that queue,
#   - a QueueRunner (in the QUEUE_RUNNER collection) that keeps the queue filled.
with tf.name_scope("shuffle_batch"):
    # training batches
    train_batch = tf.train.shuffle_batch(
        tensors=[image, label],
        batch_size=64,
        capacity=220,
        min_after_dequeue=60)
    imageBatch, labelBatch = train_batch

    # validation batches
    validation_batch = tf.train.shuffle_batch(
        tensors=[vimage, vlabel],
        batch_size=64,
        capacity=220,
        min_after_dequeue=15)
    vimageBatch, vlabelBatch = validation_batch

In [7]:
# Graph inputs that receive concrete values through feed_dict at run time
with tf.name_scope("inputs"):
    # X: a mini-batch of images; the leading dimension (None) indexes the images in the batch
    X = tf.placeholder(tf.float32, shape=[None, HEIGHT, WIDTH, NUM_CHANNELS], name="images")
    # Y_: the true outputs (obtained from the labels), one row of NUM_CLASSES values per image
    Y_ = tf.placeholder(tf.float32, shape=[None, NUM_CLASSES], name="labels")
    # lr: learning rate, fed on every run so that it can be varied
    lr = tf.placeholder(dtype=tf.float32, name="learning_rate")
    # pkeep: probability of keeping a node during dropout;
    # feed 1.0 at test time (no dropout) and a value below 1.0 at training time
    pkeep = tf.placeholder(dtype=tf.float32, name="dropout_prob")
    # image summary showing up to 4 of the input images
    tf.summary.image("input", X, max_outputs=4)


In [8]:
def model():
    """
    The convolutional model: 5 conv layers with kernel shape [filter_height, filter_width, in_channels, out_channels]
    (the first 4 are each followed by a 2x2 max pool) and 2 fully connected layers, one to bring all the activation
    maps together (outputs of all the filters) and one final layer to predict a class. Dropout with keep
    probability pkeep is applied after every layer except the final one.

    The network is built on the module-level placeholders X (input images) and pkeep (dropout keep probability).
    :return: The predictions Y and the logits
    """
    pool_window = [1, 2, 2, 1]
    unit_stride = [1, 1, 1, 1]

    def weights(shape):
        # Every kernel / weight matrix gets the same small truncated-normal initialisation.
        # Leaving out stddev would fall back to 1.0 and produce far larger initial activations.
        return tf.Variable(tf.truncated_normal(shape, stddev=0.1))

    def biases(size):
        # small positive constant bias
        return tf.Variable(tf.constant(0.1, tf.float32, [size]))

    def halve(tensor):
        # 2x2 max pool with stride 2; with SAME padding an odd side rounds up (9 -> 5)
        return tf.nn.max_pool(tensor, ksize=pool_window, strides=pool_window, padding="SAME")

    def conv_relu(tensor, kernel, bias):
        # stride-1 convolution that keeps the spatial size, followed by bias and ReLU
        return tf.nn.relu(tf.nn.conv2d(tensor, kernel, strides=unit_stride, padding='SAME') + bias)

    with tf.variable_scope("the_model"):
        # five convolutional layers with their channel counts, and a
        # fully connected layer (the last layer has 2 softmax neurons for "stop" and "go")
        I = 128  # 1st convolutional layer output channels
        J = 128  # 2nd convolutional layer output channels
        K = 160  # 3rd convolutional layer output channels
        L = 256  # 4th convolutional layer output channels
        M = 384  # 5th convolutional layer output channels
        N = 2048 # fully connected layer

        # weights / kernels
        # 7x7 patch, NUM_CHANNELS input channels, I output channels
        W1 = weights([7, 7, NUM_CHANNELS, I])
        W2 = weights([5, 5, I, J])
        W3 = weights([3, 3, J, K])
        W4 = weights([3, 3, K, L])
        W5 = weights([3, 3, L, M])
        # the 5th layer outputs 5x5 maps with M channels (72 -> 36 -> 18 -> 9 -> 5 after four pools)
        W6 = weights([5 * 5 * M, N])
        W7 = weights([N, NUM_CLASSES])

        # biases
        B1 = biases(I)
        B2 = biases(J)
        B3 = biases(K)
        B4 = biases(L)
        B5 = biases(M)
        B6 = biases(N)
        B7 = biases(NUM_CLASSES)

        # Image summaries of the raw input after 1, 2, 3 and 4 successive max pools
        # (36x36, 18x18, 9x9 and 5x5 for 72x72 inputs). They do not feed into the network.
        pooled = X
        for depth in range(1, 5):
            pooled = halve(pooled)
            tf.summary.image("MAX_POOL%d" % depth, pooled, 4)

        with tf.name_scope("first_layer"):
            # 72x72 images in, 36x36 after max_pool
            Y1 = tf.nn.dropout(halve(conv_relu(X, W1, B1)), pkeep)

        with tf.name_scope("second_layer"):
            # 18x18 images after max_pool
            Y2 = tf.nn.dropout(halve(conv_relu(Y1, W2, B2)), pkeep)

        with tf.name_scope("third_layer"):
            # 9x9 images after max_pool
            Y3 = tf.nn.dropout(halve(conv_relu(Y2, W3, B3)), pkeep)

        with tf.name_scope("fourth_layer"):
            # 5x5 images after max_pool
            Y4 = tf.nn.dropout(halve(conv_relu(Y3, W4, B4)), pkeep)

        with tf.name_scope("fifth_layer"):
            # no pooling here, the maps stay 5x5
            Y5 = tf.nn.dropout(conv_relu(Y4, W5, B5), pkeep)

        with tf.name_scope("fc_layer"):
            # flatten the 5x5xM activation maps into one vector per image
            YY = tf.reshape(Y5, shape=[-1, 5 * 5 * M])
            Y6 = tf.nn.relu(tf.matmul(YY, W6) + B6)

            YY6 = tf.nn.dropout(Y6, pkeep)
            Ylogits = tf.matmul(YY6, W7) + B7
            Y = tf.nn.softmax(Ylogits)

        return Y, Ylogits

In [9]:
Y, Ylogits = model()

# Cross-entropy loss (= -sum(Y_i * log(Yi)) ); the batch mean is multiplied by 64.
# softmax_cross_entropy_with_logits is computed from the raw logits, which avoids the
# numerical stability problem of log(0) (= NaN) when taking the log of the softmax output.
with tf.name_scope("x-ent"):
    per_example_xent = tf.nn.softmax_cross_entropy_with_logits(labels=Y_, logits=Ylogits)
    cross_entropy = tf.reduce_mean(per_example_xent) * 64
    tf.summary.scalar("x-ent", cross_entropy)

# Accuracy: fraction of images whose predicted class matches the true class,
# between 0 (worst) and 1 (best)
with tf.name_scope("accuracy"):
    predicted_class = tf.argmax(Y, axis=1)
    true_class = tf.argmax(Y_, axis=1)
    correct_prediction = tf.equal(predicted_class, true_class)
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
    tf.summary.scalar("accuracy", accuracy)

# Training step; the learning rate comes from the lr placeholder
with tf.name_scope("train"):
    optimizer = tf.train.AdamOptimizer(lr)
    train_step = optimizer.minimize(cross_entropy)

# An interactive session allows interleaving of graph-building and running steps
sess = tf.InteractiveSession()
# Initialise all variables (created only now, so the optimizer's variables are included)
init_op = tf.global_variables_initializer()
sess.run(init_op)

# Start the threads that fill the input queues
coord = tf.train.Coordinator()
threads = tf.train.start_queue_runners(sess=sess, coord=coord)

# Event file writer for the graph and summaries, and a single op that evaluates all summaries
writer = tf.summary.FileWriter("./traffic_graph/1.1", sess.graph)
merged_summary = tf.summary.merge_all()

In [10]:
# start training
nSteps = 2000
LEARNING_RATE = 0.0007      # fed to the lr placeholder on every training step
TRAIN_KEEP_PROB = 0.5       # dropout keep probability while training
VALIDATION_INTERVAL = 100   # evaluate on a validation batch every this many steps

try:
    for i in range(nSteps):

        # pull the next shuffled training batch out of the input queue
        batch_xs, batch_ys = sess.run([imageBatch, labelBatch])

        # one optimisation step; the summaries are evaluated in the same run
        step_summary, _train_result = sess.run(
            [merged_summary, train_step],
            feed_dict={X: batch_xs, Y_: batch_ys, lr: LEARNING_RATE, pkeep: TRAIN_KEEP_PROB})
        writer.add_summary(step_summary, i)

        if (i + 1) % VALIDATION_INTERVAL == 0:  # then perform validation

            # get a validation batch
            vbatch_xs, vbatch_ys = sess.run([vimageBatch, vlabelBatch])
            # dropout is switched off (pkeep = 1.0); accuracy does not use the learning rate
            validation_accuracy = accuracy.eval(feed_dict={X: vbatch_xs, Y_: vbatch_ys, pkeep: 1.0})

            # this figure is measured on validation data, so it is reported as such
            print("step %d, validation accuracy %g" % (i + 1, validation_accuracy))
finally:
    # finalise - also when the loop is interrupted or fails, so that the summaries
    # written so far reach disk and the reader threads are asked to stop
    writer.flush()
    coord.request_stop()
    coord.join(threads)

step 100, training accuracy 0.484375
step 200, training accuracy 0.4375
step 300, training accuracy 0.375
step 400, training accuracy 0.5
step 500, training accuracy 0.5
step 600, training accuracy 0.5625
step 700, training accuracy 0.484375
step 800, training accuracy 0.5
step 900, training accuracy 0.5
step 1000, training accuracy 0.53125
step 1100, training accuracy 0.46875


KeyboardInterrupt: 