In [2]:
import random
import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf
%matplotlib inline

In [3]:
import pickle, gzip

traffic_dir = 'traffic_signs_data'

def load_traffic_signs():
    """Read the gzipped, pickled train and test archives found in ``traffic_dir``.

    Each archive is unpickled into a mapping from which the ``'images'`` and
    ``'labels'`` entries are taken.

    Returns:
        The tuple ``(X_train, y_train, X_test, y_test)``.
    """
    def _unpickle(path):
        # NOTE(review): pickle.load can execute arbitrary code embedded in the
        # file - only point this at archives from a trusted source.
        with gzip.open(path, mode='rb') as handle:
            return pickle.load(handle)

    train_split = _unpickle(traffic_dir + '/train2.p.gz')
    test_split = _unpickle(traffic_dir + '/test2.p.gz')

    return (train_split['images'], train_split['labels'],
            test_split['images'], test_split['labels'])

# Load both splits into memory once; the normalization cells below derive
# scaled copies from these arrays.
X_train, y_train, X_test, y_test = load_traffic_signs()

In [6]:
# Summarise the data set: the number of distinct labels and the extent of
# axes 1 and 2 of the image array, then display everything as one tuple.
n_classes = len(np.unique(y_train))
image_shape = X_train.shape[1:3]
'Train:', len(X_train), 'Test:', len(X_test), 'Image shape:', image_shape, 'Classes:', n_classes

('Train:', 39209, 'Test:', 12630, 'Image shape:', (32, 32), 'Classes:', 43)

## Normalize Data

In [7]:
# Target range of the normalized data: inputs are mapped linearly onto [A, B].
A, B = 0.1, 0.9

def normalize(image_data, min_x=None, max_x=None):
    """Linearly rescale ``image_data`` so that ``[min_x, max_x]`` maps to ``[A, B]``.

    Args:
        image_data: Array-like of pixel values. Integer input is converted to
            float64 before any arithmetic; floating input keeps its dtype.
        min_x: Value mapped to ``A``. Defaults to the minimum of ``image_data``.
        max_x: Value mapped to ``B``. Defaults to the maximum of ``image_data``.

    Returns:
        Tuple ``(result, min_x, max_x)``: the rescaled array and the bounds
        that were used, so they can be reused for other data.

    Raises:
        ValueError: If ``max_x`` equals ``min_x`` (the scale would be undefined).
    """
    data = np.asarray(image_data)
    if not np.issubdtype(data.dtype, np.floating):
        # Subtracting in an unsigned integer dtype wraps around for values
        # below min_x (e.g. uint8 0 - 5 == 251), so switch to float first.
        data = data.astype(np.float64)

    min_x = np.min(image_data) if min_x is None else min_x
    max_x = np.max(image_data) if max_x is None else max_x

    # Bounds are converted to the working float dtype so their difference
    # cannot wrap either; the caller still gets the bounds back unchanged.
    low = np.asarray(min_x, dtype=data.dtype)
    span = np.asarray(max_x, dtype=data.dtype) - low
    if np.any(span == 0):
        raise ValueError("normalize: max_x must differ from min_x")

    result = A + (data - low) * (B - A) / span
    return result, min_x, max_x

In [8]:
def restore_normal(image_data, min_x, max_x):
    """Map data from the ``[A, B]`` range back to ``[min_x, max_x]`` as uint8.

    This is the inverse of the linear scaling done by ``normalize``.

    Args:
        image_data: Array of values in normalized units.
        min_x: Value that ``A`` maps back to.
        max_x: Value that ``B`` maps back to.

    Returns:
        A ``np.uint8`` array. Values are rounded to the nearest integer and
        clipped to ``[0, 255]`` before the cast.
    """
    result = (image_data - A) * (max_x - min_x) / (B - A) + min_x
    # A bare astype(np.uint8) truncates (254.9999 -> 254) and has a
    # platform-dependent result for values outside 0..255, which happens when
    # this is applied to raw activations. Round and clip first.
    return np.clip(np.rint(result), 0, 255).astype(np.uint8)

In [9]:
# Fit the scaling bounds on the training images; min_x and max_x are reused
# for the test set and for restoring images later in the notebook.
X_train_norm, min_x, max_x = normalize(X_train)

In [10]:
# Scale the test images with the training-set bounds rather than their own.
X_test_norm, _, _ = normalize(X_test, min_x, max_x)

## LeNet-5

In [11]:
from tensorflow.contrib.layers import flatten

def LeNet(x):
    """Build the LeNet-5 style graph on top of ``x`` and return the logits tensor.

    The network is two 5x5 VALID convolutions, each followed by ReLU and 2x2
    max-pooling, then three fully connected layers ending in 43 outputs.
    Weights are drawn from a truncated normal (mean ``mu``, stddev ``sigma``)
    and biases start at zero.

    Intermediate tensors are also stored as attributes on the function itself
    (``LeNet.conv1``, ``LeNet.conv2``, ``LeNet.fc0``, ``LeNet.fc1``,
    ``LeNet.fc2``, ``LeNet.logits``) so other cells can evaluate them.

    Args:
        x: Input image batch; the layer comments below assume 32x32x3 images.

    Returns:
        The unnormalized class scores (tensor named "logits").
    """
    # NOTE(review): the weight-init mean is 0.01 rather than 0 - confirm this is intentional.
    mu = 0.01
    sigma = 0.1
    
    # Layer 1: Convolutional. Input = 32x32x3. Output = 28x28x6.
    conv1_W = tf.Variable(tf.truncated_normal(shape=(5, 5, 3, 6), mean = mu, stddev = sigma), name="conv1_W")
    conv1_b = tf.Variable(tf.zeros(6), name="conv1_b")
    conv1 = tf.nn.conv2d(x, conv1_W, strides = [1, 1, 1, 1], padding='VALID') + conv1_b

    # Activation.
    conv1 = tf.nn.relu(conv1, name="conv1_relu")

    # Pooling. Input = 28x28x6. Output = 14x14x6.
    conv1 = tf.nn.max_pool(conv1, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='VALID', name="conv1")
    # Expose the pooled first-layer output for the activation-capture cells.
    LeNet.conv1 = conv1

    # Layer 2: Convolutional. Input = 14x14x6. Output = 10x10x16.
    conv2_W = tf.Variable(tf.truncated_normal(shape=(5, 5, 6, 16), mean=mu, stddev=sigma), name="conv2_W")
    conv2_b = tf.Variable(tf.zeros(16), name="conv2_b")
    conv2 = tf.nn.conv2d(conv1, conv2_W, strides=[1, 1, 1, 1], padding='VALID') + conv2_b
    
    # Activation.
    conv2 = tf.nn.relu(conv2, name="conv2_relu")

    # Pooling. Input = 10x10x16. Output = 5x5x16.
    conv2 = tf.nn.max_pool(conv2, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='VALID', name="conv2")
    LeNet.conv2 = conv2

    # Flatten. Input = 5x5x16. Output = 400.
    fc0 = flatten(conv2)
    LeNet.fc0 = fc0
    
    # Layer 3: Fully Connected. Input = 400. Output = 120.
    fc1_W = tf.Variable(tf.truncated_normal(shape=(400, 120), mean=mu, stddev=sigma), name="fc1_W")
    fc1_b = tf.Variable(tf.zeros(120), name="fc1_b")
    fc1 = tf.matmul(fc0, fc1_W) + fc1_b
    
    # Activation.
    fc1 = tf.nn.relu(fc1, name="fc1")
    LeNet.fc1 = fc1

    # Layer 4: Fully Connected. Input = 120. Output = 84.
    fc2_W = tf.Variable(tf.truncated_normal(shape=(120, 84), mean=mu, stddev=sigma), name="fc2_W")
    fc2_b = tf.Variable(tf.zeros(84), name="fc2_b")
    fc2 = tf.matmul(fc1, fc2_W) + fc2_b
    
    # Activation.
    fc2 = tf.nn.relu(fc2, name="fc2")
    LeNet.fc2 = fc2

    # Layer 5: Fully Connected. Input = 84. Output = 43 (one score per class).
    fc3_W = tf.Variable(tf.truncated_normal(shape=(84, 43), mean=mu, stddev=sigma), name="fc3_W")
    fc3_b = tf.Variable(tf.zeros(43), name="fc3_b")
    
    # No activation here: the softmax is applied by the loss / prediction ops.
    logits = tf.add(tf.matmul(fc2, fc3_W), fc3_b, name="logits")
    LeNet.logits = logits
    
    return logits

### Train Model

In [12]:
from sklearn.model_selection import train_test_split

# Hold out a validation subset of the normalized training data
# (train_test_split's default test_size applies). The fixed random_state makes
# the split itself repeatable across kernel restarts.
# NOTE: this cell rebinds X_train_norm / y_train, so re-running it without
# re-running the cells above splits the already reduced training set again.
X_train_norm, X_valid_norm, y_train, y_valid = train_test_split(
    X_train_norm, y_train, random_state=0)

In [13]:
# Graph input for image batches: any batch size, 32x32 pixels, 3 channels.
x = tf.placeholder(tf.float32, (None, 32, 32, 3), name='x')
# Graph input for the integer class labels.
# NOTE(review): `(None)` is plain `None`, not a 1-tuple, so the shape of `y` is
# left unspecified - `(None,)` may have been intended; confirm.
y = tf.placeholder(tf.int32, (None), name='y')
# One-hot encode the labels with depth 43, matching the 43 logits LeNet produces.
one_hot_y = tf.one_hot(y, 43)

In [14]:
# Learning rate passed to the Adam optimizer below.
rate = 0.001

# Build the network once; every op below hangs off these logits.
logits = LeNet(x)
# Class probabilities, used when predicting on individual images.
prediction = tf.nn.softmax(logits, name="prediction")
# Per-example cross entropy between the logits and the one-hot labels.
cross_entropy = tf.nn.softmax_cross_entropy_with_logits(logits=logits, labels=one_hot_y, name="cross_entropy")
# Scalar loss: mean cross entropy over the batch.
loss_operation = tf.reduce_mean(cross_entropy, name="loss_operation")
optimizer = tf.train.AdamOptimizer(learning_rate = rate, beta1=0.9, beta2=0.999, epsilon=1e-08, name="optimizer")
# Running this op performs one optimizer step on the fed batch.
training_operation = optimizer.minimize(loss_operation, name="training_operation")

In [15]:
# A prediction is correct when the highest-scoring logit matches the label.
correct_prediction = tf.equal(tf.argmax(logits, 1), tf.argmax(one_hot_y, 1), name="correct_prediction")
# Fraction of correct predictions in the fed batch.
accuracy_operation = tf.reduce_mean(tf.cast(correct_prediction, tf.float32), name="accuracy_operation")
# Used to write the checkpoint after training and to restore it in later cells.
saver = tf.train.Saver()

BATCH_SIZE = 64

def evaluate(X_data, y_data):
    """Return the accuracy of the model over ``(X_data, y_data)``.

    The data is fed through ``accuracy_operation`` in chunks of ``BATCH_SIZE``
    using the current default TensorFlow session. Each chunk's accuracy is
    weighted by its size so a short final chunk is counted correctly.
    """
    session = tf.get_default_session()
    sample_count = len(X_data)
    weighted_hits = 0
    for start in range(0, sample_count, BATCH_SIZE):
        stop = start + BATCH_SIZE
        images = X_data[start:stop]
        labels = y_data[start:stop]
        batch_accuracy = session.run(accuracy_operation, feed_dict={x: images, y: labels})
        weighted_hits += batch_accuracy * len(images)
    return weighted_hits / sample_count

def softmax(x, axis=None):
    """Compute a numerically stable softmax of ``x``.

    Args:
        x: Array-like of scores.
        axis: Axis along which to normalize. The default ``None`` normalizes
            over all elements.

    Returns:
        Array of the same shape as ``x`` whose entries sum to 1 along ``axis``.
    """
    scores = np.asarray(x)
    if scores.size == 0:
        # Nothing to normalize; np.max would raise on an empty array.
        return np.exp(scores)
    # Subtracting the maximum leaves the result unchanged mathematically but
    # keeps np.exp from overflowing to inf (and the ratio from becoming NaN).
    shifted = scores - np.max(scores, axis=axis, keepdims=True)
    exps = np.exp(shifted)
    return exps / np.sum(exps, axis=axis, keepdims=True)

#### Capture convolution activation evolution

In [16]:
from moviepy.editor import VideoFileClip

# Tensor whose feature maps are recorded while training, and the single
# normalized training image that is fed through it for every snapshot.
CAPTURE_ACTIVATION = LeNet.conv1
TEST_IMAGE = X_train_norm[100]

def activation_for_image(results, sess, image_input, tf_activation=CAPTURE_ACTIVATION):
    """Evaluate ``tf_activation`` for ``image_input`` and record each feature map.

    Args:
        results: List of lists, mutated in place. ``results[k]`` collects the
            successive snapshots of feature map ``k``; missing inner lists are
            appended as needed.
        sess: Session used to evaluate the tensor.
        image_input: Batch fed to the ``x`` placeholder; only the first item
            of the resulting activation is recorded.
        tf_activation: Tensor to evaluate; indexed as a 4-D array whose last
            axis enumerates the feature maps.

    Returns:
        None. The raw float activations are stored; no conversion to uint8 is
        applied.
    """
    activation = tf_activation.eval(session=sess, feed_dict={x : image_input})
    feature_image_count = activation.shape[3]
    for layer in range(feature_image_count):
        if len(results) < layer + 1:
            results.append([])
        results[layer].append(activation[0, :, :, layer])

In [18]:
from sklearn.utils import shuffle

EPOCHS = 20

def train(x_train, y_train, x_valid, y_valid):
    """Train the network for ``EPOCHS`` epochs and record activation snapshots.

    One snapshot of ``CAPTURE_ACTIVATION`` for ``TEST_IMAGE`` is taken before
    any update, and further snapshots are taken during each epoch at roughly
    evenly spaced batch offsets. Validation accuracy is printed after every
    epoch and the final weights are saved to ``./lenet``.

    Returns:
        List with one entry per feature map; each entry is the list of that
        map's snapshots in the order they were captured.
    """
    sample_count = x_train.shape[0]
    captures_per_epoch = 10
    # Spacing, in samples, between two consecutive captures within an epoch.
    capture_stride = sample_count / captures_per_epoch
    history = []

    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        print("Training...\n")

        # Baseline snapshot with freshly initialized weights.
        activation_for_image(history, sess, [TEST_IMAGE], CAPTURE_ACTIVATION)

        for epoch in range(EPOCHS):
            # Snapshots of this epoch only; merged into `history` once it ends.
            epoch_history = [[]]
            x_train, y_train = shuffle(x_train, y_train)

            for start in range(0, sample_count, BATCH_SIZE):
                stop = start + BATCH_SIZE
                feed = {x: x_train[start:stop], y: y_train[start:stop]}
                sess.run(training_operation, feed_dict=feed)

                # Capture again once progress through the epoch has passed
                # the next multiple of the capture stride.
                if len(epoch_history[0]) < start / capture_stride:
                    activation_for_image(epoch_history, sess, [TEST_IMAGE], CAPTURE_ACTIVATION)

            validation_accuracy = evaluate(x_valid, y_valid)
            for per_map, accumulated in zip(epoch_history, history):
                accumulated.extend(per_map)

            print("EPOCH {} ...".format(epoch + 1))
            print("Validation Accuracy = {:.3f}".format(validation_accuracy))
            print()

        saver.save(sess, './lenet')
        print("Model saved")

    return history

In [19]:
def show_images(images):
    """Plot ``images`` in a grid that is 8 columns wide, titled 'Image 1', 'Image 2', ...

    Args:
        images: Sequence of 2-D arrays, drawn with the gray colormap.
    """
    plt.figure(1, figsize=(15,15))
    cols = 8
    # Ceiling division kept in integers: plt.subplot needs an integer row
    # count, and `len(images) / cols + 1` is a float on Python 3 (and added
    # an empty row whenever the count was a multiple of `cols`).
    rows = max(1, -(-len(images) // cols))

    for index, img in enumerate(images):
        subplot = index + 1
        plt.subplot(rows, cols, subplot)
        plt.title('Image ' + str(subplot))
        plt.imshow(img, interpolation="nearest", cmap="gray")

In [20]:
%%time 

# Run the full training loop; `activations` holds the per-feature-map
# snapshots of CAPTURE_ACTIVATION collected along the way.
activations = train(X_train_norm, y_train, X_valid_norm, y_valid)

Training...

EPOCH 1 ...
Validation Accuracy = 0.758

EPOCH 2 ...
Validation Accuracy = 0.876

EPOCH 3 ...
Validation Accuracy = 0.932

EPOCH 4 ...
Validation Accuracy = 0.940

EPOCH 5 ...
Validation Accuracy = 0.946

EPOCH 6 ...
Validation Accuracy = 0.964

EPOCH 7 ...
Validation Accuracy = 0.961

EPOCH 8 ...
Validation Accuracy = 0.969

EPOCH 9 ...
Validation Accuracy = 0.972

EPOCH 10 ...
Validation Accuracy = 0.982

EPOCH 11 ...
Validation Accuracy = 0.973

EPOCH 12 ...
Validation Accuracy = 0.978

EPOCH 13 ...
Validation Accuracy = 0.975

EPOCH 14 ...
Validation Accuracy = 0.983

EPOCH 15 ...
Validation Accuracy = 0.979

EPOCH 16 ...
Validation Accuracy = 0.980

EPOCH 17 ...
Validation Accuracy = 0.976

EPOCH 18 ...
Validation Accuracy = 0.977

EPOCH 19 ...
Validation Accuracy = 0.979

EPOCH 20 ...
Validation Accuracy = 0.976

Model saved
CPU times: user 20min 50s, sys: 3min 36s, total: 24min 26s
Wall time: 8min 58s


In [21]:
# Number of snapshots recorded for feature map 1: the pre-training capture
# plus every capture taken during the epochs.
len(activations[1])

201

In [None]:
# Plot every recorded snapshot of feature map 0, in capture order.
show_images(activations[0])

## Test the model on the test dataset

In [23]:
# Evaluate the saved model on the held-out test set in a fresh session.
with tf.Session() as sess:
    # Restore the weights from the most recent checkpoint in the working directory.
    saver.restore(sess, tf.train.latest_checkpoint('.'))
    # evaluate() picks up `sess` through the default-session mechanism of the `with` block.
    res = evaluate(X_test_norm, y_test)
    print("Test accuracy = {:.3f}".format(res))

INFO:tensorflow:Restoring parameters from ./lenet
Test accuracy = 0.910


## Try images from the Internet

In [None]:
import matplotlib.image as mpimg
import cv2

def test_image_picture(file_name, sess):
    """Classify one image file with the trained network and display it.

    The image is read with OpenCV, converted to RGB, resized to 32x32 and
    scaled with the same ``normalize`` bounds (``min_x``, ``max_x``) that were
    used for the training data before it is fed to the network.

    Args:
        file_name: Path of the image file to classify.
        sess: Session in which the model weights are available.

    Raises:
        IOError: If the file cannot be read as an image.

    NOTE(review): ``class_names`` is not defined in the visible part of the
    notebook - it is presumably a sequence mapping class id to sign name;
    confirm it exists before this cell is run.
    """
    img_bgr = cv2.imread(file_name)
    if img_bgr is None:
        # cv2.imread reports failure by returning None instead of raising.
        raise IOError("Could not read image file: {}".format(file_name))
    img_orig = cv2.cvtColor(img_bgr, cv2.COLOR_BGR2RGB)
    img = cv2.resize(img_orig, (32, 32))

    # The network was trained on normalized pixels; feeding raw 0..255 values
    # would put the input far outside the range it has seen.
    img_norm, _, _ = normalize(img, min_x, max_x)

    pred = sess.run(prediction, feed_dict = { x : [img_norm] })
    index = np.argmax(pred)

    # Top-3 computed with NumPy so repeated calls do not keep adding new ops
    # to the TensorFlow graph. The stable sort keeps the lower class id first
    # on ties.
    probabilities = pred[0]
    top_indices = np.argsort(-probabilities, kind='stable')[:3]
    top_k_preds = (probabilities[top_indices], top_indices)
    print('Prediction: ', class_names[index])
    print('Top 3 probabilities:', top_k_preds)

    plt.imshow(img_orig)

The sign below is not in our dataset, so we don't count it toward the accuracy.

In [None]:
# NOTE(review): '???' is a placeholder path and `s_loaded` is not defined in
# the visible cells - supply a real image file and an open session with the
# restored weights before running this cell.
test_image_picture('???', s_loaded)

### Visualize Filter State

In [None]:
def output_features(image_input, tf_activation, activation_min=None, activation_max=None, plt_num=1):
    """Show the input image followed by every feature map of ``tf_activation``.

    Relies on a module-level ``sess`` being an open session (for example the
    target of an enclosing ``with tf.Session() as sess:`` block).

    Args:
        image_input: Batch fed to the ``x`` placeholder; the first item is
            displayed as the input image.
        tf_activation: Tensor to evaluate; indexed as a 4-D array whose last
            axis enumerates the feature maps.
        activation_min: Optional lower bound of the gray scale, in the uint8
            units produced by ``restore_normal``. ``None`` lets matplotlib
            choose.
        activation_max: Optional upper bound of the gray scale, same units.
        plt_num: Number of the matplotlib figure to draw into.
    """
    activation = tf_activation.eval(session=sess, feed_dict={x : image_input})
    feature_image_count = activation.shape[3]
    plt.figure(plt_num, figsize=(15,15))

    cols = 8
    # One extra cell for the input image. Keep the original 6-row layout as a
    # minimum and grow it when a layer has more maps than 6 x 8 cells can hold.
    rows = max(6, -(-(feature_image_count + 1) // cols))

    def show_one(image, subplot, title):
        plt.subplot(rows, cols, subplot)
        plt.title(title)
        ri = restore_normal(image, min_x, max_x)
        # vmin/vmax of None give matplotlib's default scaling; matplotlib
        # ignores them for the RGB input image.
        plt.imshow(ri, interpolation="nearest", cmap="gray",
                   vmin=activation_min, vmax=activation_max)

    # The first cell is the input itself, so it is not labelled as a feature map.
    show_one(image_input[0], 1, 'Input')
    for layer in range(feature_image_count):
        act_image = activation[0,:,:, layer]
        show_one(act_image, layer + 2, 'FeatureMap ' + str(layer + 1))
    plt.show()

In [None]:
# Round-trip sanity check: build an all-255 image shaped like a training
# sample, normalize it with the training bounds, then restore and display it.
normal_white = np.full(X_train[0].shape, 255, dtype=np.uint8)
WHITE, _, _ = normalize(normal_white, min_x, max_x)
plt.imshow(restore_normal(WHITE, min_x, max_x))

In [None]:
# Restore the trained weights and show the first-layer feature maps for one
# normalized training image. output_features() reads the `sess` bound here.
with tf.Session() as sess:
    saver.restore(sess, tf.train.latest_checkpoint('.'))
    output_features([X_train_norm[304]], LeNet.conv1) # , 0, 100)