In [None]:
import tensorflow as tf
import numpy as np
import pickle
import matplotlib.pyplot as plt
import timeit

<h1>Load the training, validation, and testing sets from your preprocessed files</h1>

In [None]:
# Load the preprocessed splits. Each file is opened in a `with` block so the
# handle is closed as soon as the data has been read.
# SECURITY NOTE: pickle.load can execute arbitrary code while deserializing,
# so only load files that you produced yourself during preprocessing.
with open('cat_dog_train.p', 'rb') as train_file:
    x_train, y_train = pickle.load(train_file)

with open('cat_dog_valid.p', 'rb') as valid_file:
    x_valid, y_valid = pickle.load(valid_file)

# Unlike the train/validation files, the test file is kept as a single object
# (it is not unpacked into a features/labels pair here).
with open('cat_dog_test.p', 'rb') as test_file:
    x_test = pickle.load(test_file)

<h1>Define hyperparameters</h1>

In [None]:
# --- Optimization settings ---
learning_rate = 0.001  # learning rate hyperparameter
batch_size = 64        # number of samples per mini-batch

# --- Input geometry: 227 x 227 images with 3 channels ---
image_width = image_height = 227
image_depth = 3

# --- Number of target classes ---
num_labels = 2

<h1>Placeholder</h1>

In [None]:
# Start from an empty graph so that re-running this cell does not pile up
# duplicate ops and variables.
tf.reset_default_graph()

# Batch of input images. The leading dimension is None so any batch size can
# be fed.
# NOTE(review): assumes the pickled images are stored as
# (N, image_height, image_width, image_depth) arrays -- confirm against the
# preprocessing step.
X = tf.placeholder(
    tf.float32,
    shape=[None, image_height, image_width, image_depth],
    name='X')

# Batch of labels. The loss and accuracy ops take tf.argmax(Y, 1) and feed Y
# straight into the softmax cross-entropy, so Y must be one-hot encoded with
# one column per class.
Y = tf.placeholder(tf.float32, shape=[None, num_labels], name='Y')

<h1>AlexNet</h1>

In [None]:
def AlexNet(X):
    """Build an AlexNet-style convolutional network and return its logits.

    Layer stack (spatial sizes hold for the 227 x 227 x 3 input configured in
    the hyperparameter cell):

        conv1  11x11, stride 4, 96 filters, valid  -> 55 x 55 x 96
        pool1  3x3 max, stride 2, then LRN         -> 27 x 27 x 96
        conv2  5x5, 256 filters, same              -> 27 x 27 x 256
        pool2  3x3 max, stride 2, then LRN         -> 13 x 13 x 256
        conv3  3x3, 384 filters, same              -> 13 x 13 x 384
        conv4  3x3, 384 filters, same              -> 13 x 13 x 384
        conv5  3x3, 256 filters, same              -> 13 x 13 x 256
        pool5  3x3 max, stride 2                   -> 6 x 6 x 256
        fc6    4096 units, ReLU
        fc7    4096 units, ReLU
        logits num_labels units, no activation

    Args:
        X: Tensor holding a batch of images. It is reshaped to
            [-1, image_height, image_width, image_depth], so any layout with
            a matching number of elements per image is accepted.

    Returns:
        Tensor of shape [batch, num_labels] with the raw (pre-softmax) scores.
    """
    # He initialization: variance scaling with scale=2 over fan-in, which
    # suits the ReLU activations used throughout the network.
    he_init = tf.variance_scaling_initializer(scale=2.0)

    # Reshape input to a 4-D tensor; -1 keeps the batch size flexible.
    input_layer = tf.reshape(
        X, [-1, image_height, image_width, image_depth])

    # Convolutional layer #1
    conv1 = tf.layers.conv2d(
        inputs=input_layer,
        filters=96,
        kernel_size=11,
        strides=(4, 4),
        padding="valid",  # no padding is applied to the input
        activation=tf.nn.relu,
        kernel_initializer=he_init)

    # Pooling layer #1 followed by local response normalization
    pool1 = tf.layers.max_pooling2d(inputs=conv1, pool_size=[3, 3], strides=2)
    pool1 = tf.nn.local_response_normalization(pool1)

    # Convolutional layer #2
    conv2 = tf.layers.conv2d(
        inputs=pool1,
        filters=256,
        kernel_size=5,
        padding="same",  # zero-padded so the spatial size is preserved
        activation=tf.nn.relu,
        kernel_initializer=he_init)

    # Pooling layer #2 followed by local response normalization
    pool2 = tf.layers.max_pooling2d(inputs=conv2, pool_size=[3, 3], strides=2)
    pool2 = tf.nn.local_response_normalization(pool2)

    # Convolutional layers #3-#5 are chained back to back: each one consumes
    # the output of the previous layer, with no pooling in between.
    conv3 = tf.layers.conv2d(
        inputs=pool2,
        filters=384,
        kernel_size=3,
        padding="same",
        activation=tf.nn.relu,
        kernel_initializer=he_init)

    conv4 = tf.layers.conv2d(
        inputs=conv3,
        filters=384,
        kernel_size=3,
        padding="same",
        activation=tf.nn.relu,
        kernel_initializer=he_init)

    conv5 = tf.layers.conv2d(
        inputs=conv4,
        filters=256,
        kernel_size=3,
        padding="same",
        activation=tf.nn.relu,
        kernel_initializer=he_init)

    # Final pooling layer
    pool5 = tf.layers.max_pooling2d(inputs=conv5, pool_size=[3, 3], strides=2)

    # Flatten every sample to a vector. tf.layers.flatten derives the size
    # from the tensor itself, so it cannot drift out of sync with the layers
    # above the way a hard-coded reshape can.
    pool5_flat = tf.layers.flatten(pool5)

    # Fully connected layers #1 and #2.
    # NOTE: the dropout used by the original AlexNet is omitted because this
    # function has no training/inference switch.
    dense1 = tf.layers.dense(inputs=pool5_flat, units=4096,
                             activation=tf.nn.relu,
                             kernel_initializer=he_init)
    dense2 = tf.layers.dense(inputs=dense1, units=4096,
                             activation=tf.nn.relu,
                             kernel_initializer=he_init)

    # Output layer: one raw score per class.
    logits = tf.layers.dense(inputs=dense2, units=num_labels)

    return logits

<h1>Cost and Optimization</h1>

In [None]:
# Forward pass: raw class scores and the corresponding class probabilities.
logits = AlexNet(X)
softmax = tf.nn.softmax(logits)

# Mean cross-entropy loss over the batch. Y is passed as `labels` unchanged,
# so it must already be one-hot encoded with the same shape as `logits`.
cost = tf.reduce_mean(
    tf.nn.softmax_cross_entropy_with_logits_v2(logits=logits, labels=Y))

# Use the Adam optimizer to reduce the cost; the step size comes from the
# `learning_rate` hyperparameter.
optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)
train_op = optimizer.minimize(cost)

# For testing and prediction: predicted class index per sample, and the
# fraction of samples whose predicted class matches the label.
predictions = tf.argmax(softmax, axis=1)
correct_prediction = tf.equal(tf.argmax(logits, 1), tf.argmax(Y, 1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

# Initialize all the variables
init = tf.global_variables_initializer()

<h1>Training and validation</h1>
<h2>Train your model for only 10 epochs</h2>
<p style="font-size:20px">1. Print out training accuracy and validation accuracy each training epoch</p>
<p style="font-size:20px">2. Print out training time each training epoch</p>
<p style="font-size:20px">3. Your goal is to reach 85% validation accuracy within 10 training epochs. Once you reach that, you can perform testing and print out your test accuracy. Plot the ten images, each with a title that contains the probability of the labeled class.</p>

In [None]:
epochs = 10  # the notebook limits training to 10 epochs


def _accuracy_in_batches(sess, features, labels):
    """Return the `accuracy` op averaged over a whole dataset.

    The data is fed `batch_size` rows at a time so that large sets do not
    have to fit through the network in a single pass. Each batch accuracy is
    weighted by the batch length, so a shorter final batch is not
    over-represented.

    Args:
        sess: Active tf.Session in which the variables are initialized.
        features: Sliceable collection of input samples.
        labels: Sliceable collection of labels aligned with `features`.

    Returns:
        Accuracy over all samples as a float, or NaN for an empty dataset.
    """
    total = len(features)
    if total == 0:
        return float('nan')
    weighted_sum = 0.0
    for start in range(0, total, batch_size):
        chunk_x = features[start:start + batch_size]
        chunk_y = labels[start:start + batch_size]
        chunk_acc = sess.run(accuracy, feed_dict={X: chunk_x, Y: chunk_y})
        weighted_sum += chunk_acc * len(chunk_x)
    return weighted_sum / total


with tf.Session() as sess:

    sess.run(init)

    # Arrays are needed for the shuffled (fancy) indexing below.
    train_x = np.asarray(x_train)
    train_y = np.asarray(y_train)
    num_samples = train_x.shape[0]

    for epoch in range(epochs):
        epoch_start = timeit.default_timer()

        # Visit the samples in a new random order every epoch.
        order = np.random.permutation(num_samples)

        cost_sum = 0.
        acc_sum = 0.
        for start in range(0, num_samples, batch_size):
            batch_idx = order[start:start + batch_size]
            batch_x = train_x[batch_idx]
            batch_y = train_y[batch_idx]

            # Train on the batch and get back its cost and accuracy.
            _, c, a = sess.run([train_op, cost, accuracy],
                               feed_dict={X: batch_x, Y: batch_y})

            # Weight by batch length so the epoch figures are true
            # per-sample means even when the last batch is short.
            cost_sum += c * len(batch_idx)
            acc_sum += a * len(batch_idx)

        epoch_cost = cost_sum / max(num_samples, 1)
        # Running average collected while the weights were still being
        # updated during this epoch.
        train_accuracy = acc_sum / max(num_samples, 1)
        epoch_time = timeit.default_timer() - epoch_start

        # Get accuracy for validation
        valid_accuracy = _accuracy_in_batches(sess, x_valid, y_valid)

        print("Epoch {}: Cost: {}".format(epoch+1, epoch_cost))
        print("Training accuracy: {}".format(train_accuracy))
        print("Validation accuracy: {}".format(valid_accuracy))
        print("Training time: {:.2f} s".format(epoch_time))

    # The test file is loaded as a single object (`x_test`).
    # NOTE(review): it is unclear whether that object carries labels; confirm
    # against the preprocessing step. Accuracy is only reported when it is an
    # (images, labels) pair.
    if isinstance(x_test, (tuple, list)) and len(x_test) == 2:
        test_images, test_targets = x_test
        test_accuracy = _accuracy_in_batches(sess, test_images, test_targets)
        print("Testing accuracy: {}".format(test_accuracy))
    else:
        # No labels available: keep the per-class probabilities so later
        # cells can inspect or plot them.
        test_images = np.asarray(x_test)
        test_probabilities = [
            sess.run(softmax,
                     feed_dict={X: test_images[start:start + batch_size]})
            for start in range(0, len(test_images), batch_size)
        ]
        if test_probabilities:
            test_probabilities = np.concatenate(test_probabilities)
        print("No test labels loaded; stored class probabilities for {} "
              "test images in test_probabilities".format(len(test_images)))