In [13]:
### Import the necessary libraries and packages
import matplotlib.image as mpimg
import matplotlib.pyplot as plt
import numpy as np
import cv2
import glob
import time
from alexnet import AlexNet
# from sklearn.svm import LinearSVC
# from sklearn.preprocessing import StandardScaler
# from skimage.feature import hog
from sklearn.model_selection import train_test_split
from sklearn.utils import shuffle
import pandas as pd
import tensorflow as tf
%matplotlib inline


In [6]:
### Build the training dataset
## Source: the Udacity dataset
# Load the bounding-box annotations from the labels CSV
udacity_df = pd.read_csv(filepath_or_buffer="../data/labels.csv")

# Preview the first five rows to confirm the file was parsed as expected
udacity_df.head(n=5)


Unnamed: 0,Filename,xmin,ymin,xmax,ymax,occluded,label,attributes
0,uda_1478019952686311006.jpg,950,574,1004,620,0,car,
1,uda_1478019952686311006.jpg,1748,482,1818,744,0,pedestrian,
2,uda_1478019953180167674.jpg,872,586,926,632,0,car,
3,uda_1478019953689774621.jpg,686,566,728,618,1,truck,
4,uda_1478019953689774621.jpg,716,578,764,622,0,car,


In [7]:
# Summary statistics (count, mean, std, min, quartiles, max) of the numeric columns
udacity_df.describe(percentiles=[0.25, 0.5, 0.75])


Unnamed: 0,xmin,ymin,xmax,ymax,occluded
count,93086.0,93086.0,93086.0,93086.0,93086.0
mean,805.390864,533.632942,936.441914,652.539243,0.426079
std,448.187238,99.227053,455.023755,130.14067,0.494508
min,0.0,0.0,16.0,54.0,0.0
25%,468.0,514.0,598.0,616.0,0.0
50%,804.0,562.0,898.0,662.0,0.0
75%,1112.0,592.0,1236.0,706.0,1.0
max,1896.0,886.0,1924.0,1198.0,1.0


In [9]:
# Build one array whose rows are [image path, xmin, ymin, xmax, ymax]:
# - prefix every filename with the dataset directory and make it a single column
# - take the four bounding-box columns as they are
# - glue the two pieces together column-wise
udacity_filenames = ("../data/udacity_dataset/" + udacity_df["Filename"].values).reshape(-1, 1)
udacity_bboxes = udacity_df.loc[:, ["xmin", "ymin", "xmax", "ymax"]].values
udacity_dataset = np.hstack((udacity_filenames, udacity_bboxes))

# Sanity check: overall shape and the first ten rows
print(udacity_dataset.shape)
print(udacity_dataset[:10])


(93086, 5)
[['../data/udacity_dataset/uda_1478019952686311006.jpg' 950 574 1004 620]
 ['../data/udacity_dataset/uda_1478019952686311006.jpg' 1748 482 1818 744]
 ['../data/udacity_dataset/uda_1478019953180167674.jpg' 872 586 926 632]
 ['../data/udacity_dataset/uda_1478019953689774621.jpg' 686 566 728 618]
 ['../data/udacity_dataset/uda_1478019953689774621.jpg' 716 578 764 622]
 ['../data/udacity_dataset/uda_1478019953689774621.jpg' 826 580 880 626]
 ['../data/udacity_dataset/uda_1478019953689774621.jpg' 1540 488 1680 608]
 ['../data/udacity_dataset/uda_1478019953689774621.jpg' 1646 498 1848 594]
 ['../data/udacity_dataset/uda_1478019954186238236.jpg' 662 562 710 616]
 ['../data/udacity_dataset/uda_1478019954186238236.jpg' 686 576 730 628]]


In [10]:
# Turn the string class labels into one-hot rows
from sklearn.preprocessing import LabelBinarizer

# The fitted encoder is kept around so predictions can be mapped back to class names
encoder = LabelBinarizer()
udacity_labels = encoder.fit_transform(udacity_df["label"].values)


In [11]:
# Hold out 20% of the rows as the test set, stratified on the one-hot labels
X_train, X_test, y_train, y_test = train_test_split(
    udacity_dataset,
    udacity_labels,
    test_size=0.2,
    stratify=udacity_labels,
    random_state=42,
)

# Carve 10% of the remaining training rows off as the validation set
X_train, X_validate, y_train, y_validate = train_test_split(
    X_train,
    y_train,
    test_size=0.1,
    stratify=y_train,
    random_state=0,
)

In [14]:
# Report the size of each split, one shape per line
print(X_train.shape, X_validate.shape, X_test.shape, sep="\n")

# The width of the one-hot label rows is the number of classes
n_classes = y_train.shape[-1]
print(n_classes)

(67021, 5)
(7447, 5)
(18618, 5)
5


In [16]:
def get_batches(X, y, batch_size, image_size=(64, 64)):
    '''
        A generator that supplies batches of data to the Neural Network model.

        Every row of X is unpacked into an image filename and a bounding box.
        The image is read from disk, cropped to the box (both corner pixels
        included) and resized to image_size.

        Parameters:
            X: The input dataset; each row is (filename, xmin, ymin, xmax, ymax)
            y: the labels for each row of the dataset
            batch_size: how many rows are in each batch (the last batch is
                        shorter when len(X) is not a multiple of batch_size)
            image_size: (width, height) passed to cv2.resize for every crop;
                        defaults to (64, 64)
        Yields:
            X_out: a numpy array holding the resized crops of one batch
            y_out: the corresponding labels to the batch output
    '''
    image_size = tuple(image_size)

    for start in range(0, len(X), batch_size):
        stop = start + batch_size
        crops = []    # Resized image crops of the current batch

        for file, xmin, ymin, xmax, ymax in X[start:stop]:
            # Cast so the coordinates are usable as slice bounds even if the
            # row stores them as floats or numeric strings
            xmin, ymin, xmax, ymax = int(xmin), int(ymin), int(xmax), int(ymax)

            img = mpimg.imread(file)
            # The +1 makes the crop include the (xmax, ymax) pixel
            img = img[ymin:(ymax + 1), xmin:(xmax + 1)]
            crops.append(cv2.resize(img, image_size))

        yield np.array(crops), y[start:stop]
        

In [20]:
# Build the graph: a frozen AlexNet feature extractor followed by a new,
# trainable fully-connected classification layer, then train that layer.

# Create the graph placeholders
features = tf.placeholder(tf.float32, (None, 64, 64, 3))
resized = tf.image.resize_images(features, (227, 227)) # AlexNet expects images of size 227 x 227
labels = tf.placeholder(tf.int64, (None, n_classes))

# Get the penultimate layer from AlexNet
fc7 = AlexNet(resized, feature_extract=True)
fc7 = tf.stop_gradient(fc7) # Freeze back-propagation from updating weights behind this layer

# Create a new fully-connected layer for classification on this dataset
fc8_shape = [fc7.get_shape().as_list()[-1], n_classes]
fc8_w = tf.Variable(tf.truncated_normal(fc8_shape, stddev=1e-2), name="fc8_weights")
fc8_b = tf.Variable(tf.zeros(n_classes), name="fc8_biases")

# Loss, optimizer (restricted to the new layer's variables) and accuracy
logits = tf.nn.xw_plus_b(fc7, fc8_w, fc8_b, name="logits")
loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=logits, labels=labels))
opt = tf.train.AdamOptimizer().minimize(loss, var_list=[fc8_w, fc8_b])
correct_prediction = tf.equal(tf.argmax(logits, 1), tf.argmax(labels, 1))
accy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

save_file = "./model.ckpt"
saver = tf.train.Saver()

batch_size = 128
epochs = 10

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    # Floor division: a trailing partial batch is not counted here
    n_batches = len(X_train) // batch_size

    for e in range(epochs):
        # Re-shuffle the training rows at the start of every epoch
        X_train, y_train = shuffle(X_train, y_train)
        total_val_loss = 0
        total_val_accy = 0

        for ii, (batch_x, batch_y) in enumerate(get_batches(X_train, y_train, batch_size)):
            feed_dict = {features: batch_x, labels: batch_y}
            sess.run(opt, feed_dict=feed_dict)

            if ii % 2 == 0:
                # Fetch both metrics in one run call so the network is
                # evaluated once instead of once per metric
                train_loss, train_accy = sess.run([loss, accy], feed_dict=feed_dict)
                print("epoch {}/{}... batch {}/{}...".format(e+1, epochs, ii, n_batches),
                      "training loss: {:.4f}... ".format(train_loss),
                      "training accuracy: {:.4f}".format(train_accy))

        for jj, (val_batch_x, val_batch_y) in enumerate(get_batches(X_validate, y_validate, batch_size)):
            feed_dict = {features: val_batch_x, labels: val_batch_y}
            val_loss, val_accy = sess.run([loss, accy], feed_dict=feed_dict)
            # Weight each batch by its length so a shorter final batch does
            # not skew the averages computed below
            total_val_loss += val_loss * len(val_batch_x)
            total_val_accy += val_accy * len(val_batch_x)

        total_val_loss = total_val_loss / len(X_validate)
        total_val_accy = total_val_accy / len(X_validate)

        # The original "{:/4f}" spec is invalid and raised ValueError here,
        # aborting the run at the end of the first epoch
        print("epoch {}/{}... ".format(e+1, epochs),
              "validation loss: {:.4f}... ".format(total_val_loss),
              "validation accuracy: {:.4f}".format(total_val_accy))

    # Save the model
    saver.save(sess, save_file)
                              

epoch 1/10... batch 0/523... training loss: 0.8083...  training accuracy: 0.7734
epoch 1/10... batch 2/523... training loss: 0.7222...  training accuracy: 0.7656
epoch 1/10... batch 4/523... training loss: 0.3964...  training accuracy: 0.9062
epoch 1/10... batch 6/523... training loss: 0.7313...  training accuracy: 0.7969
epoch 1/10... batch 8/523... training loss: 0.7188...  training accuracy: 0.7891
epoch 1/10... batch 10/523... training loss: 0.7232...  training accuracy: 0.8359
epoch 1/10... batch 12/523... training loss: 0.3887...  training accuracy: 0.8750
epoch 1/10... batch 14/523... training loss: 0.3659...  training accuracy: 0.8750
epoch 1/10... batch 16/523... training loss: 0.3695...  training accuracy: 0.8672


KeyboardInterrupt: 