In [None]:
# Create some helper functions for convenience

def read_files(data_dir, shuffle=False):
    '''
        Collect the file paths that match a glob pattern
        Parameters:
            data_dir: A glob pattern (e.g. "./Dataset/vehicles/GTI_Far/*.png"), passed straight to glob.glob
            shuffle: If True, return the paths in a random order
        Returns:
            image_files: A 1-D numpy array of the matching paths (empty when nothing matches)
    '''
    image_files = np.array(glob.glob(data_dir))
    if shuffle:
        # Inside this function the name `shuffle` is the boolean flag, so calling
        # `shuffle(image_files)` would try to call a bool. Use numpy's permutation
        # instead, which returns a shuffled copy of the array.
        image_files = np.random.permutation(image_files)

    return image_files
   
def show_images(image_files, bounds=[None, None], num_img=10):
    '''
        Display the first few images of a list, each in its own matplotlib figure
        Parameters:
            image_files: Sequence of image file paths
            bounds: Optional crop box given as two corner points, [(xmin, ymin), (xmax, ymax)].
                    When both corners are supplied every image is cropped to that box and
                    resized to 64x64 before being shown. The default [None, None] disables cropping.
                    NOTE(review): the original body read the box from an undefined `row`
                    variable; the two-corner layout is the interpretation adopted here - confirm.
            num_img: Maximum number of images to display
    '''
    # The default list is only read, never mutated, so sharing it between calls is harmless
    crop = all(bounds)
    for img_file in image_files[:num_img]:   # honour num_img instead of a hard-coded 10
        plt.figure()
        img = mpimg.imread(img_file)
        if crop:
            (xmin, ymin), (xmax, ymax) = bounds
            img = img[ymin:ymax, xmin:xmax]
            img = cv2.resize(img, (64, 64))
        plt.imshow(img)


In [None]:
# Keep only the Udacity annotation rows labelled "car" and summarise them
udacity_cars = udacity_data.loc[udacity_data["label"] == "car"]
udacity_cars.describe()

In [None]:
## Using the GTI/KITTI Dataset

# Gather the vehicle image file names, one glob pattern per sub-directory
data_dir = "./Dataset/vehicles/"
(far_image_files,
 left_image_files,
 close_image_files,
 right_image_files,
 kitti_image_files) = map(read_files, (
    data_dir + "GTI_Far/*.png",
    data_dir + "GTI_Left/*.png",
    data_dir + "GTI_MiddleClose/*.png",
    data_dir + "GTI_Right/*.png",
    data_dir + "KITTI_extracted/*.png",
))

# Merge every source, plus the Udacity car file names, into one array of paths
vehicle_image_files = np.concatenate([
    far_image_files,
    left_image_files,
    close_image_files,
    right_image_files,
    kitti_image_files,
    udacity_car_filenames,
])


In [None]:
# Gather the non-vehicle image file names
data_dir = "./Dataset/non_vehicles/"
extra_image_files, GTI_image_files = map(read_files, (
    data_dir + "Extras/*.png",
    data_dir + "GTI/*.png",
))

# Udacity rows whose label is anything other than "car" are added to the non-vehicle set
udacity_non_cars = udacity_data.loc[udacity_data["label"] != "car"]
udacity_non_car_files = udacity_non_cars["Filename"].values

non_vehicle_image_files = np.concatenate([
    extra_image_files,
    GTI_image_files,
    udacity_non_car_files,
])


In [None]:
## Prepare for shuffling so that the training and test sets get a balanced number of the far, close,
# left and right GTI car images, and a balanced number of the Udacity and KITTI car images

# Temporary labels: 0-5 identify the six kinds of car image (in the order listed below),
# 6 marks every non-car image
labels = np.array([
    group_id
    for group_id, group in enumerate((
        far_image_files,
        left_image_files,
        close_image_files,
        right_image_files,
        kitti_image_files,
        udacity_car_filenames,
        non_vehicle_image_files,
    ))
    for _ in range(len(group))
])


In [None]:
### Constructing the Neural Network. The following creates a class that contains helper functions 
# for creating the CNN, its cost and optimizer functions, and for training the network

class CNN:
    '''
        Helper class for building a convolutional image classifier with the TensorFlow 1.x
        graph API: input placeholders, the network itself, its cost, accuracy and optimizer
        ops, and a training loop.
    '''

    def __init__(self):
        # Start from an empty default graph so that re-creating the network does not
        # pile new ops on top of a previously built graph
        tf.reset_default_graph()

    # Create a generator to get batches of the dataset - assuming use of Udacity dataset only
    def get_batches(self, X, y, batch_size):
        '''
            A generator that supplies batches of data to the Neural Network model
            Parameters:
                X: The input dataset; each row is (filename, xmin, ymin, xmax, ymax)
                y: the labels for each row of the dataset
                batch_size: how many rows are in each batch
            Returns:
                X_out: a batch, of length batch_size (the last batch may be shorter), of
                       bounding-box crops resized to 64x64
                y_out: the corresponding labels to the batch output
        '''
        for start in range(0, len(X), batch_size):
            X_out = []    # List for storing the extracted image arrays
            for row in X[start:start + batch_size]:
                # Extract the image filename and bounded box coordinates
                img_file, xmin, ymin, xmax, ymax = row
                # Rows taken from a mixed-type array may carry the coordinates as numpy
                # scalars, floats or strings; slicing needs plain integers
                xmin, ymin, xmax, ymax = (int(v) for v in (xmin, ymin, xmax, ymax))
                img = mpimg.imread(img_file)
                # The box is treated as inclusive of its max edge, hence the + 1
                img = img[ymin:(ymax + 1), xmin:(xmax + 1)]
                img = cv2.resize(img, (64, 64))
                X_out.append(img)

            yield np.array(X_out), y[start:start + batch_size]

    def get_model_inputs(self, n_classes):
        '''
            Create the tensorflow input and label placeholders
            Parameters:
                n_classes: The number of classes in the dataset
            Returns:
                input_x: The placeholder for the input batch of image arrays, shape (None, 64, 64, 3)
                labels_y: The placeholder for the corresponding labels, shape (None, n_classes)
                is_training: The placeholder for the batch_normalization training flag
        '''

        input_x = tf.placeholder(tf.float32, (None, 64, 64, 3))
        labels_y = tf.placeholder(tf.float32, (None, n_classes))
        is_training = tf.placeholder(tf.bool)

        return input_x, labels_y, is_training

    def build_network(self, input_x, is_training, n_classes, alpha=0.2):
        '''
            Build the Convolutional Neural Network to classify the images
            Parameters:
                input_x: The input image data
                is_training: For use in batch normalization. If True, then use batch statistics.
                          If False, then use population statistics
                n_classes: The number of classes in the dataset
                alpha: Parameter to tune Leaky ReLU (slope applied to negative activations)
            Returns:
                logits: The output of the CNN, one un-normalised score per class
        '''
        # Input shape is 64x64x3
        # First Convolution Layer, no batch normalization
        x1 = tf.layers.conv2d(input_x, filters=64, kernel_size=3, strides=1, padding="same")
        x1 = tf.layers.max_pooling2d(x1, pool_size=2, strides=2)
        x1 = tf.maximum(x1, alpha * x1) # Leaky ReLU. Output shape is 32x32x64

        # Second Convolution Layer, with batch normalization
        x2 = tf.layers.conv2d(x1, filters=128, kernel_size=3, strides=1, padding="same")
        x2 = tf.layers.max_pooling2d(x2, pool_size=2, strides=2)
        x2 = tf.layers.batch_normalization(x2, training=is_training)
        x2 = tf.maximum(x2, alpha * x2) # Output shape 16x16x128

        # Third Convolution Layer, with batch normalization
        x3 = tf.layers.conv2d(x2, filters=256, kernel_size=3, strides=1, padding="same")
        x3 = tf.layers.max_pooling2d(x3, pool_size=2, strides=2)
        x3 = tf.layers.batch_normalization(x3, training=is_training)
        x3 = tf.maximum(x3, alpha * x3) # Output shape 8x8x256

        # Fourth Convolution Layer, with batch normalization
        x4 = tf.layers.conv2d(x3, filters=512, kernel_size=3, strides=1, padding="same")
        x4 = tf.layers.max_pooling2d(x4, pool_size=2, strides=2)
        x4 = tf.layers.batch_normalization(x4, training=is_training)
        x4 = tf.maximum(x4, alpha * x4) # Output shape 4x4x512
        x4_shape = x4.get_shape().as_list()[1:]

        # Flatten the tensor and pass through a fully-connected layer
        flat = tf.reshape(x4, (-1, x4_shape[0] * x4_shape[1] * x4_shape[2]))
        logits = tf.layers.dense(flat, units=n_classes, activation=None)

        return logits

    def get_model_loss(self, logits_x, labels_y):
        '''
            Create the model loss function
            Parameters:
                logits_x: logits from the neural network
                labels_y: corresponding labels for the input
            Returns:
                cost: the cost function for the model (mean softmax cross-entropy over the batch)
        '''
        cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=logits_x, labels=labels_y))

        return cost

    def get_model_accy(self, logits, y):
        '''
            Calculate the model's accuracy
            Parameters:
                logits: logits output of the neural network
                y: the corresponding labels for the logits
            Returns:
                accuracy: the model's accuracy (fraction of rows whose arg-max logit matches the arg-max label)
        '''
        correct_pred = tf.equal(tf.argmax(logits, 1), tf.argmax(y, 1))
        accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32), name='accuracy')

        return accuracy

    def get_model_opt(self, cost, learning_rate, beta1):
        '''
            Create the model optimizer
            Parameters:
                cost: The model's cost function
                learning_rate: The learning rate for the optimizer
                beta1: The exponential decay rate for the 1st moment estimates
            Returns:
                opt: The model optimizer set with learning and beta rates, and to minimize the cost function
        '''

        # The following line allows for batch normalization to update population statistics
        with tf.control_dependencies(tf.get_collection(tf.GraphKeys.UPDATE_OPS)):
            opt = tf.train.AdamOptimizer(learning_rate=learning_rate, beta1=beta1).minimize(cost)

        return opt

    def train_model(self, *args, batch_size=64, epochs=1, X_test=None, y_test=None, save_path=None):
        '''
            Train the CNN using the batches
            Parameters:
                *args: Exactly ten positional values, in this order:
                    X_train: The training rows (filename, xmin, ymin, xmax, ymax)
                    y_train: The corresponding training labels
                    X_validate: The validation rows
                    y_validate: The corresponding validation labels
                    input_x: The image placeholder
                    labels_y: The label placeholder
                    is_training: The batch-normalization training-flag placeholder
                    cost: The cost tensor
                    accy: The accuracy tensor
                    opt: The optimizer op
                batch_size: The size of the batches for training
                epochs: The number of epochs to train the model
                X_test: Optional test rows; the final test evaluation is skipped without them
                y_test: Optional test labels
                save_path: Optional checkpoint path prefix; when given, the trained variables are saved there
            Raises:
                TypeError: if the number of positional arguments is not ten
            Side effects:
                Stores the recorded metrics in self.history, a dict with the keys
                "train_accy", "val_accy" and "val_loss"
        '''
        # Unpack the positional arguments instead of silently relying on notebook globals.
        # This check runs before anything touches TensorFlow so a bad call fails fast.
        expected = ("X_train", "y_train", "X_validate", "y_validate", "input_x",
                    "labels_y", "is_training", "cost", "accy", "opt")
        if len(args) != len(expected):
            raise TypeError("train_model expects {} positional arguments ({}), got {}".format(
                len(expected), ", ".join(expected), len(args)))
        (X_train, y_train, X_validate, y_validate, input_x,
         labels_y, is_training, cost, accy, opt) = args

        saver = tf.train.Saver()
        accy_training = [] # To store the model accuracy at certain intervals during training
        accy_val = []
        losses_val = []    # Validation loss recorded alongside the accuracies
        print_every = 10 # Print the losses at this number of intervals
        save_every = 100 # Record the validation metrics at this number of intervals
        # get_batches also yields a final, shorter batch, so round up
        n_batches = -(-len(X_train) // batch_size)

        with tf.Session() as sess:
            sess.run(tf.global_variables_initializer())
            for e in range(epochs):
                for ii, (batch_x, batch_y) in enumerate(self.get_batches(X_train, y_train, batch_size)):
                    # Train the model
                    sess.run(opt, feed_dict={input_x: batch_x, labels_y: batch_y, is_training: True})

                    log_step = ii % print_every == 0
                    val_step = ii % save_every == 0

                    # Training metrics are needed by both branches below; fetch the loss
                    # and the accuracy in one forward pass
                    if log_step or val_step:
                        train_loss, train_accy = sess.run(
                            [cost, accy],
                            feed_dict={input_x: batch_x, labels_y: batch_y, is_training: False})

                    # Show training loss and accuracy every few steps
                    if log_step:
                        print("Epochs {}/{}...".format(e + 1, epochs),
                              "Batch {}/{}...".format(ii + 1, n_batches),
                              "Training Loss: {:.4f}...".format(train_loss),
                              "Training Accuracy: {:.4f}".format(train_accy))

                    # Show and save the validation loss every few steps
                    if val_step and len(X_validate) > 0:
                        # Read in the validation images from their filenames (a single full-size batch)
                        val_imgs, val_labels = next(self.get_batches(X_validate, y_validate, len(X_validate)))
                        val_loss, val_accy = sess.run(
                            [cost, accy],
                            feed_dict={input_x: val_imgs, labels_y: val_labels, is_training: False})
                        print("Epochs {}/{}...".format(e + 1, epochs),
                              "Validation Loss: {:.4f}...".format(val_loss),
                              "Validation Accuracy: {:.4f}".format(val_accy))
                        accy_training.append(train_accy)
                        accy_val.append(val_accy)
                        losses_val.append(val_loss)

            if save_path is not None:
                saver.save(sess, save_path)

            if X_test is None or y_test is None or len(X_test) == 0:
                print("Training Complete...No test set supplied, skipping test evaluation")
            else:
                print("Training Complete...Calculating Test Loss and Accuracy")
                # Read in the test images from their filenames; get_batches is a generator,
                # so take its first (and only) batch rather than unpacking the generator itself
                test_imgs, test_labels = next(self.get_batches(X_test, y_test, len(X_test)))
                # Calculate the test accuracy after training
                test_loss, test_accy = sess.run(
                    [cost, accy],
                    feed_dict={input_x: test_imgs, labels_y: test_labels, is_training: False})
                print("Test Loss: {:.4f}...".format(test_loss),
                      "Test Accuracy: {:.4f}".format(test_accy))

        # Keep the recorded metrics available after training
        self.history = {"train_accy": accy_training, "val_accy": accy_val, "val_loss": losses_val}
            

In [None]:
# Hyper-parameters for the network and its optimizer
alpha = 0.2              # Leaky ReLU parameter passed to build_network
learning_rate = 0.001    # Learning rate handed to the Adam optimizer
# NOTE(review): Adam's documented default for beta1 is 0.9; 0.05 is unusually low - confirm it is intentional
beta1 = 0.05             # Exponential decay rate for the 1st moment estimates
batch_size = 64
epochs = 1
# Number of classes is read from the last axis of the label array
# (presumably one label column per class - confirm against where udacity_labels is built)
n_classes = udacity_labels.shape[-1]

# Build the graph in dependency order: placeholders -> network -> loss / accuracy -> optimizer.
# CNN() resets the default graph, so it must be created before any of the ops below.
net = CNN()
input_x, labels_y, is_training = net.get_model_inputs(n_classes)
logits = net.build_network(input_x, is_training, n_classes=n_classes, alpha=alpha)
cost = net.get_model_loss(logits, labels_y)
accy = net.get_model_accy(logits, labels_y)
opt = net.get_model_opt(cost, learning_rate, beta1)

# The ten positional values are collected by train_model's *args;
# batch_size and epochs are its keyword-only parameters
net.train_model(X_train, y_train, X_validate, y_validate, input_x, labels_y, is_training, cost, accy, opt, 
                batch_size=batch_size, epochs=epochs)


In [None]:
#     if len(frame.bboxes) >= frame.samples:
#         total_bboxes = [item for sublist in frame.bboxes for item in sublist]
#         frame.centroids = total_bboxes
#         frame.bboxes = []
#         heat = np.zeros_like(image[:,:,0]).astype(np.float)
#         heat = add_heat(heat, total_bboxes)
#         heat = 255 * (heat - np.min(heat)) / (np.max(heat) - np.min(heat))
#         heat3d = np.dstack((heat, np.zeros_like(image[:,:,0]).astype(np.float), np.zeros_like(image[:,:,0]).astype(np.float)))
#         for box in total_bboxes:
#             cv2.rectangle(heat3d, box[0], box[1], (0, 0, 255), 1)
#         frame.image = heat3d
#     else:
#         if frame.centroids:
#             heat = np.zeros_like(image[:,:,0]).astype(np.float)
#             heat = add_heat(heat, frame.centroids)
#             heat = 255 * (heat - np.min(heat)) / (np.max(heat) - np.min(heat))
#             heat3d = np.dstack((heat, np.zeros_like(image[:,:,0]).astype(np.float), np.zeros_like(image[:,:,0]).astype(np.float)))
#             for box in frame.centroids:
#                 cv2.rectangle(heat3d, box[0], box[1], (0, 0, 255), 1)
#             frame.image = heat3d