# Load our video file into memory

Welcome to foosbot

In [1]:
!pip install numpy
!pip install imageio
!pip install matplotlib
!pip install keras-vis



In [2]:
from __future__ import print_function

import os
os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"   # see issue #152
os.environ["CUDA_VISIBLE_DEVICES"] = ""

import sys
import os
import csv
import numpy as np
from random import randint
from random import shuffle

from PIL import Image
import imageio
import itertools as it

import keras
from keras.models import Sequential
from keras.layers import Dense, Dropout

import pprint
pp = pprint.PrettyPrinter(depth=6)

# Location of the experiment's settings.tsv, which lists the video chunks.
# NOTE(review): this string is handed straight to open(), so a relative path is
# resolved against the current working directory rather than this file, and
# the backslash separators make it Windows-specific.
data_path  = ".\\..\\Recorder\\FeatureSetBuilder\\Experiments\\Experiment3Result\\settings.tsv"

class Chunk(object):
    '''
    One recorded video file together with its per-frame position labels.

    The video is decoded into an in-memory float32 array the first time one of
    the ``move_*`` methods is called.  Samples are then served one frame at a
    time from a leading training range or a trailing validation range.
    '''

    def __init__(self, video_file, position_file, min_positions, max_positions, position_rel_indexes, frame_rel_indexes, validation_rate):
        '''
        video_file:           path handed to imageio.get_reader.
        position_file:        text file, one row of tab-separated integers per frame.
        min_positions:        per-column lower bound used for normalization.
        max_positions:        per-column upper bound used for normalization.
        position_rel_indexes: frame offsets whose positions make up one output vector.
        frame_rel_indexes:    frame offsets whose images make up one input sample.
        validation_rate:      fraction of frames (at the end) kept for validation.
        '''
        # Raw position rows.  Whitespace-only rows are skipped so that a
        # trailing blank line does not crash the int() conversion.
        with open(position_file) as f:
            self.positions = [list(map(int, line.split("\t"))) for line in f if line.strip()]

        # Normalized copies of the rows; self.positions keeps the raw integers.
        self.positions_norm = self._normalize_positions(self.positions, min_positions, max_positions)
        self.output_size = len(self.positions_norm[0])*len(position_rel_indexes)

        # Camera frames in memory (filled in by _load_video_memory)
        self.validation_rate = validation_rate
        self.video_file = video_file
        self.is_video_loaded = False
        self.video_data = None

        # Probe the video for its length and frame size without keeping it open.
        print(self.video_file)
        video_reader = imageio.get_reader(self.video_file)
        try:
            self.num_frames = len(video_reader)
            first_frame = video_reader.get_data(0)
        finally:
            video_reader.close()
        self.height, self.width = np.shape(first_frame)[:2]

        # The first (1 - validation_rate) share of the frames is used for
        # training, the remainder for validation.  Deriving both ranges from
        # the same count also works when the training range is empty.
        num_training = int(round(self.num_frames * (1.0 - validation_rate)))
        self.frames_training = range(num_training)
        self.frames_validation = range(num_training, self.num_frames)

        # Start far enough in that the most negative frame offset is valid.
        self.current_frame = -min(frame_rel_indexes)
        self.position_rel_indexes = position_rel_indexes
        self.position_diff_rel_indexes = []
        self.frame_rel_indexes = frame_rel_indexes

    @staticmethod
    def _normalize_positions(positions, min_positions, max_positions):
        # Rescale each column with its own (min, max) pair and return new rows,
        # leaving the input rows untouched.
        # TODO: clamp the result to [0.0, 1.0] (clamping is currently disabled).
        normalized = []
        for position in positions:
            normalized.append([
                float(value - min_positions[idx]) / float(max_positions[idx] - min_positions[idx])
                for idx, value in enumerate(position)
            ])
        return normalized

    def _load_video_memory(self):
        # Decode every frame once into video_data: [frame#, height, width, channels]
        if self.is_video_loaded:
            return

        video_reader = imageio.get_reader(self.video_file)
        try:
            self.video_data = np.zeros(shape=(self.num_frames, self.height, self.width, 3), dtype=np.float32)
            for frame_index in range(self.num_frames):
                self.video_data[frame_index,:,:,:] = self._read_frame(video_reader.get_data(frame_index))
            self.is_video_loaded = True
        finally:
            # Release the reader even when decoding fails part-way.
            video_reader.close()

    def clear_video_memory(self):
        # Currently a no-op: releasing the cached frames is disabled, so a
        # chunk stays in memory once it has been loaded.
        pass

    def _read_frame(self, data):
        '''
        Convert one decoded frame into a float32 array of shape
        (height, width, channels), shifted and scaled as (value - 128) / 128.
        '''
        image = Image.fromarray(data)

        norm_image = np.array(image, dtype=np.float32)
        norm_image -= 128.0
        norm_image /= 128.0

        return np.ascontiguousarray(norm_image)

    def move_first_training_frame(self):
        # Rewind to the first usable training frame and make sure the video is loaded.
        self.current_frame = -min(self.frame_rel_indexes)
        self._load_video_memory()

    def move_first_validation_frame(self):
        # Jump to the first usable validation frame and make sure the video is loaded.
        self.current_frame = min(self.frames_validation) - min(self.frame_rel_indexes)
        self._load_video_memory()

    def move_to_frame(self, index):
        # Jump to an arbitrary frame, never earlier than the first usable one.
        self.current_frame = max(-min(self.frame_rel_indexes), index)
        self._load_video_memory()

    def get_frame(self, index):
        '''
        Build the sample anchored at frame `index`.

        Returns (frames, output): `frames` is a float32 array with one image
        per entry of frame_rel_indexes; `output` is a flat list holding the
        normalized positions at each offset in position_rel_indexes.
        The video must already be loaded.
        '''
        frame_shape = self.video_data.shape[1:3]
        frames = np.zeros(shape=(len(self.frame_rel_indexes), frame_shape[0], frame_shape[1], 3), dtype=np.float32)
        for idx, rel_idx in enumerate(self.frame_rel_indexes):
            frames[idx, :, :, :] = self.video_data[index + rel_idx,:,:,:]

        # Concatenate the position rows; extend() copies the values, so the
        # caller may modify `output` without touching positions_norm.
        output = []
        for rel_idx in self.position_rel_indexes:
            output.extend(self.positions_norm[index + rel_idx])

        return (frames, output)

    def _next_frame_within(self, frame_range):
        # Serve the sample at current_frame and advance, as long as the video
        # is loaded and the furthest position offset (plus one) is still
        # inside frame_range; otherwise report exhaustion with (None, None).
        furthest = self.current_frame + max(self.position_rel_indexes) + 1
        if self.is_video_loaded and furthest in frame_range:
            sample = self.get_frame(self.current_frame)
            self.current_frame += 1
            return sample

        # Reached the end, give the chunk a chance to release its memory
        self.clear_video_memory()
        return (None, None)

    def get_next_training_frame(self):
        # Returns ([frames], [training outputs]) or (None, None) when exhausted.
        return self._next_frame_within(self.frames_training)

    def get_next_validation_frame(self):
        # Returns ([frames], [validation outputs]) or (None, None) when exhausted.
        return self._next_frame_within(self.frames_validation)

    
class TrainingInput(object):
    '''
    A sequence of Chunk objects described by a tab-separated settings file,
    exposed as one continuous stream of training or validation samples.

    Each settings row holds: video file, position file, then the per-column
    minimum positions followed by the per-column maximum positions.  File
    names are resolved relative to the directory of the settings file.
    '''

    def __init__(self, settings_file, position_rel_indexes, frame_rel_indexes, valdiation_rate):
        '''
        settings_file:        path of the tab-separated chunk list.
        position_rel_indexes: frame offsets forwarded to every Chunk.
        frame_rel_indexes:    frame offsets forwarded to every Chunk.
        valdiation_rate:      validation fraction forwarded to every Chunk
                              (parameter name kept as-is for existing callers).
        '''
        self.base_path = os.path.dirname(settings_file)

        self.chunks = []
        self.length = 0
        self.valdiation_rate = valdiation_rate
        self.width = None
        self.height = None
        self.depth = len(frame_rel_indexes)
        self.channels = 3
        self.output_size = None

        # Read all rows first so the settings file is closed before the
        # (potentially slow) chunks are created.
        with open(settings_file, "r") as f:
            rows = [row.rstrip("\r\n") for row in f]

        for row in rows:
            if not row.strip():
                # Skip blank rows instead of building a chunk from an empty path.
                continue

            tokens = row.split("\t")
            range_tokens = tokens[2:]
            half = len(range_tokens) // 2

            # First half of the remaining columns are minimums, the rest maximums.
            min_range = list(map(int, range_tokens[:half]))
            max_range = list(map(int, range_tokens[half:]))

            video_path = os.path.join(self.base_path, tokens[0])
            position_path = os.path.join(self.base_path, tokens[1])

            print("Creating training chunk from %s" % video_path)
            chunk = Chunk(video_path, position_path, min_range, max_range, position_rel_indexes, frame_rel_indexes, valdiation_rate)
            self.length += chunk.num_frames

            # Frame size and output size are taken from the most recent chunk.
            self.width = chunk.width
            self.height = chunk.height
            self.output_size = chunk.output_size

            print("added %i new frames for a total of %i" % (chunk.num_frames, self.length))
            self.chunks.append(chunk)

        self.active_chunk = 0

    def _rewind(self, rewind_name):
        # Restart from the first chunk, positioned with its `rewind_name` method.
        self.active_chunk = 0

        for chunk in self.chunks:
            chunk.clear_video_memory()

        if self.chunks:
            getattr(self.chunks[0], rewind_name)()

    def _next_sample(self, getter_name, rewind_name):
        # Pull the next sample from the active chunk, moving on to the
        # following chunks until one delivers a sample or all are used up.
        while self.active_chunk < len(self.chunks):
            (frames, output) = getattr(self.chunks[self.active_chunk], getter_name)()

            # `frames` is a NumPy array; it has to be tested by identity,
            # because `== None` compares element-wise and cannot be used in `if`.
            if frames is not None:
                return (frames, output)

            # Move to the next chunk
            self.active_chunk += 1
            if self.active_chunk < len(self.chunks):
                getattr(self.chunks[self.active_chunk], rewind_name)()

        return (None, None)

    def move_first_training_frame(self):
        # Restart the training stream at the first chunk.
        self._rewind("move_first_training_frame")

    def move_first_validation_frame(self):
        # Restart the validation stream at the first chunk.
        self._rewind("move_first_validation_frame")

    def get_next_training_frame(self):
        # Returns (frames, output), or (None, None) once every chunk is exhausted.
        return self._next_sample("get_next_training_frame", "move_first_training_frame")

    def get_next_validation_frame(self):
        # Returns (frames, output), or (None, None) once every chunk is exhausted.
        return self._next_sample("get_next_validation_frame", "move_first_validation_frame")



# Build the dataset used by the cells below.  Creating the TrainingInput
# creates one Chunk per settings row, which opens each video once to read its
# frame count and size.
print("Opening training frames from config %s." % (data_path))
position_rel_indexes = [0, 10] # Predict current rod positions and future position in 10 frames
frame_rel_indexes = [0] # Use only current frame as input
# The last argument (0.2) is the validation rate forwarded to every chunk.
training = TrainingInput(data_path, position_rel_indexes, frame_rel_indexes, 0.2)

'''
training.move_first_validation_frame()
(frame, position) = training.get_next_validation_frame()
count = 0
while frame != None:
    (frame, position) = training.get_next_validation_frame()
    print("%i - %i" % (training.active_chunk, training.chunks[training.active_chunk].current_frame))
    count += 1
'''


Using TensorFlow backend.


Opening training frames from config .\..\Recorder\FeatureSetBuilder\Experiments\Experiment3Result\settings.tsv.
Creating training chunk from .\..\Recorder\FeatureSetBuilder\Experiments\Experiment3Result\chunk0.avi
.\..\Recorder\FeatureSetBuilder\Experiments\Experiment3Result\chunk0.avi
added 15932 new frames for a total of 15932
Creating training chunk from .\..\Recorder\FeatureSetBuilder\Experiments\Experiment3Result\chunk1.avi
.\..\Recorder\FeatureSetBuilder\Experiments\Experiment3Result\chunk1.avi
added 22943 new frames for a total of 38875


'\ntraining.move_first_validation_frame()\n(frame, position) = training.get_next_validation_frame()\ncount = 0\nwhile frame != None:\n    (frame, position) = training.get_next_validation_frame()\n    print("%i - %i" % (training.active_chunk, training.chunks[training.active_chunk].current_frame))\n    count += 1\n'

# Train our model using Keras (TensorFlow backend)

In [3]:

class TrainingIterator(object):
    '''
    Iterable view over the training samples of a TrainingInput-like object.

    Every pass rewinds the input to its first training frame and yields
    (frames, output) pairs until the input returns (None, None).
    '''

    def __init__(self, input):
        self.input = input

    def reset(self):
        # Rewind the wrapped input to its first training frame.
        self.input.move_first_training_frame()

    def __iter__(self):
        self.input.move_first_training_frame()
        (frames, output) = self.input.get_next_training_frame()
        # `frames` is a NumPy array: compare by identity, since `!= None`
        # is evaluated element-wise and cannot be used as a loop condition.
        while frames is not None:
            yield (frames, output)
            (frames, output) = self.input.get_next_training_frame()
    
class ValidationIterator(object):
    '''
    Iterable view over the validation samples of a TrainingInput-like object.

    Every pass rewinds the input to its first validation frame and yields
    (frames, output) pairs until the input returns (None, None).
    '''

    def __init__(self, input):
        self.input = input

    def reset(self):
        # Rewind the wrapped input to its first validation frame.
        self.input.move_first_validation_frame()

    def __iter__(self):
        self.input.move_first_validation_frame()
        (frames, output) = self.input.get_next_validation_frame()
        # `frames` is a NumPy array: compare by identity, since `!= None`
        # is evaluated element-wise and cannot be used as a loop condition.
        while frames is not None:
            yield (frames, output)
            (frames, output) = self.input.get_next_validation_frame()
            
# Dimensions of one sample, copied from the loaded dataset.  The batch
# generators below read these module-level names to size their arrays.
image_height       = training.height
image_width        = training.width
image_depth        = training.depth
image_channels     = training.channels
output_size        = training.output_size


def TrainGen():
    '''
    Endless generator of (frames, output) training samples.

    Walks the module-level `training` input from its first training frame to
    its last and then starts over.  NOTE: if the input never delivers a
    sample, the outer loop keeps rewinding without ever yielding.
    '''
    while True:
        training.move_first_training_frame()
        (frames, output) = training.get_next_training_frame()
        # `frames` is a NumPy array: test by identity, because `!= None`
        # compares element-wise and cannot serve as a loop condition.
        while frames is not None:
            yield (frames, output)
            (frames, output) = training.get_next_training_frame()
            
def ValidateGen():
    '''
    Endless generator of (frames, output) validation samples.

    Walks the module-level `training` input from its first validation frame
    to its last and then starts over.  NOTE: if the input never delivers a
    sample, the outer loop keeps rewinding without ever yielding.
    '''
    while True:
        training.move_first_validation_frame()
        (frames, output) = training.get_next_validation_frame()
        # `frames` is a NumPy array: test by identity, because `!= None`
        # compares element-wise and cannot serve as a loop condition.
        while frames is not None:
            yield (frames, output)
            (frames, output) = training.get_next_validation_frame()
            
def TrainBatchGen(batch_size):
    '''
    Endless generator of training batches.

    Yields (batch_frames, batch_outputs), where batch_frames has shape
    (batch_size, image_depth, image_height, image_width, image_channels) and
    batch_outputs has shape (batch_size, output_size), both float32.

    In every output vector the entries 3..5 are replaced by their difference
    to the entries 0..2, so the second half holds offsets from the first half.
    '''
    gen = TrainGen()
    while True:
        # Build the next batch
        batch_frames = np.zeros(shape=(batch_size, image_depth, image_height, image_width, image_channels), dtype=np.float32)
        batch_outputs = np.zeros(shape=(batch_size, output_size), dtype=np.float32)
        for i in range(batch_size):
            (frames, output) = next(gen)
            # Each entry is paired with its own counterpart; the previous code
            # computed entry 5 from the already-overwritten entry 4.
            for rod in range(3):
                output[3 + rod] = output[3 + rod] - output[rod]
            batch_frames[i,:,:,:,:] = frames
            batch_outputs[i,:] = output

        yield (batch_frames, batch_outputs)

def ValidateBatchGen(batch_size):
    '''
    Endless generator of validation batches.

    Yields (batch_frames, batch_outputs), where batch_frames has shape
    (batch_size, image_depth, image_height, image_width, image_channels) and
    batch_outputs has shape (batch_size, output_size), both float32.

    In every output vector the entries 3..5 are replaced by their difference
    to the entries 0..2, so the second half holds offsets from the first half.
    '''
    gen = ValidateGen()
    while True:
        # Build the next batch
        batch_frames = np.zeros(shape=(batch_size, image_depth, image_height, image_width, image_channels), dtype=np.float32)
        batch_outputs = np.zeros(shape=(batch_size, output_size), dtype=np.float32)
        for i in range(batch_size):
            (frames, output) = next(gen)
            # Each entry is paired with its own counterpart; the previous code
            # computed entry 5 from the already-overwritten entry 4.
            for rod in range(3):
                output[3 + rod] = output[3 + rod] - output[rod]
            batch_frames[i,:,:,:,:] = frames
            batch_outputs[i,:] = output

        yield (batch_frames, batch_outputs)
    
# Iterator wrappers over the training and validation splits.
# NOTE(review): no training call is made here; these objects only wrap `training`.
gen_train = TrainingIterator(training)
gen_validation = ValidationIterator(training)

# Filename template for stored weights; the %i placeholder takes an integer.
WEIGHTS_FNAME = 'mnist_cnn_weights_%i.hdf'


In [4]:
from keras.models import Sequential
from keras.layers import *


def validate(model, reader, trainer):
    '''
    Evaluate `model` over everything `reader` provides, print an aggregate
    error figure and plot true against predicted values for two outputs.

    model:   object offering eval() and arguments.
    reader:  object offering reset(), size(), has_more(), next_minibatch(),
             sequence_length, height and width.
    trainer: object offering test_minibatch().

    NOTE(review): this function reads `input_var`, `label_var` and `plt`,
    none of which are defined in the visible part of this file, and the
    reader/trainer interface differs from the generators defined above.
    It looks like a leftover from an earlier, non-Keras version - confirm
    before calling it.
    '''
    # process minibatches and evaluate the model
    metric_numer    = 0
    metric_denom    = 0
    minibatch_index = 0
    minibatch_size = 1
    
    reader.reset()
    # One row per sample, two output columns each.
    output_true = np.zeros((reader.size(),2))
    output_predicted = np.zeros((reader.size(),2))
    current_output = 0
    
    while reader.has_more():
        videos, labels, current_minibatch = reader.next_minibatch(minibatch_size)
        
        for i in range(minibatch_size):
            output_true[current_output,:] = labels[i,:]
            
            # Use the model to predict the corresponding output
            video = np.empty(shape=(1, 1, reader.sequence_length, reader.height, reader.width), dtype=np.float32)
            video[0,0,:,:,:] = videos[i,:,:,:]
            predictions = model.eval({model.arguments[0]:video})
            
            # Log the result
            output_predicted[current_output,:] = predictions
            
            current_output += 1
            
        
        # Accumulate the per-minibatch test result, weighted by the sample count
        result = trainer.test_minibatch({input_var : videos, label_var : labels})
        #pp.pprint(result)
        metric_numer += result * current_minibatch
        #print('error rate on an unseen minibatch: {}'.format(metric_numer))
        metric_denom += current_minibatch
        # Keep track of the number of minibatches processed so far.
        minibatch_index += 1

    # NOTE(review): minibatch_index already equals the number of minibatches
    # here, so the printed range end is one larger; the division also fails
    # when the reader delivered no samples.
    print("Validation Results: Minibatch[1-{}]: errs = {:0.2f}% * {}".format(minibatch_index+1, (metric_numer*100.0)/metric_denom, metric_denom))
    plt.subplot(211)
    count = reader.size()
    # Sort the (true, predicted) pairs by true value so the true curve is monotonic.
    true, predicted = zip(*sorted(zip(output_true[0:count,0], output_predicted[0:count,0])))
    plt.plot(range(count),true, range(count),predicted )
    plt.ylabel("Linear acceleration")
    # NOTE(review): the title says 200, but all `count` samples are plotted.
    plt.title("First 200 output recordings")
    plt.grid(True)
    plt.subplot(212)
    true, predicted = zip(*sorted(zip(output_true[0:count,1], output_predicted[0:count,1])))
    plt.plot(range(count),true, range(count),predicted, marker='.', markersize = 2, linewidth =0.1, markerfacecolor='black')
    plt.ylabel("Angular velocity")
    plt.grid(True)
    plt.show()
    

# Training options
# NOTE(review): max_epochs is not read anywhere in the visible part of this file.
max_epochs = 100

# Sample dimensions taken from the loaded dataset; the model's input shape
# below is built from them.
# These values must match for both train and test reader.
image_height       = training.height
image_width        = training.width
image_depth        = training.depth
image_channels     = training.channels
output_size        = training.output_size



# Build the model: a stack of 3D convolutions whose kernels span a single
# frame (1 x 3 x 3), followed by three dense layers and a linear output layer.
pp.pprint("Input shape without batches:")
pp.pprint((image_depth, image_height, image_width, image_channels))
model = Sequential([

    Conv3D(64,
           input_shape=(image_depth, image_height, image_width, image_channels),
           data_format="channels_last",
           kernel_size = (1, 3, 3),
           padding = "same",
           name = "conv1"),

    Activation('relu'),

    Conv3D(64,
           data_format="channels_last",
           kernel_size = (1, 3, 3),
           padding = "same",
           name = "conv1.2"),

    Activation('relu'),

    # Pooling only shrinks height and width; the frame axis is left alone.
    MaxPooling3D( pool_size=(1, 2, 2),
                  padding = "same",
                  data_format="channels_last"),

    Conv3D(64,
           data_format="channels_last",
           kernel_size = (1, 3, 3),
           padding = "same",
           name = "conv2"),

    Activation('relu'),

    Conv3D(64,
           data_format="channels_last",
           kernel_size = (1, 3, 3),
           padding = "same",
           name = "conv2.1"),

    Activation('relu'),

    MaxPooling3D( pool_size=(1, 2, 2),
                  padding = "same",
                  data_format="channels_last"),

    Conv3D(64,
           data_format="channels_last",
           kernel_size = (1, 3, 3),
           padding = "same",
           name = "conv3"),

    Activation('relu'),

    Conv3D(64,
           data_format="channels_last",
           kernel_size = (1, 3, 3),
           padding = "same",
           name = "conv3.1"),

    Activation('relu'),

    MaxPooling3D( pool_size=(1, 2, 2),
                  padding = "same",
                  data_format="channels_last"),

    Conv3D(64,
           data_format="channels_last",
           kernel_size = (1, 3, 3),
           padding = "same",
           name = "conv4"),

    Activation('relu'),

    Flatten(),

    Dense(128),
    Activation('relu'),
    Dropout(0.5),

    Dense(64),
    Activation('relu'),
    Dropout(0.5),

    Dense(64),
    Activation('relu'),
    Dropout(0.5),

    # One linear unit per target value.  Sized from the dataset (previously a
    # literal 6) so it always agrees with the (batch_size, output_size) label
    # arrays produced by the batch generators.
    Dense(output_size),
])

'''
model = Sequential([
    Conv3D(32,
           input_shape=(1, sequence_length, image_height, image_width),
           data_format="channels_first",
           kernel_size = (3, 3, 3),
           padding = "same"),
    
    Activation('relu'),
  
        
    MaxPooling3D( pool_size=(1, 2, 2),
                  padding = "same",
                  data_format="channels_first"),
    
    Dropout(0.5), 
    
    
    
    Conv3D(128,
           data_format="channels_first",
           kernel_size = (1, 3, 3),
           padding = "same"),
    
    Activation('relu'),
    
    MaxPooling3D( pool_size=(1, 2, 2),
                  padding = "same",
                  data_format="channels_first"),
    
    Dropout(0.5),
    
        
        
    Conv3D(32,
           data_format="channels_first",
           kernel_size = (1, 3, 3),
           padding = "same"),
    
    Activation('relu'),
    
    MaxPooling3D( pool_size=(1, 2, 2),
                  padding = "same",
                  data_format="channels_first"),
    
    Dropout(0.5),
        
        
    Conv3D(32,
           data_format="channels_first",
           kernel_size = (1, 3, 3),
           padding = "same"),
    
    Activation('relu'),
    
    MaxPooling3D( pool_size=(2, 2, 2),
                  padding = "same",
                  data_format="channels_first"),
    
    Dropout(0.5),
    
    
        
    Flatten(),
    
    Dense(512),
    Activation('relu'),
    Dropout(0.5),
        
    Dense(64),
    Activation('relu'),
    Dropout(0.5),
    
    Dense(output_size),
])
'''

# Regression setup: the loss is the mean squared error between the model
# output and the target vector.
# NOTE(review): 'accuracy' is reported as a metric although the targets are
# continuous values - confirm that this figure is actually meaningful here.
model.compile(optimizer=keras.optimizers.RMSprop(lr=0.00001),
              loss='mean_squared_error',
              metrics=['accuracy'])

# Print the layer-by-layer overview of the compiled model.
model.summary()

'Input shape without batches:'
(1, 54, 100, 3)
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv1 (Conv3D)               (None, 1, 54, 100, 64)    1792      
_________________________________________________________________
activation_1 (Activation)    (None, 1, 54, 100, 64)    0         
_________________________________________________________________
conv1.2 (Conv3D)             (None, 1, 54, 100, 64)    36928     
_________________________________________________________________
activation_2 (Activation)    (None, 1, 54, 100, 64)    0         
_________________________________________________________________
max_pooling3d_1 (MaxPooling3 (None, 1, 27, 50, 64)     0         
_________________________________________________________________
conv2 (Conv3D)               (None, 1, 27, 50, 64)     36928     
_________________________________________________________________
activation_3 (Activation)    

In [None]:
# Restore previously saved weights; `epoch` fills the %i placeholder of the
# filename template.
epoch = 22
WEIGHTS_FNAME = 'mnist_cnn_weights_%i.hdf'
model.load_weights(WEIGHTS_FNAME % epoch)
print("Loaded model.")

#output_true = labels_validate
#output_predicted = model.predict(data_validate, batch_size=32, verbose=1)
#print("Predicted.")


Loaded model.


In [None]:
def plot_validate(count=2000):
    '''
    Predict one validation batch and plot true against predicted values.

    count: number of validation samples to fetch and plot (default 2000).

    One subplot is drawn per output column.  Within each subplot the
    (true, predicted) pairs are sorted by true value, so the true curve is
    monotonic and the spread of the predictions around it becomes visible.

    Raises ValueError when count is smaller than 1.
    '''
    if count < 1:
        raise ValueError("count must be at least 1")

    (frames, outputs_true) = next(ValidateBatchGen(count))
    outputs_predicted = model.predict(frames, batch_size=32, verbose=1)
    print("Predicted.")

    pp.pprint(outputs_true)
    pp.pprint(outputs_predicted)

    # NOTE(review): ValidateBatchGen rewrites columns 3-5 relative to columns
    # 0-2, so the three "in 1/2 second" labels describe offsets rather than
    # absolute positions - confirm the wording.
    labels = [
        "Rod 1 Position",
        "Rod 2 Position",
        "Rod 3 Position",
        "Rod 1 Position in 1/2 second",
        "Rod 2 Position in 1/2 second",
        "Rod 3 Position in 1/2 second",
    ]
    marker_style = dict(marker='.', markersize=2, linewidth=0.1, markerfacecolor='black')
    sample_axis = range(count)

    plt.figure(figsize=(5,20))
    for column, label in enumerate(labels):
        plt.subplot(len(labels), 1, column + 1)
        true, predicted = zip(*sorted(zip(outputs_true[0:count, column], outputs_predicted[0:count, column])))

        # The first subplot keeps the default line style, as before.
        style = {} if column == 0 else marker_style
        plt.plot(sample_axis, true, sample_axis, predicted, **style)
        plt.ylabel(label)
        if column == 0:
            # The title now states the number of samples actually plotted.
            plt.title("First %i output recordings" % count)
        plt.grid(True)

    plt.show()

plot_validate()

