In [159]:
import numpy as np
import matplotlib.pylab as pylab
import imageio
import pandas as pd
import tensorflow as tf
from sklearn.preprocessing import OneHotEncoder
from sklearn.metrics import accuracy_score
from imageio.core.util import asarray as imgToArr
%matplotlib inline

In [160]:
videoFile = './data/driving.avi'
vid = imageio.get_reader(videoFile,  'ffmpeg')

# Columns: Frame, Clutch, Brake, AccPed, GazeX, GazeY, heading, Speed, HeadRate, Accel
dataFile = './data/actions.csv'
df = pd.read_csv(dataFile, delimiter='\t')

# One-hot encode brakes
outputs = OneHotEncoder(sparse=False).fit_transform(df['Brake'].reshape(-1,1))  # column 0: no brake, column 1: brake

In [161]:
def minibatch(data, batch_size, data_size):
    """Generates a minibatch from the given data and parameters."""
    randomized = np.random.permutation(data)
    batches = []
    num_batches = 0
    while num_batches * batch_size < data_size:
        new_batch = randomized[num_batches * batch_size:(num_batches + 1) * batch_size]
        batches.append(new_batch)
        num_batches += 1
    return batches

def get_glimpses(images, coords):
    """Gets a batch of glimpses."""
    arr = []
    for img, coord in zip(images, coords):
        arr.append(get_glimpse(img, coord[0], coord[1]))
    return np.array(arr)

def get_glimpse(image, x, y, stride=14):
    """Returns a subsection (glimpse) of the image centered on the given point."""
    x = int(x)  # Force to int
    y = int(y)  # Force to int
    min_x = x - stride
    max_x = x + stride
    
    min_y = y - stride
    max_y = y + stride
    image_glimpse = image[min_y:max_y, min_x:max_x, :]  # NOTE: row, column, RGB
#     image_glimpse = image[min_y:max_y, min_x:max_x, 0]  # NOTE: row, column, RGB; everything is greyscale; flatten RGB layer
    return imgToArr(image_glimpse)

In [178]:
# define TensorFlow computation graph

"""Create a logistic regression model for brake classification with 28x28x3 image input."""
# Create placeholders for inputs that will be placed via batches
image_input = tf.placeholder(tf.float32, [None, 28*28*3], name="image")
gaze_input = tf.placeholder(tf.float32, [None, 2], name="gaze")
y_ = tf.placeholder(tf.float32, [None, 2], name="output")

image_weights = tf.Variable(tf.truncated_normal([28*28*3, 2], stddev=1), name="image_weights")
gaze_weights = tf.Variable(tf.truncated_normal([2, 2], stddev=1), name="gaze_weights")

image_bias = tf.Variable(tf.truncated_normal([2], stddev=1), name="image_bias")
gaze_bias = tf.Variable(tf.truncated_normal([2], stddev=1), name="gaze_bias")

image_logits = tf.matmul(image_input, image_weights) + image_bias
gaze_logits = tf.matmul(gaze_input, gaze_weights) + gaze_bias

logits = tf.mul(tf.add(image_logits, gaze_logits), 0.5)
y = tf.nn.softmax(logits)

cross_entropy = tf.reduce_mean(tf.reduce_sum(-y_*tf.log(tf.clip_by_value(y, 1e-10,1.0)),reduction_indices=[1]))
optimizer = tf.train.AdamOptimizer().minimize(cross_entropy)

# initialization of variables
init = tf.initialize_all_variables()

In [179]:
def train_model():
    """Trains a model."""
    with tf.Session() as sess:
        sess = tf.Session()
        sess.run(init)
        for epoch in range(100):
            epochs = minibatch(range(100), 25, 100)

            for index_batch in epochs:
                index_batch = index_batch.tolist()
                frames = df['Frame'][index_batch].astype(np.int32)
                gaze_xs = df['GazeX'][index_batch]
                gaze_ys = df['GazeY'][index_batch]

                # Image is 900x244, so bound the gaze to get full 28x28 blocks
                if any(gaze_xs < 14) or any(gaze_xs > 900-14):
                    continue
                if any(gaze_ys < 14) or any(gaze_ys > 244-14):
                    continue

                gazes = np.array([gaze_xs, gaze_ys]).T
                output = outputs[index_batch]
                images = [vid.get_data(frame) for frame in frames]
                glimpses = get_glimpses(images, gazes)
                glimpses = glimpses.reshape(-1, 28*28*3)

                sess.run(optimizer, feed_dict={image_input: glimpses, gaze_input: gazes, y_: output})
                ce = sess.run(cross_entropy, feed_dict={image_input: glimpses, gaze_input: gazes, y_: output})
                print(ce)
    #             if index == 67:
    #                 fig = pylab.figure()
    #                 pylab.imshow(this_glimpse)
    #                 pylab.show()

In [180]:
train_model()

22.1048
22.1048
22.1048
22.1048
22.1048
20.2627
22.1048
20.2627
17.4996
20.2627
21.1838
22.1048
19.3417
19.3417
20.2627
16.5786
14.7365
17.0973
14.7365
14.7365
15.6576
17.4996
15.6576
13.8155
12.8945
11.0524
15.4858
11.0524
6.44724
11.9734
9.21034
10.0018
8.28931
9.21034
4.60517
3.68414
11.9734
3.68414
7.36827
7.36827
5.5262


KeyboardInterrupt: 