In [1]:
import json
import numpy as np
from numpy import array
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import Dropout
from keras.layers import LSTM
from keras.utils.np_utils import to_categorical
from keras.layers import TimeDistributed
from collections import defaultdict

Using TensorFlow backend.


### Loading in All Data:

In [3]:
with open('saved_frame_data/M_1_2_everything.json') as data_file:    
    M_1_everything = json.load(data_file)
with open('saved_frame_data/M_1_2_labels.json') as data_file:    
    M_1_labels = json.load(data_file)

In [4]:
with open('saved_frame_data/M_2_2_everything.json') as data_file:    
    M_2_everything = json.load(data_file)
with open('saved_frame_data/M_2_2_labels.json') as data_file:    
    M_2_labels = json.load(data_file)

In [5]:
with open('saved_frame_data/M_3_2_everything.json') as data_file:    
    M_3_everything = json.load(data_file)
with open('saved_frame_data/M_3_labels.json') as data_file:    
    M_3_labels = json.load(data_file)

In [6]:
with open('saved_frame_data/M_4_2_everything.json') as data_file:    
    M_4_everything = json.load(data_file)
with open('saved_frame_data/M_4_labels.json') as data_file:    
    M_4_labels = json.load(data_file)

In [7]:
with open('saved_frame_data/T_1_2_everything.json') as data_file:    
    T_1_everything = json.load(data_file)
with open('saved_frame_data/T_1_labels.json') as data_file:    
    T_1_labels = json.load(data_file)

In [8]:
with open('saved_frame_data/T_2_2_everything.json') as data_file:    
    T_2_everything = json.load(data_file)
with open('saved_frame_data/T_2_labels.json') as data_file:    
    T_2_labels = json.load(data_file)

In [9]:
with open('saved_frame_data/T_3_2_everything.json') as data_file:    
    T_3_everything = json.load(data_file)
with open('saved_frame_data/T_3_labels.json') as data_file:    
    T_3_labels = json.load(data_file)

In [11]:
with open('saved_frame_data/BJ_1_everything.json') as data_file:    
    BJ_everything = json.load(data_file)
with open('saved_frame_data/BJ_1_labels.json') as data_file:    
    BJ_labels = json.load(data_file)

### Combining Data into One Set:

In [12]:
def bucket_vectors(vect):
    if vect[0] <= 0:
        return 0 # left
    return 1 # right

In [13]:
def bucket_box(box):
    center_x = (box[3]+box[1])/2.0
    if center_x < 0.5:
        return 0 # left
    return 1 # right

In [14]:
def get_X_and_Y(all_data, all_labels, frame_thresh, time_steps):
    person_to_boxes = defaultdict(list)
    person_to_flow = defaultdict(list)
    person_to_flow_x = defaultdict(list)
    for entry in all_data:
        if all_labels[str(entry['person_id'])] != "unknown":
            person_to_boxes[entry['person_id']].append(entry['box'])
            person_to_flow[entry['person_id']].append((entry['direction'], entry['direction2']))
            person_to_flow_x[entry['person_id']].append((entry['flow'][0], entry['flow2'][0]))
    person_to_boxes2 = defaultdict(list)
    person_to_flow2 = defaultdict(list)
    person_to_flow_x2 = defaultdict(list)
    for person in person_to_boxes:
        if len(person_to_boxes[person])>=frame_thresh:
            person_to_boxes2[person] = person_to_boxes[person]
            person_to_flow2[person] = person_to_flow[person]
            person_to_flow_x2[person] = person_to_flow_x[person]
    person_to_boxes = person_to_boxes2
    person_to_flow = person_to_flow2
    person_to_flow_x = person_to_flow_x2
    X = []
    y = []
    dir_init = []
    dir_later = []
    pos_init = []
    pos_later = []
    num_frames = []
    for person in person_to_flow:
        person_data = []
        for i in range(time_steps):
            flow = person_to_flow_x[person][i]
            box = person_to_boxes[person][i]
            if flow[0]=="n":
                entry = [0.0, 0.0, float(box[0]), float(box[1]), float(box[2]), float(box[3])]
            else:
                entry = [float(flow[0]), float(flow[1]), float(box[0]), float(box[1]), float(box[2]), float(box[3])]
            person_data.append(entry)
        X.append(person_data)
        num_frames.append(len(person_to_boxes[person]))
        dir_init.append(bucket_vectors(person_to_flow_x[person][0]))
        dir_later.append(bucket_vectors(person_to_flow_x[person][time_steps]))
        pos_init.append(bucket_box(person_to_boxes[person][0]))
        pos_later.append(bucket_box(person_to_boxes[person][time_steps]))
        if (all_labels[str(person)][0] == "l"):
            y.append(0)
        else:
            y.append(1)
    return X,y, dir_init, dir_later, pos_init, pos_later, num_frames

In [15]:
threshold = 10
num_time_steps = 5
dim_data = 6
X1,y1, d_init1, d_lat1, p_init1, p_lat1, n_frames1 = get_X_and_Y(M_1_everything, M_1_labels, threshold, num_time_steps)
X2,y2, d_init2, d_lat2, p_init2, p_lat2, n_frames2 = get_X_and_Y(M_2_everything, M_2_labels, threshold, num_time_steps)
X3,y3, d_init3, d_lat3, p_init3, p_lat3, n_frames3 = get_X_and_Y(M_3_everything, M_3_labels, threshold, num_time_steps)
X4,y4, d_init4, d_lat4, p_init4, p_lat4, n_frames4 = get_X_and_Y(M_4_everything, M_4_labels, threshold, num_time_steps)
X5,y5, d_init5, d_lat5, p_init5, p_lat5, n_frames5 = get_X_and_Y(T_1_everything, T_1_labels, threshold, num_time_steps)
X6,y6, d_init6, d_lat6, p_init6, p_lat6, n_frames6 = get_X_and_Y(T_2_everything, T_2_labels, threshold, num_time_steps)
X7,y7, d_init7, d_lat7, p_init7, p_lat7, n_frames7 = get_X_and_Y(T_3_everything, T_3_labels, threshold, num_time_steps)
X8,y8, d_init8, d_lat8, p_init8, p_lat8, n_frames8 = get_X_and_Y(BJ_everything, BJ_labels, threshold, num_time_steps)
X = X1 + X2 + X4 + X5  + X6 + X3 + X7 
y = y1 + y2 + y4 + y5  + y6 + y3 + y7
init_pos = p_init1 + p_init2 + p_init4 + p_init5 + p_init6  + p_init3+ p_init7
num_examples = len(X1) + len(X2)+ len(X4) + len(X5)  + len(X6)  + len(X3)+ len(X7)

In [16]:
X_train = array(X).reshape(num_examples, num_time_steps , dim_data)
print("We have %d total TRAINING examples" % np.shape(X_train)[0])
Y_train = array(y)
X_test = array(X8).reshape(len(X8), num_time_steps , dim_data)
print("We have %d total TESTING examples" % np.shape(X_test)[0])
Y_test = array(y8)

We have 527 total TRAINING examples
We have 24 total TESTING examples


### Lets Look at Our Data

In [20]:
M_X = X1 + X2 + X4 # + X3
M_Y = y1 + y2 + y4 # + y3
T_X = X5  + X6 #+X7
T_Y = y5  + y6 #+y7
print("We have %d data points from Manhattan" % len(M_Y))
print("We have %d data points from Tokyo" % len(T_Y))

We have 196 data points from Manhattan
We have 229 data points from Tokyo


In [123]:
count_m_right = len([i for i in M_Y if i==1])
count_t_right = len([i for i in T_Y if i==1])
print("In Tokyo, %d people pass to the LEFT" % (len(T_Y) - count_t_right))
print("In Tokyo, %d people pass to the RIGHT" % (count_t_right))
print("In Manhattan, %d people pass to the LEFT" % (len(M_Y) - count_m_right))
print("In Manhattan, %d people pass to the RIGHT" % (count_m_right))

In Tokyo, 113 people pass to the LEFT
In Tokyo, 90 people pass to the RIGHT
In Manhattan, 129 people pass to the LEFT
In Manhattan, 67 people pass to the RIGHT


### Baseline 1 (2 versions):

In [124]:
m_pos_start = p_init1 + p_init2 + p_init4 # + p_init3
m_pos_end = y1 + y2 + y4 # + y3
t_pos_start = p_init5 + p_init7 # + p_init6
t_pos_end = y5 + y7 # + y6
m_l_l = 0; m_r_r = 0; m_l_r = 0; m_r_l = 0;
t_l_l = 0; t_r_r = 0; t_l_r = 0; t_r_l = 0;
for i in range(len(m_pos_start)):
    if m_pos_start[i] == 0:
        if m_pos_end[i] == 0:
            m_l_l+=1
        else:
            m_l_r+=1
    else:
        if m_pos_end[i] == 0:
            m_r_l+=1
        else:
            m_r_r+=1
for i in range(len(t_pos_start)):
    if t_pos_start[i] == 0:
        if t_pos_end[i] == 0:
            t_l_l+=1
        else:
            t_l_r+=1
    else:
        if t_pos_end[i] == 0:
            t_r_l+=1
        else:
            t_r_r+=1
print("Comparing initial position to final position:")
print("In Tokyo, %d people start on the LEFT and pass on the LEFT" % t_l_l)
print("In Tokyo, %d people start on the RIGHT and pass on the RIGHT" % t_r_r)
print("In Tokyo, %d people start on the LEFT and pass on the RIGHT" % t_l_r)
print("In Tokyo, %d people start on the RIGHT and pass on the LEFT" % t_r_l)
print("Total correct in Tokyo given baseline is " + str(t_l_l+t_r_r) + "/" + str(t_l_l+t_r_r+t_l_r+t_r_l))
print("The percentage is %0.2f" % (100*(t_l_l+t_r_r)/(1.0*(t_l_l+t_r_r+t_l_r+t_r_l))) )
print("In Manhattan, %d people start on the LEFT and pass on the LEFT" % m_l_l)
print("In Manhattan, %d people start on the RIGHT and pass on the RIGHT" % m_r_r)
print("In Manhattan, %d people start on the LEFT and pass on the RIGHT" % m_l_r)
print("In Manhattan, %d people start on the RIGHT and pass on the LEFT" % m_r_l)
print("Total correct in Manhattan given baseline is " + str(m_l_l+m_r_r) + "/" + str(m_l_l+m_r_r+m_l_r+m_r_l))
print("The percentage is %0.2f" % (100*(m_l_l+m_r_r)/(1.0*(m_l_l+m_r_r+m_l_r+m_r_l))) )

Comparing initial position to final position:
In Tokyo, 82 people start on the LEFT and pass on the LEFT
In Tokyo, 85 people start on the RIGHT and pass on the RIGHT
In Tokyo, 5 people start on the LEFT and pass on the RIGHT
In Tokyo, 31 people start on the RIGHT and pass on the LEFT
Total correct in Tokyo given baseline is 167/203
The percentage is 82.27
In Manhattan, 74 people start on the LEFT and pass on the LEFT
In Manhattan, 63 people start on the RIGHT and pass on the RIGHT
In Manhattan, 4 people start on the LEFT and pass on the RIGHT
In Manhattan, 55 people start on the RIGHT and pass on the LEFT
Total correct in Manhattan given baseline is 137/196
The percentage is 69.90


In [125]:
m_pos_start = p_lat1 + p_lat2 + p_lat4 # + p_lat3
m_pos_end = y1 + y2 + y4 # + y3
t_pos_start = p_lat5 + p_lat7 # + p_lat6
t_pos_end = y5 + y7 # + y6
m_l_l = 0; m_r_r = 0; m_l_r = 0; m_r_l = 0;
t_l_l = 0; t_r_r = 0; t_l_r = 0; t_r_l = 0;
for i in range(len(m_pos_start)):
    if m_pos_start[i] == 0:
        if m_pos_end[i] == 0:
            m_l_l+=1
        else:
            m_l_r+=1
    else:
        if m_pos_end[i] == 0:
            m_r_l+=1
        else:
            m_r_r+=1
for i in range(len(t_pos_start)):
    if t_pos_start[i] == 0:
        if t_pos_end[i] == 0:
            t_l_l+=1
        else:
            t_l_r+=1
    else:
        if t_pos_end[i] == 0:
            t_r_l+=1
        else:
            t_r_r+=1
print("Comparing later initial position to final position:")
print("In Tokyo, %d people start on the LEFT and pass on the LEFT" % t_l_l)
print("In Tokyo, %d people start on the RIGHT and pass on the RIGHT" % t_r_r)
print("In Tokyo, %d people start on the LEFT and pass on the RIGHT" % t_l_r)
print("In Tokyo, %d people start on the RIGHT and pass on the LEFT" % t_r_l)
print("Total correct in Tokyo given baseline is " + str(t_l_l+t_r_r) + "/" + str(t_l_l+t_r_r+t_l_r+t_r_l))
print("The percentage is %0.2f" % (100*(t_l_l+t_r_r)/(1.0*(t_l_l+t_r_r+t_l_r+t_r_l))) )
print("In Manhattan, %d people start on the LEFT and pass on the LEFT" % m_l_l)
print("In Manhattan, %d people start on the RIGHT and pass on the RIGHT" % m_r_r)
print("In Manhattan, %d people start on the LEFT and pass on the RIGHT" % m_l_r)
print("In Manhattan, %d people start on the RIGHT and pass on the LEFT" % m_r_l)
print("Total correct in Manhattan given baseline is " + str(m_l_l+m_r_r) + "/" + str(m_l_l+m_r_r+m_l_r+m_r_l))
print("The percentage is %0.2f" % (100*(m_l_l+m_r_r)/(1.0*(m_l_l+m_r_r+m_l_r+m_r_l))) )

Comparing initial position to final position:
In Tokyo, 84 people start on the LEFT and pass on the LEFT
In Tokyo, 85 people start on the RIGHT and pass on the RIGHT
In Tokyo, 5 people start on the LEFT and pass on the RIGHT
In Tokyo, 29 people start on the RIGHT and pass on the LEFT
Total correct in Tokyo given baseline is 169/203
The percentage is 83.25
In Manhattan, 74 people start on the LEFT and pass on the LEFT
In Manhattan, 63 people start on the RIGHT and pass on the RIGHT
In Manhattan, 4 people start on the LEFT and pass on the RIGHT
In Manhattan, 55 people start on the RIGHT and pass on the LEFT
Total correct in Manhattan given baseline is 137/196
The percentage is 69.90


### Baseline 2:

In [126]:
m_pos_start = d_init1 + d_init2 + d_init4 # + d_init3
m_pos_end = y1 + y2 + y4 # + y3
t_pos_start = d_init5 + d_init7 # + d_init6
t_pos_end = y5 + y7 # + y6
m_l_l = 0; m_r_r = 0; m_l_r = 0; m_r_l = 0;
t_l_l = 0; t_r_r = 0; t_l_r = 0; t_r_l = 0;
for i in range(len(m_pos_start)):
    if m_pos_start[i] == 0:
        if m_pos_end[i] == 0:
            m_l_l+=1
        else:
            m_l_r+=1
    else:
        if m_pos_end[i] == 0:
            m_r_l+=1
        else:
            m_r_r+=1
for i in range(len(t_pos_start)):
    if t_pos_start[i] == 0:
        if t_pos_end[i] == 0:
            t_l_l+=1
        else:
            t_l_r+=1
    else:
        if t_pos_end[i] == 0:
            t_r_l+=1
        else:
            t_r_r+=1
print("Comparing initial position to final position:")
print("In Tokyo, %d people move towards the LEFT and pass on the LEFT" % t_l_l)
print("In Tokyo, %d people move towards the RIGHT and pass on the RIGHT" % t_r_r)
print("In Tokyo, %d people move towards the LEFT and pass on the RIGHT" % t_l_r)
print("In Tokyo, %d people move towards the RIGHT and pass on the LEFT" % t_r_l)
print("Total correct in Tokyo given baseline is " + str(t_l_l+t_r_r) + "/" + str(t_l_l+t_r_r+t_l_r+t_r_l))
print("The percentage is %0.2f" % (100*(t_l_l+t_r_r)/(1.0*(t_l_l+t_r_r+t_l_r+t_r_l))) )
print("In Manhattan, %d people move towards the LEFT and pass on the LEFT" % m_l_l)
print("In Manhattan, %d people move towards the RIGHT and pass on the RIGHT" % m_r_r)
print("In Manhattan, %d people move towards the LEFT and pass on the RIGHT" % m_l_r)
print("In Manhattan, %d people move towards the RIGHT and pass on the LEFT" % m_r_l)
print("Total correct in Manhattan given baseline is " + str(m_l_l+m_r_r) + "/" + str(m_l_l+m_r_r+m_l_r+m_r_l))
print("The percentage is %0.2f" % (100*(m_l_l+m_r_r)/(1.0*(m_l_l+m_r_r+m_l_r+m_r_l))) )

Comparing initial position to final position:
In Tokyo, 1 people move towards the LEFT and pass on the LEFT
In Tokyo, 90 people move towards the RIGHT and pass on the RIGHT
In Tokyo, 0 people move towards the LEFT and pass on the RIGHT
In Tokyo, 112 people move towards the RIGHT and pass on the LEFT
Total correct in Tokyo given baseline is 91/203
The percentage is 44.83
In Manhattan, 61 people move towards the LEFT and pass on the LEFT
In Manhattan, 52 people move towards the RIGHT and pass on the RIGHT
In Manhattan, 15 people move towards the LEFT and pass on the RIGHT
In Manhattan, 68 people move towards the RIGHT and pass on the LEFT
Total correct in Manhattan given baseline is 113/196
The percentage is 57.65


In [127]:
m_pos_start = d_lat1 + d_lat2 + d_lat4 # + d_lat3
m_pos_end = y1 + y2 + y4 # + y3
t_pos_start = d_lat5 + d_lat7 # + d_lat6
t_pos_end = y5 + y7 # + y6
m_l_l = 0; m_r_r = 0; m_l_r = 0; m_r_l = 0;
t_l_l = 0; t_r_r = 0; t_l_r = 0; t_r_l = 0;
for i in range(len(m_pos_start)):
    if m_pos_start[i] == 0:
        if m_pos_end[i] == 0:
            m_l_l+=1
        else:
            m_l_r+=1
    else:
        if m_pos_end[i] == 0:
            m_r_l+=1
        else:
            m_r_r+=1
for i in range(len(t_pos_start)):
    if t_pos_start[i] == 0:
        if t_pos_end[i] == 0:
            t_l_l+=1
        else:
            t_l_r+=1
    else:
        if t_pos_end[i] == 0:
            t_r_l+=1
        else:
            t_r_r+=1
print("Comparing initial position to final position:")
print("In Tokyo, %d people move towards the LEFT and pass on the LEFT" % t_l_l)
print("In Tokyo, %d people move towards the RIGHT and pass on the RIGHT" % t_r_r)
print("In Tokyo, %d people move towards the LEFT and pass on the RIGHT" % t_l_r)
print("In Tokyo, %d people move towards the RIGHT and pass on the LEFT" % t_r_l)
print("Total correct in Tokyo given baseline is " + str(t_l_l+t_r_r) + "/" + str(t_l_l+t_r_r+t_l_r+t_r_l))
print("The percentage is %0.2f" % (100*(t_l_l+t_r_r)/(1.0*(t_l_l+t_r_r+t_l_r+t_r_l))) )
print("In Manhattan, %d people move towards the LEFT and pass on the LEFT" % m_l_l)
print("In Manhattan, %d people move towards the RIGHT and pass on the RIGHT" % m_r_r)
print("In Manhattan, %d people move towards the LEFT and pass on the RIGHT" % m_l_r)
print("In Manhattan, %d people move towards the RIGHT and pass on the LEFT" % m_r_l)
print("Total correct in Manhattan given baseline is " + str(m_l_l+m_r_r) + "/" + str(m_l_l+m_r_r+m_l_r+m_r_l))
print("The percentage is %0.2f" % (100*(m_l_l+m_r_r)/(1.0*(m_l_l+m_r_r+m_l_r+m_r_l))) )

Comparing initial position to final position:
In Tokyo, 0 people move towards the LEFT and pass on the LEFT
In Tokyo, 90 people move towards the RIGHT and pass on the RIGHT
In Tokyo, 0 people move towards the LEFT and pass on the RIGHT
In Tokyo, 113 people move towards the RIGHT and pass on the LEFT
Total correct in Tokyo given baseline is 90/203
The percentage is 44.33
In Manhattan, 60 people move towards the LEFT and pass on the LEFT
In Manhattan, 55 people move towards the RIGHT and pass on the RIGHT
In Manhattan, 12 people move towards the LEFT and pass on the RIGHT
In Manhattan, 69 people move towards the RIGHT and pass on the LEFT
Total correct in Manhattan given baseline is 115/196
The percentage is 58.67


### LSTM:

In [12]:
model = Sequential()
model.add(LSTM(50, input_shape=(num_time_steps, dim_data), return_sequences=True))
model.add(LSTM(50, input_shape=(num_time_steps, dim_data)))
model.add(Dense(1))

In [13]:
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['acc'])
print(model.summary())

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
lstm_1 (LSTM)                (None, 5, 50)             11400     
_________________________________________________________________
lstm_2 (LSTM)                (None, 50)                20200     
_________________________________________________________________
dense_1 (Dense)              (None, 1)                 51        
Total params: 31,651
Trainable params: 31,651
Non-trainable params: 0
_________________________________________________________________
None


In [19]:
model.fit(X_train, Y_train, epochs=10, batch_size=5, verbose=1)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x13c644690>

In [145]:
def summarize_success(X, Y, model):
    correct = 0
    count_left = 0
    count_right = 0
    correct_left = 0
    correct_right = 0
    preds = []
    for i in range(len(X)):
        pred = model.predict_classes(array([X[i]]), batch_size=1, verbose=0)[0][0]
        preds.append(pred)
        if Y[i] == 0:
            count_left+=1
            if (pred==Y[i]):
                correct_left+=1
                correct+=1
        else:
            count_right+=1
            if (pred==Y[i]):
                correct_right+=1
                correct+=1
    print("There are %d total examples in X" % len(X))
    #print("There are %d total examples in X that pass on the LEFT" % count_left)
    #print("There are %d total examples in X that pass on the RIGHT" % count_right)
    print("We classify %d of all examples correctly" % correct)
    print("That is %0.2f %% correct" % (100*correct/(1.0*len(X))))
    return preds

In [146]:
predictions_train = summarize_success(X_train, Y_train, model)

There are 322 total examples in X
We classify 246 of all examples correctly
That is 76.40 % correct


In [147]:
predictions_test = summarize_success(X_test, Y_test, model)

There are 77 total examples in X
We classify 68 of all examples correctly
That is 88.31 % correct
