In [1]:
import json
import numpy as np
from numpy import array
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import Dropout
from keras.layers import LSTM
from keras.utils.np_utils import to_categorical
from keras.layers import TimeDistributed
from collections import defaultdict

Using TensorFlow backend.


### Loading in All Data:

In [2]:
with open('saved_frame_data/M_1_2_everything.json') as data_file:    
    M_1_everything = json.load(data_file)
with open('saved_frame_data/M_1_2_labels.json') as data_file:    
    M_1_labels = json.load(data_file)

In [3]:
with open('saved_frame_data/M_2_2_everything.json') as data_file:    
    M_2_everything = json.load(data_file)
with open('saved_frame_data/M_2_2_labels.json') as data_file:    
    M_2_labels = json.load(data_file)

In [4]:
with open('saved_frame_data/M_3_2_everything.json') as data_file:    
    M_3_everything = json.load(data_file)
with open('saved_frame_data/M_3_labels.json') as data_file:    
    M_3_labels = json.load(data_file)

In [5]:
with open('saved_frame_data/M_4_2_everything.json') as data_file:    
    M_4_everything = json.load(data_file)
with open('saved_frame_data/M_4_labels.json') as data_file:    
    M_4_labels = json.load(data_file)

In [6]:
with open('saved_frame_data/T_1_2_everything.json') as data_file:    
    T_1_everything = json.load(data_file)
with open('saved_frame_data/T_1_labels.json') as data_file:    
    T_1_labels = json.load(data_file)

In [7]:
with open('saved_frame_data/T_2_2_everything.json') as data_file:    
    T_2_everything = json.load(data_file)
with open('saved_frame_data/T_2_labels.json') as data_file:    
    T_2_labels = json.load(data_file)

In [8]:
with open('saved_frame_data/T_3_2_everything.json') as data_file:    
    T_3_everything = json.load(data_file)
with open('saved_frame_data/T_3_labels.json') as data_file:    
    T_3_labels = json.load(data_file)

In [9]:
with open('saved_frame_data/BJ_1_everything.json') as data_file:    
    BJ_everything = json.load(data_file)
with open('saved_frame_data/BJ_1_labels.json') as data_file:    
    BJ_labels = json.load(data_file)

In [10]:
with open('saved_frame_data/L_1_everything.json') as data_file:    
    L1_everything = json.load(data_file)
with open('saved_frame_data/L_1_labels.json') as data_file:    
    L1_labels = json.load(data_file)

In [11]:
with open('saved_frame_data/L_2_everything.json') as data_file:    
    L2_everything = json.load(data_file)
with open('saved_frame_data/L_2_labels.json') as data_file:    
    L2_labels = json.load(data_file)

In [12]:
with open('saved_frame_data/L_3_everything.json') as data_file:    
    L3_everything = json.load(data_file)
with open('saved_frame_data/L_3_labels.json') as data_file:    
    L3_labels = json.load(data_file)

In [13]:
with open('saved_frame_data/L_4_everything.json') as data_file:    
    L4_everything = json.load(data_file)
with open('saved_frame_data/L_4_labels.json') as data_file:    
    L4_labels = json.load(data_file)

### Combining Data into One Set:

In [14]:
def bucket_vectors(vect):
    if vect[0] <= 0:
        return 0 # left
    return 1 # right

In [15]:
def bucket_box(box):
    center_x = (box[3]+box[1])/2.0
    if center_x < 0.5:
        return 0 # left
    return 1 # right

In [16]:
def get_X_and_Y(all_data, all_labels, frame_thresh, time_steps):
    person_to_boxes = defaultdict(list)
    person_to_flow = defaultdict(list)
    person_to_flow_x = defaultdict(list)
    for entry in all_data:
        if all_labels[str(entry['person_id'])] != "unknown":
            person_to_boxes[entry['person_id']].append(entry['box'])
            person_to_flow[entry['person_id']].append((entry['direction'], entry['direction2']))
            person_to_flow_x[entry['person_id']].append((entry['flow'][0], entry['flow2'][0]))
    person_to_boxes2 = defaultdict(list)
    person_to_flow2 = defaultdict(list)
    person_to_flow_x2 = defaultdict(list)
    for person in person_to_boxes:
        if len(person_to_boxes[person])>=frame_thresh:
            person_to_boxes2[person] = person_to_boxes[person]
            person_to_flow2[person] = person_to_flow[person]
            person_to_flow_x2[person] = person_to_flow_x[person]
    person_to_boxes = person_to_boxes2
    person_to_flow = person_to_flow2
    person_to_flow_x = person_to_flow_x2
    X = []
    y = []
    dir_init = []
    dir_later = []
    pos_init = []
    pos_later = []
    num_frames = []
    for person in person_to_flow:
        person_data = []
        for i in range(time_steps):
            flow = person_to_flow_x[person][i]
            box = person_to_boxes[person][i]
            if flow[0]=="n":
                entry = [0.0, 0.0, float(box[0]), float(box[1]), float(box[2]), float(box[3])]
            else:
                entry = [float(flow[0]), float(flow[1]), float(box[0]), float(box[1]), float(box[2]), float(box[3])]
            person_data.append(entry)
        X.append(person_data)
        num_frames.append(len(person_to_boxes[person]))
        dir_init.append(bucket_vectors(person_to_flow_x[person][0]))
        dir_later.append(bucket_vectors(person_to_flow_x[person][time_steps]))
        pos_init.append(bucket_box(person_to_boxes[person][0]))
        pos_later.append(bucket_box(person_to_boxes[person][time_steps]))
        if (all_labels[str(person)][0] == "l"):
            y.append(0)
        else:
            y.append(1)
    return X,y, dir_init, dir_later, pos_init, pos_later, num_frames

In [17]:
threshold = 10
num_time_steps = 5
dim_data = 6
X1,y1, d_init1, d_lat1, p_init1, p_lat1, n_frames1 = get_X_and_Y(M_1_everything, M_1_labels, threshold, num_time_steps)
X2,y2, d_init2, d_lat2, p_init2, p_lat2, n_frames2 = get_X_and_Y(M_2_everything, M_2_labels, threshold, num_time_steps)
X3,y3, d_init3, d_lat3, p_init3, p_lat3, n_frames3 = get_X_and_Y(M_3_everything, M_3_labels, threshold, num_time_steps)
X4,y4, d_init4, d_lat4, p_init4, p_lat4, n_frames4 = get_X_and_Y(M_4_everything, M_4_labels, threshold, num_time_steps)
X5,y5, d_init5, d_lat5, p_init5, p_lat5, n_frames5 = get_X_and_Y(T_1_everything, T_1_labels, threshold, num_time_steps)
X6,y6, d_init6, d_lat6, p_init6, p_lat6, n_frames6 = get_X_and_Y(T_2_everything, T_2_labels, threshold, num_time_steps)
X7,y7, d_init7, d_lat7, p_init7, p_lat7, n_frames7 = get_X_and_Y(T_3_everything, T_3_labels, threshold, num_time_steps)
X8,y8, d_init8, d_lat8, p_init8, p_lat8, n_frames8 = get_X_and_Y(BJ_everything, BJ_labels, threshold, num_time_steps)
X9,y9, d_init9, d_lat9, p_init9, p_lat9, n_frames9 = get_X_and_Y(L1_everything, L1_labels, threshold, num_time_steps)
X10,y10, d_init10, d_lat10, p_init10, p_lat10, n_frames10 = get_X_and_Y(L2_everything, L2_labels, threshold, num_time_steps)
X11,y11, d_init11, d_lat11, p_init11, p_lat11, n_frames11 = get_X_and_Y(L3_everything, L3_labels, threshold, num_time_steps)
X12,y12, d_init12, d_lat12, p_init12, p_lat12, n_frames12 = get_X_and_Y(L4_everything, L4_labels, threshold, num_time_steps)
#X = X1 + X2 +  X5  + X6 + X3 + X7
X = X5 + X6 + X7 + X8 + X9 + X10 + X11 + X12
#y = y1 + y2 +  y5  + y6 + y3 + y7
y = y5 + y6 + y7 + y8 + y9 + y10 + y11 + y12
#pos_init_train = p_init1 + p_init2 + p_init5 + p_init6  + p_init3 + p_init7
pos_init_train = p_init5 + p_init6 + p_init7 + p_init8  + p_init9 + p_init10 + p_init11 + p_init12
#pos_lat_train = p_lat1 + p_lat2 + p_lat5 + p_lat6  + p_lat3 + p_lat7
pos_lat_train = p_lat5 + p_lat6 + p_lat7 + p_lat8  + p_lat9 + p_lat10 + p_lat11 + p_lat12
#d_init_train = d_init1 + d_init2 + d_init5 + d_init6  + d_init3 + d_init7
d_init_train = d_init5 + d_init6 + d_init7 + d_init8  + d_init9 + d_init10 + d_init11 + d_init12
#d_lat_train = d_lat1 + d_lat2 + d_lat5 + d_lat6  + d_lat3 + d_lat7
d_lat_train = d_lat5 + d_lat6 + d_lat7 + d_lat8  + d_lat9 + d_lat10 + d_lat11 + d_lat12
num_examples = len(X)

In [18]:
X_train = array(X).reshape(num_examples, num_time_steps , dim_data)
print("We have %d total TRAINING examples" % np.shape(X_train)[0])
Y_train = array(y)
# X_test = X9 + X4 + X10 + X11 + X12
X_test = X1 + X2 + X3 + X4
X_test = array(X_test).reshape(len(X_test), num_time_steps , dim_data)
#pos_init_test = p_init9 + p_init4 + p_init10 + p_init11+ p_init12
pos_init_test = p_init1 + p_init2 + p_init3 + p_init4
#pos_lat_test = p_lat9 + p_lat4 + p_lat10 + p_lat11 + p_lat12
pos_lat_test = p_lat1 + p_lat2 + p_lat3 + p_lat4
#d_init_test = d_init9 + d_init4 + d_init10 + d_init11 + d_init12
d_init_test = d_init1 + d_init2 + d_init3 + d_init4
#d_lat_test = d_lat9 + d_lat4 + d_lat10 + d_lat11 + d_lat12
d_lat_test = d_lat1 + d_lat2 + d_lat3 + d_lat4
print("We have %d total TESTING examples" % np.shape(X_test)[0])
#Y_test = y9 + y4 + y10 + y11 + y12
Y_test = y1 + y2 + y3 + y4
Y_test = array(Y_test)

We have 381 total TRAINING examples
We have 221 total TESTING examples


### Lets Look at Our Data

In [19]:
count_right = len([i for i in Y_train if i==1])
print("%d people pass to the LEFT" % (len(Y_train) - count_right))
print("%d people pass to the RIGHT" % (count_right))

215 people pass to the LEFT
166 people pass to the RIGHT


### Baseline 1 Training

In [20]:
l_l = 0; r_r = 0; l_r = 0; r_l = 0;
for i in range(len(Y_train)):
    if pos_init_train[i] == 0:
        if Y_train[i] == 0:
            l_l+=1
        else:
            l_r+=1
    else:
        if Y_train[i] == 0:
            r_l+=1
        else:
            r_r+=1
print("In Training, %d people start on the LEFT and pass on the LEFT" % l_l)
print("In Training, %d people start on the RIGHT and pass on the RIGHT" % r_r)
print("In Training, %d people start on the LEFT and pass on the RIGHT" % l_r)
print("In Training, %d people start on the RIGHT and pass on the LEFT" % r_l)
print("Total correct in Training given baseline is " + str(l_l+r_r) + "/" + str(l_l+r_r+l_r+r_l))
print("The percentage is %0.2f" % (100*(l_l+r_r)/(1.0*(l_l+r_r+l_r+r_l))))

In Training, 165 people start on the LEFT and pass on the LEFT
In Training, 160 people start on the RIGHT and pass on the RIGHT
In Training, 6 people start on the LEFT and pass on the RIGHT
In Training, 50 people start on the RIGHT and pass on the LEFT
Total correct in Training given baseline is 325/381
The percentage is 85.30


In [21]:
l_l = 0; r_r = 0; l_r = 0; r_l = 0;
for i in range(len(Y_train)):
    if pos_lat_train[i] == 0:
        if Y_train[i] == 0:
            l_l+=1
        else:
            l_r+=1
    else:
        if Y_train[i] == 0:
            r_l+=1
        else:
            r_r+=1
print("In Training, %d people start on the LEFT and pass on the LEFT" % l_l)
print("In Training, %d people start on the RIGHT and pass on the RIGHT" % r_r)
print("In Training, %d people start on the LEFT and pass on the RIGHT" % l_r)
print("In Training, %d people start on the RIGHT and pass on the LEFT" % r_l)
print("Total correct in Training given baseline is " + str(l_l+r_r) + "/" + str(l_l+r_r+l_r+r_l))
print("The percentage is %0.2f" % (100*(l_l+r_r)/(1.0*(l_l+r_r+l_r+r_l))))

In Training, 167 people start on the LEFT and pass on the LEFT
In Training, 160 people start on the RIGHT and pass on the RIGHT
In Training, 6 people start on the LEFT and pass on the RIGHT
In Training, 48 people start on the RIGHT and pass on the LEFT
Total correct in Training given baseline is 327/381
The percentage is 85.83


### Baseline 2 Training

In [22]:
l_l = 0; r_r = 0; l_r = 0; r_l = 0;
for i in range(len(Y_train)):
    if d_init_train[i] == 0:
        if Y_train[i] == 0:
            l_l+=1
        else:
            l_r+=1
    else:
        if Y_train[i] == 0:
            r_l+=1
        else:
            r_r+=1
print("In Training, %d people move towards the LEFT and pass on the LEFT" % l_l)
print("In Training, %d people move towards the RIGHT and pass on the RIGHT" % r_r)
print("In Training, %d people move towards the LEFT and pass on the RIGHT" % l_r)
print("In Training, %d people move towards the RIGHT and pass on the LEFT" % r_l)
print("Total correct in Training given baseline is " + str(l_l+r_r) + "/" + str(l_l+r_r+l_r+r_l))
print("The percentage is %0.2f" % (100*(l_l+r_r)/(1.0*(l_l+r_r+l_r+r_l))))

In Training, 70 people move towards the LEFT and pass on the LEFT
In Training, 144 people move towards the RIGHT and pass on the RIGHT
In Training, 22 people move towards the LEFT and pass on the RIGHT
In Training, 145 people move towards the RIGHT and pass on the LEFT
Total correct in Training given baseline is 214/381
The percentage is 56.17


In [23]:
l_l = 0; r_r = 0; l_r = 0; r_l = 0;
for i in range(len(Y_train)):
    if d_lat_train[i] == 0:
        if Y_train[i] == 0:
            l_l+=1
        else:
            l_r+=1
    else:
        if Y_train[i] == 0:
            r_l+=1
        else:
            r_r+=1
print("In Training, %d people move towards the LEFT and pass on the LEFT" % l_l)
print("In Training, %d people move towards the RIGHT and pass on the RIGHT" % r_r)
print("In Training, %d people move towards the LEFT and pass on the RIGHT" % l_r)
print("In Training, %d people move towards the RIGHT and pass on the LEFT" % r_l)
print("Total correct in Training given baseline is " + str(l_l+r_r) + "/" + str(l_l+r_r+l_r+r_l))
print("The percentage is %0.2f" % (100*(l_l+r_r)/(1.0*(l_l+r_r+l_r+r_l))))

In Training, 65 people move towards the LEFT and pass on the LEFT
In Training, 151 people move towards the RIGHT and pass on the RIGHT
In Training, 15 people move towards the LEFT and pass on the RIGHT
In Training, 150 people move towards the RIGHT and pass on the LEFT
Total correct in Training given baseline is 216/381
The percentage is 56.69


### Baseline TESTING

In [24]:
l_l = 0; r_r = 0; l_r = 0; r_l = 0;
for i in range(len(Y_test)):
    if pos_init_test[i] == 0:
        if Y_test[i] == 0:
            l_l+=1
        else:
            l_r+=1
    else:
        if Y_test[i] == 0:
            r_l+=1
        else:
            r_r+=1
print("In Testing, %d people start on the LEFT and pass on the LEFT" % l_l)
print("In Testing, %d people start on the RIGHT and pass on the RIGHT" % r_r)
print("In Testing, %d people start on the LEFT and pass on the RIGHT" % l_r)
print("In Testing, %d people start on the RIGHT and pass on the LEFT" % r_l)
print("Total correct in Testing given baseline is " + str(l_l+r_r) + "/" + str(l_l+r_r+l_r+r_l))
print("The percentage is %0.2f" % (100*(l_l+r_r)/(1.0*(l_l+r_r+l_r+r_l))))

In Testing, 94 people start on the LEFT and pass on the LEFT
In Testing, 68 people start on the RIGHT and pass on the RIGHT
In Testing, 4 people start on the LEFT and pass on the RIGHT
In Testing, 55 people start on the RIGHT and pass on the LEFT
Total correct in Testing given baseline is 162/221
The percentage is 73.30


In [25]:
l_l = 0; r_r = 0; l_r = 0; r_l = 0;
for i in range(len(Y_test)):
    if pos_lat_test[i] == 0:
        if Y_test[i] == 0:
            l_l+=1
        else:
            l_r+=1
    else:
        if Y_test[i] == 0:
            r_l+=1
        else:
            r_r+=1
print("In Testing, %d people start on the LEFT and pass on the LEFT" % l_l)
print("In Testing, %d people start on the RIGHT and pass on the RIGHT" % r_r)
print("In Testing, %d people start on the LEFT and pass on the RIGHT" % l_r)
print("In Testing, %d people start on the RIGHT and pass on the LEFT" % r_l)
print("Total correct in Testing given baseline is " + str(l_l+r_r) + "/" + str(l_l+r_r+l_r+r_l))
print("The percentage is %0.2f" % (100*(l_l+r_r)/(1.0*(l_l+r_r+l_r+r_l))))

In Testing, 94 people start on the LEFT and pass on the LEFT
In Testing, 68 people start on the RIGHT and pass on the RIGHT
In Testing, 4 people start on the LEFT and pass on the RIGHT
In Testing, 55 people start on the RIGHT and pass on the LEFT
Total correct in Testing given baseline is 162/221
The percentage is 73.30


### Baseline 2 (Testing):

In [26]:
l_l = 0; r_r = 0; l_r = 0; r_l = 0;
for i in range(len(Y_test)):
    if d_init_test[i] == 0:
        if Y_test[i] == 0:
            l_l+=1
        else:
            l_r+=1
    else:
        if Y_test[i] == 0:
            r_l+=1
        else:
            r_r+=1
print("In Testing, %d people start on the LEFT and pass on the LEFT" % l_l)
print("In Testing, %d people start on the RIGHT and pass on the RIGHT" % r_r)
print("In Testing, %d people start on the LEFT and pass on the RIGHT" % l_r)
print("In Testing, %d people start on the RIGHT and pass on the LEFT" % r_l)
print("Total correct in Testing given baseline is " + str(l_l+r_r) + "/" + str(l_l+r_r+l_r+r_l))
print("The percentage is %0.2f" % (100*(l_l+r_r)/(1.0*(l_l+r_r+l_r+r_l))))

In Testing, 71 people start on the LEFT and pass on the LEFT
In Testing, 60 people start on the RIGHT and pass on the RIGHT
In Testing, 12 people start on the LEFT and pass on the RIGHT
In Testing, 78 people start on the RIGHT and pass on the LEFT
Total correct in Testing given baseline is 131/221
The percentage is 59.28


In [46]:
## Non traditional baseline 2
l_l = 0; r_r = 0; l_r = 0; r_l = 0;
l_l_c = 0; r_r_c = 0; l_r_c = 0; r_l_c = 0;
for i in range(len(Y_test)):
    pred = d_init_test[i]
    if pos_init_test[i] == 0:
        if Y_test[i] == 0:
            l_l+=1
            if (pred == Y_test[i]):
                l_l_c+=1
        else:
            l_r+=1
            if (pred == Y_test[i]):
                l_r_c+=1
    else:
        if Y_test[i] == 0:
            r_l+=1
            if (pred == Y_test[i]):
                r_l_c+=1
        else:
            r_r+=1
            if (pred == Y_test[i]):
                r_r_c+=1
print("Total non tradit %d" % (l_r + r_l))
print("Total correct non tradit %d" % (l_r_c + r_l_c))
print("percentage %0.2f" % ((100*(l_r_c + r_l_c))/(1.0*(l_r + r_l))))

Total non tradit 59
Total correct non tradit 6
percentage 10.17


In [27]:
l_l = 0; r_r = 0; l_r = 0; r_l = 0;
for i in range(len(Y_test)):
    if d_lat_test[i] == 0:
        if Y_test[i] == 0:
            l_l+=1
        else:
            l_r+=1
    else:
        if Y_test[i] == 0:
            r_l+=1
        else:
            r_r+=1
print("In Testing, %d people start on the LEFT and pass on the LEFT" % l_l)
print("In Testing, %d people start on the RIGHT and pass on the RIGHT" % r_r)
print("In Testing, %d people start on the LEFT and pass on the RIGHT" % l_r)
print("In Testing, %d people start on the RIGHT and pass on the LEFT" % r_l)
print("Total correct in Testing given baseline is " + str(l_l+r_r) + "/" + str(l_l+r_r+l_r+r_l))
print("The percentage is %0.2f" % (100*(l_l+r_r)/(1.0*(l_l+r_r+l_r+r_l))))

In Testing, 71 people start on the LEFT and pass on the LEFT
In Testing, 63 people start on the RIGHT and pass on the RIGHT
In Testing, 9 people start on the LEFT and pass on the RIGHT
In Testing, 78 people start on the RIGHT and pass on the LEFT
Total correct in Testing given baseline is 134/221
The percentage is 60.63


### LSTM:

In [39]:
model = Sequential()
#model.add(LSTM(50, input_shape=(num_time_steps, dim_data), return_sequences=True))
model.add(LSTM(30, input_shape=(num_time_steps, dim_data)))
model.add(Dense(1))

In [40]:
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['acc'])
print(model.summary())

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
lstm_2 (LSTM)                (None, 30)                4440      
_________________________________________________________________
dense_2 (Dense)              (None, 1)                 31        
Total params: 4,471
Trainable params: 4,471
Non-trainable params: 0
_________________________________________________________________
None


In [41]:
model.fit(X_train, Y_train, epochs=100, batch_size=5, verbose=0)

<keras.callbacks.History at 0x140124e50>

In [42]:
def summarize_success(X, Y, model, init_pos):
    l_l = 0; r_r = 0; l_r = 0; r_l = 0;
    l_l_c = 0; r_r_c = 0; l_r_c = 0; r_l_c = 0;
    preds = []
    for i in range(len(Y)):
        pred = model.predict_classes(array([X[i]]), batch_size=1, verbose=0)[0][0]
        preds.append(pred)
        if init_pos[i] == 0:
            if Y[i] == 0:
                l_l+=1
                if (pred == Y[i]):
                    l_l_c+=1
            else:
                l_r+=1
                if (pred == Y[i]):
                    l_r_c+=1
        else:
            if Y[i] == 0:
                r_l+=1
                if (pred == Y[i]):
                    r_l_c+=1
            else:
                r_r+=1
                if (pred == Y[i]):
                    r_r_c+=1
    print("There are %d examples total" % len(Y))
    print("We classify %d of those correctly" % (l_l_c+l_r_c+r_l_c+r_r_c))
    print("That is %0.2f of those" % ((100*(l_l_c+l_r_c+r_l_c+r_r_c))/(1.0*len(Y))))
    print("There are %d normal people" % (l_l+r_r))
    print("We classify %d of those correctly" % (l_l_c+r_r_c))
    print("That is %0.2f of those" % ((100*(l_l_c+r_r_c))/(1.0*(l_l+r_r))))
    print("There are %d abnormal people" % (l_r+r_l))
    print("We classify %d of those correctly" % (l_r_c+r_l_c))
    print("That is %0.2f of those" % ((100*(l_r_c+r_l_c))/(1.0*(l_r+r_l))))
    return preds

In [43]:
predictions_train = summarize_success(X_train, Y_train, model, pos_init_train)

There are 381 examples total
We classify 346 of those correctly
That is 90.81 of those
There are 325 normal people
We classify 319 of those correctly
That is 98.15 of those
There are 56 abnormal people
We classify 27 of those correctly
That is 48.21 of those


In [44]:
predictions_test = summarize_success(X_test, Y_test, model, pos_init_test)

There are 221 examples total
We classify 165 of those correctly
That is 74.66 of those
There are 162 normal people
We classify 152 of those correctly
That is 93.83 of those
There are 59 abnormal people
We classify 13 of those correctly
That is 22.03 of those
