In [7]:
import pandas as pd
idx = pd.IndexSlice
import hashlib

## A basic idea for analysis

### Cleaning/Prep

* Since the day's video files (and resulting csv's of tracks from the yolov3deepsort algorithm we ran on Colab) are in several files, first read them and calculate a hash of the file name and track id to avoid track id duplicates
* also add the max frame id from all previous files to keep that unique
* calculate object centroids from provided object bounding box

In [9]:
def hash_tracks(track_id):
    to_hash = file + str(track_id)
    hash = hashlib.sha1(to_hash.encode("UTF-8")).hexdigest()
    return hash[:8]

In [51]:
##https://stackoverflow.com/questions/4567089/hash-function-that-produces-short-hashes/23170837
import hashlib

# hash track ids with file number to avoid duplicate tracks
all_tracks = pd.DataFrame()
for file in ['3', '4', '5', '6', '7', '8']:
    track = pd.read_csv(f'../data/converted_{file}.mp4_classes.csv').set_index('frame_index')
    track['hash_trk_id'] = track['track_id'].apply(hash_tracks)
    track = track.rename(columns={'track_id': 'video_track_id', 'hash_trk_id': 'track_id'})
    
    # increment frame index by max frame index so far to keep it unique+increasing
    if all_tracks.shape[0] > 0:
        print(f'video {str(int(file) - 1)} max frame: {all_tracks.index.max()}')
        track.index = track.index + all_tracks.index.max()
        # check that hashed track_id is unique
        if (track['track_id'].isin(all_tracks['track_id'])).any():
            print('hash collision!')
    all_tracks = all_tracks.append(track)

video 3 max frame: 3344
video 4 max frame: 6857
video 5 max frame: 11331
video 6 max frame: 15978
video 7 max frame: 19557


In [32]:
# calculate centroids from bounding box
all_tracks['centroid_x'] = (all_tracks['bbox0'] + all_tracks['bbox2']) / 2
all_tracks['centroid_y'] = (all_tracks['bbox1'] + all_tracks['bbox3']) / 2

In [33]:
all_tracks

Unnamed: 0_level_0,video_track_id,class_name,bbox0,bbox1,bbox2,bbox3,track_id,centroid_x,centroid_y
frame_index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
2,1,car,1004,336,1264,452,63266754,1134.0,394.0
2,2,truck,528,281,687,398,cb4e5208,607.5,339.5
2,3,car,441,315,464,329,b6692ea5,452.5,322.0
2,4,car,422,315,440,327,f1f836cb,431.0,321.0
2,5,car,346,309,361,318,972a67c4,353.5,313.5
...,...,...,...,...,...,...,...,...,...
29412,5905,car,934,332,1012,369,1f8a92b5,973.0,350.5
29412,5906,car,263,299,332,342,4a3b3165,297.5,320.5
29412,5908,truck,505,300,570,343,9fbd06a8,537.5,321.5
29412,5915,car,514,301,576,346,20ad192c,545.0,323.5


In [34]:
##TODO dict of appropriate values for each input file...
##filename: (x_ped_threshold, y_car_threshold)
x_y_cuts = {'converted_3.mp4':(510, 400)}

##TODO will eventually need dict of fps if they differ for time calculations...
video_fps = 5

In [35]:
## use x=510 centroid for peds
## use y=400 for cars passing

### Filtering tracks

* Detect which tracks cross by checking if each track was, at some point, on both sides of the relevant threshold
    * Find the frame closest to that crossing

In [36]:
crossing_tracks = pd.DataFrame()

x_thr = x_y_cuts['converted_3.mp4'][0]
y_thr = x_y_cuts['converted_3.mp4'][1]

## could probably figure out something faster
for track_id in all_tracks['track_id'].unique():
    this_track = all_tracks[all_tracks['track_id'] == track_id]
    if (this_track['class_name'] == 'person').all():
        if (this_track['centroid_x'] < x_thr).any() and (this_track['centroid_x'] > x_thr).any():
            crossing_tracks = crossing_tracks.append(this_track)
    elif (this_track['class_name'] == 'car').all():
        if (this_track['centroid_y'] < y_thr).any() and (this_track['centroid_y'] > y_thr).any():
            crossing_tracks = crossing_tracks.append(this_track)
crossing_tracks = crossing_tracks.set_index('track_id', append=True)

In [37]:
crossing_tracks

Unnamed: 0_level_0,Unnamed: 1_level_0,video_track_id,class_name,bbox0,bbox1,bbox2,bbox3,centroid_x,centroid_y
frame_index,track_id,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
8,81c69212,16,car,1162,376,1230,415,1196.0,395.5
9,81c69212,16,car,1165,377,1232,415,1198.5,396.0
10,81c69212,16,car,1164,377,1233,416,1198.5,396.5
11,81c69212,16,car,1164,378,1231,417,1197.5,397.5
12,81c69212,16,car,1164,379,1230,417,1197.0,398.0
...,...,...,...,...,...,...,...,...,...
28102,1e39da6e,5064,person,87,248,143,370,115.0,309.0
28103,1e39da6e,5064,person,79,247,134,369,106.5,308.0
28129,1e39da6e,5064,person,102,259,144,357,123.0,308.0
28130,1e39da6e,5064,person,108,262,141,341,124.5,301.5


In [38]:
# add column denoting crossing frames

with_crossing = pd.DataFrame()

## could probably figure out something faster
for frame, track_df in crossing_tracks.groupby(level=1):
    if (track_df['class_name'] == 'person').all():
        track_df['x_boundary_dist'] = abs(track_df['centroid_x'] - x_thr)
        track_df['crossing_frame'] = track_df['x_boundary_dist'] == track_df['x_boundary_dist'].min()
    elif (track_df['class_name'] == 'car').all():
        track_df['y_boundary_dist'] = abs(track_df['centroid_y'] - y_thr)
        track_df['crossing_frame'] = track_df['y_boundary_dist'] == track_df['y_boundary_dist'].min()

    with_crossing = with_crossing.append(track_df)
with_crossing = with_crossing.drop(columns=['bbox0', 'bbox1', 'bbox2', 'bbox3'])

In [39]:
with_crossing

Unnamed: 0_level_0,Unnamed: 1_level_0,video_track_id,class_name,centroid_x,centroid_y,y_boundary_dist,crossing_frame,x_boundary_dist
frame_index,track_id,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
11471,0017ccb6,225,car,1147.0,427.5,27.5,False,
11472,0017ccb6,225,car,1128.0,426.5,26.5,True,
11498,0017ccb6,225,car,493.0,373.0,27.0,False,
11499,0017ccb6,225,car,468.5,370.5,29.5,False,
13868,005f0736,1884,car,1149.0,431.0,31.0,False,
...,...,...,...,...,...,...,...,...
25,fffb8e85,34,car,977.5,393.0,7.0,False,
26,fffb8e85,34,car,967.5,393.5,6.5,False,
27,fffb8e85,34,car,943.5,391.5,8.5,False,
43,fffb8e85,34,car,498.0,374.0,26.0,False,


### Formatting for analysis

* Create a new df of *crossing* events
* Filter into people crossing
    * To attempt an estimate of drivers failing to yield, count the number of cars that proceeded through the intersection after each pedestrian appears and before that pedestrian crosses

In [40]:
# reformat df to 1 row per crossing, with relevant info

crossing_info = pd.DataFrame()

for frame, track_df in with_crossing.groupby(level=1):
    crossing = {}
    crossing['first_frame'] = track_df.index.get_level_values('frame_index').min()
    crossing['last_frame'] = track_df.index.get_level_values('frame_index').max()
    crossing['crossed_frame'] = track_df[track_df['crossing_frame'] == True].index.get_level_values('frame_index')
    crossing['frames_before_cross'] = crossing['crossed_frame'] - crossing['first_frame']
    crossing['class_name'] = track_df['class_name'].iloc[0]
    crossing['track_id'] = track_df.index.get_level_values('track_id')[0]
    
    crossing_info = crossing_info.append(pd.DataFrame(crossing).set_index('track_id'))

In [41]:
crossing_info

Unnamed: 0_level_0,first_frame,last_frame,crossed_frame,frames_before_cross,class_name
track_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
0017ccb6,11471,11499,11472,1,car
005f0736,13868,13891,13878,10,car
009ddf88,1396,1416,1397,1,car
010a8d0a,515,542,516,1,car
012b0de9,14310,14317,14314,4,car
...,...,...,...,...,...
fc2b3d0c,14602,14612,14602,0,car
fc8ad27c,12205,12222,12207,2,car
fda98b21,2205,2239,2226,21,car
fe8fdb1a,15760,15870,15870,110,car


In [49]:
crossing_info[crossing_info['class_name'] == 'person']

Unnamed: 0_level_0,first_frame,last_frame,crossed_frame,frames_before_cross,class_name
track_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
0230f59b,7188,7217,7188,0,person
19fe161b,11566,11613,11612,46,person
1e39da6e,28025,28131,28069,44,person
452bac48,13478,13560,13522,44,person
4ac9c774,1492,1517,1499,7,person
5c45b433,16522,16524,16522,0,person
5c45b433,16522,16524,16523,1,person
5c45b433,16522,16524,16524,2,person
6d333d16,20643,20649,20648,5,person
7ed4896f,6526,6593,6542,16,person


In [52]:
people = crossing_info[crossing_info['class_name'] == 'person']

In [58]:
def cars_crossing_first(df):
    count = crossing_info[(crossing_info['class_name'] == 'car') & (
            crossing_info['crossed_frame'].isin(
                            range(df['first_frame'], df['crossed_frame'])))].count()[0]
    return count

In [60]:
people['cars_crossed_first'] = people.apply(cars_crossing_first, axis=1)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  people['cars_crossed_first'] = people.apply(cars_crossing_first, axis=1)


In [61]:
people

Unnamed: 0_level_0,first_frame,last_frame,crossed_frame,frames_before_cross,class_name,cars_crossed_first
track_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
0230f59b,7188,7217,7188,0,person,0
19fe161b,11566,11613,11612,46,person,2
1e39da6e,28025,28131,28069,44,person,0
452bac48,13478,13560,13522,44,person,0
4ac9c774,1492,1517,1499,7,person,0
5c45b433,16522,16524,16522,0,person,0
5c45b433,16522,16524,16523,1,person,0
5c45b433,16522,16524,16524,2,person,0
6d333d16,20643,20649,20648,5,person,0
7ed4896f,6526,6593,6542,16,person,0


In [62]:
people[people['cars_crossed_first'] > 0]

Unnamed: 0_level_0,first_frame,last_frame,crossed_frame,frames_before_cross,class_name,cars_crossed_first
track_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
19fe161b,11566,11613,11612,46,person,2


### Conclusion: does it work?

* Not really!
* While this code functions as-intended, clearly there's something wrong if it only found one failure to yield event in a day
* Our best guess is that this computer vision algorithm doesn't maintain tracking all that well, for example if several cars pass in front of the camera while the pedestrian is waiting I think it no longer realizes that it's the same pedestrian. So when we need that continuous tracking of a unique pedestrian the most in order to find a failure to yield event, it doesn't happen!
* While this is as far as we're able to go for now, potential solutions could include:
    * A better computer vision tool able to "re-recognize" the same pedestrian even several frames layer
    * Multiple cameras with some sort of shared spatial reference (hard)
    * A single camera with a better angle that can't be blocked as easily
    
note: The track id shown above where it *does* seem to detect a failure to yield event... actually isn't so! A cyclist was travelling along Motor, in the same direction as car traffic. Since they did eventually cross our vertical threshold it was counted as a crossing event, and during that time a couple cars crossed our vertical threshold. (converted_6_results.avi, approx. 1:00)