In [1]:
# Stretch the notebook container to the full browser width so wide plots fit.
# `display` and `HTML` are imported from the public `IPython.display` module;
# importing them from `IPython.core.display` is deprecated.
from IPython.display import display, HTML
display(HTML("<style>.container {width:100% !important;}</style>"))

In [2]:
# imports
# core data libraries
import pandas as pd
import numpy as np

# visualization
from plotly.subplots import make_subplots
import plotly.graph_objects as go
import plotly.figure_factory as ff

# add directory to path for relative local imports
# import sys
# sys.path.insert(1, '/opt/apps/capstone/app/ssurveillance/code/base')

# local support modules
# from capstone_analysis_support import calc_transformation_mat, get_location, coordinates_data, create_binary_string, create_image_fig

# pd.set_option('display.max_rows', None)
# Show every column and never truncate cell contents when a frame is displayed.
# Options are addressed by their full names: pandas resolves abbreviated names
# by pattern matching, which can break if a similarly named option is added.
pd.set_option('display.max_columns', None)
pd.set_option('display.max_colwidth', None)

This notebook shows how the evaluation distribution graph was created and how the alerts generated by the camera, YOLOv4, and FaceNet were compared.

In [3]:
# First load the data: one pickled DataFrame per alert source.
# All three files are read from the same notebook-relative directory; the YOLO
# frame previously pointed at the absolute path '/data/...', unlike its siblings.
# NOTE(review): pd.read_pickle can execute code embedded in the file - only
# load pickles that come from a trusted source.
DATA_DIR = 'data'
camera_df = pd.read_pickle(f'{DATA_DIR}/camera_df.p.gz')
yolo_df = pd.read_pickle(f'{DATA_DIR}/yolo_df.p.gz')
fr_df = pd.read_pickle(f'{DATA_DIR}/facerecognition_df.p.gz')

In [4]:
# Next we need to trim the data: it contains almost 400,000 records and the
# graphing library would exhaust the available memory. We thin each frame with
# drop_duplicates so that only one instance of an alert is kept per
# date/hour/minute (and per tracking_id for YOLO). This should be fair for what
# we are trying to achieve.
def _one_alert_per_minute(alerts_df, dedupe_keys, keep_columns):
    """Return a thinned copy of ``alerts_df`` with a fractional ``bin_hour`` column.

    Parameters
    ----------
    alerts_df : pd.DataFrame
        Alert records. Must contain ``hour`` and ``minute`` plus every column
        named in ``dedupe_keys`` and ``keep_columns`` (``bin_hour`` is added here).
    dedupe_keys : list of str
        Columns that identify a duplicate alert; the first row of each
        combination is kept.
    keep_columns : list of str
        Columns of the returned frame, in order.

    Returns
    -------
    pd.DataFrame
        The de-duplicated rows restricted to ``keep_columns`` with a fresh
        0..n-1 index.
    """
    thinned = alerts_df.drop_duplicates(subset=dedupe_keys).copy()
    # Time of day as fractional hours (13:30 -> 13.5). Computed on the thinned
    # frame itself with vectorised arithmetic rather than a row-wise apply.
    thinned['bin_hour'] = thinned['hour'] + thinned['minute'] / 60
    return thinned[keep_columns].reset_index(drop=True)


yolo_dist_df = _one_alert_per_minute(
    yolo_df,
    dedupe_keys=['tracking_id', 'hour', 'minute', 'date'],
    keep_columns=['timestamp', 'tracking_id', 'event', 'date', 'hour', 'minute', 'second', 'bin_hour'],
)

# The camera frame previously derived bin_hour from the full, un-thinned
# camera_df and relied on index alignment to discard the extra rows.
camera_dist_df = _one_alert_per_minute(
    camera_df,
    dedupe_keys=['hour', 'minute', 'date'],
    keep_columns=['timestamp', 'date', 'hour', 'minute', 'second', 'bin_hour'],
)

fr_dist_df = _one_alert_per_minute(
    fr_df,
    dedupe_keys=['hour', 'minute', 'date'],
    keep_columns=['timestamp', 'time', 'tracking_id', 'date', 'hour', 'minute', 'second', 'bin_hour', 'id'],
)

In [8]:
# For the distribution plot we bin by hour to highlight the periods of the day
# with more activity. Each group is the list of fractional hours of one system.
yolo_group = list(yolo_dist_df['bin_hour'])
camera_group = list(camera_dist_df['bin_hour'])
fr_group = list(fr_dist_df['bin_hour'])


def _rug_labels(alerts_df):
    """Build one hover label per row, formatted ``YYYY-MM-DD<br>time: H:MM``.

    Parameters
    ----------
    alerts_df : pd.DataFrame
        Frame with ``date``, ``hour`` and ``minute`` columns.

    Returns
    -------
    list of str
        Labels in row order, so they line up with the frame's ``bin_hour`` values.
    """
    day = pd.to_datetime(alerts_df['date']).dt.strftime('%Y-%m-%d')
    hour = alerts_df['hour'].astype(str)
    # zero-pad so 9:05 is not rendered as 9:5
    minute = alerts_df['minute'].astype(str).str.zfill(2)
    return list(day + '<br>time: ' + hour + ':' + minute)


# Custom text for the rug plot below the distribution plot. All three systems
# share one label format.
cam_text = _rug_labels(camera_dist_df)
yolo_text = _rug_labels(yolo_dist_df)
fr_text = _rug_labels(fr_dist_df)

# One list of labels per group, in the same order the groups are passed to the
# distribution plot (camera, yolo, face recognition). This used to be left as
# an empty list, so the labels built above were never shown.
dist_text = [cam_text, yolo_text, fr_text]

In [12]:
# Distribution plot: one histogram + KDE curve + rug per alert system,
# binned in half-hour steps over the time of day.
hour_ticks = list(range(0, 25))
# label only every fourth hour; the remaining ticks stay unlabeled
hour_labels = [tick if tick % 4 == 0 else '' for tick in hour_ticks]

dist_fig = ff.create_distplot(
    hist_data=[camera_group, yolo_group, fr_group],
    group_labels=['camera', 'yolo', 'face recognition'],
    bin_size=.5,
    curve_type='kde',
    colors=['#94f3e4', '#37aa9c', '#333f44'],
    rug_text=dist_text,
)

dist_layout = {
    'margin': {'l': 0, 'r': 0, 't': 0, 'b': 0},
    'height': 500,
    'plot_bgcolor': 'rgba(0,0,0,0)',  # transparent plotting area
    'showlegend': True,
    'yaxis': {'gridcolor': '#e5e5e7', 'gridwidth': 1},
    'xaxis': {
        'gridcolor': '#e5e5e7',
        'gridwidth': 1,
        'title': 'Hour',
        'tickmode': 'array',
        'tickvals': hour_ticks,
        'ticktext': hour_labels,
    },
}
dist_fig.update_layout(**dist_layout)

dist_fig.show()

In [14]:
# Trim the data once more to a three-day window. Every remaining data point is
# later drawn as a vertical line so the number of alerts generated by the three
# systems can be compared visually.
window_start, window_end = '2022-03-22', '2022-03-25'


def _within_window(frame):
    """Boolean mask of the rows whose ``date`` lies in [window_start, window_end)."""
    return (frame['date'] >= window_start) & (frame['date'] < window_end)


camera_dist_df2 = camera_dist_df[_within_window(camera_dist_df)]
yolo_dist_df2 = yolo_dist_df[_within_window(yolo_dist_df)]

# face recognition: keep only the rows whose id is the empty string, then
# apply the same date window
fr_dist_df2 = fr_dist_df[fr_dist_df['id'] == ''].reset_index(drop=True)
fr_dist_df3 = fr_dist_df2[_within_window(fr_dist_df2)].reset_index(drop=True)

# rebuild the timestamp column from the separate date and time columns
fr_stamp_text = fr_dist_df3['date'].astype(str) + ' ' + fr_dist_df3['time']
fr_dist_df3['timestamp'] = pd.to_datetime(fr_stamp_text, format='%Y-%m-%d %H:%M:%S')

In [15]:
# Alert system comparison: three stacked subplots (camera, YOLO, FaceNet) in
# which every alert is drawn as a vertical line at its timestamp.
def _vertical_lines_trace(timestamps, color):
    """Build a single scatter trace with one vertical line per timestamp.

    The segments are separated by ``None`` so they are not connected to each
    other. Using one trace per subplot instead of one trace per alert keeps the
    figure small and avoids thousands of ``add_trace`` calls.

    Parameters
    ----------
    timestamps : pd.Series
        Alert timestamps; they are converted to strings for the x axis.
    color : str
        Line colour.

    Returns
    -------
    go.Scatter
        Trace spanning y = 0..2 at every timestamp, hidden from the legend.
    """
    xs, ys = [], []
    for stamp in timestamps.astype(str):
        xs += [stamp, stamp, None]
        ys += [0, 2, None]
    return go.Scatter(
        x=xs,
        y=ys,
        mode='lines',
        line=dict(color=color),
        showlegend=False,
    )


def _axis_title(text):
    """Axis title with the shared font size (``titlefont`` is deprecated in plotly)."""
    return dict(text=text, font=dict(size=18))


# One tick per calendar day present in the face-recognition window. The tick
# positions used to be built from every 82nd row and contained empty strings,
# which are not valid positions on a date axis.
day_ticks = sorted(pd.to_datetime(fr_dist_df3['date']).dt.strftime('%Y-%m-%d').unique())

fig = make_subplots(rows=3, cols=1, vertical_spacing=0.05)
fig.update_layout(
    height=1000,
    title_text="Alert System Comparison",
    plot_bgcolor='#ffffff',
    yaxis1=dict(showticklabels=False, title=_axis_title('Camera')),
    yaxis2=dict(showticklabels=False, title=_axis_title('YOLO')),
    yaxis3=dict(showticklabels=False, title=_axis_title('FaceNet')),
    # only the bottom subplot shows an x axis
    xaxis1={'visible': False, 'showticklabels': False},
    xaxis2={'visible': False, 'showticklabels': False},
    xaxis3=dict(
        tickmode='array',
        tickvals=day_ticks,
        ticktext=day_ticks,
    ),
)

# (timestamps, line colour) for subplot rows 1..3
alert_rows = [
    (camera_dist_df2['timestamp'], '#94f3e4'),
    (yolo_dist_df2['timestamp'], '#37aa9c'),
    (fr_dist_df3['timestamp'], '#333f44'),
]
for row_number, (timestamps, color) in enumerate(alert_rows, start=1):
    fig.add_trace(_vertical_lines_trace(timestamps, color), row=row_number, col=1)

fig.show()