In [134]:
import pandas as pd 
import numpy as np
import matplotlib.pyplot as plt
import ast
import plotly.express as px
import os

from src.utils import *

# File names of validation videos to leave out of the evaluation; matched
# against the `local_path` column of the ground-truth table.
# Every entry needs its own trailing comma: two adjacent string literals are
# silently concatenated by Python into a single (non-matching) file name.
val_exclude = [
    "2022-05-1008-00-02_f_05.mp4",
    "2022-05-1008-00-02_f_06.mp4",
    "2022-05-1008-00-02_f_12.mp4",
    "2022-07-0507-00-03_u_09_9032.83.mp4",
#     "2022-07-0507-00-03_u_18_14638.2.mp4",
    "2022-01-2407-45-06_c_01_1048.83.mp4",
    "2022-02-1107-45-04_r_04_5094.83.mp4",
    "2022-05-0308-00-02_n_18.mp4",
    "2022-06-1007-00-05_t_02_13722.8.mp4",
    "2022-06-1307-00-07_a_04_22774.2.mp4",
    "2022-04-1311-18-39_r_01.mp4",
    "2022-04-2808-00-02_d_06.mp4",
    "2022-02-1708-00-07_m_11_22530.8.mp4",
    "2022-06-0712-23-08.mp4",
    "2022-08-0407-00-01_k_02_2182.83.mp4",
    "2022-08-0407-00-01_k_05_7821.5.mp4",
    "2022-08-0807-00-03_h_16_17305.5.mp4",
    "2022-04-1414-05-32_o_01.mp4",
    "2022-05-0905-00-03_e_13.mp4",
    "2022-06-2707-00-08_z_04_5989.5.mp4",
    "2022-06-2811-04-39_f_03_668.167.mp4",
    "2022-06-2811-04-39_f_06_2696.17.mp4",
    "2022-02-1411-08-48-P91360.mp4",
    "2022-04-0714-19-57.mp4",
    "2022-02-1411-44-31-232511.mp4",
    # Maybe
    "2022-06-0807-00-05_d_06_26524.2.mp4",
    "2022-05-2508-00-07_a_03.mp4",
]

def plot_probs(probs, times, joined, path=None):
    """Show one probability-vs-time scatter plot per row of ``joined``.

    For every index label of ``joined`` the matching row of ``probs`` (y)
    is plotted against the matching row of ``times`` (x). NaN entries are
    dropped from each row independently and ``x`` is then truncated to the
    number of remaining probabilities. Black vertical lines mark the row's
    ``start_time`` and ``end_time`` whenever those values are not missing.

    Parameters
    ----------
    probs, times : pandas.DataFrame
        One row per video, looked up with the index labels of ``joined``.
    joined : pandas.DataFrame
        Must provide ``start_time`` and ``end_time`` columns. Index labels
        must be strings; the part after the last ``/`` becomes the title.
    path : str, optional
        When given, each figure is also written to
        ``<path>/prob_plot_<i>.png`` (the directory is created if needed).
    """
    for i, idx in enumerate(joined.index):
        y = probs.loc[idx, :]
        y = y[~np.isnan(y)].values

        x = times.loc[idx, :]
        x = x[~np.isnan(x)].values
        x = x[0: len(y)]

        truths = joined.loc[idx, :]
        st, et = truths['start_time'], truths['end_time']
        print(f'start time is {st} end time is {et}')

        # Height of the marker lines. A row without any valid probability
        # would make max() raise, so fall back to 1.0 (assumes the values are
        # probabilities in [0, 1] -- TODO confirm).
        y_top = y.max() if y.size else 1.0

        fig = px.scatter(x=x, y=y)
        for marker_time in (st, et):
            # pd.isna also copes with None / non-float values, unlike np.isnan.
            if pd.isna(marker_time):
                continue
            fig.add_shape(
                type='line',
                yref="y",
                xref="x",
                x0=marker_time,
                y0=0,
                x1=marker_time,
                y1=y_top,
                line=dict(color='black', width=3)
            )
        fig.update_traces(connectgaps=False)
        fig.update_layout(showlegend=False, title=idx.split('/')[-1])
        fig.show()

        if path is not None:
            os.makedirs(path, exist_ok=True)
            fig.write_image(os.path.join(path, f'prob_plot_{i}.png'))

These are 45 predicted videos from the validation set.

In [96]:
# Per-video probability and time tables from the inference results; the
# 'Unnamed: 0' CSV column becomes the row index of each frame.
probs = pd.read_csv(
    'inference_results/csv/probs_8inside-40outside005-val.csv',
    index_col='Unnamed: 0',
)
times = pd.read_csv(
    'inference_results/csv/times_8inside-40outside005-val.csv',
    index_col='Unnamed: 0',
)

# Ground-truth table keyed by origin_uri, minus every row whose local_path
# is listed in val_exclude.
truth = format_data_csv('src/data/val_na_stratified.csv', '', dropna=False)
truth = truth.set_index('origin_uri')
truth = truth.drop(index=truth.loc[truth["local_path"].isin(val_exclude)].index)

# Inner join: keep only index labels present in both truth and probs.
joined = truth.join(probs, how='inner')

# plot_probs(probs, times, joined)

In [87]:
# Per-video evaluation table; later cells read its video_uri, start, end,
# entry-time, exit-time and exit_performance columns.
performance_val = pd.read_csv('inference_results/csv/performance_val.csv')

In [94]:
# Index the table by the full S3 URL of each video; plot_gt_against_preds
# uses these index labels to look up the matching rows in probs / times.
# Vectorised string concatenation replaces the per-row apply(lambda ...) and
# keeps the resulting index name ("video_uri").
performance_val.index = "https://s3.amazonaws.com/" + performance_val["video_uri"]
performance_val

Unnamed: 0_level_0,index,split,video_uri,start,end,entry-time,exit-time,entry_performance,exit_performance
video_uri,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
https://s3.amazonaws.com/is-cdp-s143500-ingest-ph/myrtlesc/PC155/202205/2022-05-1108-00-01_v_17.mp4,50,val,is-cdp-s143500-ingest-ph/myrtlesc/PC155/202205...,32333.08,986990.06,26000.0,993000.0,within_range,within_range
https://s3.amazonaws.com/is-cdp-s149445-ingest-ph/clemsonsc/PC161/202207/2022-07-0507-00-03_u_11_9604.17.mp4,51,val,is-cdp-s149445-ingest-ph/clemsonsc/PC161/20220...,13999.93,1432318.94,8000.0,1438000.0,within_range,within_range
https://s3.amazonaws.com/is-cdp-s149445-ingest-ph/clemsonsc/PC161/202207/2022-07-1107-00-03_o_02_5182.83.mp4,52,val,is-cdp-s149445-ingest-ph/clemsonsc/PC161/20220...,44666.29,827991.65,38000.0,836000.0,within_range,within_range
https://s3.amazonaws.com/is-cdp-s149445-ingest-ph/clemsonsc/PC161/202207/2022-07-1107-00-03_o_04_7048.83.mp4,53,val,is-cdp-s149445-ingest-ph/clemsonsc/PC161/20220...,52666.21,793991.99,46000.0,800000.0,within_range,within_range
https://s3.amazonaws.com/is-cdp-s149445-ingest-ph/clemsonsc/PC161/202207/2022-07-1107-00-03_o_11_13181.5.mp4,54,val,is-cdp-s149445-ingest-ph/clemsonsc/PC161/20220...,54666.19,750992.42,49000.0,758000.0,within_range,within_range
https://s3.amazonaws.com/is-cdp-s149445-ingest-ph/clemsonsc/PC161/202207/2022-07-1107-00-03_o_20_23008.2.mp4,55,val,is-cdp-s149445-ingest-ph/clemsonsc/PC161/20220...,32666.41,636326.9,26000.0,643000.0,within_range,within_range
https://s3.amazonaws.com/is-cdp-s167132-ingest-ph/ORANGEBURG/PC154/202205/2022-05-0408-00-03_c_01.mp4,56,val,is-cdp-s167132-ingest-ph/ORANGEBURG/PC154/2022...,11333.29,580327.46,4000.0,590000.0,within_range,within_range
https://s3.amazonaws.com/is-cdp-s167132-ingest-ph/ORANGEBURG/PC154/202205/2022-05-0408-00-03_c_04.mp4,57,val,is-cdp-s167132-ingest-ph/ORANGEBURG/PC154/2022...,53999.53,1565317.61,48000.0,1571000.0,within_range,within_range
https://s3.amazonaws.com/is-cdp-s167132-ingest-ph/ORANGEBURG/PC154/202205/2022-05-0408-00-03_c_07.mp4,58,val,is-cdp-s167132-ingest-ph/ORANGEBURG/PC154/2022...,39666.34,705992.87,41000.0,721000.0,early_prediction,within_range
https://s3.amazonaws.com/is-cdp-s167132-ingest-ph/ORANGEBURG/PC154/202205/2022-05-1108-00-03_f_02.mp4,59,val,is-cdp-s167132-ingest-ph/ORANGEBURG/PC154/2022...,39333.01,647326.79,32000.0,654000.0,within_range,within_range


In [117]:
def plot_gt_against_preds(probs, times, joined, path=None):
    """Show probability-vs-time plots with ground-truth and predicted markers.

    For every index label of ``joined`` the matching row of ``probs`` (y)
    is plotted against the matching row of ``times`` (x). NaN entries are
    dropped from each row independently and ``x`` is then truncated to the
    number of remaining probabilities. Vertical lines are drawn for each
    non-missing time value of the row:

    * black -- ``entry-time`` and ``exit-time``
    * red   -- ``start`` and ``end`` (``end`` is the predicted exit time;
      ``start`` is presumably the predicted entry -- confirm)

    Parameters
    ----------
    probs, times : pandas.DataFrame
        One row per video, looked up with the index labels of ``joined``.
    joined : pandas.DataFrame
        Must provide ``entry-time``, ``exit-time``, ``start`` and ``end``
        columns. Index labels must be strings; the part after the last
        ``/`` becomes the figure title.
    path : str, optional
        When given, each figure is also written to
        ``<path>/prob_plot_<i>.png`` (the directory is created if needed).
    """
    for i, idx in enumerate(joined.index):
        y = probs.loc[idx, :]
        y = y[~np.isnan(y)].values

        x = times.loc[idx, :]
        x = x[~np.isnan(x)].values
        x = x[0: len(y)]

        truths = joined.loc[idx, :]
        print(idx)

        # (time value, line colour, shape name), in drawing order.
        markers = (
            (truths['entry-time'], 'black', None),
            (truths['exit-time'], 'black', None),
            (truths['start'], 'red', None),
            (truths['end'], 'red', 'predicted exit time'),
        )

        # Height of the marker lines. A row without any valid probability
        # would make max() raise, so fall back to 1.0 (assumes the values are
        # probabilities in [0, 1] -- TODO confirm).
        y_top = y.max() if y.size else 1.0

        fig = px.scatter(x=x, y=y)
        for marker_time, color, name in markers:
            # pd.isna also copes with None / non-float values, unlike np.isnan.
            if pd.isna(marker_time):
                continue
            fig.add_shape(
                type='line',
                yref="y",
                xref="x",
                x0=marker_time,
                y0=0,
                x1=marker_time,
                y1=y_top,
                line=dict(color=color, width=3),
                name=name
            )

        fig.update_traces(connectgaps=False)
        fig.update_layout(showlegend=True, title=idx.split('/')[-1])
        fig.show()

        if path is not None:
            os.makedirs(path, exist_ok=True)
            fig.write_image(os.path.join(path, f'prob_plot_{i}.png'))

In [118]:
# Rows whose exit outcome is labelled 'late_prediction'.
performance_val.loc[performance_val['exit_performance'].eq('late_prediction')]

Unnamed: 0_level_0,index,split,video_uri,start,end,entry-time,exit-time,entry_performance,exit_performance
video_uri,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
https://s3.amazonaws.com/is-cdp-s167132-ingest-ph/ORANGEBURG/PC154/202205/2022-05-1108-00-03_f_06.mp4,60,val,is-cdp-s167132-ingest-ph/ORANGEBURG/PC154/2022...,49999.64,569994.3,36000.0,540000.0,within_range,late_prediction
https://s3.amazonaws.com/is-cdp-s656189-ingest-ph/Endo/PC118/202202/2022-02-1808-00-07_a_03_6462.83.mp4,77,val,is-cdp-s656189-ingest-ph/Endo/PC118/202202/202...,29666.51,556327.77,16000.0,510000.0,within_range,late_prediction
https://s3.amazonaws.com/is-cdp-s830922-ingest-ph/tampabayfl/PC188/202208/2022-08-0807-00-03_h_13_13266.2.mp4,96,val,is-cdp-s830922-ingest-ph/tampabayfl/PC188/2022...,25666.55,2008313.25,12000.0,1992000.0,within_range,late_prediction


In [125]:
# Number of videos per exit outcome label.
performance_val.loc[:, 'exit_performance'].value_counts()

within_range              45
correctly_not_detected     5
late_prediction            3
not_detected               2
incorrectly_detected       1
Name: exit_performance, dtype: int64

In [120]:
# Constant added to the start / end columns.
# NOTE(review): the origin and units of this offset are not visible in this
# notebook -- confirm and document.
PRED_TIME_OFFSET = 7000

# Adding the offset directly to start / end would stack another offset on
# every re-run of this cell. Keep the unshifted values in *_raw columns
# (created only once per loaded table) and always derive the shifted columns
# from them, so the cell can be re-run safely.
for col in ('end', 'start'):
    raw_col = f'{col}_raw'
    if raw_col not in performance_val.columns:
        performance_val[raw_col] = performance_val[col].astype(float)
    performance_val[col] = performance_val[raw_col] + PRED_TIME_OFFSET

In [126]:
# Rows whose exit outcome is labelled 'not_detected'.
performance_val.loc[performance_val['exit_performance'].eq('not_detected')]

Unnamed: 0_level_0,index,split,video_uri,start,end,entry-time,exit-time,entry_performance,exit_performance
video_uri,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
https://s3.amazonaws.com/is-cdp-s666323-ingest-ph/AIR/Lancaster/LT113/2022-02-0715-05-38-1664036.mp4,80,val,is-cdp-s666323-ingest-ph/AIR/Lancaster/LT113/2...,,,,587000.0,correctly_not_detected,not_detected
https://s3.amazonaws.com/is-cdp-s666323-ingest-ph/AIR/Lancaster/LT113/2022-02-0812-52-53-1114253.mp4,81,val,is-cdp-s666323-ingest-ph/AIR/Lancaster/LT113/2...,,,,971000.0,correctly_not_detected,not_detected


In [128]:
# Plot the videos whose exit outcome is labelled 'incorrectly_detected'.
plot_gt_against_preds(
    probs,
    times,
    performance_val.loc[performance_val['exit_performance'].eq('incorrectly_detected')],
)

https://s3.amazonaws.com/is-cdp-s920842-ingest-ph/Guilford/PC126/202205/2022-05-2508-00-07_a_03.mp4


In [166]:
import seaborn as sns
import plotly.graph_objects as go

# Frame-level model iterations and the validation accuracy (%) of each.
x = [
    "Model V1: Dataset 1 (lightml)",
    "Model V2: Dataset 2 (uniform)",
    "Model V2: Dataset 3 (uniform)",
    "Model V2: Dataset 4 (uniform)"
]

y = [
    78,
    80,
    88,
    89,
]

# Build the frame column by column so each column keeps its own dtype;
# stacking x and y into one NumPy array would coerce the accuracies to
# strings and require casting them back to integers afterwards.
df = pd.DataFrame({"model": x, "acc": y})

# Bar chart with the accuracy printed on each bar.
fig = go.Figure(data=go.Bar(x=df["model"], y=df["acc"], text=df["acc"]))

fig.update_layout(
    title='Model Iteration vs. Val Accuracy',
    xaxis_title='Frame-Level Model Iteration',
    yaxis_title='Validation Accuracy (%)',
)
fig.write_image('frame_level_model_val_acc.png')


In [161]:
# Inspect the column dtypes of the model / accuracy table.
df.dtypes

model    object
acc      object
dtype: object