In [1]:
import numpy as np
import pandas as pd
from bokeh.charts import Bar, output_file, show
from bokeh.io import output_notebook

from thumos14_helper import Thumos14

# Configure bokeh so that later show() calls render inside the notebook cells.
output_notebook()

In [2]:
# Segment tables keyed by subset name; the no-argument call is stored under
# 'val' and the explicit 'test' call under 'test'.
th_ds = Thumos14('../data/thumos14/')
seg_info = dict(val=th_ds.segments_info(), test=th_ds.segments_info('test'))

In [3]:
# For each subset: the 'n-frames' column, its distinct values, and how many
# times each distinct value occurs.
n_frames = {}
uniq_frames = {}
counts = {}
for subset, info in seg_info.items():
    n_frames[subset] = info.loc[:, 'n-frames']
    values, occurrences = np.unique(n_frames[subset], return_counts=True)
    uniq_frames[subset] = values
    counts[subset] = occurrences

# Visualization
# TODO: Include tab for test
hist_data = {'Unique Frames': uniq_frames['val'], 'Counts': counts['val']}
df = pd.DataFrame(hist_data)
p = Bar(df, 'Unique Frames', values='Counts', title="Frames Distrib")
f = show(p)

Proposals are designed to achieve high recall, so we should analyze the *"CDF"* (cumulative distribution function) of the number of frames in order to fix our temporal length.

*Note: A multi-scale approach is an interesting future direction but it might be conditioned by current benchmarks.*

In [5]:
# Empirical CDF per subset: running total of the counts divided by the grand
# total. float() forces true division even when the counts are integers
# (plain `/` between integers truncates on Python 2).
cum_counts = {}
for subset in seg_info:
    cum_counts[subset] = np.cumsum(counts[subset]) / float(counts[subset].sum())

# Visualization
# TODO: Include tab for test
df = pd.DataFrame({'Unique Frames': uniq_frames['val'],
                   'Counts': cum_counts['val']})
p = Bar(df, 'Unique Frames', values='Counts', title="Cumulative Distrib")
f = show(p)

# For each target fraction, report the smallest frame count whose cumulative
# fraction reaches that target.
# print(...) with a single argument emits the same text on Python 2 and 3,
# whereas the bare print statement is a SyntaxError on Python 3.
for subset in seg_info:
    print('----{}----'.format(subset))
    for fraction in [0.5, 0.75, 0.9, 0.95, 0.99]:
        v = uniq_frames[subset][cum_counts[subset] >= fraction].min()
        print('{} videos less than {} frames'.format(fraction, v))

----test----
0.5 videos less than 97.0 frames
0.75 videos less than 181.0 frames
0.9 videos less than 264.0 frames
0.95 videos less than 337.0 frames
0.99 videos less than 586.0 frames
----val----
0.5 videos less than 88.0 frames
0.75 videos less than 163.0 frames
0.9 videos less than 235.0 frames
0.95 videos less than 286.0 frames
0.99 videos less than 514.0 frames


For an IoU of 0.5 we can choose a window size equal to half of the frame counts listed above. For a single-scale, multi-segment approach seeking a model with high recall, we should choose a canonical size around 143 - 169 frames (half of the 0.95 values above: 286 frames for val and 337 frames for test).

Notes:
1. A multi-scale approach can improve correct retrieval of small segments.
2. It's highly possible that this criterion generates an unbalanced distribution given the distribution of the data.