In [1]:
import time

import numpy as np
import pandas as pd
from bokeh.charts import Bar, show
from bokeh.io import output_notebook

from data_generation import generate_segments
from utils import segment_format
from thumos14_helper import Thumos14

# Configure Bokeh (imported above from bokeh.io) so that later `show(...)`
# calls render their plots inline in this notebook.
output_notebook()

# Number of segments generated

This section explores the total number of segments generated when following the heuristic proposed by Erhan et al., CVPR 2014.

In [2]:
def wrapper_data_gen_for_T(T, max_time=3600):
    # T:= canonical temporal size
    start_time = time.time()

    th = Thumos14('../data/thumos14/')
    df_annot_val = th.segments_info()

    n = 0
    for i in np.unique(df_annot_val['video-name']):
        idx = df_annot_val['video-name'] == i
        target_c = df_annot_val.loc[idx, ['f-init', 'n-frames']]
        L = df_annot_val.loc[idx, 'video-frames'].min()
        annotations = segment_format(np.array(target_c), 'c2b')
        a, b = generate_segments(T, L, annotations)

        if len(b) <= 0:
            print 'no segments for video-id:', i
        n += len(b)

        elapsed_time = time.time() - start_time
        if elapsed_time > max_time:
            print 'reach max time... sorry'
            break
    return n

In [3]:
# Values of T (canonical temporal size) to sweep over; 128+3*16 evaluates to 176.
T_cal = [64, 128, 128+3*16, 256, 384, 512]
# n_samples[k] receives the total segment count returned for T_cal[k].
n_samples = []
for i in T_cal:
    print 'T =', i
    # Each call reloads the annotations and may print the ids of videos
    # for which no segment was generated.
    n_samples.append(wrapper_data_gen_for_T(i))

T = 64
T = 128
T = 176
T = 256
no segments for video-id: video_validation_0000190
T = 384
no segments for video-id: video_validation_0000190
T = 512
no segments for video-id: video_validation_0000190
no segments for video-id: video_validation_0000854


In [4]:
# Bar chart of the segment counts collected in n_samples, one bar per
# T value in T_cal ('# segments' is plotted as the value, 'T-size' as the label).
p = Bar({'# segments': n_samples, 'T-size': T_cal},
        values='# segments', label='T-size')
# Display the chart; the return value of show() is kept in f.
f = show(p)

# Draft of code for segment generation

This is an example of how to use activity annotations to produce data for training.

In [1]:
import hickle as hkl
import natsort
import numpy as np
import pandas as pd


from data_generation import compute_priors
from data_generation import dump_files
from thumos14_helper import Thumos14
from utils import c3d_batch_feature_stacking
from utils import idx_of_queries


# Canonical temporal size and number of priors passed to compute_priors.
T, n_priors = 256, 16
th = Thumos14('../data/thumos14/')
df_seg = th.segments_info()

# Generate segments for training and priors for regression
priors, df = compute_priors(df_seg, T, n_priors, iou_thr=0.5, rng_seed=313)

# Use 70% videos for training
train_ratio = 0.7


# Number of videos used: only the first m video names (in natural sort
# order) take part in the train/test split.
m = 70
video_names = df['video-name'].unique()
video_names = np.array(natsort.natsorted(video_names.tolist())[:m])

n_videos = len(video_names)
n_train_videos = int(np.ceil(train_ratio * n_videos))

# Seeded shuffle of the selected videos so the split is reproducible.
rng = np.random.RandomState(313)
idx = rng.permutation(n_videos)
idx_vid_train = idx[:n_train_videos]
idx_vid_test = idx[n_train_videos:]

# Row selectors for each split, expressed as *index labels* of df.
# Later cells use them with the label-based `df.loc[...]`; the previous
# `Series.nonzero()[0]` produced positional row numbers instead, which only
# coincide with labels when df has a default 0..n-1 index (and
# `Series.nonzero` is deprecated in recent pandas). With a default index the
# values below are identical to the old ones.
is_train = df['video-name'].isin(video_names[idx_vid_train])
is_test = df['video-name'].isin(video_names[idx_vid_test])
idx_train = df.index[is_train.values].values
idx_test = df.index[is_test.values].values

In [None]:
# Don't execute this cell unless you want to save results: it writes the
# split description files and the stacked feature arrays to disk.
filename = '../a01_val'
dump_files(filename, df=df.loc[idx_test, :], conf=True)

filename = '../a01_train'
dump_files(filename, priors=priors, df=df.loc[idx_train, :], conf=True)

# Both splits are subsets of the same `df`, so both read their C3D features
# from the same directory.
Xval = c3d_batch_feature_stacking(df.loc[idx_test, :],
                                  '../data/thumos14/c3d/val/',
                                  stack_prm={'pool_type': 'mean'})


Xtrain = c3d_batch_feature_stacking(df.loc[idx_train, :],
                                    '../data/thumos14/c3d/val/',
                                    stack_prm={'pool_type': 'mean'})

# Keep only the first element returned by the stacking helper and reshape it
# to 2-D using its first and third axes (assumes a 3-D array whose middle
# axis has length 1; otherwise reshape raises).
Xval = Xval[0].reshape((Xval[0].shape[0], Xval[0].shape[2]))
Xtrain = Xtrain[0].reshape((Xtrain[0].shape[0], Xtrain[0].shape[2]))

prefix = '../data/experiments/thumos14/a01/'
# Fixed: the arrays are named Xtrain/Xval; the former lowercase
# xtrain/xval were undefined and raised NameError before anything was saved.
hkl.dump(Xtrain, prefix + 'train_fc7_mean.hkl', mode='w',
         compression='gzip', compression_opts=9)
hkl.dump(Xval, prefix + 'val_fc7_mean.hkl', mode='w',
         compression='gzip', compression_opts=9)

In [3]:
print np.array(df.loc[idx_train, df.columns[4::]]).sum()*1.0 / len(idx_train) / len(df.columns[4::])
print np.array(df.loc[idx_test, df.columns[4::]]).sum()*1.0 / len(idx_test) / len(df.columns[4::])

0.0793958993762
0.0670446508937
