In [1]:
import itertools
import os

from bokeh.charts import Bar, output_file, show
from bokeh.io import output_notebook
from io import StringIO
from joblib import Parallel, delayed

import numpy as np
import pandas as pd

from data_generation import compute_priors
from data_generation import evaluate_priors
from thumos14_helper import Thumos14

# Direct Bokeh output to the notebook (bokeh.io.output_notebook) so that the
# figures passed to show() in later cells are displayed inline.
output_notebook()

Recall analysis of priors on the validation set
-----------------------------------------------------

In [2]:
"""
Set up the parameters for the experiment.
"""
EXPERIMENT_ID = 'val_01'
NR_CPUS = 24
# Parameters.
T_LST = [64, 128, 176, 256]
K_LST = [16, 32, 64, 128, 256]
IOU = np.arange(0.1, 1.1, 0.1)
MIN_STRIDE_STEP = 16

DATA_PATH = '../data/priors_eval/{}'.format(EXPERIMENT_ID)
if not os.path.isdir(DATA_PATH):
    os.makedirs(DATA_PATH)
# Config file.
csv_filename = os.path.join(DATA_PATH, '{}.csv'.format(EXPERIMENT_ID))
file_format = 'priors_{}_{}_{}.csv'
"""CSV FORMAT:
T,K,stride,filename
"""
csv_content = 'T,K,stride,filename'
for x in list(itertools.product(T_LST,K_LST)):
    for stride in range(MIN_STRIDE_STEP, x[0]+1, MIN_STRIDE_STEP):
        filename = os.path.join(DATA_PATH, file_format.format(x[0], x[1], stride))
        csv_content += '\n{},{},{},{}'.format(x[0], x[1], stride, filename)
with open(csv_filename, 'w') as fobj:
    fobj.write(csv_content)
EXPERIMENT_DF = pd.read_csv(csv_filename, header=0)

In [None]:
def wrapper_evaluate_priors(df, row):
    """Compute, evaluate and store the priors of one experiment configuration.

    Parameters
    ----------
    df : object
        Segment annotations; forwarded unchanged to ``compute_priors`` and
        ``evaluate_priors``.
    row : mapping
        One experiment configuration providing the keys 'T', 'K', 'stride'
        and 'filename' (e.g. a row of the experiment DataFrame).

    Returns
    -------
    bool
        True when the evaluation table was written to ``row['filename']``,
        False when any step raised an ordinary exception.
    """
    # Local imports keep this block self-contained for the worker processes.
    import sys
    import traceback

    try:
        # The second return value of compute_priors is not needed here.
        priors, _ = compute_priors(df, row['T'], row['K'])
        eval_df = evaluate_priors(df, priors, row['T'], row['stride'])
        eval_df.to_csv(row['filename'], index=False)
    except Exception:
        # Catch Exception rather than everything so KeyboardInterrupt and
        # SystemExit still stop the run, and report the cause instead of
        # silently turning it into a bare False.
        sys.stderr.write('wrapper_evaluate_priors failed:\n{}'.format(
            traceback.format_exc()))
        return False
    return True

def compute_recall_from_data_frame(eval_df, iou_thr=0.5):
    """Return the fraction of rows whose 'iou' value is at least ``iou_thr``.

    ``eval_df`` must provide an 'iou' column; every row counts as one
    annotation in the denominator.
    """
    is_hit = eval_df['iou'] >= iou_thr
    n_hits = is_hit.sum().astype(float)
    return n_hits / len(eval_df)

def run_experiment(experiment_df=EXPERIMENT_DF, iou_thr_lst=IOU,
                   dataset_path='../data/thumos14/', nr_cpus=NR_CPUS,
                   subset='validation', recompute=False):
    """Evaluate every configuration and tabulate recall per IoU threshold.

    Parameters
    ----------
    experiment_df : pandas.DataFrame
        One row per configuration with columns 'T', 'K', 'stride' and
        'filename' (path of the per-configuration evaluation CSV).
    iou_thr_lst : iterable of float
        IoU thresholds at which recall is computed.
    dataset_path : str
        Path handed to ``Thumos14``.
    nr_cpus : int
        ``n_jobs`` used by joblib when several configurations are pending.
    subset : str
        Subset name handed to ``Thumos14.segments_info``.
    recompute : bool
        When False, configurations whose evaluation CSV already exists are
        not evaluated again.

    Returns
    -------
    result_df : pandas.DataFrame
        One row per (configuration, IoU threshold) with columns 'T', 'K',
        'stride', 'iou_thr', 'recall' and 'filename'.
    kidxs : list
        The 'k-idx' column of the matching evaluation CSV, one entry per row
        of ``result_df``. Entries of the same configuration come from a
        single read of that CSV.

    Raises
    ------
    ValueError
        If any pending configuration could not be evaluated.
    """
    # Select the configurations to (re)compute with a boolean mask instead of
    # dropping rows from a frame while iterating over it.
    needs_compute = np.array(
        [recompute or not os.path.isfile(fname)
         for fname in experiment_df['filename']], dtype=bool)
    pending_df = experiment_df.loc[needs_compute]

    TH14 = Thumos14(dataset_path)
    df = TH14.segments_info(subset)

    # compute_flag is always a list of booleans so that all() below is valid
    # for zero, one or many pending configurations.
    n_pending = pending_df.shape[0]
    if n_pending > 1:
        # Compute and store evaluation DataFrames in parallel.
        compute_flag = Parallel(n_jobs=nr_cpus)(
            delayed(wrapper_evaluate_priors)(df, row)
            for _, row in pending_df.iterrows())
    elif n_pending == 1:
        # Hand over the single row itself (not the one-row frame) and wrap
        # the boolean result in a list.
        compute_flag = [wrapper_evaluate_priors(df, pending_df.iloc[0])]
    else:
        compute_flag = [True]
    if not all(compute_flag):
        raise ValueError('Incomplete evaluation DataFrames.')

    # Build experiment result DataFrame.
    T_lst, K_lst, stride_lst, iou_thr_helper = [], [], [], []
    recall_lst, filename_lst, kidxs = [], [], []
    for _, config in experiment_df.iterrows():
        # Read each evaluation file once; it does not depend on the threshold.
        this_df = pd.read_csv(config['filename'])
        for iou_thr in iou_thr_lst:
            recall = compute_recall_from_data_frame(this_df, iou_thr=iou_thr)
            T_lst.append(config['T'])
            K_lst.append(config['K'])
            stride_lst.append(config['stride'])
            iou_thr_helper.append(iou_thr)
            recall_lst.append(recall)
            filename_lst.append(config['filename'])
            kidxs.append(this_df['k-idx'])
    result_df = pd.DataFrame({'T': T_lst,
                              'K': K_lst,
                              'stride': stride_lst,
                              'iou_thr': iou_thr_helper,
                              'recall': recall_lst,
                              'filename': filename_lst})
    return result_df, kidxs

# Run the experiment with the defaults bound when run_experiment was defined
# (EXPERIMENT_DF, IOU, NR_CPUS, subset='validation', recompute=False).
result_df, kidxs = run_experiment()

In [None]:
def plot_K_vs_T(result_df, stride=None, iou_thr=0.5):
    """Show grouped bar charts of recall versus K, grouped by T.

    Parameters
    ----------
    result_df : pandas.DataFrame
        Table with columns 'K', 'T', 'stride', 'iou_thr' and 'recall'.
    stride : int, optional
        Stride to plot. When falsy (None or 0), one chart is shown for every
        distinct stride in ``result_df``.
    iou_thr : float
        IoU threshold whose recall values are plotted.
    """
    if not stride:
        strides = result_df['stride'].unique()
    else:
        strides = [stride]

    # Match the threshold with a tolerance: thresholds generated with a float
    # step (e.g. 0.1 + 2 * 0.1 = 0.30000000000000004) are not exactly equal
    # to the literal 0.3, so an exact == would silently select nothing.
    iou_match = np.isclose(result_df['iou_thr'].values, iou_thr)

    for this_stride in strides:
        idx = iou_match & (result_df['stride'] == this_stride).values
        df = result_df.loc[idx, ['K', 'T', 'recall']]
        fig = Bar(df, label='K', values='recall', group='T', legend='bottom_right',
                  title='Recall vs K (at fixed T)', ylabel='Recall')
        show(fig)

def plot_stride_vs_T(result_df, iou_thr=0.5, K=64):
    """Show a grouped bar chart of recall versus stride, grouped by T.

    Parameters
    ----------
    result_df : pandas.DataFrame
        Table with columns 'K', 'T', 'stride', 'iou_thr' and 'recall'.
    iou_thr : float
        IoU threshold whose recall values are plotted.
    K : int
        Only rows with this value of 'K' are plotted.
    """
    # Match the threshold with a tolerance: thresholds generated with a float
    # step (e.g. 0.1 + 2 * 0.1 = 0.30000000000000004) are not exactly equal
    # to the literal 0.3, so an exact == would silently select nothing.
    iou_match = np.isclose(result_df['iou_thr'].values, iou_thr)
    idx = iou_match & (result_df['K'] == K).values
    df = result_df.loc[idx, ['T', 'stride', 'recall']]
    # The bars are labelled by stride, so the title names stride (the former
    # title was copied from the K plot).
    fig = Bar(df, label='stride', values='recall', group='T', legend='bottom_right',
              title='Recall vs stride (at fixed T)', ylabel='Recall')
    show(fig)
    
# Recall per K (grouped by T) for stride 64 at the default IoU threshold 0.5.
plot_K_vs_T(result_df, stride=64)
# Recall per stride (grouped by T) for K = 64 at the default IoU threshold 0.5.
plot_stride_vs_T(result_df, K=64)