# Breakdown retrieval results

## Breakdown per NOUNs

Study single video retrieval results per NOUNs.

- Use the cell below to dump a CSV and add the data to G-sheets, to quickly sift through the data and plot results.

In [3]:
import numpy as np
import pandas as pd
import json

# Inputs/outputs of the per-NOUN breakdown.
# - yfcc100m_csv: its `topk_tags` column lists the NOUNs to break down.
# - nouns_to_video_didemo_json: maps each NOUN to annotation ids, per subset.
# - exp_csv: one row per annotation_id with the columns listed in METRICS.
yfcc100m_csv = '../data/interim/yfcc100m/001.csv'
nouns_to_video_didemo_json = '../data/interim/didemo/nouns_to_video.json'
exp_csv = '../data/interim/hsmcn_07/2.csv'
OUTPUT_FILE = '../data/interim/hsmcn_07/2_breakdown.csv'
SUBSET = 'val'
METRICS = ['iou', 'r@1', 'r@5']

yfcc100m_df = pd.read_csv(yfcc100m_csv)
with open(nouns_to_video_didemo_json, 'r') as fid:
    nouns_to_instances = json.load(fid)['annotations_per_subset'][SUBSET]
exp_df = pd.read_csv(exp_csv)

nouns_metrics = []
underrep_ids = []
for tag, _ in yfcc100m_df.groupby('topk_tags'):
    # A tag without annotations in this subset has nothing to report.
    annotation_ids = nouns_to_instances.get(tag)
    if annotation_ids is None:
        continue
    underrep_ids.extend(annotation_ids)
    ind = np.isin(exp_df['annotation_id'], annotation_ids)
    # `ind` is a boolean mask with one entry per row of exp_df, thus its
    # length never tells whether something matched; look at its content.
    if not ind.any():
        continue
    # NOTE (original author): groupby is slow as hell, hence the mask.
    metrics = [exp_df.loc[ind, i].mean() for i in METRICS]
    nouns_metrics.append([tag] + metrics)

# Add results from under-represented NOUNs
ind = np.isin(exp_df['annotation_id'], underrep_ids)
metrics = [exp_df.loc[ind, i].mean() for i in METRICS]
nouns_metrics.append(['NOUNs Underrepresented/Unseen'] + metrics)
# ... and from the complement, i.e. every other annotation in the experiment
wellrep_ids = np.setdiff1d(exp_df['annotation_id'], underrep_ids)
assert set(underrep_ids).isdisjoint(wellrep_ids.tolist())
ind = np.isin(exp_df['annotation_id'], wellrep_ids)
metrics = [exp_df.loc[ind, i].mean() for i in METRICS]
nouns_metrics.append(['NOUNs Overrepresented'] + metrics)

df = pd.DataFrame(nouns_metrics, columns=['attribute'] + METRICS)
df.to_csv(OUTPUT_FILE, index=False)

## Sentence retrieval evaluation

Updated: Aug 10. Created: Aug 9

Study phrase retrieval from visual moments holistically.

__Instructions__

Use the cell below to dump a CSV and add the data to G-sheets, to quickly sift through the data and plot results.

__Assumptions__

- Each row represents a visual moment and each column represents a phrase.

- The data was loaded from a JSON file containing a list. 🤞 it's read in the same order.

- TODO (minor): dump annotation_id in hdf5 for cross-matching

- TODO (minor): cross-check that JSON always returns the list in the correct order. Why wouldn't it?

By August 9, there is a strong assumption of ordering in the code without assertion i.e.

<img src="https://www.generadormemes.com/download/77megl">

In [None]:
import h5py
import numpy as np
import pandas as pd
import json
import os
import sys

sys.path.append('..')
from np_segments_ops import iou as iou_op

def compute_phrase_rank(filename, ind_blockout=None):
    """Rank of the phrase associated with each moment in a retrieval matrix.

    The HDF5 file is expected to provide two datasets: `prediction_matrix`,
    where each row scores how well a moment is described by every phrase,
    and the scalar flag `similarity`. The phrase of the i-th row (moment) is
    assumed to sit on the i-th column, so a perfect model would place the
    diagonal first on every row. The rank of a moment is the position of its
    own column after sorting its row in increasing order (0 is the best).

    Args:
        filename: path of the HDF5 file with the prediction matrix.
        ind_blockout: optional index into the matrix, e.g. a tuple
            (rows, columns), of entries that must not compete for the rank.
            For distances they are overwritten with the maximum of the
            matrix. When None, the matrix is ranked as stored.

    Returns:
        A tuple (phrases_rank, sorted_ind_matrix). `sorted_ind_matrix` holds
        the column indices that sort each row, and `phrases_rank` the column
        of `sorted_ind_matrix` where each row finds its own index.

    Raises:
        NotImplementedError: if the file flags its content as similarities.
    """
    with h5py.File(filename, 'r') as fid:
        # `Dataset.value` was removed in h5py 3; `[()]` reads a scalar in
        # any version.
        use_similarity = bool(fid['similarity'][()])
        distance_matrix = fid['prediction_matrix'][:]

    # Only touch the matrix when there is something to block out. Indexing
    # with `None` would broadcast the assignment over the entire matrix.
    if ind_blockout is not None:
        fill_value = 0 if use_similarity else distance_matrix.max()
        distance_matrix[ind_blockout] = fill_value

    if use_similarity:
        raise NotImplementedError('WIP: we didnt care about')

    num_moments = len(distance_matrix)
    sorted_ind_matrix = distance_matrix.argsort()
    # Compare every row against its own row index; the column where they
    # match is the rank of the phrase of that moment.
    is_own_phrase = sorted_ind_matrix == np.arange(num_moments)[:, None]
    phrases_rank = np.where(is_own_phrase)[1]
    return phrases_rank, sorted_ind_matrix

# Relevant inputs
# The YFCC100M files allow studying results for a set of under-represented
# nouns. Choose them based on the log-files or the set of your interest.
# FILENAME_REF: retrieval matrix used as reference to compute `rank_diff`.
FILENAME_REF = '../data/interim/hsmcn_07/9_phrase_retrieval.h5'
# only-videos
FILENAME = '../data/interim/smcn_12/a/5_phrase_retrieval.h5'
# only-images
#FILENAME = '../data/interim/hsmcn_07/9_phrase_retrieval.h5'
YFCC100_JSON = '../data/interim/yfcc100m/train_02/train_data.json'
YFCC100_CSV = '../data/interim/yfcc100m/001.csv'
# only-images clean-up-S2
# FILENAME = '../data/interim/hsmcn_08/c/0_phrase_retrieval.h5'
# YFCC100_JSON = '../data/interim/yfcc100m/train_04/train_data.json'
# YFCC100_CSV = '../data/interim/yfcc100m/003-25-1.csv'
# Position inside the `times` field of each moment used as its time span.
IND_MOMENT = 0  # sync with retrieval program otherwise it's buggy
# Minor inputs
GT_JSON = '../data/raw/val_data.json'
NOUNS_TO_VIDEO_DIDEMO_JSON = '../data/interim/didemo/nouns_to_video.json'
# Key of `annotations_per_subset` read from NOUNS_TO_VIDEO_DIDEMO_JSON.
SUBSET = 'val'
# Moments of the same video with IOU >= OVERLAP are blocked out of the
# ranking. A non-positive value disables the block-out.
OVERLAP = 1e-32
# Number of top ranked phrases kept per moment in the dumped summary.
TOPK = 10  # for REST

# NOUN -> set of annotation ids for the subset under study
with open(NOUNS_TO_VIDEO_DIDEMO_JSON, 'r') as fid:
    nouns_to_instance_list = json.load(fid)['annotations_per_subset'][SUBSET]
nouns_to_instances = {
    noun: set(ids) for noun, ids in nouns_to_instance_list.items()}

# Gather the vocabulary: nouns targeted by the image collection (under
# represented / unseen) and every word seen in the training descriptions.
with open(YFCC100_JSON, 'r') as fid:
    underrepresented_unseen_nouns = set(
        pd.read_csv(YFCC100_CSV)['topk_tags'].unique().tolist())

    image_nouns = set()
    for moment_i in json.load(fid):
        image_nouns.update(moment_i['description'].split(' '))

# annotation id -> nouns seen in training that were not explicitly targeted
untargeted_nouns = {}
for noun_i in image_nouns:
    is_targeted = noun_i in underrepresented_unseen_nouns
    if is_targeted or noun_i not in nouns_to_instances:
        continue
    for instance_id in nouns_to_instances[noun_i]:
        untargeted_nouns.setdefault(instance_id, []).append(noun_i)

# annotation id -> targeted nouns, and targeted noun -> annotation ids.
# Nouns without annotations in the subset keep an empty list.
underrepresented_unseen = {}
underrepresented_unseen_nouns2id = {
    noun: [] for noun in underrepresented_unseen_nouns}
for noun_i in underrepresented_unseen_nouns:
    for instance_id in nouns_to_instances.get(noun_i, ()):
        underrepresented_unseen.setdefault(instance_id, []).append(noun_i)
        underrepresented_unseen_nouns2id[noun_i].append(instance_id)
    
# Ground-truth moments; their position in the list is the row/column index
# assumed for the retrieval matrix.
with open(GT_JSON, 'r') as fid:
    all_moments = list(json.load(fid))

# two-way mapping between annotation_id and index
id2ind = {
    moment['annotation_id']: position
    for position, moment in enumerate(all_moments)}
ind2id = {
    position: moment['annotation_id']
    for position, moment in enumerate(all_moments)}

# indices of the moments with under-represented/unseen nouns, per noun and
# overall, plus the indices of all the remaining moments
ind_underrepresented_unseen = [
    id2ind[annotation_id] for annotation_id in underrepresented_unseen]
underrepresented_unseen_nouns2ind = {
    noun: [id2ind[annotation_id] for annotation_id in annotation_ids]
    for noun, annotation_ids in underrepresented_unseen_nouns2id.items()
}
ind_complement = np.setdiff1d(
    np.arange(len(all_moments)), ind_underrepresented_unseen)

# collect indices of phrases where visual moments overlap in time
ind_blockout = None
if OVERLAP > 0:
    _ind_blockout_i, _ind_blockout_j = [], []
    all_moments_df = pd.DataFrame(all_moments)
    for _, moments_per_video in all_moments_df.groupby('video'):
        moments_time = np.array(
            [times[IND_MOMENT] for times in moments_per_video['times']])
        # make it continuous: the end of each span is shifted by one
        # (the in-place product by 1 leaves the values untouched)
        moments_time *= 1
        moments_time[:, 1] += 1
        iou_among_moments = iou_op(moments_time, moments_time)
        # discard the overlap of each moment with itself
        np.fill_diagonal(iou_among_moments, 0)
        ind_overlap_i, ind_overlap_j = np.where(
            iou_among_moments >= OVERLAP)
        # translate positions inside the video into indices over all moments
        _ind_blockout_i.append(moments_per_video.index[ind_overlap_i])
        _ind_blockout_j.append(moments_per_video.index[ind_overlap_j])
    ind_blockout = (
        np.concatenate(_ind_blockout_i), np.concatenate(_ind_blockout_j))

# Length of each description (in characters) and rank of each phrase for the
# model under study and for the reference model.
phrases_length = [len(moment_i['description']) for moment_i in all_moments]
phrases_rank, sorted_ind_matrix = compute_phrase_rank(FILENAME, ind_blockout)
phrases_rank_ref, _ = compute_phrase_rank(FILENAME_REF, ind_blockout)
rank_diff = phrases_rank - phrases_rank_ref  # lower is better

phrases_rank_underrepresented_unseen = (
    phrases_rank[ind_underrepresented_unseen])
phrases_rank_complement = phrases_rank[ind_complement]

# TODO (critical): compute auc of recall vs rank-k

# Display results: one statistic per group (overall, under+unseen, rest)
_rank_groups = (
    phrases_rank,
    phrases_rank_underrepresented_unseen,
    phrases_rank_complement,
)
_statistics = (
    ('Median (overall/under+unseeen/comeplement)', np.median),
    ('Average (overall/under+unseeen/comeplement)', np.mean),
    ('Std (overall/under+unseeen/comeplement)', np.std),
)
for _header, _statistic in _statistics:
    print(_header)
    for _ranks in _rank_groups:
        print(_statistic(_ranks))

# Fraction of moments whose rank decreased / increased / stayed the same
print('Rank diff wrt reference +/-/None')
for _selected in (rank_diff < 0, rank_diff > 0, rank_diff == 0):
    print(f'{_selected.sum() / len(rank_diff):.4f}')

print('Rank diff under+unseen wrt reference +/-/None')
_rank_diff_subset = rank_diff[ind_underrepresented_unseen]
for _selected in (_rank_diff_subset < 0,
                  _rank_diff_subset > 0,
                  _rank_diff_subset == 0):
    print(f'{_selected.sum() / len(ind_underrepresented_unseen):.4f}')

print('Number instances with under+unseen')
print(len(ind_underrepresented_unseen))

# Per noun, fraction of its moments whose rank decreased wrt the reference
underrepresented_unseen_nouns_improvement_rate = {}
for _noun, _indices in underrepresented_unseen_nouns2ind.items():
    _improved = rank_diff[_indices] < 0
    underrepresented_unseen_nouns_improvement_rate[_noun] = (
        _improved.sum() / len(_indices))

# Dump the per-noun improvement rate as CSV to plot it in G-sheets.
# Mode 'x' refuses to overwrite a previous dump.
underrepresented_unseen_nouns_improvement_rate_df = pd.DataFrame.from_dict(
    underrepresented_unseen_nouns_improvement_rate, orient='index')
summary_file = FILENAME.replace('phrase_retrieval.h5', 'rate_pos-rank-diff.csv')
with open(summary_file, 'x') as fid:
    underrepresented_unseen_nouns_improvement_rate_df.to_csv(fid)

# Rank and length side by side, ready to be dumped as CSV for G-sheets
rank_vs_length = pd.DataFrame([phrases_rank, phrases_length]).T
rank_vs_length.columns = ['rank', 'length']
# summary_file = 'blah'
# with open(summary_file, 'x') as fid:
#     rank_vs_length.to_csv(fid, index=None)

# Dump REST to explore results: one record per annotation_id
summary = {}
for i, moment_i in enumerate(all_moments):
    annotation_id = moment_i['annotation_id']
    # this formats the video such that we can plug it in HTML
    video_path = '/'.join(moment_i['video'].split('_')[:2])
    topk_descriptions = [
        all_moments[j]['description'] for j in sorted_ind_matrix[i, :TOPK]]
    summary[annotation_id] = {
        'description': moment_i['description'],
        'video': video_path,
        'time': moment_i['times'][IND_MOMENT],
        'rank': int(phrases_rank[i]),
        'topk': topk_descriptions,
        # TODO: add nouns from underrepresented_unseen
        'untargeted_nouns': list(untargeted_nouns.get(annotation_id) or []),
        'noun_subset': list(
            underrepresented_unseen.get(annotation_id) or []),
        'rank_diff': int(rank_diff[i]),
    }
summary_file = FILENAME.replace('phrase_retrieval.h5', 'pr_rest.json')
with open(summary_file, 'x') as fid:
    json.dump(summary, fid)

Maybe soft-evaluation is the way to go. At least for `cyclist` and `diver`, it looks like the rank difference of the image-only model is better.

Double check the following cases. It seems, there were POS errors: 
    - Cyclists racing.
    - first cyclist exits frame left
    - the first cyclist rides over the triangle on the road
    - diver first comes into view

## Moment retrieval evaluation

Updated: Aug 23. Created: Aug 16

Study moment retrieval from descriptions.

_Note_:

- We moved the evaluation code to the respective `evaluation.py` and `corpus.py` modules.

- TODO: dump data for REST API.

[experiments] computing numbers for meeting on Aug. 21

In [2]:
import sys
sys.path.append('..')
from evaluation import CorpusVideoMomentRetrievalEvalFromMatrix

import pandas as pd

# Retrieval matrices to evaluate and the label of each model, in the same
# order.
h5_files = [
    '../data/interim/smcn_10/a/4_moment_retrieval.h5',
    '../data/interim/smcn_12/a/5_moment_retrieval.h5',
    '../data/interim/hsmcn_10/3_moment_retrieval.h5',
    '../data/interim/hsmcn_07/9_moment_retrieval.h5',
    '../data/interim/smcn_06/a/4_moment_retrieval.h5',
    '../data/interim/mcn/_corpus_val_rgb_matrix.hdf5',
]
tags = [
    'SMCN OnlyVideo-Inter+Intra-Local',
    'SMCN OnlyVideo-Inter-Local',
    'HSMCN Joint-Inter+Intra-Local',
    'HSMCN OnlyImage-Inter-Local',
    'SMCN OnlyVideo-Inter+Intra-Local+Global+TEF',
    'MCN OnlyVideo-Inter+Intra-Local+Global+TEF'
]
assert len(h5_files) == len(tags)

RECALL_VALUES = (1, 5, 10, 100, 1000, 2000, 10000)
json_filename = '../data/raw/val_data_wwa.json'

# One row per model: label, recall at each value of RECALL_VALUES, mean rank
rows = []
for tag, h5_filename in zip(tags, h5_files):
    judge = CorpusVideoMomentRetrievalEvalFromMatrix(
        json_filename, h5_filename, RECALL_VALUES, 0.1)
    recall, mrank = judge.eval()
    rows.append([tag] + recall + [mrank])

column_names = ['model'] + [f'R@{i}' for i in RECALL_VALUES] + ['mean-rank']
df = pd.DataFrame(rows, columns=column_names)
df.to_csv('2018-08-21.csv', index=None)

[experiments] computing numbers for meeting on Aug. 23

In [1]:
import sys
sys.path.append('..')
from evaluation import CorpusVideoMomentRetrievalEvalFromMatrix

import pandas as pd

# Retrieval matrices to evaluate and the label of each model, in the same
# order.
h5_files = [
    '../data/interim/smcn_13/3_moment_retrieval.h5',
    '../data/interim/mcn_pytorch_12/3_moment_retrieval.h5',
]
tags = [
    'SMCN OnlyVideo-Inter+Intra-Local+Global',
    'MCN OnlyVideo-Inter+Intra-ResNet'
]
assert len(h5_files) == len(tags)

RECALL_VALUES = (1, 5, 10, 100, 1000, 2000, 10000)
json_filename = '../data/raw/val_data_wwa.json'

# One row per model: label, recall at each value of RECALL_VALUES, mean rank
rows = []
for tag, h5_filename in zip(tags, h5_files):
    judge = CorpusVideoMomentRetrievalEvalFromMatrix(
        json_filename, h5_filename, RECALL_VALUES, 0.1)
    recall, mrank = judge.eval()
    rows.append([tag] + recall + [mrank])

column_names = ['model'] + [f'R@{i}' for i in RECALL_VALUES] + ['mean-rank']
df = pd.DataFrame(rows, columns=column_names)
df.to_csv('2018-08-23.csv', index=None)

[debug] retrieval with corpus matrix

- compute distance matrix of original MCN features

In [None]:
import hashlib
import h5py
import numpy as np
from scipy.spatial.distance import cdist

def video_to_iid(video):
    """Return an integer id for a video name.

    The id is the SHA-256 digest of the UTF-8 encoded name, read as a
    big-endian integer and reduced modulo 10**8.
    """
    digest = hashlib.sha256(video.encode('utf-8')).digest()
    return int.from_bytes(digest, 'big') % 10**8

_filename1 = '../data/interim/mcn/corpus_val_rgb.hdf5'
_filename2 = '../data/interim/mcn/queries_val_rgb.hdf5'

# Stack the features of every entry of the corpus file, remembering the
# order in which the videos were read and their integer id.
visual_features = []
videos_order = []
with h5py.File(_filename1, 'r') as f:
    for position, (video_name, dataset) in enumerate(f.items()):
        visual_features.append(dataset[:])
        videos_order.append((position, video_to_iid(video_name)))
visual_features = np.concatenate(visual_features)

# Same for the queries: each dataset becomes a single row and its key is
# parsed as an integer id.
queries_features = []
moments_order = []
with h5py.File(_filename2, 'r') as f:
    for position, (query_key, dataset) in enumerate(f.items()):
        queries_features.append(dataset[:].reshape((1, -1)))
        moments_order.append((position, int(query_key)))
queries_features = np.concatenate(queries_features)

# Squared euclidean distance between every query (row) and every visual
# feature (column).
prediction_matrix = cdist(queries_features, visual_features, 'sqeuclidean')

# Mode 'x' refuses to overwrite an existing matrix file
with h5py.File('../data/interim/mcn/_corpus_val_rgb_matrix.hdf5', 'x') as fid:
    fid['prediction_matrix'] = prediction_matrix
    fid['similarity'] = False
    fid['_video_index'] = np.array(videos_order)
    fid['_moments_index'] = np.array(moments_order)

Unit-test to double check moment retrieval with distance matrix vs previous approach

In [None]:
import sys
sys.path.append('..')

import h5py
from evaluation import RetrievalEvaluation
from evaluation import CorpusVideoMomentRetrievalEvalFromMatrix

# Report shared by both evaluations so their outputs can be compared line by
# line.
_report_template = ('R@{0:}={2:};\nR@{0:},{1:}={3:};\nR@{0:},didemo={4:};\n'
                    'mIOU={5:.4f};\nmRank={6:.2f};')

# Previous approach: feed the queries one by one to the judge
_filename1 = '../data/interim/mcn/corpus_val_rgb.hdf5'
_filename2 = '../data/raw/val_data_wwa.json'
_judge = RetrievalEvaluation(_filename1, _filename2, (1, 5, 10), 0.1)
_filename = '../data/interim/mcn/queries_val_rgb.hdf5'

with h5py.File(_filename, 'r') as fid:
    for _sample_key, h5ds in fid.items():
        _query_id = int(_sample_key)
        _query_vector = h5ds[:]
        _judge.eval_single_vector(_query_vector, _query_id)
_performace = _judge.eval(full=True)
print(_report_template.format(
    _judge.k, _judge.iou_threshold, *_performace))

# New approach: evaluate from the pre-computed distance matrix
_h5_filename = '../data/interim/mcn/_corpus_val_rgb_matrix.hdf5'
_json_filename = '../data/raw/val_data_wwa.json'
_judge2 = CorpusVideoMomentRetrievalEvalFromMatrix(
    _json_filename, _h5_filename, (1, 5, 10), 0.1)
vale_mine = _judge2.eval(True)
# The header values are read from `_judge`, which was built with the same
# recall values and IOU threshold as `_judge2`.
print(_report_template.format(
    _judge.k, _judge.iou_threshold, *vale_mine))