# Code for visualization of Chamfer Distance alignmet between objects/clips and language.

Data needs to be dumped from the _eval_item function in dataset untrimmed and from the chamfer distance class. <br>
In particular we will try to trace back the mapping between each word and the relative object/scene.

In [1]:
import numpy as np
import json
import os
import sys
import tqdm
import time
import re
import cv2

import matplotlib
matplotlib.use('Agg')
import matplotlib.pyplot as plt
import matplotlib.patches as patches

# set display defaults
plt.rcParams['figure.figsize'] = (15, 6)        # small images
plt.rcParams['image.interpolation'] = 'nearest'  # don't interpolate: show square pixels
plt.rcParams['image.cmap'] = 'gray'  # use grayscale output rather than a (potentially misleading) color heatmap

In [2]:
from utils import LanguageRepresentationMCN_glove, compare_glove, sentences_to_words

  from ._conv import register_converters as _register_converters


In [3]:
from ancillary_visualization import *

# Setup Glove

In [4]:
## Initialization glove
t = time.time()
lang_interface = LanguageRepresentationMCN_glove(max_words=1)
print('Done in {:.2f} seconds.'.format(time.time()-t))

Done in 92.82 seconds.


# Read Data

In [5]:
# Load obj vocab
classes_VG = read_VG_data()
classes_VG_revisited, map_glove_word = polish_VG_data(classes_VG, lang_interface)

# Remove unnecessary images from the one I downloaded
# videos = load_from_annotations()

# Read dumped data:
dumped_moments = read_dumped_metadata()
# dumped_chamfer_dist = read_dumped_distances()

# Read mapping between clips and object classes
mapping_obj = read_mapping_between_clips_and_obj()

# Load Object data 
from ancillary_visualization import read_obj_detections
obj_detections = read_obj_detections()

# Analisys and mapping

In [6]:
# Global variables:
P_size = 21                  # number of proposals
clip_size = 2.5              # clip size

# Given annotations 
- Compute tokens (words) from description, they will be used for mathing with obj
- Get GT annotations to determine if we localize the action
- Load frames list given video id and clip size (Proposals will tell me which frames are relevant)
- From object features determine:
    - Object location (how do I understand which clip is the object related to? Maybe from raw features?)
    - Object name (inverse map - minimum distance with precomputed features)
- From pairwise distance we can determine which word points to which object and which frame

In [9]:
from ancillary_visualization import _get_frames_indices

In [10]:
# cycle over queries 
query_id = 4
metadata = dumped_moments[query_id]
data_obj = obj_detections[metadata['video']]

In [11]:
# 1- Compute tokens
tokens = sentences_to_words(metadata['description'])
# 2- Load frames paths
gt_annotations = metadata['times']
# 3- Load frames paths
frames = _get_frames_indices(video_id=metadata['video'], clip_size=clip_size)

In [12]:
proposal_obj_data = {}
for proposal_idx in range(len(metadata['proposals'])):
    # 4- Read data for specific proposal
    prop = metadata['proposals'][proposal_idx]
    proposal_obj_data[proposal_idx] = extract_obj_names_and_positions_per_clip(data_obj, prop,clip_size,classes_VG, False)

### proposal_obj_data
- key1 = key for proposal in video [0,1,2,...,20] [int]
- key2 = positions/names 
- key3 = key for clip in positions/names for each clip in the proposal [0,1,2,...,11] [int]
- inner variable is a list.

# Create images of proposals:

In [15]:
from ancillary_visualization import _get_frames_per_proposal
from ancillary_visualization import _inverse_map_box

In [45]:
def _plot_proposals(video_id,description, proposals_frames,time_annotations, moment_location, proposals_objects, proposals_mapping, prop_idx):
    path_dump = '../data/interim/matching_evaluation/images/'
    folder    = '{}{}/'.format(path_dump,video_id)
    if not os.path.exists(folder):
        os.makedirs(folder)
    
    names  = proposals_objects['names']
    names_keys = list(names.keys())
    bboxes = proposals_objects['positions']
    bboxes_keys = list(bboxes.keys())

    my_dpi=1
    fig, ax = plt.subplots(nrows=1, ncols=len(proposals_frames))
#     DPI = fig.get_dpi()
#     fig.set_size_inches(len(frames)*320/float(DPI),2*240.0/float(DPI))# figsize=(len(frames)*3,len(frames)/2))
    
    time_annotations = [t.tolist() for t in time_annotations]
    time_annotations = " ".join([f'[{t[0]}-{t[1]}]' for t in time_annotations])
    plt.suptitle(f'{description}  {moment_location} \n {time_annotations}', fontsize=20)
    fig.tight_layout()
    fig.subplots_adjust(left=None, bottom=0.0, right=None, top=None, wspace=0.01, hspace=None)
    
    for i, frame in enumerate(proposals_frames):
        im = cv2.imread(frame)
        im = cv2.cvtColor(im, cv2.COLOR_BGR2RGB)
        height, width, _ = im.shape
        
        obj_names_per_frame = names[names_keys[i]]
        obj_loc_per_frame   = _inverse_map_box(bboxes[bboxes_keys[i]], height, width)
        
        ax[i].imshow(im)
        ax[i].axis('off')
        
        for n,bbox in zip(obj_names_per_frame,obj_loc_per_frame):
            ax[i].add_patch(patches.Rectangle((bbox[0], bbox[1]),
                            bbox[2] - bbox[0],
                            bbox[3] - bbox[1], fill=False,
                            edgecolor='red', linewidth=2, alpha=0.5))
            ax[i].text(bbox[0], bbox[1] - 2,
                            '%s' % (n),
                            bbox=dict(facecolor='blue', alpha=0.25),
                            fontsize=10, color='white')
        
    f_name = frame.split('/')[-1]
    dump_path = '{}{}'.format(folder,prop_idx)
    plt.savefig(dump_path,bbox_inches='tight')
    plt.close()

In [46]:
# Cycle over proposals and create the image
for i, p in enumerate(metadata['proposals']):
    proposals_frames = _get_frames_per_proposal(frames, p, clip_size)
    _plot_proposals(video_id          = metadata['video'],
                    description       = metadata['description'], 
                    time_annotations  = metadata['times'],
                    moment_location   = p,
                    proposals_frames  = proposals_frames, 
                    proposals_objects = proposal_obj_data[i], 
                    proposals_mapping = {},
                    prop_idx=i)

In [47]:
# Do all - deprecated
ids = list(np.arange(12,4021))
for query_id in ids:
    try:
        metadata = dumped_moments[query_id+1]
        data_obj = obj_detections[metadata['video']]
        # 1- Compute tokens
        tokens = sentences_to_words(metadata['description'])
        # 2- Load frames paths
        gt_annotations = metadata['times']
        # 3- Load frames paths
        frames = _get_frames_indices(video_id=metadata['video'], clip_size=clip_size)
        proposal_obj_data = {}
        for proposal_idx in range(len(metadata['proposals'])):
            # 4- Read data for specific proposal
            prop = metadata['proposals'][proposal_idx]
            proposal_obj_data[proposal_idx] = extract_obj_names_and_positions_per_clip(data_obj, prop,clip_size,classes_VG, False)

        # Cycle over proposals and create the image
        for i, p in enumerate(metadata['proposals']):
            proposals_frames = _get_frames_per_proposal(frames, p, clip_size)
            _plot_proposals(video_id          = metadata['video'],
                            description       = metadata['description'], 
                            time_annotations  = metadata['times'],
                            moment_location   = p,
                            proposals_frames  = proposals_frames, 
                            proposals_objects = proposal_obj_data[i], 
                            proposals_mapping = {},
                            prop_idx=i)
    except:
        print(query_id)

12
27
30
31
32
33
34
36
39
42
48
50
58
59
61
75
76
77
81
86
89
92
111
117
121
125
128
138
142
145
147
149
159
162
168
170
171
173
175
180
184
185
186
188
194
196
197
200
210
214
217
222
226
228
242
246
247
249
252
253
261
265
269
270
272
276
279
284
296
297
299
300
301
304
306
311
313
316
320
328
334
338
344
354
357
358
364
366
368
369
370
372
373
374
377
381
383
394
398
407
408
414
420
422
428
433
438
439
443
446
456
465
472
477
484
487
488
495
497
502
510
512
513
519
520
524
526
527
531
532
540
545
548
551
566
570
571
572
578
580
582
586
591
594
596
597
604
606
611
614
616
619
627
628
632
633
641
648
655
657
689
691
694
696
701
702
711
712
716
717
718
727
734
736
744
745
766
771
772
777
786
791
793
803
809
810
813
815
819
821
829
834
838
842
854
856
859
878
880
883
887
888
891
895
899
905
910
911
918
919
925
928
930
931
936
937
954
959
961
962
973
975
976
983
987
996
1007
1008
1011
1012
1019
1026
1030
1037
1043
1044
1053
1054
1055
1059
1069
1076
1087
1088
1090
1093
1096
1107
1110
111