This notebook reformats the existing feature files into the feature-file format required by GPNN.

# Features required by GPNN
dict_keys(['metadata', 'num_obj', 'lr', 'mr', 'cr', 'object_pairs', 'num_relation', 'i3d_feature_map', 'bboxes', 'cnn_bbox_feature'])

Plan: store the features of all files in a single pickle file, then add code to the dataloader that separates those features into the train, val and test splits.

In [11]:
import pickle as pkl

# NOTE: despite its name, `train_file` currently points at the validation split.
train_file = '/workspace/data/data_folder/o2o/all_features/gpnn/val.pkl'
# Use a context manager so the file handle is closed as soon as loading ends.
with open(train_file, 'rb') as f:
    d = pkl.load(f)

In [12]:
import numpy as np
import torchvision
import torch

def roi_align(feature_map, boxes, num_obj, im_width, im_height):
    """Pool one feature vector per object box from a convolutional feature map.

    The boxes are rescaled from image pixel coordinates to feature-map
    coordinates and pooled with ``torchvision.ops.RoIAlign`` using a 1x1
    output, so each box yields a single value per channel.

    Args:
        feature_map: 4-D tensor whose last two dimensions are read as
            (height, width). It is moved to the CPU before pooling.
            Presumably laid out as (frames, channels, height, width) --
            TODO confirm against the feature extractor.
        boxes: tensor indexed as ``boxes[object, frame, coord]`` with four
            coordinates per box. Columns 0 and 2 are scaled by width and
            columns 1 and 3 by height, i.e. the (x1, y1, x2, y2) layout that
            RoIAlign expects. The caller's tensor is not modified.
        num_obj: number of leading object rows kept for every frame.
        im_width: width of the original image, in pixels.
        im_height: height of the original image, in pixels.

    Returns:
        The RoIAlign output: one row per kept box, ordered frame by frame
        (all boxes of frame 0, then frame 1, ...), with trailing spatial
        dimensions of size 1x1.
    """
    feature_map = feature_map.to(torch.device('cpu'))
    fmap_height, fmap_width = feature_map.shape[2:]

    # Per-coordinate scale factors for the (x1, y1, x2, y2) columns.
    image_size = torch.tensor(
        [im_width, im_height, im_width, im_height],
        dtype=feature_map.dtype, device=feature_map.device)
    fmap_size = torch.tensor(
        [fmap_width, fmap_height, fmap_width, fmap_height],
        dtype=feature_map.dtype, device=feature_map.device)

    # Normalise to [0, 1] by the image size, then scale up to feature-map
    # coordinates.  Both x and y are *multiplied* by the feature-map size.
    # Computed out of place so the caller's boxes are left untouched, and on
    # the feature map's device/dtype, which the RoIAlign kernel requires.
    scaled_boxes = boxes.to(device=feature_map.device, dtype=feature_map.dtype)
    scaled_boxes = scaled_boxes / image_size * fmap_size

    # Boxes are already in feature-map coordinates, hence spatial_scale=1.0.
    pooler = torchvision.ops.RoIAlign(output_size=(1, 1), spatial_scale = 1.0, sampling_ratio=1)

    # RoIAlign takes one (num_boxes, 4) tensor per feature-map batch element;
    # split along the frame axis and keep only the first num_obj rows.
    boxes_per_frame = [frame_boxes[:num_obj]
                       for frame_boxes in scaled_boxes.unbind(dim=1)]

    return pooler(feature_map, boxes_per_frame)

In [17]:
# Master Feature Generator for VSGNet
import torch
import os
from copy import deepcopy as copy

def master_feature_generator_old( full_feature_filename ):
    """Build the reduced feature dict for one sample from its saved feature file.

    Steps:
      1. Load the full feature dict with ``torch.load``.
      2. Deep-copy the entries listed in ``required_keys``.
      3. RoI-align the stored ``'i3d_fmap'`` feature map over every second
         frame of ``'bboxes'`` (frames 1, 3, 5, ...).
      4. Store the pooled per-object vectors under ``'i3d_feature_vec'``.

    Args:
        full_feature_filename: path of a file readable by ``torch.load`` that
            holds a dict with the required keys plus ``'i3d_fmap'``; its
            ``'metadata'`` entry must provide ``'frame_width'`` and
            ``'frame_height'``.

    Returns:
        A new dict with the copied entries and ``'i3d_feature_vec'``, a
        zero-initialised ``(11, 12, 1024)`` tensor in which
        ``[frame, :num_obj]`` is filled for the first 5 frames.
        Presumably 11 = frames per clip, 12 = maximum objects and
        1024 = feature-map channels -- TODO confirm.
    """
    required_keys = ['metadata', 'num_obj', 'lr', 'mr', 'cr', 'object_pairs', 'num_relation', 'bboxes'] # 'i3d_feature_vec'
    full_feature = torch.load(full_feature_filename)

    # Deep copies, so later work on the loaded tensors cannot leak into the result.
    new_dict = {key: copy(full_feature[key]) for key in required_keys}

    convo_feature_map = full_feature['i3d_fmap']
    # Keep every second frame of boxes, starting at frame 1.
    i3d_bboxes = full_feature['bboxes'][:,1::2,:]

    im_width = full_feature['metadata']['frame_width']
    im_height = full_feature['metadata']['frame_height']
    num_obj = full_feature['num_obj']

    # perform RoI align
    roialign_output = roi_align(convo_feature_map, i3d_bboxes,
                                num_obj, im_width, im_height)

    result_tensor = torch.zeros((11, 12, 1024))

    # The pooled rows are ordered frame by frame, num_obj rows per frame.
    for frame in range(5):
        ind0 = frame*num_obj
        ind1 = (frame + 1)*num_obj
        # Keep the whole channel axis of the 1x1 pooled output.  Indexing the
        # channel axis with 0 would select a single channel and broadcast
        # that one value across the entire feature vector.
        result_tensor[frame, :num_obj] = roialign_output[ind0:ind1, :, 0, 0]

    new_dict['i3d_feature_vec'] = result_tensor

    return new_dict


In [18]:
# Run the generator on a single saved feature file as a quick check.  The call
# is left as the cell's last expression so the returned dict is echoed below.
sample_path = '/workspace/data/data_folder/o2o/gifs_11_features_ral/_5qYC5nmwmU_1602_5.pt'
master_feature_generator_old(sample_path)

{'metadata': {'activity name': 'PolishCar',
  'yt_id': '_5qYC5nmwmU',
  'frame no.': '1602',
  'frame_width': 1280,
  'frame_height': 720},
 'num_obj': 5,
 'lr': tensor([[0., 0., 1., 0., 0.],
         [0., 1., 0., 0., 0.],
         [1., 0., 0., 0., 0.],
         [0., 1., 1., 0., 0.],
         [1., 0., 0., 0., 0.],
         [1., 0., 0., 0., 0.],
         [0., 0., 1., 0., 0.],
         [0., 0., 1., 0., 0.],
         [0., 0., 0., 0., 0.],
         [0., 0., 0., 0., 0.],
         [0., 0., 0., 0., 0.],
         [0., 0., 0., 0., 0.],
         [0., 0., 0., 0., 0.],
         [0., 0., 0., 0., 0.],
         [0., 0., 0., 0., 0.]]),
 'mr': tensor([[0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
         [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0.],
         [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0.],
         [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0.],
         [0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0.],
         [0., 0., 0., 0., 0., 0