## Steps:
1. **'make_data_loader'** [function](https://github.com/MCG-NJU/STMixer/blob/main/alphaction/dataset/build.py): creates a list of dataloaders. We get one sample batch from the first data loader. This batch contains: 
   * **'slow_clips'**: cpu-torch tensor of shape [1, 3, 16, 256, 352] with dtype = torch.float32.
   * **'fast_clips'**: None for this config file.
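   * **'whwh'**: cpu-torch tensor of shape [1, 4] with dtype = torch.float32.
   * **'boxes'**: a tuple (batch size). 'boxes[0]': np array of shape Nx4, with dtype = float64. These boxes are in pixel coordinates (not normalized).
   * **'label_arrs'**: a tuple (batch size). 'label_arrs[0]': np array of shape Nx80, with dtype = int32 (multi-hot action labels, one row per box).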
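   * **'metadata'**: ([0, 902],), i.e. `[video_idx, sec]` for each sample. Here 0 is the video index and 902 is the timestamp (`sec`) of the first keyframe of the video (the video is used after cutting its first 15 minutes; the frame index, however, starts from 0).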
   * **'clip_ids'**: (0,)
   
   
2. **inside 'make_data_loader'**:
   * ss

In [1]:
import argparse
import os

import torch
from alphaction.config import cfg
from alphaction.dataset import make_data_loader
from alphaction.engine.inference import inference
from alphaction.modeling.detector import build_detection_model
from alphaction.utils.checkpoint import ActionCheckpointer
from torch.utils.collect_env import get_pretty_env_info
from alphaction.utils.comm import synchronize, get_rank
from alphaction.utils.logger import setup_logger
# PyTorch issue #973
import resource

In [2]:
config_file = '../config_files/VMAE-ViTB-16x4.yaml'

In [3]:
cfg.merge_from_file(config_file)

In [4]:
# Notebook-specific config overrides, applied one at a time in the listed order.
cfg_overrides = [
    # model weight path
    ("MODEL.WEIGHT", "../checkpoints/VMAE_ViTB_16x4.pth"),
    # output directory
    ("OUTPUT_DIR", "../output_dir/"),
    # root directory of the data
    ("DATA.PATH_TO_DATA_DIR", "/work/ava"),
    # folder name of the annotations
    ("AVA.ANNOTATION_DIR", "annotations/"),
    # file names of the frame lists
    ("AVA.TRAIN_LISTS", ['sample.csv']),
    ("AVA.TEST_LISTS", ['sample.csv']),
    # file names of the box lists (the sample file holds predicted boxes)
    ("AVA.TRAIN_GT_BOX_LISTS", ['ava_sample_predicted_boxes.csv']),
    ("AVA.TEST_GT_BOX_LISTS", ['ava_sample_predicted_boxes.csv']),
    # file name of the exclusions
    ("AVA.EXCLUSION_FILE", 'ava_sample_train_excluded_timestamps_v2.2.csv'),
    # number of videos per batch at test time
    ("TEST.VIDEOS_PER_BATCH", 1),
    # number of dataloader workers
    ("DATALOADER.NUM_WORKERS", 1),
]

for cfg_key, cfg_value in cfg_overrides:
    cfg.merge_from_list([cfg_key, cfg_value])


### 1. Calling 'make_data_loader'

'make_data_loader' method defined here:

https://github.com/MCG-NJU/STMixer/blob/main/alphaction/dataset/build.py

In [5]:
data_loaders_test = make_data_loader(cfg, is_train=False, is_distributed=False)

In [6]:
type(data_loaders_test), len(data_loaders_test)

(list, 1)

In [7]:
batch = next(iter(data_loaders_test[0]))

In [8]:
slow_clips, fast_clips, whwh, boxes, label_arrs, metadata, clip_ids = batch

In [9]:
slow_clips.shape, slow_clips.dtype, slow_clips.device, slow_clips.requires_grad

(torch.Size([1, 3, 16, 256, 352]), torch.float32, device(type='cpu'), False)

In [20]:
# `fast_clips` is either None or a tensor. Compare against None explicitly:
# truth-testing a tensor with more than one element raises a RuntimeError,
# so `if fast_clips:` would crash for any config that does produce fast clips.
if fast_clips is not None:
    print(fast_clips.shape, fast_clips.dtype, fast_clips.device, fast_clips.requires_grad)
else:
    print(f"fast_clips is None for this config: {config_file}")

fast_clips is None for this config: ../config_files/VMAE-ViTB-16x4.yaml


In [19]:
type(fast_clips)

NoneType

In [12]:
whwh # tensor([[346., 256., 346., 256.]])

tensor([[346., 256., 346., 256.]])

In [11]:
whwh.shape, whwh.dtype, whwh.device, whwh.requires_grad
# (torch.Size([1, 4]), torch.float32, device(type='cpu'), False)

(torch.Size([1, 4]), torch.float32, device(type='cpu'), False)

In [13]:
type(boxes), len(boxes)

(tuple, 1)

In [14]:
boxes[0].shape, type(boxes[0]), boxes[0].dtype, boxes[0][0,:]

((11, 4),
 numpy.ndarray,
 dtype('float64'),
 array([112.6656,  47.36  , 162.432 , 227.072 ]))

In [15]:
type(label_arrs), len(label_arrs)

(tuple, 1)

In [16]:
label_arrs[0].shape, type(label_arrs[0]),  label_arrs[0].dtype, label_arrs[0][0,:]

((11, 80),
 numpy.ndarray,
 dtype('int32'),
 array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1], dtype=int32))

In [17]:
metadata

([0, 902],)

In [18]:
clip_ids

(0,)

### 2. inside 'make_data_loader'

steps:
   1. setting some params.
   2. building datasets: `datasets = build_dataset(cfg, split=split)`. We will inspect this method in next section.
   3. For each **dataset**:
       * create **sampler**: `sampler = make_data_sampler(dataset, shuffle, is_distributed)`
       * create **batch_sampler**: `batch_sampler = make_batch_data_sampler(
            dataset, sampler, aspect_grouping, videos_per_gpu, num_iters, start_iter, drop_last)`
       * create **collator**: `collator = BatchCollator(cfg.DATALOADER.SIZE_DIVISIBILITY)`
       * create **data_loader** based on 'torch.utils.data.DataLoader' where `num_workers = cfg.DATALOADER.NUM_WORKERS`: 
       
       `data_loader = torch.utils.data.DataLoader(dataset, num_workers=num_workers,batch_sampler=batch_sampler, collate_fn=collator,) `

#### 2.1 params

In [None]:
is_distributed = False

In [None]:
is_train = False

In [None]:
num_gpus = 1

In [None]:
videos_per_batch = cfg.TEST.VIDEOS_PER_BATCH
videos_per_batch # 1

In [None]:
cfg.TEST.VIDEOS_PER_BATCH

In [None]:
# Every GPU must receive the same number of videos.
assert videos_per_batch % num_gpus == 0, (
    f"TEST.VIDEOS_PER_BATCH ({videos_per_batch}) must be divisible by "
    f"the number of GPUs ({num_gpus}) used."
)

In [None]:
videos_per_gpu = videos_per_batch // num_gpus

In [None]:
# At test time the data is shuffled only in the distributed setting.
shuffle = bool(is_distributed)
shuffle

In [None]:
drop_last = False
num_iters = None
start_iter = 0
split = 'test'

In [None]:
# group images which have similar aspect ratio. In this case, we only
# group in two cases: those with width / height > 1, and the other way around,
# but the code supports more general grouping strategy
aspect_grouping = [1] if cfg.DATALOADER.ASPECT_RATIO_GROUPING else []
aspect_grouping

#### 2.2. Calling 'build_dataset'
'make_data_loader' calls **['build_dataset'](https://github.com/MCG-NJU/STMixer/blob/main/alphaction/dataset/build.py)**.

In [None]:
from alphaction.dataset.build import build_dataset

In [None]:
datasets = build_dataset(cfg, split=split)

In [None]:
datasets

#### 2.3 create 'sampler' and 'batch_sampler'
For each `dataset`, we define `sampler` and `batch_sampler`:
* `sampler = make_data_sampler(dataset, shuffle, is_distributed)` defined [here](https://github.com/MCG-NJU/STMixer/blob/main/alphaction/dataset/build.py).
* `batch_sampler = make_batch_data_sampler(dataset, sampler, aspect_grouping, videos_per_gpu, num_iters, start_iter, drop_last)` defined [here](https://github.com/MCG-NJU/STMixer/blob/main/alphaction/dataset/build.py).

In [None]:
shuffle, is_distributed

* For distributed:
   * sampler is instantiated from the defined sampler for distributed data: `samplers.DistributedSampler(dataset, shuffle=shuffle)`
* Else:
   * if shuffle:
       * calls `torch.utils.data.sampler.RandomSampler(dataset)`
   * else:
       * calls `sampler = torch.utils.data.sampler.SequentialSampler(dataset)` (**THIS CASE**)

In [None]:
aspect_grouping, videos_per_gpu, num_iters, start_iter, drop_last

* if `aspect_grouping`:
    * `batch_sampler` is defined from `samplers.GroupedBatchSampler` defined [here](https://github.com/MCG-NJU/STMixer/tree/main/alphaction/dataset/samplers).
* else:
    * if `num_iters` is None (**THIS CASE**):
     
        ```batch_sampler = torch.utils.data.sampler.BatchSampler(sampler, videos_per_batch, drop_last=drop_last)```
    * else: 
    
    ```batch_sampler = samplers.IterationBasedBatchSampler(batch_sampler, num_iters, start_iter)```


In [None]:
num_workers = cfg.DATALOADER.NUM_WORKERS
num_workers

'collator': used to define how individual samples from a dataset are batched together. 

In [None]:
cfg.DATALOADER.SIZE_DIVISIBILITY

In [None]:
# collator = BatchCollator(cfg.DATALOADER.SIZE_DIVISIBILITY)

### 3. inside 'build_dataset'

'build_dataset' will call 'D.Ava(cfg, split)'

In [None]:
cfg.DATA.DATASETS[0] 

### 3. inside 'dataset.Ava' class

Object from this [class](https://github.com/MCG-NJU/STMixer/blob/main/alphaction/dataset/datasets/ava_dataset.py)

#### 3.1 init
1. setting params
2. calling `self._load_data(cfg)`
3. setting `eval_file_paths`

##### 3.1.1 setting params

In [None]:
self_cfg = cfg
self_split = split

self_split

In [None]:
self_sample_rate = cfg.DATA.SAMPLING_RATE
self_sample_rate  

In [None]:
self_video_length = cfg.DATA.NUM_FRAMES
self_video_length   

In [None]:
self_seq_len = self_video_length * self_sample_rate
self_seq_len

In [None]:
self_num_classes = cfg.MODEL.STM.ACTION_CLASSES
self_num_classes       

In [None]:
# Augmentation params.
self_data_mean = cfg.DATA.MEAN
self_data_std = cfg.DATA.STD
self_use_bgr = cfg.AVA.BGR

self_data_mean, self_data_std, self_use_bgr

In [None]:
if self_split == "train":
    self_jitter_min_scale = cfg.DATA.TRAIN_MIN_SCALES # list
    self_jitter_max_scale = cfg.DATA.TRAIN_MAX_SCALE # int
    self_random_horizontal_flip = cfg.DATA.RANDOM_FLIP
    self_use_color_augmentation = cfg.AVA.TRAIN_USE_COLOR_AUGMENTATION
    self_pca_jitter_only = cfg.AVA.TRAIN_PCA_JITTER_ONLY
    self_pca_eigval = cfg.AVA.TRAIN_PCA_EIGVAL
    self_pca_eigvec = cfg.AVA.TRAIN_PCA_EIGVEC
else:
    self_jitter_min_scale = cfg.DATA.TEST_MIN_SCALES
    self_jitter_max_scale = cfg.DATA.TEST_MAX_SCALE
    self_test_force_flip = cfg.AVA.TEST_FORCE_FLIP
    
    print(self_jitter_min_scale, self_jitter_max_scale, self_test_force_flip)

##### 3.1.2 self._load_data(cfg)

 calls 'load_image_lists(cfg, is_train=(self._split == "train"))'

##### 3.1.2.1. Loading frame paths.

In [None]:
from alphaction.dataset.datasets.ava_helper import load_image_lists

In [None]:
(self_image_paths, self_video_idx_to_name) = load_image_lists(cfg, is_train=False)

In [None]:
self_image_paths

In [None]:
self_video_idx_to_name

##### 3.1.2.2. Loading annotations for boxes and labels.

In [None]:
from alphaction.dataset.datasets.ava_helper import load_boxes_and_labels

In [None]:
# Loading annotations for boxes and labels.
boxes_and_labels = load_boxes_and_labels(cfg, mode=self_split)

In [None]:
boxes_and_labels

In [None]:
assert len(boxes_and_labels) == len(self_image_paths)

In [None]:
# Re-key the annotations: from a mapping keyed by video name to a list
# ordered by video index (one entry per entry of `self_image_paths`).
num_videos = len(self_image_paths)
video_names_in_order = (self_video_idx_to_name[i] for i in range(num_videos))
boxes_and_labels = [boxes_and_labels[name] for name in video_names_in_order]

In [None]:
boxes_and_labels

##### 3.1.2.3. Get indices of keyframes and corresponding boxes and labels.

In [None]:
from alphaction.dataset.datasets.ava_helper import get_keyframe_data

In [None]:
# Get indices of keyframes and corresponding boxes and labels.
(self_keyframe_indices, self_keyframe_boxes_and_labels,) = get_keyframe_data(boxes_and_labels)

In [None]:
self_keyframe_indices

In [None]:
self_keyframe_boxes_and_labels

##### 3.1.2.4. Calculate the number of used boxes.

In [None]:
from alphaction.dataset.datasets.ava_helper import get_num_boxes_used

In [None]:
self_num_boxes_used = get_num_boxes_used(self_keyframe_indices, self_keyframe_boxes_and_labels)

In [None]:
self_num_boxes_used

##### 3.1.3. setting `self.eval_file_paths`

In [None]:
anno_dir = os.path.join(cfg.DATA.PATH_TO_DATA_DIR, cfg.AVA.ANNOTATION_DIR)
csv_gt_file = os.path.join(anno_dir, cfg.AVA.TEST_GT_BOX_LISTS[0])
labelmap_file = os.path.join(anno_dir, cfg.AVA.LABEL_MAP_FILE)
exclusion_file = os.path.join(anno_dir, cfg.AVA.EXCLUSION_FILE)

In [None]:
anno_dir, csv_gt_file, labelmap_file, exclusion_file

### 3.2. `__getitem__(self, idx)`

Generate corresponding clips, boxes, labels and metadata for given idx.
   * Args:`idx` (int): the video index provided by the pytorch sampler.
   * Returns:
       * **frames** (tensor): the frames sampled from the video. The dimension is `channel` x `num frames` x `height` x `width`.
       * **label** (ndarray): the labels of the corresponding boxes for the current video.
       * **time index** (zero): The time index is currently **not supported for AVA**.
       * **idx** (int): the video index provided by the pytorch sampler.
       * **extra_data** (dict): a dict containing extra data fields, like "boxes", "ori_boxes" and "metadata".


#### def __getitem__(self, idx):

In [None]:
idx = 0

#### 3.2.1. Get the frame idxs for current clip.

In [None]:
video_idx, sec_idx, sec, center_idx = self_keyframe_indices[idx]

In [None]:
video_idx, sec_idx, sec, center_idx 

In [None]:
from alphaction.dataset.datasets.utils import get_sequence

In [None]:
seq = get_sequence(
            center_idx,
            self_seq_len // 2,
            self_sample_rate,
            num_frames=len(self_image_paths[video_idx]),
        )

In [None]:
seq # note that center_idx is located in 8th position of this seq

In [None]:
clip_label_list = self_keyframe_boxes_and_labels[video_idx][sec_idx]

In [None]:
clip_label_list

In [None]:
assert len(clip_label_list) > 0

#### 3.2.2. Get boxes and labels for current clip.

In [None]:
# Each entry of `clip_label_list` holds the box at position 0 and its
# labels at position 1; split them into two parallel lists.
boxes = [entry[0] for entry in clip_label_list]
labels = [entry[1] for entry in clip_label_list]

boxes, labels

In [None]:
import numpy as np
boxes = np.array(boxes)

In [None]:
# Score is not used.
boxes = boxes[:, :4].copy()
# ori_boxes = boxes.copy()
boxes

#### 3.2.3. Load images of current clip.

In [None]:
# Load images of current clip.
image_paths = [self_image_paths[video_idx][frame] for frame in seq]
image_paths

In [None]:
from alphaction.dataset.datasets.utils import retry_load_images

In [None]:
imgs = retry_load_images(image_paths, backend='cv2')

In [None]:
len(imgs), imgs[0].shape # (16, (360, 486, 3))

#### 3.2.4. Pre-processing of images and bboxes of current clip.

In this step, we call `imgs, boxes = self._images_and_boxes_preprocessing_cv2(imgs, boxes=boxes)`

#### Inside of `self._images_and_boxes_preprocessing_cv2`

In [None]:
height, width, _ = imgs[0].shape

height, width # (360, 486)

##### 3.2.4.1. undo the normalization of annotated bbox

In [None]:
boxes[:, [0, 2]] *= width
boxes[:, [1, 3]] *= height

In [None]:
boxes

##### 3.2.4.2. applying transformations

In [None]:
from alphaction.dataset.datasets import cv2_transform as cv2_transform

Clip the boxes with the height and width of the image size.

In [None]:
boxes = cv2_transform.clip_boxes_to_image(boxes, height, width)

In [None]:
boxes

In [None]:
# The `cv2_transform` functions take boxes as a list of np.array.
# However, for AVA, we only have one np.array, so we wrap it in a list.
boxes = [boxes]

In [None]:
type(boxes)

In [None]:
 self_split

Perform a spatial short scale jittering on the given images and corresponding boxes.

* Args:
   * images (list): list of images to perform scale jitter. Dimension is `height` x `width` x `channel`.
   * min_size (int): the minimal size to scale the frames.
   * max_size (int): the maximal size to scale the frames.
   * boxes (list): optional. Corresponding boxes to images. Dimension is `num boxes` x 4.
* Returns:
    * (list): the list of scaled images with dimension of `new height` x `new width` x `channel`.
    * (ndarray or None): the scaled boxes with dimension of `num boxes` x 4.
    

In [None]:
imgs, boxes = cv2_transform.random_short_side_scale_jitter(
    imgs,
    min_sizes=self_jitter_min_scale,
    max_size=self_jitter_max_scale,
    boxes=boxes,
)

In [None]:
imgs[0].shape # (256, 346, 3)

In [None]:
type(boxes), boxes[0].shape, boxes[0]

In [None]:
self_test_force_flip

##### 3.2.4.3. Convert image to CHW keeping BGR order.

In [None]:
# Convert image to CHW keeping BGR order.
imgs = [cv2_transform.HWC2CHW(img) for img in imgs]

In [None]:
imgs[0].shape # (3, 256, 346)

##### 3.2.4.4. color normalization of imgs to [0,1]

In [None]:
# Image [0, 255] -> [0, 1].
imgs = [img / 255.0 for img in imgs]

In [None]:
imgs = [np.ascontiguousarray(
    # img.reshape((3, self._crop_size, self._crop_size))
    img.reshape((3, imgs[0].shape[1], imgs[0].shape[2]))
).astype(np.float32) for img in imgs]

In [None]:
imgs[0].shape # (3, 256, 346)

##### 3.2.4.5. Do color augmentation (after divided by 255.0).

In [None]:
self_split == "train" and self_use_color_augmentation

##### 3.2.4.6. Normalize images by mean and std.

In [None]:
imgs = [
            cv2_transform.color_normalization(
                img,
                np.array(self_data_mean, dtype=np.float32),
                np.array(self_data_std, dtype=np.float32),
            )
            for img in imgs
        ]


In [None]:
imgs[0].shape, imgs[0][:,0,0]

##### 3.2.4.6 Concat list of images to single ndarray.

In [None]:
imgs = np.concatenate(
            [np.expand_dims(img, axis=1) for img in imgs], axis=1
        )


In [None]:
imgs.shape # (3, 16, 256, 346)

##### 3.2.4.7 Convert image format from BGR to RGB.


In [None]:
self_use_bgr

In [None]:
if not self_use_bgr:
    # Convert image format from BGR to RGB.
    imgs = imgs[::-1, ...]

In [None]:
imgs.shape # (3, 16, 256, 346)

In [None]:
imgs = np.ascontiguousarray(imgs)

In [None]:
imgs.shape # (3, 16, 256, 346)

In [None]:
imgs = torch.from_numpy(imgs)

In [None]:
imgs.shape # torch.Size([3, 16, 256, 346])

In [None]:
boxes = cv2_transform.clip_boxes_to_image(
            boxes[0], imgs[0].shape[1], imgs[0].shape[2]
        )

In [None]:
boxes.shape, type(boxes) #((11, 4), numpy.ndarray)

#### 3.2.5 Construct label arrays


In [None]:
label_arrs = np.zeros((len(labels), self_num_classes), dtype=np.int32)


In [None]:
label_arrs.shape # (11, 80)

In [None]:
labels # [[80], [9], [9], [9], [80, 17, 12], [-1], [80, 9], [-1], [-1], [80, 9], [-1]]

In [None]:
# Fill the multi-hot label matrix: row = box, column = (label id - 1).
# The valid id range is taken from the matrix width instead of a hard-coded
# 80, so the check stays in sync with however `label_arrs` was allocated.
num_label_columns = label_arrs.shape[1]
for row_idx, box_labels in enumerate(labels):
    for label in box_labels:
        # Entries equal to -1 are skipped; such a row keeps its zeros.
        if label == -1:
            continue
        # AVA label index starts from 1.
        assert 1 <= label <= num_label_columns
        label_arrs[row_idx][label - 1] = 1

In [None]:
label_arrs.shape

In [None]:
pathways = cfg.MODEL.BACKBONE.PATHWAYS

In [None]:
pathways

In [None]:
from alphaction.dataset.datasets.utils import pack_pathway_output

In [None]:
imgs = pack_pathway_output(self_cfg, imgs, pathways=pathways)

In [None]:
type(imgs), len(imgs)

In [None]:
imgs[0].shape # torch.Size([3, 16, 256, 346])

In [None]:
# `imgs` is the list returned by `pack_pathway_output`: one entry per pathway.
# With a single pathway there is no fast clip.
if pathways == 1:
    slow, fast = imgs[0], None
else:
    # The fast pathway is the second list entry. The previous `imgs[1][1]`
    # indexed *into* that entry (its first axis) instead of selecting it.
    # NOTE(review): confirm against the upstream `Ava.__getitem__`.
    slow, fast = imgs[0], imgs[1]

In [None]:
slow.shape

In [None]:
fast

In [None]:
h, w = slow.shape[-2:]
h, w #(256, 346)

In [None]:
whwh = torch.tensor([w, h, w, h], dtype=torch.float32)
# tensor([346., 256., 346., 256.])

In [None]:
whwh

In [None]:
metadata = [video_idx, sec]
metadata # [0, 902]

In [None]:
idx

### COMPARISON OF OUTPUT OF DATALOADER AND DATASET

* Note that the height and width of the dataloader output are padded up to a multiple of `cfg.DATALOADER.SIZE_DIVISIBILITY` (when it is greater than 0).
* Padding is done in `BatchCollator`, e.g., `batch_different_videos` defined [HERE](https://github.com/MCG-NJU/STMixer/blob/main/alphaction/dataset/collate_batch.py).
* Padding is one-sided: each clip is copied into the top-left corner of a zero tensor, so zeros are only appended at the bottom and on the right.

output of dataloader:

In [None]:
slow_clips.shape, slow_clips.dtype, slow_clips.device

In [None]:
slow.shape, slow.dtype, slow.device

In [None]:
# Drop the batch axis and crop away the collator's bottom/right zero padding.
# The crop size is taken from the un-padded dataset sample `slow` rather than
# a hard-coded width, so the comparison also works for other frame sizes
# (including clips whose height was padded).
slow_clips_without_batch = slow_clips[0, ..., : slow.shape[-2], : slow.shape[-1]]

In [None]:
slow_clips_without_batch.shape

In [None]:
torch.equal(slow_clips_without_batch, slow)

In [None]:
slow

In [10]:
import torch
import math

def batch_different_videos(videos, size_divisible=0):
    '''
    Stack clips of possibly different sizes into one zero-padded tensor.

    Every clip is copied into the top-left corner of its slot, so padding is
    only added at the end of each axis.

    :param videos: a list or tuple of video tensors (4 axes each, e.g. C x T x H x W)
    :param size_divisible: if > 0, the output height and width (axes 2 and 3)
        are rounded up to a multiple of this value
    :return: batched videos as a single tensor of shape (len(videos), ...)
    '''
    assert isinstance(videos, (tuple, list))

    # Per-axis maximum over all clips, e.g. [3, 16, 256, 346].
    padded_dims = [max(axis_sizes) for axis_sizes in zip(*(clip.shape for clip in videos))]

    if size_divisible > 0:
        # Round the two spatial axes up, e.g. 346 -> 352 for a stride of 32.
        for axis in (2, 3):
            padded_dims[axis] = int(math.ceil(padded_dims[axis] / size_divisible) * size_divisible)

    # Zero tensor with the dtype/device of the first clip, e.g. (1, 3, 16, 256, 352).
    batched_clips = videos[0].new_zeros((len(videos), *padded_dims))

    for slot, clip in zip(batched_clips, videos):
        dims = clip.shape
        slot[: dims[0], : dims[1], : dims[2], : dims[3]].copy_(clip)

    return batched_clips

# Example usage: a single clip whose width (346) is not a multiple of 32.
input_tensor = [torch.rand(3, 16, 256, 346)]  # One clip of shape 3x16x256x346
size_divisible = 32  # Height and width are rounded up to multiples of 32
batched_result = batch_different_videos(input_tensor, size_divisible)
#print(batched_result.shape)  # Shape of the batched tensor: (1, 3, 16, 256, 352)


torch.Size([3, 16, 256, 346])


In [None]:
gt_lists = cfg.AVA.TEST_GT_BOX_LISTS


In [None]:
ann_filenames = [
        os.path.join(cfg.DATA.PATH_TO_DATA_DIR, cfg.AVA.ANNOTATION_DIR, filename)
        for filename in gt_lists
    ]

In [None]:
ann_filenames

In [None]:
ann_is_gt_box = [True] * len(gt_lists)


In [None]:
ann_is_gt_box

In [None]:
from alphaction.dataset.datasets.ava_helper import parse_bboxes_file, load_boxes_and_labels

In [None]:
boxes_and_labels = load_boxes_and_labels(cfg, mode='test')

In [None]:
from alphaction.dataset.datasets.ava_helper import load_image_lists

In [None]:
(image_paths, video_idx_to_name) = load_image_lists(cfg, is_train=False)

In [None]:
image_paths

In [None]:
video_idx_to_name

In [None]:
boxes_and_labels 

In [None]:
boxes_and_labels = [
            boxes_and_labels[video_idx_to_name[i]]
            for i in range(len(image_paths))
        ]

In [None]:
video_idx_to_name = ['-5KQ66BBWC4']

In [None]:
# Full paths of the test-split frame lists. The `... if False else ...`
# conditional always selected `cfg.AVA.TEST_LISTS`, so it is used directly.
list_filenames = [
    os.path.join(cfg.DATA.PATH_TO_DATA_DIR, cfg.AVA.FRAME_LIST_DIR, filename)
    for filename in cfg.AVA.TEST_LISTS
]

In [None]:
list_filenames

In [None]:
from collections import defaultdict
from iopath.common.file_io import g_pathmgr as pathmgr


In [None]:
image_paths = defaultdict(list)
video_name_to_idx = {}
video_idx_to_name = []

In [None]:

# Parse every frame-list file and group the frame paths by video.
# Fills `image_paths` (video index -> list of frame paths), `video_name_to_idx`
# and `video_idx_to_name`.
# NOTE: this cell rebinds the notebook-level `idx`, and leaves `row` and
# `list_filename` bound to the last line / file read (later cells inspect them).
for list_filename in list_filenames:
    with pathmgr.open(list_filename, "r") as f:
        # Read and discard the first line (presumably a header row; verify against the file).
        f.readline()
        for line in f:
            row = line.split()
            # The format of each row should follow:
            # original_video_id video_id frame_id path labels.
            assert len(row) == 5
            video_name = row[0]

            # Assign indices to videos in order of first appearance.
            if video_name not in video_name_to_idx:
                idx = len(video_name_to_idx)
                video_name_to_idx[video_name] = idx
                video_idx_to_name.append(video_name)

            data_key = video_name_to_idx[video_name]

            # The 4th column is the frame path relative to the frame directory.
            image_paths[data_key].append(
                    os.path.join(cfg.DATA.PATH_TO_DATA_DIR, cfg.AVA.FRAME_DIR, row[3])
                )

# Convert the index-keyed dict into a list ordered by video index.
image_paths = [image_paths[i] for i in range(len(image_paths))]

In [None]:
video_idx_to_name

In [None]:
row[0]

In [None]:
list_filename

In [None]:
cfg.DATA

In [None]:
cfg.DATALOADER

In [None]:
cfg

In [None]:
data_loader_test = data_loaders_test[0]

In [None]:
type(data_loader_test)

In [None]:
iterator = iter(data_loader_test)
one_item = next(iterator)

In [None]:
one_item

In [None]:
imgs = one_item[0]

In [None]:
imgs.shape

In [None]:
one_item[1]