## main

In [1]:
import argparse
import os

import torch
from alphaction.config import cfg
from alphaction.dataset import make_data_loader
from alphaction.engine.inference import inference
from alphaction.modeling.detector import build_detection_model
from alphaction.utils.checkpoint import ActionCheckpointer
from torch.utils.collect_env import get_pretty_env_info
from alphaction.utils.comm import synchronize, get_rank
from alphaction.utils.logger import setup_logger
#pytorch issuse #973
import resource

rlimit = resource.getrlimit(resource.RLIMIT_NOFILE)
resource.setrlimit(resource.RLIMIT_NOFILE, (rlimit[1], rlimit[1]))

In [2]:
rlimit

(1048576, 1048576)

### Config file

In [3]:
config_file = '../config_files/VMAE-ViTB-16x4.yaml'

changing config parameters

In [4]:
cfg.merge_from_file(config_file)



In [5]:
# change model weight path
cfg.merge_from_list(["MODEL.WEIGHT", "../checkpoints/VMAE_ViTB_16x4.pth"])
# change output dir
cfg.merge_from_list(["OUTPUT_DIR", "../output_dir/"])


# change path for data_dir
cfg.merge_from_list(["DATA.PATH_TO_DATA_DIR", "/work/ava"])

# folder name of annotations
cfg.merge_from_list(["AVA.ANNOTATION_DIR", "annotations/"])

# file name of  frame_lists
cfg.merge_from_list(["AVA.TRAIN_LISTS", ['sample.csv']])
cfg.merge_from_list(["AVA.TEST_LISTS", ['sample.csv']])

# file name of predicted_bboxes
cfg.merge_from_list(["AVA.TRAIN_GT_BOX_LISTS", ['ava_sample_predicted_boxes.csv']])
cfg.merge_from_list(["AVA.TEST_GT_BOX_LISTS", ['ava_sample_predicted_boxes.csv']])

# file name of exlusions
cfg.merge_from_list(["AVA.EXCLUSION_FILE", 'ava_sample_train_excluded_timestamps_v2.2.csv'])

# number of batches in test scenario
cfg.merge_from_list(["TEST.VIDEOS_PER_BATCH", 1])

# number of workers
cfg.merge_from_list(["DATALOADER.NUM_WORKERS", 1])


In [6]:
# Print experimental infos.
save_dir = ""
logger = setup_logger("alphaction", save_dir, get_rank())
logger.info("Using {} GPUs".format(1))
logger.info(cfg)

2024-01-22 09:51:16,183 alphaction INFO: Using 1 GPUs
2024-01-22 09:51:16,184 alphaction INFO: AK:
  TEST_GT_BOX_LISTS: ['kinetics/ak_val_gt.csv']
AVA:
  ANNOTATION_DIR: annotations/
  BGR: False
  EXCLUSION_FILE: ava_sample_train_excluded_timestamps_v2.2.csv
  FRAME_DIR: frames/
  FRAME_LIST_DIR: frame_lists/
  GROUNDTRUTH_FILE: ava_val_v2.2.csv
  LABEL_MAP_FILE: ava_action_list_v2.2_for_activitynet_2019.pbtxt
  STRICT_EVAL: True
  TEST_FORCE_FLIP: False
  TEST_GT_BOX_LISTS: ['ava_sample_predicted_boxes.csv']
  TEST_LISTS: ['sample.csv']
  TRAIN_GT_BOX_LISTS: ['ava_sample_predicted_boxes.csv']
  TRAIN_LISTS: ['sample.csv']
  TRAIN_PCA_EIGVAL: [0.225, 0.224, 0.229]
  TRAIN_PCA_EIGVEC: [[-0.5675, 0.7192, 0.4009], [-0.5808, -0.0045, -0.814], [-0.5836, -0.6948, 0.4203]]
  TRAIN_PCA_JITTER_ONLY: True
  TRAIN_USE_COLOR_AUGMENTATION: False
DATA:
  DATASETS: ['ava_v2.2']
  DECODING_BACKEND: pyav
  INPUT_CHANNEL_NUM: [3]
  MEAN: [0.45, 0.45, 0.45]
  NUM_FRAMES: 16
  PATH_TO_DATA_DIR: /work/ava

### building model

In [7]:
model = build_detection_model(cfg)

In [8]:
model.to("cuda")

STMDetector(
  (backbone): ViT(
    (patch_embed): PatchEmbed(
      (proj): Conv3d(3, 768, kernel_size=(2, 16, 16), stride=(2, 16, 16))
    )
    (pos_drop): Dropout(p=0.0, inplace=False)
    (blocks): ModuleList(
      (0): Block(
        (norm1): LayerNorm((768,), eps=1e-06, elementwise_affine=True)
        (attn): Attention(
          (qkv): Linear(in_features=768, out_features=2304, bias=False)
          (attn_drop): Dropout(p=0.0, inplace=False)
          (proj): Linear(in_features=768, out_features=768, bias=True)
          (proj_drop): Dropout(p=0.0, inplace=False)
        )
        (drop_path): Identity()
        (norm2): LayerNorm((768,), eps=1e-06, elementwise_affine=True)
        (mlp): Mlp(
          (fc1): Linear(in_features=768, out_features=3072, bias=True)
          (act): GELU(approximate='none')
          (fc2): Linear(in_features=3072, out_features=768, bias=True)
          (drop): Dropout(p=0.0, inplace=False)
        )
      )
      (1): Block(
        (norm1): La

### loading weight

In [2]:
output_dir = cfg.OUTPUT_DIR

NameError: name 'cfg' is not defined

In [1]:
checkpointer = ActionCheckpointer(cfg, model, save_dir=output_dir)
checkpointer.load(cfg.MODEL.WEIGHT)

NameError: name 'ActionCheckpointer' is not defined

In [11]:
num_gpus = int(os.environ["WORLD_SIZE"]) if "WORLD_SIZE" in os.environ else 1
num_gpus

1

In [12]:
distributed = num_gpus > 1
distributed

False

In [13]:
cfg.DATA.DATASETS

['ava_v2.2']

In [14]:
output_folders = [None] * len(cfg.DATA.DATASETS)
output_folders

[None]

In [15]:
dataset_names = cfg.DATA.DATASETS
dataset_names

['ava_v2.2']

In [16]:
mem_active = cfg.MODEL.STM.MEM_ACTIVE
mem_active  

False

In [17]:
if cfg.OUTPUT_DIR:
    for idx, dataset_name in enumerate(dataset_names):
        output_folder = os.path.join(cfg.OUTPUT_DIR, "inference", dataset_name)
        os.makedirs(output_folder, exist_ok=True)
        output_folders[idx] = output_folder

output_folders

['../output_dir/inference/ava_v2.2']

### create data loaders

In [18]:
data_loaders_test = make_data_loader(cfg, is_train=False, is_distributed=distributed)

2024-01-22 09:51:24,751 alphaction.dataset.datasets.ava_helper INFO: Finished loading image paths from: /work/ava/frame_lists/sample.csv
2024-01-22 09:51:24,766 alphaction.dataset.datasets.ava_helper INFO: Finished loading annotations from: /work/ava/annotations/ava_sample_predicted_boxes.csv
2024-01-22 09:51:24,767 alphaction.dataset.datasets.ava_helper INFO: Number of unique boxes: 1828
2024-01-22 09:51:24,768 alphaction.dataset.datasets.ava_helper INFO: Number of annotations: 2300
2024-01-22 09:51:24,769 alphaction.dataset.datasets.ava_helper INFO: 860 keyframes used.
2024-01-22 09:51:24,770 alphaction.dataset.datasets.ava_dataset INFO: === AVA dataset summary ===
2024-01-22 09:51:24,771 alphaction.dataset.datasets.ava_dataset INFO: Split: test
2024-01-22 09:51:24,772 alphaction.dataset.datasets.ava_dataset INFO: Number of videos: 1
2024-01-22 09:51:24,772 alphaction.dataset.datasets.ava_dataset INFO: Number of frames: 27030
2024-01-22 09:51:24,773 alphaction.dataset.datasets.ava_da

### INSIDE INFERENCE

In [19]:
output_folder = output_folders[0]
output_folder

'../output_dir/inference/ava_v2.2'

In [20]:
dataset_name = dataset_names[0]
dataset_name

'ava_v2.2'

In [21]:
data_loader_test = data_loaders_test[0]
data_loader_test

<torch.utils.data.dataloader.DataLoader at 0x7ff136dd9ee0>

#### IMPORTING OF MODULES INSIDE INFERENCE

In [22]:
import logging
import os

import torch
from tqdm import tqdm
import time
import datetime

from alphaction.dataset.datasets.evaluation import evaluate
from alphaction.utils.comm import get_rank, is_main_process, all_gather, gather, synchronize, get_world_size
from alphaction.structures.memory_pool import MemoryPool


In [23]:
device = torch.device("cuda")

In [24]:
device

device(type='cuda')

In [25]:
num_devices = get_world_size()
num_devices

1

In [26]:
logger = logging.getLogger("alphaction.inference")

In [27]:
dataset = data_loader_test.dataset

In [28]:
logger.info("Start evaluation on {} dataset({} videos).".format(dataset_name, len(dataset)))

2024-01-22 09:51:24,841 alphaction.inference INFO: Start evaluation on ava_v2.2 dataset(860 videos).


In [29]:
model.eval()

STMDetector(
  (backbone): ViT(
    (patch_embed): PatchEmbed(
      (proj): Conv3d(3, 768, kernel_size=(2, 16, 16), stride=(2, 16, 16))
    )
    (pos_drop): Dropout(p=0.0, inplace=False)
    (blocks): ModuleList(
      (0): Block(
        (norm1): LayerNorm((768,), eps=1e-06, elementwise_affine=True)
        (attn): Attention(
          (qkv): Linear(in_features=768, out_features=2304, bias=False)
          (attn_drop): Dropout(p=0.0, inplace=False)
          (proj): Linear(in_features=768, out_features=768, bias=True)
          (proj_drop): Dropout(p=0.0, inplace=False)
        )
        (drop_path): Identity()
        (norm2): LayerNorm((768,), eps=1e-06, elementwise_affine=True)
        (mlp): Mlp(
          (fc1): Linear(in_features=768, out_features=3072, bias=True)
          (act): GELU(approximate='none')
          (fc2): Linear(in_features=3072, out_features=768, bias=True)
          (drop): Dropout(p=0.0, inplace=False)
        )
      )
      (1): Block(
        (norm1): La

If False, 'compute_on_dataset' will call 'compute_on_dataset_1stage',else, call 'compute_on_dataset_2stage'

In [30]:
mem_active

False

##### inside of 'compute_on_dataset_1stage'

In [31]:
# results_dict = compute_on_dataset_1stage(model, data_loader, device)


In [32]:
results_dict = {}
if get_world_size() == 1:
    extra_args = {}
else:
    rank = get_rank()
    extra_args = dict(desc="rank {}".format(rank))
    
extra_args

{}

In [33]:
batch = next(iter(data_loader_test))

In [34]:
batch

(tensor([[[[[-1.8431, -1.8431, -1.8431,  ...,  0.0000,  0.0000,  0.0000],
            [-1.8431, -1.8431, -1.8431,  ...,  0.0000,  0.0000,  0.0000],
            [-1.8431, -1.8431, -1.8431,  ...,  0.0000,  0.0000,  0.0000],
            ...,
            [-0.8322, -0.9194, -0.9368,  ...,  0.0000,  0.0000,  0.0000],
            [-0.5359, -0.6231, -0.6928,  ...,  0.0000,  0.0000,  0.0000],
            [-0.5185, -0.6580, -0.8148,  ...,  0.0000,  0.0000,  0.0000]],
 
           [[-1.8431, -1.8431, -1.8431,  ...,  0.0000,  0.0000,  0.0000],
            [-1.8431, -1.8431, -1.8431,  ...,  0.0000,  0.0000,  0.0000],
            [-1.8431, -1.8431, -1.8431,  ...,  0.0000,  0.0000,  0.0000],
            ...,
            [-0.7800, -0.8148, -0.8148,  ...,  0.0000,  0.0000,  0.0000],
            [-0.5359, -0.6057, -0.6580,  ...,  0.0000,  0.0000,  0.0000],
            [-0.5882, -0.7277, -0.8497,  ...,  0.0000,  0.0000,  0.0000]],
 
           [[-1.8780, -1.8780, -1.8780,  ...,  0.0000,  0.0000,  0.0000]

In [35]:
if False:
    with torch.no_grad():
        for batch in tqdm(data_loader_test, **extra_args):
            slow_video, fast_video, whwh, boxes, labels, metadata, idx = batch
            slow_video = slow_video.to(device)
            if fast_video is not None:
                fast_video = fast_video.to(device)
            whwh = whwh.to(device)
            action_score_list, box_list = model(slow_video, fast_video, whwh, boxes, labels)
            break

In [36]:
with torch.no_grad():

    slow_video, fast_video, whwh, boxes, labels, metadata, idx = batch

    slow_video = slow_video.to(device)


    if fast_video is not None:
        fast_video = fast_video.to(device)
    whwh = whwh.to(device)
    
    # INFERENCE
    action_score_list, box_list = model(slow_video, fast_video, whwh, boxes, labels)




### INPUT BATCH DECOMPOSITION

#### slow_video

In [37]:
slow_video.shape # 1x3x16x256x352

torch.Size([1, 3, 16, 256, 352])

In [38]:
slow_video.dtype, slow_video.device # torch.float32, device(type='cuda', index=0)

(torch.float32, device(type='cuda', index=0))

In [39]:
slow_video[0,2,0,0:5,0:5]

tensor([[-1.8606, -1.8606, -1.8606, -1.8606, -1.8606],
        [-1.8606, -1.8606, -1.8606, -1.8606, -1.8606],
        [-1.8606, -1.8606, -1.8606, -1.8606, -1.8606],
        [-1.8431, -1.8431, -1.8431, -1.8431, -1.8431],
        [-1.8257, -1.8257, -1.8257, -1.8257, -1.8257]], device='cuda:0')

In [40]:
fast_video.shape

AttributeError: 'NoneType' object has no attribute 'shape'

#### whwh

In [41]:
whwh # tensor([[346., 256., 346., 256.]], device='cuda:0')

tensor([[346., 256., 346., 256.]], device='cuda:0')

In [42]:
whwh.shape, whwh.dtype, whwh.device # torch.Size([1, 4]), torch.float32, device(type='cuda', index=0)

(torch.Size([1, 4]), torch.float32, device(type='cuda', index=0))

### OUPUT MODEL DECOMPOSITION

#### action_score_list

In [43]:
action_score_list

[tensor([[9.1237e-04, 1.8565e-06, 3.8518e-04, 5.2749e-03, 1.0854e-04, 1.1021e-04,
          4.8070e-04, 1.1244e-04, 3.7187e-02, 3.0474e-04, 6.2452e-04, 9.7247e-01,
          4.7521e-06, 2.7996e-03, 7.5738e-05, 3.3534e-06, 9.5618e-02, 3.4626e-04,
          2.2128e-06, 5.8158e-06, 1.6095e-05, 4.5821e-04, 1.6409e-06, 1.5436e-05,
          7.7230e-07, 2.0286e-04, 1.5550e-03, 1.4416e-05, 1.8330e-04, 1.5747e-05,
          4.1002e-06, 1.7715e-09, 2.1835e-05, 9.0983e-04, 1.5934e-04, 4.9503e-05,
          1.5614e-03, 1.3252e-04, 1.9455e-05, 5.2948e-06, 1.4612e-03, 4.8311e-05,
          1.2096e-04, 1.1396e-05, 3.6527e-04, 1.2366e-04, 7.0961e-05, 2.1094e-05,
          1.3564e-04, 7.7215e-08, 4.4962e-05, 1.2097e-04, 2.2823e-06, 1.5885e-03,
          4.0419e-05, 5.5051e-03, 2.5459e-04, 2.1811e-03, 7.6489e-03, 2.3249e-05,
          8.6529e-04, 5.5949e-06, 1.0235e-04, 1.0506e-02, 6.0015e-05, 8.2875e-03,
          9.4192e-04, 8.3792e-05, 1.2272e-02, 8.4177e-04, 7.4537e-05, 8.0691e-05,
          1.2542

In [44]:
len(action_score_list) # 1

1

In [45]:
action_score_list[0].shape, action_score_list[0].dtype, action_score_list[0].device, action_score_list[0].requires_grad

# (torch.Size([10, 80]), torch.float32, device(type='cuda', index=0), True)


(torch.Size([10, 80]), torch.float32, device(type='cuda', index=0), False)

#### bbox_list

In [46]:
box_list

[tensor([[0.2160, 0.0277, 0.3633, 0.5164],
         [0.5058, 0.1586, 0.6449, 0.7940],
         [0.6798, 0.0875, 0.7881, 0.3152],
         [0.1277, 0.1867, 0.2822, 0.8096],
         [0.6339, 0.1676, 0.8083, 0.8363],
         [0.0031, 0.1619, 0.1232, 0.8392],
         [0.7782, 0.1503, 0.8620, 0.3536],
         [0.8028, 0.2952, 0.9971, 0.9894],
         [0.8480, 0.1634, 0.9498, 0.4398],
         [0.3250, 0.1778, 0.4951, 0.8835]], device='cuda:0')]

In [47]:
len(box_list), box_list[0].shape, box_list[0].dtype, box_list[0].requires_grad
# (1, torch.Size([10, 4]), torch.float32, True)


(1, torch.Size([10, 4]), torch.float32, False)

#### Output of compute_on_dataset

In [48]:
results_dict.update(
                {video_id: (box.cpu(), action_score.cpu()) for video_id, box, action_score in zip(idx, box_list, action_score_list)}
            )

In [49]:
results_dict.keys()

dict_keys([0])

#### inside of '_accumulate_predictions_from_multiple_gpus'

In [50]:
from alphaction.engine.inference import _accumulate_predictions_from_multiple_gpus

In [51]:
import copy

In [52]:
predictions = copy.deepcopy(results_dict)

In [53]:
predictions = _accumulate_predictions_from_multiple_gpus(predictions)

In [57]:
type(predictions), len(predictions)
# (list, 1)

(list, 1)

In [60]:
type(predictions[0]), len(predictions[0]) # tuple, 2

(tuple, 2)

In [63]:
predictions[0][0].shape, predictions[0][0].device
# (torch.Size([10, 4]), device(type='cpu'))

(torch.Size([10, 4]), device(type='cpu'))

In [64]:
predictions[0][0][0,:]
# tensor([0.2160, 0.0277, 0.3633, 0.5164])

tensor([0.2160, 0.0277, 0.3633, 0.5164])

In [67]:
predictions[0][1].shape, predictions[0][1].device
# (torch.Size([10, 80]), device(type='cpu'))

(torch.Size([10, 80]), device(type='cpu'))

In [69]:
predictions[0][1][0,:]
# tensor([9.1237e-04, 1.8565e-06, 3.8518e-04, 5.2749e-03, 1.0854e-04, 1.1021e-04,
#        4.8070e-04, 1.1244e-04, 3.7187e-02, 3.0474e-04, 6.2452e-04, 9.7247e-01,
#        4.7521e-06, 2.7996e-03, 7.5738e-05, 3.3534e-06, 9.5618e-02, 3.4626e-04,
#        2.2128e-06, 5.8158e-06, 1.6095e-05, 4.5821e-04, 1.6409e-06, 1.5436e-05,
#        7.7230e-07, 2.0286e-04, 1.5550e-03, 1.4416e-05, 1.8330e-04, 1.5747e-05,
#        4.1002e-06, 1.7715e-09, 2.1835e-05, 9.0983e-04, 1.5934e-04, 4.9503e-05,
#        1.5614e-03, 1.3252e-04, 1.9455e-05, 5.2948e-06, 1.4612e-03, 4.8311e-05,
#        1.2096e-04, 1.1396e-05, 3.6527e-04, 1.2366e-04, 7.0961e-05, 2.1094e-05,
#        1.3564e-04, 7.7215e-08, 4.4962e-05, 1.2097e-04, 2.2823e-06, 1.5885e-03,
#        4.0419e-05, 5.5051e-03, 2.5459e-04, 2.1811e-03, 7.6489e-03, 2.3249e-05,
#        8.6529e-04, 5.5949e-06, 1.0235e-04, 1.0506e-02, 6.0015e-05, 8.2875e-03,
#        9.4192e-04, 8.3792e-05, 1.2272e-02, 8.4177e-04, 7.4537e-05, 8.0691e-05,
#        1.2542e-04, 3.9908e-01, 7.4738e-05, 2.7076e-03, 1.1440e-02, 1.8044e-04,
#        6.5097e-02, 8.0956e-01])

tensor([9.1237e-04, 1.8565e-06, 3.8518e-04, 5.2749e-03, 1.0854e-04, 1.1021e-04,
        4.8070e-04, 1.1244e-04, 3.7187e-02, 3.0474e-04, 6.2452e-04, 9.7247e-01,
        4.7521e-06, 2.7996e-03, 7.5738e-05, 3.3534e-06, 9.5618e-02, 3.4626e-04,
        2.2128e-06, 5.8158e-06, 1.6095e-05, 4.5821e-04, 1.6409e-06, 1.5436e-05,
        7.7230e-07, 2.0286e-04, 1.5550e-03, 1.4416e-05, 1.8330e-04, 1.5747e-05,
        4.1002e-06, 1.7715e-09, 2.1835e-05, 9.0983e-04, 1.5934e-04, 4.9503e-05,
        1.5614e-03, 1.3252e-04, 1.9455e-05, 5.2948e-06, 1.4612e-03, 4.8311e-05,
        1.2096e-04, 1.1396e-05, 3.6527e-04, 1.2366e-04, 7.0961e-05, 2.1094e-05,
        1.3564e-04, 7.7215e-08, 4.4962e-05, 1.2097e-04, 2.2823e-06, 1.5885e-03,
        4.0419e-05, 5.5051e-03, 2.5459e-04, 2.1811e-03, 7.6489e-03, 2.3249e-05,
        8.6529e-04, 5.5949e-06, 1.0235e-04, 1.0506e-02, 6.0015e-05, 8.2875e-03,
        9.4192e-04, 8.3792e-05, 1.2272e-02, 8.4177e-04, 7.4537e-05, 8.0691e-05,
        1.2542e-04, 3.9908e-01, 7.4738e-

#### evaluate

In [70]:
from alphaction.dataset.datasets.evaluation import evaluate

In [72]:
output_folder

'../output_dir/inference/ava_v2.2'

In [73]:
dataset

<alphaction.dataset.datasets.ava_dataset.Ava at 0x7ff136dd38e0>

In [74]:
evaluate(
        dataset=dataset,
        predictions=predictions,
        output_folder=output_folder,
    )

2024-01-22 09:58:23,583 alphaction.inference INFO: performing ava evaluation.
2024-01-22 09:58:23,585 alphaction.inference INFO: Preparing results for AVA format
2024-01-22 09:58:23,586 alphaction.inference INFO: Evaluating predictions
2024-01-22 09:58:23,594 alphaction.inference INFO: ==> 0.00683141 seconds to write file ../output_dir/inference/ava_v2.2/result.csv
2024-01-22 09:58:23,600 alphaction.inference INFO: CATEGORIES (60):
[ {'id': 1, 'name': 'bend/bow (at the waist)'},
  {'id': 3, 'name': 'crouch/kneel'},
  {'id': 4, 'name': 'dance'},
  {'id': 5, 'name': 'fall down'},
  {'id': 6, 'name': 'get up'},
  {'id': 7, 'name': 'jump/leap'},
  {'id': 8, 'name': 'lie/sleep'},
  {'id': 9, 'name': 'martial art'},
  {'id': 10, 'name': 'run/jog'},
  {'id': 11, 'name': 'sit'},
  {'id': 12, 'name': 'stand'},
  {'id': 13, 'name': 'swim'},
  {'id': 14, 'name': 'walk'},
  {'id': 15, 'name': 'answer phone'},
  {'id': 17, 'name': 'carry/hold (an object)'},
  {'id': 20, 'name': 'climb (e.g., a moun

({'PascalBoxes_Precision/mAP@0.5IOU': 0.0013085936331720162,
  'PascalBoxes_PerformanceByCategory/AP@0.5IOU/bend/bow (at the waist)': 0.0,
  'PascalBoxes_PerformanceByCategory/AP@0.5IOU/crouch/kneel': 0.0,
  'PascalBoxes_PerformanceByCategory/AP@0.5IOU/dance': 0.0,
  'PascalBoxes_PerformanceByCategory/AP@0.5IOU/fall down': 0.0,
  'PascalBoxes_PerformanceByCategory/AP@0.5IOU/get up': 0.0,
  'PascalBoxes_PerformanceByCategory/AP@0.5IOU/jump/leap': 0.0,
  'PascalBoxes_PerformanceByCategory/AP@0.5IOU/lie/sleep': 0.0,
  'PascalBoxes_PerformanceByCategory/AP@0.5IOU/martial art': 0.029069767441860465,
  'PascalBoxes_PerformanceByCategory/AP@0.5IOU/run/jog': 0.0,
  'PascalBoxes_PerformanceByCategory/AP@0.5IOU/sit': 0.0,
  'PascalBoxes_PerformanceByCategory/AP@0.5IOU/stand': 0.002403846153846154,
  'PascalBoxes_PerformanceByCategory/AP@0.5IOU/swim': nan,
  'PascalBoxes_PerformanceByCategory/AP@0.5IOU/walk': 0.0,
  'PascalBoxes_PerformanceByCategory/AP@0.5IOU/answer phone': nan,
  'PascalBoxes_P