In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
import sys
sys.path.append("/Users/piyush/projects/ViDA-SSL/")

In [20]:
import os
from os.path import join
from collections import defaultdict

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

from utils.io import load_json, load_txt

In [4]:
# Render all figure text through LaTeX with a Computer Modern serif face.
# `text.usetex: True` makes matplotlib shell out to an external LaTeX
# toolchain, so plotting cells fail on machines where LaTeX is not installed.
plt.rcParams.update({
    "text.usetex": True,
    "font.family": "serif",
    "font.serif": ["Computer Modern Roman"],
})

In [5]:
# Earlier (inactive) alternatives pointing at the Charades splits:
# train_split_file = "/Users/piyush/projects/ViDA-SSL/data/charades/splits/train.csv"

# Charades split file downloaded from https://prior.allenai.org/projects/charades
# train_split_file = "/Users/piyush/projects/ViDA-SSL/data/charades/splits/Charades_v1_train_videos.csv"

# Active choice: the AVA v2.2 training annotation CSV.
# NOTE(review): absolute, machine-specific path; adjust when running elsewhere.
train_split_file = "/Users/piyush/projects/ViDA-SSL/data/AVA/annotations/ava_train_v2.2.csv"

In [9]:
# The annotation CSV has no header row, so `header=None` keeps the first
# record as data; the columns are then labelled with integer positions 0-7.
train_df = pd.read_csv(train_split_file, header=None)

In [10]:
train_df.shape

(862663, 8)

In [11]:
train_df.columns

Int64Index([0, 1, 2, 3, 4, 5, 6, 7], dtype='int64')

In [13]:
train_df.head()

Unnamed: 0,0,1,2,3,4,5,6,7
0,-5KQ66BBWC4,902,0.077,0.151,0.283,0.811,80,1
1,-5KQ66BBWC4,902,0.077,0.151,0.283,0.811,9,1
2,-5KQ66BBWC4,902,0.226,0.032,0.366,0.497,12,0
3,-5KQ66BBWC4,902,0.226,0.032,0.366,0.497,17,0
4,-5KQ66BBWC4,902,0.226,0.032,0.366,0.497,80,0


In [39]:
# Presumably the frame rate of the extracted frames -- TODO confirm. Not
# referenced by any code visible in this notebook.
FPS = 30
# Frame-second indices 902..1798 inclusive (897 values). parse_bboxes_file
# pre-creates one entry per value for every video, and indexing a second
# outside this range raises KeyError there.
AVA_VALID_FRAMES = range(902, 1799)

# Prefix joined onto the relative frame path (4th field) of each frame-list row.
# NOTE(review): this is a /var/scratch path while the annotation directories
# below live under /Users/... -- confirm both exist on the machine in use.
FRAME_DIR = "/var/scratch/pbagad/datasets/AVA/frames/"
# Directory holding the frame-list files named in TRAIN_LISTS / TEST_LISTS.
FRAME_LIST_DIR = "/Users/piyush/projects/ViDA-SSL/data/AVA/annotations/"
# Directory holding the box/label CSV files named in the *_BOX_LISTS below.
ANNOTATION_DIR = "/Users/piyush/projects/ViDA-SSL/data/AVA/annotations/"

# Rows from non-ground-truth box files are dropped when their 8th column
# (the detection score) is below this value.
DETECTION_SCORE_THRESH = 0.9
# The next three file names are not read by any code visible in this notebook.
LABEL_MAP_FILE = "ava_action_list_v2.2_for_activitynet_2019.pbtxt"
EXCLUSION_FILE = "ava_val_excluded_timestamps_v2.2.csv"
GROUNDTRUTH_FILE = "ava_val_v2.2.csv"

# Frame-list files, relative to FRAME_LIST_DIR.
TRAIN_LISTS = ["train.csv"]
TEST_LISTS = ["val.csv"]

# Box files treated as ground truth in "train" mode (no score filtering).
TRAIN_GT_BOX_LISTS = ["ava_train_v2.2.csv"]
# Box files treated as predictions in "train" mode (score-filtered).
# NOTE(review): "ava_train_v2.2.csv" is also listed as ground truth above, so
# it is parsed twice in "train" mode. On the second pass its 8th column (values
# such as 0 and 1 in the preview of that file) is compared against
# DETECTION_SCORE_THRESH, and labels of surviving rows are appended a second
# time to the same box. Confirm this duplication is intended.
TRAIN_PREDICT_BOX_LISTS = [
"ava_train_v2.2.csv",
"person_box_67091280_iou90/ava_detection_train_boxes_and_labels_include_negative_v2.2.csv",
]
# Box files treated as predictions in every mode other than "train".
TEST_PREDICT_BOX_LISTS = ["person_box_67091280_iou90/ava_detection_val_boxes_and_labels.csv"]

# When False, "val" mode keeps only rows whose frame second is divisible by 4.
FULL_TEST_ON_VAL = True


def load_image_lists(is_train=False):
    """
    Collect per-video frame image paths from the frame-list files.

    The files named in `TRAIN_LISTS` (when `is_train` is true) or
    `TEST_LISTS` (otherwise) are resolved against `FRAME_LIST_DIR` and read
    in order. The first line of each file is skipped as a header; every
    other line must split on whitespace into exactly five fields
    (original_vido_id video_id frame_id path labels).

    Args:
        is_train (bool): read the training lists when true, the test lists
            otherwise.

    Returns:
        image_paths (list[list]): one inner list per video, in order of
            first appearance, holding that video's frame paths (the `path`
            field joined onto `FRAME_DIR`) in file order.
        video_idx_to_name (list): video names; position `i` names the video
            whose frames are in `image_paths[i]`.

    Raises:
        AssertionError: if a data line does not have exactly five fields.
    """
    selected_lists = TRAIN_LISTS if is_train else TEST_LISTS
    list_filenames = [
        os.path.join(FRAME_LIST_DIR, name) for name in selected_lists
    ]

    image_paths = []
    video_idx_to_name = []
    video_name_to_idx = {}

    for list_filename in list_filenames:
        with open(list_filename, "r") as frame_list:
            # Discard the header line.
            next(frame_list, None)
            for line in frame_list:
                fields = line.split()
                assert len(fields) == 5
                video_name, frame_path = fields[0], fields[3]

                video_idx = video_name_to_idx.get(video_name)
                if video_idx is None:
                    # First frame of a new video: give it the next free index.
                    video_idx = len(video_idx_to_name)
                    video_name_to_idx[video_name] = video_idx
                    video_idx_to_name.append(video_name)
                    image_paths.append([])

                image_paths[video_idx].append(
                    os.path.join(FRAME_DIR, frame_path)
                )

    print(
        "Finished loading image paths from: %s" % ", ".join(list_filenames)
    )

    return image_paths, video_idx_to_name

In [40]:
def load_boxes_and_labels(mode):
    """
    Load boxes and their labels from the annotation csv files.

    In 'train' mode the files in `TRAIN_GT_BOX_LISTS` are read as
    ground-truth boxes and those in `TRAIN_PREDICT_BOX_LISTS` as predicted
    boxes. In any other mode only `TEST_PREDICT_BOX_LISTS` is read, as
    predicted boxes. All names are resolved against `ANNOTATION_DIR`.
    Summary statistics are printed once parsing finishes.

    Args:
        mode (str): 'train', 'val', or 'test' mode.
    Returns:
        all_boxes (dict): a dict which maps from `video_name` and
            `frame_sec` to a list of `box`. Each `box` is a
            [`box_coord`, `box_labels`] where `box_coord` is the
            coordinates of box and `box_labels` are the corresponding
            labels for the box.
    """
    is_train = mode == "train"
    gt_lists = TRAIN_GT_BOX_LISTS if is_train else []
    pred_lists = TRAIN_PREDICT_BOX_LISTS if is_train else TEST_PREDICT_BOX_LISTS

    # Ground-truth files come first, so the flag list is a run of True
    # followed by a run of False, aligned with `ann_filenames`.
    ann_filenames = []
    ann_is_gt_box = []
    for is_gt, names in ((True, gt_lists), (False, pred_lists)):
        for name in names:
            ann_filenames.append(os.path.join(ANNOTATION_DIR, name))
            ann_is_gt_box.append(is_gt)

    detect_thresh = DETECTION_SCORE_THRESH

    # Validation keeps only frame_sec % 4 == 0 samples unless
    # FULL_TEST_ON_VAL is set.
    boxes_sample_rate = 1
    if mode == "val" and not FULL_TEST_ON_VAL:
        boxes_sample_rate = 4

    all_boxes, count, unique_box_count = parse_bboxes_file(
        ann_filenames=ann_filenames,
        ann_is_gt_box=ann_is_gt_box,
        detect_thresh=detect_thresh,
        boxes_sample_rate=boxes_sample_rate,
    )

    print(
        "Finished loading annotations from: %s" % ", ".join(ann_filenames)
    )
    print("Detection threshold: {}".format(detect_thresh))
    print("Number of unique boxes: %d" % unique_box_count)
    print("Number of annotations: %d" % count)

    return all_boxes


def parse_bboxes_file(
    ann_filenames, ann_is_gt_box, detect_thresh, boxes_sample_rate=1
):
    """
    Parse AVA bounding boxes files.

    Each line is a comma-separated row: video name, frame second, four box
    coordinates, a label (may be empty) and, for predicted-box files, a
    detection score in the eighth column.

    Args:
        ann_filenames (list of str(s)): a list of AVA bounding boxes annotation files.
        ann_is_gt_box (list of bools): a list of boolean to indicate whether the corresponding
            ann_file is ground-truth. `ann_is_gt_box[i]` correspond to `ann_filenames[i]`.
        detect_thresh (float): threshold for accepting predicted boxes, range [0, 1].
        boxes_sample_rate (int): sample rate for test bounding boxes. Get 1 every `boxes_sample_rate`.
    Returns:
        all_boxes (dict): maps `video_name` -> `frame_sec` -> list of
            [`box_coord`, `box_labels`]. Every video seen gets an entry for
            each second in `AVA_VALID_FRAMES`, empty when no box survived.
        count (int): number of labels kept, excluding the -1 placeholder
            recorded for rows with an empty label.
        unique_box_count (int): number of distinct (video, second, box)
            combinations kept.
    Raises:
        KeyError: if a kept row's frame second is not in `AVA_VALID_FRAMES`.
    """
    boxes_by_video = {}
    num_labels = 0
    num_unique_boxes = 0

    for path, is_gt in zip(ann_filenames, ann_is_gt_box):
        with open(path, "r") as handle:
            for raw_line in handle:
                fields = raw_line.strip().split(",")

                # Predicted boxes are only kept when their score reaches
                # the threshold; ground-truth rows are never score-filtered.
                if not is_gt and float(fields[7]) < detect_thresh:
                    continue

                video_name = fields[0]
                frame_sec = int(fields[1])
                if frame_sec % boxes_sample_rate:
                    continue

                # Box is [x1, y1, x2, y2] as floats; the raw coordinate text
                # doubles as the de-duplication key within a frame.
                coord_text = fields[2:6]
                box_key = ",".join(coord_text)
                box = [float(value) for value in coord_text]
                label = int(fields[6]) if fields[6] != "" else -1

                if video_name not in boxes_by_video:
                    boxes_by_video[video_name] = {
                        sec: {} for sec in AVA_VALID_FRAMES
                    }

                frame_boxes = boxes_by_video[video_name][frame_sec]
                if box_key not in frame_boxes:
                    frame_boxes[box_key] = [box, []]
                    num_unique_boxes += 1

                frame_boxes[box_key][1].append(label)
                if label != -1:
                    num_labels += 1

    # Drop the de-duplication keys: each frame ends up as a list of
    # [box_i, box_i_labels].
    for per_frame in boxes_by_video.values():
        for frame_sec, keyed_boxes in per_frame.items():
            per_frame[frame_sec] = list(keyed_boxes.values())

    return boxes_by_video, num_labels, num_unique_boxes

In [29]:
image_paths, video_idx_to_name = load_image_lists(is_train=True)

Finished loading image paths from: /Users/piyush/projects/ViDA-SSL/data/AVA/annotations/train.csv


In [30]:
len(image_paths), len(video_idx_to_name)

(235, 235)

In [34]:
len(image_paths[200])

27030

In [41]:
all_boxes = load_boxes_and_labels(mode="train")

Finished loading annotations from: /Users/piyush/projects/ViDA-SSL/data/AVA/annotations/ava_train_v2.2.csv, /Users/piyush/projects/ViDA-SSL/data/AVA/annotations/ava_train_v2.2.csv, /Users/piyush/projects/ViDA-SSL/data/AVA/annotations/person_box_67091280_iou90/ava_detection_train_boxes_and_labels_include_negative_v2.2.csv
Detection threshold: 0.9
Number of unique boxes: 666191
Number of annotations: 2507679


In [42]:
len(all_boxes)

235

In [48]:
len(all_boxes[video_idx_to_name[0]])

897

In [49]:
all_boxes[video_idx_to_name[0]]

{902: [[[0.077, 0.151, 0.283, 0.811], [80, 9, 80, 9]],
  [[0.226, 0.032, 0.366, 0.497], [12, 17, 80]],
  [[0.332, 0.194, 0.481, 0.891], [80, 9, 80, 9]],
  [[0.505, 0.105, 0.653, 0.78], [9, 9]],
  [[0.626, 0.146, 0.805, 0.818], [9, 9]],
  [[0.805, 0.222, 0.997, 1.0], [80, 9, 80, 9]],
  [[0.326, 0.185, 0.47, 0.887], [80, 9]],
  [[0.626, 0.153, 0.797, 0.838], [9]],
  [[0.508, 0.117, 0.648, 0.777], [9]],
  [[0.222, 0.031, 0.362, 0.529], [80, 17, 12]],
  [[0.805, 0.289, 0.997, 0.991], [80, 9]]],
 903: [[[0.0, 0.162, 0.177, 0.804], [80, 9, 80, 9]],
  [[0.141, 0.158, 0.298, 0.825], [12, 80, 12, 80]],
  [[0.226, 0.026, 0.363, 0.512], [12, 80]],
  [[0.328, 0.182, 0.484, 0.895], [80, 9, 80, 9]],
  [[0.507, 0.147, 0.666, 0.789], [80, 9, 80, 9]],
  [[0.642, 0.158, 0.791, 0.859], [9, 9]],
  [[0.785, 0.15, 0.886, 0.703], [12, 80, 12, 80]],
  [[0.802, 0.267, 0.994, 0.971], [80, 9, 80, 9]],
  [[0.865, 0.158, 0.991, 0.436], [80, 9, 80, 9]],
  [[0.009, 0.183, 0.147, 0.84], [80, 9]],
  [[0.326, 0.172, 0.

In [50]:
# Path of the predicted person-box file for the training split. Built from
# the same constants the loader uses (ANNOTATION_DIR and the second entry of
# TRAIN_PREDICT_BOX_LISTS) so the file inspected here is always the one
# load_boxes_and_labels actually reads.
path = join(ANNOTATION_DIR, TRAIN_PREDICT_BOX_LISTS[1])

In [51]:
path

'/Users/piyush/projects/ViDA-SSL/data/AVA/annotations/person_box_67091280_iou90/ava_detection_train_boxes_and_labels_include_negative_v2.2.csv'

In [57]:
# The detection CSV has no header row either, so columns are labelled 0-7.
# In this file column 7 holds the detection score that parse_bboxes_file
# compares against the threshold for non-ground-truth files.
df = pd.read_csv(path, header=None)

In [58]:
df.shape

(1054563, 8)

In [59]:
df.head()

Unnamed: 0,0,1,2,3,4,5,6,7
0,-5KQ66BBWC4,902,0.326,0.185,0.47,0.887,80,0.996382
1,-5KQ66BBWC4,902,0.326,0.185,0.47,0.887,9,0.996382
2,-5KQ66BBWC4,902,0.626,0.153,0.797,0.838,9,0.987177
3,-5KQ66BBWC4,902,0.508,0.117,0.648,0.777,9,0.903317
4,-5KQ66BBWC4,902,0.222,0.031,0.362,0.529,80,0.983264
