In [28]:
import torch
from typing import Any, Callable, Dict, List, Optional, Tuple
import h5py
from tqdm.notebook import tqdm
import os
import numpy as np

In [6]:
# reference from https://github.com/facebookresearch/Ego4d/blob/main/ego4d/research/dataset.py#L13

def save_ego4d_features_to_hdf5(video_uids: List[str], feature_dir: str, out_path: str):
    """
    Pack per-video feature tensors into a single HDF5 file with h5py.

    For every uid in `video_uids`, `<feature_dir>/<uid>.pt` is loaded with
    `torch.load` and written to `out_path` as a dataset named after the uid.
    The array is stored with the shape it was loaded with (no squeezing).

    Args:
        video_uids: uids to export; each needs a matching `<uid>.pt` file.
        feature_dir: directory containing the `<uid>.pt` feature files.
        out_path: destination HDF5 file. It is opened in "w" mode, so an
            existing file at this path is replaced.
    """
    with h5py.File(out_path, "w") as out_f:
        for uid in tqdm(video_uids, desc="video_uid", leave=True):
            feature_path = os.path.join(feature_dir, f"{uid}.pt")
            # NOTE(security): torch.load unpickles the file; only point this at
            # feature dumps from a trusted source.
            # map_location="cpu" lets features that were saved from a GPU load
            # on a machine without one (and .numpy() needs a CPU tensor anyway).
            fv = torch.load(feature_path, map_location="cpu")
            # detach() so tensors saved with requires_grad=True can be converted.
            out_f.create_dataset(uid, data=fv.detach().numpy())


def save_ego4d_features_to_numpy(video_uids: List[str], feature_dir: str, out_dir: str):
    """
    Use this function to preprocess EgoExo4D features into individual numpy files.

    For every uid in `video_uids`, `<feature_dir>/<uid>.pt` is loaded, its
    singleton axis 1 is removed, and the result is written to
    `<out_dir>/<uid>.npy`.

    Args:
        video_uids: uids to export; each needs a matching `<uid>.pt` file.
        feature_dir: directory containing the `<uid>.pt` feature files.
        out_dir: output directory; created (including parents) if missing.

    Raises:
        ValueError: from `np.squeeze` if axis 1 of a feature array is not of
            size 1.
    """
    # exist_ok avoids the check-then-create race of `if not exists: makedirs`.
    os.makedirs(out_dir, exist_ok=True)

    for uid in video_uids:
        feature_path = os.path.join(feature_dir, f"{uid}.pt")
        # NOTE(security): torch.load unpickles the file; only point this at
        # feature dumps from a trusted source.
        # map_location="cpu" lets features that were saved from a GPU load on a
        # machine without one (and .numpy() needs a CPU tensor anyway).
        fv = torch.load(feature_path, map_location="cpu")
        # detach() so tensors saved with requires_grad=True can be converted.
        fv_numpy = fv.detach().numpy()
        # Drop only axis 1 so the leading axis survives even when it has length 1.
        fv_numpy_squeezed = np.squeeze(fv_numpy, axis=1)
        np.save(os.path.join(out_dir, f"{uid}.npy"), fv_numpy_squeezed)

# modify to use list of labels
class LabelledFeatureDset(torch.utils.data.Dataset):
    """
    A simple utility class to load features associated with labels. The inputs
    this class takes are as follows:
        1. `feature_hdf5_path`: the features exported to a HDF5 file, one
            dataset per uid. See `save_ego4d_features_to_hdf5`
        2. `uid_label_pairs`: a list of (uid, label). `label` can be any object
            (e.g. a list of labels); it is returned unchanged with the features.
        3. `aggr_function` (optional): a callable `(hdf5_dataset, label) -> feat`
            that turns the stored features for a uid into the returned item.
            When omitted, the whole stored array is returned as a tensor.
    """

    def __init__(
        self,
        feature_hdf5_path: str,
        uid_label_pairs: List[Tuple[str, Any]],
        aggr_function: Optional[Callable[[Any, Any], Any]] = None,
    ):
        self.uid_label_pairs = uid_label_pairs
        # Read-only: this class never writes, and an explicit mode keeps the
        # behaviour the same across h5py versions with different defaults.
        self.features = h5py.File(feature_hdf5_path, "r")
        self.aggr_function = (
            aggr_function if aggr_function is not None else self._load_all_frames
        )

    @staticmethod
    def _load_all_frames(dset: Any, label: Any) -> torch.Tensor:
        """Default aggregation: read the full stored array into a tensor."""
        # `[()]` reads the complete dataset into memory; the label is unused here.
        return torch.as_tensor(dset[()])

    def __len__(self):
        """Number of (uid, label) pairs."""
        return len(self.uid_label_pairs)

    def __getitem__(self, idx: int):
        """Return `(feat, label)` for the idx-th pair.

        A uid that is missing from the feature file surfaces as the lookup
        error raised by `self.features[uid]`.
        """
        uid, label = self.uid_label_pairs[idx]
        feat = self.aggr_function(self.features[uid], label)
        return feat, label

In [19]:
# Example uids, kept for quick experiments on a tiny subset:
# uids = ['0a6a26fa-cbbc-4843-bf28-9f799119a4bc_aria_rgb', '00a6dd13-d5b0-4743-b252-ed61e61f1d49_aria06_rgb']
feature_dir = 'egoexo4d/egoexo/features/omnivore_video/'
# Keep only the *.pt feature dumps (other directory entries are not features)
# and sort them, because os.listdir returns entries in arbitrary order.
files = sorted(fn for fn in os.listdir(feature_dir) if fn.endswith('.pt'))
# splitext strips just the final extension, so a uid containing '.' stays intact.
uids = [os.path.splitext(fn)[0] for fn in files]

# One-off export of all features to HDF5; uncomment to regenerate the file.
# save_ego4d_features_to_hdf5(video_uids=uids, feature_dir=feature_dir, out_path='egoexo4d/preprocessed_old/egoexo4d_features.hdf5')

video_uid:   0%|          | 0/2880 [00:00<?, ?it/s]

In [None]:
# Location of the ground-truth annotations (these are at 30 FPS).
label_dir = "egoexo4d/egoexo/annotations/gravit-groundTruth"



In [20]:
# Feature shape for the `aria_rgb` file of uid prefix 0a6a26fa.
# os.path.join works whether or not feature_dir ends with a separator;
# map_location='cpu' lets the check run on a machine without a GPU.
torch.load(os.path.join(feature_dir, '0a6a26fa-cbbc-4843-bf28-9f799119a4bc_aria_rgb.pt'), map_location='cpu').shape

torch.Size([202, 1, 1536])

In [22]:
# Feature shape for the `cam01_0` file of uid prefix 0a6a26fa.
# os.path.join works whether or not feature_dir ends with a separator;
# map_location='cpu' lets the check run on a machine without a GPU.
torch.load(os.path.join(feature_dir, '0a6a26fa-cbbc-4843-bf28-9f799119a4bc_cam01_0.pt'), map_location='cpu').shape

torch.Size([202, 1, 1536])

In [21]:
# Feature shape for the `aria06_rgb` file of uid prefix 00a6dd13.
# os.path.join works whether or not feature_dir ends with a separator;
# map_location='cpu' lets the check run on a machine without a GPU.
torch.load(os.path.join(feature_dir, '00a6dd13-d5b0-4743-b252-ed61e61f1d49_aria06_rgb.pt'), map_location='cpu').shape

torch.Size([1245, 1, 1536])

In [23]:
# Feature shape for the `cam01_0` file of uid prefix 00a6dd13.
# os.path.join works whether or not feature_dir ends with a separator;
# map_location='cpu' lets the check run on a machine without a GPU.
torch.load(os.path.join(feature_dir, '00a6dd13-d5b0-4743-b252-ed61e61f1d49_cam01_0.pt'), map_location='cpu').shape

torch.Size([1245, 1, 1536])

Shape of the Omnivore Swin-L features for EgoExo4D: `[n_frames, 1, n_features]` (in the files inspected above, `n_features = 1536`).
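The Aria and GoPro feature files inspected above have identical shapes for the same recording, so it seems that all GoPro streams were downsampled to 30 FPS before feature extraction; the labels should therefore be interchangeable between the Aria and GoPro streams.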