In [2]:
import os
import json
import re

In [3]:
CLI_OUTPUT_DIR = "/local/juro4948/data/egoexo4d/egoexo" # Replace with the full path to the --output_directory you pass to the cli
ANNOTATIONS_PATH = os.path.join(CLI_OUTPUT_DIR, "annotations")


# See raw annotations in this dictionary
keystep_anns = json.load(open(os.path.join(ANNOTATIONS_PATH, "keystep_train.json")))
keystep_anns_val = json.load(open(os.path.join(ANNOTATIONS_PATH, "keystep_val.json")))

anns = keystep_anns["annotations"]
print(f'Length of training items: {len(anns)}')
anns_test = keystep_anns_val["annotations"]

# Add anns_test to anns dictionary
anns.update(anns_test)
print(f'Length of training + val items: {len(anns)}')

def get_take_id_from_name(take_name):
    for take_id in anns.keys():
        if anns[take_id]['take_name'] == take_name:
            return take_id
    return None

def get_take_name_from_id(take_id):
    if take_id in anns.keys():
        return anns[take_id]['take_name']
    else:
        return None
    

Length of training items: 671
Length of training + val items: 852


In [4]:
# create symlink to each take 
# assuming that same take's aria and cam are all the in the same split 

import os
import shutil

use_aria = True # True = use aria, False = use all gopro views

n_splits = [1, 2, 3, 4, 5]

# path to split bundle files
root = "/home/juro4948/gravit/GraVi-T/data/annotations/egoexo-omnivore-aria/splits"
# dataset = 'egoexo'

# path to npy egoexo features
features_dir = '/home/juro4948/gravit/data/egoexo4d/egoexo4d/preprocessed_old/egoexo4d_features_npy'
all_features_fn = os.listdir(features_dir)

if use_aria == True:
    out_dir = '/home/juro4948/gravit/GraVi-T/data/features/egoexo-omnivore-aria'
else:
    out_dir = '/home/juro4948/gravit/GraVi-T/data/features/egoexo-omnivore-gopro'


aria_ext = '_aria.._rgb.npy'
# aria_save_ext = '_aria.npy'
gopro_ext = '_cam.._0.npy'

file_list = []
for n_split in n_splits:
    split_dir = os.path.join(out_dir, f'split{n_split}')
    # if dir already exists write over it
    if os.path.exists(split_dir):
        shutil.rmtree(split_dir)
    os.makedirs(split_dir)
    for mode in ['train', 'test']:
        mode_name = mode
        if mode_name == 'test':
            mode_name = 'val'
        os.makedirs(os.path.join(split_dir, mode_name))
        print(f'Split: {n_split} # Mode: {mode}')

        # read every take name from the split file
        txt_path = f'{mode}.split' + str(n_split) + '.bundle'
        with open(os.path.join(root, txt_path), 'r') as f:
            for line in f.readlines():
                line = line.strip('\n')
                line = line.split('.')[0]

                take_name = line 
                take_id = get_take_id_from_name(take_name)
                if take_id is None:
                    print('ID not found. Skipping')
                    continue

                # symlink the pt file
                if use_aria == True:
                    pattern = re.compile(take_id + aria_ext)
                else:
                    pattern = re.compile(take_id + gopro_ext)

                i = 0
                # find all files with matching pattern (finding all views of the same take that correspond to aria vs gopro), and save each symlink
                for fn in all_features_fn:
                    if pattern.match(fn):
                        src = os.path.join(features_dir, fn)
                        dst = os.path.join(split_dir, mode_name, take_name + '_' + str(i) + '.npy')
                        i += 1

                        if os.path.exists(dst):
                            continue
                        if not os.path.exists(src):
                            print(f'File not found: {src}')
                            
                        os.symlink(src, dst)
                        print('Creating symlink')
                        

Split: 1 # Mode: train
Creating symlink
Creating symlink
Creating symlink
Creating symlink
Creating symlink
Creating symlink
Creating symlink
Creating symlink
Creating symlink
Creating symlink
Creating symlink
Creating symlink
Creating symlink
Creating symlink
Creating symlink
Creating symlink
Creating symlink
Creating symlink
Creating symlink
Creating symlink
Creating symlink
Creating symlink
Creating symlink
Creating symlink
Creating symlink
Creating symlink
Creating symlink
Creating symlink
Creating symlink
Creating symlink
Creating symlink
Creating symlink
Creating symlink
Creating symlink
Creating symlink
Creating symlink
Creating symlink
Creating symlink
Creating symlink
Creating symlink
Creating symlink
Creating symlink
Creating symlink
Creating symlink
Creating symlink
Creating symlink
Creating symlink
Creating symlink
Creating symlink
Creating symlink
Creating symlink
Creating symlink
Creating symlink
Creating symlink
Creating symlink
Creating symlink
Creating symlink
Creating

In [5]:
split_dirs = [os.path.join(out_dir, f'split{n_split}') for n_split in n_splits]
file_counts = [len(os.listdir(split_dir)) for split_dir in split_dirs]

file_counts

[1214, 1214, 1214, 1214, 1214]