<a href="https://colab.research.google.com/github/harry-graves/Aria_ORI/blob/main/epic_kitchens_visor_preprocessing.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# EPIC-KITCHENS-VISOR dataset preprocessing
This notebook sorts frames from the EPIC-KITCHENS-VISOR dataset into those that contain a hand-object interaction and those that do not. The sorting is done for one hand at a time (left or right), and the sorted frames are then used to train a small CNN classifier.

The dataset was NOT recorded with the Project Aria glasses, so no eye-tracking data is available. It therefore cannot be used to train a model conditioned on eye-tracking data, but it is useful as a demonstration of image classification.

Furthermore, the images are not square (the majority are 1920x1080) and they lack the vignette present in the Aria glasses' RGB frames. A model trained on them is therefore unlikely to generalise to frames recorded with the Aria glasses.

In [None]:
import numpy as np
import tensorflow as tf
import os
import json
import zipfile
from google.colab import drive
import shutil

# Mount Google Drive at /content/drive (Colab only; prompts for authorisation).
# The sorted image folders are archived to this mount point later in the notebook.
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
# Video IDs to download. Each ID is used as the file stem of one annotation
# JSON and one RGB-frame ZIP archive.
training_filenames = ['P01_01', 'P01_03', 'P01_05', 'P01_07', 'P01_09', 'P01_103', 'P01_104', 'P01_14', 'P02_01', 'P02_03', 'P02_07', 'P02_101', 'P02_102', 'P02_107', 'P02_109', 'P02_121', 'P02_122', 'P02_124', 'P02_128', 'P02_130', 'P02_132', 'P02_135', 'P03_03', 'P03_04', 'P03_05', 'P03_101', 'P03_11', 'P03_112', 'P03_113', 'P03_123', 'P03_13', 'P03_17', 'P03_23', 'P03_24', 'P04_02', 'P04_03', 'P04_04', 'P04_05', 'P04_101', 'P04_109', 'P04_11', 'P04_110', 'P04_114', 'P04_12', 'P04_121', 'P04_21', 'P04_25', 'P04_26', 'P04_33', 'P05_01', 'P05_08', 'P06_01', 'P06_07', 'P06_09', 'P06_101', 'P06_102', 'P06_103', 'P06_107', 'P06_11', 'P06_110', 'P06_12', 'P06_13', 'P06_14', 'P07_08', 'P08_09', 'P08_16', 'P08_21', 'P10_04', 'P11_101', 'P11_102', 'P11_103', 'P11_104', 'P11_105', 'P11_107', 'P11_16', 'P12_02', 'P12_03', 'P12_101', 'P13_10', 'P14_05', 'P15_02', 'P17_01', 'P18_03', 'P18_06', 'P18_07', 'P20_03', 'P22_01', 'P22_07', 'P22_117', 'P23_02', 'P23_05', 'P24_05', 'P24_08', 'P25_107', 'P26_110', 'P27_101', 'P28_06', 'P28_101', 'P28_103', 'P28_109', 'P28_110', 'P28_112', 'P28_113', 'P28_13', 'P28_14', 'P30_05', 'P30_101', 'P30_107', 'P30_111', 'P30_112', 'P32_01', 'P35_105', 'P35_109', 'P37_101', 'P37_103']

# Base URLs of the sparse-annotation training split: one for the annotation
# JSON files, one for the RGB-frame archives.
json_base_url = "https://data.bris.ac.uk/datasets/2v6cgv1x04ol22qp9rm9x2j6a7/GroundTruth-SparseAnnotations/annotations/train/"
zip_base_url = "https://data.bris.ac.uk/datasets/2v6cgv1x04ol22qp9rm9x2j6a7/GroundTruth-SparseAnnotations/rgb_frames/train/"

!mkdir annotations_train
%cd annotations_train/
for suffix in training_filenames:
  url = f"{json_base_url}{suffix}.json"
  !wget {url}
%cd ..

!mkdir images_train
%cd images_train/
for suffix in training_filenames:
    url = f"{zip_base_url}{suffix[0:3]}/{suffix}.zip"
    !wget {url}
%cd ..

/content/annotations_train
--2024-12-24 10:56:52--  https://data.bris.ac.uk/datasets/2v6cgv1x04ol22qp9rm9x2j6a7/GroundTruth-SparseAnnotations/annotations/train/P01_01.json
Resolving data.bris.ac.uk (data.bris.ac.uk)... 137.222.0.78
Connecting to data.bris.ac.uk (data.bris.ac.uk)|137.222.0.78|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 17282261 (16M) [application/json]
Saving to: ‘P01_01.json’


2024-12-24 10:57:11 (915 KB/s) - ‘P01_01.json’ saved [17282261/17282261]

--2024-12-24 10:57:11--  https://data.bris.ac.uk/datasets/2v6cgv1x04ol22qp9rm9x2j6a7/GroundTruth-SparseAnnotations/annotations/train/P01_03.json
Resolving data.bris.ac.uk (data.bris.ac.uk)... 137.222.0.78
Connecting to data.bris.ac.uk (data.bris.ac.uk)|137.222.0.78|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 271379 (265K) [application/json]
Saving to: ‘P01_03.json’


2024-12-24 10:57:13 (253 KB/s) - ‘P01_03.json’ saved [271379/271379]

--2024-12-24 10:57:13--  htt

In [None]:
# For the selected hand (right hand by default):
#   If the annotation names an object in contact:
#       Extract the corresponding image frame into the TRUE folder
#   Else if the hand is (definitely) not in contact, or the frame has no
#   annotation for that hand:
#       Extract it into the FALSE folder
#   Else (i.e. inconclusive, or in contact but not with a labelled object):
#       Discard

def image_sorter(json_path, zip_path, true_folder, false_folder, hand_name='right hand'):
    """Sort the frames of one video into contact / no-contact folders.

    Args:
        json_path: Path to the annotation JSON file of the video.
        zip_path: Path to the ZIP archive holding the video's RGB frames.
        true_folder: Directory that receives frames where the hand is in
            contact with a labelled object.
        false_folder: Directory that receives all remaining, non-excluded
            frames.
        hand_name: Value of the annotation ``name`` field selecting the hand
            to classify. Defaults to ``'right hand'``.

    Returns:
        None. If the JSON file cannot be parsed or lacks the
        ``video_annotations`` key, an error is printed and nothing is
        extracted.
    """
    # Contact labels that make a frame unusable for either class.
    unusable_labels = ('inconclusive', 'none-of-the-above')

    # Load the JSON data
    try:
        with open(json_path, 'r') as file:
            data = json.load(file)
        video_annotations = data['video_annotations']
    except (json.JSONDecodeError, KeyError) as e:
        print(f"Error loading JSON file {json_path}: {e}")
        return

    # First pass: decide, per image name, which class (if any) it belongs to.
    true_names = set()
    excluded_names = set()
    for video_annotation in video_annotations:
        for annotation in video_annotation['annotations']:
            if annotation['name'] != hand_name:
                continue
            # Compare on the bare file name so that matching does not depend on
            # any directory prefix used inside the archive.
            image_name = video_annotation['image']['name'].rsplit('/', 1)[-1]
            contact = annotation.get('in_contact_object')
            if contact is None or contact in unusable_labels:
                # A missing contact label is treated like an inconclusive one.
                excluded_names.add(image_name)
            elif contact != 'hand-not-in-contact':
                true_names.add(image_name)

    # Second pass: a single sweep over the archive, extracting each member
    # into the folder chosen above.
    with zipfile.ZipFile(zip_path, 'r') as zip_ref:
        for file_name in zip_ref.namelist():
            if file_name.endswith('/'):
                continue  # directory entry, not a frame
            base_name = file_name.rsplit('/', 1)[-1]
            if base_name in true_names:
                zip_ref.extract(file_name, true_folder)
            elif base_name not in excluded_names:
                zip_ref.extract(file_name, false_folder)


In [None]:
# Output folders for the two classes (right hand).
true_folder = "TRUE_RIGHT"
false_folder = "FALSE_RIGHT"

os.makedirs(true_folder, exist_ok=True)
os.makedirs(false_folder, exist_ok=True)

annotations_folder = 'annotations_train'
images_folder = 'images_train'

json_files = sorted([f for f in os.listdir(annotations_folder) if f.endswith('.json')])
zip_files = sorted([f for f in os.listdir(images_folder) if f.endswith('.zip')])

# Pair each annotation file with the archive of the SAME video ID. Pairing by
# position in the sorted lists would silently misalign every later pair as
# soon as a single download is missing.
zip_by_video = {os.path.splitext(f)[0]: f for f in zip_files}
json_videos = {os.path.splitext(f)[0] for f in json_files}

unmatched_videos = sorted(json_videos ^ set(zip_by_video))
if unmatched_videos:
    print("Warning: Mismatch between JSON and ZIP files.")
    print(f"Skipping videos without both a JSON and a ZIP file: {unmatched_videos}")

for json_file in json_files:
    zip_file = zip_by_video.get(os.path.splitext(json_file)[0])
    if zip_file is None:
        continue  # no frames downloaded for this video

    json_path = os.path.join(annotations_folder, json_file)
    zip_path = os.path.join(images_folder, zip_file)

    image_sorter(json_path, zip_path, true_folder, false_folder)

In [None]:
# Destination archives on the mounted Google Drive
drive_true_folder = '/content/drive/My Drive/true_right_images.zip'
drive_false_folder = '/content/drive/My Drive/false_right_images.zip'

# Zip each class folder straight onto Drive.
# NOTE - Ensure there is adequate space in Drive first
for archive_path, source_folder in (
    (drive_true_folder, true_folder),
    (drive_false_folder, false_folder),
):
    # make_archive appends the '.zip' extension itself, so it is given the
    # archive path without it.
    archive_base = archive_path.replace('.zip', '')
    shutil.make_archive(archive_base, 'zip', source_folder)

print("Folders saved to Google Drive.")

Folders saved to Google Drive.


In [None]:
# Make a tensorflow-friendly dataset: one sub-folder per class inside `data/`.
# Pure Python instead of `!mkdir` / `!mv` so that the cell can be re-run and
# real failures raise instead of being ignored.
os.makedirs('data', exist_ok=True)
for class_folder in ('TRUE_RIGHT', 'FALSE_RIGHT'):
    destination = os.path.join('data', class_folder)
    if os.path.exists(destination):
        # Already moved by an earlier run of this cell.
        print(f"{destination} already exists, leaving it untouched.")
    elif os.path.isdir(class_folder):
        shutil.move(class_folder, destination)
    else:
        print(f"Warning: {class_folder} not found, nothing to move.")

In [None]:
# Load the dataset into tensorflow. Image size and batch size are spelled out
# (they equal the library defaults) because the CNN input shape depends on
# them; the seed makes the file shuffling reproducible between runs.
# NOTE(review): frames are resized to 256x256 without preserving the aspect
# ratio of the (mostly 1920x1080) source images.
data = tf.keras.utils.image_dataset_from_directory(
    'data',
    image_size=(256, 256),
    batch_size=32,
    seed=42,
)

# Scale down the pixel intensities from 0-255 to 0-1
data = data.map(lambda x,y: (x/255, y))
scaled_iterator = data.as_numpy_iterator()
batch = next(scaled_iterator)

# Partition into training, validation and testing data.
# len(data) counts batches. The test split takes whatever is left after the
# train and validation splits, so no batch is lost to rounding.
train_size = int(len(data)*0.7)
val_size = int(len(data)*0.2)
test_size = len(data) - train_size - val_size

# NOTE(review): the dataset is shuffled by default; confirm that the
# take/skip partitions stay disjoint from one epoch to the next.
train = data.take(train_size)
val = data.skip(train_size).take(val_size)
test = data.skip(train_size+val_size).take(test_size)

Found 29703 files belonging to 2 classes.


In [None]:
# Create the CNN: three convolution + max-pooling stages followed by a small
# dense head with a single sigmoid output for binary classification.
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Dense, Flatten, Dropout, Input

model = Sequential([
    # Declare the input with an explicit Input layer; passing `input_shape`
    # to the first Conv2D makes Keras emit a warning.
    Input(shape=(256,256,3)),

    Conv2D(16, (3,3), 1, activation='relu'),
    MaxPooling2D(),

    Conv2D(32, (3,3), 1, activation='relu'),
    MaxPooling2D(),

    Conv2D(16, (3,3), 1, activation='relu'),
    MaxPooling2D(),

    Flatten(),

    Dense(256, activation='relu'),
    Dense(1, activation='sigmoid'),
])

model.compile('adam', loss=tf.losses.BinaryCrossentropy(), metrics=['accuracy'])

model.summary()

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [None]:
# Train the CNN, logging to TensorBoard.
logdir = 'logs'
tensorboard_callback = tf.keras.callbacks.TensorBoard(log_dir=logdir)

# Stop once the validation loss has not improved for a few epochs and keep the
# best weights. In the recorded run the validation loss bottomed out at epoch 3
# and rose afterwards while each epoch took roughly half an hour.
early_stopping_callback = tf.keras.callbacks.EarlyStopping(
    monitor='val_loss',
    patience=3,
    restore_best_weights=True,
)

hist = model.fit(
    train,
    epochs=20,
    validation_data=val,
    callbacks=[tensorboard_callback, early_stopping_callback],
)

Epoch 1/20
[1m650/650[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1903s[0m 3s/step - accuracy: 0.7607 - loss: 0.5486 - val_accuracy: 0.7704 - val_loss: 0.5329
Epoch 2/20
[1m650/650[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1905s[0m 3s/step - accuracy: 0.7772 - loss: 0.4813 - val_accuracy: 0.7833 - val_loss: 0.4611
Epoch 3/20
[1m650/650[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1871s[0m 3s/step - accuracy: 0.8023 - loss: 0.4328 - val_accuracy: 0.7966 - val_loss: 0.4577
Epoch 4/20
[1m650/650[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1890s[0m 3s/step - accuracy: 0.8421 - loss: 0.3581 - val_accuracy: 0.7976 - val_loss: 0.4906
Epoch 5/20
[1m650/650[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1883s[0m 3s/step - accuracy: 0.8957 - loss: 0.2531 - val_accuracy: 0.7691 - val_loss: 0.6800
Epoch 6/20
[1m650/650[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1888s[0m 3s/step - accuracy: 0.9383 - loss: 0.1599 - val_accuracy: 0.7828 - val_loss: 0.8964
Epoch 7/20
[1m6