# Comprehensive Analysis of Ego4D NLQ Annotations

## 1. Setup Environment

### Import Required Libraries
Import the libraries used for data manipulation, numerical operations, and visualization.

In [None]:
# Standard libraries
import os
import json
import pandas as pd
import numpy as np

# Visualization libraries
import matplotlib.pyplot as plt
import seaborn as sns

# Notebook settings
# Apply matplotlib's 'ggplot' style first, then seaborn's "whitegrid" style with
# the "deep" palette. Both calls change global plotting defaults for every
# figure drawn later in the notebook.
# NOTE(review): the seaborn call runs second, so it presumably overrides any
# settings the two styles share — confirm the 'ggplot' call is still needed.
plt.style.use('ggplot')
sns.set(style="whitegrid", palette="deep")

### Define File Paths
Set paths to annotation files and metadata.

In [9]:
# Define paths to data directory and files.
# `data_dir` is a relative path, so it is resolved against the notebook's
# current working directory.
data_dir = "data/ego4d_data"
annotations_file = os.path.join(data_dir, "v1", "annotations", "nlq_train.json")
metadata_file = os.path.join(data_dir, "ego4d.json")

# Confirm file existence before any loading is attempted.
# Explicit exceptions are used rather than `assert` because assertions are
# skipped when Python runs with -O; the offending path is included in the
# message so a wrong `data_dir` is easy to diagnose. `isfile` also rejects a
# directory that happens to sit at the expected path.
if not os.path.isfile(annotations_file):
    raise FileNotFoundError(f"NLQ annotations file not found: {annotations_file}")
if not os.path.isfile(metadata_file):
    raise FileNotFoundError(f"Metadata file not found: {metadata_file}")

print("All files are accessible. Ready to load data.")

All files are accessible. Ready to load data.


## 2. Load and Inspect the Data

### Load NLQ Annotations
Load the annotations and inspect the structure.

In [10]:
# Load NLQ annotations.
# The encoding is pinned to UTF-8: the annotations contain non-ASCII
# characters (e.g. typographic apostrophes in the query templates), and
# without an explicit encoding `open` falls back to the platform's locale
# default, which is not UTF-8 everywhere.
with open(annotations_file, 'r', encoding='utf-8') as f:
    nlq_data = json.load(f)

# Quick structural check: top-level keys and how many video entries were loaded.
print("Top-level keys in NLQ annotations:", nlq_data.keys())
print("Number of videos:", len(nlq_data['videos']))

Top-level keys in NLQ annotations: dict_keys(['version', 'date', 'description', 'manifest', 'videos'])
Number of videos: 754


### Load Metadata
Load the metadata file and inspect its structure.

In [11]:
# Load metadata.
# The encoding is pinned to UTF-8 so the file is decoded the same way on every
# platform; without it `open` uses the locale default, which may not be UTF-8.
with open(metadata_file, 'r', encoding='utf-8') as f:
    metadata = json.load(f)

# Quick structural check: top-level keys and how many video entries were loaded.
print("Top-level keys in Metadata:", metadata.keys())
print("Number of videos in Metadata:", len(metadata['videos']))

Top-level keys in Metadata: dict_keys(['date', 'version', 'description', 'videos', 'clips', 'concurrent_video_sets', 'physical_settings', 'moments_labels'])
Number of videos in Metadata: 9645


In [28]:
# Spot-check one raw query by walking the nesting step by step:
# video 0 -> clip 0 -> annotation 1 -> language query 3.
print("queries:")
first_clip = nlq_data['videos'][0]["clips"][0]
sample_query = first_clip['annotations'][1]['language_queries'][3]
print(sample_query["query"])

queries:
Where did I put a meat container.


### Transform Data into a DataFrame
Extract relevant fields from the NLQ annotations into a structured DataFrame.

In [None]:
# Flatten the nested annotations (videos -> clips -> annotations ->
# language_queries) into one record per usable query. Records are collected
# in a list and converted to a DataFrame once at the end.
nlq_list = []

for video in nlq_data['videos']:
    video_uid = video['video_uid']
    for clip in video['clips']:
        clip_uid = clip['clip_uid']
        for annotation in clip['annotations']:
            for nlq in annotation['language_queries']:
                # Skip entries without usable query text. The truthiness test
                # rejects a missing key, an empty string, and None; comparing
                # only against "" would let a None query through and create
                # rows with a null `query`.
                # NOTE(review): assumes null queries can occur in the raw
                # annotations — confirm against the data.
                if not nlq.get('query'):
                    continue

                nlq_list.append({
                    'video_uid': video_uid,
                    'clip_uid': clip_uid or "N/A",  # Replace missing with N/A
                    'query': nlq['query'],  # Guaranteed non-empty by the check above
                    # Template/slot fields fall back to "N/A" when the key is absent.
                    'template': nlq.get('template', "N/A"),
                    'slot_x': nlq.get('slot_x', "N/A"),
                    'verb_x': nlq.get('verb_x', "N/A"),
                    'slot_y': nlq.get('slot_y', "N/A"),
                    'verb_y': nlq.get('verb_y', "N/A"),
                    # Timing fields fall back to zero when the key is absent.
                    'clip_start_sec': nlq.get('clip_start_sec', 0.0),
                    'clip_end_sec': nlq.get('clip_end_sec', 0.0),
                    'video_start_sec': nlq.get('video_start_sec', 0.0),
                    'video_end_sec': nlq.get('video_end_sec', 0.0),
                    'video_start_frame': nlq.get('video_start_frame', 0),
                    'video_end_frame': nlq.get('video_end_frame', 0),
                    # Duration is only computed when both clip endpoints are
                    # present; otherwise it is recorded as 0.0.
                    'duration': (nlq['clip_end_sec'] - nlq['clip_start_sec'])
                                if 'clip_end_sec' in nlq and 'clip_start_sec' in nlq else 0.0
                })

# Convert to DataFrame
nlq_df = pd.DataFrame(nlq_list)

# Display overview
print("NLQ Annotations DataFrame:")
print(nlq_df.head())
nlq_df.info()

NLQ Annotations DataFrame:
                              video_uid                              clip_uid  \
0  d250521e-5197-44aa-8baa-2f42b24444d2  fae92e70-88aa-4b77-b41a-5879b74c804c   
1  d250521e-5197-44aa-8baa-2f42b24444d2  fae92e70-88aa-4b77-b41a-5879b74c804c   
2  d250521e-5197-44aa-8baa-2f42b24444d2  fae92e70-88aa-4b77-b41a-5879b74c804c   
3  d250521e-5197-44aa-8baa-2f42b24444d2  fae92e70-88aa-4b77-b41a-5879b74c804c   
4  d250521e-5197-44aa-8baa-2f42b24444d2  fae92e70-88aa-4b77-b41a-5879b74c804c   

                                               query  \
0       How many frying pans can i see on the shelf?   
1  What colour bowl did i carry from the plate st...   
2             In what location did i see the basket?   
3                       What did i pour in the bowl?   
4          Where was the soap before i picked it up?   

                                            template              slot_x  \
0         Objects: How many X’s? (quantity question)         frying pans 