In [2]:
import pandas as pd
import os
import re
import h5py
from PIL import Image
import matplotlib.pyplot as plt
import json

In [3]:
# Base directories for the videos and for the annotation files.
# Change these two values to match where the dataset lives locally.
video_base_path = '/workspace/LLaSA/dataset/A2D/clips320jpeg/'
annotation_base_path = '/workspace/LLaSA/dataset/A2D/Annotations/mat/'

# Read the annotation CSV into a DataFrame; the next cell iterates over its rows.
csv_path = '/workspace/LLaSA/dataset/A2D/a2d_annotation.csv'
csv_data = pd.read_csv(csv_path)


In [5]:
def _find_first_annotation(annotation_dir):
    """Locate the lowest-numbered ``.mat`` file below ``annotation_dir``.

    The directory is walked recursively. The frame number of a file is the
    integer formed by the part of its base name before the first ``.``.

    Returns:
        ``(path, frame_number)`` for the file with the smallest frame number,
        or ``None`` when no ``.mat`` file exists under the directory.

    Raises:
        ValueError: if a ``.mat`` file name does not start with an integer.
    """
    def frame_number(path):
        return int(os.path.basename(path).split('.')[0])

    mat_paths = [
        os.path.join(root, name)
        for root, _dirs, names in os.walk(annotation_dir)
        for name in names
        if name.endswith('.mat')
    ]
    if not mat_paths:
        return None
    # min() returns the first minimal element, the same choice as sorted(...)[0].
    first_path = min(mat_paths, key=frame_number)
    return first_path, frame_number(first_path)


def _load_first_frame_boxes(video_id):
    """Read the ``reBBox`` dataset from the first annotated frame of a video.

    Returns:
        ``None`` when the video has no annotation files, otherwise a tuple
        ``(annotation_path, bbox_frame, bbox_list)`` where ``bbox_list`` is the
        in-memory array read from ``reBBox`` or ``None`` when the file has no
        such dataset.
    """
    found = _find_first_annotation(os.path.join(annotation_base_path, video_id))
    if found is None:
        return None
    annotation_path, bbox_frame = found
    with h5py.File(annotation_path, 'r') as f:
        bbox_list = f['reBBox'][:] if 'reBBox' in f else None
    return annotation_path, bbox_frame, bbox_list


# Collected entries, keyed by "<video_id>_<csv row index>".
video_info_dict = {}
# Number of rows skipped because instance_id was out of range for the boxes.
skipped_count = 0
# The first-frame lookup depends only on video_id, so do the directory walk
# and the HDF5 read once per video instead of once per CSV row.
annotation_cache = {}

# Iterate through the rows in the CSV file
for index, row in csv_data.iterrows():
    video_id = row['video_id']
    # Keeps only the digit characters of the instance_id cell before converting.
    instance_id = int(re.sub(r'\D', '', str(row['instance_id'])))
    caption = row['query']

    if video_id not in annotation_cache:
        annotation_cache[video_id] = _load_first_frame_boxes(video_id)
    annotation = annotation_cache[video_id]

    if annotation is None:
        annotation_dir = os.path.join(annotation_base_path, video_id)
        print(f"No annotation files found in the directory: {annotation_dir}")
        continue

    annotation_path, bbox_frame, bbox_list = annotation

    if bbox_list is None:
        # The entry is still recorded, just without a bounding box.
        bbox = None
        print(f"Bounding box 'reBBox' not found in file: {annotation_path}")
    elif instance_id >= bbox_list.shape[1]:
        # NOTE(review): the recorded output of this cell shows many rows skipped
        # with instance_id equal to the instance count, so instance_id may not
        # be a 0-based positional index into the first frame's boxes — confirm
        # against the dataset's annotation format.
        skipped_count += 1
        print(f'{video_id} passed since {instance_id} >= {bbox_list.shape[1]}')
        continue
    else:
        # assumes reBBox is 2-D with one column per instance — TODO confirm.
        # tolist() yields native Python numbers, so json.dump works for any
        # numeric dtype (numpy scalars other than float64 are not serializable).
        bbox = tuple(bbox_list[:, instance_id].tolist())

    # Store the collected information under a key unique to this CSV row
    video_info_dict[f"{video_id}_{index}"] = {
        "video": os.path.join(video_base_path, f"{video_id}"),
        "bbox_frame": bbox_frame,
        "bbox": bbox,
        "caption": caption
    }

# Dump the collected information into a JSON file
output_json_path = 'output_video_info.json'  # Specify your output JSON file path
with open(output_json_path, 'w') as json_file:
    json.dump(video_info_dict, json_file, indent=4)

# Legacy alias: keeps the counter's original global name for any later cell.
bullshit = skipped_count

print(f"Collected video information saved to {output_json_path}")
print(f'{skipped_count}')


-0cOo0cRVZU passed since 7 >= 7
-2akYw9VucA passed since 2 >= 2
-2akYw9VucA passed since 3 >= 2
-2akYw9VucA passed since 4 >= 2
-2akYw9VucA passed since 5 >= 2
-8FLF-osZmA passed since 4 >= 4
0S0aQ0GArRc passed since 7 >= 7
0S0aQ0GArRc passed since 8 >= 7
0S0aQ0GArRc passed since 9 >= 7
10XCfvnVUL0 passed since 4 >= 4
10XCfvnVUL0 passed since 5 >= 4
157RAPLxUpQ passed since 1 >= 1
19dJTBEjS8Q passed since 1 >= 1
1Am_eZhq0wg passed since 1 >= 1
1DX-aZNP5NQ passed since 3 >= 3
1Fmmpda6-Z8 passed since 4 >= 4
1TY6VJH-wqY passed since 1 >= 1
1f19vXJMH44 passed since 1 >= 1
2G9DODKHHIM passed since 4 >= 4
3CcQVREqgPM passed since 2 >= 2
3kntCqeudw0 passed since 1 >= 1
3kntCqeudw0 passed since 2 >= 1
3zsM3jBwWIM passed since 3 >= 3


4ez0EQpnUW8 passed since 3 >= 3
4ltpjuovmPM passed since 2 >= 2
4xNTw6cEicA passed since 1 >= 1
58rYwU1Rnpw passed since 5 >= 5
5DEOQzI8_Mc passed since 2 >= 2
5MgnA8i9GJE passed since 3 >= 3
5X3tJQtytiU passed since 1 >= 1
5_ONVWEUAcg passed since 1 >= 1
64YpE9Pgsgk passed since 1 >= 1
6JMh_oA23EU passed since 2 >= 2
6KAonV0ej4c passed since 1 >= 1
6KAonV0ej4c passed since 2 >= 1
6YIdUkJO7TA passed since 2 >= 2
6gwZ6MPYW_Y passed since 3 >= 3
6hDL1unsEIg passed since 1 >= 1
6hDL1unsEIg passed since 2 >= 1
6sGQPMg4Gqc passed since 1 >= 1
77CtRWUggp4 passed since 2 >= 2
77CtRWUggp4 passed since 3 >= 2
77CtRWUggp4 passed since 4 >= 2
77CtRWUggp4 passed since 5 >= 2
77CtRWUggp4 passed since 6 >= 2
77CtRWUggp4 passed since 7 >= 2
7ZICknA76oI passed since 2 >= 2
7bud84dpeXo passed since 4 >= 4
7kV4-ziw4pU passed since 1 >= 1
7kuUHEyCV9g passed since 1 >= 1
7kuUHEyCV9g passed since 2 >= 1
84hC_geiftQ passed since 4 >= 4
84hC_geiftQ passed since 5 >= 4
866n2kjpwT8 passed since 1 >= 1
866n2kjp