## **Convert CVAT tracks → canonical shots.csv**

- Parse the JSON and create a table with one row per shot track, plus a per-frame bbox series (serialized) so we can crop dynamically.
- Schema (04_shots/shots.csv):

```
video_rel, start_f, end_f, contact_f, shot_type, player_side, src, fps_proxy, bboxes_json

# video_rel: e.g., channel/yt_id/proxy.mp4 to map back to master.
# bboxes_json: JSON list of [frame_idx, x1, y1, x2, y2] (proxy coordinates).
```

In [None]:
import json, csv, os, glob, re
from collections import defaultdict
from typing import List

In [None]:
# Mount Google Drive at /content/drive (Colab-only helper module).
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
def infer_video_rel(task_name: str) -> str:
    """
    Infers the relative video path from a CVAT task name.

    The name is split on the double-underscore separator instead of being
    matched with a "no underscore allowed" pattern, because channel names and
    YouTube IDs may themselves contain single underscores
    (e.g. 'my_channel__abc_def__chunk01').

    Args:
        task_name (str): The name of the CVAT task, e.g., 'channel__ytid__chunkNN'.

    Returns:
        str: The inferred relative video path, e.g., 'channel/ytid/proxy.mp4'.
             If the name does not contain a non-empty channel and ID separated
             by '__', the task name is returned unchanged.
    """
    parts = task_name.split("__")
    # Only the first two fields are used; anything after (chunk suffix) is ignored.
    if len(parts) >= 2 and parts[0] and parts[1]:
        return f"{parts[0]}/{parts[1]}/proxy.mp4"
    return task_name

In [None]:
def _clean_attr(attrs: dict, key: str, default: str) -> str:
    """
    Returns attrs[key] as text with surrounding double quotes removed.

    Falls back to `default` when the key is missing or its value is None.
    """
    value = attrs.get(key)
    if value is None:
        return default
    return str(value).strip('"')


def process_cvat_json(json_path: str) -> list:
    """
    Processes a single CVAT JSON file (frame-based format) to extract shot track data.

    This function handles the frame-based JSON format:
    - It groups 'shot_track' bbox annotations by 'track_id' to reconstruct each track.
    - It converts bounding boxes from [x, y, w, h] to [x1, y1, x2, y2].
    - It orders every track by frame number, so the serialized bbox series,
      the start/end frames and the contact-frame search do not depend on the
      order of `items` in the file.
    - It skips (with a warning) annotations whose bbox is missing or malformed
      instead of aborting the whole file.

    Args:
        json_path (str): The full path to the CVAT JSON file. The name of its
            parent directory is used as the task name.

    Returns:
        list: A list of lists, one per track, in the column order
              [video_rel, start_f, end_f, contact_f, shot_type, player_side,
              src, fps_proxy, bboxes_json]. Empty if the file cannot be read
              or parsed, or if it has no 'shot_track' label.
    """
    rows = []
    try:
        with open(json_path, 'r', encoding='utf-8') as f:
            data = json.load(f)
    except (OSError, json.JSONDecodeError, UnicodeDecodeError) as e:
        # Any unreadable/unparseable file is reported and skipped, not fatal.
        print(f"Error processing {json_path}: {e}")
        return rows

    task_name = os.path.basename(os.path.dirname(json_path))
    video_rel = infer_video_rel(task_name)

    ### CHANGE HERE IF NEEDED, NOW DEFAULT IS 30
    fps_proxy = 30  # by construction

    # The label_id used by annotations is the index of the label in this list.
    shot_track_label_id = None
    labels = data.get("categories", {}).get("label", {}).get("labels", [])
    for i, label in enumerate(labels):
        if label.get("name") == "shot_track":
            shot_track_label_id = i
            break

    if shot_track_label_id is None:
        print(f"Warning: No 'shot_track' label found in {json_path}. Skipping.")
        return rows

    # track_id -> list of (frame, annotation, [frame, x1, y1, x2, y2])
    tracks_data = defaultdict(list)
    for item in data.get("items", []):
        try:
            frame_num = int(item.get("attr", {}).get("frame"))
        except (TypeError, ValueError):
            # Missing or non-numeric frame index: the item cannot be placed.
            continue

        for annotation in item.get("annotations", []):
            if annotation.get("type") != "bbox":
                continue
            if annotation.get("label_id") != shot_track_label_id:
                continue
            track_id = annotation.get("attributes", {}).get("track_id")
            if track_id is None:
                continue

            try:
                x, y, w, h = (float(v) for v in annotation.get("bbox"))
            except (TypeError, ValueError):
                print(f"Warning: Malformed bbox at frame {frame_num} in {json_path}. Skipping annotation.")
                continue

            tracks_data[track_id].append(
                (frame_num, annotation, [frame_num, x, y, x + w, y + h])
            )

    for annotations_list in tracks_data.values():
        # Stable sort on the frame only (annotation dicts are not comparable).
        annotations_list.sort(key=lambda entry: entry[0])

        # Track-level attributes are read from the earliest frame of the track,
        # assuming they are consistent across all frames.
        track_attrs = annotations_list[0][1].get("attributes", {})
        shot_type = _clean_attr(track_attrs, "shot_type", "other")
        player_side = _clean_attr(track_attrs, "player_side", "unknown")

        start_f = annotations_list[0][0]
        end_f = annotations_list[-1][0]

        # The earliest frame carrying a contact marker decides contact_f.
        contact_f = None
        for frame_num, annotation, _ in annotations_list:
            attrs = annotation.get("attributes", {})
            if "is_contact" in attrs and str(attrs["is_contact"]).lower() in ("true", "1", "yes"):
                contact_f = frame_num
                break
            if "contact_frame" in attrs and str(attrs["contact_frame"]).strip().isdigit():
                contact_f = int(attrs["contact_frame"])
                break

        bboxes = [box for _, _, box in annotations_list]

        rows.append([
            video_rel,
            start_f,
            end_f,
            contact_f,
            shot_type,
            player_side,
            "youtube",
            fps_proxy,
            json.dumps(bboxes),
        ])

    return rows

In [None]:
def write_csv_data(rows: list, output_path: str):
    """
    Writes a list of data rows to a CSV file, preceded by the shots.csv header.

    The parent directory of `output_path` is created if it does not exist.
    A bare filename (no directory component) is written to the current
    working directory.

    Args:
        rows (list): A list of lists, where each inner list is a row of data.
        output_path (str): The full path to the output CSV file.
    """
    out_dir = os.path.dirname(output_path)
    # os.makedirs("") raises FileNotFoundError, so only create a real directory.
    if out_dir:
        os.makedirs(out_dir, exist_ok=True)

    header = [
        "video_rel", "start_f", "end_f", "contact_f", "shot_type",
        "player_side", "src", "fps_proxy", "bboxes_json",
    ]
    with open(output_path, "w", newline="", encoding="utf-8") as f:
        w = csv.writer(f)
        w.writerow(header)
        w.writerows(rows)
    print(f"Written {len(rows)} rows to {output_path}")

In [None]:
def process_all_cvat_files(exports_dir: str, output_csv_path: str):
    """
    Finds and processes all CVAT JSON files in a directory and writes the
    combined data to a single CSV.

    Files are looked up as '<exports_dir>/<task>/cvat.json' and processed in
    sorted path order, so the row order of the output CSV is reproducible
    (glob itself returns matches in arbitrary order).

    Args:
        exports_dir (str): The directory containing the CVAT exports.
        output_csv_path (str): The full path for the output CSV file.
    """
    json_files = sorted(glob.glob(os.path.join(exports_dir, "*", "cvat.json")))

    if not json_files:
        print(f"No CVAT JSON files found in '{exports_dir}'.")
        return

    print(f"Found {len(json_files)} files to process.")
    all_rows = []
    for json_path in json_files:
        all_rows.extend(process_cvat_json(json_path))

    write_csv_data(all_rows, output_csv_path)

In [None]:
def process_specific_cvat_files(json_paths: List[str], output_csv_path: str):
    """
    Processes a list of specific CVAT JSON files and combines the
    data into a single CSV.

    Paths that do not exist are reported and skipped. Every row returned for
    a file is kept: the per-file rows contain data only (the CSV header is
    added once when the combined file is written), so nothing is sliced off.

    Args:
        json_paths (List[str]): A list of full paths to the CVAT JSON files.
        output_csv_path (str): The full path for the output CSV file.
    """
    if not json_paths:
        print("The list of JSON files to process is empty.")
        return

    print(f"Found {len(json_paths)} specific files to process.")

    all_rows = []
    for json_path in json_paths:
        if not os.path.exists(json_path):
            print(f"Warning: File not found, skipping: {json_path}")
            continue

        all_rows.extend(process_cvat_json(json_path))

    write_csv_data(all_rows, output_csv_path)

In [None]:
# Project root on the mounted Google Drive; the paths below are derived from it.
ROOT = "/content/drive/MyDrive/FIT3163,3164/SlowFast"
# Directory holding the CVAT exports (one sub-folder per task, each with a cvat.json).
EXPORTS = f"{ROOT}/03_cvat_exports"
# Output path of the combined shots CSV.
SHOTS = f"{ROOT}/04_shots/shots.csv"

## **Process ALL files as per Drive structure**

In [None]:
# Convert every <task>/cvat.json found under EXPORTS and write the combined rows to SHOTS.
print("--- Processing all files in the export directory ---")
process_all_cvat_files(EXPORTS, SHOTS)

--- Processing all files in the export directory ---
Found 1 files to process.
Written 12 rows to /content/drive/MyDrive/FIT3163,3164/SlowFast/04_shots/shots.csv


## **Process specific file(s) to separate CSVs**

In [None]:
# Each input JSON is converted to its own CSV; the two lists are paired by position.
input_files = [f"{EXPORTS}/phua_1/cvat.json"]
output_files = [f"{ROOT}/04_shots/phua_1.csv"]
if len(input_files) != len(output_files):
    raise ValueError("input_files and output_files must have the same length")

for json_path, csv_path in zip(input_files, output_files):
    write_csv_data(process_cvat_json(json_path), csv_path)

Written 72 rows to /content/drive/MyDrive/FIT3163,3164/SlowFast/04_shots/phua_1.csv


## **Process specific files to one single CSV**

In [None]:
# Explicit list of CVAT exports whose rows are merged into one CSV.
input_files = [
    f"{EXPORTS}/matshi_tanthi_2025/cvat.json",
    f"{EXPORTS}/sin_tty_2016/cvat.json",
    f"{EXPORTS}/vitid_anton_2024/cvat.json"
]

# Destination of the merged CSV.
output_csv = f"{ROOT}/04_shots/3in1.csv"

print("--- Processing a specific list of files into one CSV ---")
process_specific_cvat_files(input_files, output_csv)

--- Processing a specific list of files into one CSV ---
Found 3 specific files to process.
Written 506 rows to /content/drive/MyDrive/FIT3163,3164/SlowFast/04_shots/3in1.csv
