# Convert Jetbot CSV to DROID Dataset
This notebook converts a Jetbot CSV recording into the DROID dataset format expected by the VJEPA training scripts.


In [None]:
import os
import csv
import json
from collections import defaultdict
import cv2
import numpy as np
import h5py


## Utility functions

In [None]:
def read_csv(csv_path):
    """Load a recording CSV and group its rows by session.

    Each row is read as a dict keyed by the CSV header. Rows are bucketed
    under their ``session_id`` value (surrounding whitespace stripped) and
    every bucket is then ordered by the numeric value of ``timestamp``.

    Args:
        csv_path: Path of the CSV file to read.

    Returns:
        A ``defaultdict(list)`` mapping session id to its time-ordered rows.

    Raises:
        KeyError: If a row has no ``session_id`` or ``timestamp`` field.
        ValueError: If a ``timestamp`` value cannot be parsed as a float.
    """
    grouped = defaultdict(list)
    with open(csv_path, newline="") as handle:
        for record in csv.DictReader(handle):
            session_key = record['session_id'].strip()
            grouped[session_key].append(record)

    def _timestamp(record):
        # Timestamps arrive as strings; compare them numerically.
        return float(record['timestamp'])

    for session_key in grouped:
        grouped[session_key].sort(key=_timestamp)
    return grouped

def ensure_dir(p):
    """Create directory ``p`` (and any missing parents) if it is absent.

    Calling this on an existing directory is a no-op.

    Args:
        p: Path of the directory to create.

    Raises:
        FileExistsError: If ``p`` exists but is not a directory.
    """
    # exist_ok avoids the check-then-create race of os.path.exists + makedirs.
    os.makedirs(p, exist_ok=True)

def create_video(session_rows, base_dir, video_path, fps):
    """Encode the images referenced by a session's rows into one video file.

    The frame size is taken from the first row's image; any later image with
    a different size is resized to match. Frames are written in row order
    using the ``mp4v`` fourcc.

    Args:
        session_rows: Sequence of row dicts, each with an ``image_path`` key
            giving the image location relative to ``base_dir``.
        base_dir: Directory the ``image_path`` values are joined onto.
        video_path: Destination path of the video file.
        fps: Frame rate passed to the video writer.

    Raises:
        ValueError: If ``session_rows`` is empty.
        FileNotFoundError: If an image cannot be read; the message is the
            row's ``image_path``.
        OSError: If the video writer cannot be opened for ``video_path``.
    """
    if not session_rows:
        raise ValueError('session_rows is empty; cannot create a video')

    def _load(row):
        # cv2.imread signals failure by returning None instead of raising.
        img = cv2.imread(os.path.join(base_dir, row['image_path']))
        if img is None:
            raise FileNotFoundError(row['image_path'])
        return img

    first_img = _load(session_rows[0])
    h, w = first_img.shape[:2]
    writer = cv2.VideoWriter(video_path, cv2.VideoWriter_fourcc(*'mp4v'), fps, (w, h))
    if not writer.isOpened():
        # An unopened writer drops every frame silently; fail loudly instead.
        writer.release()
        raise OSError(f'Could not open video writer for {video_path}')
    try:
        # Reuse the already-decoded first frame rather than reading it twice.
        writer.write(first_img)
        for row in session_rows[1:]:
            img = _load(row)
            if img.shape[:2] != (h, w):
                img = cv2.resize(img, (w, h))
            writer.write(img)
    finally:
        # Release even when a frame fails to load so the file handle is closed.
        writer.release()

def build_trajectory(session_rows):
    """Build the trajectory arrays for one session.

    Column 0 of the cartesian array holds the running sum of each row's
    ``action`` value; every other entry of all three arrays is zero.

    Args:
        session_rows: Sequence of row dicts, each with an ``action`` key.

    Returns:
        A tuple ``(cartesian, gripper, extrinsics)`` of float32 arrays with
        shapes ``(n, 6)``, ``(n,)`` and ``(n, 6)``, where ``n`` is the number
        of rows.

    Raises:
        KeyError: If a row has no ``action`` key at all.
    """
    n = len(session_rows)
    cartesian = np.zeros((n, 6), dtype=np.float32)
    gripper = np.zeros((n,), dtype=np.float32)
    extrinsics = np.zeros((n, 6), dtype=np.float32)

    pos = 0.0
    for i, row in enumerate(session_rows):
        try:
            action = float(row['action'])
        except (TypeError, ValueError):
            # Unparsable text raises ValueError; a None value (what
            # csv.DictReader supplies for a short row) raises TypeError.
            # Both count as "no movement".
            action = 0.0
        pos += action
        cartesian[i, 0] = pos
    return cartesian, gripper, extrinsics

def write_h5(path, cartesian, gripper, extrinsics):
    """Write the trajectory arrays to an HDF5 file at ``path``.

    The file is opened in ``'w'`` mode and laid out as::

        observation/robot_state/cartesian_position
        observation/robot_state/gripper_position
        observation/camera_extrinsics/left_left

    Args:
        path: Destination path of the HDF5 file.
        cartesian: Data stored as ``cartesian_position``.
        gripper: Data stored as ``gripper_position``.
        extrinsics: Data stored as ``left_left``.
    """
    with h5py.File(path, 'w') as h5:
        observation = h5.create_group('observation')

        robot_state = observation.create_group('robot_state')
        robot_fields = (
            ('cartesian_position', cartesian),
            ('gripper_position', gripper),
        )
        for dataset_name, values in robot_fields:
            robot_state.create_dataset(dataset_name, data=values)

        camera = observation.create_group('camera_extrinsics')
        camera.create_dataset('left_left', data=extrinsics)

def process_sessions(sessions, base_dir, out_dir, fps):
    """Convert every session into a dataset directory and list them.

    For each session id a directory ``<out_dir>/<session id>`` is produced
    containing ``recordings/MP4/left.mp4``, ``trajectory.h5`` and
    ``metadata.json``. Afterwards ``<out_dir>/droid_paths.csv`` is written
    with one session directory path per line.

    Args:
        sessions: Mapping of session id to that session's list of row dicts.
        base_dir: Directory the rows' image paths are relative to.
        out_dir: Root directory for the generated dataset.
        fps: Frame rate used for the generated videos.
    """
    # Create the root up front so the list file can be written even when
    # there are no sessions to create sub-directories for.
    os.makedirs(out_dir, exist_ok=True)
    list_file = os.path.join(out_dir, 'droid_paths.csv')
    session_dirs = []
    for sid, rows in sessions.items():
        sdir = os.path.join(out_dir, sid)
        mp4_dir = os.path.join(sdir, 'recordings', 'MP4')
        ensure_dir(mp4_dir)
        video_path = os.path.join(mp4_dir, 'left.mp4')
        create_video(rows, base_dir, video_path, fps)
        cartesian, gripper, extrinsics = build_trajectory(rows)
        write_h5(os.path.join(sdir, 'trajectory.h5'), cartesian, gripper, extrinsics)
        # The video path in the metadata is relative to the session directory.
        meta = {'left_mp4_path': os.path.join('recordings', 'MP4', 'left.mp4')}
        with open(os.path.join(sdir, 'metadata.json'), 'w') as f:
            json.dump(meta, f)
        session_dirs.append(sdir)
    with open(list_file, 'w') as f:
        f.writelines(f'{session_dir}\n' for session_dir in session_dirs)
    print(f'Wrote dataset list to {list_file}')


## Example usage

In [None]:
# csv_path = 'jetbot/sample.csv'
# data_dir = 'jetbot'  # directory containing the images referenced in the CSV
# output_dir = 'jetbot_droid'
# fps = 4
#
# sessions = read_csv(csv_path)
# ensure_dir(output_dir)
# process_sessions(sessions, data_dir, output_dir, fps)
