In [1]:
from datasets import Dataset, Image
import os
from PIL import Image as PILImage
from pathlib import Path

In [2]:
def generate_frame_pairs(root_dir, task_instruction, frame_offset=50):
    """Build (current frame, instruction, future frame) records for one task.

    Every ``episode*`` entry under ``root_dir`` that contains a
    ``camera/color/head`` directory is scanned for ``*.png`` files. The frames
    of an episode are ordered by the integer value of their file stem, and the
    frame at position ``i`` is paired with the frame at ``i + frame_offset``.
    Pairs never cross episode boundaries.

    Args:
        root_dir: Directory that holds the ``episode*`` sub-directories.
        task_instruction: Text stored under ``'instruction'`` in every record.
        frame_offset: Number of positions between the current and the future
            frame in the sorted frame list. Must not be negative.

    Returns:
        A list of dicts with the keys ``'current_frame'``, ``'instruction'``
        and ``'future_frame'``; both frame values are path strings. Episodes
        are visited in lexicographic order of their paths (so ``episode10``
        comes before ``episode2``). Episodes with ``frame_offset`` or fewer
        frames contribute no records.

    Raises:
        ValueError: If ``frame_offset`` is negative, or if a ``*.png`` file
            stem cannot be parsed as an integer.
    """
    # A negative offset used to index from the end of the list (wrap-around
    # pairs) and then run off the end with an IndexError; reject it up front.
    if frame_offset < 0:
        raise ValueError(
            f"frame_offset must be non-negative, got {frame_offset!r}"
        )

    pairs = []

    # Iterate through each episode
    for episode_dir in sorted(Path(root_dir).glob('episode*')):
        color_dir = episode_dir / 'camera' / 'color' / 'head'
        if not color_dir.is_dir():
            continue

        # Numeric sort so that 10.png follows 9.png rather than 1.png
        frame_paths = sorted(color_dir.glob('*.png'), key=lambda p: int(p.stem))

        # zip stops at the shorter sequence, i.e. after
        # max(len(frame_paths) - frame_offset, 0) pairs.
        for current_path, future_path in zip(frame_paths, frame_paths[frame_offset:]):
            pairs.append({
                'current_frame': str(current_path),
                'instruction': task_instruction,
                'future_frame': str(future_path)
            })

    return pairs

In [3]:
# Tasks to process: each (data directory, instruction) tuple names the folder
# holding a task's episodes and the instruction text attached to its pairs.
tasks = list(
    {
        'block_hammer_beat_D435': 'beat the block with the hammer',
        'block_handover_D435': 'handover the blocks',
        'blocks_stack_easy_D435': 'stack blocks',
    }.items()
)


In [4]:
# Concatenate the frame pairs of every task into one flat list, in task order.
all_pairs = []
for task_dir, instruction in tasks:
    task_pairs = generate_frame_pairs(task_dir, instruction)
    all_pairs += task_pairs


In [5]:
# Total number of frame pairs collected across all tasks.
len(all_pairs)


91303

In [14]:
# Preview the first five pairs.
all_pairs[0:5]

[{'current_frame': 'block_hammer_beat_D435/episode0/camera/color/head/0.png',
  'instruction': 'beat the block with the hammer',
  'future_frame': 'block_hammer_beat_D435/episode0/camera/color/head/50.png'},
 {'current_frame': 'block_hammer_beat_D435/episode0/camera/color/head/1.png',
  'instruction': 'beat the block with the hammer',
  'future_frame': 'block_hammer_beat_D435/episode0/camera/color/head/51.png'},
 {'current_frame': 'block_hammer_beat_D435/episode0/camera/color/head/2.png',
  'instruction': 'beat the block with the hammer',
  'future_frame': 'block_hammer_beat_D435/episode0/camera/color/head/52.png'},
 {'current_frame': 'block_hammer_beat_D435/episode0/camera/color/head/3.png',
  'instruction': 'beat the block with the hammer',
  'future_frame': 'block_hammer_beat_D435/episode0/camera/color/head/53.png'},
 {'current_frame': 'block_hammer_beat_D435/episode0/camera/color/head/4.png',
  'instruction': 'beat the block with the hammer',
  'future_frame': 'block_hammer_beat_D435/episode0/camera/color/head/54.png'}]

In [18]:
# Spot-check five pairs from further into the list.
all_pairs[30000:30005]

[{'current_frame': 'block_handover_D435/episode42/camera/color/head/361.png',
  'instruction': 'handover the blocks',
  'future_frame': 'block_handover_D435/episode42/camera/color/head/411.png'},
 {'current_frame': 'block_handover_D435/episode42/camera/color/head/362.png',
  'instruction': 'handover the blocks',
  'future_frame': 'block_handover_D435/episode42/camera/color/head/412.png'},
 {'current_frame': 'block_handover_D435/episode42/camera/color/head/363.png',
  'instruction': 'handover the blocks',
  'future_frame': 'block_handover_D435/episode42/camera/color/head/413.png'},
 {'current_frame': 'block_handover_D435/episode42/camera/color/head/364.png',
  'instruction': 'handover the blocks',
  'future_frame': 'block_handover_D435/episode42/camera/color/head/414.png'},
 {'current_frame': 'block_handover_D435/episode42/camera/color/head/365.png',
  'instruction': 'handover the blocks',
  'future_frame': 'block_handover_D435/episode42/camera/color/head/415.png'}]

In [6]:
# Inspect the final pair in the list.
all_pairs[-1]

{'current_frame': 'blocks_stack_easy_D435/episode99/camera/color/head/377.png',
 'instruction': 'stack blocks',
 'future_frame': 'blocks_stack_easy_D435/episode99/camera/color/head/427.png'}

In [7]:
def gen():
    """Yield one HuggingFace dataset record per entry of ``all_pairs``.

    Each record carries the keys ``'current_frame'``, ``'instruction'`` and
    ``'future_frame'`` in that order. The instruction is passed through
    unchanged; the two frame paths are replaced by the objects returned from
    ``PILImage.open``. The opened images are handed to the consumer without
    being closed here.
    """
    for entry in all_pairs:
        current_image = PILImage.open(entry['current_frame'])
        instruction = entry['instruction']
        future_image = PILImage.open(entry['future_frame'])
        yield {
            'current_frame': current_image,
            'instruction': instruction,
            'future_frame': future_image,
        }

In [8]:
# Build the HuggingFace Dataset from the records yielded by gen().
dataset = Dataset.from_generator(gen)

In [9]:
# Show the dataset summary (feature names and row count).
dataset

Dataset({
    features: ['current_frame', 'instruction', 'future_frame'],
    num_rows: 91303
})

In [None]:
!export HF_ENDPOINT=https://hf-mirror.com
!huggingface-cli upload bryandts/robot-action-prediction-dataset ./data/robot_action_prediction_dataset /train --repo-type=dataset --token {fill_the_token}