# Data Analysis

In [1]:
import os
import json
from pathlib import Path
import pandas as pd
import numpy as np
import torch

In [2]:
# Load processed data
data_path = Path("data/processed")

# Collect <scene_dir>/scene.npz for every scene directory that has one.
# Path.iterdir() yields entries in arbitrary order, so the list is sorted to
# make the preview below show the same scenes on every run.
data_files = sorted(
    scene_dir / "scene.npz"
    for scene_dir in data_path.iterdir()
    if scene_dir.is_dir() and (scene_dir / "scene.npz").exists()
)

# Preview at most the first 6 scenes; slicing avoids an IndexError when fewer
# than 6 scenes are available
for idx, scene_file in enumerate(data_files[:6]):
    print(idx)

    # np.load on an .npz returns a lazily-read archive that holds a file
    # handle; the context manager closes it once the arrays are extracted
    with np.load(scene_file) as scene_data:
        # Only use the last 7 entries in each grasp, as they are the values of the hand pose
        grasps = scene_data["grasps"][:, -7:]

        # Convert to tensors
        sdf = torch.from_numpy(scene_data["sdf"])
        grasps = torch.from_numpy(grasps)
        scores = torch.from_numpy(scene_data["scores"])

    print(sdf.shape)
    print(grasps.shape)
    print(scores.shape)


0
torch.Size([48, 48, 48])
torch.Size([480, 7])
torch.Size([480])
1
torch.Size([48, 48, 48])
torch.Size([480, 7])
torch.Size([480])
2
torch.Size([48, 48, 48])
torch.Size([480, 7])
torch.Size([480])
3
torch.Size([48, 48, 48])
torch.Size([480, 7])
torch.Size([480])
4
torch.Size([48, 48, 48])
torch.Size([480, 7])
torch.Size([480])
5
torch.Size([48, 48, 48])
torch.Size([479, 7])
torch.Size([479])


In [3]:
# Problem: Some scenes have 480 grasps, some have 476-479
# SOLVED by padding the scenes with random grasps from the same scene

from dataset import GraspDataset
dataset = GraspDataset(Path("data/processed"))

# Count the grasps of every scene in a single pass, so the dataset is only
# iterated once instead of once for the minimum and again for the maximum
grasp_counts = [len(scene_data["grasps"]) for scene_data in dataset]

# Find out what the minimum number of grasps is
min_grasps = min(grasp_counts)
print(min_grasps)

# Find out what the maximum number of grasps is
max_grasps = max(grasp_counts)
print(max_grasps)

480
480


In [4]:
# Test worker logic in GraspBatchIterableDataset

import math
import random

# Simulate a dataset with N scenes
num_scenes = 10
scene_indices = list(range(num_scenes))

# Shuffle if desired. A dedicated Random instance gives the same fixed-seed
# order as seeding the module-level generator, but leaves the global random
# state untouched so other cells are not affected by running this one.
shuffle_scenes = True
if shuffle_scenes:
    random.Random(42).shuffle(scene_indices)

# Simulate DataLoader settings
num_workers = 3

# The chunk size is the same for every worker, so compute it once.
# Rounding up means the last worker(s) may receive fewer (or zero) scenes.
per_worker = math.ceil(len(scene_indices) / num_workers)

# Simulate how each worker would get a contiguous subset of scenes
assigned_indices = []
for worker_id in range(num_workers):
    start = worker_id * per_worker
    end = min(start + per_worker, len(scene_indices))
    indices_to_process = scene_indices[start:end]
    assigned_indices.extend(indices_to_process)

    print(f"Worker {worker_id}: processing scenes {indices_to_process}")

# Every scene must be handed to exactly one worker, in the shuffled order
assert assigned_indices == scene_indices, "worker shards do not cover the scenes exactly once"

Worker 0: processing scenes [7, 3, 2, 8]
Worker 1: processing scenes [5, 6, 9, 4]
Worker 2: processing scenes [0, 1]


In [1]:
# Smoke-test GraspBatchIterableDataset by pulling a few batches through a DataLoader

from torch.utils.data import DataLoader
from pathlib import Path
from dataset import GraspDataset, GraspBatchIterableDataset  # dataset.py must be importable from the notebook's working directory

# Location of the processed scenes; change this to match the local data folder
data_path = Path("data/processed")

# Scene-level dataset, wrapped by the dataset that yields grasp batches
scene_dataset = GraspDataset(data_path)
batch_dataset = GraspBatchIterableDataset(scene_dataset, grasp_batch_size=32)

# batch_size=None switches off the DataLoader's own batching, so whatever the
# iterable dataset yields is passed through unchanged
loader = DataLoader(
    batch_dataset,
    batch_size=None,
    num_workers=0,  # raise above 0 to exercise the multi-worker path
)

# Preview only the first 3 batches (indices 0..2)
last_preview_index = 2
for i, batch in enumerate(loader):
    sdf, grasp_batch, score_batch = batch

    print(f"\n🟢 Batch {i}")
    print(f"  SDF shape:         {sdf.shape}")            # (D, D, D) with D=48
    print(f"  Grasp batch shape: {grasp_batch.shape}")    # (B, G_dim) with G_dim=7
    print(f"  Score batch shape: {score_batch.shape}")    # (B,)

    leading_scores = score_batch[:3].tolist()
    print(f"  First 3 scores:     {leading_scores}")

    # Stop right after printing the last previewed batch so no extra batch is fetched
    if i == last_preview_index:
        break


🟢 Batch 0
  SDF shape:         torch.Size([48, 48, 48])
  Grasp batch shape: torch.Size([32, 7])
  Score batch shape: torch.Size([32])
  First 3 scores:     [9.375739097595215, -1.0, 13.231476783752441]

🟢 Batch 1
  SDF shape:         torch.Size([48, 48, 48])
  Grasp batch shape: torch.Size([32, 7])
  Score batch shape: torch.Size([32])
  First 3 scores:     [-1.5, 3.9109270572662354, -1.5]

🟢 Batch 2
  SDF shape:         torch.Size([48, 48, 48])
  Grasp batch shape: torch.Size([32, 7])
  Score batch shape: torch.Size([32])
  First 3 scores:     [-1.5, 10.475044250488281, 9.386157035827637]
