# Data Analysis

In [38]:
import os
import json
from pathlib import Path
import pandas as pd
import numpy as np
import torch

In [34]:
# Load processed data
data_path = Path("data/processed")
data_files = []

for dir in data_path.iterdir():
    if dir.is_dir():
        scene_file = dir / 'scene.npz'
        if scene_file.exists():
            data_files.append(scene_file)

for idx in range(6):
    print(idx)
    scene_file = data_files[idx]
            
    scene_data = np.load(scene_file)

    # Only use the last 7 entries in each grasp, as they are the values of the hand pose
    grasps = scene_data["grasps"][:, -7:]

    # Convert to tensors
    sdf = torch.from_numpy(scene_data["sdf"])
    grasps = torch.from_numpy(grasps)
    scores = torch.from_numpy(scene_data["scores"])

    print(sdf.shape)
    print(grasps.shape)
    print(scores.shape)


0
torch.Size([48, 48, 48])
torch.Size([480, 7])
torch.Size([480])
1
torch.Size([48, 48, 48])
torch.Size([480, 7])
torch.Size([480])
2
torch.Size([48, 48, 48])
torch.Size([480, 7])
torch.Size([480])
3
torch.Size([48, 48, 48])
torch.Size([480, 7])
torch.Size([480])
4
torch.Size([48, 48, 48])
torch.Size([480, 7])
torch.Size([480])
5
torch.Size([48, 48, 48])
torch.Size([479, 7])
torch.Size([479])


: 

In [35]:
# Problem: Some scenes have 480 grasps, some have 479

from dataset import GraspDataset
dataset = GraspDataset(Path("data/processed"))

# Find out what the minimun number of grasps is
min_grasps = min(len(scene_data["grasps"]) for scene_data in dataset)
print(min_grasps)

# Find out what the maximum number of grasps is
max_grasps = max(len(scene_data["grasps"]) for scene_data in dataset)
print(max_grasps)

479
480


: 

In [36]:
# The minimum number of grasps is 479, the maximum is 480
# To make sure that all scenes have the same number of grasps, 
# we either need to remove the last grasp or duplicate the last grasp

# But first, let's check if the raw data has the same problem

# Load the first scene in the raw data
# Check grasp counts in raw data
raw_data_path = Path("data/raw")
raw_grasp_counts = []

for dir in raw_data_path.iterdir():
    if dir.is_dir():
        scene_file = dir / 'recording.npz'
        if scene_file.exists():
            scene_data = np.load(scene_file)
            raw_grasp_counts.append(len(scene_data["grasps"]))

print(f"Minimum grasps in raw data: {min(raw_grasp_counts)}")
print(f"Maximum grasps in raw data: {max(raw_grasp_counts)}")
print(f"Number of scenes with different grasp counts:")
for count in set(raw_grasp_counts):
    num_scenes = raw_grasp_counts.count(count)
    print(f"{count} grasps: {num_scenes} scenes")





Minimum grasps in raw data: 476
Maximum grasps in raw data: 480
Number of scenes with different grasp counts:
480 grasps: 11587 scenes
476 grasps: 2 scenes
477 grasps: 59 scenes
478 grasps: 481 scenes
479 grasps: 3418 scenes


: 

In [37]:
from dataset import GraspDataset
from torch.utils.data import DataLoader
from pathlib import Path

dataset = GraspDataset(Path("data/processed"))
dataloader = DataLoader(dataset, batch_size=10, shuffle=False)

sample = next(iter(dataloader))

print(sample.keys())

print(sample["sdf"].shape)
print(sample["grasps"].shape)
print(sample["scores"].shape)


RuntimeError: stack expects each tensor to be equal size, but got [480, 19] at entry 0 and [479, 19] at entry 5

: 

In [None]:
# TODO: Fix data loading, so that all scenes have the same number of grasps

: 