In [None]:
import numpy as np
from pathlib import Path
import pandas as pd
from tqdm import tqdm

In [None]:
# Load the grasp table that lives next to the scene files.
root = Path("..") / "data" / "grasps" / "blocks"
grasps_csv = root.joinpath("grasps.csv")
df = pd.read_csv(grasps_csv)

In [None]:
# Summarise the dataset: scene count, grasp count, and the split by score label.
is_positive = df["score"] == 1
is_negative = df["score"] == 0
pos = df[is_positive]
neg = df[is_negative]

summary = [
    ("number of scenes:", np.unique(df.scene_id).size),
    ("Number of grasps:", df.shape[0]),
    ("Number of positives:", pos.shape[0]),
    ("Number of negatives:", neg.shape[0]),
]
for label, count in summary:
    print(label, count)

In [None]:
# Discard grasps whose position is outside the workspace on any axis.
# A row is dropped when x, y or z is strictly below the lower bound or
# strictly above the upper bound; rows with NaN coordinates compare False
# on both sides and are therefore kept.
lower_bound, upper_bound = 0.04, 0.26
coords = df[["x", "y", "z"]]
outside = ((coords < lower_bound) | (coords > upper_bound)).any(axis=1)
df.drop(index=df.index[outside.to_numpy()], inplace=True)

In [None]:
# Balance data (per scene)
# For every scene that has more negatives (score == 0) than positives
# (score == 1), randomly pick the surplus negatives and drop them so both
# classes end up with the same count. Scenes where negatives do not
# outnumber positives are left untouched.
# A seeded generator makes the selection reproducible across kernel restarts.
rng = np.random.default_rng(0)
scene_groups = df.groupby("scene_id")
indices = []
for scene_id, group in tqdm(scene_groups, total=scene_groups.ngroups):
    pos = group[group.score == 1]
    neg = group[group.score == 0]
    surplus = len(neg.index) - len(pos.index)
    if surplus > 0:
        # Sample row labels without replacement so no label is picked twice.
        indices.extend(rng.choice(neg.index.to_numpy(), size=surplus, replace=False).tolist())
df = df.drop(indices)

In [None]:
# Delete unreferenced scenes
# Any ".npz" file in `root` whose stem is not a scene_id still present in `df`
# is removed from disk.
scenes = df.scene_id.unique()
# File stems are always strings, so compare against string ids; this avoids a
# silent mismatch (and deletion of every file) if pandas parsed scene_id as a
# non-string dtype. A set also gives constant-time membership tests.
referenced_scenes = set(map(str, scenes))
# Snapshot the directory listing first so files are not unlinked while the
# directory iterator is still being consumed.
for f in list(root.iterdir()):
    if f.suffix == ".npz" and f.stem not in referenced_scenes:
        print("Removed", f)
        f.unlink()

In [None]:
# Persist the cleaned table back to the CSV it was loaded from (row labels omitted).
output_csv = root.joinpath("grasps.csv")
df.to_csv(output_csv, index=False)

In [None]:
# Merge multiple folders containing grasps
# Reads "grasps.csv" from every source folder, stacks the rows, and writes the
# combined table to the target folder.
# NOTE(review): `target_foler` is misspelled; the name is kept because cells
# outside this view may reference it.
target_foler = "../data/grasps/train"
source_folders = ["../data/grasps/packed", "../data/grasps/pile"]
# Make sure the destination exists; to_csv does not create missing directories.
Path(target_foler).mkdir(parents=True, exist_ok=True)
# ignore_index gives the merged frame fresh, unique row labels; otherwise each
# file's own labels repeat and label-based operations would hit several rows.
df = pd.concat(
    [pd.read_csv(Path(folder) / "grasps.csv") for folder in source_folders],
    ignore_index=True,
)
df.to_csv(Path(target_foler) / "grasps.csv", index=False)