# Dataset

Inspect, merge and balance datasets.

In [None]:
# IPython autoreload in mode 2: imported modules are reloaded before each cell
# is executed, so edits to the project sources are picked up without a restart.
%load_ext autoreload
%autoreload 2

import os
# Move the working directory one level up; the relative "data/..." paths used
# by later cells are resolved against it (presumably the project root - confirm).
# NOTE: the change is relative, so re-running this cell moves up another level.
os.chdir('..')

In [None]:
from __future__ import print_function, division

from pathlib2 import Path
import subprocess

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import rospy
import tqdm

from vgn.dataset import Dataset
from vgn.utils.transform import Transform, Rotation
from vgn import vis

In [None]:
# Register this notebook with ROS as a node named "dataset".
# NOTE(review): presumably needed by the visualization cell further down - confirm.
rospy.init_node("dataset")

In [None]:
# Folder of the dataset that the cells below inspect and modify.
data_dir = Path("data/datasets/train")

# Entries inside the dataset folder: the grasp table, the raw .npz files and
# the folder that receives the generated TSDF volumes.
csv_path = data_dir.joinpath("grasps.csv")
raw_dir = data_dir.joinpath("raw")
tsdf_dir = data_dir.joinpath("tsdfs")

## Statistics

In [None]:
df = pd.read_csv(csv_path)

# Partition the grasp table by its binary "label" column.
labels = df["label"]
positives = df[labels == 1]
negatives = df[labels == 0]

# Report the size of the full table and of each class.
for caption, frame in (
    ("Number of samples:", df),
    ("Number of positives:", positives),
    ("Number of negatives:", negatives),
):
    print(caption, len(frame.index))

Angle (in degrees) between the grasp approach direction and the negative Z axis, computed for every positive sample.

In [None]:
# For every positive grasp, compute the angle in degrees between its approach
# direction (third column of the orientation's rotation matrix) and the
# negative Z axis. The result is stored in `angles`, one entry per positive.
N = len(positives.index)
angles = np.empty(N)
neg_z = np.r_[0.0, 0.0, -1.0]

# Fetch all quaternions with a single slice instead of one .loc lookup per row.
quats = positives.loc[:, "qx":"qw"].to_numpy()

for i, quat in enumerate(quats):
    ori = Rotation.from_quat(quat)
    approach = ori.as_dcm()[:, 2]
    # Floating-point round-off can push the dot product of two unit vectors
    # marginally outside [-1, 1], where arccos returns NaN; clamp it first.
    cos_angle = np.clip(np.dot(approach, neg_z), -1.0, 1.0)
    angles[i] = np.rad2deg(np.arccos(cos_angle))

In [None]:
# Histogram of the values in `angles`, drawn on an explicitly created axes.
fig, ax = plt.subplots()
ax.hist(angles)
ax.set_xlabel("Angle")
ax.set_ylabel("Count")
ax.set_title("Angle between approach and Z-axis")
plt.show()

## Balance Dataset

Discard a random subset of negative samples to ensure the same number of positive and negative grasp samples.

In [None]:
# Reload the grasp table and split it by label.
df = pd.read_csv(csv_path)
positives = df[df["label"] == 1]
negatives = df[df["label"] == 0]

# Number of negatives that have to go for both classes to be equally large.
num_surplus = len(negatives.index) - len(positives.index)

# Only negatives are ever discarded. When they do not outnumber the positives
# the surplus is zero or negative; np.random.choice rejects a negative sample
# size, so the table is left untouched in that case.
if num_surplus > 0:
    drop_indices = np.random.choice(negatives.index, num_surplus, replace=False)
    df = df.drop(drop_indices)

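*DANGER ZONE* - overwrite `grasps.csv` on disk with the balanced dataframe, then list the raw data files that are no longer referenced by it. The actual deletion (`f.unlink()`) is commented out by default; uncomment it to remove the listed files.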

In [None]:
# Write the current dataframe to csv_path, replacing the file's previous
# contents; the dataframe index is not written.
df.to_csv(csv_path, index=False)

In [None]:
# Report every raw .npz file whose stem is not listed in the "scene_id" column
# of the current dataframe.
scenes = df["scene_id"].values

# File stems are strings, so compare against string IDs. A set is built once
# to avoid scanning the whole ID array for every file.
referenced_scenes = set(str(scene) for scene in scenes)

for f in raw_dir.iterdir():
    if f.suffix == ".npz" and f.stem not in referenced_scenes:
        print("Removing ", f)
        # Dry run by default: uncomment the next line to actually delete the file.
        # f.unlink()

## Generate TSDFs

In [None]:
from vgn.perception import TSDFVolume
from vgn.simulation import GraspSimulation

# For every raw .npz file, integrate its depth images into a TSDF volume and
# store two snapshots under the same file name in tsdf_dir: "partial" (after
# the first n images) and "complete" (after all images).
sim = GraspSimulation("blocks", gui=False)
tsdf_dir.mkdir(exist_ok=True)

# The directory listing is materialized so that tqdm knows the total count.
for raw_file in tqdm.tqdm(list(raw_dir.iterdir())):
    if raw_file.suffix != ".npz":
        continue

    # An .npz archive stays open until it is closed explicitly; read what is
    # needed and release the file handle right away.
    with np.load(str(raw_file)) as raw:
        depth_imgs = raw["depth_imgs"]
        extrinsics = raw["extrinsics"]
        n = int(raw["n"])

    # The partial snapshot is taken after the n-th image. Without this check an
    # out-of-range n would leave `partial` undefined for the first file, or
    # silently reuse the previous file's volume for later ones.
    num_imgs = depth_imgs.shape[0]
    if not 1 <= n <= num_imgs:
        raise ValueError(
            "{}: n={} is outside the valid range 1..{}".format(raw_file, n, num_imgs)
        )

    tsdf = TSDFVolume(sim.size, 40)
    partial = None
    for i in range(num_imgs):
        extrinsic = Transform.from_list(extrinsics[i])
        tsdf.integrate(depth_imgs[i], sim.camera.intrinsic, extrinsic)
        if i + 1 == n:
            # NOTE(review): assumes get_volume() returns a snapshot that later
            # integrate() calls do not modify - confirm.
            partial = tsdf.get_volume()
    complete = tsdf.get_volume()

    tsdf_file = tsdf_dir / raw_file.name
    np.savez_compressed(str(tsdf_file), partial=partial, complete=complete)

## Visualize

Visualize random grasp samples in rviz.

In [None]:
# Load the dataset with its "partial" TSDFs and draw one randomly picked sample.
dataset = Dataset(data_dir, tsdf="partial")
sample_idx = np.random.choice(len(dataset))
x, y, index = dataset[sample_idx]
vis.draw_sample(x, y, index)

## Merge Datasets

Merge multiple dataset folders into a single dataset.

In [None]:
# Folder that holds the individual datasets.
root = Path("data/datasets")

# Names of the dataset folders to merge, and of the folder that receives them.
target = "train"
sources = ["train8", "train9"]

target_dir = root.joinpath(target)
target_csv_path = target_dir.joinpath("grasps.csv")

In [None]:
import shutil

# Combine the grasp tables of all source datasets (plus the target's existing
# table, if present) into target_df and move the sources' raw .npz files into
# the target's raw folder.
frames = [pd.read_csv(target_csv_path)] if target_csv_path.exists() else []

# Create the destination up front so the files have a directory to land in.
target_raw_dir = target_dir / "raw"
target_raw_dir.mkdir(parents=True, exist_ok=True)

for source in sources:
    source_dir = root / source

    # Collect the source table. A dedicated name is used on purpose: reusing
    # `csv_path` here would redirect the notebook-wide variable that the
    # balancing cells write to.
    source_csv_path = source_dir / "grasps.csv"
    frames.append(pd.read_csv(source_csv_path))

    # Move raw data file by file instead of through a shell command string.
    # The listing is materialized first because the folder is modified while
    # moving. An explicit destination path replaces an existing file of the
    # same name, as `mv` would.
    for npz_file in sorted((source_dir / "raw").glob("*.npz")):
        shutil.move(str(npz_file), str(target_raw_dir / npz_file.name))

# Concatenate once; the fresh index avoids duplicate row labels in target_df.
target_df = pd.concat(frames, ignore_index=True) if frames else pd.DataFrame()

In [None]:
# Write the merged table to the target dataset's grasps.csv, replacing the
# file's previous contents; the dataframe index is not written.
target_df.to_csv(target_csv_path, index=False)