# Dataset

In [None]:
import os
# Step one directory up (presumably to the repository root, so that the
# relative "data/..." paths used below resolve -- confirm).
# NOTE: re-running this cell moves up one more level each time.
os.chdir('..')

In [None]:
from __future__ import division, print_function

from pathlib2 import Path
import subprocess

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import subprocess
from tqdm import tqdm

from vgn.utils.transform import Rotation, Transform

## Statistics

Compute the number of positive and negative samples in the dataset and plot the distribution of angles between the gravity vector and the $Z$ axis of the grasps.

In [None]:
# Dataset directory (relative path) whose grasps.csv is analysed below.
root = Path("data/datasets/foo")

In [None]:
# Load the grasp table and split it by label (1 = positive, 0 = negative).
df = pd.read_csv(root / "grasps.csv")

is_positive = df["label"] == 1
is_negative = df["label"] == 0
positives = df[is_positive]
negatives = df[is_negative]

# Report the size of the full table and of each label subset.
for caption, frame in [
    ("Number of samples:", df),
    ("Number of positives:", positives),
    ("Number of negatives:", negatives),
]:
    print(caption, len(frame.index))

In [None]:
# Angle (in degrees) between the Z axis of each positive grasp and the
# downward direction (0, 0, -1).
n = len(positives.index)  # only the first n positives are processed
angles = np.empty(n)
gravity = np.r_[0.0, 0.0, -1.0]
for i, index in tqdm(enumerate(positives.index[:n]), total=n):
    quat = df.loc[index, "qx":"qw"].to_numpy()
    # Third column of the rotation matrix = the grasp's Z axis.
    approach = Rotation.from_quat(quat).as_dcm()[:, 2]
    # Round-off can push the dot product marginally outside [-1, 1], where
    # arccos returns NaN; clip so (anti-)parallel axes yield 0 / 180 degrees.
    cos_angle = np.clip(np.dot(approach, gravity), -1.0, 1.0)
    angles[i] = np.rad2deg(np.arccos(cos_angle))

In [None]:
# Histogram of the values in `angles`, using 30 bins, on the current axes.
ax = plt.gca()
ax.hist(angles, bins=30)
ax.set_xlabel("Angle [deg]")
ax.set_ylabel("Count")
plt.show()

## Cleanup

Remove grasp positions that lie outside the workspace.

In [None]:
# Dataset directory and the grasp table inside it; the cleanup cells below
# read this CSV and later overwrite it.
root = Path("data/datasets/foo")
csv_path = root / "grasps.csv"

In [None]:
# Reload the grasp table and discard, in place, every grasp with at least one
# position coordinate outside the closed interval [0.02, 0.28].
df = pd.read_csv(csv_path)

lower, upper = 0.02, 0.28
position = df[["x", "y", "z"]]
outside = ((position < lower) | (position > upper)).any(axis=1)
df.drop(df[outside].index, inplace=True)

In [None]:
# Persist the current dataframe back to the file it was loaded from.
df.to_csv(csv_path, index=False)  # DANGER ZONE: overwrites the existing CSV file on disk

Remove unreferenced scenes.

In [None]:
# Delete every raw scene file (*.npz) whose stem is not a scene_id in df.
# A set gives constant-time membership tests instead of an element-wise
# NumPy comparison against the whole column for every file.
scenes = set(df["scene_id"].values)
# Snapshot the listing first so files are not unlinked while the directory
# is still being iterated.
for f in list((root / "raw").iterdir()):
    if f.suffix == ".npz" and f.stem not in scenes:
        print("Removed", f)
        f.unlink()

## Balance Dataset

Discard a random subset of negative samples to ensure the same number of positive and negative grasp samples.

In [None]:
# Dataset directory and the grasp table inside it; the balancing cells below
# read this CSV and later overwrite it.
root = Path("data/datasets/foo")
csv_path = root / "grasps.csv"

In [None]:
# Reload the grasp table and randomly drop surplus negatives so that the
# number of negatives matches the number of positives.
df = pd.read_csv(csv_path)

positives = df[df["label"] == 1]
negatives = df[df["label"] == 0]

# np.random.choice raises ValueError for a negative sample size, so only
# sample when negatives are actually in the majority; otherwise df is left
# untouched.
surplus = len(negatives.index) - len(positives.index)
if surplus > 0:
    i = np.random.choice(negatives.index, surplus, replace=False)
    df = df.drop(i)

In [None]:
# Persist the current dataframe back to the file it was loaded from.
df.to_csv(csv_path, index=False)  # DANGER ZONE: overwrites the existing CSV file on disk

## Generate TSDFs

In [None]:
# Dataset directory; raw scenes are read from root/raw and the generated
# TSDFs are written to root/tsdfs.
root = Path("data/datasets/foo")

In [None]:
from vgn.perception import TSDFVolume
from vgn.simulation import ClutterRemovalSim

# For every raw scene (*.npz) build a TSDF from its depth images and store
# two snapshots: "partial" (after the first n images) and "complete" (after
# all images).
raw_dir = root / "raw"
tsdf_dir = root / "tsdfs"
tsdf_dir.mkdir(exist_ok=True)

# The simulation is only used for its workspace size and camera intrinsics.
sim = ClutterRemovalSim("pile", "blocks", gui=False)

for raw_file in tqdm(list(raw_dir.iterdir())):
    if raw_file.suffix != ".npz":
        continue

    tsdf_file = tsdf_dir / raw_file.name
    if tsdf_file.exists():
        continue  # manually delete the folder in order to regenerate TSDFs

    # Read everything needed up front and close the archive again.
    with np.load(raw_file) as raw:
        depth_imgs = raw["depth_imgs"]
        extrinsics = raw["extrinsics"]
        n = int(raw["n"])

    # Reset per scene: otherwise an out-of-range n would silently reuse the
    # partial volume of the previously processed scene.
    partial = None
    tsdf = TSDFVolume(sim.size, 40)  # NOTE(review): 40 is presumably the grid resolution -- confirm
    for i in range(depth_imgs.shape[0]):
        extrinsic = Transform.from_list(extrinsics[i])
        tsdf.integrate(depth_imgs[i], sim.camera.intrinsic, extrinsic)
        if i + 1 == n:
            partial = tsdf.get_volume()
    complete = tsdf.get_volume()

    if partial is None:
        raise ValueError(
            "n={} is outside 1..{} for {}; cannot build the partial TSDF".format(
                n, depth_imgs.shape[0], raw_file
            )
        )

    np.savez_compressed(str(tsdf_file), partial=partial, complete=complete)

## Visualize

Make sure to have a ROS core running and open `config/sim.rviz` in RViz.

In [None]:
# Dataset directory handed to Dataset(...) for visualisation below.
root = Path("data/datasets/foo")

In [None]:
import rospy

from vgn.dataset import Dataset
from vgn import vis

# Start a ROS node for the visualisation calls below.
rospy.init_node("vgn_vis", anonymous=True)
# NOTE(review): 0.3 is presumably the workspace size used by the drawing
# helpers -- confirm against vgn.vis.
vis.set_size(0.3)

In [None]:
# Open the dataset with the "complete" reconstruction and augmentation off.
dataset = Dataset(root, reconstruction="complete", augment=False)

In [None]:
# Pick a uniformly random sample index and draw that sample; re-run the cell
# to look at another one.
i = np.random.choice(len(dataset))
x, y, index = dataset[i]
vis.draw_sample(x, y, index)

## Merge Datasets

In [None]:
# Datasets to merge and the dataset directory that receives them.
# NOTE: the merge below *moves* the .npz files out of the source directories.
sources = [Path("data/datasets/foo"),
           Path("data/datasets/bar")]
target = Path("data/datasets/merged")

In [None]:
import shutil

# Merge all source datasets into `target`: concatenate their grasp tables
# (on top of an existing target table, if any) and move their raw scenes and
# TSDFs into the target's sub-directories.
raw_target = target / "raw"
tsdf_target = target / "tsdfs"
# parents=True also creates `target` itself when it does not exist yet.
raw_target.mkdir(parents=True, exist_ok=True)
tsdf_target.mkdir(parents=True, exist_ok=True)

target_csv_path = target / "grasps.csv"
frames = [pd.read_csv(target_csv_path)] if target_csv_path.exists() else []

for source_dir in sources:
    # collect dataframes
    frames.append(pd.read_csv(source_dir / "grasps.csv"))
    # Move raw data and tsdfs file by file instead of through a shell `mv`:
    # no quoting problems with unusual paths, no argument-length limit for
    # large datasets, and failures raise instead of being silently ignored.
    for subdir, destination in (("raw", raw_target), ("tsdfs", tsdf_target)):
        source_subdir = source_dir / subdir
        if not source_subdir.is_dir():
            continue  # e.g. TSDFs have not been generated for this source
        for npz_file in sorted(source_subdir.glob("*.npz")):
            # An explicit destination file name replaces an existing file of
            # the same name, like `mv` does.
            shutil.move(str(npz_file), str(destination / npz_file.name))

target_df = pd.concat(frames) if frames else pd.DataFrame()

In [None]:
# Write the merged grasp table; this replaces any existing grasps.csv in the
# target dataset.
target_df.to_csv(target_csv_path, index=False)