# Evaluation

Network inspection, learning curves, metrics, and grasp trials.

In [None]:
# Enable IPython's autoreload extension in mode 2, so that imported modules
# are reloaded before each cell executes and source edits take effect without
# restarting the kernel.
%load_ext autoreload
%autoreload 2

In [None]:
import os

# Work from the parent of the directory the notebook was started in, so that
# the relative "data/..." paths used in later cells resolve from there.
# The starting directory is recorded the first time this cell runs: a bare
# os.chdir('..') would climb one level higher on every re-execution of the
# cell, whereas this form always lands in the same place.
if "NOTEBOOK_DIR" not in globals():
    NOTEBOOK_DIR = os.getcwd()
os.chdir(os.path.dirname(NOTEBOOK_DIR))

In [None]:
from __future__ import print_function, division

from pathlib2 import Path
import time

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import rospy
import torch

from vgn.baselines import GPD
from vgn import benchmark, vis
from vgn.dataset import Dataset
from vgn.detection import *
from vgn.utils.transform import Rotation, Transform

In [None]:
# Register this kernel as a ROS node named "vgn_evaluation".
# NOTE(review): per the rospy documentation, anonymous=True appends a unique
# suffix to the node name, which should allow several notebook kernels to run
# at the same time — confirm this is the intent.
rospy.init_node("vgn_evaluation", anonymous=True)

## Network

Compute loss and accuracy on the test set and visualize failures.

In [None]:
# Checkpoint of the network to inspect and the directory of the test dataset.
# Both are given relative to the working directory, like the other "data/..."
# paths in this notebook, instead of an absolute path that only exists on one
# machine. This assumes the working directory is the package root (the
# directory that contains "data/").
model = Path("data/runs/200713-2255,dataset=train,augment=True,net=conv,batch_size=32,lr=3e-04,rot-loss-only/vgn_conv_24.pth")
dataset_dir = Path("data/datasets/test")

# Use the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
net = load_network(model, device)

# `dataset` is only ever bound to the Dataset object; the directory keeps its
# own name so the variable does not change type halfway through the cell.
dataset = Dataset(dataset_dir)

Draw ground truth and predictions for random samples.

## Benchmark

Metrics and failure cases of a clutter removal experiment.

In [None]:
# Log directory of the clutter removal experiment to analyse. Given relative
# to the working directory, like the other "data/..." paths in this notebook,
# rather than as an absolute path tied to one machine (assumes the working
# directory is the package root that contains "data/").
logdir = Path("data/experiments/gpd_deepen_positive_scores/test")

Compute metrics.

In [None]:
# Summarise the experiment stored in `logdir` and print one line per metric.
metrics = benchmark.compute_metrics(logdir)
n_grasps, success_rate, percent_cleared, planning_time = metrics

summary = [
    ("# grasps:       ", n_grasps),
    ("Success rate:   ", success_rate),
    ("Percent cleared:", percent_cleared),
]
for label, value in summary:
    print(label, value)

# planning_time is indexed by "mean" and "std".
print("Planning time:   {} +- {}".format(planning_time["mean"], planning_time["std"]))

Visualize failure cases in rviz.

In [None]:
# Load the grasps logged during the experiment and keep the indices of the
# rows whose label is 0.
dataset = Dataset(logdir)
is_failure = dataset.df["label"] == 0
failures = dataset.df.loc[is_failure].index.tolist()

# A single iterator is shared by the two visualization cells that follow, so
# each of them continues where the other one stopped.
iterator = iter(failures)

In [None]:
# visualize one by one
i = next(iterator)
print(i)
dataset.draw(i, 0.05)

In [None]:
# iterate with some time delay
for i in iterator:
    dataset.draw(i, 0.05)
    time.sleep(2.0)

## Learning Curves

Loss and grasp metrics vs training epochs.

In [None]:
# Checkpoints to benchmark, identified by the epoch after which they were saved.
epochs_to_evaluate = [5, 10]

# Name of the experiment (used for the log directory, the figure title and the
# figure file name) and the object set that forms the next path component.
exp_name = "06_train_augment"
object_set = "adversarial"

# Training run that holds the checkpoints and the exported loss CSV files.
run_dir = Path("data/runs/200610-2009,dataset=train,augment=True,net=conv,batch_size=32,lr=3e-04")

Run a clutter removal experiment for each epoch in `epochs_to_evaluate`.

In [None]:
# Benchmark the checkpoint of every requested epoch, skipping epochs whose log
# directory already exists.
for epoch in epochs_to_evaluate:
    # Checkpoint file of this epoch and the directory its benchmark logs go to.
    model = run_dir / ("vgn_conv_" + str(epoch) + ".pth")
    logdir = Path("data") / "experiments" / exp_name / object_set / str(epoch)    
    
    if logdir.exists():
        continue  # manually delete folder to rerun benchmark

    # NOTE(review): `TODO` is an unfilled placeholder. No such name is defined
    # in the visible part of this notebook, so this call raises NameError for
    # every epoch that does not have a log directory yet. The actual arguments
    # (presumably a planner built from `model`, plus `logdir`) still have to
    # be supplied — confirm against the signature of benchmark.run.
    benchmark.run(TODO)

Read the train and validation losses. The CSV files need to be downloaded from TensorBoard first and saved as `train/loss.csv` and `validation/loss.csv` inside the run directory.

In [None]:
# Read the exported loss curve of each split; every CSV provides the epoch in
# its "Step" column and the loss in its "Value" column.
curves = {}
for split in ("train", "validation"):
    df = pd.read_csv(run_dir / split / "loss.csv")
    curves[split] = (df["Step"].to_numpy(), df["Value"].to_numpy())

train_epochs, train_loss = curves["train"]
val_epochs, val_loss = curves["validation"]

Read the benchmark metrics.

In [None]:
# Collect the success rate of the benchmark run of every evaluated epoch.
root = Path("data") / "experiments" / exp_name / object_set
success_rates = []
for epoch in epochs_to_evaluate:
    log_dir = root / str(epoch)
    # Use the same helper as the other cells of this notebook: it yields four
    # values (n_grasps, success_rate, percent_cleared, planning_time), with
    # the success rate in second position.
    _, success_rate, _, _ = benchmark.compute_metrics(log_dir)
    success_rates.append(success_rate)

Create plot.

In [None]:
# Losses go on the left y-axis; the success rate gets its own y-axis on the
# right that shares the epoch axis.
fig, ax1 = plt.subplots()
ax2 = ax1.twinx()

(train_line,) = ax1.plot(train_epochs, train_loss, color="C0")
(val_line,) = ax1.plot(val_epochs, val_loss, color="C1")
(success_line,) = ax2.plot(epochs_to_evaluate, success_rates, color="C2")

ax1.set(xlabel="Epoch", ylabel="Loss")
ax2.set(ylabel="%")

# One legend per axis, placed in opposite corners.
ax1.legend([train_line, val_line], ["train", "validation"], loc="lower left")
ax2.legend([success_line], ["success rate"], loc="upper right")

fig.suptitle(exp_name)

# Store the figure as "<exp_name>.png" on the user's desktop, then display it.
fig_path = Path.home() / "Desktop" / (exp_name + ".png")
plt.savefig(str(fig_path))
plt.show()

## Performance vs Amount of Scene Information

Evaluate grasp performance with increasingly complete scene reconstructions.

In [None]:
# N is handed to benchmark.run as `N` and is also the exclusive upper bound of
# the viewpoint counts 1 .. N-1 that get evaluated and plotted.
N = 8

# Directory under which the results of this experiment are stored, one
# sub-directory per method and viewpoint count.
root = Path("data/experiments/scene_information")

In [None]:
# Planner to benchmark. The commented-out line is the alternative planner
# (VGN, loaded from a checkpoint); GPD is the one currently active.
# NOTE(review): the benchmark loop in this section always writes its logs to
# the "gpd" sub-directory, while the plotting cells read from "vgn" — keep
# those directory names in sync with the planner chosen here.
#grasp_planner = VGN(Path("/home/michel/catkin_ws/src/vgn/data/runs/200611-1140,dataset=train,augment=True,net=conv,batch_size=32,lr=3e-04/vgn_conv_30.pth"))
grasp_planner = GPD()

In [None]:
# Settings shared by every run; only `n` and the log directory change.
run_settings = dict(
    object_set="test",
    object_count=5,
    rounds=40,
    N=N,
    sim_gui=True,
)

# One benchmark run per value of n from 1 to N-1, each logged to its own
# zero-padded sub-directory ("01", "02", ...).
for n in range(1, N):
    run_logdir = root / "gpd" / str(n).zfill(2)
    benchmark.run(grasp_planner, run_logdir, n=n, **run_settings)

Plot performance vs number of viewpoints.

In [None]:
# Load the metrics of the "vgn" run of every viewpoint count 1 .. N-1.
viewpoints = np.arange(1, N)
results = [
    benchmark.compute_metrics(root / "vgn" / str(n).zfill(2))
    for n in viewpoints
]

# compute_metrics yields the success rate in second and the percent cleared in
# third position.
success_rates = {"vgn": [res[1] for res in results]}
percent_cleared = {"vgn": [res[2] for res in results]}

In [None]:
# Success rate (solid) and percent cleared (dashed) against the number of
# viewpoints. Both curves get a legend entry and both axes are labelled, so
# the figure can be read on its own.
fig, ax = plt.subplots()
ax.plot(viewpoints, success_rates["vgn"], color="C0", label="VGN success rate")
ax.plot(viewpoints, percent_cleared["vgn"], color="C0", linestyle="--", label="VGN percent cleared")
ax.set_xlabel("Number of viewpoints")
ax.set_ylabel("%")
# Upper limit slightly above 100 so a curve at 100 % is not clipped by the frame.
ax.set_ylim(0, 101)
ax.legend()