In [1]:
import os

# Expose only GPU index 1 to this kernel. This is the first cell, so the
# variable is set before torch gets imported in the next cell.
os.environ.update(CUDA_VISIBLE_DEVICES="1")

In [2]:
%matplotlib inline
%load_ext autoreload
%autoreload 2

import os

import dill
import hydra
import imageio
import matplotlib.pyplot as plt
import numpy as np
import torch
from torch.utils.data import DataLoader

import sys

# Make the diffusion_policy checkout importable for the project imports below.
# Guarded so that re-running this cell does not keep appending the same entry.
repo_root = "/home/diffusion_policy"
if repo_root not in sys.path:
    sys.path.append(repo_root)
from diffusion_policy.dataset.base_dataset import BaseImageDataset
from diffusion_policy.policy.base_image_policy import BaseImagePolicy
from diffusion_policy.workspace.base_workspace import BaseWorkspace

# Print numpy arrays and torch tensors with two decimals and without
# scientific notation.
precision = 2
np.set_printoptions(precision=precision, suppress=True)
torch.set_printoptions(sci_mode=False, precision=precision)

In [3]:
# Training run to inspect. An earlier run, kept for reference:
#   /home/diffusion_policy/data/outputs/2023.04.09/21.39.45_train_diffusion_unet_hybrid_pick_up_ball
rundir = "/home/diffusion_policy/data/outputs/2023.04.10/00.43.12_train_diffusion_unet_hybrid_pick_up_ball/"
# Path of the "latest" checkpoint inside that run's checkpoints folder.
ckpt_path = os.path.join(os.path.join(rundir, "checkpoints"), "latest.ckpt")

In [4]:
# Read the checkpoint payload with dill as the pickle module. The file is
# opened in a `with` block so the handle is closed even if loading fails.
# NOTE(review): unpickling can execute arbitrary code — only load checkpoints
# that come from a trusted source.
with open(ckpt_path, 'rb') as ckpt_file:
    payload = torch.load(ckpt_file, pickle_module=dill)
# Configuration object stored alongside the weights in the payload.
cfg = payload['cfg']

In [5]:
# Override the loader settings stored in the checkpoint config: use the same
# batch size and disable pinned memory for both the train and validation loaders.
batch_size = 25
for loader_key in ("dataloader", "val_dataloader"):
    cfg[loader_key]["batch_size"] = batch_size
    cfg[loader_key]["pin_memory"] = False

In [6]:
# Resolve the workspace class named by the config, build it from that config,
# then restore its state from the loaded payload (no key filtering).
cls = hydra.utils.get_class(cfg._target_)
workspace: BaseWorkspace = cls(cfg)
workspace.load_payload(payload, exclude_keys=None, include_keys=None)



using obs modality: low_dim with keys: ['joint_states']
using obs modality: rgb with keys: ['images']
using obs modality: depth with keys: []
using obs modality: scan with keys: []




Diffusion params: 6.511707e+07
Vision params: 1.119709e+07


In [7]:
# Start from the workspace's model and switch to the EMA copy when the run
# was trained with EMA enabled.
policy: BaseImagePolicy = workspace.model
if cfg.training.use_ema:
    policy = workspace.ema_model

# Put the policy in eval mode and move it to the GPU.
device = torch.device('cuda')
policy.eval()
policy.to(device)

# Inference parameters.
policy.num_inference_steps = 200 # DDIM inference iterations
policy.n_action_steps = policy.horizon - policy.n_obs_steps + 1

In [11]:
# Training dataset, instantiated from the task section of the config.
dataset: BaseImageDataset = hydra.utils.instantiate(cfg.task.dataset)
assert isinstance(dataset, BaseImageDataset)
train_dataloader = DataLoader(dataset, **cfg.dataloader)
normalizer = dataset.get_normalizer()

# Validation dataset obtained from the training dataset object, with its own
# loader settings.
val_dataset = dataset.get_validation_dataset()
val_dataloader = DataLoader(val_dataset, **cfg.val_dataloader)

In [12]:
# Number of batches the training loader yields in one pass.
len(train_dataloader)

374

In [13]:
# Inspect the validation split: take the first batch its loader produces.
dataloader = val_dataloader
batch = next(iter(val_dataloader))

In [None]:
# Settings for the single-sample inspection cells that follow.
batch_idx = 0       # index of the sample inside `batch` to look at
n_action_steps = 4  # number of leading action steps to compare
policy.num_inference_steps = 100 # DDIM inference iterations

In [None]:
# Pick the sample to visualise and turn its image stack into uint8 video frames.
batch_idx = 5
# assumes the stack is (T, C, H, W) with float values in [0, 1] — TODO confirm.
# The transpose moves the channel axis last, as the video writer expects frames
# in (H, W, C) layout.
images = batch["obs"]["images"][batch_idx].detach().cpu().numpy().transpose(0,2,3,1)
# Clip before the cast: casting floats outside [0, 255] to uint8 wraps around
# instead of saturating.
images = np.clip(images * 255, 0, 255).astype(np.uint8)

In [None]:
# Shape of the uint8 frame array held in `images`.
images.shape

In [None]:
import imageio
from IPython.display import Video


In [None]:
# Write the frames to an mp4 at 15 fps. The writer is used as a context manager
# so the file is finalised and closed even if appending a frame raises.
output_filepath = "/home/video/output.mp4"
with imageio.get_writer(output_filepath, fps=15) as writer:
    for frame in images:
        writer.append_data(frame)

In [None]:
# NOTE(review): this import rebinds `Video`, which an earlier cell imported
# from IPython.display — from here on `Video` is the ipywidgets class.
from ipywidgets import Video
# Build a 320x320 video widget from the file at `output_filepath`.
Video.from_file(output_filepath, width=320, height=320)

In [None]:
# Run the policy on the inspection batch.
# The policy was moved to `device`, while the batch comes straight from the
# loader (presumably CPU tensors), so the observations are moved first; `.to`
# is a no-op for tensors that are already there.
# assumes batch["obs"] is a flat dict of tensors — TODO confirm.
obs = {key: value.to(device) for key, value in batch["obs"].items()}
# Inference only: no_grad avoids recording an autograd graph across the
# sampling iterations.
with torch.no_grad():
    result = policy.predict_action(obs)
pred_action = result['action_pred'].detach().cpu().numpy()

In [None]:
# Predicted actions for sample `batch_idx`, first `n_action_steps` steps.
pred_action[batch_idx, :n_action_steps]

In [None]:
# Actions stored in the batch for sample `batch_idx`, first `n_action_steps` steps.
batch["action"][batch_idx, :n_action_steps]

In [None]:
# Signed difference (prediction minus batch action) for the selected sample
# over the first `n_action_steps` steps.
gt_steps = batch["action"][batch_idx, :n_action_steps].detach().cpu().numpy()
pred_steps = pred_action[batch_idx, :n_action_steps]
error = pred_steps - gt_steps
error

In [None]:
# Collect predicted and stored actions over the first validation batches.
# The previous `if i > 4: break` stopped after batches 0..5, i.e. six batches;
# the count is now spelled out.
max_batches = 6

dataloader = val_dataloader
pred, gt = [], []

with torch.no_grad():
    # NOTE: the loop variable is deliberately still called `batch`, so later
    # cells that read `batch` see the same object as before.
    for i, batch in enumerate(dataloader):
        # The policy lives on `device`; the loader presumably yields CPU tensors,
        # so move the observations over (no-op if they are already there).
        # assumes batch["obs"] is a flat dict of tensors — TODO confirm.
        obs = {key: value.to(device) for key, value in batch["obs"].items()}

        result = policy.predict_action(obs)
        pred_action = result['action_pred'].detach().cpu().numpy()
        pred.append(pred_action)

        gt_action = batch["action"].detach().cpu().numpy()
        gt.append(gt_action)

        if i + 1 >= max_batches:
            break

In [None]:
# Join the per-batch arrays along the first axis.
# Guarded on the list type so the cell can be re-run safely: concatenating an
# already-joined array would treat its first axis as the sequence and silently
# merge the first two axes.
if isinstance(pred, list):
    pred = np.concatenate(pred)
if isinstance(gt, list):
    gt = np.concatenate(gt)

In [None]:
# Elementwise error magnitude (square root of the squared difference) over the
# first `n_pred_timestep` steps, averaged over the first two axes so that one
# value per remaining dimension is displayed.
n_pred_timestep = 8
pred_head = pred[:, :n_pred_timestep]
gt_head = gt[:, :n_pred_timestep]
error = np.sqrt((pred_head - gt_head) ** 2)
error.mean(axis=(0, 1))

In [None]:
# Predictions for the first collected sample, first `n_pred_timestep` steps.
pred[0, :n_pred_timestep]

In [None]:
# Stored actions for the first collected sample, first `n_pred_timestep` steps.
gt[0, :n_pred_timestep]

#
train: 0.0072, 0.0047, 0.0066, 0.1946, 0.0163, 0.0182, 0.007
val:   0.0223, 0.0155, 0.0212, 0.7429, 0.0603, 0.0578, 0.027 


- I'm suspicious of the first euler angle values
- the outputs seem to be very inconsistent
- are they inconsistent in the training data?
- is the normalization doing something weird?

In [None]:
# "offset" entry of the parameters the policy's normalizer holds for "action".
policy.normalizer["action"].params_dict["offset"]

In [None]:
# "scale" entry of the parameters the policy's normalizer holds for "action".
policy.normalizer["action"].params_dict["scale"]

- ok so the offset is tiny so that's probably fine
- but the scale is quite small
- which I assume means that its range is small
    - is that the case?
    - how are the limits computed?
    

In [None]:
# Gather the actions from the first 11 training batches (i = 0..10).
actions = []
for i, batch in enumerate(train_dataloader):
    if i > 10:
        break
    actions.append(batch["action"])
actions = torch.cat(actions)
# Collapse all leading axes into rows while keeping the last axis at whatever
# width the data has. A hard-coded width would silently mix columns whenever
# the real action size differs from it.
actions = actions.reshape(-1, actions.shape[-1])
actions = actions.detach().cpu().numpy()

In [None]:
# Histogram (100 bins) of the last column of `actions`.
plt.hist(actions[:, -1], bins=100)

In [None]:
# Histogram (100 bins) of the last column of the replay buffer's "actions" entry.
plt.hist(dataset.replay_buffer["actions"][:,-1], bins=100)

In [None]:
# Materialise the replay buffer's actions as a fresh numpy array. np.array
# copies its input by default, so no additional .copy() is needed.
actions = np.array(dataset.replay_buffer["actions"])
# Pass them through the policy's "action" normalizer.
norm_actions = policy.normalizer["action"].normalize(actions)

In [None]:
# Histogram (100 bins) of the last column of the normalized actions.
plt.hist(norm_actions[:,-1].detach().cpu().numpy(), bins=100)

- can you convert the policy to torchscript?

In [15]:
# Attempt to trace the policy into TorchScript, using the current batch's
# observations as example input and skipping the post-trace check.
# NOTE(review): the recorded run of this cell ended with
#   "TypeError: type Tensor doesn't define __round__ method"
# so `scripted_policy` is not produced as written; the cause is not visible
# from this cell.
scripted_policy = torch.jit.trace(policy, example_inputs=batch["obs"], check_trace=False)

TypeError: type Tensor doesn't define __round__ method