In [1]:
# Render matplotlib figures inline in the notebook output.
%matplotlib inline
# Enable the autoreload extension in mode 2 so edits to imported project
# modules are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [2]:
import os
import datetime
import time
import pathlib
import sys

# Make the parent of the current working directory importable so that
# project-local packages can be imported from this notebook.
project_path = pathlib.Path('.').absolute().parent
# Guarded so that re-running this cell does not keep adding duplicate entries.
if str(project_path) not in sys.path:
    sys.path.insert(1, str(project_path))

In [3]:
import torch
from tqdm.notebook import tqdm
from torch.utils.data import DataLoader
import numpy as np
import pinocchio as pin
from cto.cvae import VAE
from dotmap import DotMap

In [4]:
# Recorded .npz archives used to build the training set; each one is opened
# with np.load and its 'data' entry is read.
data_paths = ['data/1121_150831.npz', 'data/1121_150929.npz', 'data/1121_150946.npz']

In [5]:
# Look-ahead horizon in samples: every training pair couples pose_n (the
# condition, Y) with pose_{n+H} (the target, X).
H = 10000


def _pose_to_vec6(sample):
    """Convert one logged sample into a 6-vector [translation, log3(rotation)].

    ``sample`` must provide "object_position" and "object_orientation"
    entries, which are stacked into an XYZQUAT vector and converted to SE3.
    """
    pose_XYZQUAT = np.hstack((sample["object_position"], sample["object_orientation"]))
    pose_SE3 = pin.XYZQUATToSE3(pose_XYZQUAT)
    vec = np.zeros(6)
    vec[:3] = pose_SE3.translation
    vec[3:] = pin.log3(pose_SE3.rotation)
    return vec


all_X = []
all_Y = []
for path in data_paths:
    # NOTE(security): allow_pickle=True unpickles objects stored in the
    # archive, which can execute arbitrary code. Only load trusted recordings.
    with np.load(path, allow_pickle=True) as loaded:
        data = loaded['data']

    # Number of (pose_n, pose_{n+H}) pairs this recording can provide.
    N = len(data) - H
    if N < 0:
        # Fail with the file name instead of numpy's opaque
        # "negative dimensions are not allowed" error.
        raise ValueError(
            f"{path} holds {len(data)} samples, fewer than the horizon H={H}"
        )
    X = np.zeros((N, 6))
    Y = np.zeros((N, 6))

    # y = pose_n, x = pose_{n+H}
    for n in range(N):
        Y[n, :] = _pose_to_vec6(data[n])
        X[n, :] = _pose_to_vec6(data[n + H])

    all_X.append(X)
    all_Y.append(Y)

# Stack the per-recording blocks into single (total_pairs, 6) arrays.
all_X = np.vstack(all_X)
all_Y = np.vstack(all_Y)

In [6]:
class Dataset(torch.utils.data.Dataset):
    """Map-style dataset that pairs row ``i`` of ``x`` with row ``i`` of ``y``."""

    def __init__(self, x, y):
        super().__init__()
        # Keep references to the 2-D inputs; nothing is copied or converted.
        self._x, self._y = x, y

    def __len__(self):
        """Return the number of samples (size of the first axis of ``x``)."""
        return self._x.shape[0]

    def __getitem__(self, index):
        """Return the ``(x, y)`` pair of rows stored at ``index``."""
        return self._x[index, :], self._y[index, :]
    
# Training set: x rows come from all_X, y rows from all_Y.
data_train = Dataset(x=all_X, y=all_Y)

In [7]:
def loss_fn(recon_x, x, mean, log_var):
    """Per-sample VAE loss: summed squared error plus KL divergence.

    Args:
        recon_x: reconstruction of ``x``.
        x: target batch; its first dimension is used as the batch size.
        mean: latent mean.
        log_var: latent log-variance.

    Returns:
        Scalar tensor ``(MSE_sum + KLD) / batch_size``.
    """
    batch_size = x.size(0)
    # Reconstruction term, summed over every element of the batch.
    reconstruction = torch.nn.functional.mse_loss(recon_x, x, reduction="sum")
    # KL divergence term computed from the latent mean and log-variance.
    kl_terms = 1 + log_var - mean.pow(2) - log_var.exp()
    kl_divergence = -0.5 * torch.sum(kl_terms)
    return (reconstruction + kl_divergence) / batch_size

In [8]:
# Seed PyTorch's global RNG so that weight initialisation, DataLoader
# shuffling and later latent sampling are repeatable under
# Restart Kernel -> Run All. (Some CUDA kernels may still be non-deterministic.)
SEED = 0
torch.manual_seed(SEED)

# Use the GPU when one is available, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
data_loader = DataLoader(data_train, batch_size=64, shuffle=True)

# Conditional VAE over 6-D pose vectors, conditioned on a 6-D pose vector.
vae = VAE(
        encoder_layer_sizes=[6, 128],
        latent_size=4,
        decoder_layer_sizes=[128, 6],
        conditional=True,
        cond_size=6).to(device)
optimizer = torch.optim.Adam(vae.parameters(), lr=1e-3)

In [9]:
# Training hyper-parameters, accessed with attribute syntax (args.epochs).
args = DotMap(epochs=50)

In [10]:
# Put the model in training mode explicitly: the inference cell further down
# calls vae.eval(), so re-running this cell afterwards would otherwise keep
# training in eval mode.
vae.train()
for epoch in tqdm(range(args.epochs)):
    for x, y in data_loader:
        # The dataset yields float64 numpy rows; convert to float32 on the device.
        x, y = x.to(device).float(), y.to(device).float()
        # x is the sample to reconstruct, y is passed as the second (condition) input.
        recon_x, mean, log_var, z = vae(x, y)
        loss = loss_fn(recon_x, x, mean, log_var)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

  0%|          | 0/50 [00:00<?, ?it/s]

In [11]:
# Pose used as the condition, given as [x, y, z, qx, qy, qz, qw].
pose_XYZQUAT = np.array([
    0.17930479, 0.06879323, 0.0325,
    0., 0., 0.24076294, 0.97058395,
])
pose_SE3 = pin.XYZQUATToSE3(pose_XYZQUAT)

# Same 6-D encoding as the training data: translation followed by log3(rotation).
test_x = np.zeros(6)
test_x[:3] = pose_SE3.translation
test_x[3:] = pin.log3(pose_SE3.rotation)
# Shape (1, 6), float32, on the model's device.
test_x = torch.tensor(test_x).repeat((1, 1)).to(device).float()

vae.eval()
# Draw one latent sample and decode it given the condition.
latent_sample = torch.randn([1, 4]).to(device)
recon_x = vae.inference(latent_sample, test_x).detach().cpu().numpy()
# Overwrite the generated z coordinate with the fixed height 0.0325.
recon_x[0, 2] = 0.0325
rotation_inter = pin.exp3(recon_x[0, 3:])
pose_inter = pin.SE3(rotation_inter, recon_x[0, :3])

In [12]:
import pybullet
import trifinger_simulation
from cto.mcts.pvmcts import PolicyValueMCTS
from cto.mcts.pvnet import PolicyValueNet, ValueClassifier
from cto.params import get_default_params, update_params
from cto.contact_modes import construct_contact_plan
from cto.envs.trifinger import TriFingerAndCube
from cto.policy import OpenLoopPolicy

pybullet build time: Jul 21 2022 19:50:14
  warn(


In [13]:
# URDF of the cube model that ships with trifinger_simulation.
cube_dir = trifinger_simulation.get_data_dir() / 'cube_v2'
object_urdf = str(cube_dir / 'cube_v2.urdf')
params = get_default_params(object_urdf)

# "trifingernyu" is the variant without arena; "trifinger_meta" is with arena.
finger_type = "trifingernyu"
visualization = True
max_goal_orn_diff = np.pi / 2

# Budget passed to the MCTS run.
max_budget_mcts = 50
verbose = True

In [14]:
# Initial and goal cube poses: position vector plus 4-element orientation.
init_pose_dict = dict(
    position=np.array([0.17930479, 0.06879323, 0.0325]),
    orientation=np.array([0., 0., 0.24076294, 0.97058395]),
)
goal_pose_dict = dict(
    position=np.array([0.06534854, -0.01630616, 0.0325]),
    orientation=np.array([0., 0., -0.45309934, 0.89146003]),
)

In [15]:
# Simulation environment for the fingers and cube, built from the planner
# parameters and the configuration flags set above.
env = TriFingerAndCube(
    params,
    visualization=visualization,
    init_difficulty=-1,
    finger_type=finger_type,
)
# Open-loop policy built from the environment's action space and finger model.
policy = OpenLoopPolicy(env.action_space, env.finger, time_step=0.001)

startThreads creating 1 threads.
starting thread 0
started thread 0 
argc=2
argv[0] = --unused
argv[1] = --start_demo_name=Physics Server
ExampleBrowserThreadFunc started
X11 functions dynamically loaded using dlopen/dlsym OK!
X11 functions dynamically loaded using dlopen/dlsym OK!
Creating context
Created GL 3.3 context
Direct GLX rendering context obtained
Making context current
GL_VENDOR=NVIDIA Corporation
GL_RENDERER=Quadro P1000/PCIe/SSE2
GL_VERSION=3.3.0 NVIDIA 470.141.03
GL_SHADING_LANGUAGE_VERSION=3.30 NVIDIA via Cg compiler
pthread_getconcurrency()=0
Version = 3.3.0 NVIDIA 470.141.03
Vendor = NVIDIA Corporation
Renderer = Quadro P1000/PCIe/SSE2
b3Printf: Selected demo: Physics Server
startThreads creating 1 threads.
starting thread 0
started thread 0 
MotionThreadFunc thread started
ven = NVIDIA Corporation
ven = NVIDIA Corporation


In [16]:
# Collects one merged observation per simulation step; it is filled by the
# execution cell that follows.
observation_list = []
# Reset the (already created) env to the requested poses, then read back the
# init/goal poses the env actually uses so the later reset reproduces them.
# NOTE(review): `obs` is not used in the visible cells.
obs = env.reset(init_pose_dict=init_pose_dict, 
                goal_pose_dict=goal_pose_dict)
xyz, quat = env.get_cube_pose()
init_pose_dict = {"position":xyz, "orientation":quat}
goal_pose_dict = {"position":env.goal[:3], "orientation":env.goal[3:]}

# set the mcts problem: init pose -> CVAE-sampled intermediate pose -> goal pose
pose_init = env.get_cube_pose_as_SE3()
pose_goal = pin.XYZQUATToSE3(env.goal)
# NOTE(review): `delta` is not used in the visible cells.
delta = pin.log6(pose_goal.act(pose_init.inverse()))
desired_poses = [pose_init, pose_inter, pose_goal]
# `params` is rebound to the updated parameters, so re-running this cell
# feeds already-updated params back into update_params.
params = update_params(params, desired_poses, repr="SE3")

# plan with an untrained mcts
mcts = PolicyValueMCTS(params, env)
mcts.run(state=[[0, 0, 0]], budget=max_budget_mcts, verbose=False)
state, sol = mcts.get_solution()



In [17]:
# if a solution is found, executet the plan in gym
observation = env.reset(goal_pose_dict=goal_pose_dict, init_pose_dict=init_pose_dict)
policy.reset()
policy.set_trajs(observation, state, sol, params)

for i in range(policy.x_des.shape[0]):
    action = policy.predict(observation)
    observation, reward, episode_done, info = env.step(action)
    policy_observation = policy.get_observation()
    is_done = policy.done or episode_done
    full_observation = {**observation, **policy_observation}
    observation_list.append(full_observation)

final_pos_err = observation_list[-1]["achieved_goal_position_error"]
final_orn_err = observation_list[-1]["achieved_goal_orientation_error"]
print("Final object position error: ", final_pos_err)
print("Final object orientation error: ", final_orn_err)

Final object position error:  0.009163810994577042
Final object orientation error:  0.14923658383455748
