### Semi-autonomous Teleoperation Demo : Place task in `Kitchen Plate` scene with `small size dish rack`

### Parse `Realistic Scene`

In [1]:
import mujoco #,cv2,pyvista
import torch 
import torch.nn as nn
import numpy as np
import matplotlib.pyplot as plt
import sys
sys.path.append('../../../')
from utils.mujoco_parser import MuJoCoParserClass
from baselines.rl.dlpg.buffer import BufferClass
from baselines.rl.dlpg.dlpg import DeepLatentPolicyGradient
from utils.util import sample_xyzs,rpy2r,r2rpy,r2quat,compute_view_params,get_interp_const_vel_traj, printmd
np.set_printoptions(precision=2,suppress=True,linewidth=100)
plt.rc('xtick',labelsize=6); plt.rc('ytick',labelsize=6)
%config InlineBackend.figure_format = 'retina'
%matplotlib inline

def torch2np(x_torch):
    if x_torch is None:
        x_np = None
    else:
        x_np = x_torch.detach().cpu().numpy()
    return x_np
def np2torch(x_np,device='cpu'):
    if x_np is None:
        x_torch = None
    else:
        x_torch = torch.tensor(x_np,dtype=torch.float32,device=device)
    return x_torch

print ("MuJoCo version:[%s]"%(mujoco.__version__))

MuJoCo version:[2.3.3]


In [2]:
xml_path = '../../../asset/scene_kitchen_dish_rack_black.xml'
env    = MuJoCoParserClass(name='Place task scene: Plate table',rel_xml_path=xml_path,VERBOSE=False, MODE='window')
dlpg   = DeepLatentPolicyGradient(xdim=2, cdim=6, zdim=5, hdims=[64,64], actv_enc=nn.LeakyReLU())
buffer = BufferClass(xdim=2, cdim=6, buffer_limit=1000)

# Move tables and robot base
env.model.body('base_table').pos = np.array([0,0,0])
env.model.body('base').pos = np.array([-10,0,0.79])
env.model.body('avoiding_object_table').pos = np.array([0.38+0.45,0,0])
env.model.body('right_object_table').pos = np.array([-0.05,-0.80,0])
env.model.body('left_object_table').pos = np.array([-1.5,0.80,0])

# Place objects
obj_box_names = [body_name for body_name in env.body_names
             if body_name is not None and (body_name.startswith("obj_box"))]
n_box_obj = len(obj_box_names)
env.place_objects_random(n_obj=n_box_obj, obj_names=obj_box_names, x_range=[0.80, 1.15], y_range=[-3.15, -2.15], COLORS=False, VERBOSE=True)

jntadr_mug_cup = env.model.body('mug_cup').jntadr[0]
env.model.joint(jntadr_mug_cup).qpos0[:3] = np.array([ 10, -0.3, 0.85])
env.model.joint(jntadr_mug_cup).qpos0[3:] = r2quat(rpy2r(np.radians([0, 0, np.random.rand() * 360])))

jntadr_tray = env.model.body('tray').jntadr[0]
env.model.joint(jntadr_tray).qpos0[:3] = np.array([10,-0.3,0.80])
env.model.joint(jntadr_tray).qpos0[3:] = r2quat(rpy2r(np.radians([0, 0, 0])))

jntadr_dish_rack = env.model.body('kitchen-drainer').jntadr[0]
pos_dish_rack = np.array([0.9,0.35,0.82])
env.model.joint(jntadr_dish_rack).qpos0[:3] = pos_dish_rack
dish_rack_random_rot = 0 # np.random.rand() * 360
env.model.joint(jntadr_dish_rack).qpos0[3:] = r2quat(rpy2r(np.radians([0, 0, dish_rack_random_rot])))


# Target dish
env.model.joint(env.model.body('kitchen-plate').jntadr[0]).qpos0[:3] = np.array([0.2, -0.7, 0.8])
# Set the dishes
env.model.joint(env.model.body('kitchen-plate-2').jntadr[0]).qpos0[:3] = pos_dish_rack + np.array([0.,-0.06, 0.25])
env.model.joint(env.model.body('kitchen-plate-2').jntadr[0]).qpos0[3:] = r2quat(rpy2r(np.radians([0, 90, 90])))
env.model.joint(env.model.body('kitchen-plate-3').jntadr[0]).qpos0[:3] = pos_dish_rack + np.array([0., -0.12, 0.25])
env.model.joint(env.model.body('kitchen-plate-3').jntadr[0]).qpos0[3:] = r2quat(rpy2r(np.radians([0, 90, 90])))
env.model.joint(env.model.body('kitchen-plate-4').jntadr[0]).qpos0[:3] = pos_dish_rack + np.array([-3.5, 0., 0.25])
env.model.joint(env.model.body('kitchen-plate-4').jntadr[0]).qpos0[3:] = r2quat(rpy2r(np.radians([0, 90, 90])))

env.reset()

In [3]:
# Init viewer
env.init_viewer(viewer_title='UR5e with RG2 gripper',viewer_width=1200,viewer_height=800,
                viewer_hide_menus=True, MODE='window')
env.update_viewer(azimuth=30,distance=3.0,elevation=-30,lookat=[1.0,0.0,0.71],
                  VIS_TRANSPARENT=False,VIS_CONTACTPOINT=False,
                  contactwidth=0.05,contactheight=0.05,contactrgba=np.array([1,0,0,1]),
                  VIS_JOINT=False,jointlength=0.25,jointwidth=0.05,jointrgba=[0.2,0.6,0.8,0.6])
init_ur_q = np.array([np.deg2rad(-90), np.deg2rad(-130), np.deg2rad(120), np.deg2rad(100), np.deg2rad(45), np.deg2rad(-90)])
trgt_name       = 'kitchen-plate'
obj_name_lst    = ['kitchen-plate-2', 'kitchen-plate-3', 'kitchen-drainer']
max_update_iter = 40 
max_epochs      = 100 
lr              = 1e-3 
batch_size      = 128
optimizer       = torch.optim.Adam(dlpg.cvae.parameters(),lr=lr, betas=(0.9, 0.99))
weight_name = 'test'
env.reset()

start_tick = 0 
end_tick = 2500

for epoch in range(max_epochs): 
    env.reset()
    start_tick = 0

    # Randomize object poses
    x_rand   = np.random.uniform(low=0.7, high=0.9, size=(1))
    y_rand   = np.random.uniform(low=-0.2, high=0.2, size=(1))
    yaw_rand = np.random.uniform(low=-90, high=90)

    random_pose_lst = np.concatenate((x_rand,y_rand,np.array([0.82])),axis=0)

    obj_jntadr  = env.model.body('kitchen-drainer').jntadr[0]
    obj_qposadr = env.model.jnt_qposadr[obj_jntadr]
    env.data.qpos[obj_qposadr:obj_qposadr+3]   = random_pose_lst
    env.data.qpos[obj_qposadr+3:obj_qposadr+7] = r2quat(rpy2r(np.radians([0, 0, yaw_rand-90])))

    obj_jntadr  = env.model.body('kitchen-plate-2').jntadr[0]
    obj_qposadr = env.model.jnt_qposadr[obj_jntadr]
    env.data.qpos[obj_qposadr:obj_qposadr+3]   = random_pose_lst+ np.array([0.-np.cos(np.deg2rad(yaw_rand))*0.06,-np.sin(np.deg2rad(yaw_rand))*0.06, 0.25])
    env.data.qpos[obj_qposadr+3:obj_qposadr+7] = r2quat(rpy2r(np.radians([0, 90, yaw_rand])))

    obj_jntadr  = env.model.body('kitchen-plate-3').jntadr[0]
    obj_qposadr = env.model.jnt_qposadr[obj_jntadr]
    env.data.qpos[obj_qposadr:obj_qposadr+3]   = random_pose_lst+ np.array([0.-np.cos(np.deg2rad(yaw_rand))*0.12, -np.sin(np.deg2rad(yaw_rand))*0.12, 0.25])
    env.data.qpos[obj_qposadr+3:obj_qposadr+7] = r2quat(rpy2r(np.radians([0, 90, yaw_rand])))

    # Get observations
    p_obj, R_obj = env.get_pR_body(body_name='kitchen-drainer')
    quat_obj     = r2quat(R_obj)
    state        = np.concatenate((p_obj[:-1],quat_obj))

    # Epsilon greedy 
    epsgrdy = 5*np.exp(max_epochs/20.)
    if np.random.rand() < epsgrdy:
        trgt_pose  = dlpg.explore()
    else: 
        trgt_pose = torch2np(dlpg.exploit(state))

    # Move target object  
    trgt_jntadr = env.model.body(trgt_name).jntadr[0]
    trgt_qposadr= env.model.jnt_qposadr[trgt_jntadr] 
    env.data.qpos[trgt_qposadr:trgt_qposadr+2]   = trgt_pose
    env.data.qpos[trgt_qposadr+2:trgt_qposadr+3]   = 0.82 # 0.82
    env.data.qpos[trgt_qposadr+3:trgt_qposadr+7] = r2quat(rpy2r(np.radians([0, 90, yaw_rand]))) 

    # Get target state
    p_trgt, R_trgt = env.get_pR_body(body_name=trgt_name)
    quat_trgt      = r2quat(R_trgt)
    state_trgt     = np.concatenate((p_trgt,quat_trgt))
    print("state_trgt",state_trgt)
    # Get reward 
    reward = dlpg.get_reward(state_trgt)

    # Save Buffer 
    buffer.store(x=trgt_pose, c=state, reward=reward)

    while env.tick - start_tick < end_tick:
        env.forward(q=init_ur_q, joint_idxs=env.idxs_forward)
        env.step(ctrl=init_ur_q,ctrl_idxs=env.idxs_step)

        # Render
        if env.loop_every(HZ=100):
            env.render(render_every=10)

    if (epoch+1)%1000: 
        # Update Policy 
        for i in range(max_update_iter): 
            loss_recon_sum=0;loss_kl_sum=0;n_batch_sum=0
            batch = buffer.sample_batch(batch_size=batch_size)
            x_batch, c_batch, reward_batch = batch["x"], batch["c"], batch["reward"]
            total_loss_out,loss_info = dlpg.cvae.loss_total(x           = x_batch, 
                                                        c               = c_batch, 
                                                        q               = reward_batch, 
                                                        LOSS_TYPE       = 'L1+L2',
                                                        recon_loss_gain = 10,
                                                        beta            = 0.01,
                                                        STOCHASTICITY   = True)
            optimizer.zero_grad()
            total_loss_out.backward()
            optimizer.step()

            loss_recon_sum = loss_recon_sum + batch_size*loss_info['loss_recon_out']
            loss_kl_sum    = loss_kl_sum + batch_size*loss_info['loss_kl_out']
            n_batch_sum    = n_batch_sum + batch_size   
        print("[Iteration:{}][Recon Loss:{}][KL Loss:{}]".format(i+1,loss_recon_sum.item()/batch_size, loss_kl_sum.item()/batch_size))
        torch.save(dlpg.cvae.state_dict(),"./weights/"+weight_name+"/"+weight_name+"{}.pth".format(epoch+1))  


env.close_viewer()

state_trgt [ 0.2 -0.7  0.8  1.   0.   0.   0. ]
state_trgt [ 0.2 -0.7  0.8  1.   0.   0.   0. ]
state_trgt [ 0.2 -0.7  0.8  1.   0.   0.   0. ]
state_trgt [ 0.2 -0.7  0.8  1.   0.   0.   0. ]
state_trgt [ 0.2 -0.7  0.8  1.   0.   0.   0. ]
state_trgt [ 0.2 -0.7  0.8  1.   0.   0.   0. ]
state_trgt [ 0.2 -0.7  0.8  1.   0.   0.   0. ]
state_trgt [ 0.2 -0.7  0.8  1.   0.   0.   0. ]
state_trgt [ 0.2 -0.7  0.8  1.   0.   0.   0. ]
state_trgt [ 0.2 -0.7  0.8  1.   0.   0.   0. ]
state_trgt [ 0.2 -0.7  0.8  1.   0.   0.   0. ]
state_trgt [ 0.2 -0.7  0.8  1.   0.   0.   0. ]
state_trgt [ 0.2 -0.7  0.8  1.   0.   0.   0. ]
state_trgt [ 0.2 -0.7  0.8  1.   0.   0.   0. ]
state_trgt [ 0.2 -0.7  0.8  1.   0.   0.   0. ]
state_trgt [ 0.2 -0.7  0.8  1.   0.   0.   0. ]
state_trgt [ 0.2 -0.7  0.8  1.   0.   0.   0. ]
state_trgt [ 0.2 -0.7  0.8  1.   0.   0.   0. ]
state_trgt [ 0.2 -0.7  0.8  1.   0.   0.   0. ]
state_trgt [ 0.2 -0.7  0.8  1.   0.   0.   0. ]
state_trgt [ 0.2 -0.7  0.8  1.   0.   0.

Exception: GLFW window does not exist but you tried to render.