In [1]:
%load_ext autoreload
%autoreload 2
%cd '~/carl/research/dev/PlasticineLab'
%pwd
%matplotlib inline
import matplotlib.pyplot as plt
import taichi as ti
import numpy as np
import cv2
import os
from plb.utils.visualization_utils import save_rgb
from plb.algorithms.bc.bc_agent import Agent
from imitation.imitation_buffer import ImitationReplayBuffer, filter_buffer_nan
from imitation.utils import aggregate_traj_info
from tqdm import tqdm
import argparse
import random
import numpy as np
import torch
import json
import os
from chester import logger
#
from plb.envs import make 
from plb.envs.mp_wrapper import make_mp_envs
from imitation.utils import load_target_info
from imitation.utils import visualize_trajs
from imitation.eval_helper import eval_skills, eval_vae, eval_plan
import wandb


def set_random_seed(seed):
    """Seed every random number generator used in this notebook.

    Applies `seed`, in order, to Python's `random` module, NumPy's global
    generator, PyTorch's default generator and PyTorch's CUDA generators.
    """
    seeders = (
        random.seed,
        np.random.seed,
        torch.manual_seed,
        torch.cuda.manual_seed_all,
    )
    for apply_seed in seeders:
        apply_seed(seed)


def _str2bool(value):
    """Parse a command-line boolean such as 'true'/'false', 'yes'/'no' or '1'/'0'."""
    if isinstance(value, bool):
        return value
    text = str(value).strip().lower()
    if text in ('true', 't', 'yes', 'y', '1'):
        return True
    if text in ('false', 'f', 'no', 'n', '0'):
        return False
    raise argparse.ArgumentTypeError(f'expected a boolean value, got {value!r}')


def get_args(cmd=False):
    """Build the experiment configuration.

    Args:
        cmd: when True, parse `sys.argv`; when False (the default, for notebook and
            Chester use), ignore the command line and return the defaults only.

    Returns:
        argparse.Namespace holding every option declared below.

    Boolean options use `_str2bool` rather than `type=bool`: `bool("False")` is True,
    so with `type=bool` a flag could never be switched off from the command line.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--env_name', type=str, default='Roll-v1')
    parser.add_argument('--exp_prefix', type=str, default='1026_Roll_BC_Image')
    parser.add_argument('--num_env', type=int, default=1)  # Number of parallel environment
    parser.add_argument('--algo', type=str, default='imitation')
    parser.add_argument('--dataset_name', type=str, default='tmp')
    parser.add_argument("--seed", type=int, default=100)
    parser.add_argument("--gd_num_steps", type=int, default=50, help="steps for the gradient descent(gd) expert")

    # differentiable physics parameters
    parser.add_argument("--lr", type=float, default=0.02)  # For the solver
    parser.add_argument("--softness", type=float, default=666.)
    parser.add_argument("--optim", type=str, default='Adam', choices=['Adam', 'Momentum'])
    parser.add_argument("--num_trajs", type=int, default=20)  # Number of demonstration trajectories
    parser.add_argument("--energy_weight", type=float, default=0.)
    parser.add_argument("--vel_loss_weight", type=float, default=0.)

    # Train
    parser.add_argument("--il_num_epoch", type=int, default=5000)
    parser.add_argument("--il_lr", type=float, default=1e-3)
    parser.add_argument("--il_eval_freq", type=int, default=10)
    parser.add_argument("--batch_size", type=int, default=128)
    parser.add_argument("--step_per_epoch", type=int, default=500)
    parser.add_argument("--step_warmup", type=int, default=2000)
    parser.add_argument("--hindsight_goal_ratio", type=float, default=0.5)
    parser.add_argument("--debug_overfit_test", type=_str2bool, default=False)
    parser.add_argument("--obs_noise", type=float, default=0.05)
    parser.add_argument("--resume_path", default=None)
    parser.add_argument("--num_tools", type=int, default=1)

    # Architecture
    parser.add_argument("--frame_stack", type=int, default=1)
    parser.add_argument("--image_dim", type=int, default=64)
    parser.add_argument("--img_mode", type=str, default='rgb')
    parser.add_argument("--pos_ratio", type=float, default=0.5)
    parser.add_argument("--pos_reset_ratio", type=float, default=0.2)  # 20% of the positive goals will come from the reset motion
    parser.add_argument("--z_dim", type=int, default=32)  # Maybe try multiple values
    parser.add_argument("--actor_feature_dim", type=int, default=128)
    parser.add_argument("--encoder_beta", type=float, default=10.)
    parser.add_argument("--bin_succ", type=_str2bool, default=False)

    # Plan
    parser.add_argument("--adam_sample", type=int, default=400)
    parser.add_argument("--adam_iter", type=int, default=3000)
    parser.add_argument("--adam_lr", type=float, default=5e-2)
    parser.add_argument("--min_zlogl", type=float, default=-30)
    parser.add_argument("--save_goal_his", type=_str2bool, default=False)
    parser.add_argument("--plan_step", type=int, default=2)

    # An explicit empty list makes argparse ignore sys.argv (e.g. Jupyter's kernel arguments).
    return parser.parse_args() if cmd else parser.parse_args([])


def eval_traj(traj_ids, args, buffer, env, agent, np_target_imgs, save_name, visualize=False, horizon=50):
    """Roll the agent out from the start of selected demonstrations and summarise its performance.

    For every id in `traj_ids` the reset key (`init_v`, `target_v`) and the tool id are read
    from the first transition of that demonstration in `buffer.buffer`, the agent is rolled
    out with `sample_traj`, and the demonstration's last observation is stored as the
    rollout's `target_img`.

    Args:
        traj_ids: demonstration indices; demonstration `i` is read from buffer rows
            `[i * horizon, (i + 1) * horizon)`.
        args: not used by this function; kept so existing call sites keep working.
        buffer: object exposing a `buffer` mapping with the keys 'init_v', 'target_v',
            'action_mask' and 'obses', plus a `get_tid` method.
        env: environment forwarded to `sample_traj`.
        agent: agent forwarded to `sample_traj`.
        np_target_imgs: not used by this function; kept so existing call sites keep working.
        save_name: file name (inside `logger.get_dir()`) for the visualisation.
        visualize: when True, call `visualize_trajs` on the rollouts.
        horizon: number of buffer rows per demonstration (previously hard-coded to 50).

    Returns:
        dict with 'eval_final_normalized_performance' and 'eval_avg_normalized_performance',
        each averaged over all rollouts. With a single trajectory id these are the values
        of that one rollout.

    Raises:
        ValueError: if `traj_ids` is empty.
    """
    traj_ids = list(traj_ids)
    if not traj_ids:
        raise ValueError('eval_traj needs at least one trajectory id')

    trajs = []
    demo_obses = []
    for traj_id in traj_ids:
        start = traj_id * horizon
        reset_key = {'init_v': int(buffer.buffer['init_v'][start]),
                     'target_v': int(buffer.buffer['target_v'][start])}
        tid = buffer.get_tid(buffer.buffer['action_mask'][start])
        traj = sample_traj(env, agent, reset_key, tid)

        demo_obs = buffer.buffer['obses'][start: start + horizon]
        demo_obses.append(demo_obs)
        traj['target_img'] = demo_obs[-1]  # the demonstration's final frame is the goal image
        print(f'tid: {tid}, traj_id: {traj_id}, reward: {np.sum(traj["rewards"])}')
        trajs.append(traj)
    demo_obses = np.array(demo_obses)

    if visualize:
        # The slice keeps the first three entries of the last axis of a 5-D array;
        # assumes observations are stored channel-last — TODO confirm.
        visualize_trajs(trajs, 10, key='info_emds', save_name=os.path.join(logger.get_dir(), save_name),
                        vis_target=True, demo_obses=demo_obses[:, :, :, :, :3])

    # Aggregate over every rollout instead of reading the loop variable left over from
    # the last iteration (which reported only the last trajectory).
    final_performance = np.mean([rollout['info_final_normalized_performance'] for rollout in trajs])
    avg_performance = np.mean([np.mean(rollout['info_normalized_performance']) for rollout in trajs])
    info = {'eval_final_normalized_performance': final_performance,
            'eval_avg_normalized_performance': avg_performance}
    return info


def prepare_agent_env(args):
    """Placeholder for agent/environment setup: ignores `args` and returns None."""
    return None


def run_task(arg_vv, log_dir, exp_name):  # Chester launch
    """Train a behaviour-cloning agent on a demonstration buffer, with periodic evaluation.

    Args:
        arg_vv: dict of overrides merged on top of the `get_args()` defaults. It must
            contain 'dataset_path', which has no parser entry of its own.
        log_dir: directory handed to `logger.configure`.
        exp_name: experiment name handed to `logger.configure`.

    Side effects:
        Writes `variant.json`, tabular logs and, on every second evaluation, an agent
        checkpoint (plus the visualisation made by `eval_traj`) into the logger directory.

    Raises:
        ValueError: if no 'dataset_path' was supplied.
    """
    args = get_args(cmd=False)
    args.__dict__.update(**arg_vv)

    # Fail before any environment process is started rather than at buffer.load().
    if getattr(args, 'dataset_path', None) is None:
        raise ValueError("arg_vv must provide 'dataset_path' (it has no command-line default)")

    set_random_seed(args.seed)

    # Configure logger
    logger.configure(dir=log_dir, exp_name=exp_name)
    log_dir = logger.get_dir()
    assert log_dir is not None
    os.makedirs(log_dir, exist_ok=True)

    # Dump parameters
    with open(os.path.join(log_dir, 'variant.json'), 'w') as f:
        json.dump(args.__dict__, f, indent=2, sort_keys=True)

    # Need to make the environment before moving tensor to torch
    obs_channel = len(args.img_mode) * args.frame_stack
    img_obs_shape = (args.image_dim, args.image_dim, obs_channel)
    env = make_mp_envs(args.env_name, args.num_env, args.seed)
    try:
        args.cached_state_path = env.getattr('cfg.cached_state_path', 0)
        print(args.cached_state_path)
        action_dim = env.getattr('taichi_env.primitives.action_dim')[0]

        # Load buffer
        device = 'cuda'
        buffer = ImitationReplayBuffer(args)
        buffer.load(args.dataset_path)
        filter_buffer_nan(buffer)
        print("buffer size:", buffer.cur_size)

        buffer.generate_train_eval_split()
        target_info = load_target_info(args, device)
        buffer.__dict__.update(**target_info)

        agent = Agent(args, None, img_obs_shape, action_dim, num_tools=1, device=device)
        if args.resume_path is not None:
            agent.load(args.resume_path)

        # NOTE(review): total_steps is logged but never incremented, so it is always 0.
        total_steps = 0
        # NOTE(review): eval_idxes is only printed; evaluation below always uses demonstration 0.
        eval_idxes = np.random.permutation(buffer.eval_traj_idx)[:min(30, len(buffer.eval_traj_idx))]
        print("eval_idxes:", eval_idxes)

        for epoch in range(args.il_num_epoch):
            data_batch = buffer.sample_tool_transitions_bc([np.random.permutation(list(range(50))), ], epoch, device)
            train_info = agent.train(data_batch)
            if epoch % args.il_eval_freq != 0:
                continue

            # Log training info (only the current epoch's update is aggregated)
            train_infos = aggregate_traj_info([train_info], prefix=None)

            # Every second evaluation also renders a visualisation and saves a checkpoint.
            is_major_eval = epoch % (args.il_eval_freq * 2) == 0
            plan_info = eval_traj([0], args, buffer, env, agent, buffer.np_target_imgs,
                                  f'visual_{epoch}.gif', visualize=is_major_eval)

            # Logging
            logger.record_tabular('epoch', epoch)
            logger.record_tabular('total steps', total_steps)
            all_info = {**train_infos, **plan_info, 'epoch': epoch, 'total steps': total_steps}
            for key, val in all_info.items():
                logger.record_tabular(key, val)
            logger.dump_tabular()

            # Save model
            if is_major_eval:
                agent.save(os.path.join(logger.get_dir(), f'agent_{epoch}.ckpt'))
    finally:
        # Always shut the environment down, including when training or evaluation raises.
        env.close()


/home/jianrenw/carl/research/dev/PlasticineLab
[Taichi] mode=release
[Taichi] preparing sandbox at /tmp/taichi-ayrjthpt
[Taichi] version 0.7.26, llvm 10.0.0, commit e37bdb5e, linux, python 3.8.11
[I 10/27/21 20:02:53.635 1358824] [shell.py:_shell_pop_print@35] Graphical python shell detected, using wrapped sys.stdout


In [2]:
import time

import numpy as np
import torch

from plb.algorithms.bc.bc_agent import Agent as BCAgent
from imitation.utils import img_to_tensor, to_action_mask
from plb.envs.mp_wrapper import SubprocVecEnv
from scipy.spatial.transform import Rotation as R
import os
from chester import logger
# Module-level device name. NOTE(review): nothing visible in this notebook reads this global
# (sample_traj uses `agent.device`, run_task assigns its own local `device`) — confirm before removing.
device = 'cuda'
def sample_traj(env, agent, reset_key, tid, action_mask=None, action_sequence=None, log_succ_score=False, reset_primitive=False, num_moves=1, init='zero', horizon=50, target_path='data/debug/target_expert.npy'):
    """Roll out a behaviour-cloning agent for up to `horizon` steps and collect the trajectory.

    Args:
        env: environment; a `SubprocVecEnv` is reset/rendered through its vectorised API.
            NOTE(review): everything after the reset (`getfunc`, `getattr(name, 0)`,
            `step([action])`, `render(...)[0]`) uses the vectorised API regardless of type.
        agent: a `BCAgent` with `args.num_env == 1`.
        reset_key: dict passed to `env.reset` and merged into the result, or None to
            continue from the current state without recording an initial frame.
        tid: tool id passed to `agent.act`; also picks the default action mask.
        action_mask: optional mask stored in the result; built from `tid` when both this
            and `action_sequence` are None.
        action_sequence: only consulted when deciding whether to build the default mask.
        log_succ_score: not supported (see Raises).
        reset_primitive: when True, `get_primitive_state` is queried before the rollout.
        num_moves, init: unused; kept for interface compatibility.
        horizon: maximum number of agent steps (previously hard-coded to 50).
        target_path: file handed to `taichi_env.load_target_x` (previously hard-coded).

    Returns:
        dict with 'states', 'obses', 'actions', 'target_img', 'rewards', 'info_rewards',
        'info_emds', 'info_final_normalized_performance', 'info_normalized_performance',
        'info_total_r', timing entries and 'action_mask', plus the entries of `reset_key`.

    Raises:
        NotImplementedError: if `agent` is not a `BCAgent`, or if `log_succ_score` is True.
            Neither path was ever functional (both ran into undefined names), so they are
            rejected up front before the environment is touched.
    """
    assert agent.args.num_env == 1
    if not isinstance(agent, BCAgent):
        raise NotImplementedError('sample_traj only supports BCAgent rollouts')
    if log_succ_score:
        raise NotImplementedError('log_succ_score is not supported: success/score values are not computed here')

    if action_sequence is None and action_mask is None:
        action_mask = to_action_mask(env, [1, 0] if tid == 0 else [0, 1])

    if isinstance(env, SubprocVecEnv):
        if reset_key is not None:
            state = env.reset([reset_key])[0]
        obs = env.render(mode='rgb')[0]  # rgbd observation
    else:
        if reset_key is not None:
            state = env.reset(**reset_key)
        obs = env.render(mode='rgb')  # rgbd observation

    env.getfunc("taichi_env.load_target_x", 0, [{'path': target_path}])

    states, obses, actions, rewards = [], [], [], []
    if reset_key is not None:
        states.append(state)
        obses.append(obs)

    total_r = 0
    agent_time = 0
    env_time = 0
    st_time = time.time()

    action_dim = env.getattr("taichi_env.primitives.action_dim", 0)
    frame_stack = agent.args.frame_stack
    # One zero-action step to obtain the info dict that goes with the initial frame.
    _, _, _, mp_info = env.step([np.zeros(action_dim)])
    if reset_primitive:
        env.getfunc('get_primitive_state', 0)  # result is not used; call kept from the original
    infos = [mp_info[0]] if reset_key is not None else []

    stack_obs = img_to_tensor(np.array(obs)[None], mode=agent.args.img_mode).to(agent.device)
    target_img = img_to_tensor(np.array(env.getattr('target_img', 0))[None], mode=agent.args.img_mode).to(agent.device)
    C = stack_obs.shape[1]  # channels per frame
    stack_obs = stack_obs.repeat([1, frame_stack, 1, 1])

    with torch.no_grad():
        for _ in range(horizon):
            t1 = time.time()

            # Push the newest frame BEFORE acting. Doing it after `agent.act` (as before)
            # made the policy act on a frame one step old from the second step onwards.
            obs_tensor = img_to_tensor(np.array(obs)[None], mode=agent.args.img_mode).to(agent.device)
            stack_obs = torch.cat([stack_obs, obs_tensor], dim=1)[:, -frame_stack * C:]

            action, done = agent.act(stack_obs, target_img, tid)
            action = action[0].detach().cpu().numpy()
            done = done[0].detach().cpu().numpy()
            if np.round(done).astype(int) == 1 and agent.terminate_early:
                break
            t2 = time.time()

            mp_next_state, mp_reward, _, mp_info = env.step([action])
            next_state, reward, info = mp_next_state[0], mp_reward[0], mp_info[0]
            infos.append(info)
            t3 = time.time()

            agent_time += t2 - t1
            env_time += t3 - t2

            actions.append(action)
            states.append(next_state)
            obs = env.render(mode='rgb')[0]
            obses.append(obs)
            total_r += reward
            rewards.append(reward)
    target_img = np.array(env.getattr('target_img', 0))

    emds = np.array([info['info_emd'] for info in infos])
    if len(infos) > 0:
        info_normalized_performance = np.array([info['info_normalized_performance'] for info in infos])
        info_final_normalized_performance = info_normalized_performance[-1]
    else:
        info_normalized_performance = []
        info_final_normalized_performance = None

    total_time = time.time() - st_time
    ret = {'states': np.array(states).astype(np.float32),
           'obses': np.array(obses).astype(np.float32),
           'actions': np.array(actions).astype(np.float32),
           'target_img': target_img,
           'rewards': np.array(rewards),
           'info_rewards': np.array(rewards),
           'info_emds': emds,
           'info_final_normalized_performance': info_final_normalized_performance,
           'info_normalized_performance': info_normalized_performance,
           'info_total_r': total_r,
           'info_total_time': total_time,
           'info_agent_time': agent_time,
           'info_env_time': env_time,
           'action_mask': action_mask}
    if reset_key is not None:
        ret.update(**reset_key)
    return ret


In [3]:
# Debug-sized overrides that run_task merges on top of the get_args() defaults.
vv = {
    'task': 'train_policy',
    'il_eval_freq': 1,
    # run_task iterates over `args.il_num_epoch`; the bare `num_epoch` key is not read by
    # it, so without this entry the run silently falls back to the 5000-epoch default.
    'il_num_epoch': 100,
    'num_epoch': 100,  # kept in case code outside this notebook reads it — TODO confirm and remove
    'batch_size': 50,
    'step_per_epoch': 50,
    'dataset_path': 'data/debug/dataset.gz'
}
run_task(vv, './data/debug', 'test')

Logging to ./data/debug
Setting pykeops dir to  ~/.cache/pykeops_2080/
[Taichi] Starting on arch=cuda
[Tina] version 0.1.1
[Tina] Taichi properties hacked
pimirives: num primitive: 2
Building primitive
action:
  dim: 6
  scale: (0.7, 0.005, 0.005, 0.005, 0.0, 0.0)
collision_group: [0.0, 0.0, 0.0]
color: (0.7568, 0.6039, 0.4196)
friction: 0.9
h: 0.3
init_pos: (0.3, 0.25, 0.5)
init_rot: (0.707, 0.707, 0.0, 0.0)
lower_bound: (0.0, 0.1, 0.0)
r: 0.03
shape: RollingPinExt
upper_bound: (1.0, 1.0, 1.0)
variations: None
Building primitive
action:
  dim: 0
  scale: ()
collision_group: [0.0, 0.0, 0.0]
color: (0.5, 0.5, 0.5)
friction: 5.0
init_pos: (0.28, 0.04, 0.5)
init_rot: (1.0, 0.0, 0.0, 0.0)
lower_bound: (0.0, 0.0, 0.0)
shape: Box
size: (0.7, 0.02, 0.43)
upper_bound: (1.0, 1.0, 1.0)
variations: None
Initialize Tina Renderer
bake_size: 6  
cam_center: (0.33, 0.1, 0.5)  
cam_phi: -0.8  
cam_radius: 0.8  
cam_theta: 0.0  
camera_pos: (0.5, 1.2, 4.0)  
camera_rot: (0.2, 0)  
dx: 0.006666666666666

ValueError: operands could not be broadcast together with shapes (64,64,3) (50,64,3,4) 