In [5]:
import sys
import numpy as np
import pandas as pd
import pickle
import re
import math
import os
# NOTE(review): KMP_DUPLICATE_LIB_OK is presumably set so that more than one
# OpenMP runtime may be loaded in the same process — confirm it is still needed.
os.environ['KMP_DUPLICATE_LIB_OK'] = 'True'

# Put the two directories above the notebook's working directory on the import
# path (presumably where the project packages imported in the next cell live).
_cwd = os.path.abspath(os.getcwd())
for _rel in ("..", "../.."):
    sys.path.append(os.path.join(_cwd, _rel))

# Absolute path two levels above the working directory; data paths are built from it.
parent_dir = os.path.abspath(os.path.join(_cwd, '../..'))

AttributeError: module 'numpy.random' has no attribute 'BitGenerator'

In [2]:
# import MEIRL Modules
from MaxEnt_Baseline.baseline_utils import *
from MaxEnt_Baseline.Reward_Expert import *
from MaxEnt_Baseline.MaxEntBaseline import *
from utils.trajectory import Trajectory
from utils.plot_utils import *

In [3]:
# Per-case settings: entry k of each list below is used for case k+1
# (the case-selection cell indexes them with `case-1`).

# Feature names handed to the reward expert
feat_list_cases = [
    ["laptop", "coffee", "table"],
    ["laptop", "coffee", "table"],
    ["proxemics", "coffee", "table"],
]
# Weights handed to the reward expert
# (presumably aligned position-by-position with the feature names — confirm)
weights_cases = [
    [10.0, 0.0, 10.0],
    [10.0, 0.0, 10.0],
    [10.0, 0.0, 10.0],
]
# Features passed to IRL as already known
known_features_cases = [
    ["table", "coffee"],
    ["laptop", "coffee"],
    ["table", "coffee"],
]

# Suffix of the demonstration file (demos_<suffix>.p) for each case
traj_feat_list = [
    "tablelaptop_case1",
    "tablelaptop_case2",
    "tableproxemics_case3",
]
# Indices of the demonstrations inside that file which are used for each case
traj_idx_list = [
    [4, 13, 0, 15],
    [0, 1, 2, 3, 4, 5, 6, 7],
    [1, 2, 3, 4, 6],
]


# TrajOpt Settings
obj_center_dict = {
    'HUMAN_CENTER': [-0.2, -0.5, 0.6],
    'LAPTOP_CENTER': [-0.6, 0.0, 0.0],
}
# Trajectory horizon and spacing between waypoints; demonstrations are
# resampled to int(T / timestep) + 1 waypoints.
T = 20.0
timestep = 0.5

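# 1. Load or generate near-optimal expert demonstrations

Sections 1.1 and 1.2 are alternatives: each one creates its own `Expert`, so if both are run, the `Expert` from 1.2 replaces the one from 1.1 before IRL.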

In [4]:
# Which case to run (see paper); cases are numbered from 1.
case = 1

# The per-case lists are indexed with `case-1`, so case=0 would silently pick the
# LAST case through negative indexing. Fail loudly on an out-of-range value instead.
if not 1 <= case <= len(feat_list_cases):
    raise ValueError("case must be between 1 and {}, got {}".format(
        len(feat_list_cases), case))

# Pick the settings that belong to the chosen case.
feat_list = feat_list_cases[case-1]
weights = weights_cases[case-1]
known_features = known_features_cases[case-1]

## 1.1 Load human expert demonstrations

In [5]:
# Create a reward expert without any start/goal pairs; the demonstrations
# loaded from file are added to it via load_trajs.
Expert = GT_Reward_Expert(
    feat_list,
    weights,
    gen='cost',
    starts=[],
    goals=[],
    goal_poses=None,
    combi=False,
    obj_center_dict=obj_center_dict,
)

No handlers could be found for logger "openravepy.ikfast"


In [6]:
# Load the recorded demonstrations of the selected case.
# NOTE: unpickling can execute arbitrary code — only load demo files you trust.
data_file = parent_dir + '/data/MEIRL_demonstrations/demos_{}.p'.format(traj_feat_list[case-1])
# Use a context manager so the file handle is closed even if unpickling fails.
with open(data_file, "rb") as demo_file:
    trajectory_list = pickle.load(demo_file)

# Number of waypoints every demonstration is resampled to, fixed by the TrajOpt
# settings T and timestep. It does not change per trajectory, so compute it once.
num_waypts = int(T / timestep) + 1

# Select the subset of demonstrations for this case and resample each one.
# Demonstrations are kept in file order, not in the order of traj_idx_list.
s_g_exp_trajs = []
for i, trajectory in enumerate(trajectory_list):
    if i not in traj_idx_list[case-1]:
        continue
    waypts = trajectory
    # Spread the recorded waypoints evenly over the horizon [0, T].
    waypts_time = np.linspace(0.0, T, waypts.shape[0])
    traj = Trajectory(waypts, waypts_time)

    # Downsample/upsample the trajectory to the desired number of waypoints.
    # A trajectory that already has num_waypts waypoints takes the upsample branch.
    if num_waypts < len(traj.waypts):
        demo = traj.downsample(num_waypts)
    else:
        demo = traj.upsample(num_waypts)
    # Each demonstration is wrapped in its own one-element list.
    s_g_exp_trajs.append([demo.waypts])

# Hand the resampled demonstrations to the expert.
Expert.load_trajs(s_g_exp_trajs)

## 1.2 Generate near-optimal expert demonstrations

In [13]:
# Extract the start & goal positions (first and last waypoint) of the
# demonstrations selected for this case.
starts = []
goals = []
# NOTE: unpickling can execute arbitrary code — only load demo files you trust.
data_file = parent_dir + '/data/MEIRL_demonstrations/demos_{}.p'.format(traj_feat_list[case-1])
# Use a context manager so the file handle is closed even if unpickling fails.
with open(data_file, "rb") as demo_file:
    trajectory_list = pickle.load(demo_file)
# Walk the indices in traj_idx_list order; no intermediate list is needed.
for i in traj_idx_list[case-1]:
    trajectory = trajectory_list[i]
    starts.append(trajectory[0])
    goals.append(trajectory[-1])

In [14]:
# Create the reward expert from the start & goal positions taken from the
# demonstrations; note that this rebinds the name `Expert`.
Expert = GT_Reward_Expert(
    feat_list,
    weights,
    gen='cost',
    starts=starts,
    goals=goals,
    goal_poses=None,
    combi=False,
    obj_center_dict=obj_center_dict,
)

In [15]:
# Optional: add more random start-goal pairs with a minimum distance.
# n_trajs=0 presumably adds no pairs — confirm in generate_rand_start_goal.
Expert.generate_rand_start_goal(n_trajs=0, min_dist=0.7)

In [16]:
# Step 1: generate near-optimal expert demos for all start-goal pairs with TrajOpt
# Note: we found that more near-optimal trajectories per pair don't help compared to just one
Expert.generate_expert_demos(n_per_s_g=1)

In [17]:
# Visualize the expert demonstrations (the color only encodes the z-axis coordinate)
Expert.plot_trajs()

# IRL

In [18]:
# Settings passed as NN_dict when DeepMaxEntIRL is constructed.
# NOTE(review): the boolean flags presumably select which input transforms /
# features the network sees — confirm their meaning in DeepMaxEntIRL.
NN_dict = {
    'n_layers': 2,
    'n_units': 128,
    'sin': False,
    'cos': False,
    'noangles': True,
    'norot': True,
    'rpy': False,
    'lowdim': False,
    'noxyz': False,
    'EErot': False,
    '6D_laptop': True,
    '6D_human': False,
    '9D_coffee': False,
}

# Training settings; each entry is forwarded under the same name to
# IRL.deep_max_ent_irl in the training cell.
IRL_dict = {
    'n_iters': 50,
    'n_cur_rew_traj': 1,
    'lr': 1e-3,
    'weight_decay': 0.001,
    'std': 0.01,
}

# Build the IRL learner from the expert's current trajectories, the features
# treated as known for this case, and the network settings.
IRL = DeepMaxEntIRL(
    s_g_exp_trajs=Expert.return_trajs(),
    goal_poses=None,
    known_feat_list=known_features,
    NN_dict=NN_dict,
    gen='waypt',
    obj_center_dict=obj_center_dict,
)

In [19]:
# Before IRL: compare the demonstration trajectories with the trajectories induced by the current cost.
# Colors show the cost function of the randomly initialized neural network (no training has run yet).
plot_IRL_comparison(IRL)

In [20]:
# Run the IRL training loop. IRL_dict holds exactly the keyword arguments
# n_iters, n_cur_rew_traj, lr, weight_decay and std, so it is unpacked directly.
IRL.deep_max_ent_irl(**IRL_dict)

Iteration 49: 100%|██████████| 50/50 [03:59<00:00,  4.79s/it, avg_loss=-.158] 


In [21]:
# After IRL: compare the demonstration trajectories with the trajectories induced by the current cost.
# Colors show the learned neural network cost function.
plot_IRL_comparison(IRL)

# Visualize the learned cost function in 3D 

In [22]:
# Plot the GT cost in 3D using the expert's environment
# (GT presumably stands for ground truth, cf. GT_Reward_Expert).
plot_gt3D(parent_dir, Expert.env)

In [23]:
# Plot the learned cost function (IRL.function) in 3D using the IRL environment.
plot_learned3D(parent_dir, IRL.function, IRL.env)