I used the following code to evaluate the pretrained model, but found that evaluation is very slow (about 23 minutes per episode). Is there anything wrong with the code? Or could you please provide a standard evaluation script? Thanks for your help!
```python
import copy
import time

import gym
import gpytorch
import mj_envs  # noqa: F401 -- importing registers the hand-manipulation envs
import numpy as np
import torch
from tqdm import tqdm

from gp_models import MultitaskGPModel
from rlkit.torch.pytorch_util import set_gpu_mode

device = torch.device('cuda:1')
def rollout(
        env,
        agent,
        max_path_length=np.inf,
        render=False,
        render_kwargs=None,
        preprocess_obs_for_policy_fn=None,
        get_action_kwargs=None,
        return_dict_obs=False,
        full_o_postprocess_func=None,
        reset_callback=None,
):
    if render_kwargs is None:
        render_kwargs = {}
    if get_action_kwargs is None:
        get_action_kwargs = {}
    if preprocess_obs_for_policy_fn is None:
        preprocess_obs_for_policy_fn = lambda x: x
    raw_obs = []
    raw_next_obs = []
    observations = []
    actions = []
    rewards = []
    terminals = []
    dones = []
    agent_infos = []
    env_infos = []
    next_observations = []
    path_length = 0
    o = env.reset()
    if reset_callback:
        reset_callback(env, agent, o)
    if render:
        env.mj_render()
    while path_length < max_path_length:
        raw_obs.append(o)
        # Query the GP at the current observation and act with the
        # posterior mean. Note: this uses the global `model` and
        # `likelihood` rather than the `agent` argument.
        o_torch = torch.from_numpy(np.array([o])).float().to(device)
        output = model(o_torch)
        observed_pred = likelihood(output)
        a = observed_pred.mean.data.cpu().numpy()
        if len(a) == 1:
            a = a[0]
        agent_info = None
        if full_o_postprocess_func:
            full_o_postprocess_func(env, agent, o)
        next_o, r, done, env_info = env.step(copy.deepcopy(a))
        if render:
            env.mj_render()
        observations.append(o)
        rewards.append(r)
        terminal = False
        if done:
            # terminal=False if a TimeLimit caused the termination
            if not env_info.pop('TimeLimit.truncated', False):
                terminal = True
        terminals.append(terminal)
        dones.append(done)
        actions.append(a)
        next_observations.append(next_o)
        raw_next_obs.append(next_o)
        agent_infos.append(agent_info)
        env_infos.append(env_info)
        path_length += 1
        if done:
            break
        o = next_o
    actions = np.array(actions)
    if len(actions.shape) == 1:
        actions = np.expand_dims(actions, 1)
    observations = np.array(observations)
    next_observations = np.array(next_observations)
    if return_dict_obs:
        observations = raw_obs
        next_observations = raw_next_obs
    rewards = np.array(rewards)
    if len(rewards.shape) == 1:
        rewards = rewards.reshape(-1, 1)
    return dict(
        observations=observations,
        actions=actions,
        rewards=rewards,
        next_observations=next_observations,
        terminals=np.array(terminals).reshape(-1, 1),
        dones=np.array(dones).reshape(-1, 1),
        agent_infos=agent_infos,
        env_infos=env_infos,
        full_observations=raw_obs,
        full_next_observations=raw_next_obs,  # was raw_obs, which looks like a typo
    )
def simulate_policy(env, policy, T=100, H=200, gpu=True, render=False):
    if gpu:
        set_gpu_mode(True)
        policy.to(device)
        print('Using GPU.')
    success_time = 0
    env.seed(1)
    for episode in tqdm(range(T)):
        path = rollout(
            env,
            policy,
            max_path_length=H,
            render=render,
        )
        if path['env_infos'][-1]['goal_achieved'] is True:
            success_time += 1
        if hasattr(env, "log_diagnostics"):
            env.log_diagnostics([path])
        time.sleep(0.02)
    # Divide by T, not by the loop variable (which ends at T - 1).
    success_time /= T
    return success_time
env = gym.make('door-binary-v0')
obs_dim = env.observation_space.low.size
action_dim = env.action_space.low.size

data_set = '../d4rl_model/offpolicy_hand_data/door2_sparse.npy'
model_path = '../nppac/nppac/door/gp_door_multitask_1000.pt'
data = np.load(data_set, allow_pickle=True)

keep_num = 1000
use_ard = True
gp_type = 'multitask'
gp_rank = 1
kernel_type = 'matern12'

# Ablation to randomly subsample the dataset; not active by default.
if keep_num < len(data):
    print(f'Keeping {keep_num} trajectories.')
    data = np.random.choice(data, keep_num, replace=False)

if type(data[0]['observations'][0]) is dict:
    # Keep only the state portion of dict observations.
    for traj in data:
        traj['observations'] = [t['state_observation'] for t in traj['observations']]

# Flatten all trajectories into (N, obs_dim) / (N, action_dim) training tensors.
train_x = torch.from_numpy(
    np.array([step for traj in data for step in traj['observations']])).float().to(device)
train_y = torch.from_numpy(
    np.array([step for traj in data for step in traj['actions']])).float().to(device)
print('Data loaded!')
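
# Note: MultitaskGPModel takes train_x/train_y in its constructor
# (exact-GP style), so the model conditions on every flattened transition
# above; the size of this dataset directly drives the cost of each
# per-step prediction in rollout().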
# Initialize likelihood and model.
likelihood = gpytorch.likelihoods.MultitaskGaussianLikelihood(num_tasks=action_dim).to(device)
likelihood.eval()
ard_num_dims = obs_dim if use_ard else None
model = MultitaskGPModel(train_x, train_y, likelihood, num_tasks=action_dim, rank=gp_rank,
                         ard_num_dims=ard_num_dims, kernel_type=kernel_type).to(device)
model_dict = torch.load(model_path, map_location=device)
model.load_state_dict(model_dict)
model.eval()

success_rate = simulate_policy(env, model, render=False, T=100)
print('success rate is:', success_rate)
```
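
One thing I suspect (but have not confirmed) is that the per-step GP query runs with autograd enabled and without GPyTorch's fast predictive variances, both of which make exact-GP inference slower than it needs to be. Below is a minimal sketch of how the action computation could be wrapped; `gp_action` is just a name I made up, and it assumes the `model`, `likelihood`, and `device` globals from the script above:

```python
import gpytorch
import numpy as np
import torch


def gp_action(o):
    """Hypothetical helper: GP posterior mean as the action.

    torch.no_grad() avoids building an autograd graph, and
    gpytorch.settings.fast_pred_var() enables LOVE fast predictive
    (co)variances; the first call after model.eval() pays a one-time
    caching cost, after which per-step queries should be much cheaper.
    """
    o_torch = torch.from_numpy(np.array([o])).float().to(device)
    with torch.no_grad(), gpytorch.settings.fast_pred_var():
        observed_pred = likelihood(model(o_torch))
        a = observed_pred.mean.cpu().numpy()
    return a[0] if len(a) == 1 else a
```

Even with that change, exact multitask GP prediction still scales with the number of flattened training transitions, so subsampling the dataset (e.g. via `keep_num`) may be the other lever if queries remain slow.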