
How to improve the evaluation efficiency? #3

Closed
nuomizai opened this issue Apr 25, 2022 · 2 comments
Comments

nuomizai commented Apr 25, 2022

I used the following code to evaluate the pretrained model, but found that evaluation is very slow (about 23 minutes per episode). Is there anything wrong with the code? Or could you please provide a standard evaluation script? Thanks for your help!

import mj_envs
import gym
import numpy as np
import torch
import gpytorch
from gp_models import MultitaskGPModel
from rlkit.torch.pytorch_util import set_gpu_mode
from tqdm import tqdm
import copy
import time
device = torch.device('cuda:1')

def rollout(
        env,
        agent,
        max_path_length=np.inf,
        render=False,
        render_kwargs=None,
        preprocess_obs_for_policy_fn=None,
        get_action_kwargs=None,
        return_dict_obs=False,
        full_o_postprocess_func=None,
        reset_callback=None,
):
    if render_kwargs is None:
        render_kwargs = {}
    if get_action_kwargs is None:
        get_action_kwargs = {}
    if preprocess_obs_for_policy_fn is None:
        preprocess_obs_for_policy_fn = lambda x: x
    raw_obs = []
    raw_next_obs = []
    observations = []
    actions = []
    rewards = []
    terminals = []
    dones = []
    agent_infos = []
    env_infos = []
    next_observations = []
    path_length = 0
    # agent.reset()
    o = env.reset()
    if reset_callback:
        reset_callback(env, agent, o)
    if render:
        # todo: debug
        env.mj_render()
        # env.render(**render_kwargs)
    while path_length < max_path_length:
        print('path_length:', path_length)
        raw_obs.append(o)
        # todo: debug

        # o_for_agent = torch.from_numpy(o).cuda().float().unsqueeze(0)

        o_torch = torch.from_numpy(np.array([o])).float().to(device)
        output = model(o_torch)
        observed_pred = likelihood(output)
        a = observed_pred.mean.data.cpu().numpy()

        if len(a) == 1:
            a = a[0]

        # # o_for_agent = o
        # # a = agent.get_action(o_for_agent, **get_action_kwargs)
        # a, *_ = agent(o_for_agent, **get_action_kwargs)
        # a = a.detach().cpu().numpy()
        # # a = agent.get_action(o_for_agent, **get_action_kwargs)[0][0]
        agent_info = None
        if full_o_postprocess_func:
            full_o_postprocess_func(env, agent, o)

        next_o, r, done, env_info = env.step(copy.deepcopy(a))
        if render:
            # todo: debug
            env.mj_render()

            # env.render(**render_kwargs)
        observations.append(o)
        rewards.append(r)
        terminal = False
        if done:
            # terminal=False if TimeLimit caused termination
            if not env_info.pop('TimeLimit.truncated', False):
                terminal = True
        terminals.append(terminal)
        dones.append(done)
        actions.append(a)
        next_observations.append(next_o)
        raw_next_obs.append(next_o)
        agent_infos.append(agent_info)
        env_infos.append(env_info)
        path_length += 1
        if done:
            break
        o = next_o
    actions = np.array(actions)
    if len(actions.shape) == 1:
        actions = np.expand_dims(actions, 1)
    observations = np.array(observations)
    next_observations = np.array(next_observations)
    if return_dict_obs:
        observations = raw_obs
        next_observations = raw_next_obs
    rewards = np.array(rewards)
    if len(rewards.shape) == 1:
        rewards = rewards.reshape(-1, 1)
    return dict(
        observations=observations,
        actions=actions,
        rewards=rewards,
        next_observations=next_observations,
        terminals=np.array(terminals).reshape(-1, 1),
        dones=np.array(dones).reshape(-1, 1),
        agent_infos=agent_infos,
        env_infos=env_infos,
        full_observations=raw_obs,
        full_next_observations=raw_next_obs,
    )


def simulate_policy(env, policy, T=100, H=200, gpu=True, render=False):
    if gpu:
        set_gpu_mode(True)
        # policy.cuda()
        policy.to(device)
        print('use GPU')
    # policy = MakeDeterministic(policy)
    success_time = 0
    env.seed(1)
    for episode in tqdm(range(0, T)):
        print('episode:{}'.format(episode))
        path = rollout(
            env,
            policy,
            max_path_length=H,
            render=render,
        )
        if path['env_infos'][-1]['goal_achieved']:
            success_time += 1
        if hasattr(env, "log_diagnostics"):
            env.log_diagnostics([path])
        time.sleep(0.02)
    success_time /= T  # average over all T episodes, not the last loop index
    return success_time



env = gym.make('door-binary-v0')

obs_dim = env.observation_space.low.size
action_dim = env.action_space.low.size
data_set = '../d4rl_model/offpolicy_hand_data/door2_sparse.npy'
model_path = '../nppac/nppac/door/gp_door_multitask_1000.pt'

data = np.load(data_set, allow_pickle=True)
keep_num = 1000
use_ard = True
gp_type = 'multitask'
gp_rank = 1
kernel_type = 'matern12'
# Ablation to randomly filter the dataset, not active by default.
if keep_num < len(data):
    print(f'Keeping {keep_num} trajectories.')
    data = np.random.choice(data, keep_num, replace=False)

if type(data[0]['observations'][0]) is dict:
    # Convert to just the states
    for traj in data:
        traj['observations'] = [t['state_observation'] for t in traj['observations']]

train_x = torch.from_numpy(np.array([j for i in [traj['observations'] for traj in data] for j in i])).float().to(
    device)
train_y = torch.from_numpy(np.array([j for i in [traj['actions'] for traj in data] for j in i])).float().to(
    device)

print('Data Loaded!')

# Initialize likelihood and model
likelihood = gpytorch.likelihoods.MultitaskGaussianLikelihood(num_tasks=action_dim).to(device)
likelihood.eval()
ard_num_dims = obs_dim if use_ard else None

model = MultitaskGPModel(train_x, train_y, likelihood, num_tasks=action_dim, rank=gp_rank,
                         ard_num_dims=ard_num_dims, kernel_type=kernel_type).to(device)

model_dict = torch.load(model_path, map_location=device)
model.load_state_dict(model_dict)
model.eval()


success_rate = simulate_policy(env, model, render=False, T=100)
print('success rate is :', success_rate)
conglu1997 (Owner) commented:

What you have looks good, but it's missing

with torch.no_grad(), gpytorch.settings.fast_pred_var():

around the parts that evaluate the GP.
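Concretely, that means wrapping the GP prediction step inside the rollout loop. Below is a minimal sketch of how the action-selection block from the script above could look; it reuses the model, likelihood, and device names already defined there, the helper name gp_action is just for illustration, and the rest of rollout() stays the same.

import numpy as np
import torch
import gpytorch

def gp_action(model, likelihood, o, device):
    # Predict an action for a single observation with the GP policy.
    # torch.no_grad() skips building the autograd graph, and
    # gpytorch.settings.fast_pred_var() enables GPyTorch's fast (LOVE)
    # predictive variances, which is what speeds up evaluation.
    o_torch = torch.from_numpy(np.array([o])).float().to(device)
    with torch.no_grad(), gpytorch.settings.fast_pred_var():
        observed_pred = likelihood(model(o_torch))
        a = observed_pred.mean.cpu().numpy()
    return a[0] if len(a) == 1 else a

With model.eval() and likelihood.eval() already called as in the script, the first prediction may still be slow while GPyTorch builds its prediction cache, but subsequent steps should be fast.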

nuomizai (Author) commented:

Thanks for your help! I added with torch.no_grad(), gpytorch.settings.fast_pred_var(): and evaluation is now much faster!
