# Evaluate trained models on a task and check the success rate

Load the necessary libraries.

In [1]:
import numpy as np
import torch
import hydra
from omegaconf import DictConfig
import datetime
from omniisaacgymenvs.utils.hydra_cfg.hydra_utils import *
from omniisaacgymenvs.utils.hydra_cfg.reformat import omegaconf_to_dict, print_dict
from rl_games.algos_torch.players import PpoPlayerDiscrete
from rl_games.algos_torch.players import BasicPpoPlayerContinuous, BasicPpoPlayerDiscrete

from omniisaacgymenvs.utils.rlgames.rlgames_utils import RLGPUAlgoObserver, RLGPUEnv

from omniisaacgymenvs.scripts.rlgames_train import RLGTrainer
from rl_games.torch_runner import Runner
from omniisaacgymenvs.utils.task_util import initialize_task
from omniisaacgymenvs.envs.vec_env_rlgames import VecEnvRLGames
from omniisaacgymenvs.utils.config_utils.path_utils import retrieve_checkpoint_path
import matplotlib.pyplot as plt
from matplotlib.ticker import AutoMinorLocator
from torch._C import fork
from gym import spaces
import numpy as np
import torch
import yaml


from plot_experiment import plot_episode_data_virtual
from eval_metrics import success_rate_from_distances



In [2]:

def eval_multi_agents(cfg, *, total_num_steps=800, store_all_agents=True):
    """Roll out a restored rl_games player and report its success rate.

    A player is built from ``cfg.train``, its weights are restored from
    ``cfg.checkpoint``, and the player's environment is stepped with
    deterministic actions for a fixed number of steps. Per-step actions,
    states, rewards and planar distances are collected, and the distances
    are handed to ``success_rate_from_distances``.

    Args:
        cfg: Configuration object exposing ``checkpoint`` (a "/"-separated
            path string) and ``train`` (the rl_games configuration, converted
            with ``omegaconf_to_dict``).
        total_num_steps: Number of environment steps to run.
        store_all_agents: If true, actions/states/rewards of every agent are
            recorded; otherwise only those of the first agent (index 0).
            Distances are always recorded for all agents.

    Returns:
        Whatever ``success_rate_from_distances`` returns for the collected
        distances (the value is also printed).
    """
    # Imported locally so the function does not depend on `os` leaking into
    # the namespace through a wildcard import elsewhere in the notebook.
    import os

    base_dir = "./evaluations/"
    # The experiment name is the second "/"-separated component of the
    # checkpoint path. NOTE(review): presumably paths look like
    # "runs/<experiment>/nn/<file>.pth" -- confirm against callers.
    experiment_name = cfg.checkpoint.split("/")[1]
    print(f'Experiment name: {experiment_name}')
    evaluation_dir = base_dir + experiment_name + "/"
    os.makedirs(evaluation_dir, exist_ok=True)

    rlg_config_dict = omegaconf_to_dict(cfg.train)
    print(rlg_config_dict)
    runner = Runner(RLGPUAlgoObserver())
    runner.load(rlg_config_dict)
    runner.reset()

    agent = runner.create_player()
    agent.restore(cfg.checkpoint)

    env = agent.env
    obs = env.reset()

    ep_data = {'act': [], 'obs': [], 'rews': [], 'info': [], 'all_dist': []}
    total_reward = 0

    for _ in range(total_num_steps):
        actions = agent.get_action(obs['obs'], is_deterministic=True)
        obs, reward, done, info = env.step(actions)
        state = obs['obs']['state']

        if store_all_agents:
            ep_data['act'].append(actions.cpu().numpy())
            ep_data['obs'].append(state.cpu().numpy())
            ep_data['rews'].append(reward.cpu().numpy())
        else:
            ep_data['act'].append(actions[0].cpu().numpy())
            ep_data['obs'].append(state[0].cpu().numpy())
            ep_data['rews'].append(reward[0].cpu().numpy())

        # Per-agent Euclidean norm of state columns 6 and 7.
        # NOTE(review): presumably the x/y position error to the goal --
        # confirm against the task's observation layout.
        xy = state[:, 6:8].cpu().numpy()
        ep_data['all_dist'].append(np.linalg.norm(xy, axis=1))

        # Only the first agent's reward contributes to the printed total.
        total_reward += reward[0]

    for key in ('obs', 'act', 'rews', 'all_dist'):
        ep_data[key] = np.array(ep_data[key])

    print(f'\n Episode: rew_sum={total_reward:.2f}, tot_steps={total_num_steps} \n')
    print(f'Episode data obs shape: {ep_data["obs"].shape} \n')

    # Plotting is currently disabled:
    # plot_episode_data_virtual(ep_data, evaluation_dir, store_all_agents)
    success_rate = success_rate_from_distances(ep_data['all_dist'])
    print(success_rate)
    return success_rate

In [12]:


# Smoke test: build a standalone discrete PPO player from a YAML training
# config, restore a checkpoint, and query one deterministic action for an
# all-zero observation.
config_name = "../cfg/train/MFP2D_PPOmulti_dict_MLP.yaml"

with open(config_name, 'r') as stream:
    cfg = yaml.safe_load(stream)

# `np.inf` is used instead of the `np.Inf` alias, which NumPy 2.0 removed.
obs_space = spaces.Dict({
    "state": spaces.Box(np.full(10, -np.inf), np.full(10, np.inf)),
    "transforms": spaces.Box(low=-1, high=1, shape=(8, 5)),
    "masks": spaces.Box(low=0, high=1, shape=(8,)),
})

# Eight independent binary actions.
act_space = spaces.Tuple([spaces.Discrete(2)] * 8)

player = BasicPpoPlayerDiscrete(cfg, obs_space, act_space, clip_actions=False, deterministic=True)
model_path = "../corl_runs/MLP_GTXY_UF_0.25_ST_PE_0.01_PAV_1.0_PLV_0.01/nn/last_MLP_GTXY_UF_0.25_ST_PE_0.01_PAV_1.0_PLV_0.01_ep_2000_rew_589.91455.pth"
player.restore(model_path)

device = 'cuda'
# NOTE(review): `obs_space` declares "transforms" as (8, 5), but the dummy
# tensor below is (5, 8), and only "state" carries a leading batch dimension.
# Left unchanged because the cell ran as-is -- confirm the intended shapes.
obs = {
    'state': torch.zeros((1, 10), dtype=torch.float32, device=device),
    'transforms': torch.zeros(5, 8, device=device),
    'masks': torch.zeros(8, dtype=torch.float32, device=device),
}

action = player.get_action(obs, is_deterministic=True)

print(action)

{'actions_num': [2, 2, 2, 2, 2, 2, 2, 2], 'input_shape': {'masks': (8,), 'state': (10,), 'transforms': (8, 5)}, 'num_seqs': 1, 'value_size': 1, 'normalize_value': True, 'normalize_input': True, 'normalize_input_keys': ['state']}
['state']
True
10
build mlp: 10
RunningMeanStd:  (1,)
['state']
here?
RunningMeanStd:  (10,)
=> loading checkpoint '../corl_runs/MLP_GTXY_UF_0.25_ST_PE_0.01_PAV_1.0_PLV_0.01/nn/last_MLP_GTXY_UF_0.25_ST_PE_0.01_PAV_1.0_PLV_0.01_ep_2000_rew_589.91455.pth'
tensor([1, 0, 1, 0, 0, 1, 0, 1], device='cuda:0')
