# Evaluate trained models on a task and check success rate 

Load the necessary libraries

In [2]:
import numpy as np
import torch
import hydra
from omegaconf import DictConfig
import datetime
from omniisaacgymenvs.utils.hydra_cfg.hydra_utils import *
from omniisaacgymenvs.utils.hydra_cfg.reformat import omegaconf_to_dict, print_dict
from rl_games.algos_torch.players import PpoPlayerDiscrete
from rl_games.algos_torch.players import BasicPpoPlayerContinuous, BasicPpoPlayerDiscrete

from omniisaacgymenvs.utils.rlgames.rlgames_utils import RLGPUAlgoObserver, RLGPUEnv

from omniisaacgymenvs.scripts.rlgames_train import RLGTrainer
from rl_games.torch_runner import Runner
from omniisaacgymenvs.utils.task_util import initialize_task
from omniisaacgymenvs.envs.vec_env_rlgames import VecEnvRLGames
from omniisaacgymenvs.utils.config_utils.path_utils import retrieve_checkpoint_path
import matplotlib.pyplot as plt
from matplotlib.ticker import AutoMinorLocator
from torch._C import fork
from gym import spaces
import numpy as np
import torch
import yaml
import os
import glob

from plot_experiment import plot_episode_data_virtual
from eval_metrics import success_rate_from_distances



In [3]:
# specify the experiment load directory
load_dir = "../new_mass/"
# os.listdir() returns entries in arbitrary, platform-dependent order.
# Sorting makes the experiment list (and anything later selected from it by
# position) reproducible between runs and machines.
experiments = sorted(os.listdir(load_dir))
print(f'Experiments found in {load_dir} folder: {len(experiments)}')

Experiments found in ../new_mass/ folder: 20


In [6]:
# filter out invalid experiments and retrieve valid models
def get_valid_models(load_dir, experiment):
    """Collect one final checkpoint path per experiment directory.

    For every experiment name, looks for files matching
    ``<load_dir>/<name>/nn/last_*ep_2000_rew__*.pth``. Experiments with at
    least one match contribute their first match (in sorted order);
    experiments without a match are reported as invalid.

    Args:
        load_dir: directory that contains the experiment folders.
        experiment: experiment folder name, or an iterable of such names.

    Returns:
        List of checkpoint paths, in the order the experiments were given.
    """
    # Accept a single name as well as a collection of names.
    if isinstance(experiment, str):
        names = [experiment]
    else:
        names = list(experiment)

    valid_models = []
    invalid_experiments = []
    for name in names:
        try:
            file_pattern = os.path.join(load_dir, name, "nn", "last_*ep_2000_rew__*.pth")
        except TypeError:
            # A non path-like entry cannot name an experiment directory.
            invalid_experiments.append(name)
            continue
        # glob returns an empty list (it does not raise) when nothing matches,
        # so a missing checkpoint has to be detected explicitly. The result
        # order is unspecified, hence the sort for a reproducible choice.
        matches = sorted(glob.glob(file_pattern))
        if matches:
            valid_models.append(matches[0])
        else:
            invalid_experiments.append(name)

    if invalid_experiments:
        print(f'Invalid experiments: {invalid_experiments}')
    else:
        print('All experiments are valid')
    return valid_models

In [7]:
models = get_valid_models(load_dir, experiments)
if not models:
    print('No valid models found')
    # `exit()` is an interactive convenience injected by the `site` module and
    # is not meant for programs. Raising SystemExit directly works everywhere
    # and reports a non-zero status for this failure.
    raise SystemExit(1)

All experiments are valid


In [2]:

def eval_multi_agents(agent, models, horizon):
    """Roll out the first checkpoint in ``models`` and report its success rate.

    The checkpoint ``models[0]`` is restored into ``agent``, the agent's
    environment is stepped for a fixed 800 steps with deterministic actions,
    and per-step actions, states, rewards and distances are collected. The
    collected distances are passed to ``success_rate_from_distances``.

    Args:
        agent: player exposing ``restore``, ``get_action`` and an ``env``
            attribute with ``reset``/``step``.
        models: sequence of checkpoint paths; only the first one is evaluated.
        horizon: accepted for interface compatibility; not used here (the
            rollout length is the fixed ``total_num_steps`` below).

    Returns:
        The value produced by ``success_rate_from_distances`` (it is also
        printed).
    """
    base_dir = "./evaluations/"
    # NOTE(review): this takes the second "/"-separated component of the
    # checkpoint path. For a path like "../<load_dir>/<experiment>/nn/..."
    # that is the load directory name, not the experiment folder -- confirm
    # this is the intended output folder name.
    experiment_name = models[0].split("/")[1]
    print(f'Experiment name: {experiment_name}')
    evaluation_dir = base_dir + experiment_name + "/"
    os.makedirs(evaluation_dir, exist_ok=True)

    agent.restore(models[0])

    store_all_agents = True # store all agents generated data, if false only the first agent is stored
    env = agent.env
    obs = env.reset()

    ep_data = {'act': [], 'obs': [], 'rews': [], 'info': [], 'all_dist': []}
    total_reward = 0
    num_steps = 0

    total_num_steps = 800
    for _ in range(total_num_steps):
        actions = agent.get_action(obs['obs'], is_deterministic=True)
        obs, reward, done, info = env.step(actions)
        state = obs['obs']['state']

        if store_all_agents:
            ep_data['act'].append(actions.cpu().numpy())
            ep_data['obs'].append(state.cpu().numpy())
            ep_data['rews'].append(reward.cpu().numpy())
        else:
            ep_data['act'].append(actions[0].cpu().numpy())
            ep_data['obs'].append(state[0].cpu().numpy())
            ep_data['rews'].append(reward[0].cpu().numpy())

        # Columns 6 and 7 of the state are treated as x/y coordinates; their
        # Euclidean norm is recorded per environment for every step.
        x_pos = state[:, 6].cpu().numpy()
        y_pos = state[:, 7].cpu().numpy()
        ep_data['all_dist'].append(np.linalg.norm(np.array([x_pos, y_pos]), axis=0))

        # Only the first environment's reward is accumulated for the summary.
        total_reward += reward[0]
        num_steps += 1

    ep_data['obs'] = np.array(ep_data['obs'])
    ep_data['act'] = np.array(ep_data['act'])
    ep_data['rews'] = np.array(ep_data['rews'])
    ep_data['all_dist'] = np.array(ep_data['all_dist'])

    print(f'\n Episode: rew_sum={total_reward:.2f}, tot_steps={num_steps} \n')
    print(f'Episode data obs shape: {ep_data["obs"].shape} \n')

    success_rate = success_rate_from_distances(ep_data['all_dist'])
    print(success_rate)
    # Hand the metric back so callers can aggregate it instead of parsing stdout.
    return success_rate

In [12]:

# setting up the Isaac Gym environment and player

@hydra.main(config_name="config", config_path="../cfg")
def parse_hydra_configs(cfg: DictConfig):
    """Hydra entry point: build the environment and player, then evaluate.

    Overrides a handful of task settings on ``cfg``, creates the
    ``VecEnvRLGames`` environment, initializes the task, creates an rl_games
    player through ``Runner`` and passes it to ``eval_multi_agents`` together
    with the module-level ``models`` list. The environment is closed at the end.

    Args:
        cfg: configuration composed by Hydra from ``../cfg/config``.
    """
    # _____Set up task_____
    horizon = 500
    # Overrides applied before converting to a plain dict, so ``cfg_dict``
    # below contains them.
    cfg.task.env.maxEpisodeLength = horizon + 2
    cfg.task.env.platform.core.mass = 5.32
    cfg.task.env.clipObservations['state'] = 20.0
    cfg.task.env.task_parameters['max_spawn_dist'] = 3.0
    cfg.task.env.task_parameters['min_spawn_dist'] = 1.5  
    cfg.task.env.task_parameters['kill_dist'] = 6.0
    cfg.task.env.task_parameters['kill_after_n_steps_in_tolerance'] = 800
    cfg_dict = omegaconf_to_dict(cfg)
    # _____Create environment_____
    headless = cfg.headless
    # Viewport is enabled only when the sim config has a truthy ``enable_cameras``.
    enable_viewport = "enable_cameras" in cfg.task.sim and cfg.task.sim.enable_cameras
    env = VecEnvRLGames(headless=headless, sim_device=cfg.device_id, enable_livestream=cfg.enable_livestream, enable_viewport=enable_viewport)
    
    # NOTE(review): imported here rather than at file top, presumably because
    # omni.isaac modules are only importable once the environment above has
    # started the simulator -- confirm before moving this import.
    from omni.isaac.core.utils.torch.maths import set_seed
    # set_seed returns the seed actually used; store it in both config views.
    cfg.seed = set_seed(cfg.seed, torch_deterministic=cfg.torch_deterministic)
    cfg_dict['seed'] = cfg.seed
    # The returned task object is not used afterwards in this function.
    task = initialize_task(cfg_dict, env)
    rlg_trainer = RLGTrainer(cfg, cfg_dict)
    rlg_trainer.launch_rlg_hydra(env)
    rlg_config_dict = omegaconf_to_dict(cfg.train)

    # _____Create players (model)_____
    runner = Runner(RLGPUAlgoObserver())
    runner.load(rlg_config_dict)
    runner.reset()

    agent = runner.create_player()

    #eval_single_agent(cfg_dict, cfg, env)
    # ``models`` is read from module scope (populated by get_valid_models).
    eval_multi_agents(agent, models, horizon)

    env.close()

{'actions_num': [2, 2, 2, 2, 2, 2, 2, 2], 'input_shape': {'masks': (8,), 'state': (10,), 'transforms': (8, 5)}, 'num_seqs': 1, 'value_size': 1, 'normalize_value': True, 'normalize_input': True, 'normalize_input_keys': ['state']}
['state']
True
10
build mlp: 10
RunningMeanStd:  (1,)
['state']
here?
RunningMeanStd:  (10,)
=> loading checkpoint '../corl_runs/MLP_GTXY_UF_0.25_ST_PE_0.01_PAV_1.0_PLV_0.01/nn/last_MLP_GTXY_UF_0.25_ST_PE_0.01_PAV_1.0_PLV_0.01_ep_2000_rew_589.91455.pth'
tensor([1, 0, 1, 0, 0, 1, 0, 1], device='cuda:0')


In [None]:
# Invoke the Hydra-decorated entry point; it builds the environment and the
# player and then runs the evaluation.
parse_hydra_configs()

### Code for an inference-only player

# Build a player straight from the training YAML and a checkpoint, without
# creating a simulation environment, and query it once with a dummy observation.
config_name = "../cfg/train/MFP2D_PPOmulti_dict_MLP.yaml"
with open(config_name, 'r') as stream:
    cfg = yaml.safe_load(stream)

# Observation/action spaces are declared by hand because there is no env to
# read them from. `np.inf` is used instead of the `np.Inf` alias, which was
# removed in NumPy 2.0.
obs_space = spaces.Dict({"state": spaces.Box(np.ones(10) * -np.inf, np.ones(10) * np.inf),
                         "transforms": spaces.Box(low=-1, high=1, shape=(8, 5)),
                         "masks": spaces.Box(low=0, high=1, shape=(8,))})
act_space = spaces.Tuple([spaces.Discrete(2)]*8)
player = BasicPpoPlayerDiscrete(cfg, obs_space, act_space, clip_actions=False, deterministic=True)
model_path = "../corl_runs/MLP_GTXY_UF_0.25_ST_PE_0.01_PAV_1.0_PLV_0.01/nn/last_MLP_GTXY_UF_0.25_ST_PE_0.01_PAV_1.0_PLV_0.01_ep_2000_rew_589.91455.pth"
player.restore(model_path)

# Dummy all-zero observation. "transforms" is (8, 5) to match the shape
# declared in obs_space above (it was previously created transposed).
# NOTE(review): "state" carries a leading batch dimension while "transforms"
# and "masks" do not -- confirm this is what the player expects.
obs = {'state': torch.zeros((1, 10), dtype=torch.float32, device='cuda'),
       'transforms': torch.zeros(8, 5, device='cuda'),
       'masks': torch.zeros(8, dtype=torch.float32, device='cuda')}

action = player.get_action(obs, is_deterministic=True)

print(action)