In [4]:
import numpy as np
import torch
import torch.optim as optim
import torch.nn.functional as F
import torch.nn as nn
import matplotlib
import gym
import matplotlib.pyplot as plt
from tqdm import tqdm
from utils import core
from utils import models
from utils import helper
from pathlib import Path
import time 
from colabgymrender.recorder import Recorder

In [5]:
def get_action(ac, o, deterministic=False):
    """Query the actor-critic ``ac`` for an action given observation ``o``.

    The observation is converted to a float32 torch tensor and forwarded,
    together with the ``deterministic`` flag, to ``ac.act``; whatever
    ``ac.act`` returns is returned unchanged.
    """
    obs_tensor = torch.as_tensor(o, dtype=torch.float32)
    return ac.act(obs_tensor, deterministic)


def run_trained_policy(env, max_ep_len=1000, 
                       num_runs=1, load_path=None, 
                       render=False, f_name=None):
    """Roll out a saved actor-critic policy and optionally save its rewards.

    The environment is wrapped in a ``Recorder`` writing to ``video/``. A
    ``core.MLPActorCritic`` is built from the wrapped env's observation and
    action spaces with ``hidden_sizes=[200]*4``, and its weights are restored
    from ``load_path``. Actions are always requested deterministically.

    Args:
        env: Gym-style environment whose ``reset()`` returns an observation
            and whose ``step(action)`` returns ``(obs, reward, done, info)``.
        max_ep_len: Upper bound on the number of steps taken per episode; an
            episode also ends as soon as the env reports ``done``.
        num_runs: Number of evaluation episodes to run.
        load_path: Path of the state dict to load into the actor-critic.
        render: If True, call ``render()`` before every step and sleep 0.05 s.
        f_name: If not None, the per-step rewards of all episodes are saved
            to ``data/<f_name>/eval_policy_rewards.npy``.

    Raises:
        ValueError: If ``load_path`` is None.
    """
    if load_path is None:
        raise ValueError('load_path must point to a saved policy state dict')

    # Wrap the environment so the rollouts are recorded under video/.
    test_env = Recorder(env, 'video/')

    ep_reward_cache = []   # one list of per-step rewards per episode
    ep_returns = []        # one summed return per episode
    try:
        # Build the actor-critic and restore the trained weights.
        ac = core.MLPActorCritic(test_env.observation_space,
                                 test_env.action_space,
                                 hidden_sizes=[200]*4)
        ac.load_state_dict(torch.load(load_path))

        for _ in range(num_runs):
            ret, ep_len = 0, 0
            reward_cache = []
            obs, done = test_env.reset(), False
            # Stop on termination OR once the step budget is used up.
            # NOTE(review): if the Recorder only finalizes its video when the
            # env reports done, truncated episodes may need an explicit
            # release -- confirm against the Recorder implementation.
            while not done and ep_len < max_ep_len:
                if render:
                    test_env.render()
                    time.sleep(0.05)
                act = get_action(ac, obs, True)
                obs, reward, done, info = test_env.step(act)
                ret += reward
                ep_len += 1
                reward_cache.append(reward)
            ep_reward_cache.append(reward_cache)
            ep_returns.append(ret)
    finally:
        # Close exactly once, after all runs, even if a rollout fails.
        test_env.close()

    # Report the mean over all episodes rather than only the last one.
    if ep_returns:
        avg_return = sum(ep_returns) / len(ep_returns)
    else:
        avg_return = float('nan')
    print(f'total average return: {avg_return}')

    # save data
    if f_name is not None:
        Path('data/{}'.format(f_name)).mkdir(parents=True, exist_ok=True)
        if len({len(r) for r in ep_reward_cache}) <= 1:
            # Equal-length episodes form a regular (num_runs, steps) array.
            rewards = np.asarray(ep_reward_cache)
        else:
            # Ragged episodes cannot be stacked; store one array per episode
            # in an object array (loading then needs allow_pickle=True).
            rewards = np.empty(len(ep_reward_cache), dtype=object)
            for idx, ep_rewards in enumerate(ep_reward_cache):
                rewards[idx] = np.asarray(ep_rewards)
        np.save(f'data/{f_name}/eval_policy_rewards.npy', rewards)


# Driver: build a HalfCheetah-v3 env from a custom XML model and replay the
# policy checkpoint stored under data/half_cheetah_dyn_0_reward_-1.0_sac/.

# NOTE(review): absolute, machine-specific path -- will not resolve on other
# machines; consider deriving it from a configurable project root.
xml = '/home/ghost-083/Research/1_Transfer_RL/D3M/envs/halfcheetah/assets/half_cheetah_0.xml'
load_path = 'data/half_cheetah_dyn_0_reward_-1.0_sac/policy.pth'

env = gym.make(
    'HalfCheetah-v3',
    xml_file=xml,
    forward_reward_weight=-1.0,
    ctrl_cost_weight=0.1,
    reset_noise_scale=0.1,
)

# NOTE(review): results are written under "pendulum_eval_16.0" although the
# environment is HalfCheetah -- confirm this output name is intended.
run_trained_policy(
    env=env,
    max_ep_len=200,
    num_runs=1,
    load_path=load_path,
    render=True,
    f_name='pendulum_eval_16.0',
)


Found 4 GPUs for rendering. Using device 0.
Could not make EGL context current


RuntimeError: Failed to initialize OpenGL