In [None]:
# Install the package located in the current directory; -q keeps pip's output quiet.
!pip -q install .

In [None]:
import torch
import pandas as pd
from collections import deque
import random
import matplotlib.pyplot as plt
from agent_object import ObjectAgent
from unityagents import UnityEnvironment
import numpy as np
%matplotlib inline

In [None]:
# Create the Unity environment from the Reacher executable at this path.
# NOTE(review): the path is absolute and machine-specific; adjust it when
# running outside the workspace this notebook was written in.
env = UnityEnvironment(file_name='/data/Reacher_Linux_NoVis/Reacher.x86_64')

In [None]:
# Get the default brain: the first name listed by the environment is used,
# and the matching brain object is looked up under that name.
brain_name = env.brain_names[0]
brain = env.brains[brain_name]

In [None]:
# Reset the environment (train_mode=True) and keep the info for our brain.
env_info = env.reset(train_mode=True)[brain_name]

# Collect the problem dimensions first ...
data_num_agents = len(env_info.agents)               # number of agents
data_action_size = brain.vector_action_space_size    # size of each action
data_states = env_info.vector_observations           # observed states (first axis is reported as the agent count)
data_state_size = data_states.shape[1]                # length of a single state vector

# ... then report them in one place.
print('Number of agents:', data_num_agents)
print('Size of each action:', data_action_size)
print('There are {} agents. Each observes a state with length: {}'.format(data_states.shape[0], data_state_size))
print('The state for the first agent looks like:', data_states[0])

In [None]:
# Build the agent from the dimensions read off the environment; the seed is fixed at 42.
agent = ObjectAgent(
    data_state_size=data_state_size,
    data_action_size=data_action_size,
    data_n_agents=data_num_agents,
    data_random_seed=42,
)

In [None]:
# Restore the saved checkpoint weights into the agent's local critic and actor.
# map_location='cpu' makes torch.load deserialize the tensors onto the CPU, so
# checkpoints written from CUDA tensors still load when no GPU is available;
# load_state_dict then copies the values into the networks' existing parameters.
agent.data_critic_local.load_state_dict(torch.load('checkpoint_critic.pth', map_location='cpu'))
agent.data_actor_local.load_state_dict(torch.load('checkpoint_actor.pth', map_location='cpu'))

In [None]:
def test_ddpg(env, data_agent, data_num_agents, data_max_t=1000):
    brain_name = env.brain_names[0]
    
    env_info = env.reset(train_mode=False)[brain_name]
    data_states = env_info.vector_observations  
    
    data_score = np.zeros(data_num_agents)
    for t in range(data_max_t):
        data_actions = data_agent.act(data_states, add_noise=False)
        
        env_info = env.step(data_actions)[brain_name]   
        data_next_states = env_info.vector_observations         # get next state (for each agent)
        data_rewards = env_info.rewards                         # get reward (for each agent)
        data_dones = env_info.local_done                        # see if episode finished

        data_agent.step(data_states, data_actions, data_rewards, data_next_states, data_dones)
        data_states = data_next_states
        data_score += data_rewards
        if any(data_dones):
            break
    print("Score of this episode is: %.2f" % np.mean(data_score))  

In [None]:
# Run a single evaluation episode; the trailing ';' keeps the cell output
# limited to what the function prints.
test_ddpg(env=env, data_agent=agent, data_num_agents=data_num_agents);

In [None]:
# Close the environment now that the evaluation run is finished.
env.close()