# Reacher Agent Testing for a single agent

## Import libraries

In [1]:
import torch
from unityagents import UnityEnvironment
import numpy as np

## Import the Actor model

In [2]:
from model import Actor

## Create instance of Reacher environment

In [3]:
# Start the Unity Reacher build; the path below points at the macOS build.
env = UnityEnvironment(file_name='Reacher.app') # Update the app name/location if not using macOS

INFO:unityagents:
'Academy' started successfully!
Unity Academy name: Academy
        Number of Brains: 1
        Number of External Brains : 1
        Lesson number : 0
        Reset Parameters :
		goal_speed -> 1.0
		goal_size -> 5.0
Unity brain name: ReacherBrain
        Number of Visual Observations (per agent): 0
        Vector Observation space type: continuous
        Vector Observation space size (per agent): 33
        Number of stacked Vector Observation: 1
        Vector Action space type: continuous
        Vector Action space size (per agent): 4
        Vector Action descriptions: , , , 


## Get brain

In [4]:
# Get the default brain: the first name listed by the environment is used as
# the key for every reset/step result further down.
brain_name = env.brain_names[0]
# Brain object registered under that name.
brain = env.brains[brain_name]

## Load Actor model weights

In [5]:
# Build the policy network with the sizes the environment reports for the
# Reacher brain: 33 observation values in, 4 continuous action values out.
actor = Actor(state_size=33, action_size=4, seed=0)
# map_location='cpu' lets a checkpoint that was saved from a GPU load on a
# CPU-only machine; the network is moved to the right device when it is used.
actor.load_state_dict(torch.load('checkpoint_actor.pth', map_location='cpu'))

<All keys matched successfully>

## Testing

In [6]:
def test(state):
    
    """
    Testing the Reacher agent for a single agent
    
    Params
    ======
        state (numpy.ndarray): Current state that the agent is experiencing
    """
    
    global actor
    
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    if device == "cuda:0":
        qnetwork = qnetwork.cuda()
        
    state = torch.from_numpy(state).float().to(device)
    actor.eval()
    with torch.no_grad():
        action = actor(state).cpu().data.numpy()
        
    return np.clip(action, -1, 1)

In [7]:
MAX_STEPS = 1000  # upper bound on the number of steps in the test episode

try:
    env_info = env.reset(train_mode=False)[brain_name]   # reset the environment
    state = env_info.vector_observations[0]              # observation of the first (only) agent
    score = 0.0
    for _ in range(MAX_STEPS):
        action = test(state)
        env_info = env.step(action)[brain_name]
        score += env_info.rewards[0]
        state = env_info.vector_observations[0]
        if env_info.local_done[0]:                       # stop once the episode reports done
            break
    print("Score:", score)
finally:
    # Always shut the Unity process down, even if a step above raised.
    env.close()



Score: 35.729999201372266
