In [1]:
from unityagents import UnityEnvironment
import numpy as np
import random

from collections import deque
import matplotlib.pyplot as plt
from ddpg_agent import Agent
from ddpg_model import Actor, Critic

import torch

%matplotlib inline

In [2]:
# Launch the Unity Reacher simulator from the application bundle one directory up.
env = UnityEnvironment(file_name='../Reacher.app')

# The first brain registered with the environment is the one used throughout
# this notebook.
brain_name = env.brain_names[0]
brain = env.brains[brain_name]

# Start an episode (train_mode=False) and keep the info reported for that brain.
env_info = env.reset(train_mode=False)[brain_name]

# Initial observations: indexed as (agent, state feature) by the code below.
states = env_info.vector_observations
state_size = states.shape[1]

# Number of agents in the scene and the length of each agent's action vector.
num_agents = len(env_info.agents)
action_size = brain.vector_action_space_size

# Summarise what the environment looks like.
print('Number of agents:', num_agents)
print('Size of each action:', action_size)
print('There are {} agents. Each observes a state with length: {}'.format(states.shape[0], state_size))
print('The state for the first agent looks like:', states[0])

INFO:unityagents:
'Academy' started successfully!
Unity Academy name: Academy
        Number of Brains: 1
        Number of External Brains : 1
        Lesson number : 0
        Reset Parameters :
		goal_speed -> 1.0
		goal_size -> 5.0
Unity brain name: ReacherBrain
        Number of Visual Observations (per agent): 0
        Vector Observation space type: continuous
        Vector Observation space size (per agent): 33
        Number of stacked Vector Observation: 1
        Vector Action space type: continuous
        Vector Action space size (per agent): 4
        Vector Action descriptions: , , , 


Number of agents: 20
Size of each action: 4
There are 20 agents. Each observes a state with length: 33
The state for the first agent looks like: [ 0.00000000e+00 -4.00000000e+00  0.00000000e+00  1.00000000e+00
 -0.00000000e+00 -0.00000000e+00 -4.37113883e-08  0.00000000e+00
  0.00000000e+00  0.00000000e+00  0.00000000e+00  0.00000000e+00
  0.00000000e+00  0.00000000e+00 -1.00000000e+01  0.00000000e+00
  1.00000000e+00 -0.00000000e+00 -0.00000000e+00 -4.37113883e-08
  0.00000000e+00  0.00000000e+00  0.00000000e+00  0.00000000e+00
  0.00000000e+00  0.00000000e+00  5.75471878e+00 -1.00000000e+00
  5.55726624e+00  0.00000000e+00  1.00000000e+00  0.00000000e+00
 -1.68164849e-01]


In [3]:
# Running total of rewards, one entry per agent.
scores = np.zeros(num_agents)

# Use the first CUDA device when one is available, otherwise fall back to CPU.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

# Single seed used for both the actor network and every Agent instance.
random_seed = 0

# Attach the trained actor to the Agent *class*, so it is visible from every
# instance created below.
# NOTE(review): this only takes effect if Agent.__init__ does not assign its
# own `actor_local` on the instance -- confirm against ddpg_agent.py.
Agent.actor_local = Actor(state_size, action_size, random_seed).to(device)

# map_location remaps the stored tensors onto `device`; without it a
# checkpoint saved from a CUDA device cannot be loaded on a CPU-only host.
Agent.actor_local.load_state_dict(
    torch.load('checkpoint_actor.pth', map_location=device)
)

# One Agent wrapper per simulated arm, all built with the same seed.
agents = [
    Agent(state_size, action_size, random_seed=random_seed)
    for _ in range(num_agents)
]

Initialising ReplayBuffer


In [4]:

# Begin from a fresh episode so this cell can be re-run on its own. Without
# the reset, a second run would start from the stale `states` left by the
# previous run and keep adding rewards onto the previous episode's `scores`.
env_info = env.reset(train_mode=False)[brain_name]
states = env_info.vector_observations
scores = np.zeros(num_agents)

while True:
    # One action per agent, computed from that agent's current observation.
    actions = np.array([agent.act(state) for agent, state in zip(agents, states)])

    env_info = env.step(actions)[brain_name]        # send the actions to the environment
    next_states = env_info.vector_observations     # observations after the step
    rewards = env_info.rewards                     # per-agent rewards for this step
    dones = env_info.local_done                    # per-agent episode-finished flags

    states = next_states
    scores += rewards

    # '\r' with end="" rewrites the same output line on every step.
    print('\rScore: {:.2f}\tmin: {:.2f}\tmax: {:.2f}'
          .format(np.mean(scores), np.min(scores), np.max(scores)), end="")

    # Stop as soon as any agent reports that its episode has finished.
    if np.any(dones):
        break

print("\nScores: {}".format(scores))

Score: 32.19	min: 24.81	max: 37.34
Scores: [35.50999921 31.2299993  27.16999939 36.74999918 32.26999928 32.69999927
 31.02999931 31.89999929 27.25999939 35.44999921 31.3899993  37.33999917
 24.80999945 34.34999923 28.92999935 31.05999931 31.2999993  33.08999926
 36.43999919 33.84999924]


In [5]:
# Reset the environment after the evaluation episode. The call returns a dict
# keyed by brain name, which the notebook echoes as this cell's output.
# NOTE(review): if the intent is to shut the simulator down at the end of the
# notebook, this may have been meant to be env.close() -- confirm.
env.reset()

{'ReacherBrain': <unityagents.brain.BrainInfo at 0x11c1dc630>}