In [1]:
from unityagents import UnityEnvironment
import numpy as np

# Launch the Tennis build (path is relative to the current working directory).
env = UnityEnvironment(file_name="Tennis_Windows_x86_64/Tennis.exe")

# Work with the first brain the environment lists.
brain_name = env.brain_names[0]
brain = env.brains[brain_name]

# Reset once in training mode so there is an initial observation to inspect.
env_info = env.reset(train_mode=True)[brain_name]
states = env_info.vector_observations

# Sizes that the following cells rely on.
num_agents = len(env_info.agents)              # number of agents in the scene
action_size = brain.vector_action_space_size   # length of one agent's action vector
state_size = states.shape[1]                   # length of one agent's observation vector

# Report what was found.
print('Number of agents:', num_agents)
print('Size of each action:', action_size)
print('There are {} agents. Each observes a state with length: {}'.format(states.shape[0], state_size))
print('The state for the first agent looks like:', states[0])

INFO:unityagents:
'Academy' started successfully!
Unity Academy name: Academy
        Number of Brains: 1
        Number of External Brains : 1
        Lesson number : 0
        Reset Parameters :
		
Unity brain name: TennisBrain
        Number of Visual Observations (per agent): 0
        Vector Observation space type: continuous
        Vector Observation space size (per agent): 8
        Number of stacked Vector Observation: 3
        Vector Action space type: continuous
        Vector Action space size (per agent): 2
        Vector Action descriptions: , 


Number of agents: 2
Size of each action: 2
There are 2 agents. Each observes a state with length: 24
The state for the first agent looks like: [ 0.          0.          0.          0.          0.          0.
  0.          0.          0.          0.          0.          0.
  0.          0.          0.          0.         -6.65278625 -1.5
 -0.          0.          6.83172083  6.         -0.          0.        ]


In [2]:
from SCRATCHagent import Agent
from SCRATCHmodel import Actor, Critic

# Run the two agents against each other on Tennis.
#
# Episodes 1..WARMUP_EPISODES drive both rackets with random actions and hand
# every transition to the agents via `Agent.step`; the remaining episodes query
# each agent's own `act` method instead. The best per-episode score (max over
# the two agents) is collected in `score_list`.

# Initialize the agents
# NOTE(review): both agents receive the same seed -- presumably they start out
# identical; confirm that this is intended.
agent1 = Agent(state_size, action_size, random_seed=1)
agent2 = Agent(state_size, action_size, random_seed=1)

N_EPISODES = 99        # episodes are numbered 1..N_EPISODES
WARMUP_EPISODES = 49   # episodes 1..WARMUP_EPISODES use random actions

score_list = []

for i in range(1, N_EPISODES + 1):                         # play N_EPISODES episodes
    # NOTE(review): this reset uses train_mode=False although the initial reset
    # used train_mode=True -- confirm that this is intended for this loop.
    env_info = env.reset(train_mode=False)[brain_name]     # reset the environment
    states = env_info.vector_observations                  # current state (for each agent)
    scores = np.zeros(num_agents)                          # running score (for each agent)
    warmup = i <= WARMUP_EPISODES

    while True:
        ob_1 = states[0]
        ob_2 = states[1]

        if warmup:
            action1 = np.random.randn(action_size)
            action2 = np.random.randn(action_size)
        else:
            action1 = agent1.act(ob_1)
            action2 = agent2.act(ob_2)

        # Clip every action component to [-1, 1], then rebind the per-agent
        # actions to the clipped values so that what is handed to `Agent.step`
        # below is exactly what the environment executed (previously the raw,
        # unclipped random samples were passed on).
        actions = np.clip([action1, action2], -1, 1)
        action1, action2 = actions[0], actions[1]

        env_info = env.step(actions)[brain_name]           # send all actions to the environment

        next_states = env_info.vector_observations         # next state (for each agent)
        rewards = env_info.rewards                         # reward (for each agent)
        dones = env_info.local_done                        # episode-finished flag (for each agent)
        scores += rewards                                  # update the score (for each agent)

        # Each agent is given its own transition first and the opponent's second.
        # NOTE(review): `step` is only called during warm-up, so transitions from
        # the agents' own actions are never passed on -- confirm this is intended.
        if warmup:
            agent1.step(ob_1, ob_2, action1, action2, next_states[0], next_states[1], rewards[0], rewards[1], dones[0], dones[1])
            agent2.step(ob_2, ob_1, action2, action1, next_states[1], next_states[0], rewards[1], rewards[0], dones[1], dones[0])

        states = next_states                               # roll over states to next time step
        if np.any(dones):                                  # exit loop if episode finished
            break
    score_list.append(np.max(scores))

# Runs once, after all episodes: reports the final episode's score, and only
# when the mean over all episodes is positive.
if np.mean(score_list)>0:
    print('Score (max over agents) from episode {}: {}'.format(i, np.max(scores)))



In [3]:
# Close the Unity environment created in the first cell now that the run is over.
env.close()