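This notebook demonstrates the performance of the trained MADDPG agents in the Unity Tennis environment by loading their saved weights and running a few evaluation episodes.  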

In [1]:
# import libraries
from unityagents import UnityEnvironment
import numpy as np
from maddpg import MADDPG_Agent
import torch
import matplotlib.pyplot as plt
%matplotlib inline 

In [2]:
# Start the Unity Tennis environment and pick its first brain.
# The macOS build of the Unity app is referenced here; training (Tennis.ipynb)
# used the headless Linux build because it ran on AWS.
# Adjust `ENV_FILE_NAME` to match the OS / Unity build available locally.

ENV_FILE_NAME = "Tennis.app"   # path to the Unity executable
ENV_SEED = 2                   # seed passed to the Unity environment

env = UnityEnvironment(file_name=ENV_FILE_NAME, seed=ENV_SEED)

# the first brain listed by the environment is the one used below
brain_name = env.brain_names[0]
brain = env.brains[brain_name]

INFO:unityagents:
'Academy' started successfully!
Unity Academy name: Academy
        Number of Brains: 1
        Number of External Brains : 1
        Lesson number : 0
        Reset Parameters :
		
Unity brain name: TennisBrain
        Number of Visual Observations (per agent): 0
        Vector Observation space type: continuous
        Vector Observation space size (per agent): 8
        Number of stacked Vector Observation: 3
        Vector Action space type: continuous
        Vector Action space size (per agent): 2
        Vector Action descriptions: , 


In [3]:
# Build a MADDPG agent and restore the trained actor/critic weights into it.

# MADDPG agent
# NOTE(review): the arguments presumably mean state size 24 (8 observations x 3
# stacked frames), action size 2, 2 agents and a seed of 1 -- confirm against
# MADDPG_Agent's signature in maddpg.py.
magent = MADDPG_Agent(24, 2, 2, 1)

# checkpoint locations, one actor/critic file pair per agent
file_name_weights_actor1 = 'weights/check_point_agent1_actor.pth'
file_name_weights_critic1 = 'weights/check_point_agent1_critic.pth'
file_name_weights_actor2 = 'weights/check_point_agent2_actor.pth'
file_name_weights_critic2 = 'weights/check_point_agent2_critic.pth'

checkpoint_pairs = [
    (file_name_weights_actor1, file_name_weights_critic1),
    (file_name_weights_actor2, file_name_weights_critic2),
]


def _keep_storage(storage, loc):
    """Return the deserialized storage unchanged (used as torch.load's map_location)."""
    return storage


# pair the i-th agent with the i-th checkpoint pair and load its local networks
for ddpg_agent, (actor_path, critic_path) in zip(magent.maddpg_agents, checkpoint_pairs):
    actor_state = torch.load(actor_path, map_location=_keep_storage)
    ddpg_agent.actor_local.load_state_dict(actor_state)
    critic_state = torch.load(critic_path, map_location=_keep_storage)
    ddpg_agent.critic_local.load_state_dict(critic_state)

In [4]:
# Play a handful of evaluation episodes with the trained agents (no action noise)
# and report the better of the two agents' scores for each episode.

max_episodes = 10      # upper bound on the number of evaluation episodes
target_score = 1.0     # stop evaluating once an episode's best score exceeds this

for i_episode in range(1, max_episodes + 1):

    # start a fresh episode
    env_info = env.reset(train_mode=False)[brain_name]
    states = env_info.vector_observations      # current observation of each agent
    scores = np.zeros(2)                       # running score of each agent
    magent.reset()                             # reset noise

    episode_finished = False
    while not episode_finished:

        # choose actions for all agents without exploration noise
        actions = magent.act_all(states, add_noise=False)

        # advance the environment and read back the transition
        env_info = env.step(actions)[brain_name]
        next_states = env_info.vector_observations
        rewards = env_info.rewards
        dones = env_info.local_done

        # NOTE(review): step() is still called during evaluation; what the final
        # `True` argument controls is not visible here -- confirm in maddpg.py that
        # this does not keep training the loaded networks.
        magent.step(states, actions, rewards, next_states, dones, True)

        scores += rewards                      # accumulate each agent's reward
        states = next_states                   # roll over to the next time step

        # the episode ends as soon as any agent reports done
        episode_finished = np.any(dones)

    best_score = np.max(scores)
    print('episode {}, score: {:.2f}'.format(i_episode, best_score))

    # once the score exceeds the target, finish evaluating the performance
    if best_score > target_score:
        break


episode 1, score: 0.09
episode 2, score: 1.70


In [5]:
# evaluation is finished: close the Unity environment opened earlier in this notebook
env.close()