In [None]:
import os

# Select the pure-Python protobuf implementation for this process.
# This runs in the first cell, ahead of the imports in the cells below.
os.environ.update(PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION='python')

In [None]:
!pip -q install .

In [None]:
import random
import torch
from collections import deque
from object_agent_tennis import ObjectAgent
import matplotlib.pyplot as plt
import pandas as pd
%matplotlib inline
from unityagents import UnityEnvironment
import numpy as np

# Location of the Tennis build, relative to the notebook's working directory.
# NOTE(review): this is the Windows x86_64 executable; other platforms need a different build.
tennis_build_path = "./data/Tennis_Windows_x86_64/Tennis.exe"
env = UnityEnvironment(file_name=tennis_build_path)

In [None]:
# Use the first brain the environment exposes; its name keys every reset/step result.
brain_name = env.brain_names[0]
brain = env.brains[brain_name]

# Start an episode in training mode and keep only this brain's info.
env_info = env.reset(train_mode=True)[brain_name]

# Collect the dimensions first, then report them together below.
num_agents = len(env_info.agents)               # one entry per agent in the scene
action_size = brain.vector_action_space_size    # length of one agent's action
data_states = env_info.vector_observations      # one row of observations per agent
state_size = data_states.shape[1]               # length of one agent's observation

print('Number of agents:', num_agents)
print('Size of each action:', action_size)
print('There are {} agents. Each observes a state with length: {}'.format(data_states.shape[0], state_size))
print('The state for the first agent looks like:', data_states[0])

In [None]:
# Build the agent from the sizes the environment reported in the setup cell instead of
# repeating them as literals (the original call hard-coded 24 and 2).
# NOTE(review): the checkpoints loaded in the next cell presumably expect exactly these
# sizes; a mismatch would surface there as a load_state_dict error - confirm.
agent = ObjectAgent(
    data_state_size=state_size,
    data_action_size=action_size,
    data_random_seed=1,
)

In [None]:
# Restore the trained critic and actor weights from the checkpoint files in the working directory.
# map_location='cpu' lets checkpoints that were saved from GPU tensors load on a machine
# without CUDA; load_state_dict then copies the values into the networks' own parameters.
# NOTE(review): torch.load unpickles the file, so only load checkpoints from a trusted source.
agent.data_critic_local.load_state_dict(
    torch.load('checkpoint_critic.pth', map_location='cpu'))
agent.data_actor_local.load_state_dict(
    torch.load('checkpoint_actor.pth', map_location='cpu'))

In [None]:
def test_ddpg(env, data_agent, data_num_agents, data_max_t=1000):
    env_info = env.reset(train_mode=False)[brain_name]
    data_states = env_info.vector_observations  
    data_beta = 1.0
    data_score = np.zeros(data_num_agents)
    for t in range(data_max_t):
        data_actions = data_agent.act(data_states, data_noise_factor=data_beta)
        actions_other_player = np.flip(data_actions, 0)
        env_info = env.step(data_actions)[brain_name]   
        data_next_states = env_info.vector_observations         # get next state (for each agent)
        data_next_states_other_player = np.flip(data_next_states, 0)
        data_rewards = env_info.rewards                         # get reward (for each agent)
        data_dones = env_info.local_done                        # see if episode finished

        data_agent.step(data_states, data_actions, actions_other_player, data_rewards, data_next_states, data_next_states_other_player, data_dones)
        data_states = data_next_states
        data_score += data_rewards
        if any(data_dones):
            break
    print("Score of this episode is: %.2f" % np.mean(data_score))  

In [None]:
# Play a single evaluation episode with the restored agent.
test_ddpg(env, data_agent=agent, data_num_agents=num_agents)

In [None]:
# Close the environment now that the episode has been played.
env.close()