In [1]:
from unityagents import UnityEnvironment
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
from model import Actor

In [2]:
# load environment

env = UnityEnvironment(file_name="Tennis.app")
brain_name = env.brain_names[0]
brain = env.brains[brain_name]
env_info = env.reset(train_mode=False)[brain_name]
action_size = brain.vector_action_space_size
input_dims = len(env_info.vector_observations[0])

INFO:unityagents:
'Academy' started successfully!
Unity Academy name: Academy
        Number of Brains: 1
        Number of External Brains : 1
        Lesson number : 0
        Reset Parameters :
		
Unity brain name: TennisBrain
        Number of Visual Observations (per agent): 0
        Vector Observation space type: continuous
        Vector Observation space size (per agent): 8
        Number of stacked Vector Observation: 3
        Vector Action space type: continuous
        Vector Action space size (per agent): 2
        Vector Action descriptions: , 


In [3]:
# agent's neural network

class Actor(nn.Module):
    """Actor (Policy) Model."""

    def __init__(self, state_size, action_size, fc1_units=64, fc2_units=64):
        super(Actor, self).__init__()
        self.fc1 = nn.Linear(state_size, fc1_units)
        self.fc2 = nn.Linear(fc1_units, fc2_units)
        self.fc3 = nn.Linear(fc2_units, action_size)
        
    def forward(self, state):
        x = F.relu(self.fc1(state))
        x = F.relu(self.fc2(x))
        
        return F.tanh(self.fc3(x))

In [4]:
# instantiate agents and load pre-trained weights

agents = [Actor(input_dims, action_size) for i in range(2)]
for i, agent in enumerate(agents):
    agent.load_state_dict(torch.load(f"./model/checkpoint_actor_{i}.pth"))

In [5]:
def getActions(states, agents):

    actions = []

    for state, agent in zip(states, agents):
        action = agent(torch.from_numpy(state).float().unsqueeze(0))
        action = action.detach().numpy()
        actions.append(action)

    return actions

In [6]:
# reset the environment and watch the agent play tennis!

env_info = env.reset(train_mode=False)[brain_name]
states = env_info.vector_observations               # reset environment
score = 0                                          # initialize the score
while True:
    actions = getActions(states, agents)
    env_info = env.step(actions)[brain_name]        # send the action to the environment
    next_states = env_info.vector_observations      # get the next state
    rewards = env_info.rewards                      # get the reward
    dones = env_info.local_done                     # see if episode has finished
    score += np.max(rewards)                                 # update the score
    states = next_states                              # roll over the state to next time step
    if np.any(dones):                                # exit loop if episode finished
        break
    
print("Score: {}".format(score))


Score: 5.200000077486038


In [7]:
env.close()