In [None]:
from unityagents import UnityEnvironment
import numpy as np
import random
import torch
from collections import deque
import matplotlib.pyplot as plt
%matplotlib inline

In [None]:
# Launch the Unity environment from the local "Reacher-2.app" build.
# NOTE(review): the ".app" bundle suggests a macOS build; other platforms
# presumably need a different file_name — confirm before running elsewhere.
env = UnityEnvironment(file_name="Reacher-2.app")

In [None]:
# Use the first brain the environment exposes: keep its name (used to index
# the dicts returned by env.reset / env.step) and the brain object itself.
brain_name = env.brain_names[0]
brain = env.brains[brain_name]

In [None]:
# Start a fresh training-mode episode and keep the info entry for our brain.
env_info = env.reset(train_mode=True)[brain_name]

# Collect the dimensions the networks need ...
action_size = brain.vector_action_space_size   # size of the action space
state = env_info.vector_observations[0]        # first entry of the observations
state_size = len(state)                        # length of one observation

# ... and report them together with the number of agents.
print('Number of agents:', len(env_info.agents))
print('Number of actions:', action_size)
print('States look like:', state)
print('States have length:', state_size)

In [None]:
# Reset the environment once more in training mode and refresh env_info.
# NOTE(review): this repeats the reset performed when the state/action sizes
# were inspected; it looks redundant — confirm it is still needed.
env_info = env.reset(train_mode=True)[brain_name]


In [None]:
from ddpg_agent import Agent
from model import Actor, Critic

In [None]:
# Restore the trained actor network from its checkpoint.
# Use the first CUDA device when torch can see one, otherwise the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")

# NOTE(review): the trailing positional arguments (False, 0) are presumably
# the batch-norm flag and the seed — confirm against model.Actor.
actor = Actor(state_size, action_size, False, 0).to(device)

# map_location='cpu' deserializes the checkpoint tensors onto the CPU no
# matter which device they were saved from.
actor.load_state_dict(
    torch.load('best_checkpoint_actor.pth', map_location='cpu')
)

In [None]:
# Restore the trained critic network from its checkpoint.
# NOTE(review): the trailing positional arguments (False, 0) are presumably
# the batch-norm flag and the seed — confirm against model.Critic.
critic = Critic(state_size, action_size, False, 0).to(device)

# map_location='cpu' deserializes the checkpoint tensors onto the CPU no
# matter which device they were saved from.
critic.load_state_dict(
    torch.load('best_checkpoint_critic.pth', map_location='cpu')
)

In [None]:
# Hyperparameters passed to the DDPG agent.
hyper = {
    'BUFFER_SIZE': int(1e6),   # replay buffer size
    'BATCH_SIZE': 256,         # minibatch size
    'GAMMA': 0.99,             # discount factor
    'TAU': 1e-3,               # for soft update of target parameters
    'LR_ACTOR': 1e-3,          # learning rate of the actor
    'LR_CRITIC': 1e-3,         # learning rate of the critic
    'WEIGHT_DECAY': 0,         # L2 weight decay
    'UPDATE_EVERY': 20,        # timesteps between updates
    'NUM_UPDATES': 10,         # num of update passes when updating
    'EPSILON': 1.0,            # epsilon for the noise process added to the actions
    'EPSILON_DECAY': 0,        # decay for epsilon above
    'NOISE_SIGMA': 0.01,       # sigma for Ornstein-Uhlenbeck noise
    'USE_BATCH_NORM': False,
}

# Build the agent around the actor and critic networks restored from their
# checkpoints, configured with the values in `hyper` and random_seed=0.
smart_agent = Agent(state_size=state_size, action_size=action_size, random_seed=0, hyper=hyper, actor=actor, critic=critic)

In [None]:
# Show the agent's current `epsilon` attribute as the cell output.
smart_agent.epsilon

In [None]:
# function to play game
def playGame(agent, n_episodes=100, max_t=1000, eps_start=.01, eps_end=0.001, eps_decay=0.995, print_every=50):
    """Run `agent` in the Unity environment for a number of episodes.

    Relies on the notebook-level globals `env` and `brain_name`. The
    environment is reset with train_mode=False at the start of each episode.

    Args:
        agent: object exposing `act(state, eps)` and
            `step(state, action, reward, next_state, done, t)`.
        n_episodes: number of episodes to play.
        max_t: maximum number of environment steps per episode.
        eps_start: value of `eps` handed to `agent.act` in the first episode.
        eps_end: lower bound for `eps`.
        eps_decay: factor `eps` is multiplied by after every episode.
        print_every: print the episode score every this many episodes.

    Returns:
        A list with one score per episode, where a score is the sum over the
        episode's steps of the mean of `env_info.rewards`.
    """
    eps = eps_start
    scores = []
    for i_episode in range(1, n_episodes + 1):
        env_info = env.reset(train_mode=False)[brain_name]
        state = env_info.vector_observations
        score = 0
        for t in range(max_t):
            action = agent.act(state, eps)
            env_info = env.step(action)[brain_name]

            next_state = env_info.vector_observations   # get the next state
            reward = env_info.rewards                   # get the reward
            done = env_info.local_done                  # per-entry episode-finished flags

            # NOTE(review): agent.step is still invoked on every transition, so
            # depending on how Agent.step is implemented the agent may keep
            # learning while playing — confirm this is intended.
            agent.step(state, action, reward, next_state, done, t)
            state = next_state
            score += np.mean(reward)

            # Stop the episode as soon as any entry reports it is done.
            if np.any(done):
                break

        scores.append(score)
        # Apply the epsilon schedule; without this eps_end/eps_decay were ignored
        # and eps stayed at eps_start for the whole run.
        eps = max(eps_end, eps_decay * eps)

        if i_episode % print_every == 0:
            print("Score at episode %s : %s" % (i_episode, score))

    return scores
        


In [None]:
# Play 100 episodes with the restored agent; every other playGame argument
# keeps its default value.
playGame(smart_agent, n_episodes=100)