# Learning to Navigate

---

This notebook trains a DQN agent (optionally Double DQN) to solve the Unity ML-Agents Banana environment, the first project of the [Deep Reinforcement Learning Nanodegree](https://www.udacity.com/course/deep-reinforcement-learning-nanodegree--nd893).

In [None]:
from unityagents import UnityEnvironment
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline

from dqn_agent import Agent
from train import dqn
from utils import scores_to_file


# Start the Banana environment (launched with no_graphics=True).
env = UnityEnvironment(file_name="Banana.app", no_graphics=True)

# Work with the first brain the environment lists.
brain_name = env.brain_names[0]
brain = env.brains[brain_name]

# Reset with train_mode=True and keep the info reported for that brain.
env_info = env.reset(train_mode=True)[brain_name]

# Gather the sizes that later cells need: the action count, plus the
# first entry of the vector observations and its length.
action_size = brain.vector_action_space_size
state = env_info.vector_observations[0]
state_size = len(state)

# Summarise the action and state spaces.
print('Number of actions:', action_size)
print('States look like:', state)
print('States have length:', state_size)

In [None]:
# Algorithm switch: True trains Double DQN, False trains vanilla DQN.
ddqn = True

# Only used when ddqn is True:
# True -> mean(Q1, Q2), False -> random_choice(Q1, Q2).
ddqn_mean = True


if ddqn:
    # Double DQN: dqn() is called with its default arguments.
    agent = Agent(state_size, action_size, seed=0, ddqn=ddqn, ddqn_mean=ddqn_mean)
    scores = dqn(env, agent, brain_name)
else:
    # Vanilla DQN: update_every=4 for the agent, eps_start=0.4 for training.
    agent = Agent(state_size, action_size, seed=0, update_every=4)
    scores = dqn(env, agent, brain_name, eps_start=0.4)

In [None]:
# Plot the score of each episode in `scores` (one training run).
fig, ax = plt.subplots()
ax.plot(np.arange(len(scores)), scores)
ax.set_ylabel('Score')
ax.set_xlabel('Episode #')
plt.show()

In [None]:
# Close environment when done
# NOTE(review): the next cell still passes `env` to dqn(); when the notebook is
# run top to bottom this close happens first. Confirm the intended cell order
# (e.g. run the multi-seed cell before closing).
env.close()

In [None]:
# Run all 3 models 5 times each (this takes a long time).
# This cell passes `env` to dqn(), so it has to run while the environment is
# still open, i.e. before `env.close()` is called.

# One entry per model: (results directory, Agent kwargs, dqn kwargs).
experiments = [
    # DQN
    ("results/dqn/", {"update_every": 4}, {"n_episodes": 1000, "eps_start": 0.4}),
    # Double DQN, choose moves using mean(Q1, Q2)
    ("results/ddqn/", {"ddqn": True}, {}),
    # Double DQN, choose moves using random_choice(Q1, Q2)
    ("results/ddqn_rand/", {"ddqn": True, "ddqn_mean": False}, {}),
]

for seed in range(5):
    for results_dir, agent_kwargs, train_kwargs in experiments:
        agent = Agent(state_size, action_size, seed, **agent_kwargs)
        # The result is always bound to `scores`, so each file receives the
        # scores of the run that just finished. The earlier version assigned
        # the "ddqn" run to a misspelled name (`cores`) and wrote the previous
        # DQN scores into results/ddqn/ instead.
        scores = dqn(env, agent, brain_name, **train_kwargs)
        scores_to_file(scores, results_dir + str(seed) + ".txt")
