# Bananas

---

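In this notebook, you will learn how to use the Unity ML-Agents environment for the first project of the [Deep Reinforcement Learning Nanodegree](https://www.udacity.com/course/deep-reinforcement-learning-nanodegree--nd893). It sets up the Banana environment, runs a DQN agent and two Double DQN variants on it, and plots the resulting scores.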

In [None]:
from unityagents import UnityEnvironment
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline

from dqn_agent import Agent
from train import dqn


# Start the Banana environment from the local "Banana.app" build with graphics disabled.
# NOTE(review): "Banana.app" looks like a macOS build; other platforms would
# presumably need a different file_name -- confirm before running elsewhere.
env = UnityEnvironment(file_name="Banana.app", no_graphics=True)

# Select the default brain: the first name the environment lists. The same
# name is used below to index both env.brains and the result of env.reset().
brain_name = env.brain_names[0]
brain = env.brains[brain_name]

# Reset the environment in training mode and keep the entry for this brain.
env_info = env.reset(train_mode=True)[brain_name]

# Number of actions, as reported by the brain.
action_size = brain.vector_action_space_size
print('Number of actions:', action_size)

# Examine the state space using the first entry of the vector observations
# (presumably one entry per agent -- TODO confirm against the unityagents API).
state = env_info.vector_observations[0]
print('States look like:', state)
# The state length is passed to Agent(...) in the cells below.
state_size = len(state)
print('States have length:', state_size)

In [None]:
# DQN
agent = Agent(state_size, action_size, seed=0, update_every=4)
# Keep this run's result under its own name: the Double DQN cells below also
# assign to `scores`, which would otherwise discard the DQN result.
scores_dqn = dqn(env, agent, brain_name, eps_start=0.4)
scores = scores_dqn  # `scores` is what the plotting cell reads

In [None]:
# Double DQN, choose moves using mean(Q1, Q2)
agent = Agent(state_size, action_size, seed=0, ddqn=True)
# Keep this run's result under its own name: the next experiment cell also
# assigns to `scores`, which would otherwise discard this result.
scores_ddqn_mean = dqn(env, agent, brain_name)
scores = scores_ddqn_mean  # `scores` is what the plotting cell reads

In [None]:
# Double DQN, choose moves using random_choice(Q1, Q2)
agent = Agent(state_size, action_size, seed=0, ddqn=True, ddqn_mean=False)
# Keep this run's result under its own name so it stays identifiable even if
# `scores` is reassigned by re-running another experiment cell.
scores_ddqn_random = dqn(env, agent, brain_name)
scores = scores_ddqn_random  # `scores` is what the plotting cell reads

In [None]:
# Plot the score of every episode stored in `scores` (whichever experiment
# cell assigned it most recently).
# Draw through the Axes object instead of the implicit pyplot "current axes",
# so the plot is tied to this figure and `ax` is actually used.
fig, ax = plt.subplots()
ax.plot(np.arange(len(scores)), scores)
ax.set_ylabel('Score')
ax.set_xlabel('Episode #')
ax.set_title('Score per episode')
plt.show()

In [None]:
# Close the Unity environment created in the setup cell; run this last, since
# the experiment cells above all use the same `env`.
env.close()