# Bananas

---

This notebook uses the Unity ML-Agents Banana environment to train and compare DQN and Double DQN agents for the first project of the [Deep Reinforcement Learning Nanodegree](https://www.udacity.com/course/deep-reinforcement-learning-nanodegree--nd893).

In [1]:
from unityagents import UnityEnvironment
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline

from dqn_agent import Agent
from train import dqn
from utils import scores_to_file


# Start the Banana environment (no_graphics=True, so no rendering is requested).
env = UnityEnvironment(file_name="Banana.app", no_graphics=True)

# Brains are looked up by name; the first one listed is the default brain.
brain_name = env.brain_names[0]
brain = env.brains[brain_name]

# Reset in training mode and keep the info object belonging to that brain.
env_info = env.reset(train_mode=True)[brain_name]

# Problem dimensions used by every agent built later in the notebook:
# the action count comes from the brain, the state length from the first
# observation vector returned by the reset.
action_size = brain.vector_action_space_size
state = env_info.vector_observations[0]
state_size = len(state)

# Report what the environment looks like.
print('Number of actions:', action_size)
print('States look like:', state)
print('States have length:', state_size)

INFO:unityagents:
'Academy' started successfully!
Unity Academy name: Academy
        Number of Brains: 1
        Number of External Brains : 1
        Lesson number : 0
        Reset Parameters :
		
Unity brain name: BananaBrain
        Number of Visual Observations (per agent): 0
        Vector Observation space type: continuous
        Vector Observation space size (per agent): 37
        Number of stacked Vector Observation: 1
        Vector Action space type: discrete
        Vector Action space size (per agent): 4
        Vector Action descriptions: , , , 


Number of actions: 4
States look like: [1.         0.         0.         0.         0.84408134 0.
 0.         1.         0.         0.0748472  0.         1.
 0.         0.         0.25755    1.         0.         0.
 0.         0.74177343 0.         1.         0.         0.
 0.25854847 0.         0.         1.         0.         0.09355672
 0.         1.         0.         0.         0.31969345 0.
 0.        ]
States have length: 37


In [None]:
# DQN
# Builds a fresh agent (seed 0, update_every=4) and trains it with eps_start=0.4.
# `agent` and `scores` are notebook-level names shared with the other training
# cells, so whichever training cell ran last decides what the plot cell shows.
# NOTE(review): eps_start is presumably the initial exploration rate -- confirm
# in train.dqn. The Double DQN cells do not pass it, so their epsilon settings
# differ from this one.
agent = Agent(state_size, action_size, seed=0, update_every=4)
scores = dqn(env, agent, brain_name, eps_start=0.4)

In [None]:
# Double DQN, choose moves using mean(Q1, Q2)
# Builds a fresh agent with ddqn=True (seed 0) and trains it with dqn()'s
# default arguments. Overwrites the notebook-level `agent` and `scores`.
# NOTE(review): no eps_start is passed here, unlike the plain DQN cell, so the
# two runs do not share the same epsilon settings.
agent = Agent(state_size, action_size, seed=0, ddqn=True)
scores = dqn(env, agent, brain_name)

In [None]:
# Double DQN, choose moves using random_choice(Q1, Q2)
# Same as the mean(Q1, Q2) variant except ddqn_mean=False is passed to the
# agent. Trains with dqn()'s default arguments and overwrites the
# notebook-level `agent` and `scores`.
agent = Agent(state_size, action_size, seed=0, ddqn=True, ddqn_mean=False)
scores = dqn(env, agent, brain_name)

In [None]:
# Plot the per-episode scores from one run: whatever `scores` currently holds,
# i.e. the result of the training cell that was executed last.
# Draw on the explicit Axes object rather than the implicit pyplot
# "current axes", so the figure being modified is unambiguous.
fig, ax = plt.subplots()
ax.plot(np.arange(len(scores)), scores)
ax.set_ylabel('Score')
ax.set_xlabel('Episode #')
plt.show()

In [None]:
# Close the Unity environment.
# NOTE(review): cells further down in this notebook still pass `env` to dqn().
# Running the notebook top to bottom would therefore train against an already
# closed environment; run this cell only after all training is finished
# (it belongs at the very end of the notebook).
env.close()

In [2]:
# Train the selected model variant(s) once per seed and save each run's scores
# under results/<variant>/<seed>.txt.
# As written this runs a single configuration: range(2, 3) yields only seed 2,
# and only the Double DQN mean(Q1, Q2) variant is active. To run all 3 models
# 3 times each, use range(3) and re-enable the commented-out variants below.
# NOTE(review): the variants do not share epsilon settings (DQN: eps_start=0.4;
# Double DQN mean: eps_start=0.4, eps_decay=0.99; Double DQN random: defaults),
# so confirm this is intended before comparing the saved results.
# NOTE(review): this cell needs a live `env`; do not run it after env.close().
for seed in range(2, 3):
    # DQN (disabled)
    #agent = Agent(state_size, action_size, seed, update_every=4)
    #scores = dqn(env, agent, brain_name, eps_start=0.4)
    #scores_to_file(scores, "results/dqn/" + str(seed) + ".txt")
    # Double DQN, choose moves using mean(Q1, Q2) (active)
    agent = Agent(state_size, action_size, seed, ddqn=True)
    scores = dqn(env, agent, brain_name, eps_start=0.4, eps_decay=0.99)
    scores_to_file(scores, "results/ddqn/" + str(seed) + ".txt")
    # Double DQN, choose moves using random_choice(Q1, Q2) (disabled)
    #agent = Agent(state_size, action_size, seed, ddqn=True, ddqn_mean=False)
    #scores = dqn(env, agent, brain_name)
    #scores_to_file(scores, "results/ddqn_rand/" + str(seed) + ".txt")


Creating new Double DQN agent that moves using mean(Q1, Q2) with gamma = 0.95
Initializing replay buffer with buffer size 30000 and batch size 64
Episode 8	Eps: 0.384	Score: 1	Max Score: 3	Average Score Last 10: 0.75	Average Score Last 100: 0.75.50	
Episode 16	Eps: 0.369	Score: 1	Max Score: 8	Average Score Last 10: 3.00	Average Score Last 100: 2.00	
Episode 24	Eps: 0.355	Score: 0	Max Score: 9	Average Score Last 10: 3.80	Average Score Last 100: 2.582	
Episode 33	Eps: 0.339	Score: 2	Max Score: 13	Average Score Last 10: 3.70	Average Score Last 100: 3.006	
Episode 41	Eps: 0.326	Score: 10	Max Score: 13	Average Score Last 10: 6.90	Average Score Last 100: 3.95	
Episode 49	Eps: 0.313	Score: 6	Max Score: 13	Average Score Last 10: 6.60	Average Score Last 100: 4.35	
Episode 58	Eps: 0.299	Score: 10	Max Score: 13	Average Score Last 10: 6.90	Average Score Last 100: 4.76	
Episode 66	Eps: 0.287	Score: 3	Max Score: 13	Average Score Last 10: 6.10	Average Score Last 100: 4.86	
Episode 74	Eps: 0.276	Score

In [None]:
# DQN
# NOTE(review): this cell is an exact copy of the earlier DQN cell (same agent
# arguments, same eps_start). It overwrites `agent` and `scores` again and
# needs a live `env`; consider removing one of the two copies.
agent = Agent(state_size, action_size, seed=0, update_every=4)
scores = dqn(env, agent, brain_name, eps_start=0.4)