In [11]:
import random
import torch
import numpy as np
from collections import deque
import matplotlib.pyplot as plt
from unityagents import UnityEnvironment
import numpy as np
from agent import Agent, ReplayBuffer

In [2]:
from tqdm import tqdm

In [3]:
# Launch the Unity Banana environment from the Linux build (relative path),
# passing no_graphics=True. `env` is the notebook-level handle used by train().
env = UnityEnvironment(file_name="Banana_Linux/Banana.x86_64", no_graphics=True)

INFO:unityagents:
'Academy' started successfully!
Unity Academy name: Academy
        Number of Brains: 1
        Number of External Brains : 1
        Lesson number : 0
        Reset Parameters :
		
Unity brain name: BananaBrain
        Number of Visual Observations (per agent): 0
        Vector Observation space type: continuous
        Vector Observation space size (per agent): 37
        Number of stacked Vector Observation: 1
        Vector Action space type: discrete
        Vector Action space size (per agent): 4
        Vector Action descriptions: , , , 


In [4]:
# Get the default brain: the first name listed by the environment and the
# brain object registered under it. `brain_name` is also the key used to pick
# this brain's results out of env.reset() / env.step().
brain_name = env.brain_names[0]
brain = env.brains[brain_name]

In [5]:
# Reset the environment in training mode and read the problem dimensions:
# the state size from the first vector observation, the action size from the brain.
env_info = env.reset(train_mode=True)[brain_name]
state = env_info.vector_observations[0]
state_size = len(state)
action_size = brain.vector_action_space_size

In [8]:
# Replay-memory settings handed to ReplayBuffer inside train()
BATCH_SIZE = 256          # number of transitions per minibatch
BUFFER_SIZE = 10 ** 4     # capacity of the replay buffer

In [12]:
def train(agent, n_episodes=4000, max_t=1000, eps_start=1.0, eps_end=0.01, eps_decay=0.999, train=True):
    """Run the agent in the Unity environment, one episode at a time.

    Relies on the notebook-level names ``env``, ``brain_name``, ``action_size``,
    ``BUFFER_SIZE`` and ``BATCH_SIZE``.

    Params
    ======
        agent: object exposing ``act(state, eps) -> (action, prob)``,
            ``step(state, action, reward, next_state, done, prob)``, a
            ``memory`` attribute and a ``qnetwork_local`` module
        n_episodes (int): maximum number of episodes to run
        max_t (int): maximum number of steps per episode
        eps_start (float): initial value of epsilon passed to ``agent.act``
        eps_end (float): lower bound for epsilon
        eps_decay (float): multiplicative factor applied to epsilon after each episode
        train (bool): when True, every transition is forwarded to ``agent.step``,
            checkpoints are written to ``checkpoint_dqn.pth`` and the loop stops
            once the environment is solved; when False, the agent acts with
            epsilon 0.0, ``agent.step`` is never called and no checkpoint is written

    Returns
    =======
        list: the score (sum of rewards) of every episode that was run
    """
    scores = []                        # score of every episode, in order
    scores_window = deque(maxlen=100)  # most recent scores, used for the running average
    eps = eps_start
    for i_episode in tqdm(range(1, n_episodes+1)):
        env_info = env.reset(train_mode=train)[brain_name]
        state = env_info.vector_observations[0]
        score = 0
        # NOTE(review): the agent's memory is replaced by a fresh, empty buffer
        # (seed 0) at the start of every episode, so anything held in the
        # previous buffer is discarded. Kept as-is; confirm this is intended.
        agent.memory = ReplayBuffer(action_size, BUFFER_SIZE, BATCH_SIZE, 0)
        for _ in range(max_t):
            action, prob = agent.act(state, eps if train else 0.0)
            env_info = env.step(action)[brain_name]
            next_state = env_info.vector_observations[0]   # get the next state
            reward = env_info.rewards[0]                   # get the reward
            done = env_info.local_done[0]                  # see if episode has finished
            if train:
                agent.step(state, action, reward, next_state, done, prob)
            score += reward                                # update the score
            state = next_state                             # roll over the state to next time step
            if done:                                       # exit loop if episode finished
                break
        scores_window.append(score)
        scores.append(score)
        eps = max(eps_end, eps_decay*eps) # decrease epsilon

        avg_score = np.mean(scores_window)  # computed once per episode
        print('\rEpisode {}\tAverage Score: {:.2f}'.format(i_episode, avg_score), end="")
        if i_episode % 100 == 0:
            # Only a training run may write the checkpoint; an evaluation run
            # must not overwrite previously trained weights.
            if train:
                torch.save(agent.qnetwork_local.state_dict(), 'checkpoint_dqn.pth')
            print('\rEpisode {}\tAverage Score: {:.2f}'.format(i_episode, avg_score))
        # The solved criterion is an average over a full window; checking it on a
        # partially filled window would stop after a single lucky episode and
        # report a negative episode count below.
        window_full = len(scores_window) == scores_window.maxlen
        if train and window_full and avg_score >= 13.0:
            print('\nEnvironment solved in {:d} episodes!\tAverage Score: {:.2f}'.format(i_episode-100, avg_score))
            torch.save(agent.qnetwork_local.state_dict(), 'checkpoint_dqn.pth')
            break
    return scores

In [None]:
# Build the agent from the state/action sizes read from the environment
# (seed fixed to 0), then run train() with its default arguments.
agent = Agent(state_size=state_size, action_size=action_size, seed=0)
# train() returns the list of per-episode scores.
scores = train(agent)

  0%|          | 1/4000 [00:06<6:54:59,  6.23s/it]

Episode 1	Average Score: 0.00

  0%|          | 2/4000 [00:12<6:59:43,  6.30s/it]

Episode 2	Average Score: 0.00

  0%|          | 3/4000 [00:19<7:03:44,  6.36s/it]

Episode 3	Average Score: 0.33

  0%|          | 4/4000 [00:25<7:07:01,  6.41s/it]

Episode 4	Average Score: 0.50

  0%|          | 5/4000 [00:32<7:08:37,  6.44s/it]

Episode 5	Average Score: 0.60

  0%|          | 6/4000 [00:38<7:06:07,  6.40s/it]

Episode 6	Average Score: 0.50

  0%|          | 7/4000 [00:44<7:06:16,  6.41s/it]

Episode 7	Average Score: 0.43

  0%|          | 8/4000 [00:51<7:08:45,  6.44s/it]

Episode 8	Average Score: 0.25

  0%|          | 9/4000 [00:58<7:09:53,  6.46s/it]

Episode 9	Average Score: 0.22

  0%|          | 10/4000 [01:04<7:14:46,  6.54s/it]

Episode 10	Average Score: 0.30

  0%|          | 11/4000 [01:11<7:21:49,  6.65s/it]

Episode 11	Average Score: 0.09

  0%|          | 12/4000 [01:18<7:26:04,  6.71s/it]

Episode 12	Average Score: 0.17

  0%|          | 13/4000 [01:25<7:23:13,  6.67s/it]

Episode 13	Average Score: 0.15

  0%|          | 14/4000 [01:31<7:23:04,  6.67s/it]

Episode 14	Average Score: 0.21

  0%|          | 15/4000 [01:38<7:22:53,  6.67s/it]

Episode 15	Average Score: 0.13

  0%|          | 16/4000 [01:45<7:33:36,  6.83s/it]

Episode 16	Average Score: 0.31

  0%|          | 17/4000 [01:52<7:29:14,  6.77s/it]

Episode 17	Average Score: 0.35

  0%|          | 18/4000 [01:59<7:29:32,  6.77s/it]

Episode 18	Average Score: 0.33

  0%|          | 19/4000 [02:06<7:40:56,  6.95s/it]

Episode 19	Average Score: 0.32

  0%|          | 20/4000 [02:13<7:40:28,  6.94s/it]

Episode 20	Average Score: 0.30

  1%|          | 21/4000 [02:20<7:39:39,  6.93s/it]

Episode 21	Average Score: 0.24

  1%|          | 22/4000 [02:27<7:49:13,  7.08s/it]

Episode 22	Average Score: 0.27

  1%|          | 23/4000 [02:34<7:47:02,  7.05s/it]

Episode 23	Average Score: 0.26

  1%|          | 24/4000 [02:41<7:42:13,  6.98s/it]

Episode 24	Average Score: 0.21

  1%|          | 25/4000 [02:48<7:40:38,  6.95s/it]

Episode 25	Average Score: 0.28

  1%|          | 26/4000 [02:55<7:45:29,  7.03s/it]

Episode 26	Average Score: 0.27

  1%|          | 27/4000 [03:02<7:45:44,  7.03s/it]

Episode 27	Average Score: 0.26

  1%|          | 28/4000 [03:09<7:52:43,  7.14s/it]

Episode 28	Average Score: 0.29

  1%|          | 29/4000 [03:16<7:45:55,  7.04s/it]

Episode 29	Average Score: 0.28

  1%|          | 30/4000 [03:23<7:45:18,  7.03s/it]

Episode 30	Average Score: 0.27

  1%|          | 31/4000 [03:30<7:40:42,  6.96s/it]

Episode 31	Average Score: 0.32

  1%|          | 32/4000 [03:37<7:45:52,  7.04s/it]

Episode 32	Average Score: 0.31

  1%|          | 33/4000 [03:45<7:59:19,  7.25s/it]

Episode 33	Average Score: 0.30

  1%|          | 34/4000 [03:53<8:07:18,  7.37s/it]

Episode 34	Average Score: 0.26

  1%|          | 35/4000 [03:59<7:56:14,  7.21s/it]

Episode 35	Average Score: 0.29

  1%|          | 36/4000 [04:06<7:50:38,  7.12s/it]

Episode 36	Average Score: 0.31

  1%|          | 37/4000 [04:13<7:46:45,  7.07s/it]

Episode 37	Average Score: 0.30

  1%|          | 38/4000 [04:21<7:51:28,  7.14s/it]

Episode 38	Average Score: 0.29

  1%|          | 39/4000 [04:29<8:10:46,  7.43s/it]

Episode 39	Average Score: 0.26

  1%|          | 40/4000 [04:36<8:03:11,  7.32s/it]

Episode 40	Average Score: 0.25

  1%|          | 41/4000 [04:43<8:06:42,  7.38s/it]

Episode 41	Average Score: 0.24

  1%|          | 42/4000 [04:50<7:58:13,  7.25s/it]

Episode 42	Average Score: 0.21

  1%|          | 43/4000 [04:57<7:51:37,  7.15s/it]

Episode 43	Average Score: 0.21

  1%|          | 44/4000 [05:05<8:00:33,  7.29s/it]

Episode 44	Average Score: 0.16

  1%|          | 45/4000 [05:12<8:03:17,  7.33s/it]

Episode 45	Average Score: 0.18

  1%|          | 46/4000 [05:19<7:58:37,  7.26s/it]

Episode 46	Average Score: 0.15

  1%|          | 47/4000 [05:27<7:58:32,  7.26s/it]

Episode 47	Average Score: 0.15

  1%|          | 48/4000 [05:34<7:57:01,  7.24s/it]

Episode 48	Average Score: 0.17

  1%|          | 49/4000 [05:41<8:02:53,  7.33s/it]

Episode 49	Average Score: 0.10

  1%|▏         | 50/4000 [05:49<8:05:04,  7.37s/it]

Episode 50	Average Score: 0.06

  1%|▏         | 51/4000 [05:56<8:03:45,  7.35s/it]

Episode 51	Average Score: 0.12

  1%|▏         | 52/4000 [06:04<8:14:09,  7.51s/it]

Episode 52	Average Score: 0.12

  1%|▏         | 53/4000 [06:12<8:14:39,  7.52s/it]

Episode 53	Average Score: 0.11

  1%|▏         | 54/4000 [06:19<8:16:49,  7.55s/it]

Episode 54	Average Score: 0.11

  1%|▏         | 55/4000 [06:26<8:10:33,  7.46s/it]

Episode 55	Average Score: 0.09

  1%|▏         | 56/4000 [06:34<8:19:25,  7.60s/it]

Episode 56	Average Score: 0.07

  1%|▏         | 57/4000 [06:42<8:14:50,  7.53s/it]

Episode 57	Average Score: 0.05

  1%|▏         | 58/4000 [06:49<8:07:08,  7.41s/it]

Episode 58	Average Score: 0.07

  1%|▏         | 59/4000 [06:56<8:05:28,  7.39s/it]

Episode 59	Average Score: 0.08

  2%|▏         | 60/4000 [07:03<7:58:20,  7.28s/it]

Episode 60	Average Score: 0.03

  2%|▏         | 61/4000 [07:11<7:57:58,  7.28s/it]

Episode 61	Average Score: 0.03

  2%|▏         | 62/4000 [07:18<7:55:55,  7.25s/it]

Episode 62	Average Score: 0.02

  2%|▏         | 63/4000 [07:25<8:00:46,  7.33s/it]

Episode 63	Average Score: 0.00

  2%|▏         | 64/4000 [07:33<8:09:01,  7.45s/it]

Episode 64	Average Score: 0.00

  2%|▏         | 65/4000 [07:40<8:02:05,  7.35s/it]

Episode 65	Average Score: 0.02

  2%|▏         | 66/4000 [07:48<8:08:50,  7.46s/it]

Episode 66	Average Score: 0.00

  2%|▏         | 67/4000 [07:55<8:01:01,  7.34s/it]

Episode 67	Average Score: 0.00

  2%|▏         | 68/4000 [08:02<7:58:32,  7.30s/it]

Episode 68	Average Score: 0.00

  2%|▏         | 69/4000 [08:10<8:03:23,  7.38s/it]

Episode 69	Average Score: 0.00

  2%|▏         | 70/4000 [08:17<8:01:02,  7.34s/it]

Episode 70	Average Score: 0.03

  2%|▏         | 71/4000 [08:25<8:08:27,  7.46s/it]

Episode 71	Average Score: 0.01

  2%|▏         | 72/4000 [08:32<8:09:13,  7.47s/it]

Episode 72	Average Score: 0.00

  2%|▏         | 73/4000 [08:39<8:03:15,  7.38s/it]

Episode 73	Average Score: 0.01

  2%|▏         | 74/4000 [08:47<8:10:45,  7.50s/it]

Episode 74	Average Score: 0.03

  2%|▏         | 75/4000 [08:54<8:08:28,  7.47s/it]

Episode 75	Average Score: 0.04

  2%|▏         | 76/4000 [09:02<8:17:36,  7.61s/it]

Episode 76	Average Score: 0.03

  2%|▏         | 77/4000 [09:10<8:11:05,  7.51s/it]

Episode 77	Average Score: 0.04

  2%|▏         | 78/4000 [09:17<8:05:45,  7.43s/it]

Episode 78	Average Score: 0.03

  2%|▏         | 79/4000 [09:24<8:00:36,  7.35s/it]

Episode 79	Average Score: 0.03

  2%|▏         | 80/4000 [09:31<8:00:11,  7.35s/it]

Episode 80	Average Score: 0.04

  2%|▏         | 81/4000 [09:39<8:03:28,  7.40s/it]

Episode 81	Average Score: 0.04

  2%|▏         | 82/4000 [09:47<8:08:33,  7.48s/it]

Episode 82	Average Score: 0.01

  2%|▏         | 83/4000 [09:55<8:26:57,  7.77s/it]

Episode 83	Average Score: 0.01

  2%|▏         | 84/4000 [10:03<8:33:42,  7.87s/it]

Episode 84	Average Score: 0.02

  2%|▏         | 85/4000 [10:10<8:23:00,  7.71s/it]

Episode 85	Average Score: 0.02

  2%|▏         | 86/4000 [10:18<8:10:09,  7.51s/it]

Episode 86	Average Score: 0.02

  2%|▏         | 87/4000 [10:25<8:00:39,  7.37s/it]

Episode 87	Average Score: 0.02

  2%|▏         | 88/4000 [10:32<8:04:05,  7.42s/it]

Episode 88	Average Score: 0.01

  2%|▏         | 89/4000 [10:40<8:06:46,  7.47s/it]

Episode 89	Average Score: 0.00

  2%|▏         | 90/4000 [10:47<8:01:47,  7.39s/it]

Episode 90	Average Score: 0.01

  2%|▏         | 91/4000 [10:54<7:55:46,  7.30s/it]

Episode 91	Average Score: -0.01

  2%|▏         | 92/4000 [11:01<7:51:49,  7.24s/it]

Episode 92	Average Score: -0.02

  2%|▏         | 93/4000 [11:08<7:47:17,  7.18s/it]

Episode 93	Average Score: -0.03

  2%|▏         | 94/4000 [11:15<7:47:28,  7.18s/it]

Episode 94	Average Score: -0.03

  2%|▏         | 95/4000 [11:22<7:43:44,  7.13s/it]

Episode 95	Average Score: -0.03

  2%|▏         | 96/4000 [11:29<7:43:49,  7.13s/it]

Episode 96	Average Score: -0.04

  2%|▏         | 97/4000 [11:36<7:39:57,  7.07s/it]

Episode 97	Average Score: -0.04

  2%|▏         | 98/4000 [11:44<7:49:22,  7.22s/it]

Episode 98	Average Score: -0.05

  2%|▏         | 99/4000 [11:51<7:45:27,  7.16s/it]

Episode 100	Average Score: -0.02

  2%|▎         | 100/4000 [11:59<8:11:25,  7.56s/it]

Episode 100	Average Score: -0.02


  3%|▎         | 101/4000 [12:07<8:15:02,  7.62s/it]

Episode 101	Average Score: -0.04

  3%|▎         | 102/4000 [12:15<8:18:34,  7.67s/it]

Episode 102	Average Score: -0.04

  3%|▎         | 103/4000 [12:23<8:15:14,  7.63s/it]

Episode 103	Average Score: -0.06

  3%|▎         | 104/4000 [12:30<8:03:45,  7.45s/it]

Episode 104	Average Score: -0.07

  3%|▎         | 105/4000 [12:37<7:56:06,  7.33s/it]

Episode 105	Average Score: -0.09

  3%|▎         | 106/4000 [12:44<7:59:11,  7.38s/it]

Episode 106	Average Score: -0.07

  3%|▎         | 107/4000 [12:52<8:00:25,  7.40s/it]

Episode 107	Average Score: -0.08

  3%|▎         | 108/4000 [13:00<8:13:41,  7.61s/it]

Episode 108	Average Score: -0.09

  3%|▎         | 109/4000 [13:07<8:08:12,  7.53s/it]

Episode 109	Average Score: -0.11

  3%|▎         | 110/4000 [13:15<8:17:20,  7.67s/it]

Episode 110	Average Score: -0.12

  3%|▎         | 111/4000 [13:23<8:24:39,  7.79s/it]

Episode 111	Average Score: -0.09

  3%|▎         | 112/4000 [13:31<8:27:35,  7.83s/it]

Episode 112	Average Score: -0.10

  3%|▎         | 113/4000 [13:38<8:13:02,  7.61s/it]

Episode 113	Average Score: -0.08

  3%|▎         | 114/4000 [13:45<8:04:21,  7.48s/it]

Episode 114	Average Score: -0.08

  3%|▎         | 115/4000 [13:53<8:00:37,  7.42s/it]

Episode 115	Average Score: -0.07

  3%|▎         | 116/4000 [14:00<7:54:23,  7.33s/it]

Episode 116	Average Score: -0.10

  3%|▎         | 117/4000 [14:07<7:58:47,  7.40s/it]

Episode 117	Average Score: -0.12

  3%|▎         | 118/4000 [14:15<8:01:49,  7.45s/it]

Episode 118	Average Score: -0.13

  3%|▎         | 119/4000 [14:23<8:10:01,  7.58s/it]

Episode 119	Average Score: -0.15

  3%|▎         | 120/4000 [14:31<8:17:04,  7.69s/it]

Episode 120	Average Score: -0.14

  3%|▎         | 121/4000 [14:38<8:12:58,  7.63s/it]

Episode 121	Average Score: -0.10

  3%|▎         | 122/4000 [14:45<8:06:26,  7.53s/it]

Episode 122	Average Score: -0.13

  3%|▎         | 123/4000 [14:53<8:16:09,  7.68s/it]

Episode 123	Average Score: -0.13

  3%|▎         | 124/4000 [15:01<8:10:31,  7.59s/it]

Episode 124	Average Score: -0.12

  3%|▎         | 125/4000 [15:08<8:06:19,  7.53s/it]

Episode 125	Average Score: -0.15

  3%|▎         | 126/4000 [15:15<8:00:52,  7.45s/it]

Episode 126	Average Score: -0.15

  3%|▎         | 127/4000 [15:23<7:57:40,  7.40s/it]

Episode 127	Average Score: -0.11

  3%|▎         | 128/4000 [15:30<7:57:00,  7.39s/it]

Episode 128	Average Score: -0.13

  3%|▎         | 129/4000 [15:37<7:53:55,  7.35s/it]

Episode 129	Average Score: -0.13

  3%|▎         | 130/4000 [15:44<7:47:09,  7.24s/it]

Episode 130	Average Score: -0.15

  3%|▎         | 131/4000 [15:51<7:43:14,  7.18s/it]

Episode 131	Average Score: -0.18

  3%|▎         | 132/4000 [15:58<7:39:37,  7.13s/it]

Episode 132	Average Score: -0.16

  3%|▎         | 133/4000 [16:06<7:39:29,  7.13s/it]

Episode 133	Average Score: -0.14

  3%|▎         | 134/4000 [16:13<7:42:31,  7.18s/it]

Episode 134	Average Score: -0.12

  3%|▎         | 135/4000 [16:20<7:37:51,  7.11s/it]

Episode 135	Average Score: -0.13

  3%|▎         | 136/4000 [16:27<7:41:28,  7.17s/it]

Episode 136	Average Score: -0.12

  3%|▎         | 137/4000 [16:34<7:40:18,  7.15s/it]

Episode 137	Average Score: -0.13

  3%|▎         | 138/4000 [16:41<7:40:22,  7.15s/it]

Episode 138	Average Score: -0.12