In [1]:
%matplotlib widget
import datetime
import tensorflow as tf
import numpy as np
import pandas as pd
import xarray as xr
import time
import gym
import matplotlib.pyplot as plt
import cmocean
#from simple_agent import SimpleAtariAgent
from tqdm import tqdm
import random
import dqn_agent

Using TensorFlow backend.


In [2]:
# Instantiate the biomapping environment and a DQN agent bound to it.
# The environment is asked for '3D-matrix' output and the agent is told to expect
# '3D-matrix' input -- presumably these two settings must agree; verify in dqn_agent.
env = gym.make(
    'gym_biomapping:perfect_info_atari-v0',
    static=True,
    output='3D-matrix',
    generate_data=True,
)
agent = dqn_agent.DQNAgent(env, input="3D-matrix")

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

Instructions for updating:
If using Keras pass *_constraint arguments to layers.



In [3]:
# Start a fresh episode and keep its first observation.
obs = env.reset()
# Ask the agent for its first decision; the stepping cell further down feeds
# `next_action` to env.step().
next_action = agent.deliberate(obs)
# Render through the agent first, then through the environment.
agent.render(obs, next_action)
env.render()

In [None]:
# Roll out a single episode with the agent in control (no training happens here).
# The first step uses `next_action` computed by the reset/deliberate cell above.
done = False
while not done:
    obs, reward, done, info = env.step(next_action)
    # Re-plan from the newest observation. Without this the very first action
    # would be replayed on every step and the rendered action would be stale.
    next_action = agent.deliberate(obs)
    agent.render(obs, next_action)
    env.render()

In [4]:
# Seed Python's, NumPy's and TensorFlow's random generators with one shared value
# for more reproducible results; consider changing the value between experiments.
# NOTE(review): this cell's execution count (4) is later than the cell that builds
# env/agent (2), so anything initialised there was created before seeding -- confirm
# whether the seeds should be set earlier.
SEED = 1
tf.set_random_seed(SEED)
np.random.seed(SEED)
random.seed(SEED)

In [6]:
# Put the environment back into its initial state (the returned observation is
# discarded here) and render it.
env.reset()
env.render()

In [5]:
# Training configuration -- this is where the magic happens.

# --- Run length and bookkeeping -------------------------------------------
EPISODES = 200
AGGREGATE_STATS_EVERY = 1  # episodes between stats updates / model checkpoints
ep_rewards = []            # total reward of every finished episode, in order

# --- Display and checkpointing --------------------------------------------
SHOW_PREVIEW = False       # render agent and environment on every training step
MODEL_NAME = "LAUV_ROALD"
MIN_REWARD = -10_000       # models are only saved when the minimum reward is at least this

# --- Exploration (decaying epsilon-greedy) --------------------------------
# The more the agent learns, the less it explores:
#   epsilon       -- chance of taking a random (exploring) action; not a constant,
#                    it is decayed after every episode
#   EPSILON_DECAY -- multiplicative rate at which exploration decays
#   MIN_EPSILON   -- floor that keeps some exploration; epsilon never decays below it
MIN_EPSILON = 0.001
EPSILON_DECAY = 0.9
epsilon = 1


# Main DQN training loop: one iteration per episode, epsilon-greedy action
# selection, replay-memory updates and per-episode logging / checkpointing.
for episode in tqdm(range(1, EPISODES + 1), ascii=True, unit='episodes'):

    # Keep the agent's tensorboard logger aligned with the episode counter
    agent.tensorboard.step = episode

    # Fresh episode: zero the return, restart the step counter, reset env and agent
    episode_reward = 0
    step = 1
    current_state = env.reset()
    agent.reset()
    done = False

    # Step until the environment reports the episode as finished
    while not done:
        # Epsilon-greedy: explore with probability epsilon, otherwise exploit
        if np.random.random() > epsilon:
            # Exploit: index of the largest Q-value the agent reports for this state
            action = np.argmax(agent.get_qs(current_state))
        else:
            # Explore: uniformly random action index
            action = np.random.randint(0, env.action_space.nvec[0])

        # Take the selected action (the environment is stepped with a one-element list)
        new_state, reward, done, _info = env.step([action])
        next_pos = agent.deliberate(new_state)

        episode_reward += reward

        if SHOW_PREVIEW:
            # Render what was just deliberated for `new_state`. The previous code
            # passed `next_action`, a stale global left over from an earlier cell.
            agent.render(new_state, next_pos)
            env.render()

        # Store the transition in replay memory (a fixed-length collection of
        # experiences from which training examples are drawn; one in, oldest out)
        agent.update_replay_memory((current_state, action, reward, new_state, done))

        # Called every step; judging by the run log the agent only starts using the
        # network once the replay memory is sufficiently large
        agent.train(done, step)
        current_state = new_state
        step += 1

    print("Episode reward: ", episode_reward)

    # Decay epsilon, never dropping below MIN_EPSILON
    if epsilon > MIN_EPSILON:
        epsilon *= EPSILON_DECAY
        epsilon = max(MIN_EPSILON, epsilon)

    # Logging and storing
    ep_rewards.append(episode_reward)
    if not episode % AGGREGATE_STATS_EVERY or episode == 1:
        # Statistics over the most recent AGGREGATE_STATS_EVERY episodes
        recent_rewards = ep_rewards[-AGGREGATE_STATS_EVERY:]
        average_reward = sum(recent_rewards) / len(recent_rewards)
        min_reward = min(recent_rewards)
        max_reward = max(recent_rewards)
        agent.tensorboard.update_stats(reward_avg=average_reward, reward_min=min_reward, reward_max=max_reward, epsilon=epsilon)

        # Save the model, but only when the minimum reward reaches the threshold
        if min_reward >= MIN_REWARD:
            agent.model.save(f'models/{MODEL_NAME}__{max_reward:_>7.2f}max_{average_reward:_>7.2f}avg_{min_reward:_>7.2f}min__{int(time.time())}.model')
    
        

  0%|                                    | 0/200 [00:00<?, ?episodes/s]

AUV is out of bounds
Episode reward:  -1000



  0%|1                           | 1/200 [00:13<44:46, 13.50s/episodes]




  1%|2                           | 2/200 [00:27<44:55, 13.61s/episodes]

AUV is out of bounds
Episode reward:  -463.9144014120102


  2%|4                           | 3/200 [00:40<44:26, 13.54s/episodes]

AUV is out of bounds
Episode reward:  -1000


  2%|5                           | 4/200 [00:54<44:33, 13.64s/episodes]

AUV is out of bounds
Episode reward:  -682.969554901123


  2%|7                           | 5/200 [01:08<44:17, 13.63s/episodes]

AUV is out of bounds
Episode reward:  -849.6174182891846


  3%|8                           | 6/200 [01:21<43:40, 13.51s/episodes]

AUV is out of bounds
Episode reward:  -937.3925476074219


  4%|9                           | 7/200 [01:35<43:48, 13.62s/episodes]

AUV is out of bounds
Episode reward:  -858.7055225372314


  4%|#1                          | 8/200 [01:49<43:41, 13.65s/episodes]

AUV is out of bounds
Episode reward:  -907.1983661651611


  4%|#2                          | 9/200 [02:02<43:23, 13.63s/episodes]

AUV is out of bounds
Episode reward:  -908.2141265869141


  5%|#3                         | 10/200 [02:16<43:04, 13.60s/episodes]

AUV is out of bounds
Episode reward:  -1000


  6%|#4                         | 11/200 [02:29<42:44, 13.57s/episodes]

AUV is out of bounds
Episode reward:  -926.8247184753418


  6%|#6                         | 12/200 [02:43<42:53, 13.69s/episodes]

AUV is out of bounds
Episode reward:  -938.9627647399902


  6%|#7                         | 13/200 [02:57<42:29, 13.63s/episodes]

AUV is out of bounds
Episode reward:  -910.6674346923828


  7%|#8                         | 14/200 [03:10<41:57, 13.54s/episodes]

AUV is out of bounds
Episode reward:  -1000


  8%|##                         | 15/200 [03:24<42:11, 13.68s/episodes]

AUV is out of bounds
Episode reward:  -706.4510650634766


  8%|##1                        | 16/200 [03:38<41:56, 13.68s/episodes]

AUV is out of bounds
Episode reward:  -1000


  8%|##2                        | 17/200 [03:51<41:44, 13.68s/episodes]

AUV is out of bounds
Episode reward:  -984.6323595046997


  9%|##4                        | 18/200 [04:05<41:07, 13.56s/episodes]

AUV is out of bounds
Episode reward:  -1000


 10%|##5                        | 19/200 [04:19<41:25, 13.73s/episodes]

AUV is out of bounds
Episode reward:  -969.7431163787842


 10%|##7                        | 20/200 [04:33<41:15, 13.75s/episodes]

AUV is out of bounds
Episode reward:  -794.2027587890625


 10%|##8                        | 21/200 [04:46<40:29, 13.57s/episodes]

AUV is out of bounds
Episode reward:  -1000


 11%|##9                        | 22/200 [04:59<40:00, 13.48s/episodes]

AUV is out of bounds
Episode reward:  -1000


 12%|###1                       | 23/200 [05:12<39:41, 13.46s/episodes]

AUV is out of bounds
Episode reward:  -1000


 12%|###2                       | 24/200 [05:26<39:44, 13.55s/episodes]

AUV is out of bounds
Episode reward:  -1000


 12%|###3                       | 25/200 [05:40<39:49, 13.66s/episodes]

AUV is out of bounds
Episode reward:  -1000


 13%|###5                       | 26/200 [05:53<39:24, 13.59s/episodes]

AUV is out of bounds
Episode reward:  -1000


 14%|###6                       | 27/200 [06:07<39:00, 13.53s/episodes]

AUV is out of bounds
Episode reward:  -1000


 14%|###7                       | 28/200 [06:20<38:50, 13.55s/episodes]

AUV is out of bounds
Episode reward:  -1000
Replay-memory sufficiently large - Started using neural network
AUV is out of bounds


 14%|###6                     | 29/200 [11:00<4:26:28, 93.50s/episodes]

Episode reward:  10463.565761566162
AUV is out of bounds


 15%|###6                    | 30/200 [13:13<4:58:07, 105.22s/episodes]

Episode reward:  1728.8210411071777
Episode reached max number of steps


 16%|###5                   | 31/200 [21:48<10:42:43, 228.18s/episodes]

Episode reward:  15367.546651363373
Episode reached max number of steps


 16%|###6                   | 32/200 [30:25<14:41:03, 314.66s/episodes]

Episode reward:  25705.138778686523
Episode reached max number of steps


 16%|###7                   | 33/200 [39:03<17:25:31, 375.64s/episodes]

Episode reward:  27841.20295715332
AUV is out of bounds


 17%|###9                   | 34/200 [39:17<12:19:40, 267.35s/episodes]

Episode reward:  -1000
AUV is out of bounds


 18%|####1                   | 35/200 [39:59<9:09:16, 199.74s/episodes]

Episode reward:  -716.9193687438965
Episode reached max number of steps


 18%|####1                  | 36/200 [48:37<13:27:02, 295.26s/episodes]

Episode reward:  24876.367546081543
Episode reached max number of steps


 18%|####2                  | 37/200 [57:17<16:25:15, 362.67s/episodes]

Episode reward:  10397.283505439758
Episode reached max number of steps


 19%|###9                 | 38/200 [1:05:59<18:28:00, 410.37s/episodes]

Episode reward:  12197.824716567993
Episode reached max number of steps


 20%|####                 | 39/200 [1:14:42<19:51:48, 444.15s/episodes]

Episode reward:  18519.82664871216
Episode reached max number of steps


 20%|####2                | 40/200 [1:23:26<20:47:55, 467.97s/episodes]

Episode reward:  20384.976528167725
Episode reached max number of steps


 20%|####3                | 41/200 [1:32:12<21:26:19, 485.41s/episodes]

Episode reward:  24026.322647094727
AUV is out of bounds


 21%|####4                | 42/200 [1:34:32<16:45:32, 381.85s/episodes]

Episode reward:  5344.11604309082
AUV is out of bounds


 22%|####5                | 43/200 [1:36:03<12:50:57, 294.64s/episodes]

Episode reward:  62.74634552001953
Episode reached max number of steps


 22%|####6                | 44/200 [1:44:49<15:46:34, 364.07s/episodes]

Episode reward:  13002.30111694336
AUV is out of bounds


 22%|####7                | 45/200 [1:45:37<11:35:33, 269.25s/episodes]

Episode reward:  -1000
Episode reached max number of steps


 23%|####8                | 46/200 [1:54:24<14:49:41, 346.63s/episodes]

Episode reward:  18493.555736541748
AUV is out of bounds


 24%|####9                | 47/200 [1:54:59<10:45:19, 253.07s/episodes]

Episode reward:  -835.1123008728027
Episode reached max number of steps


 24%|#####                | 48/200 [2:03:45<14:08:54, 335.09s/episodes]

Episode reward:  15219.208671569824
Episode reached max number of steps


 24%|#####1               | 49/200 [2:12:32<16:27:30, 392.39s/episodes]

Episode reward:  12202.748935699463
Episode reached max number of steps


 25%|#####2               | 50/200 [2:21:17<18:01:05, 432.44s/episodes]

Episode reward:  34671.83846282959
Episode reached max number of steps


 26%|#####3               | 51/200 [2:30:03<19:03:08, 460.32s/episodes]

Episode reward:  25832.19942855835
AUV is out of bounds


 26%|#####4               | 52/200 [2:30:17<13:25:25, 326.53s/episodes]

Episode reward:  -1000
Episode reached max number of steps


 26%|#####5               | 53/200 [2:39:02<15:46:04, 386.15s/episodes]

Episode reward:  33494.75981140137
Episode reached max number of steps


 27%|#####6               | 54/200 [2:47:48<17:21:24, 427.98s/episodes]

Episode reward:  12937.800783157349
Episode reached max number of steps


 28%|#####7               | 55/200 [2:56:33<18:24:57, 457.22s/episodes]

Episode reward:  5290.856987953186
Episode reached max number of steps


 28%|#####8               | 56/200 [3:05:18<19:06:07, 477.55s/episodes]

Episode reward:  5726.229030609131
Episode reached max number of steps


 28%|#####9               | 57/200 [3:14:06<19:33:33, 492.41s/episodes]

Episode reward:  30539.485153198242
Episode reached max number of steps


 29%|######               | 58/200 [3:22:51<19:48:46, 502.30s/episodes]

Episode reward:  30147.20231628418
Episode reached max number of steps


 30%|######1              | 59/200 [3:31:36<19:56:10, 509.01s/episodes]

Episode reward:  3658.376214981079
Episode reached max number of steps


 30%|######3              | 60/200 [3:40:20<19:58:23, 513.60s/episodes]

Episode reward:  10018.857515335083
Episode reached max number of steps


 30%|######4              | 61/200 [3:49:04<19:57:28, 516.90s/episodes]

Episode reward:  8537.27965927124
AUV is out of bounds


 31%|######5              | 62/200 [3:49:19<14:02:26, 366.28s/episodes]

Episode reward:  -1000
Episode reached max number of steps


 32%|######6              | 63/200 [3:58:02<15:43:29, 413.21s/episodes]

Episode reward:  25062.73484802246
AUV is out of bounds


 32%|######7              | 64/200 [3:58:27<11:12:24, 296.65s/episodes]

Episode reward:  -1000
Episode reached max number of steps


 32%|######8              | 65/200 [4:07:09<13:39:32, 364.24s/episodes]

Episode reward:  15937.889892578125
Episode reached max number of steps


 33%|######9              | 66/200 [4:15:51<15:19:21, 411.65s/episodes]

Episode reward:  22237.301235198975
Episode reached max number of steps


 34%|#######              | 67/200 [4:24:33<16:25:53, 444.77s/episodes]

Episode reward:  32918.95127868652
Episode reached max number of steps


 34%|#######1             | 68/200 [4:33:15<17:09:40, 468.03s/episodes]

Episode reward:  21394.692459106445
Episode reached max number of steps


 34%|#######2             | 69/200 [4:41:59<17:38:09, 484.65s/episodes]

Episode reward:  32502.750232696533
Episode reached max number of steps


 35%|#######3             | 70/200 [4:50:41<17:54:17, 495.83s/episodes]

Episode reward:  13283.954681396484
AUV is out of bounds


 36%|#######4             | 71/200 [4:51:03<12:40:33, 353.75s/episodes]

Episode reward:  -890.8865356445312
Episode reached max number of steps


 36%|#######5             | 72/200 [4:59:44<14:21:48, 403.98s/episodes]

Episode reward:  23551.935546875
Episode reached max number of steps


 36%|#######6             | 73/200 [5:08:26<15:29:50, 439.30s/episodes]

Episode reward:  20414.639183044434
Episode reached max number of steps


 37%|#######7             | 74/200 [5:17:07<16:14:24, 464.01s/episodes]

Episode reward:  19043.016468048096
Episode reached max number of steps


 38%|#######8             | 75/200 [5:25:48<16:42:03, 480.99s/episodes]

Episode reward:  16870.343116760254
Episode reached max number of steps


 38%|#######9             | 76/200 [5:34:32<17:00:35, 493.84s/episodes]

Episode reward:  21734.966682434082
Episode reached max number of steps


 38%|########             | 77/200 [5:43:14<17:09:51, 502.37s/episodes]

Episode reward:  29022.564682006836
Episode reached max number of steps


 39%|########1            | 78/200 [5:51:54<17:12:21, 507.72s/episodes]

Episode reward:  26042.99688720703
Episode reached max number of steps


 40%|########2            | 79/200 [6:00:33<17:10:50, 511.16s/episodes]

Episode reward:  12584.879528045654
Episode reached max number of steps


 40%|########4            | 80/200 [6:09:13<17:07:19, 513.66s/episodes]

Episode reward:  18713.7744140625
Episode reached max number of steps


 40%|########5            | 81/200 [6:17:53<17:02:16, 515.44s/episodes]

Episode reward:  33888.26916503906
Episode reached max number of steps


 41%|########6            | 82/200 [6:26:31<16:55:44, 516.48s/episodes]

Episode reward:  33879.01351928711
Episode reached max number of steps


 42%|########7            | 83/200 [6:35:10<16:48:36, 517.23s/episodes]

Episode reward:  37617.95587158203
Episode reached max number of steps


 42%|########8            | 84/200 [6:43:55<16:44:06, 519.37s/episodes]

Episode reward:  27978.389709472656
Episode reached max number of steps


 42%|########9            | 85/200 [6:52:33<16:35:00, 519.13s/episodes]

Episode reward:  16166.127826690674
Episode reached max number of steps


 43%|#########            | 86/200 [7:01:12<16:25:55, 518.90s/episodes]

Episode reward:  14553.885837554932
Episode reached max number of steps


 44%|#########1           | 87/200 [7:09:50<16:17:01, 518.77s/episodes]

Episode reward:  13245.678100585938
Episode reached max number of steps


 44%|#########2           | 88/200 [7:18:29<16:08:11, 518.67s/episodes]

Episode reward:  22005.798934936523
Episode reached max number of steps


 44%|#########3           | 89/200 [7:27:07<15:59:35, 518.70s/episodes]

Episode reward:  23147.251712799072
Episode reached max number of steps


 45%|#########4           | 90/200 [7:35:46<15:50:45, 518.60s/episodes]

Episode reward:  13395.300491333008
Episode reached max number of steps


 46%|#########5           | 91/200 [7:44:25<15:42:21, 518.73s/episodes]

Episode reward:  27029.036254882812
Episode reached max number of steps


 46%|#########6           | 92/200 [7:53:03<15:33:33, 518.65s/episodes]

Episode reward:  34715.802810668945
Episode reached max number of steps


 46%|#########7           | 93/200 [8:01:41<15:24:15, 518.28s/episodes]

Episode reward:  7883.301225662231
Episode reached max number of steps


 47%|#########8           | 94/200 [8:10:18<15:15:21, 518.13s/episodes]

Episode reward:  6046.857663154602
Episode reached max number of steps


 48%|#########9           | 95/200 [8:18:56<15:06:34, 518.04s/episodes]

Episode reward:  21237.89274597168
Episode reached max number of steps


 48%|##########           | 96/200 [8:27:33<14:57:26, 517.75s/episodes]

Episode reward:  1663.2391057014465
Episode reached max number of steps


 48%|##########1          | 97/200 [8:36:15<14:50:48, 518.92s/episodes]

Episode reward:  11654.917598724365
Episode reached max number of steps


 49%|##########2          | 98/200 [8:44:53<14:41:28, 518.51s/episodes]

Episode reward:  27550.27130126953
Episode reached max number of steps


 50%|##########3          | 99/200 [8:53:29<14:31:52, 517.94s/episodes]

Episode reward:  15402.70421218872
Episode reached max number of steps


 50%|##########          | 100/200 [9:02:06<14:22:47, 517.67s/episodes]

Episode reward:  20830.26611328125
Episode reached max number of steps


 50%|##########1         | 101/200 [9:10:44<14:14:19, 517.77s/episodes]

Episode reward:  30067.931091308594
Episode reached max number of steps


 51%|##########2         | 102/200 [9:19:21<14:05:18, 517.54s/episodes]

Episode reward:  26122.57460784912
Episode reached max number of steps


 52%|##########3         | 103/200 [9:28:00<13:57:11, 517.85s/episodes]

Episode reward:  1193.566496938467
Episode reached max number of steps


 52%|##########4         | 104/200 [9:36:39<13:49:06, 518.19s/episodes]

Episode reward:  35501.1169128418
Episode reached max number of steps


 52%|##########5         | 105/200 [9:45:18<13:40:45, 518.37s/episodes]

Episode reward:  45646.71795654297
Episode reached max number of steps


 53%|##########6         | 106/200 [9:53:56<13:32:08, 518.39s/episodes]

Episode reward:  21108.024501800537
Episode reached max number of steps


 54%|##########1        | 107/200 [10:02:33<13:22:59, 518.06s/episodes]

Episode reward:  10518.378196716309
Episode reached max number of steps


 54%|##########2        | 108/200 [10:11:09<13:13:28, 517.48s/episodes]

Episode reward:  35776.417388916016
Episode reached max number of steps


 55%|##########3        | 109/200 [10:19:46<13:04:28, 517.24s/episodes]

Episode reward:  18234.207801818848
Episode reached max number of steps


 55%|##########4        | 110/200 [10:28:22<12:55:11, 516.79s/episodes]

Episode reward:  27055.767601013184
AUV is out of bounds


 56%|###########1        | 111/200 [10:28:36<9:03:05, 366.13s/episodes]

Episode reward:  -1000
Episode reached max number of steps


 56%|##########6        | 112/200 [10:37:13<10:03:07, 411.22s/episodes]

Episode reward:  30166.466499328613
Episode reached max number of steps


 56%|##########7        | 113/200 [10:45:50<10:42:23, 443.03s/episodes]

Episode reward:  36245.07115936279
Episode reached max number of steps


 57%|##########8        | 114/200 [10:54:27<11:06:49, 465.22s/episodes]

Episode reward:  13850.410343170166
AUV is out of bounds


 57%|###########5        | 115/200 [10:54:42<7:47:34, 330.06s/episodes]

Episode reward:  -1000
Episode reached max number of steps


 58%|###########6        | 116/200 [11:03:19<9:00:27, 386.04s/episodes]

Episode reward:  26469.64990234375
Episode reached max number of steps


 58%|###########7        | 117/200 [11:11:55<9:48:06, 425.14s/episodes]

Episode reward:  21111.5890045166
Episode reached max number of steps


 59%|###########2       | 118/200 [11:20:32<10:18:38, 452.66s/episodes]

Episode reward:  27189.532836914062
Episode reached max number of steps


 60%|###########3       | 119/200 [11:29:08<10:37:00, 471.85s/episodes]

Episode reward:  18578.336853027344
Episode reached max number of steps


 60%|###########4       | 120/200 [11:37:46<10:47:19, 485.49s/episodes]

Episode reward:  23078.395614624023
Episode reached max number of steps


 60%|###########4       | 121/200 [11:46:23<10:51:45, 495.01s/episodes]

Episode reward:  6997.596391677856
Episode reached max number of steps


 61%|###########5       | 122/200 [11:55:00<10:52:08, 501.64s/episodes]

Episode reward:  16042.528583526611
Episode reached max number of steps


 62%|###########6       | 123/200 [12:03:36<10:49:24, 506.03s/episodes]

Episode reward:  6591.221813201904
Episode reached max number of steps


 62%|###########7       | 124/200 [12:12:12<10:44:39, 508.94s/episodes]

Episode reward:  19052.75602722168
Episode reached max number of steps


 62%|###########8       | 125/200 [12:20:48<10:38:37, 510.91s/episodes]

Episode reward:  27722.49338531494
Episode reached max number of steps


 63%|###########9       | 126/200 [12:29:22<10:31:33, 512.08s/episodes]

Episode reward:  20241.219646453857
Episode reached max number of steps


 64%|############       | 127/200 [12:37:58<10:24:18, 513.14s/episodes]

Episode reward:  37362.08402252197
Episode reached max number of steps


 64%|############1      | 128/200 [12:46:33<10:16:32, 513.78s/episodes]

Episode reward:  21290.38112258911
Episode reached max number of steps


 64%|############2      | 129/200 [12:55:08<10:08:27, 514.19s/episodes]

Episode reward:  42689.63053894043
Episode reached max number of steps


 65%|############3      | 130/200 [13:03:43<10:00:08, 514.41s/episodes]

Episode reward:  9189.074077606201
Episode reached max number of steps


 66%|#############1      | 131/200 [13:12:18<9:51:49, 514.64s/episodes]

Episode reward:  20864.54501724243
Episode reached max number of steps


 66%|#############2      | 132/200 [13:20:53<9:43:15, 514.63s/episodes]

Episode reward:  19771.71795272827
Episode reached max number of steps


 66%|#############3      | 133/200 [13:29:28<9:34:50, 514.78s/episodes]

Episode reward:  33382.02233123779
Episode reached max number of steps


 67%|#############4      | 134/200 [13:38:05<9:26:49, 515.29s/episodes]

Episode reward:  20884.831813812256
Episode reached max number of steps


 68%|#############5      | 135/200 [13:46:44<9:19:37, 516.57s/episodes]

Episode reward:  14585.858837127686
Episode reached max number of steps


 68%|#############6      | 136/200 [13:55:19<9:10:26, 516.04s/episodes]

Episode reward:  24154.955730438232
Episode reached max number of steps


 68%|#############7      | 137/200 [14:03:54<9:01:29, 515.70s/episodes]

Episode reward:  26174.036560058594
Episode reached max number of steps


 69%|#############8      | 138/200 [14:12:29<8:52:48, 515.62s/episodes]

Episode reward:  11016.210096359253
Episode reached max number of steps


 70%|#############9      | 139/200 [14:21:05<8:44:05, 515.50s/episodes]

Episode reward:  26583.72827911377
Episode reached max number of steps


 70%|##############      | 140/200 [14:29:40<8:35:20, 515.35s/episodes]

Episode reward:  13053.643424987793
Episode reached max number of steps


 70%|##############1     | 141/200 [14:38:16<8:26:58, 515.56s/episodes]

Episode reward:  33365.82373046875
Episode reached max number of steps


 71%|##############2     | 142/200 [14:46:52<8:18:43, 515.92s/episodes]

Episode reward:  7523.313079833984
Episode reached max number of steps


 72%|##############3     | 143/200 [14:55:28<8:10:08, 515.93s/episodes]

Episode reward:  18088.29997253418
Episode reached max number of steps


 72%|##############4     | 144/200 [15:04:03<8:01:16, 515.65s/episodes]

Episode reward:  23860.30002975464
Episode reached max number of steps


 72%|##############5     | 145/200 [15:12:38<7:52:23, 515.34s/episodes]

Episode reward:  9353.125680923462
Episode reached max number of steps


 73%|##############6     | 146/200 [15:21:13<7:43:37, 515.13s/episodes]

Episode reward:  8694.451852798462
Episode reached max number of steps


 74%|##############7     | 147/200 [15:29:47<7:34:52, 514.95s/episodes]

Episode reward:  26215.51879119873
Episode reached max number of steps


 74%|##############8     | 148/200 [15:38:21<7:26:04, 514.70s/episodes]

Episode reward:  11631.272359848022
Episode reached max number of steps


 74%|##############9     | 149/200 [15:46:56<7:17:36, 514.84s/episodes]

Episode reward:  6111.12330532074
Episode reached max number of steps


 75%|###############     | 150/200 [15:55:32<7:09:06, 514.94s/episodes]

Episode reward:  26728.462982177734
Episode reached max number of steps


 76%|###############1    | 151/200 [16:04:06<7:00:26, 514.83s/episodes]

Episode reward:  4638.576427459717
Episode reached max number of steps


 76%|###############2    | 152/200 [16:12:41<6:51:52, 514.84s/episodes]

Episode reward:  28049.728744506836
Episode reached max number of steps


 76%|###############3    | 153/200 [16:21:17<6:43:28, 515.08s/episodes]

Episode reward:  17490.926879882812
Episode reached max number of steps


 77%|###############4    | 154/200 [16:29:53<6:35:12, 515.48s/episodes]

Episode reward:  28613.000785827637
Episode reached max number of steps


 78%|###############5    | 155/200 [16:38:28<6:26:35, 515.45s/episodes]

Episode reward:  14739.907287597656
Episode reached max number of steps


 78%|###############6    | 156/200 [16:47:04<6:18:02, 515.52s/episodes]

Episode reward:  14431.395622253418
Episode reached max number of steps


 78%|###############7    | 157/200 [16:55:40<6:09:25, 515.48s/episodes]

Episode reward:  7507.903289794922
Episode reached max number of steps


 79%|###############8    | 158/200 [17:04:18<6:01:23, 516.27s/episodes]

Episode reward:  13460.034065246582
AUV is out of bounds


 80%|###############9    | 159/200 [17:04:32<4:09:55, 365.74s/episodes]

Episode reward:  -1000
Episode reached max number of steps


 80%|################    | 160/200 [17:13:08<4:33:55, 410.88s/episodes]

Episode reward:  13306.201957702637
Episode reached max number of steps


 80%|################1   | 161/200 [17:21:45<4:47:36, 442.48s/episodes]

Episode reward:  13007.261585235596
Episode reached max number of steps


 81%|################2   | 162/200 [17:30:21<4:54:13, 464.56s/episodes]

Episode reward:  16782.130851745605
Episode reached max number of steps


 82%|################2   | 163/200 [17:38:57<4:56:02, 480.08s/episodes]

Episode reward:  26968.79628753662
Episode reached max number of steps


 82%|################4   | 164/200 [17:47:33<4:54:29, 490.83s/episodes]

Episode reward:  21785.246181488037
Episode reached max number of steps


 82%|################5   | 165/200 [17:56:17<4:52:13, 500.94s/episodes]

Episode reward:  21016.617977142334
AUV is out of bounds


 83%|################5   | 166/200 [17:56:32<3:21:13, 355.10s/episodes]

Episode reward:  -1000
Episode reached max number of steps


 84%|################7   | 167/200 [18:05:08<3:41:53, 403.43s/episodes]

Episode reward:  11044.352931976318
Episode reached max number of steps


 84%|################8   | 168/200 [18:13:46<3:53:26, 437.70s/episodes]

Episode reward:  15431.249252319336
Episode reached max number of steps


 84%|################9   | 169/200 [18:22:23<3:58:23, 461.40s/episodes]

Episode reward:  28258.819259643555
Episode reached max number of steps


 85%|#################   | 170/200 [18:31:00<3:59:05, 478.17s/episodes]

Episode reward:  13130.276363372803
Episode reached max number of steps


 86%|#################1  | 171/200 [18:39:37<3:56:41, 489.71s/episodes]

Episode reward:  9866.977439880371
Episode reached max number of steps


 86%|#################2  | 172/200 [18:48:14<3:52:22, 497.93s/episodes]

Episode reward:  18090.575691223145
Episode reached max number of steps


 86%|#################3  | 173/200 [18:56:51<3:46:39, 503.69s/episodes]

Episode reward:  24668.258239746094
Episode reached max number of steps


 87%|#################4  | 174/200 [19:05:28<3:40:01, 507.77s/episodes]

Episode reward:  14228.353395462036
Episode reached max number of steps


 88%|#################5  | 175/200 [19:14:06<3:32:46, 510.66s/episodes]

Episode reward:  1620.2164731025696
Episode reached max number of steps


 88%|#################6  | 176/200 [19:22:43<3:25:05, 512.73s/episodes]

Episode reward:  25917.5405960083
Episode reached max number of steps


 88%|#################7  | 177/200 [19:31:20<3:17:03, 514.06s/episodes]

Episode reward:  41892.80090332031
Episode reached max number of steps


 89%|#################8  | 178/200 [19:39:58<3:08:50, 515.01s/episodes]

Episode reward:  1528.9007441997528
Episode reached max number of steps


 90%|#################9  | 179/200 [19:48:34<3:00:22, 515.37s/episodes]

Episode reward:  36349.74223327637
Episode reached max number of steps


 90%|##################  | 180/200 [19:57:11<2:51:57, 515.85s/episodes]

Episode reward:  23470.194541931152
Episode reached max number of steps


 90%|##################1 | 181/200 [20:05:52<2:43:51, 517.44s/episodes]

Episode reward:  814.4578247070312
Episode reached max number of steps


 91%|##################2 | 182/200 [20:14:28<2:35:07, 517.09s/episodes]

Episode reward:  23128.232173919678
Episode reached max number of steps


 92%|##################3 | 183/200 [20:23:04<2:26:26, 516.84s/episodes]

Episode reward:  26439.016204833984
Episode reached max number of steps


 92%|##################4 | 184/200 [20:31:40<2:17:44, 516.51s/episodes]

Episode reward:  19769.898529052734
Episode reached max number of steps


 92%|##################5 | 185/200 [20:40:17<2:09:08, 516.54s/episodes]

Episode reward:  13240.301656723022
Episode reached max number of steps


 93%|##################6 | 186/200 [20:48:53<2:00:30, 516.47s/episodes]

Episode reward:  37971.49253845215
Episode reached max number of steps


 94%|##################7 | 187/200 [20:57:28<1:51:47, 515.99s/episodes]

Episode reward:  7198.064268112183
Episode reached max number of steps


 94%|##################7 | 188/200 [21:06:03<1:43:08, 515.75s/episodes]

Episode reward:  20986.426414489746
Episode reached max number of steps


 94%|##################9 | 189/200 [21:14:38<1:34:30, 515.51s/episodes]

Episode reward:  29035.84242248535
Episode reached max number of steps


 95%|################### | 190/200 [21:23:14<1:25:56, 515.61s/episodes]

Episode reward:  14536.205638885498
Episode reached max number of steps


 96%|################### | 191/200 [21:31:50<1:17:20, 515.62s/episodes]

Episode reward:  23792.378574371338
Episode reached max number of steps


 96%|###################2| 192/200 [21:40:25<1:08:44, 515.52s/episodes]

Episode reward:  20086.214782714844
Episode reached max number of steps


 96%|###################3| 193/200 [21:49:00<1:00:06, 515.26s/episodes]

Episode reward:  10071.189863204956
Episode reached max number of steps


 97%|#####################3| 194/200 [21:57:34<51:30, 515.04s/episodes]

Episode reward:  17288.40895462036
Episode reached max number of steps


 98%|#####################4| 195/200 [22:06:08<42:54, 514.84s/episodes]

Episode reward:  30671.537017822266
Episode reached max number of steps


 98%|#####################5| 196/200 [22:14:43<34:18, 514.70s/episodes]

Episode reward:  28713.640991210938
Episode reached max number of steps


 98%|#####################6| 197/200 [22:23:17<25:43, 514.52s/episodes]

Episode reward:  18067.689945220947
AUV is out of bounds


 99%|#####################7| 198/200 [22:23:31<12:09, 364.53s/episodes]

Episode reward:  -1000
Episode reached max number of steps


100%|#####################8| 199/200 [22:32:05<06:49, 409.33s/episodes]

Episode reward:  15124.371383666992
Episode reached max number of steps


100%|######################| 200/200 [22:40:40<00:00, 408.20s/episodes]

Episode reward:  21914.31558227539





In [None]:
# Show the last value returned by agent.deliberate() in the training loop.
print(next_pos)