In [1]:
%matplotlib widget
import datetime
import tensorflow as tf
import numpy as np
import pandas as pd
import xarray as xr
import time
import gym
import matplotlib.pyplot as plt
import cmocean
#from simple_agent import SimpleAtariAgent
from tqdm import tqdm
import random
import dqn_agent

Using TensorFlow backend.


In [2]:
# Build the biomapping environment and the DQN agent that will act in it.
# The environment's `output` and the agent's `input` are both set to the
# same "3D-matrix" representation.
env = gym.make(
    'gym_biomapping:perfect_info_atari-v0',
    static=True,
    output='3D-matrix',
)
agent = dqn_agent.DQNAgent(
    env,
    input="3D-matrix",
)

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

Reset the position to (0,0,0)


Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

Instructions for updating:
If using Keras pass *_constraint arguments to layers.



In [3]:
# Start a fresh episode; `obs` is the initial observation returned by the reset.
obs = env.reset()
# Ask the agent for its first action given that observation. Both `obs` and
# `next_action` are read by the rollout cell that follows.
next_action = agent.deliberate(obs)
# Render on the agent side (observation + chosen action) and on the environment side.
agent.render(obs, next_action)
env.render()

Reset the position to (0,0,0)


In [None]:
# Roll out one episode with the agent's own policy, rendering every step.
# Expects `obs` / `next_action` from the preceding cell (env.reset() followed
# by the first agent.deliberate()).
done = False
while not done:
    obs, reward, done, info = env.step(next_action)
    # Re-plan from the fresh observation. Without this the action chosen
    # before the loop would be replayed unchanged on every step.
    next_action = agent.deliberate(obs)
    agent.render(obs, next_action)
    env.render()

In [4]:
# Seed Python's `random`, NumPy and TensorFlow for more reproducible results;
# consider changing the value to sample a different run.
# NOTE(review): in the recorded execution order this cell (In [4]) ran after
# the environment and agent were created (In [2]), so anything initialised
# randomly there is presumably not covered by these seeds — confirm, and
# consider seeding before construction.
random.seed(1)
np.random.seed(1)
tf.set_random_seed(1)

In [5]:
# Configuration for the DQN training run below.

# --- Run length and bookkeeping ---
EPISODES = 100
ep_rewards = []            # total reward of every finished episode, in order
AGGREGATE_STATS_EVERY = 1  # Episode

# --- Preview and checkpointing ---
SHOW_PREVIEW = True
MODEL_NAME = "LAUV_ROALD"
MIN_REWARD = -10_000       # Rewards below this are not saved

# --- Exploration schedule (decaying epsilon-greedy) ---
# The more the agent learns, the less it should explore:
#   epsilon       := chance of taking an exploratory (randomly selected) action
#   EPSILON_DECAY := multiplicative rate at which exploration decays
#   MIN_EPSILON   := floor that keeps some exploration; epsilon never decays below it
MIN_EPSILON = 0.001
EPSILON_DECAY = 0.99975
epsilon = 1                # Not constant, as it will decay


# Main DQN training loop: one tqdm tick per episode.
# Reads the configuration constants (EPISODES, SHOW_PREVIEW, ...) and the
# `env` / `agent` objects created in earlier cells; mutates `epsilon` and
# appends to `ep_rewards`.
for episode in tqdm(range(1, EPISODES + 1), ascii=True, unit='episodes'):

    # Update tensorboard step every episode
    agent.tensorboard.step = episode

    # Reset episode reward, step counter, environment, agent and done-flag
    episode_reward = 0
    step = 1
    current_state = env.reset()
    agent.reset()
    done = False

    # The training loop: one iteration per environment step
    while not done:
        # Epsilon-greedy: explore with probability epsilon, otherwise exploit
        if np.random.random() > epsilon:
            # Exploit: pick the argmax over the values returned by agent.get_qs
            action = np.argmax(agent.get_qs(current_state))
        else:
            # Explore: pick a random action index
            action = np.random.randint(0, env.action_space.nvec[0])

        # Take selected action (passed to the environment wrapped in a list)
        new_state, reward, done, _info = env.step([action])

        # Let the agent plan from the new state. The result is what gets
        # rendered below; previously it was stored in an unused name and the
        # preview showed a stale `next_action` left over from an earlier cell.
        next_action = agent.deliberate(new_state)

        episode_reward += reward

        if SHOW_PREVIEW:
            agent.render(new_state, next_action)
            env.render()

        # Update replay-memory (a set length collection of experiences the
        # agent remembers, from which training examples are selected).
        # Works as a deque: one in, oldest out
        agent.update_replay_memory((current_state, action, reward, new_state, done))

        # Trains the agent once the replay-memory is sufficiently large
        agent.train(done, step)
        current_state = new_state
        step += 1

    print("Episode reward: ", episode_reward)

    # Decay epsilon once per episode, never dropping below MIN_EPSILON.
    # NOTE(review): with the values configured above (100 episodes, decay
    # 0.99975) epsilon only falls from 1 to roughly 0.975, so almost every
    # action stays random for the whole run — confirm whether a faster or
    # per-step decay was intended.
    if epsilon > MIN_EPSILON:
        epsilon = max(MIN_EPSILON, epsilon * EPSILON_DECAY)

    # Logging and storing
    ep_rewards.append(episode_reward)
    if not episode % AGGREGATE_STATS_EVERY or episode == 1:
        # Statistics over the most recent AGGREGATE_STATS_EVERY episodes
        recent_rewards = ep_rewards[-AGGREGATE_STATS_EVERY:]
        average_reward = sum(recent_rewards) / len(recent_rewards)
        min_reward = min(recent_rewards)
        max_reward = max(recent_rewards)
        agent.tensorboard.update_stats(reward_avg=average_reward, reward_min=min_reward, reward_max=max_reward, epsilon=epsilon)

        # Save model, but only when min reward is greater than or equal to a set value.
        # NOTE(review): presumably requires the `models/` directory to exist already — confirm.
        if min_reward >= MIN_REWARD:
            agent.model.save(f'models/{MODEL_NAME}__{max_reward:_>7.2f}max_{average_reward:_>7.2f}avg_{min_reward:_>7.2f}min__{int(time.time())}.model')
    
        

  0%|                      | 0/100 [00:00<?, ?episodes/s]

Reset the position to (0,0,0)
Episode reached max number of steps
Episode reward:  109.02322769165039



  1%|1             | 1/100 [00:11<19:14, 11.66s/episodes]

Reset the position to (0,0,0)
Episode reached max number of steps


  2%|2             | 2/100 [00:23<18:55, 11.59s/episodes]

Episode reward:  337.20257568359375
Reset the position to (0,0,0)
Episode reached max number of steps


  3%|4             | 3/100 [00:34<18:32, 11.47s/episodes]

Episode reward:  278.7209167480469
Reset the position to (0,0,0)
Episode reached max number of steps


  4%|5             | 4/100 [00:45<18:13, 11.39s/episodes]

Episode reward:  145.62260627746582
Reset the position to (0,0,0)
Episode reached max number of steps


  5%|7             | 5/100 [00:56<17:55, 11.33s/episodes]

Episode reward:  265.55854415893555
Reset the position to (0,0,0)
Episode reached max number of steps


  6%|8             | 6/100 [01:07<17:42, 11.31s/episodes]

Episode reward:  179.99904251098633
Reset the position to (0,0,0)
Episode reached max number of steps


  7%|9             | 7/100 [01:19<17:34, 11.34s/episodes]

Episode reward:  96.58406066894531
Reset the position to (0,0,0)
Episode reached max number of steps


  8%|#1            | 8/100 [01:30<17:17, 11.28s/episodes]

Episode reward:  132.4255256652832
Reset the position to (0,0,0)
Episode reached max number of steps


  9%|#2            | 9/100 [01:41<17:05, 11.27s/episodes]

Episode reward:  91.65714645385742
Reset the position to (0,0,0)
Episode reached max number of steps


 10%|#3           | 10/100 [01:52<16:52, 11.25s/episodes]

Episode reward:  252.18023872375488
Reset the position to (0,0,0)
Episode reached max number of steps


 11%|#4           | 11/100 [02:04<16:39, 11.23s/episodes]

Episode reward:  144.16545009613037
Reset the position to (0,0,0)
Episode reached max number of steps


 12%|#5           | 12/100 [02:15<16:25, 11.20s/episodes]

Episode reward:  225.74450540542603
Reset the position to (0,0,0)
Episode reached max number of steps


 13%|#6           | 13/100 [02:26<16:13, 11.19s/episodes]

Episode reward:  184.60885047912598
Reset the position to (0,0,0)
Episode reached max number of steps


 14%|#8           | 14/100 [02:37<16:00, 11.17s/episodes]

Episode reward:  268.39711570739746
Reset the position to (0,0,0)
Episode reached max number of steps


 15%|#9           | 15/100 [02:48<15:48, 11.16s/episodes]

Episode reward:  244.92330169677734
Reset the position to (0,0,0)
Episode reached max number of steps


 16%|##           | 16/100 [02:59<15:38, 11.17s/episodes]

Episode reward:  270.49613761901855
Reset the position to (0,0,0)
Episode reached max number of steps


 17%|##2          | 17/100 [03:11<15:26, 11.17s/episodes]

Episode reward:  190.45767784118652
Reset the position to (0,0,0)
Episode reached max number of steps


 18%|##3          | 18/100 [03:22<15:15, 11.17s/episodes]

Episode reward:  224.50894355773926
Reset the position to (0,0,0)

Episode reached max number of steps


 19%|##4          | 19/100 [03:33<15:06, 11.19s/episodes]

Episode reward:  89.76514267921448
Reset the position to (0,0,0)
Replay-memory sufficiently large - Started using neural network
Episode reached max number of steps


 20%|##6          | 20/100 [04:02<22:11, 16.64s/episodes]

Episode reward:  76.15397453308105
Reset the position to (0,0,0)
Episode reached max number of steps


 21%|##7          | 21/100 [04:55<36:07, 27.43s/episodes]

Episode reward:  188.1548571586609
Reset the position to (0,0,0)
Episode reached max number of steps


 22%|##8          | 22/100 [05:47<45:28, 34.98s/episodes]

Episode reward:  228.95446395874023
Reset the position to (0,0,0)
Episode reached max number of steps


 23%|##9          | 23/100 [06:40<51:47, 40.36s/episodes]

Episode reward:  115.23102951049805
Reset the position to (0,0,0)
Episode reached max number of steps


 24%|###1         | 24/100 [07:32<55:32, 43.85s/episodes]

Episode reward:  189.7302589416504
Reset the position to (0,0,0)
Episode reached max number of steps


 25%|###2         | 25/100 [08:24<57:50, 46.28s/episodes]

Episode reward:  143.9588451385498
Reset the position to (0,0,0)
Episode reached max number of steps


 26%|###3         | 26/100 [09:16<59:10, 47.98s/episodes]

Episode reward:  95.44243717193604
Reset the position to (0,0,0)
Episode reached max number of steps


 27%|###5         | 27/100 [10:08<59:48, 49.15s/episodes]

Episode reward:  62.716190338134766
Reset the position to (0,0,0)
Episode reached max number of steps


 28%|###        | 28/100 [11:00<1:00:03, 50.05s/episodes]

Episode reward:  317.21851348876953
Reset the position to (0,0,0)
Episode reached max number of steps


 29%|###7         | 29/100 [11:52<59:51, 50.59s/episodes]

Episode reward:  152.61414337158203
Reset the position to (0,0,0)
Episode reached max number of steps


 30%|###9         | 30/100 [12:44<59:29, 51.00s/episodes]

Episode reward:  191.79150390625
Reset the position to (0,0,0)
Episode reached max number of steps


 31%|####         | 31/100 [13:36<59:01, 51.32s/episodes]

Episode reward:  161.2552813887596
Reset the position to (0,0,0)
Episode reached max number of steps


 32%|####1        | 32/100 [14:28<58:21, 51.50s/episodes]

Episode reward:  148.0075626373291
Reset the position to (0,0,0)
Episode reached max number of steps


 33%|####2        | 33/100 [15:21<57:49, 51.78s/episodes]

Episode reward:  153.58651065826416
Reset the position to (0,0,0)
Episode reached max number of steps


 34%|####4        | 34/100 [16:13<57:02, 51.85s/episodes]

Episode reward:  91.59129905700684
Reset the position to (0,0,0)
Episode reached max number of steps


 35%|####5        | 35/100 [17:05<56:18, 51.97s/episodes]

Episode reward:  369.58792877197266
Reset the position to (0,0,0)
Episode reached max number of steps


 36%|####6        | 36/100 [17:57<55:32, 52.07s/episodes]

Episode reward:  345.119478225708
Reset the position to (0,0,0)
Episode reached max number of steps


 37%|####8        | 37/100 [18:49<54:38, 52.04s/episodes]

Episode reward:  201.55811500549316
Reset the position to (0,0,0)
Episode reached max number of steps


 38%|####9        | 38/100 [19:41<53:47, 52.06s/episodes]

Episode reward:  103.34860610961914
Reset the position to (0,0,0)
Episode reached max number of steps


 39%|#####        | 39/100 [20:34<53:00, 52.14s/episodes]

Episode reward:  203.81663608551025
Reset the position to (0,0,0)
Episode reached max number of steps


 40%|#####2       | 40/100 [21:26<52:08, 52.14s/episodes]

Episode reward:  490.54798126220703
Reset the position to (0,0,0)
Episode reached max number of steps


 41%|#####3       | 41/100 [22:18<51:13, 52.09s/episodes]

Episode reward:  88.60877418518066
Reset the position to (0,0,0)
Episode reached max number of steps


 42%|#####4       | 42/100 [23:10<50:19, 52.07s/episodes]

Episode reward:  61.467538833618164
Reset the position to (0,0,0)
Episode reached max number of steps


 43%|#####5       | 43/100 [24:02<49:24, 52.01s/episodes]

Episode reward:  319.3786277770996
Reset the position to (0,0,0)
Episode reached max number of steps


 44%|#####7       | 44/100 [24:54<48:36, 52.09s/episodes]

Episode reward:  205.52429962158203
Reset the position to (0,0,0)
Episode reached max number of steps


 45%|#####8       | 45/100 [25:46<47:46, 52.12s/episodes]

Episode reward:  314.5286331176758
Reset the position to (0,0,0)
Episode reached max number of steps


 46%|#####9       | 46/100 [26:38<46:58, 52.20s/episodes]

Episode reward:  232.82881927490234
Reset the position to (0,0,0)
Episode reached max number of steps


 47%|######1      | 47/100 [27:31<46:07, 52.21s/episodes]

Episode reward:  187.8083152770996
Reset the position to (0,0,0)
Episode reached max number of steps


 48%|######2      | 48/100 [28:24<45:27, 52.45s/episodes]

Episode reward:  117.86389350891113
Reset the position to (0,0,0)
Episode reached max number of steps


 49%|######3      | 49/100 [29:16<44:32, 52.39s/episodes]

Episode reward:  353.3605308532715
Reset the position to (0,0,0)
Episode reached max number of steps


 50%|######5      | 50/100 [30:08<43:36, 52.33s/episodes]

Episode reward:  102.0417251586914
Reset the position to (0,0,0)
Episode reached max number of steps


 51%|######6      | 51/100 [31:00<42:43, 52.32s/episodes]

Episode reward:  137.8079433441162
Reset the position to (0,0,0)
Episode reached max number of steps


 52%|######7      | 52/100 [31:53<41:50, 52.30s/episodes]

Episode reward:  131.6690273284912
Reset the position to (0,0,0)
Episode reached max number of steps


 53%|######8      | 53/100 [32:45<40:56, 52.27s/episodes]

Episode reward:  221.89770889282227
Reset the position to (0,0,0)
Episode reached max number of steps


 54%|#######      | 54/100 [33:37<40:04, 52.27s/episodes]

Episode reward:  127.80197811126709
Reset the position to (0,0,0)
Episode reached max number of steps


 55%|#######1     | 55/100 [34:29<39:09, 52.22s/episodes]

Episode reward:  173.90100479125977
Reset the position to (0,0,0)
Episode reached max number of steps


 56%|#######2     | 56/100 [35:22<38:20, 52.28s/episodes]

Episode reward:  131.25774955749512
Reset the position to (0,0,0)
Episode reached max number of steps


 57%|#######4     | 57/100 [36:14<37:28, 52.30s/episodes]

Episode reward:  352.5038814544678
Reset the position to (0,0,0)
Episode reached max number of steps


 58%|#######5     | 58/100 [37:07<36:41, 52.42s/episodes]

Episode reward:  284.5538387298584
Reset the position to (0,0,0)
Episode reached max number of steps


 59%|#######6     | 59/100 [37:59<35:46, 52.35s/episodes]

Episode reward:  303.74771881103516
Reset the position to (0,0,0)
Episode reached max number of steps


 60%|#######8     | 60/100 [38:51<34:57, 52.43s/episodes]

Episode reward:  203.67033767700195
Reset the position to (0,0,0)
Episode reached max number of steps


 61%|#######9     | 61/100 [39:44<34:04, 52.41s/episodes]

Episode reward:  266.7119483947754
Reset the position to (0,0,0)
Episode reached max number of steps


 62%|########     | 62/100 [40:36<33:12, 52.43s/episodes]

Episode reward:  463.91573333740234
Reset the position to (0,0,0)
Episode reached max number of steps


 63%|########1    | 63/100 [41:29<32:18, 52.39s/episodes]

Episode reward:  336.9028720855713
Reset the position to (0,0,0)
Episode reached max number of steps


 64%|########3    | 64/100 [42:21<31:24, 52.35s/episodes]

Episode reward:  249.80004501342773
Reset the position to (0,0,0)
Episode reached max number of steps


 65%|########4    | 65/100 [43:13<30:35, 52.43s/episodes]

Episode reward:  304.1552782058716
Reset the position to (0,0,0)
Episode reached max number of steps


 66%|########5    | 66/100 [44:06<29:44, 52.48s/episodes]

Episode reward:  427.12571716308594
Reset the position to (0,0,0)
Episode reached max number of steps


 67%|########7    | 67/100 [44:59<28:56, 52.63s/episodes]

Episode reward:  174.53973054885864
Reset the position to (0,0,0)
Episode reached max number of steps


 68%|########8    | 68/100 [45:52<28:03, 52.60s/episodes]

Episode reward:  274.0922260284424
Reset the position to (0,0,0)
Episode reached max number of steps


 69%|########9    | 69/100 [46:44<27:12, 52.67s/episodes]

Episode reward:  376.7329349517822
Reset the position to (0,0,0)
Episode reached max number of steps


 70%|#########1   | 70/100 [47:37<26:19, 52.67s/episodes]

Episode reward:  132.95535278320312
Reset the position to (0,0,0)
Episode reached max number of steps


 71%|#########2   | 71/100 [48:30<25:27, 52.67s/episodes]

Episode reward:  607.286226272583
Reset the position to (0,0,0)
Episode reached max number of steps


 72%|#########3   | 72/100 [49:23<24:35, 52.69s/episodes]

Episode reward:  75.46749877929688
Reset the position to (0,0,0)
Episode reached max number of steps


 73%|#########4   | 73/100 [50:15<23:42, 52.67s/episodes]

Episode reward:  251.83892440795898
Reset the position to (0,0,0)
Episode reached max number of steps


 74%|#########6   | 74/100 [51:08<22:52, 52.78s/episodes]

Episode reward:  188.84575939178467
Reset the position to (0,0,0)
Episode reached max number of steps


 75%|#########7   | 75/100 [52:01<21:58, 52.72s/episodes]

Episode reward:  265.035608291626
Reset the position to (0,0,0)
Episode reached max number of steps


 76%|#########8   | 76/100 [52:54<21:06, 52.76s/episodes]

Episode reward:  177.93677139282227
Reset the position to (0,0,0)
Episode reached max number of steps


 77%|##########   | 77/100 [53:46<20:11, 52.69s/episodes]

Episode reward:  379.73699951171875
Reset the position to (0,0,0)
Episode reached max number of steps


 78%|##########1  | 78/100 [54:39<19:19, 52.69s/episodes]

Episode reward:  244.974027633667
Reset the position to (0,0,0)
Episode reached max number of steps


 79%|##########2  | 79/100 [55:32<18:26, 52.70s/episodes]

Episode reward:  162.58155059814453
Reset the position to (0,0,0)
Episode reached max number of steps


 80%|##########4  | 80/100 [56:24<17:34, 52.71s/episodes]

Episode reward:  96.4915943145752
Reset the position to (0,0,0)
Episode reached max number of steps


 81%|##########5  | 81/100 [57:17<16:43, 52.81s/episodes]

Episode reward:  79.2655143737793
Reset the position to (0,0,0)
Episode reached max number of steps


 82%|##########6  | 82/100 [58:10<15:50, 52.78s/episodes]

Episode reward:  241.63865280151367
Reset the position to (0,0,0)
Episode reached max number of steps


 83%|##########7  | 83/100 [59:03<14:58, 52.85s/episodes]

Episode reward:  354.01548957824707
Reset the position to (0,0,0)
Episode reached max number of steps


 84%|##########9  | 84/100 [59:56<14:05, 52.85s/episodes]

Episode reward:  487.45042991638184
Reset the position to (0,0,0)
Episode reached max number of steps


 85%|#########3 | 85/100 [1:00:48<13:10, 52.69s/episodes]

Episode reward:  553.5917377471924
Reset the position to (0,0,0)
Episode reached max number of steps


 86%|#########4 | 86/100 [1:01:40<12:15, 52.55s/episodes]

Episode reward:  148.7508373260498
Reset the position to (0,0,0)
Episode reached max number of steps


 87%|#########5 | 87/100 [1:02:33<11:23, 52.57s/episodes]

Episode reward:  630.3593339920044
Reset the position to (0,0,0)
Episode reached max number of steps


 88%|#########6 | 88/100 [1:03:25<10:30, 52.52s/episodes]

Episode reward:  141.86521911621094
Reset the position to (0,0,0)
Episode reached max number of steps


 89%|#########7 | 89/100 [1:04:18<09:38, 52.57s/episodes]

Episode reward:  459.29959297180176
Reset the position to (0,0,0)
Episode reached max number of steps


 90%|#########9 | 90/100 [1:05:11<08:46, 52.68s/episodes]

Episode reward:  351.3422222137451
Reset the position to (0,0,0)
Episode reached max number of steps


 91%|########## | 91/100 [1:06:04<07:54, 52.68s/episodes]

Episode reward:  192.29705357551575
Reset the position to (0,0,0)
Episode reached max number of steps


 92%|##########1| 92/100 [1:06:58<07:04, 53.09s/episodes]

Episode reward:  209.3719940185547
Reset the position to (0,0,0)
Episode reached max number of steps


 93%|##########2| 93/100 [1:07:50<06:10, 52.91s/episodes]

Episode reward:  196.56145477294922
Reset the position to (0,0,0)
Episode reached max number of steps


 94%|##########3| 94/100 [1:08:43<05:16, 52.73s/episodes]

Episode reward:  203.95548629760742
Reset the position to (0,0,0)
Episode reached max number of steps


 95%|##########4| 95/100 [1:09:40<04:31, 54.21s/episodes]

Episode reward:  175.98072052001953
Reset the position to (0,0,0)
Episode reached max number of steps


 96%|##########5| 96/100 [1:10:36<03:38, 54.72s/episodes]

Episode reward:  319.5772590637207
Reset the position to (0,0,0)
Episode reached max number of steps


 97%|##########6| 97/100 [1:11:32<02:45, 55.18s/episodes]

Episode reward:  128.53872108459473
Reset the position to (0,0,0)
Episode reached max number of steps


 98%|##########7| 98/100 [1:12:27<01:49, 54.97s/episodes]

Episode reward:  263.69483375549316
Reset the position to (0,0,0)
Episode reached max number of steps


 99%|##########8| 99/100 [1:13:22<00:55, 55.12s/episodes]

Episode reward:  216.68279647827148
Reset the position to (0,0,0)
Episode reached max number of steps


100%|##########| 100/100 [1:14:16<00:00, 44.57s/episodes]

Episode reward:  325.525297164917



