In [1]:
import sys, math
import numpy as np
import matplotlib.pyplot as plt

In [2]:
import gym
from keras.models import Sequential
from keras.layers import Dense, Activation, Flatten
from keras.optimizers import Adam, SGD
from keras.models import load_model

  from ._conv import register_converters as _register_converters
Using TensorFlow backend.


In [3]:
import random

In [4]:
ENV_NAME = 'LunarLander-v2'

env = gym.make(ENV_NAME)
# To get repeatable results.
sd = 2
# np.random.seed(sd)
random.seed(sd)
env.seed(sd)
nb_actions = env.action_space.n

"""Keras model for a fully connected 4 layer NN"""
model = Sequential()
model.add(Flatten(input_shape=(1,) + env.observation_space.shape))
model.add(Dense(40))
model.add(Activation('relu'))
model.add(Dense(40))
model.add(Activation('relu'))
model.add(Dense(nb_actions))
model.add(Activation('linear'))
model.compile(loss='mean_squared_error',  optimizer=Adam(lr=0.002,
              decay=2.25e-05))
print(model.summary())


_________________________________________________________________
Layer (type)                 Output Shape              Param #   
flatten_1 (Flatten)          (None, 8)                 0         
_________________________________________________________________
dense_1 (Dense)              (None, 40)                360       
_________________________________________________________________
activation_1 (Activation)    (None, 40)                0         
_________________________________________________________________
dense_2 (Dense)              (None, 40)                1640      
_________________________________________________________________
activation_2 (Activation)    (None, 40)                0         
_________________________________________________________________
dense_3 (Dense)              (None, 4)                 164       
_________________________________________________________________
activation_3 (Activation)    (None, 4)                 0         
Total para

In [5]:
def forward_pass(state):
    input = np.empty([1, 1, 8])
    input[0][0] = state
    return model.predict(input)[0]

In [6]:
def get_best_action(state):
    """Returns the index of the action with the highest Q-value, i.e.
        argMax(Q(nxt_state, all_actions))
    """
    state_q_values = forward_pass(state)
    return np.argmax(state_q_values)

In [7]:
def get_targets(state, action, reward, next_state):
    """
    Returns a set of target Q-values for a particular <s, a, r, s'> tuple
    """
    current_state_q_values = forward_pass(state)
    next_state_q_values = forward_pass(next_state)
    max_q_next_state = np.max(next_state_q_values)
    targets = np.empty([1, nb_actions])

    for i in range(nb_actions):
        if i == action:
            targets[0][i] = reward + (gamma * max_q_next_state)
        else:
            targets[0][i] = current_state_q_values[i]
    return targets

In [8]:
def choose_action(state, epsilon):
    """
    Greedy-epsilon exploration. Chooses action with the highest Q(s,a) value.
    With probability epsilon chooses a random action.
    """
    r = np.random.uniform()
    if r < epsilon:
        action = np.floor(np.random.randint(nb_actions))
    else:
        action = get_best_action(state)
    return int(action)

In [9]:
class Memory(object):
    def __init__(self, memory_size=10000, experience_size=1):
        self.experiences = np.empty([0, experience_size], dtype=object)
        self.max_memory_size = memory_size

    def add_experience(self, experience):
        self.experiences = np.insert(self.experiences, 0,
                                     experience, axis=0)
        if len(self.experiences) > self.max_memory_size:
            self.experiences = np.delete(self.experiences,
                                         self.max_memory_size, axis=0)

    def sample_experiences(self, mini_batch_size):
        if(mini_batch_size > len(self.experiences)):
            rep_needed = True
        else:
            rep_needed = False
        s = self.experiences[np.random.choice(
                self.experiences.shape[0],
                mini_batch_size, replace=rep_needed)]
        return s

In [10]:
def pack_experience(state, action, reward, new_state):
    experience = np.empty([0])
    experience = np.append(experience, state)
    experience = np.append(experience, [action])
    experience = np.append(experience, [reward])
    experience = np.append(experience, new_state)
    return experience

In [11]:
def unpack_experience(experience):
    state = experience[0:8]
    action = experience[8]
    reward = experience[9]
    new_state = experience[10:18]
    return state, action, reward, new_state

In [12]:
def learn_from_replay_memories(memory, batch_size):
    """
    Take a uniformly distributed batch of experiences and set the corresponding
    targets. Then train the network sequentally on each individual
    (experience, target) pair.
    """
    sample_batch = memory.sample_experiences(batch_size)
    for e in sample_batch:
        state, action, reward, new_state = unpack_experience(e)
        targets = get_targets(state, action, reward, new_state)
        x = np.empty([1, 1, 8])
        x[0][0] = state
        model.train_on_batch(x, targets)

In [13]:
mini_batch_size = 5
replay_memory_size = 20
gamma = 0.1
epsilon = 0.1
max_steps_per_epoch = 1000
max_epochs = 5000

memory = Memory(replay_memory_size, 18)
total_reward = np.zeros(max_epochs)

In [14]:
for epoch in range(max_epochs):
    state = env.reset()
    current_step = 0
    epoch_done = False
    while current_step < max_steps_per_epoch and not epoch_done:
        # Choose an action using the greedy-epsilon policy
        action = choose_action(state, epsilon)
        new_state, reward, epoch_done, info = env.step(action)
        total_reward[epoch] = total_reward[epoch] + reward
        # Store the experience in memory buffer
        experience = pack_experience(state, action, reward, new_state)
        memory.add_experience(experience)

        current_step = current_step + 1
        state = new_state
        # Learn from past experiences
        learn_from_replay_memories(memory, mini_batch_size)

    if not epoch % 10 and epoch and gamma < 0.975:
        # Gradually increase gamma to improve the importance of future-rewards
        # as the NN learns and becomes more accurate
        gamma = gamma * 1.0125
        print("New gamma = {:.6}".format(gamma))

    print("Episode {} reward = {:.6}".format(epoch, total_reward[epoch]))
    if not epoch % 10 and epoch:
        print("---------------------------")
        print("Last 10 episode avg = {:.6}".format(np.average(total_reward[epoch-10:epoch])))
        print("---------------------------")

    if epoch and np.average(total_reward[epoch-100:epoch]) > 150:
        break

Episode 0 reward = -87.1167
Episode 1 reward = -259.304


  avg = a.mean(axis)
  ret = ret.dtype.type(ret / rcount)


Episode 2 reward = -515.312
Episode 3 reward = -690.686
Episode 4 reward = -581.656
Episode 5 reward = -407.677
Episode 6 reward = -182.731
Episode 7 reward = -550.495
Episode 8 reward = -684.048
Episode 9 reward = -657.08
New gamma = 0.10125
Episode 10 reward = -673.2
---------------------------
Last 10 episode avg = -461.611
---------------------------
Episode 11 reward = -92.358
Episode 12 reward = -183.011
Episode 13 reward = -226.484
Episode 14 reward = -248.508
Episode 15 reward = -215.913
Episode 16 reward = -78.1515
Episode 17 reward = -203.97
Episode 18 reward = -268.175
Episode 19 reward = -262.515
New gamma = 0.102516
Episode 20 reward = -234.447
---------------------------
Last 10 episode avg = -245.228
---------------------------
Episode 21 reward = -81.9291
Episode 22 reward = -817.279
Episode 23 reward = -333.162
Episode 24 reward = 179.31
Episode 25 reward = -132.286
Episode 26 reward = 241.509
Episode 27 reward = -538.863
Episode 28 reward = -899.65
Episode 29 reward =

Episode 208 reward = -48.2567
Episode 209 reward = -174.322
New gamma = 0.129806
Episode 210 reward = -42.0982
---------------------------
Last 10 episode avg = -146.428
---------------------------
Episode 211 reward = -165.968
Episode 212 reward = -204.55
Episode 213 reward = -203.694
Episode 214 reward = -72.0148
Episode 215 reward = -547.557
Episode 216 reward = -272.969
Episode 217 reward = -247.031
Episode 218 reward = -4.30828
Episode 219 reward = -42.392
New gamma = 0.131429
Episode 220 reward = -182.34
---------------------------
Last 10 episode avg = -180.258
---------------------------
Episode 221 reward = -161.278
Episode 222 reward = -18.5035
Episode 223 reward = -62.5831
Episode 224 reward = -53.1985
Episode 225 reward = -226.04
Episode 226 reward = -21.6795
Episode 227 reward = -52.4841
Episode 228 reward = -103.047
Episode 229 reward = -134.431
New gamma = 0.133072
Episode 230 reward = -58.5678
---------------------------
Last 10 episode avg = -101.558
------------------

Episode 411 reward = -238.606
Episode 412 reward = -162.714
Episode 413 reward = -161.36
Episode 414 reward = -200.648
Episode 415 reward = -113.385
Episode 416 reward = -222.208
Episode 417 reward = -104.607
Episode 418 reward = -225.174
Episode 419 reward = -310.237
New gamma = 0.168497
Episode 420 reward = -135.339
---------------------------
Last 10 episode avg = -192.637
---------------------------
Episode 421 reward = -216.32
Episode 422 reward = -54.4658
Episode 423 reward = -79.0616
Episode 424 reward = -35.3838
Episode 425 reward = -201.822
Episode 426 reward = -224.464
Episode 427 reward = -139.63
Episode 428 reward = -110.488
Episode 429 reward = -246.513
New gamma = 0.170603
Episode 430 reward = 201.695
---------------------------
Last 10 episode avg = -144.349
---------------------------
Episode 431 reward = -218.815
Episode 432 reward = -198.132
Episode 433 reward = -32.6443
Episode 434 reward = -23.4292
Episode 435 reward = -282.909
Episode 436 reward = -122.414
Episode 

Episode 614 reward = -53.5123
Episode 615 reward = -44.1289
Episode 616 reward = -165.58
Episode 617 reward = -256.461
Episode 618 reward = -105.93
Episode 619 reward = -209.678
New gamma = 0.216019
Episode 620 reward = -50.5561
---------------------------
Last 10 episode avg = -61.2904
---------------------------
Episode 621 reward = 20.6792
Episode 622 reward = -74.2691
Episode 623 reward = -52.4044
Episode 624 reward = -67.6056
Episode 625 reward = 38.8567
Episode 626 reward = -37.7654
Episode 627 reward = -243.843
Episode 628 reward = 284.353
Episode 629 reward = -42.9922
New gamma = 0.218719
Episode 630 reward = 50.2625
---------------------------
Last 10 episode avg = -22.5547
---------------------------
Episode 631 reward = -155.513
Episode 632 reward = 281.457
Episode 633 reward = 250.215
Episode 634 reward = -30.5526
Episode 635 reward = -36.0449
Episode 636 reward = 38.4182
Episode 637 reward = -45.8414
Episode 638 reward = -50.5901
Episode 639 reward = -115.05
New gamma = 0.

Episode 817 reward = 4.57713
Episode 818 reward = -128.506
Episode 819 reward = -32.5788
New gamma = 0.276944
Episode 820 reward = -191.735
---------------------------
Last 10 episode avg = -98.4534
---------------------------
Episode 821 reward = -69.5301
Episode 822 reward = -218.988
Episode 823 reward = -33.0987
Episode 824 reward = -24.3555
Episode 825 reward = -161.621
Episode 826 reward = -14.8873
Episode 827 reward = -46.2461
Episode 828 reward = -19.1432
Episode 829 reward = -44.724
New gamma = 0.280406
Episode 830 reward = -35.6218
---------------------------
Last 10 episode avg = -82.4329
---------------------------
Episode 831 reward = -217.335
Episode 832 reward = -15.0944
Episode 833 reward = 35.5056
Episode 834 reward = -210.192
Episode 835 reward = 1.06403
Episode 836 reward = -236.92
Episode 837 reward = -74.4326
Episode 838 reward = -29.6055
Episode 839 reward = 177.316
New gamma = 0.283911
Episode 840 reward = -273.292
---------------------------
Last 10 episode avg =

New gamma = 0.355053
Episode 1020 reward = -13.3713
---------------------------
Last 10 episode avg = -93.5056
---------------------------
Episode 1021 reward = -195.992
Episode 1022 reward = -26.7019
Episode 1023 reward = -18.0482
Episode 1024 reward = -1.39376
Episode 1025 reward = -196.263
Episode 1026 reward = -24.1675
Episode 1027 reward = -168.376
Episode 1028 reward = -173.434
Episode 1029 reward = -3.26268
New gamma = 0.359491
Episode 1030 reward = -228.094
---------------------------
Last 10 episode avg = -82.101
---------------------------
Episode 1031 reward = -20.855
Episode 1032 reward = -177.361
Episode 1033 reward = 33.7254
Episode 1034 reward = -111.275
Episode 1035 reward = -61.7884
Episode 1036 reward = -80.0837
Episode 1037 reward = -16.4217
Episode 1038 reward = -214.964
Episode 1039 reward = -11.2576
New gamma = 0.363985
Episode 1040 reward = -113.313
---------------------------
Last 10 episode avg = -88.8374
---------------------------
Episode 1041 reward = -318.5

Episode 1217 reward = -214.732
Episode 1218 reward = -24.9915
Episode 1219 reward = -7.37353
New gamma = 0.455191
Episode 1220 reward = -121.907
---------------------------
Last 10 episode avg = -56.4699
---------------------------
Episode 1221 reward = -56.3907
Episode 1222 reward = -49.099
Episode 1223 reward = 48.8491
Episode 1224 reward = -285.227
Episode 1225 reward = -5.74823
Episode 1226 reward = 202.75
Episode 1227 reward = -17.301
Episode 1228 reward = 235.831
Episode 1229 reward = -38.6566
New gamma = 0.460881
Episode 1230 reward = -184.313
---------------------------
Last 10 episode avg = -8.68996
---------------------------
Episode 1231 reward = -131.357
Episode 1232 reward = -153.928
Episode 1233 reward = -43.2572
Episode 1234 reward = -175.529
Episode 1235 reward = -169.431
Episode 1236 reward = 243.057
Episode 1237 reward = -34.8036
Episode 1238 reward = -42.7224
Episode 1239 reward = -63.3559
New gamma = 0.466642
Episode 1240 reward = -17.74
---------------------------


Episode 1415 reward = 29.1263
Episode 1416 reward = -70.0974
Episode 1417 reward = -178.285
Episode 1418 reward = 256.112
Episode 1419 reward = 184.627
New gamma = 0.583572
Episode 1420 reward = -4.52217
---------------------------
Last 10 episode avg = -41.8561
---------------------------
Episode 1421 reward = -189.666
Episode 1422 reward = -85.6096
Episode 1423 reward = -85.2012
Episode 1424 reward = 28.7132
Episode 1425 reward = -184.645
Episode 1426 reward = -14.0681
Episode 1427 reward = -62.1683
Episode 1428 reward = 19.7962
Episode 1429 reward = -195.525
New gamma = 0.590867
Episode 1430 reward = -193.4
---------------------------
Last 10 episode avg = -77.2896
---------------------------
Episode 1431 reward = 231.044
Episode 1432 reward = 123.918
Episode 1433 reward = -34.3179
Episode 1434 reward = -34.4869
Episode 1435 reward = 0.848914
Episode 1436 reward = -66.8579
Episode 1437 reward = 38.2897
Episode 1438 reward = -172.738
Episode 1439 reward = -7.05109
New gamma = 0.59825

Episode 1613 reward = -6.39589
Episode 1614 reward = -212.635
Episode 1615 reward = 33.7068
Episode 1616 reward = -67.8766
Episode 1617 reward = -179.141
Episode 1618 reward = -183.416
Episode 1619 reward = -167.175
New gamma = 0.748161
Episode 1620 reward = -202.246
---------------------------
Last 10 episode avg = -71.3653
---------------------------
Episode 1621 reward = -230.787
Episode 1622 reward = -18.1878
Episode 1623 reward = 233.297
Episode 1624 reward = -40.732
Episode 1625 reward = -153.159
Episode 1626 reward = -53.5062
Episode 1627 reward = 82.8107
Episode 1628 reward = -256.729
Episode 1629 reward = -214.382
New gamma = 0.757513
Episode 1630 reward = -10.8462
---------------------------
Last 10 episode avg = -85.3621
---------------------------
Episode 1631 reward = -190.034
Episode 1632 reward = -184.441
Episode 1633 reward = -234.041
Episode 1634 reward = -16.8798
Episode 1635 reward = -215.832
Episode 1636 reward = 193.719
Episode 1637 reward = 50.1302
Episode 1638 re

Episode 1811 reward = -33.8185
Episode 1812 reward = -122.259
Episode 1813 reward = -76.9203
Episode 1814 reward = -37.0637
Episode 1815 reward = -8.66148
Episode 1816 reward = -352.259
Episode 1817 reward = 18.063
Episode 1818 reward = 10.1737
Episode 1819 reward = 25.9169
New gamma = 0.95917
Episode 1820 reward = -41.8012
---------------------------
Last 10 episode avg = -65.8843
---------------------------
Episode 1821 reward = -77.7144
Episode 1822 reward = -103.4
Episode 1823 reward = -61.4487
Episode 1824 reward = -27.2619
Episode 1825 reward = -63.3865
Episode 1826 reward = -30.5011
Episode 1827 reward = -130.276
Episode 1828 reward = -70.9164
Episode 1829 reward = -25.0006
New gamma = 0.97116
Episode 1830 reward = 4.57164
---------------------------
Last 10 episode avg = -63.1707
---------------------------
Episode 1831 reward = -10.0969
Episode 1832 reward = -50.8046
Episode 1833 reward = -15.0214
Episode 1834 reward = -186.999
Episode 1835 reward = -48.6081
Episode 1836 rewar

Episode 2021 reward = 172.342
Episode 2022 reward = 274.08
Episode 2023 reward = 283.221
Episode 2024 reward = 128.951
Episode 2025 reward = 77.9832
Episode 2026 reward = 110.732
Episode 2027 reward = 23.8095
Episode 2028 reward = 269.546
Episode 2029 reward = 196.674
Episode 2030 reward = 275.061
---------------------------
Last 10 episode avg = 157.058
---------------------------
Episode 2031 reward = 125.217
Episode 2032 reward = 195.765
Episode 2033 reward = 204.96
Episode 2034 reward = 227.867
Episode 2035 reward = 227.059
Episode 2036 reward = 275.564
Episode 2037 reward = 222.229
Episode 2038 reward = -41.1127
Episode 2039 reward = -60.4806
Episode 2040 reward = -65.9798
---------------------------
Last 10 episode avg = 165.213
---------------------------
Episode 2041 reward = -49.2233
Episode 2042 reward = -74.0328
Episode 2043 reward = 234.976
Episode 2044 reward = -36.8193
Episode 2045 reward = 141.225
Episode 2046 reward = 207.437
Episode 2047 reward = 105.405
Episode 2048 r

Episode 2232 reward = 212.262
Episode 2233 reward = 70.9033
Episode 2234 reward = 189.871
Episode 2235 reward = 182.666
Episode 2236 reward = 189.764
Episode 2237 reward = 206.394
Episode 2238 reward = 220.555
Episode 2239 reward = 212.982
Episode 2240 reward = 158.895
---------------------------
Last 10 episode avg = 198.214
---------------------------
Episode 2241 reward = 78.6848
Episode 2242 reward = 75.0823
Episode 2243 reward = -52.6335
Episode 2244 reward = 262.076
Episode 2245 reward = 194.139
Episode 2246 reward = -97.2241
Episode 2247 reward = 229.778
Episode 2248 reward = 257.897
Episode 2249 reward = 171.041
Episode 2250 reward = 37.4306
---------------------------
Last 10 episode avg = 127.774
---------------------------
Episode 2251 reward = 88.443
Episode 2252 reward = 155.042
Episode 2253 reward = 39.7416
Episode 2254 reward = 54.4226
Episode 2255 reward = 187.448
Episode 2256 reward = 244.515
Episode 2257 reward = -93.7334
Episode 2258 reward = 197.496
Episode 2259 rew

Episode 2444 reward = 261.604
Episode 2445 reward = 12.6259
Episode 2446 reward = -96.6582
Episode 2447 reward = 11.6067
Episode 2448 reward = 286.786
Episode 2449 reward = 117.127
Episode 2450 reward = 174.885
---------------------------
Last 10 episode avg = 135.149
---------------------------
Episode 2451 reward = 155.564
Episode 2452 reward = 133.995
Episode 2453 reward = 157.905
Episode 2454 reward = 136.548
Episode 2455 reward = 185.13
Episode 2456 reward = 40.4757
Episode 2457 reward = -190.146
Episode 2458 reward = 225.633
Episode 2459 reward = 9.54928
Episode 2460 reward = 3.68298
---------------------------
Last 10 episode avg = 102.954
---------------------------
Episode 2461 reward = 35.5565
Episode 2462 reward = -35.5982
Episode 2463 reward = 230.311
Episode 2464 reward = 38.3337
Episode 2465 reward = -10.8558
Episode 2466 reward = 249.104
Episode 2467 reward = -55.2961
Episode 2468 reward = 185.105
Episode 2469 reward = 211.716
Episode 2470 reward = 198.101
--------------

Episode 2656 reward = 261.956
Episode 2657 reward = -95.9193
Episode 2658 reward = -68.6428
Episode 2659 reward = 189.475
Episode 2660 reward = -49.1393
---------------------------
Last 10 episode avg = 33.8698
---------------------------
Episode 2661 reward = -75.9597
Episode 2662 reward = -72.4797
Episode 2663 reward = -143.113
Episode 2664 reward = -84.1992
Episode 2665 reward = 72.9128
Episode 2666 reward = 28.4361
Episode 2667 reward = -384.418
Episode 2668 reward = 129.826
Episode 2669 reward = 144.805
Episode 2670 reward = 191.359
---------------------------
Last 10 episode avg = -43.3329
---------------------------
Episode 2671 reward = -8.7318
Episode 2672 reward = 140.395
Episode 2673 reward = 218.911
Episode 2674 reward = 278.8
Episode 2675 reward = 228.092
Episode 2676 reward = 268.078
Episode 2677 reward = 121.304
Episode 2678 reward = -249.496
Episode 2679 reward = 76.8385
Episode 2680 reward = 198.168
---------------------------
Last 10 episode avg = 126.555
------------

Episode 2868 reward = 206.662
Episode 2869 reward = 264.825
Episode 2870 reward = 194.993
---------------------------
Last 10 episode avg = 163.234
---------------------------
Episode 2871 reward = -198.089
Episode 2872 reward = 115.872
Episode 2873 reward = 118.743
Episode 2874 reward = -153.095
Episode 2875 reward = 163.596
Episode 2876 reward = 250.496
Episode 2877 reward = 183.323
Episode 2878 reward = 43.5124
Episode 2879 reward = 134.607
Episode 2880 reward = 176.058
---------------------------
Last 10 episode avg = 85.396
---------------------------
Episode 2881 reward = -41.8614
Episode 2882 reward = 214.135
Episode 2883 reward = 217.582
Episode 2884 reward = 115.525
Episode 2885 reward = 150.393
Episode 2886 reward = 193.93
Episode 2887 reward = -17.7564
Episode 2888 reward = 286.707
Episode 2889 reward = -85.986
Episode 2890 reward = -35.014
---------------------------
Last 10 episode avg = 120.873
---------------------------
Episode 2891 reward = -170.396
Episode 2892 reward

Episode 3081 reward = 217.647
Episode 3082 reward = 234.496
Episode 3083 reward = 234.844
Episode 3084 reward = -9.56911
Episode 3085 reward = -135.571
Episode 3086 reward = 164.3
Episode 3087 reward = -59.6715
Episode 3088 reward = 292.061
Episode 3089 reward = -31.9063
Episode 3090 reward = 205.04
---------------------------
Last 10 episode avg = 117.785
---------------------------
Episode 3091 reward = 216.586
Episode 3092 reward = -51.736
Episode 3093 reward = 229.057
Episode 3094 reward = 242.237
Episode 3095 reward = 155.093
Episode 3096 reward = 82.8755
Episode 3097 reward = 138.109
Episode 3098 reward = -88.6487
Episode 3099 reward = -19.0449
Episode 3100 reward = 138.701
---------------------------
Last 10 episode avg = 110.957
---------------------------
Episode 3101 reward = 196.274
Episode 3102 reward = 256.846
Episode 3103 reward = 189.283
Episode 3104 reward = 166.555
Episode 3105 reward = 165.699
Episode 3106 reward = 135.781
Episode 3107 reward = 259.054
Episode 3108 re

KeyboardInterrupt: 

In [None]:
print("Max episode reward = {}".format(np.max(total_reward)))
model.save('dqn.h5')

In [34]:
for i_episode in range(20):
    observation = env.reset()
    for t in range(1000):
        
        action = choose_action(observation, 0.0)
        observation, reward, done, info = env.step(action)
        env.render()
        if done:
            print("Episode finished after {} timesteps\n Last Reward: {}".format(t+1, reward))
            break

env.close()

Episode finished after 242 timesteps
 Last Reward: 100
Episode finished after 472 timesteps
 Last Reward: 100
Episode finished after 353 timesteps
 Last Reward: 100
Episode finished after 286 timesteps
 Last Reward: 100
Episode finished after 354 timesteps
 Last Reward: 100
Episode finished after 351 timesteps
 Last Reward: 100
Episode finished after 256 timesteps
 Last Reward: 100
Episode finished after 143 timesteps
 Last Reward: -100
Episode finished after 293 timesteps
 Last Reward: 100
Episode finished after 381 timesteps
 Last Reward: 100
Episode finished after 297 timesteps
 Last Reward: 100
Episode finished after 411 timesteps
 Last Reward: 100
Episode finished after 370 timesteps
 Last Reward: 100
Episode finished after 342 timesteps
 Last Reward: 100
Episode finished after 397 timesteps
 Last Reward: 100
Episode finished after 387 timesteps
 Last Reward: 100
Episode finished after 391 timesteps
 Last Reward: 100
Episode finished after 1000 timesteps
 Last Reward: -0.023770310

In [33]:
model = load_model('dqn.h5')