In [2]:
import sys, math
import numpy as np
import matplotlib.pyplot as plt
import gym
from keras.models import Sequential
from keras.layers import Dense, Activation, Flatten
from keras.optimizers import Adam, SGD
from keras.models import load_model
import random

  from ._conv import register_converters as _register_converters
Using TensorFlow backend.


In [3]:
class DQNAgent:
    def __init__(self, state_size, action_size):
        self.state_size = state_size
        self.action_size = action_size
        self.gamma = 0.1
        self.max_gamma = 0.975
        self.epsilon = 0.1
        self.gamma_decay = 1.0125
        self.expirience_size = 18
        self.max_memory_size = 25
        self.mini_batch_size = 5
        self.experiences = np.empty([0, self.expirience_size], dtype=object)
        self.model = self._build_model()
        
    def _build_model(self):
        model = Sequential()
        model.add(Flatten(input_shape=(1,) + self.state_size))
        model.add(Dense(40))
        model.add(Activation('relu'))
        model.add(Dense(40))
        model.add(Activation('relu'))
        model.add(Dense(self.action_size))
        model.add(Activation('linear'))
        model.compile(loss='mean_squared_error',  optimizer=Adam(lr=0.002, decay=2.25e-05))
        print(model.summary())
        return model
    
    def remember(self, experience):
        self.experiences = np.insert(self.experiences, 0,
                                     experience, axis=0)
        if len(self.experiences) > self.max_memory_size:
            self.experiences = np.delete(self.experiences,
                                         self.max_memory_size, axis=0)

    def sample_experiences(self, mini_batch_size):
        if(mini_batch_size > len(self.experiences)):
            rep_needed = True
        else:
            rep_needed = False
        s = self.experiences[np.random.choice(
                self.experiences.shape[0],
                mini_batch_size, replace=rep_needed)]
        return s
    
    def forward_pass(self, state):
        input = np.empty([1, 1, self.state_size[0]])
        input[0][0] = state
        return self.model.predict(input)[0]

    def get_targets(self, state, action, reward, next_state):
        current_state_q_values = self.forward_pass(state)
        next_state_q_values = self.forward_pass(next_state)
        max_q_next_state = np.max(next_state_q_values)
        targets = np.empty([1, self.action_size])

        for i in range(self.action_size):
            if i == action:
                targets[0][i] = reward + (self.gamma * max_q_next_state)
            else:
                targets[0][i] = current_state_q_values[i]
        return targets
    
    def choose_action(self, state):
        r = np.random.uniform()
        if r < self.epsilon:
            action = np.floor(np.random.randint(self.action_size))
        else:
            state_q_values = self.forward_pass(state)
            action = np.argmax(state_q_values)
        return int(action)
    
    def replay(self):
        sample_batch = self.sample_experiences(self.mini_batch_size)
        for e in sample_batch:
            state, action, reward, new_state = e[0:8], e[8], e[9], e[10:18]
            targets = self.get_targets(state, action, reward, new_state)
            x = np.empty([1, 1, 8])
            x[0][0] = state
            self.model.train_on_batch(x, targets)
            
    def increase_gamma(self):
        if self.gamma < self.max_gamma:
            self.gamma = self.gamma * self.gamma_decay
            print("\n======== new gamma: {:.8} ========".format(self.gamma))
        
    def load(self, name):
        self.model = load_model(name)

    def save(self, name):
        self.model.save(name)
        

In [4]:
env = gym.make('LunarLander-v2')
seed = 2
random.seed(seed)
env.seed(seed)

[2]

In [5]:
max_steps_per_episode = 1000
max_episodes = 4000
total_reward = np.zeros(max_episodes)

state_size = env.observation_space.shape
action_size = env.action_space.n

agent = DQNAgent(state_size, action_size)

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
flatten_1 (Flatten)          (None, 8)                 0         
_________________________________________________________________
dense_1 (Dense)              (None, 40)                360       
_________________________________________________________________
activation_1 (Activation)    (None, 40)                0         
_________________________________________________________________
dense_2 (Dense)              (None, 40)                1640      
_________________________________________________________________
activation_2 (Activation)    (None, 40)                0         
_________________________________________________________________
dense_3 (Dense)              (None, 4)                 164       
_________________________________________________________________
activation_3 (Activation)    (None, 4)                 0         
Total para

In [6]:
for episode in range(max_episodes):
    state = env.reset()
    current_step = 0
    episode_done = False
    while current_step < max_steps_per_episode and not episode_done:
        action = agent.choose_action(state)
        new_state, reward, episode_done, info = env.step(action)
        total_reward[episode] = total_reward[episode] + reward
        experience = np.empty([0])
        experience = np.append(experience, state)
        experience = np.append(experience, [action])
        experience = np.append(experience, [reward])
        experience = np.append(experience, new_state)
        agent.remember(experience)

        current_step = current_step + 1
        state = new_state
        agent.replay()

    print("Episode: {}/{}, num_steps: {}, total_reward: {:.4}"
          .format(episode, max_episodes, current_step, total_reward[episode]))
    
    if not episode % 10 and episode and agent.gamma < agent.max_gamma:
        agent.increase_gamma()
        agent.save('lunar_lander_dqn_backup.h5')
    
    if not episode % 10 and episode:
        print("Last 10 episode avg = {:.6}".format(np.average(total_reward[episode-10:episode])))

    if episode and np.average(total_reward[episode-100:episode]) > 150:
        break

Episode: 0/5000, num_steps: 113, total_reward: -403.4
Episode: 1/5000, num_steps: 80, total_reward: -237.0


  avg = a.mean(axis)
  ret = ret.dtype.type(ret / rcount)


Episode: 2/5000, num_steps: 88, total_reward: -450.7
Episode: 3/5000, num_steps: 80, total_reward: -246.8
Episode: 4/5000, num_steps: 84, total_reward: -367.2
Episode: 5/5000, num_steps: 62, total_reward: -120.2
Episode: 6/5000, num_steps: 67, total_reward: -148.1
Episode: 7/5000, num_steps: 201, total_reward: -421.3
Episode: 8/5000, num_steps: 120, total_reward: -226.4
Episode: 9/5000, num_steps: 79, total_reward: -396.9
Episode: 10/5000, num_steps: 104, total_reward: -55.17

---------------------------
Last 10 episode avg = -301.816
---------------------------
Episode: 11/5000, num_steps: 114, total_reward: -287.4
Episode: 12/5000, num_steps: 128, total_reward: -169.7
Episode: 13/5000, num_steps: 103, total_reward: -316.0
Episode: 14/5000, num_steps: 92, total_reward: 0.3239
Episode: 15/5000, num_steps: 132, total_reward: -325.2
Episode: 16/5000, num_steps: 328, total_reward: -127.5
Episode: 17/5000, num_steps: 113, total_reward: -479.9
Episode: 18/5000, num_steps: 279, total_reward:

Episode: 124/5000, num_steps: 252, total_reward: -46.64
Episode: 125/5000, num_steps: 187, total_reward: -116.5
Episode: 126/5000, num_steps: 369, total_reward: -303.8
Episode: 127/5000, num_steps: 259, total_reward: -174.5
Episode: 128/5000, num_steps: 130, total_reward: -178.3
Episode: 129/5000, num_steps: 336, total_reward: -250.0
Episode: 130/5000, num_steps: 168, total_reward: -61.61

---------------------------
Last 10 episode avg = -176.183
---------------------------
Episode: 131/5000, num_steps: 447, total_reward: -96.14
Episode: 132/5000, num_steps: 223, total_reward: -200.3
Episode: 133/5000, num_steps: 295, total_reward: -252.0
Episode: 134/5000, num_steps: 210, total_reward: -24.61
Episode: 135/5000, num_steps: 226, total_reward: -289.4
Episode: 136/5000, num_steps: 123, total_reward: -62.82
Episode: 137/5000, num_steps: 203, total_reward: -74.54
Episode: 138/5000, num_steps: 61, total_reward: -415.0
Episode: 139/5000, num_steps: 138, total_reward: -776.9
Episode: 140/5000

Episode: 244/5000, num_steps: 232, total_reward: -64.27
Episode: 245/5000, num_steps: 133, total_reward: -229.9
Episode: 246/5000, num_steps: 181, total_reward: -312.6
Episode: 247/5000, num_steps: 239, total_reward: -31.95
Episode: 248/5000, num_steps: 828, total_reward: 113.6
Episode: 249/5000, num_steps: 265, total_reward: -73.66
Episode: 250/5000, num_steps: 298, total_reward: -56.93

---------------------------
Last 10 episode avg = -126.298
---------------------------
Episode: 251/5000, num_steps: 170, total_reward: -211.7
Episode: 252/5000, num_steps: 280, total_reward: -201.7
Episode: 253/5000, num_steps: 182, total_reward: -44.95
Episode: 254/5000, num_steps: 439, total_reward: -234.9
Episode: 255/5000, num_steps: 182, total_reward: -69.95
Episode: 256/5000, num_steps: 232, total_reward: -186.0
Episode: 257/5000, num_steps: 231, total_reward: -290.5
Episode: 258/5000, num_steps: 361, total_reward: -266.2
Episode: 259/5000, num_steps: 304, total_reward: -98.54
Episode: 260/5000

Episode: 364/5000, num_steps: 251, total_reward: -192.3
Episode: 365/5000, num_steps: 221, total_reward: -211.2
Episode: 366/5000, num_steps: 163, total_reward: -60.9
Episode: 367/5000, num_steps: 204, total_reward: -17.8
Episode: 368/5000, num_steps: 313, total_reward: -352.9
Episode: 369/5000, num_steps: 244, total_reward: -79.88
Episode: 370/5000, num_steps: 254, total_reward: -216.6

---------------------------
Last 10 episode avg = -165.086
---------------------------
Episode: 371/5000, num_steps: 141, total_reward: -9.007
Episode: 372/5000, num_steps: 321, total_reward: -305.9
Episode: 373/5000, num_steps: 243, total_reward: -218.1
Episode: 374/5000, num_steps: 159, total_reward: -142.6
Episode: 375/5000, num_steps: 370, total_reward: -169.3
Episode: 376/5000, num_steps: 123, total_reward: -240.4
Episode: 377/5000, num_steps: 84, total_reward: -149.6
Episode: 378/5000, num_steps: 160, total_reward: -26.44
Episode: 379/5000, num_steps: 276, total_reward: -159.5
Episode: 380/5000, 

Episode: 484/5000, num_steps: 281, total_reward: -73.97
Episode: 485/5000, num_steps: 152, total_reward: -176.5
Episode: 486/5000, num_steps: 277, total_reward: -74.52
Episode: 487/5000, num_steps: 299, total_reward: -62.88
Episode: 488/5000, num_steps: 173, total_reward: -129.6
Episode: 489/5000, num_steps: 247, total_reward: -68.2
Episode: 490/5000, num_steps: 254, total_reward: -70.14

---------------------------
Last 10 episode avg = -90.5132
---------------------------
Episode: 491/5000, num_steps: 434, total_reward: -46.86
Episode: 492/5000, num_steps: 325, total_reward: -142.2
Episode: 493/5000, num_steps: 200, total_reward: -140.9
Episode: 494/5000, num_steps: 1000, total_reward: 66.82
Episode: 495/5000, num_steps: 391, total_reward: -231.6
Episode: 496/5000, num_steps: 260, total_reward: 0.1149
Episode: 497/5000, num_steps: 162, total_reward: -179.4
Episode: 498/5000, num_steps: 175, total_reward: -38.64
Episode: 499/5000, num_steps: 153, total_reward: -142.9
Episode: 500/5000

Episode: 604/5000, num_steps: 184, total_reward: -159.2
Episode: 605/5000, num_steps: 243, total_reward: -41.12
Episode: 606/5000, num_steps: 184, total_reward: 6.116
Episode: 607/5000, num_steps: 211, total_reward: -282.1
Episode: 608/5000, num_steps: 189, total_reward: 63.2
Episode: 609/5000, num_steps: 237, total_reward: -216.6
Episode: 610/5000, num_steps: 234, total_reward: -243.9

---------------------------
Last 10 episode avg = -111.963
---------------------------
Episode: 611/5000, num_steps: 720, total_reward: 230.8
Episode: 612/5000, num_steps: 230, total_reward: -309.8
Episode: 613/5000, num_steps: 417, total_reward: -271.3
Episode: 614/5000, num_steps: 242, total_reward: -186.5
Episode: 615/5000, num_steps: 151, total_reward: -230.1
Episode: 616/5000, num_steps: 136, total_reward: -165.9
Episode: 617/5000, num_steps: 216, total_reward: -205.7
Episode: 618/5000, num_steps: 289, total_reward: -201.6
Episode: 619/5000, num_steps: 280, total_reward: 9.549
Episode: 620/5000, nu

Episode: 724/5000, num_steps: 185, total_reward: 52.57
Episode: 725/5000, num_steps: 423, total_reward: 260.6
Episode: 726/5000, num_steps: 591, total_reward: 106.2
Episode: 727/5000, num_steps: 151, total_reward: -186.2
Episode: 728/5000, num_steps: 352, total_reward: -300.3
Episode: 729/5000, num_steps: 498, total_reward: -280.6
Episode: 730/5000, num_steps: 426, total_reward: -228.4

---------------------------
Last 10 episode avg = -103.857
---------------------------
Episode: 731/5000, num_steps: 329, total_reward: -110.6
Episode: 732/5000, num_steps: 341, total_reward: -19.54
Episode: 733/5000, num_steps: 242, total_reward: -146.8
Episode: 734/5000, num_steps: 228, total_reward: -177.3
Episode: 735/5000, num_steps: 411, total_reward: 207.2
Episode: 736/5000, num_steps: 456, total_reward: 158.4
Episode: 737/5000, num_steps: 165, total_reward: -493.5
Episode: 738/5000, num_steps: 282, total_reward: -694.4
Episode: 739/5000, num_steps: 187, total_reward: -452.1
Episode: 740/5000, nu

Episode: 844/5000, num_steps: 229, total_reward: -204.7
Episode: 845/5000, num_steps: 289, total_reward: 21.44
Episode: 846/5000, num_steps: 646, total_reward: 216.8
Episode: 847/5000, num_steps: 584, total_reward: 184.6
Episode: 848/5000, num_steps: 241, total_reward: -24.69
Episode: 849/5000, num_steps: 262, total_reward: -22.39
Episode: 850/5000, num_steps: 292, total_reward: -28.63

---------------------------
Last 10 episode avg = -48.0776
---------------------------
Episode: 851/5000, num_steps: 233, total_reward: -14.38
Episode: 852/5000, num_steps: 309, total_reward: -216.7
Episode: 853/5000, num_steps: 305, total_reward: 25.06
Episode: 854/5000, num_steps: 400, total_reward: -112.5
Episode: 855/5000, num_steps: 227, total_reward: -62.82
Episode: 856/5000, num_steps: 363, total_reward: -248.1
Episode: 857/5000, num_steps: 186, total_reward: 79.89
Episode: 858/5000, num_steps: 229, total_reward: 25.39
Episode: 859/5000, num_steps: 322, total_reward: -232.0
Episode: 860/5000, num

Episode: 964/5000, num_steps: 510, total_reward: -159.6
Episode: 965/5000, num_steps: 208, total_reward: -177.9
Episode: 966/5000, num_steps: 264, total_reward: -11.95
Episode: 967/5000, num_steps: 208, total_reward: -195.9
Episode: 968/5000, num_steps: 429, total_reward: -44.72
Episode: 969/5000, num_steps: 247, total_reward: -82.52
Episode: 970/5000, num_steps: 218, total_reward: -91.43

---------------------------
Last 10 episode avg = -138.724
---------------------------
Episode: 971/5000, num_steps: 227, total_reward: -216.9
Episode: 972/5000, num_steps: 387, total_reward: -240.9
Episode: 973/5000, num_steps: 334, total_reward: -168.2
Episode: 974/5000, num_steps: 356, total_reward: -66.3
Episode: 975/5000, num_steps: 383, total_reward: -251.7
Episode: 976/5000, num_steps: 247, total_reward: -67.51
Episode: 977/5000, num_steps: 334, total_reward: -142.9
Episode: 978/5000, num_steps: 255, total_reward: -100.4
Episode: 979/5000, num_steps: 221, total_reward: -247.5
Episode: 980/5000

Episode: 1082/5000, num_steps: 428, total_reward: -72.36
Episode: 1083/5000, num_steps: 310, total_reward: -2.027
Episode: 1084/5000, num_steps: 248, total_reward: -177.9
Episode: 1085/5000, num_steps: 239, total_reward: -166.0
Episode: 1086/5000, num_steps: 308, total_reward: -189.2
Episode: 1087/5000, num_steps: 237, total_reward: -45.6
Episode: 1088/5000, num_steps: 235, total_reward: -3.866
Episode: 1089/5000, num_steps: 178, total_reward: -177.5
Episode: 1090/5000, num_steps: 294, total_reward: -47.77

---------------------------
Last 10 episode avg = -84.7665
---------------------------
Episode: 1091/5000, num_steps: 340, total_reward: -236.8
Episode: 1092/5000, num_steps: 264, total_reward: -9.837
Episode: 1093/5000, num_steps: 280, total_reward: 2.997
Episode: 1094/5000, num_steps: 367, total_reward: -94.43
Episode: 1095/5000, num_steps: 298, total_reward: -229.3
Episode: 1096/5000, num_steps: 267, total_reward: -51.14
Episode: 1097/5000, num_steps: 358, total_reward: -40.64
Ep

Episode: 1201/5000, num_steps: 244, total_reward: -195.2
Episode: 1202/5000, num_steps: 1000, total_reward: 167.8
Episode: 1203/5000, num_steps: 224, total_reward: -198.2
Episode: 1204/5000, num_steps: 195, total_reward: -166.1
Episode: 1205/5000, num_steps: 479, total_reward: 265.1
Episode: 1206/5000, num_steps: 120, total_reward: -109.0
Episode: 1207/5000, num_steps: 1000, total_reward: 87.91
Episode: 1208/5000, num_steps: 161, total_reward: -167.0
Episode: 1209/5000, num_steps: 282, total_reward: -233.0
Episode: 1210/5000, num_steps: 711, total_reward: 185.0

---------------------------
Last 10 episode avg = -55.4983
---------------------------
Episode: 1211/5000, num_steps: 394, total_reward: -230.3
Episode: 1212/5000, num_steps: 177, total_reward: -215.0
Episode: 1213/5000, num_steps: 161, total_reward: -575.7
Episode: 1214/5000, num_steps: 281, total_reward: -328.6
Episode: 1215/5000, num_steps: 312, total_reward: -294.7
Episode: 1216/5000, num_steps: 369, total_reward: -309.0
Ep

Episode: 1321/5000, num_steps: 329, total_reward: -165.8
Episode: 1322/5000, num_steps: 326, total_reward: -240.2
Episode: 1323/5000, num_steps: 312, total_reward: -240.1
Episode: 1324/5000, num_steps: 169, total_reward: -159.0
Episode: 1325/5000, num_steps: 351, total_reward: -105.4
Episode: 1326/5000, num_steps: 247, total_reward: 13.18
Episode: 1327/5000, num_steps: 246, total_reward: 7.435
Episode: 1328/5000, num_steps: 269, total_reward: -44.35
Episode: 1329/5000, num_steps: 317, total_reward: -55.9
Episode: 1330/5000, num_steps: 225, total_reward: -196.0

---------------------------
Last 10 episode avg = -71.5767
---------------------------
Episode: 1331/5000, num_steps: 171, total_reward: -38.14
Episode: 1332/5000, num_steps: 289, total_reward: -62.21
Episode: 1333/5000, num_steps: 239, total_reward: -29.19
Episode: 1334/5000, num_steps: 360, total_reward: -205.6
Episode: 1335/5000, num_steps: 287, total_reward: 27.07
Episode: 1336/5000, num_steps: 321, total_reward: 12.42
Episo

Episode: 1441/5000, num_steps: 301, total_reward: -86.2
Episode: 1442/5000, num_steps: 395, total_reward: -90.33
Episode: 1443/5000, num_steps: 226, total_reward: -177.1
Episode: 1444/5000, num_steps: 175, total_reward: -206.9
Episode: 1445/5000, num_steps: 259, total_reward: -185.1
Episode: 1446/5000, num_steps: 330, total_reward: 256.7
Episode: 1447/5000, num_steps: 243, total_reward: -47.62
Episode: 1448/5000, num_steps: 282, total_reward: -243.2
Episode: 1449/5000, num_steps: 252, total_reward: -30.28
Episode: 1450/5000, num_steps: 299, total_reward: -8.068

---------------------------
Last 10 episode avg = -83.129
---------------------------
Episode: 1451/5000, num_steps: 239, total_reward: -103.4
Episode: 1452/5000, num_steps: 482, total_reward: 249.7
Episode: 1453/5000, num_steps: 322, total_reward: -34.99
Episode: 1454/5000, num_steps: 278, total_reward: -50.1
Episode: 1455/5000, num_steps: 240, total_reward: -28.94
Episode: 1456/5000, num_steps: 350, total_reward: 14.95
Episod

Episode: 1561/5000, num_steps: 355, total_reward: -31.47
Episode: 1562/5000, num_steps: 243, total_reward: 17.22
Episode: 1563/5000, num_steps: 446, total_reward: 238.9
Episode: 1564/5000, num_steps: 230, total_reward: -26.67
Episode: 1565/5000, num_steps: 854, total_reward: 245.2
Episode: 1566/5000, num_steps: 304, total_reward: -62.23
Episode: 1567/5000, num_steps: 414, total_reward: 2.68
Episode: 1568/5000, num_steps: 154, total_reward: -160.8
Episode: 1569/5000, num_steps: 387, total_reward: -19.87
Episode: 1570/5000, num_steps: 231, total_reward: -22.71

---------------------------
Last 10 episode avg = 19.829
---------------------------
Episode: 1571/5000, num_steps: 370, total_reward: 296.1
Episode: 1572/5000, num_steps: 281, total_reward: -41.5
Episode: 1573/5000, num_steps: 665, total_reward: 256.0
Episode: 1574/5000, num_steps: 289, total_reward: -232.4
Episode: 1575/5000, num_steps: 458, total_reward: 187.6
Episode: 1576/5000, num_steps: 133, total_reward: -277.6
Episode: 15

Episode: 1681/5000, num_steps: 225, total_reward: -150.4
Episode: 1682/5000, num_steps: 205, total_reward: -196.6
Episode: 1683/5000, num_steps: 227, total_reward: -11.02
Episode: 1684/5000, num_steps: 556, total_reward: 172.1
Episode: 1685/5000, num_steps: 236, total_reward: -212.0
Episode: 1686/5000, num_steps: 540, total_reward: 237.0
Episode: 1687/5000, num_steps: 634, total_reward: 243.5
Episode: 1688/5000, num_steps: 207, total_reward: -143.9
Episode: 1689/5000, num_steps: 476, total_reward: -52.34
Episode: 1690/5000, num_steps: 230, total_reward: -11.69

---------------------------
Last 10 episode avg = -14.1573
---------------------------
Episode: 1691/5000, num_steps: 1000, total_reward: 58.84
Episode: 1692/5000, num_steps: 450, total_reward: 300.2
Episode: 1693/5000, num_steps: 292, total_reward: -14.02
Episode: 1694/5000, num_steps: 447, total_reward: 245.5
Episode: 1695/5000, num_steps: 238, total_reward: 26.17
Episode: 1696/5000, num_steps: 225, total_reward: 9.855
Episode

Episode: 1801/5000, num_steps: 769, total_reward: 118.7
Episode: 1802/5000, num_steps: 151, total_reward: -183.7
Episode: 1803/5000, num_steps: 200, total_reward: -77.99
Episode: 1804/5000, num_steps: 190, total_reward: 5.641
Episode: 1805/5000, num_steps: 220, total_reward: -187.4
Episode: 1806/5000, num_steps: 163, total_reward: 16.14
Episode: 1807/5000, num_steps: 293, total_reward: 20.95
Episode: 1808/5000, num_steps: 263, total_reward: -11.78
Episode: 1809/5000, num_steps: 201, total_reward: 26.7
Episode: 1810/5000, num_steps: 276, total_reward: 5.108

---------------------------
Last 10 episode avg = -28.2559
---------------------------
Episode: 1811/5000, num_steps: 267, total_reward: -7.814
Episode: 1812/5000, num_steps: 79, total_reward: -495.5
Episode: 1813/5000, num_steps: 490, total_reward: -67.82
Episode: 1814/5000, num_steps: 188, total_reward: -48.36
Episode: 1815/5000, num_steps: 117, total_reward: -185.1
Episode: 1816/5000, num_steps: 381, total_reward: -30.79
Episode:

Episode: 1926/5000, num_steps: 457, total_reward: 188.7
Episode: 1927/5000, num_steps: 558, total_reward: 209.6
Episode: 1928/5000, num_steps: 432, total_reward: 158.8
Episode: 1929/5000, num_steps: 1000, total_reward: 144.7
Episode: 1930/5000, num_steps: 288, total_reward: 7.769
---------------------------
Last 10 episode avg = 103.041
---------------------------
Episode: 1931/5000, num_steps: 312, total_reward: -26.62
Episode: 1932/5000, num_steps: 480, total_reward: 179.4
Episode: 1933/5000, num_steps: 128, total_reward: -12.36
Episode: 1934/5000, num_steps: 410, total_reward: 194.6
Episode: 1935/5000, num_steps: 1000, total_reward: 142.2
Episode: 1936/5000, num_steps: 651, total_reward: 180.3
Episode: 1937/5000, num_steps: 785, total_reward: 153.3
Episode: 1938/5000, num_steps: 1000, total_reward: 97.66
Episode: 1939/5000, num_steps: 1000, total_reward: 41.88
Episode: 1940/5000, num_steps: 1000, total_reward: 47.0
---------------------------
Last 10 episode avg = 95.8235
----------

Episode: 2052/5000, num_steps: 187, total_reward: -36.84
Episode: 2053/5000, num_steps: 247, total_reward: 51.54
Episode: 2054/5000, num_steps: 212, total_reward: -20.47
Episode: 2055/5000, num_steps: 262, total_reward: 3.877
Episode: 2056/5000, num_steps: 368, total_reward: 216.3
Episode: 2057/5000, num_steps: 227, total_reward: 14.96
Episode: 2058/5000, num_steps: 267, total_reward: -6.759
Episode: 2059/5000, num_steps: 213, total_reward: -3.263
Episode: 2060/5000, num_steps: 349, total_reward: 228.5
---------------------------
Last 10 episode avg = 42.6924
---------------------------
Episode: 2061/5000, num_steps: 289, total_reward: -54.18
Episode: 2062/5000, num_steps: 1000, total_reward: 130.2
Episode: 2063/5000, num_steps: 451, total_reward: 191.3
Episode: 2064/5000, num_steps: 383, total_reward: 218.5
Episode: 2065/5000, num_steps: 314, total_reward: -47.29
Episode: 2066/5000, num_steps: 407, total_reward: 203.6
Episode: 2067/5000, num_steps: 1000, total_reward: 104.9
Episode: 2

Episode: 2180/5000, num_steps: 367, total_reward: 253.7
---------------------------
Last 10 episode avg = 129.258
---------------------------
Episode: 2181/5000, num_steps: 728, total_reward: 229.2
Episode: 2182/5000, num_steps: 332, total_reward: -4.515
Episode: 2183/5000, num_steps: 814, total_reward: 214.2
Episode: 2184/5000, num_steps: 563, total_reward: 156.8
Episode: 2185/5000, num_steps: 420, total_reward: 233.4
Episode: 2186/5000, num_steps: 147, total_reward: -46.17
Episode: 2187/5000, num_steps: 675, total_reward: 240.0
Episode: 2188/5000, num_steps: 555, total_reward: 167.1
Episode: 2189/5000, num_steps: 320, total_reward: -99.76
Episode: 2190/5000, num_steps: 329, total_reward: -80.85
---------------------------
Last 10 episode avg = 134.396
---------------------------
Episode: 2191/5000, num_steps: 206, total_reward: -19.27
Episode: 2192/5000, num_steps: 447, total_reward: 230.4
Episode: 2193/5000, num_steps: 486, total_reward: 186.2
Episode: 2194/5000, num_steps: 445, tot

Episode: 2306/5000, num_steps: 378, total_reward: 164.2
Episode: 2307/5000, num_steps: 360, total_reward: 267.8
Episode: 2308/5000, num_steps: 462, total_reward: 197.5
Episode: 2309/5000, num_steps: 227, total_reward: -25.17
Episode: 2310/5000, num_steps: 285, total_reward: -57.83
---------------------------
Last 10 episode avg = 131.152
---------------------------
Episode: 2311/5000, num_steps: 293, total_reward: 246.9
Episode: 2312/5000, num_steps: 300, total_reward: 251.0
Episode: 2313/5000, num_steps: 174, total_reward: -34.23
Episode: 2314/5000, num_steps: 226, total_reward: -0.2637
Episode: 2315/5000, num_steps: 210, total_reward: -63.57
Episode: 2316/5000, num_steps: 424, total_reward: 198.1
Episode: 2317/5000, num_steps: 267, total_reward: 2.135
Episode: 2318/5000, num_steps: 376, total_reward: 267.9
Episode: 2319/5000, num_steps: 209, total_reward: 248.7
Episode: 2320/5000, num_steps: 310, total_reward: 282.4
---------------------------
Last 10 episode avg = 105.87
-----------

Episode: 2432/5000, num_steps: 148, total_reward: -3.262
Episode: 2433/5000, num_steps: 413, total_reward: 155.9
Episode: 2434/5000, num_steps: 426, total_reward: 193.3
Episode: 2435/5000, num_steps: 716, total_reward: 170.5
Episode: 2436/5000, num_steps: 565, total_reward: 211.6
Episode: 2437/5000, num_steps: 738, total_reward: 214.1
Episode: 2438/5000, num_steps: 587, total_reward: 207.4
Episode: 2439/5000, num_steps: 555, total_reward: 202.8
Episode: 2440/5000, num_steps: 664, total_reward: 240.0
---------------------------
Last 10 episode avg = 170.323
---------------------------
Episode: 2441/5000, num_steps: 131, total_reward: -91.67
Episode: 2442/5000, num_steps: 362, total_reward: 212.7
Episode: 2443/5000, num_steps: 669, total_reward: 154.7
Episode: 2444/5000, num_steps: 141, total_reward: 1.238
Episode: 2445/5000, num_steps: 411, total_reward: 193.3
Episode: 2446/5000, num_steps: 525, total_reward: 233.3
Episode: 2447/5000, num_steps: 343, total_reward: 246.6
Episode: 2448/50

### Сохраним получившуюся модель

In [7]:
agent.save('lunar_lander_dqn.h5')

### Загрузим получившуюся модель

In [8]:
agent.load('lunar_lander_dqn.h5')

### Тестирование

In [16]:
NUM_TESTS = 10
NUM_SUCCESS = 0
agent.epsilon = 0.0

for i_episode in range(10):
    observation = env.reset()
    for t in range(1000):
        
        action = agent.choose_action(observation)
        observation, reward, done, info = env.step(action)
        env.render()
        if done:
            if (reward == 100):
                NUM_SUCCESS += 1
            print("Episode finished after {} timesteps\n with last reward: {}".format(t+1, reward))
            break

env.close()
print("\nSUCCESSFULL landing in {}/{} cases".format(NUM_SUCCESS, NUM_TESTS))

Episode finished after 275 timesteps
 with last reward: 100
Episode finished after 464 timesteps
 with last reward: 100
Episode finished after 419 timesteps
 with last reward: 100
Episode finished after 420 timesteps
 with last reward: 100
Episode finished after 304 timesteps
 with last reward: 100
Episode finished after 384 timesteps
 with last reward: 100
Episode finished after 341 timesteps
 with last reward: 100
Episode finished after 307 timesteps
 with last reward: 100
Episode finished after 364 timesteps
 with last reward: 100
Episode finished after 265 timesteps
 with last reward: 100

SUCCESSFULL landing in 10/10 cases
