In [12]:
import random
import gym
import numpy as np
from collections import deque
from keras.models import Sequential
from keras.layers import Dense
from keras.optimizers import Adam
import gym_tetris

EPISODES = 5000


class DQNAgent:
    def __init__(self, state_size, action_size):
        self.state_size = state_size
        self.action_size = action_size
        self.memory = deque(maxlen=2000)
        self.gamma = 0.95    # discount rate
        self.epsilon = 1.0  # exploration rate
        self.epsilon_min = 0.01
        self.epsilon_decay = 0.995
        self.learning_rate = 0.001
        self.model = self._build_model()

    def _build_model(self):
        # Neural Net for Deep-Q learning Model
        model = Sequential()
        model.add(Dense(24, input_dim=self.state_size, activation='relu'))
        model.add(Dense(24, activation='relu'))
        model.add(Dense(self.action_size, activation='linear'))
        model.compile(loss='mse', optimizer=Adam(lr=self.learning_rate))
        return model

    def remember(self, state, action, reward, next_state, done):
        self.memory.append((state, action, reward, next_state, done))

    def act(self, state):
        if np.random.rand() <= self.epsilon:
            return random.randrange(self.action_size)
        act_values = self.model.predict(state)
        return np.argmax(act_values[0])  # returns action

    def replay(self, batch_size):
        minibatch = random.sample(self.memory, batch_size)
        for state, action, reward, next_state, done in minibatch:
            target = reward
            if not done:
                target = (reward + self.gamma * np.amax(self.model.predict(next_state)[0]))
            target_f = self.model.predict(state)
            target_f[0][action] = target
            self.model.fit(state, target_f, epochs=1, verbose=0)
        if self.epsilon > self.epsilon_min:
            self.epsilon *= self.epsilon_decay

    def load(self, name):
        self.model.load_weights(name)

    def save(self, name):
        self.model.save_weights(name)




In [13]:
def prepro(I):
    
    N = I[20:420, 10:210]
    N = N[9::20,9::20]
    N = N[::,::,0]/3 + N[::,::,1]/3 + N[::,::,2]/3
    N[N>0] = 1

    return N.astype(np.float).ravel()

In [14]:
env = gym.make('Tetris-v0')
#state_size = env.observation_space.shape[0]
state_size = 200
action_size = env.action_space.n
agent = DQNAgent(state_size, action_size)
agent.load("./save/cartpole-dqn.h5")
done = False
batch_size = 32

for e in range(EPISODES):
    state = env.reset()
    state = prepro(state)
    state = np.reshape(state, [1, state_size])
    #for time in range(500):
    time = 0
    total_reward = 0
    while(True):
        #env.render()
        action = agent.act(state)
        next_state, reward, done, _ = env.step(action)
        reward = 1 if not done else -10
        total_reward += reward
        next_state = prepro(next_state)
        next_state = np.reshape(next_state, [1, state_size])
        agent.remember(state, action, reward, next_state, done)
        state = next_state
        time += 1
        if done:
            agent.replay(len(agent.memory))
            agent.memory.clear()
            print("episode: {}/{}, ticks: {}, score: {}, total reward: {}, e: {:.2}"
                  .format(e, EPISODES, time, _['score'], total_reward, agent.epsilon))
            break
        if len(agent.memory) > batch_size:
            agent.replay(batch_size)
            agent.memory.clear()
    if e % 10 == 0:
        agent.save("./save/cartpole-dqn.h5")

episode: 0/5000, ticks: 986, score: 0, total reward: 975, e: 0.86
episode: 1/5000, ticks: 765, score: 0, total reward: 754, e: 0.76
episode: 2/5000, ticks: 1161, score: 0, total reward: 1150, e: 0.64
episode: 3/5000, ticks: 822, score: 0, total reward: 811, e: 0.56
episode: 4/5000, ticks: 1040, score: 0, total reward: 1029, e: 0.48
episode: 5/5000, ticks: 1029, score: 0, total reward: 1018, e: 0.41
episode: 6/5000, ticks: 1549, score: 0, total reward: 1538, e: 0.32
episode: 7/5000, ticks: 1081, score: 0, total reward: 1070, e: 0.27
episode: 8/5000, ticks: 680, score: 0, total reward: 669, e: 0.25
episode: 9/5000, ticks: 542, score: 0, total reward: 531, e: 0.23
episode: 10/5000, ticks: 1260, score: 0, total reward: 1249, e: 0.19
episode: 11/5000, ticks: 1663, score: 0, total reward: 1652, e: 0.14
episode: 12/5000, ticks: 1137, score: 0, total reward: 1126, e: 0.12
episode: 13/5000, ticks: 717, score: 0, total reward: 706, e: 0.11
episode: 14/5000, ticks: 858, score: 0, total reward: 84

episode: 120/5000, ticks: 621, score: 0, total reward: 610, e: 0.01
episode: 121/5000, ticks: 1303, score: 0, total reward: 1292, e: 0.01
episode: 122/5000, ticks: 667, score: 0, total reward: 656, e: 0.01
episode: 123/5000, ticks: 1628, score: 0, total reward: 1617, e: 0.01
episode: 124/5000, ticks: 1294, score: 0, total reward: 1283, e: 0.01
episode: 125/5000, ticks: 804, score: 0, total reward: 793, e: 0.01
episode: 126/5000, ticks: 957, score: 0, total reward: 946, e: 0.01
episode: 127/5000, ticks: 1001, score: 0, total reward: 990, e: 0.01
episode: 128/5000, ticks: 865, score: 0, total reward: 854, e: 0.01
episode: 129/5000, ticks: 1019, score: 0, total reward: 1008, e: 0.01
episode: 130/5000, ticks: 784, score: 0, total reward: 773, e: 0.01
episode: 131/5000, ticks: 1011, score: 0, total reward: 1000, e: 0.01
episode: 132/5000, ticks: 911, score: 0, total reward: 900, e: 0.01
episode: 133/5000, ticks: 766, score: 0, total reward: 755, e: 0.01
episode: 134/5000, ticks: 812, score:

episode: 239/5000, ticks: 1500, score: 0, total reward: 1489, e: 0.01
episode: 240/5000, ticks: 1146, score: 0, total reward: 1135, e: 0.01
episode: 241/5000, ticks: 812, score: 0, total reward: 801, e: 0.01
episode: 242/5000, ticks: 902, score: 0, total reward: 891, e: 0.01
episode: 243/5000, ticks: 938, score: 0, total reward: 927, e: 0.01
episode: 244/5000, ticks: 929, score: 0, total reward: 918, e: 0.01
episode: 245/5000, ticks: 1138, score: 0, total reward: 1127, e: 0.01
episode: 246/5000, ticks: 1103, score: 0, total reward: 1092, e: 0.01
episode: 247/5000, ticks: 748, score: 0, total reward: 737, e: 0.01
episode: 248/5000, ticks: 2064, score: 0, total reward: 2053, e: 0.01
episode: 249/5000, ticks: 920, score: 0, total reward: 909, e: 0.01
episode: 250/5000, ticks: 1111, score: 0, total reward: 1100, e: 0.01
episode: 251/5000, ticks: 1587, score: 0, total reward: 1576, e: 0.01
episode: 252/5000, ticks: 947, score: 0, total reward: 936, e: 0.01
episode: 253/5000, ticks: 703, sco

episode: 358/5000, ticks: 1083, score: 0, total reward: 1072, e: 0.01
episode: 359/5000, ticks: 1210, score: 0, total reward: 1199, e: 0.01
episode: 360/5000, ticks: 1019, score: 0, total reward: 1008, e: 0.01
episode: 361/5000, ticks: 1129, score: 0, total reward: 1118, e: 0.01
episode: 362/5000, ticks: 1137, score: 0, total reward: 1126, e: 0.01
episode: 363/5000, ticks: 1020, score: 0, total reward: 1009, e: 0.01
episode: 364/5000, ticks: 1083, score: 0, total reward: 1072, e: 0.01
episode: 365/5000, ticks: 1120, score: 0, total reward: 1109, e: 0.01
episode: 366/5000, ticks: 1102, score: 0, total reward: 1091, e: 0.01
episode: 367/5000, ticks: 1464, score: 0, total reward: 1453, e: 0.01
episode: 368/5000, ticks: 1365, score: 0, total reward: 1354, e: 0.01
episode: 369/5000, ticks: 948, score: 0, total reward: 937, e: 0.01
episode: 370/5000, ticks: 1800, score: 0, total reward: 1789, e: 0.01
episode: 371/5000, ticks: 912, score: 0, total reward: 901, e: 0.01
episode: 372/5000, ticks

episode: 476/5000, ticks: 1038, score: 0, total reward: 1027, e: 0.01
episode: 477/5000, ticks: 1120, score: 0, total reward: 1109, e: 0.01
episode: 478/5000, ticks: 1238, score: 0, total reward: 1227, e: 0.01
episode: 479/5000, ticks: 929, score: 0, total reward: 918, e: 0.01
episode: 480/5000, ticks: 947, score: 0, total reward: 936, e: 0.01
episode: 481/5000, ticks: 1138, score: 0, total reward: 1127, e: 0.01
episode: 482/5000, ticks: 947, score: 0, total reward: 936, e: 0.01
episode: 483/5000, ticks: 1583, score: 0, total reward: 1572, e: 0.01
episode: 484/5000, ticks: 1795, score: 0, total reward: 1784, e: 0.01
episode: 485/5000, ticks: 1582, score: 0, total reward: 1571, e: 0.01
episode: 486/5000, ticks: 1249, score: 0, total reward: 1238, e: 0.01
episode: 487/5000, ticks: 884, score: 0, total reward: 873, e: 0.01
episode: 488/5000, ticks: 838, score: 0, total reward: 827, e: 0.01
episode: 489/5000, ticks: 993, score: 0, total reward: 982, e: 0.01
episode: 490/5000, ticks: 730, s

episode: 595/5000, ticks: 1064, score: 0, total reward: 1053, e: 0.01
episode: 596/5000, ticks: 1412, score: 0, total reward: 1401, e: 0.01
episode: 597/5000, ticks: 1020, score: 0, total reward: 1009, e: 0.01
episode: 598/5000, ticks: 948, score: 0, total reward: 937, e: 0.01
episode: 599/5000, ticks: 1601, score: 0, total reward: 1590, e: 0.01
episode: 600/5000, ticks: 621, score: 0, total reward: 610, e: 0.01
episode: 601/5000, ticks: 756, score: 0, total reward: 745, e: 0.01
episode: 602/5000, ticks: 884, score: 0, total reward: 873, e: 0.01
episode: 603/5000, ticks: 1001, score: 0, total reward: 990, e: 0.01
episode: 604/5000, ticks: 1085, score: 0, total reward: 1074, e: 0.01
episode: 605/5000, ticks: 1229, score: 0, total reward: 1218, e: 0.01
episode: 606/5000, ticks: 1584, score: 0, total reward: 1573, e: 0.01
episode: 607/5000, ticks: 1873, score: 0, total reward: 1862, e: 0.01
episode: 608/5000, ticks: 1074, score: 0, total reward: 1063, e: 0.01
episode: 609/5000, ticks: 102

episode: 714/5000, ticks: 974, score: 0, total reward: 963, e: 0.01
episode: 715/5000, ticks: 875, score: 0, total reward: 864, e: 0.01
episode: 716/5000, ticks: 1043, score: 0, total reward: 1032, e: 0.01
episode: 717/5000, ticks: 784, score: 0, total reward: 773, e: 0.01
episode: 718/5000, ticks: 1029, score: 0, total reward: 1018, e: 0.01
episode: 719/5000, ticks: 1502, score: 0, total reward: 1491, e: 0.01
episode: 720/5000, ticks: 1250, score: 0, total reward: 1239, e: 0.01
episode: 721/5000, ticks: 702, score: 0, total reward: 691, e: 0.01
episode: 722/5000, ticks: 1237, score: 0, total reward: 1226, e: 0.01
episode: 723/5000, ticks: 902, score: 0, total reward: 891, e: 0.01
episode: 724/5000, ticks: 1040, score: 0, total reward: 1029, e: 0.01
episode: 725/5000, ticks: 1277, score: 0, total reward: 1266, e: 0.01
episode: 726/5000, ticks: 1012, score: 0, total reward: 1001, e: 0.01
episode: 727/5000, ticks: 939, score: 0, total reward: 928, e: 0.01
episode: 728/5000, ticks: 721, s

episode: 833/5000, ticks: 785, score: 0, total reward: 774, e: 0.01
episode: 834/5000, ticks: 812, score: 0, total reward: 801, e: 0.01
episode: 835/5000, ticks: 975, score: 0, total reward: 964, e: 0.01
episode: 836/5000, ticks: 784, score: 0, total reward: 773, e: 0.01
episode: 837/5000, ticks: 939, score: 0, total reward: 928, e: 0.01
episode: 838/5000, ticks: 1372, score: 0, total reward: 1361, e: 0.01
episode: 839/5000, ticks: 712, score: 0, total reward: 701, e: 0.01
episode: 840/5000, ticks: 1400, score: 0, total reward: 1389, e: 0.01
episode: 841/5000, ticks: 1257, score: 0, total reward: 1246, e: 0.01
episode: 842/5000, ticks: 912, score: 0, total reward: 901, e: 0.01
episode: 843/5000, ticks: 1030, score: 0, total reward: 1019, e: 0.01
episode: 844/5000, ticks: 1220, score: 0, total reward: 1209, e: 0.01
episode: 845/5000, ticks: 1101, score: 0, total reward: 1090, e: 0.01
episode: 846/5000, ticks: 930, score: 0, total reward: 919, e: 0.01
episode: 847/5000, ticks: 657, score

episode: 952/5000, ticks: 1399, score: 0, total reward: 1388, e: 0.01
episode: 953/5000, ticks: 1992, score: 0, total reward: 1981, e: 0.01
episode: 954/5000, ticks: 1202, score: 0, total reward: 1191, e: 0.01
episode: 955/5000, ticks: 1058, score: 0, total reward: 1047, e: 0.01
episode: 956/5000, ticks: 1064, score: 0, total reward: 1053, e: 0.01
episode: 957/5000, ticks: 1401, score: 0, total reward: 1390, e: 0.01
episode: 958/5000, ticks: 784, score: 0, total reward: 773, e: 0.01
episode: 959/5000, ticks: 1011, score: 0, total reward: 1000, e: 0.01
episode: 960/5000, ticks: 874, score: 0, total reward: 863, e: 0.01
episode: 961/5000, ticks: 1955, score: 0, total reward: 1944, e: 0.01
episode: 962/5000, ticks: 729, score: 0, total reward: 718, e: 0.01
episode: 963/5000, ticks: 1002, score: 0, total reward: 991, e: 0.01
episode: 964/5000, ticks: 958, score: 0, total reward: 947, e: 0.01
episode: 965/5000, ticks: 1065, score: 0, total reward: 1054, e: 0.01
episode: 966/5000, ticks: 119

episode: 1070/5000, ticks: 1275, score: 0, total reward: 1264, e: 0.01
episode: 1071/5000, ticks: 976, score: 0, total reward: 965, e: 0.01
episode: 1072/5000, ticks: 840, score: 0, total reward: 829, e: 0.01
episode: 1073/5000, ticks: 786, score: 0, total reward: 775, e: 0.01
episode: 1074/5000, ticks: 993, score: 0, total reward: 982, e: 0.01
episode: 1075/5000, ticks: 1093, score: 0, total reward: 1082, e: 0.01
episode: 1076/5000, ticks: 1418, score: 0, total reward: 1407, e: 0.01
episode: 1077/5000, ticks: 1256, score: 0, total reward: 1245, e: 0.01
episode: 1078/5000, ticks: 1330, score: 0, total reward: 1319, e: 0.01
episode: 1079/5000, ticks: 696, score: 0, total reward: 685, e: 0.01
episode: 1080/5000, ticks: 896, score: 0, total reward: 885, e: 0.01
episode: 1081/5000, ticks: 1211, score: 0, total reward: 1200, e: 0.01
episode: 1082/5000, ticks: 1729, score: 0, total reward: 1718, e: 0.01
episode: 1083/5000, ticks: 1056, score: 0, total reward: 1045, e: 0.01
episode: 1084/5000

episode: 1187/5000, ticks: 1565, score: 0, total reward: 1554, e: 0.01
episode: 1188/5000, ticks: 703, score: 0, total reward: 692, e: 0.01
episode: 1189/5000, ticks: 905, score: 0, total reward: 894, e: 0.01
episode: 1190/5000, ticks: 747, score: 0, total reward: 736, e: 0.01
episode: 1191/5000, ticks: 866, score: 0, total reward: 855, e: 0.01
episode: 1192/5000, ticks: 830, score: 0, total reward: 819, e: 0.01
episode: 1193/5000, ticks: 930, score: 0, total reward: 919, e: 0.01
episode: 1194/5000, ticks: 1727, score: 0, total reward: 1716, e: 0.01
episode: 1195/5000, ticks: 821, score: 0, total reward: 810, e: 0.01
episode: 1196/5000, ticks: 1110, score: 0, total reward: 1099, e: 0.01
episode: 1197/5000, ticks: 811, score: 0, total reward: 800, e: 0.01
episode: 1198/5000, ticks: 1048, score: 0, total reward: 1037, e: 0.01
episode: 1199/5000, ticks: 839, score: 0, total reward: 828, e: 0.01
episode: 1200/5000, ticks: 1256, score: 0, total reward: 1245, e: 0.01
episode: 1201/5000, tick

episode: 1305/5000, ticks: 1065, score: 0, total reward: 1054, e: 0.01
episode: 1306/5000, ticks: 930, score: 0, total reward: 919, e: 0.01
episode: 1307/5000, ticks: 1012, score: 0, total reward: 1001, e: 0.01
episode: 1308/5000, ticks: 1146, score: 0, total reward: 1135, e: 0.01
episode: 1309/5000, ticks: 1119, score: 0, total reward: 1108, e: 0.01
episode: 1310/5000, ticks: 703, score: 0, total reward: 692, e: 0.01
episode: 1311/5000, ticks: 902, score: 0, total reward: 891, e: 0.01
episode: 1312/5000, ticks: 839, score: 0, total reward: 828, e: 0.01
episode: 1313/5000, ticks: 1049, score: 0, total reward: 1038, e: 0.01
episode: 1314/5000, ticks: 1320, score: 0, total reward: 1309, e: 0.01
episode: 1315/5000, ticks: 656, score: 0, total reward: 645, e: 0.01
episode: 1316/5000, ticks: 729, score: 0, total reward: 718, e: 0.01
episode: 1317/5000, ticks: 883, score: 0, total reward: 872, e: 0.01
episode: 1318/5000, ticks: 820, score: 0, total reward: 809, e: 0.01
episode: 1319/5000, ti

episode: 1423/5000, ticks: 939, score: 0, total reward: 928, e: 0.01
episode: 1424/5000, ticks: 984, score: 0, total reward: 973, e: 0.01
episode: 1425/5000, ticks: 1402, score: 0, total reward: 1391, e: 0.01
episode: 1426/5000, ticks: 1230, score: 0, total reward: 1219, e: 0.01
episode: 1427/5000, ticks: 794, score: 0, total reward: 783, e: 0.01
episode: 1428/5000, ticks: 966, score: 0, total reward: 955, e: 0.01
episode: 1429/5000, ticks: 948, score: 0, total reward: 937, e: 0.01
episode: 1430/5000, ticks: 875, score: 0, total reward: 864, e: 0.01
episode: 1431/5000, ticks: 794, score: 0, total reward: 783, e: 0.01
episode: 1432/5000, ticks: 1029, score: 0, total reward: 1018, e: 0.01
episode: 1433/5000, ticks: 894, score: 0, total reward: 883, e: 0.01
episode: 1434/5000, ticks: 1111, score: 0, total reward: 1100, e: 0.01
episode: 1435/5000, ticks: 793, score: 0, total reward: 782, e: 0.01
episode: 1436/5000, ticks: 802, score: 0, total reward: 791, e: 0.01
episode: 1437/5000, ticks:

episode: 1541/5000, ticks: 1238, score: 0, total reward: 1227, e: 0.01
episode: 1542/5000, ticks: 1392, score: 0, total reward: 1381, e: 0.01
episode: 1543/5000, ticks: 1075, score: 0, total reward: 1064, e: 0.01
episode: 1544/5000, ticks: 957, score: 0, total reward: 946, e: 0.01
episode: 1545/5000, ticks: 975, score: 0, total reward: 964, e: 0.01
episode: 1546/5000, ticks: 1084, score: 0, total reward: 1073, e: 0.01
episode: 1547/5000, ticks: 1449, score: 0, total reward: 1438, e: 0.01
episode: 1548/5000, ticks: 865, score: 0, total reward: 854, e: 0.01
episode: 1549/5000, ticks: 892, score: 0, total reward: 881, e: 0.01
episode: 1550/5000, ticks: 956, score: 0, total reward: 945, e: 0.01
episode: 1551/5000, ticks: 1012, score: 0, total reward: 1001, e: 0.01
episode: 1552/5000, ticks: 1164, score: 0, total reward: 1153, e: 0.01
episode: 1553/5000, ticks: 1238, score: 0, total reward: 1227, e: 0.01
episode: 1554/5000, ticks: 1120, score: 0, total reward: 1109, e: 0.01
episode: 1555/50

episode: 1659/5000, ticks: 1519, score: 0, total reward: 1508, e: 0.01
episode: 1660/5000, ticks: 1028, score: 0, total reward: 1017, e: 0.01
episode: 1661/5000, ticks: 875, score: 0, total reward: 864, e: 0.01
episode: 1662/5000, ticks: 1392, score: 0, total reward: 1381, e: 0.01
episode: 1663/5000, ticks: 1129, score: 0, total reward: 1118, e: 0.01
episode: 1664/5000, ticks: 920, score: 0, total reward: 909, e: 0.01
episode: 1665/5000, ticks: 957, score: 0, total reward: 946, e: 0.01
episode: 1666/5000, ticks: 775, score: 0, total reward: 764, e: 0.01
episode: 1667/5000, ticks: 984, score: 0, total reward: 973, e: 0.01
episode: 1668/5000, ticks: 1266, score: 0, total reward: 1255, e: 0.01
episode: 1669/5000, ticks: 975, score: 0, total reward: 964, e: 0.01
episode: 1670/5000, ticks: 1047, score: 0, total reward: 1036, e: 0.01
episode: 1671/5000, ticks: 793, score: 0, total reward: 782, e: 0.01
episode: 1672/5000, ticks: 1029, score: 0, total reward: 1018, e: 0.01
episode: 1673/5000, 

episode: 1776/5000, ticks: 894, score: 0, total reward: 883, e: 0.01
episode: 1777/5000, ticks: 1464, score: 0, total reward: 1453, e: 0.01
episode: 1778/5000, ticks: 1111, score: 0, total reward: 1100, e: 0.01
episode: 1779/5000, ticks: 1183, score: 0, total reward: 1172, e: 0.01
episode: 1780/5000, ticks: 938, score: 0, total reward: 927, e: 0.01
episode: 1781/5000, ticks: 812, score: 0, total reward: 801, e: 0.01
episode: 1782/5000, ticks: 802, score: 0, total reward: 791, e: 0.01
episode: 1783/5000, ticks: 712, score: 0, total reward: 701, e: 0.01
episode: 1784/5000, ticks: 1019, score: 0, total reward: 1008, e: 0.01
episode: 1785/5000, ticks: 903, score: 0, total reward: 892, e: 0.01
episode: 1786/5000, ticks: 857, score: 0, total reward: 846, e: 0.01
episode: 1787/5000, ticks: 848, score: 0, total reward: 837, e: 0.01
episode: 1788/5000, ticks: 1165, score: 0, total reward: 1154, e: 0.01
episode: 1789/5000, ticks: 1147, score: 0, total reward: 1136, e: 0.01
episode: 1790/5000, ti

episode: 1893/5000, ticks: 1447, score: 0, total reward: 1436, e: 0.01
episode: 1894/5000, ticks: 1039, score: 0, total reward: 1028, e: 0.01
episode: 1895/5000, ticks: 929, score: 0, total reward: 918, e: 0.01
episode: 1896/5000, ticks: 902, score: 0, total reward: 891, e: 0.01
episode: 1897/5000, ticks: 957, score: 0, total reward: 946, e: 0.01
episode: 1898/5000, ticks: 1138, score: 0, total reward: 1127, e: 0.01
episode: 1899/5000, ticks: 1111, score: 0, total reward: 1100, e: 0.01
episode: 1900/5000, ticks: 902, score: 0, total reward: 891, e: 0.01
episode: 1901/5000, ticks: 938, score: 0, total reward: 927, e: 0.01
episode: 1902/5000, ticks: 1400, score: 0, total reward: 1389, e: 0.01
episode: 1903/5000, ticks: 930, score: 0, total reward: 919, e: 0.01
episode: 1904/5000, ticks: 886, score: 0, total reward: 875, e: 0.01
episode: 1905/5000, ticks: 965, score: 0, total reward: 954, e: 0.01
episode: 1906/5000, ticks: 758, score: 0, total reward: 747, e: 0.01
episode: 1907/5000, tick

episode: 2011/5000, ticks: 1011, score: 0, total reward: 1000, e: 0.01
episode: 2012/5000, ticks: 1293, score: 0, total reward: 1282, e: 0.01
episode: 2013/5000, ticks: 1419, score: 0, total reward: 1408, e: 0.01
episode: 2014/5000, ticks: 1201, score: 0, total reward: 1190, e: 0.01
episode: 2015/5000, ticks: 876, score: 0, total reward: 865, e: 0.01
episode: 2016/5000, ticks: 1501, score: 0, total reward: 1490, e: 0.01
episode: 2017/5000, ticks: 757, score: 0, total reward: 746, e: 0.01
episode: 2018/5000, ticks: 946, score: 0, total reward: 935, e: 0.01
episode: 2019/5000, ticks: 1012, score: 0, total reward: 1001, e: 0.01
episode: 2020/5000, ticks: 1048, score: 0, total reward: 1037, e: 0.01
episode: 2021/5000, ticks: 2019, score: 0, total reward: 2008, e: 0.01
episode: 2022/5000, ticks: 1111, score: 0, total reward: 1100, e: 0.01
episode: 2023/5000, ticks: 767, score: 0, total reward: 756, e: 0.01
episode: 2024/5000, ticks: 1284, score: 0, total reward: 1273, e: 0.01
episode: 2025/

episode: 2128/5000, ticks: 1038, score: 0, total reward: 1027, e: 0.01
episode: 2129/5000, ticks: 1148, score: 0, total reward: 1137, e: 0.01
episode: 2130/5000, ticks: 847, score: 0, total reward: 836, e: 0.01
episode: 2131/5000, ticks: 884, score: 0, total reward: 873, e: 0.01
episode: 2132/5000, ticks: 1101, score: 0, total reward: 1090, e: 0.01
episode: 2133/5000, ticks: 776, score: 0, total reward: 765, e: 0.01
episode: 2134/5000, ticks: 884, score: 0, total reward: 873, e: 0.01
episode: 2135/5000, ticks: 858, score: 0, total reward: 847, e: 0.01
episode: 2136/5000, ticks: 1093, score: 0, total reward: 1082, e: 0.01
episode: 2137/5000, ticks: 948, score: 0, total reward: 937, e: 0.01
episode: 2138/5000, ticks: 829, score: 0, total reward: 818, e: 0.01
episode: 2139/5000, ticks: 1175, score: 0, total reward: 1164, e: 0.01
episode: 2140/5000, ticks: 830, score: 0, total reward: 819, e: 0.01
episode: 2141/5000, ticks: 1339, score: 0, total reward: 1328, e: 0.01
episode: 2142/5000, ti

episode: 2246/5000, ticks: 1084, score: 0, total reward: 1073, e: 0.01
episode: 2247/5000, ticks: 1281, score: 0, total reward: 1270, e: 0.01
episode: 2248/5000, ticks: 903, score: 0, total reward: 892, e: 0.01
episode: 2249/5000, ticks: 948, score: 0, total reward: 937, e: 0.01
episode: 2250/5000, ticks: 1920, score: 0, total reward: 1909, e: 0.01
episode: 2251/5000, ticks: 793, score: 0, total reward: 782, e: 0.01
episode: 2252/5000, ticks: 1238, score: 0, total reward: 1227, e: 0.01
episode: 2253/5000, ticks: 1527, score: 0, total reward: 1516, e: 0.01
episode: 2254/5000, ticks: 821, score: 0, total reward: 810, e: 0.01
episode: 2255/5000, ticks: 1165, score: 0, total reward: 1154, e: 0.01
episode: 2256/5000, ticks: 802, score: 0, total reward: 791, e: 0.01
episode: 2257/5000, ticks: 866, score: 0, total reward: 855, e: 0.01
episode: 2258/5000, ticks: 757, score: 0, total reward: 746, e: 0.01
episode: 2259/5000, ticks: 793, score: 0, total reward: 782, e: 0.01
episode: 2260/5000, ti

KeyboardInterrupt: 

In [4]:
env.close()