In [4]:
import os
os.environ['CUDA_VISIBLE_DEVICES'] = '1'

import numpy as np
import keras
import json
import matplotlib.pyplot as plt
import time
import seaborn

from PIL import Image
from IPython import display

%matplotlib inline
seaborn.set()

# Setting up the game

In [7]:
class Catch(object):
    """Minimal "catch the fruit" game played on a square grid.

    A single fruit falls one row per step, starting from the top row, while
    the player slides a three-cell-wide basket along the bottom row.

    The game state is kept in ``self.state``, an integer array of shape
    (1, 3) holding ``[fruit_row, fruit_col, basket_center]``.
    """

    def __init__(self, grid_size=10):
        """Create a ``grid_size`` x ``grid_size`` board and start an episode."""
        self.grid_size = grid_size
        self.reset()

    def _update_state(self, action):
        """Advance the game by one step.

        action: 0 moves the basket one column left, 1 keeps it in place and
            any other value moves it one column right.

        The fruit drops by one row. ``self.state`` is replaced with the new
        state; nothing is returned.
        """
        if action == 0:
            move = -1
        elif action == 1:
            move = 0
        else:
            move = 1

        fruit_row, fruit_col, basket = self.state[0]
        # _draw_state paints the basket on columns basket-1 .. basket+1, so
        # the center has to stay within [1, grid_size - 2] to remain fully
        # on the board.
        new_basket = min(max(1, basket + move), self.grid_size - 2)
        out = np.asarray([fruit_row + 1, fruit_col, new_basket])[np.newaxis]

        assert len(out.shape) == 2
        self.state = out

    def _draw_state(self):
        """Return the board as a (grid_size, grid_size) array of 0s and 1s."""
        im_size = (self.grid_size, ) * 2
        state = self.state[0]
        canvas = np.zeros(im_size)
        canvas[state[0], state[1]] = 1  # draw fruit
        canvas[-1, state[2] - 1:state[2] + 2] = 1  # draw basket
        return canvas

    def _get_reward(self):
        """Return 1 for a catch, -1 for a miss and 0 while the fruit is falling."""
        fruit_row, fruit_col, basket = self.state[0]
        if fruit_row == self.grid_size - 1:
            # The fruit reached the bottom row: it is caught when it lands on
            # the basket center or on one of the two neighbouring columns.
            if abs(fruit_col - basket) <= 1:
                return 1
            else:
                return -1
        else:
            return 0

    def _is_over(self):
        """Return True once the fruit has reached the bottom row."""
        return bool(self.state[0, 0] == self.grid_size - 1)

    def observe(self):
        """Return the current board flattened to shape (1, grid_size ** 2)."""
        canvas = self._draw_state()
        return canvas.reshape((1, -1))

    def act(self, action):
        """Apply ``action`` and return ``(observation, reward, game_over)``."""
        self._update_state(action)
        reward = self._get_reward()
        game_over = self._is_over()
        return self.observe(), reward, game_over

    def reset(self):
        """Start a new episode with the fruit in the top row.

        The fruit column and the basket center are drawn at random.
        ``np.random.randint`` excludes its upper bound, so the fruit column
        lies in [0, grid_size - 2] and the basket center in
        [1, grid_size - 3].
        """
        # Draw plain scalars: mixing the Python int 0 with shape-(1,) arrays
        # produces a ragged sequence that recent NumPy versions refuse to
        # convert into an array.
        fruit_col = np.random.randint(0, self.grid_size - 1)
        basket = np.random.randint(1, self.grid_size - 2)
        self.state = np.asarray([0, fruit_col, basket])[np.newaxis]

In [8]:
# Shared constants and state for playing and rendering the game
grid_size = 10  # side length of the square board

# Display labels, indexed by action id (looked up by display_screen)
translate_action = [
    "Left",
    "Stay",
    "Right",
    "Create Ball",
    "End Test",
]

# Timestamp handed back by the most recent set_max_fps call; starts at 0
last_frame_time = 0

In [16]:
def display_screen(action, points, input_t):
    """Print the current action/score and render one game frame inline.

    action: index into the module-level ``translate_action`` list.
    points: score printed next to the action label.
    input_t: flattened frame; reshaped to (grid_size, grid_size) for display.

    The frame is only drawn when the action label does not contain 'End'.
    Afterwards the module-level ``last_frame_time`` is replaced with the
    value returned by ``set_max_fps``.
    """
    global last_frame_time
    label = translate_action[action]
    print('Action %s, Points: %d' % (label, points))
    should_draw = 'End' not in label
    if should_draw:
        board = input_t.reshape((grid_size, grid_size))
        plt.imshow(board, interpolation='none', cmap='gray')
        # Replace the previously shown frame instead of stacking outputs
        display.clear_output(wait=True)
        display.display(plt.gcf())
        plt.show()
    last_frame_time = set_max_fps(last_frame_time)
    
def set_max_fps(last_frame_time, FPS=1):
    """Sleep just long enough to cap rendering at ``FPS`` frames per second.

    last_frame_time: timestamp of the previous frame in integer milliseconds
        since the epoch (the value returned by an earlier call), or 0 when
        no frame has been shown yet.
    FPS: maximum number of frames per second.

    Returns the current time in integer milliseconds, read after any sleep,
    so it can be passed back in as ``last_frame_time`` for the next frame.
    """
    def now_ms():
        return int(round(time.time() * 1000))

    # Keep every quantity in milliseconds; only time.sleep wants seconds.
    frame_budget_ms = 1000. / FPS
    elapsed_ms = now_ms() - last_frame_time
    remaining_ms = frame_budget_ms - elapsed_ms
    if remaining_ms > 0:
        time.sleep(remaining_ms / 1000.)
    return now_ms()

In [2]:
class ExperienceReplay(object):
    """
    Replay memory for Q-learning.

    During gameplay all the experiences <s, a, r, s'> are stored in a replay memory.
    In training, batches of randomly drawn experiences are used to generate the
    input and target for training.
    """
    def __init__(self, max_memory=100, discount=0.9):
        """
        Setup
        max_memory: the maximum number of experiences we want to store
        memory: a list of experiences
        discount: the discount factor for future experience

        In the memory, the information whether the game ended at the state is
        stored separately in a nested array
        [...
        [experience, game_over]
        [experience, game_over]
        ...]
        """
        self.max_memory = max_memory
        self.memory = list()
        self.discount = discount

    def remember(self, states, game_over):
        """
        Save one experience to memory.

        states: the transition [state_t, action_t, reward_t, state_tp1]
        game_over: whether the game ended with this transition

        When the memory grows beyond max_memory the oldest entry is dropped.
        """
        self.memory.append([states, game_over])
        # remove oldest state in memory
        if len(self.memory) > self.max_memory:
            del self.memory[0]

    def get_batch(self, model, batch_size=10):
        """
        Build one training batch from randomly drawn experiences.

        model: object exposing ``output_shape`` (last entry = number of
            actions) and ``predict(batch)`` returning one row of Q-values
            per input row
        batch_size: maximum number of experiences to draw; fewer are used
            while the memory holds fewer entries

        Experiences are drawn with replacement. Returns ``(inputs, targets)``:
        inputs has one row per drawn state s, and targets holds the model's
        own predictions for s, except for the action actually taken, which is
        set to r when the game ended and to r + discount * max_a' Q(s', a')
        otherwise.
        """
        # how many experiences do we have?
        len_memory = len(self.memory)

        # calculate the number of actions that can possibly be taken in the game
        num_actions = model.output_shape[-1]

        # dimensions of the game field (stored states have shape (1, env_dim))
        env_dim = self.memory[0][0][0].shape[1]

        n_samples = min(len_memory, batch_size)

        # We draw states to learn from randomly
        sampled = np.random.randint(0, len_memory, size=n_samples)

        # Stack the sampled s and s' so the model is queried once per batch
        # instead of twice per experience.
        inputs = np.zeros((n_samples, env_dim))
        next_states = np.zeros((n_samples, env_dim))
        for i, idx in enumerate(sampled):
            state_t, _, _, state_tp1 = self.memory[idx][0]
            inputs[i:i+1] = state_t
            next_states[i:i+1] = state_tp1

        # Note that our target is a matrix, with fields not only for the action
        # taken but also for the other possible actions. Those keep the value
        # of the prediction so that training does not affect them.
        targets = np.zeros((n_samples, num_actions))
        if n_samples > 0:
            targets[:] = model.predict(inputs)

            # max_a' Q(s', a') for every sampled transition
            q_next = np.max(model.predict(next_states), axis=1)

            for i, idx in enumerate(sampled):
                _, action_t, reward_t, _ = self.memory[idx][0]

                # We also need to know whether the game ended at this state
                game_over = self.memory[idx][1]

                # if the game is over, the reward is the final reward
                if game_over:
                    targets[i, action_t] = reward_t
                else:
                    targets[i, action_t] = reward_t + self.discount * q_next[i]

        return inputs, targets
            

In [3]:
def baseline_model(grid_size, num_actions, hidden_size):
    """Build and compile the fully connected Q-network.

    grid_size: board side length; the network input is the flattened board
        with grid_size ** 2 values.
    num_actions: number of output units, one Q-value per action.
    hidden_size: number of units in each of the two ReLU hidden layers.

    Returns a compiled ``keras.Sequential`` model trained with SGD
    (learning rate 0.1) on a mean-squared-error loss.
    """
    model = keras.Sequential()
    model.add(keras.layers.Dense(hidden_size, input_shape=(grid_size**2, ), activation='relu'))
    model.add(keras.layers.Dense(hidden_size, activation='relu'))
    # Linear output layer: Q-values are unbounded regression targets
    model.add(keras.layers.Dense(num_actions))

    # `lr` is a deprecated alias that newer Keras releases no longer accept.
    # Fall back to it only for old releases that do not know `learning_rate`
    # (assumed to reject unknown optimizer keywords with TypeError).
    try:
        optimizer = keras.optimizers.SGD(learning_rate=0.1)
    except TypeError:
        optimizer = keras.optimizers.SGD(lr=0.1)

    model.compile(optimizer=optimizer,
                  loss='mse')
    return model

In [10]:
# Hyperparameters for the Q-network and the training loop
grid_size = 10      # board side length; the network sees grid_size ** 2 inputs
num_actions = 3     # size of the network output: one Q-value per action
hidden_size = 100   # units in each hidden layer of the network
epsilon = 0.1       # probability of taking a random action while training
max_memory = 500    # capacity of the experience replay memory
batch_size = 1      # experiences drawn from the replay memory per training step

In [11]:
# Build the Q-network and print its layer table
model = baseline_model(
    grid_size=grid_size,
    num_actions=num_actions,
    hidden_size=hidden_size,
)
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_1 (Dense)              (None, 100)               10100     
_________________________________________________________________
dense_2 (Dense)              (None, 100)               10100     
_________________________________________________________________
dense_3 (Dense)              (None, 3)                 303       
Total params: 20,503
Trainable params: 20,503
Non-trainable params: 0
_________________________________________________________________


In [12]:
# Game environment the agent plays in
env = Catch(grid_size=grid_size)

# Replay memory that stores the agent's experiences for training
exp_replay = ExperienceReplay(max_memory=max_memory)

In [13]:
def train(model, epochs, verbose=1):
    """Train ``model`` to play the game with epsilon-greedy Q-learning.

    Relies on the module-level names ``env``, ``exp_replay``, ``epsilon``,
    ``num_actions`` and ``batch_size``.

    model: network exposing ``predict`` and ``train_on_batch``.
    epochs: number of games to play.
    verbose: when greater than 0, one summary line is printed per game.

    Returns a list holding the cumulative win count after each game.
    """
    # resetting the win counter
    win_cnt = 0
    win_hist = []
    # epochs is the number of games played
    for e in range(epochs):
        loss = 0.
        env.reset()
        game_over = False
        # get initial input
        input_t = env.observe()

        while not game_over:
            # The learner is acting on the last observed game screen
            # input_t is a vector representing the game screen
            input_tm1 = input_t

            # Both branches yield a plain int so that the replay memory
            # always stores the same action type.
            if np.random.rand() <= epsilon:
                # explore: pick a random action
                action = int(np.random.randint(0, num_actions))
            else:
                # q contains the expected rewards for the actions
                q = model.predict(input_tm1)
                # exploit: pick the action with the highest expected reward
                action = int(np.argmax(q[0]))

            # apply action, get rewards and new state
            input_t, reward, game_over = env.act(action)
            # if we managed to catch the fruit we add 1 to our win counter
            if reward == 1:
                win_cnt += 1

            # Uncomment this to render the game
            # display_screen(action, win_cnt, input_t)

            # The experiences <s, a, r, s'> we make during gameplay are our
            # training data. Here we first save the last experience, and then
            # load a batch of experiences to train our model.

            # store experience
            exp_replay.remember([input_tm1, action, reward, input_t], game_over)

            # load batch of experiences
            inputs, targets = exp_replay.get_batch(model, batch_size=batch_size)

            # train model on experiences
            batch_loss = model.train_on_batch(inputs, targets)

            loss += batch_loss

        if verbose > 0:
            print('Epoch {:03d}/{:03d} | Loss {:.4f} | Win count {}'.format(e, epochs, loss, win_cnt))
        win_hist.append(win_cnt)
    return win_hist
                

In [14]:
# Number of games to play during training
epochs = 5000

# hist holds the cumulative win count after each game
hist = train(model, epochs=epochs, verbose=1)
print('Training done!')

Epoch 000/5000 | Loss 0.0126 | Win count 1
Epoch 001/5000 | Loss 0.0314 | Win count 1
Epoch 002/5000 | Loss 0.0481 | Win count 1
Epoch 003/5000 | Loss 0.5598 | Win count 1
Epoch 004/5000 | Loss 0.1996 | Win count 1
Epoch 005/5000 | Loss 0.5535 | Win count 1
Epoch 006/5000 | Loss 0.2284 | Win count 1
Epoch 007/5000 | Loss 0.0334 | Win count 1
Epoch 008/5000 | Loss 0.9591 | Win count 1
Epoch 009/5000 | Loss 0.0730 | Win count 2
Epoch 010/5000 | Loss 0.1065 | Win count 2
Epoch 011/5000 | Loss 0.4818 | Win count 2
Epoch 012/5000 | Loss 0.0342 | Win count 2
Epoch 013/5000 | Loss 0.2565 | Win count 2
Epoch 014/5000 | Loss 0.4105 | Win count 2
Epoch 015/5000 | Loss 0.5222 | Win count 2
Epoch 016/5000 | Loss 0.0770 | Win count 3
Epoch 017/5000 | Loss 0.3179 | Win count 3
Epoch 018/5000 | Loss 0.0981 | Win count 3
Epoch 019/5000 | Loss 0.2888 | Win count 3
Epoch 020/5000 | Loss 0.0853 | Win count 4
Epoch 021/5000 | Loss 0.4896 | Win count 4
Epoch 022/5000 | Loss 0.4942 | Win count 4
Epoch 023/5

Epoch 188/5000 | Loss 0.3712 | Win count 50
Epoch 189/5000 | Loss 0.2262 | Win count 50
Epoch 190/5000 | Loss 0.0108 | Win count 50
Epoch 191/5000 | Loss 0.0114 | Win count 50
Epoch 192/5000 | Loss 0.0207 | Win count 51
Epoch 193/5000 | Loss 0.1371 | Win count 52
Epoch 194/5000 | Loss 1.0904 | Win count 53
Epoch 195/5000 | Loss 0.1732 | Win count 53
Epoch 196/5000 | Loss 0.2368 | Win count 54
Epoch 197/5000 | Loss 0.0451 | Win count 54
Epoch 198/5000 | Loss 0.1785 | Win count 54
Epoch 199/5000 | Loss 0.0178 | Win count 54
Epoch 200/5000 | Loss 0.1567 | Win count 54
Epoch 201/5000 | Loss 0.0636 | Win count 55
Epoch 202/5000 | Loss 0.0240 | Win count 55
Epoch 203/5000 | Loss 0.1609 | Win count 55
Epoch 204/5000 | Loss 0.0199 | Win count 55
Epoch 205/5000 | Loss 0.1209 | Win count 56
Epoch 206/5000 | Loss 0.2698 | Win count 56
Epoch 207/5000 | Loss 0.2122 | Win count 56
Epoch 208/5000 | Loss 0.3240 | Win count 57
Epoch 209/5000 | Loss 0.2828 | Win count 57
Epoch 210/5000 | Loss 0.8912 | W

Epoch 375/5000 | Loss 0.0777 | Win count 121
Epoch 376/5000 | Loss 0.1128 | Win count 122
Epoch 377/5000 | Loss 0.0723 | Win count 123
Epoch 378/5000 | Loss 0.1612 | Win count 123
Epoch 379/5000 | Loss 0.0187 | Win count 124
Epoch 380/5000 | Loss 0.4809 | Win count 125
Epoch 381/5000 | Loss 0.0812 | Win count 126
Epoch 382/5000 | Loss 0.3351 | Win count 127
Epoch 383/5000 | Loss 0.2412 | Win count 127
Epoch 384/5000 | Loss 0.0359 | Win count 127
Epoch 385/5000 | Loss 0.1274 | Win count 128
Epoch 386/5000 | Loss 0.0563 | Win count 128
Epoch 387/5000 | Loss 0.0505 | Win count 128
Epoch 388/5000 | Loss 0.1207 | Win count 128
Epoch 389/5000 | Loss 0.4629 | Win count 129
Epoch 390/5000 | Loss 0.7411 | Win count 130
Epoch 391/5000 | Loss 0.0395 | Win count 131
Epoch 392/5000 | Loss 0.1601 | Win count 131
Epoch 393/5000 | Loss 0.1159 | Win count 132
Epoch 394/5000 | Loss 0.0328 | Win count 133
Epoch 395/5000 | Loss 0.6938 | Win count 133
Epoch 396/5000 | Loss 0.0820 | Win count 133
Epoch 397/

Epoch 561/5000 | Loss 0.0394 | Win count 219
Epoch 562/5000 | Loss 0.2685 | Win count 220
Epoch 563/5000 | Loss 0.0948 | Win count 220
Epoch 564/5000 | Loss 0.0557 | Win count 220
Epoch 565/5000 | Loss 0.2188 | Win count 221
Epoch 566/5000 | Loss 0.0396 | Win count 221
Epoch 567/5000 | Loss 0.0376 | Win count 221
Epoch 568/5000 | Loss 0.1171 | Win count 222
Epoch 569/5000 | Loss 0.3137 | Win count 223
Epoch 570/5000 | Loss 0.3582 | Win count 223
Epoch 571/5000 | Loss 0.1359 | Win count 223
Epoch 572/5000 | Loss 0.1022 | Win count 224
Epoch 573/5000 | Loss 0.0375 | Win count 225
Epoch 574/5000 | Loss 0.0430 | Win count 225
Epoch 575/5000 | Loss 0.2242 | Win count 226
Epoch 576/5000 | Loss 0.3266 | Win count 226
Epoch 577/5000 | Loss 0.0925 | Win count 226
Epoch 578/5000 | Loss 0.0788 | Win count 227
Epoch 579/5000 | Loss 0.1239 | Win count 228
Epoch 580/5000 | Loss 0.0722 | Win count 228
Epoch 581/5000 | Loss 0.3615 | Win count 228
Epoch 582/5000 | Loss 1.1628 | Win count 228
Epoch 583/

Epoch 745/5000 | Loss 0.0476 | Win count 327
Epoch 746/5000 | Loss 0.6562 | Win count 327
Epoch 747/5000 | Loss 0.0657 | Win count 327
Epoch 748/5000 | Loss 0.0785 | Win count 328
Epoch 749/5000 | Loss 0.0464 | Win count 328
Epoch 750/5000 | Loss 0.0181 | Win count 329
Epoch 751/5000 | Loss 0.9697 | Win count 329
Epoch 752/5000 | Loss 0.1838 | Win count 330
Epoch 753/5000 | Loss 0.3925 | Win count 330
Epoch 754/5000 | Loss 0.6048 | Win count 330
Epoch 755/5000 | Loss 0.0903 | Win count 331
Epoch 756/5000 | Loss 0.1921 | Win count 332
Epoch 757/5000 | Loss 0.6078 | Win count 332
Epoch 758/5000 | Loss 0.0719 | Win count 332
Epoch 759/5000 | Loss 0.2262 | Win count 332
Epoch 760/5000 | Loss 0.1303 | Win count 333
Epoch 761/5000 | Loss 0.6629 | Win count 333
Epoch 762/5000 | Loss 0.1853 | Win count 333
Epoch 763/5000 | Loss 0.1955 | Win count 334
Epoch 764/5000 | Loss 0.0543 | Win count 335
Epoch 765/5000 | Loss 0.1579 | Win count 335
Epoch 766/5000 | Loss 0.0333 | Win count 336
Epoch 767/

Epoch 928/5000 | Loss 0.0123 | Win count 445
Epoch 929/5000 | Loss 0.0381 | Win count 446
Epoch 930/5000 | Loss 0.1292 | Win count 447
Epoch 931/5000 | Loss 0.0449 | Win count 448
Epoch 932/5000 | Loss 0.0497 | Win count 449
Epoch 933/5000 | Loss 0.0744 | Win count 450
Epoch 934/5000 | Loss 0.0098 | Win count 451
Epoch 935/5000 | Loss 0.0797 | Win count 452
Epoch 936/5000 | Loss 0.2187 | Win count 453
Epoch 937/5000 | Loss 0.0867 | Win count 453
Epoch 938/5000 | Loss 0.0997 | Win count 454
Epoch 939/5000 | Loss 0.0255 | Win count 455
Epoch 940/5000 | Loss 0.0379 | Win count 455
Epoch 941/5000 | Loss 0.0334 | Win count 456
Epoch 942/5000 | Loss 0.0675 | Win count 456
Epoch 943/5000 | Loss 0.0522 | Win count 457
Epoch 944/5000 | Loss 0.0236 | Win count 457
Epoch 945/5000 | Loss 0.0439 | Win count 458
Epoch 946/5000 | Loss 0.0296 | Win count 459
Epoch 947/5000 | Loss 0.0459 | Win count 460
Epoch 948/5000 | Loss 0.0602 | Win count 461
Epoch 949/5000 | Loss 0.0084 | Win count 462
Epoch 950/

Epoch 1109/5000 | Loss 0.1204 | Win count 583
Epoch 1110/5000 | Loss 0.0495 | Win count 584
Epoch 1111/5000 | Loss 0.0245 | Win count 585
Epoch 1112/5000 | Loss 0.1056 | Win count 586
Epoch 1113/5000 | Loss 0.0260 | Win count 587
Epoch 1114/5000 | Loss 0.0113 | Win count 588
Epoch 1115/5000 | Loss 0.6617 | Win count 589
Epoch 1116/5000 | Loss 0.2552 | Win count 590
Epoch 1117/5000 | Loss 0.0352 | Win count 591
Epoch 1118/5000 | Loss 0.4769 | Win count 592
Epoch 1119/5000 | Loss 0.0354 | Win count 593
Epoch 1120/5000 | Loss 0.0077 | Win count 594
Epoch 1121/5000 | Loss 0.0306 | Win count 594
Epoch 1122/5000 | Loss 0.0118 | Win count 594
Epoch 1123/5000 | Loss 0.0226 | Win count 595
Epoch 1124/5000 | Loss 0.0123 | Win count 596
Epoch 1125/5000 | Loss 0.0059 | Win count 596
Epoch 1126/5000 | Loss 0.0112 | Win count 597
Epoch 1127/5000 | Loss 0.0611 | Win count 598
Epoch 1128/5000 | Loss 0.1291 | Win count 599
Epoch 1129/5000 | Loss 0.0826 | Win count 600
Epoch 1130/5000 | Loss 0.0211 | Wi

Epoch 1289/5000 | Loss 0.0558 | Win count 719
Epoch 1290/5000 | Loss 0.0562 | Win count 720
Epoch 1291/5000 | Loss 0.0345 | Win count 721
Epoch 1292/5000 | Loss 0.0405 | Win count 722
Epoch 1293/5000 | Loss 0.0762 | Win count 723
Epoch 1294/5000 | Loss 0.6796 | Win count 724
Epoch 1295/5000 | Loss 0.2114 | Win count 725
Epoch 1296/5000 | Loss 0.0439 | Win count 725
Epoch 1297/5000 | Loss 0.0478 | Win count 725
Epoch 1298/5000 | Loss 0.0344 | Win count 725
Epoch 1299/5000 | Loss 0.0504 | Win count 726
Epoch 1300/5000 | Loss 0.0301 | Win count 727
Epoch 1301/5000 | Loss 0.0223 | Win count 727
Epoch 1302/5000 | Loss 0.8229 | Win count 727
Epoch 1303/5000 | Loss 0.0593 | Win count 728
Epoch 1304/5000 | Loss 0.1810 | Win count 728
Epoch 1305/5000 | Loss 0.0799 | Win count 728
Epoch 1306/5000 | Loss 0.0330 | Win count 729
Epoch 1307/5000 | Loss 0.0502 | Win count 729
Epoch 1308/5000 | Loss 0.2043 | Win count 730
Epoch 1309/5000 | Loss 0.1891 | Win count 730
Epoch 1310/5000 | Loss 0.0332 | Wi

Epoch 1469/5000 | Loss 0.0163 | Win count 868
Epoch 1470/5000 | Loss 0.0204 | Win count 868
Epoch 1471/5000 | Loss 0.0107 | Win count 869
Epoch 1472/5000 | Loss 0.0257 | Win count 870
Epoch 1473/5000 | Loss 0.0105 | Win count 871
Epoch 1474/5000 | Loss 0.0241 | Win count 872
Epoch 1475/5000 | Loss 0.0138 | Win count 873
Epoch 1476/5000 | Loss 0.0597 | Win count 874
Epoch 1477/5000 | Loss 0.0184 | Win count 875
Epoch 1478/5000 | Loss 0.0157 | Win count 876
Epoch 1479/5000 | Loss 0.1183 | Win count 877
Epoch 1480/5000 | Loss 0.0397 | Win count 878
Epoch 1481/5000 | Loss 0.0148 | Win count 879
Epoch 1482/5000 | Loss 0.0137 | Win count 880
Epoch 1483/5000 | Loss 0.0161 | Win count 881
Epoch 1484/5000 | Loss 0.7317 | Win count 882
Epoch 1485/5000 | Loss 0.0398 | Win count 883
Epoch 1486/5000 | Loss 0.0390 | Win count 883
Epoch 1487/5000 | Loss 0.0125 | Win count 884
Epoch 1488/5000 | Loss 0.0449 | Win count 885
Epoch 1489/5000 | Loss 0.0442 | Win count 886
Epoch 1490/5000 | Loss 0.0117 | Wi

Epoch 1648/5000 | Loss 0.0148 | Win count 1028
Epoch 1649/5000 | Loss 0.0134 | Win count 1029
Epoch 1650/5000 | Loss 0.0265 | Win count 1030
Epoch 1651/5000 | Loss 0.1218 | Win count 1031
Epoch 1652/5000 | Loss 0.0166 | Win count 1032
Epoch 1653/5000 | Loss 0.0217 | Win count 1033
Epoch 1654/5000 | Loss 0.0146 | Win count 1034
Epoch 1655/5000 | Loss 0.0344 | Win count 1035
Epoch 1656/5000 | Loss 0.0211 | Win count 1036
Epoch 1657/5000 | Loss 0.0137 | Win count 1037
Epoch 1658/5000 | Loss 0.0170 | Win count 1038
Epoch 1659/5000 | Loss 0.0168 | Win count 1039
Epoch 1660/5000 | Loss 0.7250 | Win count 1040
Epoch 1661/5000 | Loss 0.0338 | Win count 1041
Epoch 1662/5000 | Loss 0.0343 | Win count 1042
Epoch 1663/5000 | Loss 0.0388 | Win count 1043
Epoch 1664/5000 | Loss 0.0507 | Win count 1044
Epoch 1665/5000 | Loss 0.0900 | Win count 1045
Epoch 1666/5000 | Loss 0.0326 | Win count 1046
Epoch 1667/5000 | Loss 0.1023 | Win count 1046
Epoch 1668/5000 | Loss 0.0288 | Win count 1047
Epoch 1669/50

Epoch 1827/5000 | Loss 0.0234 | Win count 1198
Epoch 1828/5000 | Loss 0.0157 | Win count 1199
Epoch 1829/5000 | Loss 0.0050 | Win count 1200
Epoch 1830/5000 | Loss 0.1298 | Win count 1201
Epoch 1831/5000 | Loss 0.0595 | Win count 1201
Epoch 1832/5000 | Loss 0.0050 | Win count 1202
Epoch 1833/5000 | Loss 0.5260 | Win count 1203
Epoch 1834/5000 | Loss 0.1387 | Win count 1204
Epoch 1835/5000 | Loss 0.1792 | Win count 1205
Epoch 1836/5000 | Loss 0.0805 | Win count 1206
Epoch 1837/5000 | Loss 0.0182 | Win count 1206
Epoch 1838/5000 | Loss 0.0111 | Win count 1207
Epoch 1839/5000 | Loss 0.0360 | Win count 1208
Epoch 1840/5000 | Loss 0.0403 | Win count 1209
Epoch 1841/5000 | Loss 0.0216 | Win count 1210
Epoch 1842/5000 | Loss 0.0118 | Win count 1211
Epoch 1843/5000 | Loss 0.0145 | Win count 1212
Epoch 1844/5000 | Loss 0.0103 | Win count 1213
Epoch 1845/5000 | Loss 0.0560 | Win count 1214
Epoch 1846/5000 | Loss 0.0300 | Win count 1215
Epoch 1847/5000 | Loss 0.0041 | Win count 1216
Epoch 1848/50

Epoch 2005/5000 | Loss 0.0089 | Win count 1361
Epoch 2006/5000 | Loss 0.0035 | Win count 1362
Epoch 2007/5000 | Loss 0.0074 | Win count 1363
Epoch 2008/5000 | Loss 0.0294 | Win count 1364
Epoch 2009/5000 | Loss 0.0171 | Win count 1365
Epoch 2010/5000 | Loss 0.0354 | Win count 1366
Epoch 2011/5000 | Loss 0.1021 | Win count 1367
Epoch 2012/5000 | Loss 0.0070 | Win count 1368
Epoch 2013/5000 | Loss 0.0156 | Win count 1369
Epoch 2014/5000 | Loss 0.0125 | Win count 1370
Epoch 2015/5000 | Loss 0.0101 | Win count 1371
Epoch 2016/5000 | Loss 0.0292 | Win count 1372
Epoch 2017/5000 | Loss 0.0195 | Win count 1373
Epoch 2018/5000 | Loss 0.0124 | Win count 1374
Epoch 2019/5000 | Loss 0.0106 | Win count 1375
Epoch 2020/5000 | Loss 0.0103 | Win count 1376
Epoch 2021/5000 | Loss 0.0960 | Win count 1377
Epoch 2022/5000 | Loss 0.0119 | Win count 1378
Epoch 2023/5000 | Loss 0.0085 | Win count 1379
Epoch 2024/5000 | Loss 0.0415 | Win count 1380
Epoch 2025/5000 | Loss 0.0075 | Win count 1381
Epoch 2026/50

Epoch 2180/5000 | Loss 0.0159 | Win count 1532
Epoch 2181/5000 | Loss 0.0056 | Win count 1533
Epoch 2182/5000 | Loss 0.0031 | Win count 1534
Epoch 2183/5000 | Loss 0.0062 | Win count 1535
Epoch 2184/5000 | Loss 0.0037 | Win count 1536
Epoch 2185/5000 | Loss 0.0197 | Win count 1537
Epoch 2186/5000 | Loss 0.0063 | Win count 1538
Epoch 2187/5000 | Loss 0.0046 | Win count 1539
Epoch 2188/5000 | Loss 0.0042 | Win count 1540
Epoch 2189/5000 | Loss 0.0125 | Win count 1541
Epoch 2190/5000 | Loss 0.1379 | Win count 1542
Epoch 2191/5000 | Loss 0.0335 | Win count 1543
Epoch 2192/5000 | Loss 0.0098 | Win count 1544
Epoch 2193/5000 | Loss 0.0055 | Win count 1545
Epoch 2194/5000 | Loss 0.0081 | Win count 1546
Epoch 2195/5000 | Loss 0.0079 | Win count 1547
Epoch 2196/5000 | Loss 0.0049 | Win count 1548
Epoch 2197/5000 | Loss 0.0026 | Win count 1549
Epoch 2198/5000 | Loss 0.0351 | Win count 1550
Epoch 2199/5000 | Loss 0.0046 | Win count 1551
Epoch 2200/5000 | Loss 0.0211 | Win count 1552
Epoch 2201/50

Epoch 2356/5000 | Loss 0.0044 | Win count 1701
Epoch 2357/5000 | Loss 0.0054 | Win count 1702
Epoch 2358/5000 | Loss 0.0027 | Win count 1703
Epoch 2359/5000 | Loss 0.0051 | Win count 1704
Epoch 2360/5000 | Loss 0.0057 | Win count 1705
Epoch 2361/5000 | Loss 0.0020 | Win count 1706
Epoch 2362/5000 | Loss 0.0041 | Win count 1707
Epoch 2363/5000 | Loss 0.0040 | Win count 1707
Epoch 2364/5000 | Loss 0.0565 | Win count 1708
Epoch 2365/5000 | Loss 0.0019 | Win count 1709
Epoch 2366/5000 | Loss 0.0149 | Win count 1710
Epoch 2367/5000 | Loss 0.0053 | Win count 1711
Epoch 2368/5000 | Loss 0.0076 | Win count 1712
Epoch 2369/5000 | Loss 0.0048 | Win count 1713
Epoch 2370/5000 | Loss 0.0086 | Win count 1714
Epoch 2371/5000 | Loss 0.0063 | Win count 1715
Epoch 2372/5000 | Loss 0.0084 | Win count 1716
Epoch 2373/5000 | Loss 0.0061 | Win count 1717
Epoch 2374/5000 | Loss 0.0037 | Win count 1718
Epoch 2375/5000 | Loss 0.0041 | Win count 1719
Epoch 2376/5000 | Loss 0.0037 | Win count 1720
Epoch 2377/50

Epoch 2535/5000 | Loss 0.0145 | Win count 1863
Epoch 2536/5000 | Loss 0.0118 | Win count 1864
Epoch 2537/5000 | Loss 0.0028 | Win count 1865
Epoch 2538/5000 | Loss 0.0031 | Win count 1866
Epoch 2539/5000 | Loss 0.0036 | Win count 1867
Epoch 2540/5000 | Loss 0.0180 | Win count 1868
Epoch 2541/5000 | Loss 0.0042 | Win count 1869
Epoch 2542/5000 | Loss 0.0110 | Win count 1870
Epoch 2543/5000 | Loss 0.0066 | Win count 1871
Epoch 2544/5000 | Loss 0.0256 | Win count 1872
Epoch 2545/5000 | Loss 0.0141 | Win count 1873
Epoch 2546/5000 | Loss 0.0078 | Win count 1874
Epoch 2547/5000 | Loss 0.0108 | Win count 1875
Epoch 2548/5000 | Loss 0.0140 | Win count 1876
Epoch 2549/5000 | Loss 0.0210 | Win count 1877
Epoch 2550/5000 | Loss 0.0074 | Win count 1878
Epoch 2551/5000 | Loss 0.0041 | Win count 1879
Epoch 2552/5000 | Loss 0.0044 | Win count 1880
Epoch 2553/5000 | Loss 0.0046 | Win count 1881
Epoch 2554/5000 | Loss 0.0054 | Win count 1882
Epoch 2555/5000 | Loss 0.0040 | Win count 1883
Epoch 2556/50

Epoch 2710/5000 | Loss 0.0516 | Win count 2022
Epoch 2711/5000 | Loss 0.0224 | Win count 2023
Epoch 2712/5000 | Loss 0.9699 | Win count 2024
Epoch 2713/5000 | Loss 0.0538 | Win count 2024
Epoch 2714/5000 | Loss 0.3302 | Win count 2025
Epoch 2715/5000 | Loss 0.0305 | Win count 2025
Epoch 2716/5000 | Loss 0.0635 | Win count 2026
Epoch 2717/5000 | Loss 0.0387 | Win count 2027
Epoch 2718/5000 | Loss 0.0122 | Win count 2028
Epoch 2719/5000 | Loss 0.0293 | Win count 2029
Epoch 2720/5000 | Loss 0.0231 | Win count 2030
Epoch 2721/5000 | Loss 0.0150 | Win count 2031
Epoch 2722/5000 | Loss 0.0101 | Win count 2032
Epoch 2723/5000 | Loss 0.0210 | Win count 2033
Epoch 2724/5000 | Loss 0.0095 | Win count 2034
Epoch 2725/5000 | Loss 0.0557 | Win count 2035
Epoch 2726/5000 | Loss 0.0080 | Win count 2036
Epoch 2727/5000 | Loss 0.0106 | Win count 2037
Epoch 2728/5000 | Loss 0.0438 | Win count 2038
Epoch 2729/5000 | Loss 0.0104 | Win count 2039
Epoch 2730/5000 | Loss 0.0124 | Win count 2040
Epoch 2731/50

Epoch 2886/5000 | Loss 0.0087 | Win count 2184
Epoch 2887/5000 | Loss 0.0109 | Win count 2185
Epoch 2888/5000 | Loss 0.0057 | Win count 2186
Epoch 2889/5000 | Loss 0.0242 | Win count 2187
Epoch 2890/5000 | Loss 0.0041 | Win count 2188
Epoch 2891/5000 | Loss 0.0197 | Win count 2189
Epoch 2892/5000 | Loss 0.0047 | Win count 2190
Epoch 2893/5000 | Loss 0.0034 | Win count 2191
Epoch 2894/5000 | Loss 0.0087 | Win count 2192
Epoch 2895/5000 | Loss 0.0082 | Win count 2192
Epoch 2896/5000 | Loss 0.0067 | Win count 2193
Epoch 2897/5000 | Loss 0.0097 | Win count 2194
Epoch 2898/5000 | Loss 0.0115 | Win count 2195
Epoch 2899/5000 | Loss 0.0057 | Win count 2196
Epoch 2900/5000 | Loss 0.0106 | Win count 2197
Epoch 2901/5000 | Loss 0.0077 | Win count 2198
Epoch 2902/5000 | Loss 0.0029 | Win count 2199
Epoch 2903/5000 | Loss 0.0066 | Win count 2200
Epoch 2904/5000 | Loss 0.0127 | Win count 2201
Epoch 2905/5000 | Loss 0.0029 | Win count 2202
Epoch 2906/5000 | Loss 0.0056 | Win count 2203
Epoch 2907/50

Epoch 3063/5000 | Loss 0.0013 | Win count 2354
Epoch 3064/5000 | Loss 0.2151 | Win count 2355
Epoch 3065/5000 | Loss 0.0157 | Win count 2355
Epoch 3066/5000 | Loss 0.1215 | Win count 2356
Epoch 3067/5000 | Loss 0.0085 | Win count 2357
Epoch 3068/5000 | Loss 0.0050 | Win count 2358
Epoch 3069/5000 | Loss 0.0105 | Win count 2359
Epoch 3070/5000 | Loss 0.0525 | Win count 2360
Epoch 3071/5000 | Loss 0.0314 | Win count 2360
Epoch 3072/5000 | Loss 0.0092 | Win count 2361
Epoch 3073/5000 | Loss 0.0949 | Win count 2362
Epoch 3074/5000 | Loss 0.0068 | Win count 2363
Epoch 3075/5000 | Loss 0.0333 | Win count 2364
Epoch 3076/5000 | Loss 0.0759 | Win count 2365
Epoch 3077/5000 | Loss 0.0099 | Win count 2366
Epoch 3078/5000 | Loss 0.0126 | Win count 2366
Epoch 3079/5000 | Loss 0.2957 | Win count 2367
Epoch 3080/5000 | Loss 0.0595 | Win count 2367
Epoch 3081/5000 | Loss 0.0092 | Win count 2368
Epoch 3082/5000 | Loss 0.0283 | Win count 2369
Epoch 3083/5000 | Loss 0.0294 | Win count 2370
Epoch 3084/50

Epoch 3242/5000 | Loss 0.0095 | Win count 2519
Epoch 3243/5000 | Loss 0.0032 | Win count 2520
Epoch 3244/5000 | Loss 0.0427 | Win count 2521
Epoch 3245/5000 | Loss 0.0511 | Win count 2521
Epoch 3246/5000 | Loss 0.0112 | Win count 2522
Epoch 3247/5000 | Loss 0.0044 | Win count 2523
Epoch 3248/5000 | Loss 0.0055 | Win count 2524
Epoch 3249/5000 | Loss 0.0030 | Win count 2525
Epoch 3250/5000 | Loss 0.0013 | Win count 2526
Epoch 3251/5000 | Loss 0.0061 | Win count 2527
Epoch 3252/5000 | Loss 0.0038 | Win count 2528
Epoch 3253/5000 | Loss 0.0030 | Win count 2529
Epoch 3254/5000 | Loss 0.0048 | Win count 2530
Epoch 3255/5000 | Loss 0.0042 | Win count 2531
Epoch 3256/5000 | Loss 0.0064 | Win count 2532
Epoch 3257/5000 | Loss 0.0070 | Win count 2533
Epoch 3258/5000 | Loss 0.0219 | Win count 2534
Epoch 3259/5000 | Loss 0.0440 | Win count 2535
Epoch 3260/5000 | Loss 0.0015 | Win count 2536
Epoch 3261/5000 | Loss 0.0081 | Win count 2537
Epoch 3262/5000 | Loss 0.0036 | Win count 2538
Epoch 3263/50

Epoch 3417/5000 | Loss 0.0015 | Win count 2685
Epoch 3418/5000 | Loss 0.0023 | Win count 2686
Epoch 3419/5000 | Loss 0.0012 | Win count 2687
Epoch 3420/5000 | Loss 0.0057 | Win count 2688
Epoch 3421/5000 | Loss 0.0028 | Win count 2689
Epoch 3422/5000 | Loss 0.0022 | Win count 2690
Epoch 3423/5000 | Loss 0.0130 | Win count 2691
Epoch 3424/5000 | Loss 0.0135 | Win count 2692
Epoch 3425/5000 | Loss 0.0152 | Win count 2693
Epoch 3426/5000 | Loss 0.0037 | Win count 2694
Epoch 3427/5000 | Loss 0.0161 | Win count 2695
Epoch 3428/5000 | Loss 0.0021 | Win count 2696
Epoch 3429/5000 | Loss 0.0053 | Win count 2697
Epoch 3430/5000 | Loss 0.0846 | Win count 2698
Epoch 3431/5000 | Loss 0.0194 | Win count 2699
Epoch 3432/5000 | Loss 0.0057 | Win count 2700
Epoch 3433/5000 | Loss 0.0062 | Win count 2701
Epoch 3434/5000 | Loss 0.0020 | Win count 2702
Epoch 3435/5000 | Loss 0.0617 | Win count 2703
Epoch 3436/5000 | Loss 0.0139 | Win count 2704
Epoch 3437/5000 | Loss 0.0062 | Win count 2705
Epoch 3438/50

Epoch 3592/5000 | Loss 0.0021 | Win count 2853
Epoch 3593/5000 | Loss 0.0045 | Win count 2854
Epoch 3594/5000 | Loss 0.0338 | Win count 2855
Epoch 3595/5000 | Loss 0.0052 | Win count 2856
Epoch 3596/5000 | Loss 0.0037 | Win count 2857
Epoch 3597/5000 | Loss 0.0283 | Win count 2858
Epoch 3598/5000 | Loss 0.0121 | Win count 2859
Epoch 3599/5000 | Loss 0.0048 | Win count 2860
Epoch 3600/5000 | Loss 0.0067 | Win count 2861
Epoch 3601/5000 | Loss 0.0078 | Win count 2862
Epoch 3602/5000 | Loss 0.0056 | Win count 2863
Epoch 3603/5000 | Loss 0.0050 | Win count 2864
Epoch 3604/5000 | Loss 0.0038 | Win count 2865
Epoch 3605/5000 | Loss 0.0070 | Win count 2866
Epoch 3606/5000 | Loss 0.0056 | Win count 2867
Epoch 3607/5000 | Loss 0.0051 | Win count 2868
Epoch 3608/5000 | Loss 0.0142 | Win count 2869
Epoch 3609/5000 | Loss 0.0738 | Win count 2870
Epoch 3610/5000 | Loss 0.0073 | Win count 2871
Epoch 3611/5000 | Loss 0.0035 | Win count 2872
Epoch 3612/5000 | Loss 0.0032 | Win count 2873
Epoch 3613/50

Epoch 3768/5000 | Loss 0.0182 | Win count 3018
Epoch 3769/5000 | Loss 0.0501 | Win count 3019
Epoch 3770/5000 | Loss 0.0088 | Win count 3020
Epoch 3771/5000 | Loss 0.0136 | Win count 3021
Epoch 3772/5000 | Loss 0.0128 | Win count 3022
Epoch 3773/5000 | Loss 0.0059 | Win count 3023
Epoch 3774/5000 | Loss 0.1026 | Win count 3024
Epoch 3775/5000 | Loss 0.0037 | Win count 3025
Epoch 3776/5000 | Loss 0.0143 | Win count 3026
Epoch 3777/5000 | Loss 0.0064 | Win count 3027
Epoch 3778/5000 | Loss 0.0625 | Win count 3028
Epoch 3779/5000 | Loss 0.0596 | Win count 3029
Epoch 3780/5000 | Loss 0.0205 | Win count 3030
Epoch 3781/5000 | Loss 0.0126 | Win count 3031
Epoch 3782/5000 | Loss 0.0068 | Win count 3031
Epoch 3783/5000 | Loss 0.0393 | Win count 3032
Epoch 3784/5000 | Loss 0.0181 | Win count 3033
Epoch 3785/5000 | Loss 0.0119 | Win count 3034
Epoch 3786/5000 | Loss 0.0177 | Win count 3035
Epoch 3787/5000 | Loss 0.0070 | Win count 3036
Epoch 3788/5000 | Loss 0.0046 | Win count 3037
Epoch 3789/50

Epoch 3944/5000 | Loss 0.1454 | Win count 3185
Epoch 3945/5000 | Loss 0.0201 | Win count 3186
Epoch 3946/5000 | Loss 0.0213 | Win count 3186
Epoch 3947/5000 | Loss 0.0076 | Win count 3187
Epoch 3948/5000 | Loss 0.0140 | Win count 3188
Epoch 3949/5000 | Loss 0.0072 | Win count 3189
Epoch 3950/5000 | Loss 0.0259 | Win count 3190
Epoch 3951/5000 | Loss 0.0053 | Win count 3191
Epoch 3952/5000 | Loss 0.0042 | Win count 3192
Epoch 3953/5000 | Loss 0.0145 | Win count 3193
Epoch 3954/5000 | Loss 0.1041 | Win count 3193
Epoch 3955/5000 | Loss 0.0043 | Win count 3194
Epoch 3956/5000 | Loss 0.1208 | Win count 3195
Epoch 3957/5000 | Loss 0.0368 | Win count 3196
Epoch 3958/5000 | Loss 0.0445 | Win count 3197
Epoch 3959/5000 | Loss 0.0459 | Win count 3198
Epoch 3960/5000 | Loss 0.0089 | Win count 3199
Epoch 3961/5000 | Loss 0.0110 | Win count 3200
Epoch 3962/5000 | Loss 0.0246 | Win count 3201
Epoch 3963/5000 | Loss 0.0236 | Win count 3202
Epoch 3964/5000 | Loss 0.0195 | Win count 3203
Epoch 3965/50

Epoch 4119/5000 | Loss 0.0016 | Win count 3346
Epoch 4120/5000 | Loss 0.0017 | Win count 3347
Epoch 4121/5000 | Loss 0.0025 | Win count 3348
Epoch 4122/5000 | Loss 0.1375 | Win count 3349
Epoch 4123/5000 | Loss 0.0971 | Win count 3350
Epoch 4124/5000 | Loss 0.0214 | Win count 3351
Epoch 4125/5000 | Loss 0.8589 | Win count 3352
Epoch 4126/5000 | Loss 0.0464 | Win count 3353
Epoch 4127/5000 | Loss 0.0384 | Win count 3354
Epoch 4128/5000 | Loss 0.0141 | Win count 3355
Epoch 4129/5000 | Loss 0.0111 | Win count 3356
Epoch 4130/5000 | Loss 0.0416 | Win count 3357
Epoch 4131/5000 | Loss 0.0160 | Win count 3358
Epoch 4132/5000 | Loss 0.0027 | Win count 3359
Epoch 4133/5000 | Loss 0.0036 | Win count 3360
Epoch 4134/5000 | Loss 0.0050 | Win count 3361
Epoch 4135/5000 | Loss 0.0044 | Win count 3362
Epoch 4136/5000 | Loss 0.0103 | Win count 3363
Epoch 4137/5000 | Loss 0.0047 | Win count 3364
Epoch 4138/5000 | Loss 0.4068 | Win count 3365
Epoch 4139/5000 | Loss 0.0792 | Win count 3366
Epoch 4140/50

Epoch 4295/5000 | Loss 0.0046 | Win count 3519
Epoch 4296/5000 | Loss 0.0031 | Win count 3520
Epoch 4297/5000 | Loss 0.0074 | Win count 3521
Epoch 4298/5000 | Loss 0.0018 | Win count 3522
Epoch 4299/5000 | Loss 0.0031 | Win count 3523
Epoch 4300/5000 | Loss 0.0042 | Win count 3524
Epoch 4301/5000 | Loss 0.0017 | Win count 3525
Epoch 4302/5000 | Loss 0.0042 | Win count 3526
Epoch 4303/5000 | Loss 0.0018 | Win count 3527
Epoch 4304/5000 | Loss 0.0017 | Win count 3528
Epoch 4305/5000 | Loss 0.0135 | Win count 3529
Epoch 4306/5000 | Loss 0.0049 | Win count 3530
Epoch 4307/5000 | Loss 0.0030 | Win count 3531
Epoch 4308/5000 | Loss 0.0022 | Win count 3532
Epoch 4309/5000 | Loss 0.0099 | Win count 3533
Epoch 4310/5000 | Loss 0.0064 | Win count 3534
Epoch 4311/5000 | Loss 0.0025 | Win count 3535
Epoch 4312/5000 | Loss 0.0015 | Win count 3536
Epoch 4313/5000 | Loss 0.0017 | Win count 3537
Epoch 4314/5000 | Loss 0.0040 | Win count 3538
Epoch 4315/5000 | Loss 0.0019 | Win count 3539
Epoch 4316/50

Epoch 4472/5000 | Loss 0.0025 | Win count 3688
Epoch 4473/5000 | Loss 0.1613 | Win count 3689
Epoch 4474/5000 | Loss 0.0224 | Win count 3690
Epoch 4475/5000 | Loss 0.0032 | Win count 3691
Epoch 4476/5000 | Loss 0.0096 | Win count 3692
Epoch 4477/5000 | Loss 0.0124 | Win count 3693
Epoch 4478/5000 | Loss 0.0090 | Win count 3694
Epoch 4479/5000 | Loss 0.0118 | Win count 3695
Epoch 4480/5000 | Loss 0.2507 | Win count 3696
Epoch 4481/5000 | Loss 0.0176 | Win count 3697
Epoch 4482/5000 | Loss 0.0150 | Win count 3698
Epoch 4483/5000 | Loss 0.0039 | Win count 3699
Epoch 4484/5000 | Loss 0.0079 | Win count 3699
Epoch 4485/5000 | Loss 0.1609 | Win count 3700
Epoch 4486/5000 | Loss 0.0108 | Win count 3701
Epoch 4487/5000 | Loss 0.0042 | Win count 3702
Epoch 4488/5000 | Loss 0.0063 | Win count 3703
Epoch 4489/5000 | Loss 0.0026 | Win count 3703
Epoch 4490/5000 | Loss 0.0035 | Win count 3704
Epoch 4491/5000 | Loss 0.0353 | Win count 3705
Epoch 4492/5000 | Loss 0.0060 | Win count 3706
Epoch 4493/50

Epoch 4649/5000 | Loss 0.0014 | Win count 3859
Epoch 4650/5000 | Loss 0.0013 | Win count 3860
Epoch 4651/5000 | Loss 0.0008 | Win count 3861
Epoch 4652/5000 | Loss 0.0044 | Win count 3862
Epoch 4653/5000 | Loss 0.0011 | Win count 3862
Epoch 4654/5000 | Loss 0.0021 | Win count 3863
Epoch 4655/5000 | Loss 0.0023 | Win count 3864
Epoch 4656/5000 | Loss 0.0041 | Win count 3865
Epoch 4657/5000 | Loss 0.0043 | Win count 3866
Epoch 4658/5000 | Loss 0.0040 | Win count 3867
Epoch 4659/5000 | Loss 0.0015 | Win count 3868
Epoch 4660/5000 | Loss 0.0015 | Win count 3869
Epoch 4661/5000 | Loss 0.0062 | Win count 3870
Epoch 4662/5000 | Loss 0.0012 | Win count 3871
Epoch 4663/5000 | Loss 0.0029 | Win count 3872
Epoch 4664/5000 | Loss 0.0121 | Win count 3873
Epoch 4665/5000 | Loss 0.0029 | Win count 3874
Epoch 4666/5000 | Loss 0.0010 | Win count 3875
Epoch 4667/5000 | Loss 0.0012 | Win count 3876
Epoch 4668/5000 | Loss 0.0031 | Win count 3877
Epoch 4669/5000 | Loss 0.0013 | Win count 3878
Epoch 4670/50

Epoch 4824/5000 | Loss 0.0269 | Win count 4024
Epoch 4825/5000 | Loss 0.0009 | Win count 4025
Epoch 4826/5000 | Loss 0.0070 | Win count 4026
Epoch 4827/5000 | Loss 0.0043 | Win count 4027
Epoch 4828/5000 | Loss 0.0032 | Win count 4028
Epoch 4829/5000 | Loss 0.0012 | Win count 4029
Epoch 4830/5000 | Loss 0.0009 | Win count 4030
Epoch 4831/5000 | Loss 0.0011 | Win count 4031
Epoch 4832/5000 | Loss 0.0037 | Win count 4031
Epoch 4833/5000 | Loss 0.0014 | Win count 4032
Epoch 4834/5000 | Loss 0.0016 | Win count 4033
Epoch 4835/5000 | Loss 0.0020 | Win count 4034
Epoch 4836/5000 | Loss 0.1327 | Win count 4035
Epoch 4837/5000 | Loss 0.0044 | Win count 4036
Epoch 4838/5000 | Loss 0.0101 | Win count 4037
Epoch 4839/5000 | Loss 0.0274 | Win count 4038
Epoch 4840/5000 | Loss 0.0129 | Win count 4039
Epoch 4841/5000 | Loss 0.0042 | Win count 4040
Epoch 4842/5000 | Loss 0.0181 | Win count 4041
Epoch 4843/5000 | Loss 0.0050 | Win count 4042
Epoch 4844/5000 | Loss 0.0020 | Win count 4043
Epoch 4845/50

Epoch 4999/5000 | Loss 0.0124 | Win count 4185
Training done!


# Testing 

In [18]:
def test(model):
    """Let a pretrained model play Catch greedily, rendering every step.

    Ten games are played. At each step the model's prediction for the
    current screen is computed, the action with the highest predicted value
    is taken (no random exploration), the reward is added to a running score
    that is carried across all games, and the new screen is handed to
    ``display_screen``.

    Args:
        model: object exposing ``predict(screen)``. The first row of its
            result is indexed by action; the index of the largest entry is
            passed to ``Catch.act`` (0 moves the basket left, 1 keeps it in
            place, any other value moves it right).

    Note:
        Relies on names that are not defined in this function: ``Catch``,
        ``grid_size`` and ``display_screen``. It also rebinds the
        module-level ``last_frame_time`` to 0 (presumably consumed by
        ``display_screen`` for frame pacing -- TODO confirm).
    """
    global last_frame_time
    # turn on matplotlib interactive mode
    plt.ion()
    # define env / game (grid_size is not a parameter; assumed to be a module-level value)
    env = Catch(grid_size)
    # c is a simple step counter: bumped once per game start and once per move.
    # NOTE(review): it is never read in the code visible here.
    c = 0
    # reset the last frame time (we start from 0)
    last_frame_time = 0
    # reset score; it accumulates over all games, not per game
    points = 0
    # for testing we are playing the game 10 times
    for e in range(10):
        # NOTE(review): loss is not used in the code visible here -- presumably a leftover from the training loop
        loss = 0.
        # reset the game
        env.reset()
        # the game is not over
        game_over = False
        # get initial input (the game screen flattened into a single row)
        input_t = env.observe()
        #display_screen(3, points, input_t)
        c += 1
        while not game_over:
            # the learner is acting on the last observed game screen
            # input_t is a vector representing the game screen
            input_tm1 = input_t
            # feed the learner the current status and get the expected rewards for different actions from it
            q = model.predict(input_tm1)
            # select the action with the highest expected reward
            action = np.argmax(q[0])
            # apply action, get rewards and new state
            input_t, reward, game_over = env.act(action)
            # update our score (reward is +1 for a catch, -1 for a miss, 0 while the fruit is still falling)
            points += reward
            # render the new screen together with the chosen action and the running score
            display_screen(action, points, input_t)
            c += 1