### Cleaning up leftover files from previous executions

In [1]:
# Shell escape: recursively delete the 'result' directory so the run starts clean.
# The -f flag keeps this silent when the directory does not exist.
!rm -rf result

### Importing modules and creating output directory

In [2]:
from game import Game
from watch import Watcher

import time
import os

# Create the output directory used by the cells below.
# exist_ok=True makes this a no-op when the directory is already there and
# removes the check-then-create race of a separate os.path.isdir() test.
os.makedirs('result', exist_ok=True)

pygame 1.9.6
Hello from the pygame community. https://www.pygame.org/contribute.html


### Running offline training
* No enemies
* Learning path

In [3]:
# Offline training run: collisions are disabled, then the parameters that were
# used are dumped to result/params_offline.txt.
# perf_counter() is monotonic, so the measured duration cannot be skewed by
# system clock adjustments the way time.time() can.
start = time.perf_counter()

col = False  # Collision False (offline training, no enemies, just to learn path)

watcher = None
# watcher = Watcher(show='periodic', period=5, duration=2, replay=False, fps=30, clock_flag=True)

game = Game(level=0, watcher=watcher, colision=col)

# General parameters
game.player_max_moves = 200
game.player_max_max_moves = 800
game.player_vel = 5

# Every game.player_moves_interval iterations, game.player_max_moves is increased by game.player_moves_step
game.player_moves_step = 15
game.player_moves_interval = 10

# Every game.eps_decrease_interval, game.learn.eps is halved
game.eps_decrease_interval = 200

game.iteration_print_interval = 100
game.max_iterations = 1000

# Q-Learning parameters
game.learn.eps = 0.9
game.learn.lr = 0.6
game.learn.gamma = 0.99

# Behaviour flags (collision itself is set through the Game constructor above)
game.checkpoints = False   # Checkpoints are deprecated, though I'm not sure if I can remove this
game.constant_eps = False  # Iff true ignore game.eps_decrease_interval

game.start()

finish = time.perf_counter()
print('Execution time ', time.strftime('%H:%M:%S', time.gmtime(finish - start)))

filename = "result/params_offline.txt"

# (label, value) pairs written to the parameter file, one "label = value" line
# each, in this order. Values are read after game.start() has returned.
# Note that the 'game.epochs' label reports game.iter_num.
run_params = [
    ('game.player_max_moves', game.player_max_moves),
    ('game.player_max_max_moves', game.player_max_max_moves),
    ('game.player_vel', game.player_vel),
    ('game.player_moves_step', game.player_moves_step),
    ('game.player_moves_interval', game.player_moves_interval),
    ('game.eps_decrease_interval', game.eps_decrease_interval),
    ('game.iteration_print_interval', game.iteration_print_interval),
    ('game.max_iterations', game.max_iterations),
    ('game.learn.eps', game.learn.eps),
    ('game.learn.lr', game.learn.lr),
    ('game.learn.gamma', game.learn.gamma),
    ('game.colision', game.colision),
    ('game.checkpoints', game.checkpoints),
    ('game.constant_eps', game.constant_eps),
    ('game.epochs', game.iter_num),
    ('game.player.mov_num', game.player.mov_num),
    ('game.qtable_size', game.qtable_size),
]

with open(filename, 'w') as f:
    f.writelines(label + ' = ' + str(value) + '\n' for label, value in run_params)


Iteration:  0
Q-Table size:  0
Iteration:  100
Q-Table size:  0
Iteration:  200
Q-Table size:  0
Hooorraaaay
Win after 228 iterations
Max moves:  530
Moves used:  476
Hooorraaaay
Win after 242 iterations
Max moves:  560
Moves used:  354
Hooorraaaay
Win after 251 iterations
Max moves:  575
Moves used:  449
Hooorraaaay
Win after 263 iterations
Max moves:  590
Moves used:  356
Hooorraaaay
Win after 264 iterations
Max moves:  590
Moves used:  363
Hooorraaaay
Win after 267 iterations
Max moves:  590
Moves used:  463
Hooorraaaay
Win after 268 iterations
Max moves:  590
Moves used:  399
Hooorraaaay
Win after 270 iterations
Max moves:  590
Moves used:  246
Hooorraaaay
Win after 277 iterations
Max moves:  590
Moves used:  326
Hooorraaaay
Win after 278 iterations
Max moves:  590
Moves used:  465
Hooorraaaay
Win after 279 iterations
Max moves:  590
Moves used:  537
Hooorraaaay
Win after 280 iterations
Max moves:  590
Moves used:  501
Hooorraaaay
Win after 282 iterations
Max moves:  590
Moves used

Hooorraaaay
Win after 408 iterations
Max moves:  620
Moves used:  361
Hooorraaaay
Win after 409 iterations
Max moves:  620
Moves used:  179
Hooorraaaay
Win after 410 iterations
Max moves:  620
Moves used:  298
Hooorraaaay
Win after 411 iterations
Max moves:  620
Moves used:  156
Hooorraaaay
Win after 412 iterations
Max moves:  620
Moves used:  147
Hooorraaaay
Win after 413 iterations
Max moves:  620
Moves used:  351
Hooorraaaay
Win after 414 iterations
Max moves:  620
Moves used:  177
Hooorraaaay
Win after 415 iterations
Max moves:  620
Moves used:  229
Hooorraaaay
Win after 416 iterations
Max moves:  620
Moves used:  159
Hooorraaaay
Win after 417 iterations
Max moves:  620
Moves used:  273
Hooorraaaay
Win after 418 iterations
Max moves:  620
Moves used:  140
Hooorraaaay
Win after 419 iterations
Max moves:  620
Moves used:  447
Hooorraaaay
Win after 420 iterations
Max moves:  620
Moves used:  186
Hooorraaaay
Win after 421 iterations
Max moves:  620
Moves used:  274
Hooorraaaay
Win afte

Hooorraaaay
Win after 539 iterations
Max moves:  620
Moves used:  151
Hooorraaaay
Win after 540 iterations
Max moves:  620
Moves used:  161
Hooorraaaay
Win after 541 iterations
Max moves:  620
Moves used:  125
Hooorraaaay
Win after 542 iterations
Max moves:  620
Moves used:  156
Hooorraaaay
Win after 543 iterations
Max moves:  620
Moves used:  183
Hooorraaaay
Win after 544 iterations
Max moves:  620
Moves used:  143
Hooorraaaay
Win after 545 iterations
Max moves:  620
Moves used:  124
Hooorraaaay
Win after 546 iterations
Max moves:  620
Moves used:  113
Hooorraaaay
Win after 547 iterations
Max moves:  620
Moves used:  126
Hooorraaaay
Win after 548 iterations
Max moves:  620
Moves used:  169
Hooorraaaay
Win after 549 iterations
Max moves:  620
Moves used:  110
Hooorraaaay
Win after 550 iterations
Max moves:  620
Moves used:  146
Hooorraaaay
Win after 551 iterations
Max moves:  620
Moves used:  172
Hooorraaaay
Win after 552 iterations
Max moves:  620
Moves used:  169
Hooorraaaay
Win afte

Hooorraaaay
Win after 658 iterations
Max moves:  620
Moves used:  142
Hooorraaaay
Win after 659 iterations
Max moves:  620
Moves used:  147
Hooorraaaay
Win after 660 iterations
Max moves:  620
Moves used:  130
Hooorraaaay
Win after 661 iterations
Max moves:  620
Moves used:  141
Hooorraaaay
Win after 662 iterations
Max moves:  620
Moves used:  145
Hooorraaaay
Win after 663 iterations
Max moves:  620
Moves used:  163
Hooorraaaay
Win after 664 iterations
Max moves:  620
Moves used:  151
Hooorraaaay
Win after 665 iterations
Max moves:  620
Moves used:  160
Hooorraaaay
Win after 666 iterations
Max moves:  620
Moves used:  118
Hooorraaaay
Win after 667 iterations
Max moves:  620
Moves used:  140
Hooorraaaay
Win after 668 iterations
Max moves:  620
Moves used:  124
Hooorraaaay
Win after 669 iterations
Max moves:  620
Moves used:  140
Hooorraaaay
Win after 670 iterations
Max moves:  620
Moves used:  136
Hooorraaaay
Win after 671 iterations
Max moves:  620
Moves used:  145
Hooorraaaay
Win afte

Hooorraaaay
Win after 787 iterations
Max moves:  620
Moves used:  149
Hooorraaaay
Win after 788 iterations
Max moves:  620
Moves used:  152
Hooorraaaay
Win after 789 iterations
Max moves:  620
Moves used:  132
Hooorraaaay
Win after 790 iterations
Max moves:  620
Moves used:  151
Hooorraaaay
Win after 791 iterations
Max moves:  620
Moves used:  132
Hooorraaaay
Win after 792 iterations
Max moves:  620
Moves used:  203
Hooorraaaay
Win after 793 iterations
Max moves:  620
Moves used:  173
Hooorraaaay
Win after 794 iterations
Max moves:  620
Moves used:  142
Hooorraaaay
Win after 795 iterations
Max moves:  620
Moves used:  144
Hooorraaaay
Win after 796 iterations
Max moves:  620
Moves used:  122
Hooorraaaay
Win after 797 iterations
Max moves:  620
Moves used:  142
Hooorraaaay
Win after 798 iterations
Max moves:  620
Moves used:  127
Hooorraaaay
Win after 799 iterations
Max moves:  620
Moves used:  123
Hooorraaaay
Win after 800 iterations
Max moves:  620
Moves used:  128
Iteration:  800
Q-Ta

Hooorraaaay
Win after 915 iterations
Max moves:  620
Moves used:  139
Hooorraaaay
Win after 916 iterations
Max moves:  620
Moves used:  100
Hooorraaaay
Win after 917 iterations
Max moves:  620
Moves used:  110
Hooorraaaay
Win after 918 iterations
Max moves:  620
Moves used:  132
Hooorraaaay
Win after 919 iterations
Max moves:  620
Moves used:  118
Hooorraaaay
Win after 920 iterations
Max moves:  620
Moves used:  139
Hooorraaaay
Win after 921 iterations
Max moves:  620
Moves used:  122
Hooorraaaay
Win after 922 iterations
Max moves:  620
Moves used:  121
Hooorraaaay
Win after 923 iterations
Max moves:  620
Moves used:  130
Hooorraaaay
Win after 924 iterations
Max moves:  620
Moves used:  125
Hooorraaaay
Win after 925 iterations
Max moves:  620
Moves used:  138
Hooorraaaay
Win after 926 iterations
Max moves:  620
Moves used:  117
Hooorraaaay
Win after 927 iterations
Max moves:  620
Moves used:  169
Hooorraaaay
Win after 928 iterations
Max moves:  620
Moves used:  190
Hooorraaaay
Win afte

### Running online training
* With enemies
* Learning to dodge

In [4]:
# Online training run: collisions are enabled, then the parameters that were
# used are dumped to result/params_online.txt.
# perf_counter() is monotonic, so the measured duration cannot be skewed by
# system clock adjustments the way time.time() can.
start = time.perf_counter()

col = True  # Collision True (online training, with enemies, to learn to dodge enemies)

# watcher = None

# after 5 iterations, show 2
# watcher = Watcher(show='periodic', period=5, duration=2, replay=False, fps=30, clock_flag=True)

# show every iteration
# watcher = Watcher(show='all', replay=False, fps=30, clock_flag=True)

# shows only replay at the end
watcher = Watcher(show='nothing', replay=True, fps=30, clock_flag=True)

game = Game(level=0, watcher=watcher, colision=col)

# General parameters
game.player_max_moves = 200
game.player_max_max_moves = 800
game.player_vel = 5

# Every game.player_moves_interval iterations, game.player_max_moves is increased by game.player_moves_step
game.player_moves_step = 15
game.player_moves_interval = 10

# Every game.eps_decrease_interval, game.learn.eps is halved
game.eps_decrease_interval = 200

game.iteration_print_interval = 50
game.max_iterations = 700

# Q-Learning parameters
game.learn.eps = 0.15
game.learn.lr = 0.3
game.learn.gamma = 0.9

# Behaviour flags (collision itself is set through the Game constructor above)
game.checkpoints = False   # Checkpoints are deprecated, though I'm not sure if I can remove this
game.constant_eps = False  # Iff true ignore game.eps_decrease_interval

game.start()

finish = time.perf_counter()
print('Execution time ', time.strftime('%H:%M:%S', time.gmtime(finish - start)))

filename = "result/params_online.txt"

# (label, value) pairs written to the parameter file, one "label = value" line
# each, in this order. Values are read after game.start() has returned.
# Note that the 'game.epochs' label reports game.iter_num.
run_params = [
    ('game.player_max_moves', game.player_max_moves),
    ('game.player_max_max_moves', game.player_max_max_moves),
    ('game.player_vel', game.player_vel),
    ('game.player_moves_step', game.player_moves_step),
    ('game.player_moves_interval', game.player_moves_interval),
    ('game.eps_decrease_interval', game.eps_decrease_interval),
    ('game.iteration_print_interval', game.iteration_print_interval),
    ('game.max_iterations', game.max_iterations),
    ('game.learn.eps', game.learn.eps),
    ('game.learn.lr', game.learn.lr),
    ('game.learn.gamma', game.learn.gamma),
    ('game.colision', game.colision),
    ('game.checkpoints', game.checkpoints),
    ('game.constant_eps', game.constant_eps),
    ('game.epochs', game.iter_num),
    ('game.player.mov_num', game.player.mov_num),
    ('game.qtable_size', game.qtable_size),
]

with open(filename, 'w') as f:
    f.writelines(label + ' = ' + str(value) + '\n' for label, value in run_params)


Iteration:  0
Q-Table size:  12
Iteration:  50
Q-Table size:  1143
Hooorraaaay
Win after 66 iterations
Max moves:  290
Moves used:  111
Press ENTER to start replay
Execution time  00:00:10


### Showing replay

In [5]:
# Shell escape: run play_replay.py in a separate Python process with the
# arguments 30 and result/replay.p.
# NOTE(review): 30 is presumably the playback fps and result/replay.p the replay
# saved by the training run with replay=True — confirm against play_replay.py.
!python play_replay.py 30 result/replay.p

pygame 1.9.6
Hello from the pygame community. https://www.pygame.org/contribute.html


### Generating result heatmaps

In [6]:
# Shell escape: run generate_heatmaps.sh with bash.
# NOTE(review): judging by the captured output, the script reads PNG frames from
# result/online_heatmap/ and encodes them to video — confirm against the script.
!bash generate_heatmaps.sh

The set_clim function was deprecated in Matplotlib 3.1 and will be removed in 3.3. Use ScalarMappable.set_clim instead.
  cbar.set_clim(0, maxlog)
Input #0, image2, from 'result/online_heatmap/im_%04d.png':
  Duration: 00:00:04.44, start: 0.000000, bitrate: N/A
    Stream #0:0: Video: png, rgba(pc), 524x388 [SAR 5905:5905 DAR 131:97], 25 fps, 25 tbr, 25 tbn, 25 tbc
Stream mapping:
  Stream #0:0 -> #0:0 (png (native) -> h264 (libx264rgb))
Press [q] to stop, [?] for help
[1;36m[libx264rgb @ 0x556a54b559a0] [0musing SAR=1/1
[1;36m[libx264rgb @ 0x556a54b559a0] [0musing cpu capabilities: MMX2 SSE2Fast SSSE3 SSE4.2 AVX FMA3 BMI2 AVX2
[1;36m[libx264rgb @ 0x556a54b559a0] [0mprofile High 4:4:4 Predictive, level 3.1, 4:4:4 8-bit
[1;36m[libx264rgb @ 0x556a54b559a0] [0m264 - core 152 r2854 e9a5903 - H.264/MPEG-4 AVC codec - Copyleft 2003-2017 - http://www.videolan.org/x264.html - options: cabac=1 ref=3 deblock=1:0:0 analyse=0x1:0x111 me=hex subme=7 psy=1 psy_rd=1.00:0.00 mixed_ref=1 me_ran