In [1]:
import numpy as np
import random
import os
import time
import torch 
from torch import tensor
from torch.autograd import Variable
from copy import deepcopy
from random import shuffle
from tensorboardX import SummaryWriter

In [2]:
from lib.turn_program_into_file import turn_program_into_file
from game.wrappers.game_container import GameContainer

In [3]:
# --- Run configuration ------------------------------------------------------
cpus = 4
player_size = 8  # side length of a player's square board (see run_game's reshape)

# Each run takes 2.6 sec per cpu, 38089 = a 24h run.
run_limit = 10
save_cycle = 2

# Shared game instance; run_game() below drives it through the global name.
# NOTE(review): 128, 128 is presumably the board width/height - confirm
# against GameContainer.
game = GameContainer(128, 128)

# --- Output locations -------------------------------------------------------
output_dir = "./output/pyramid/"
log_path = './output/logs/'  # defined here but not passed to SummaryWriter below

# The start time in whole seconds identifies this run and names its
# output directory.
run = int(time.time())
save_dir = '{}{}/'.format(output_dir, run)

# TensorBoard writer constructed with its default arguments.
writer = SummaryWriter()

In [4]:
# king_hill_ids = list(range(0, cpus))
# Create this run's output directory. exist_ok=True keeps the cell idempotent:
# re-running it in the same kernel (same `run` value) no longer raises
# FileExistsError.
os.makedirs(os.path.join(output_dir, str(run)), exist_ok=True)

In [None]:
# -*- coding: utf-8 -*-
import torch

top = 2


def _build_player_net(base):
    """Build one player's network.

    The layer widths are powers of ``base``:
    base**6 -> base**5 -> base**4 -> base**3 -> base**4 -> base**5 -> base**6,
    with ReLU between layers, Dropout(0.2) after the 1st, 3rd and 5th ReLU,
    and a final Sigmoid so every output lies in (0, 1).

    Args:
        base: integer base of the layer widths (``base**6`` inputs/outputs).

    Returns:
        A freshly initialised ``torch.nn.Sequential``.
    """
    return torch.nn.Sequential(
        torch.nn.Linear(base**6, base**5),
        torch.nn.ReLU(),
        torch.nn.Dropout(0.2),
        torch.nn.Linear(base**5, base**4),
        torch.nn.ReLU(),
        torch.nn.Linear(base**4, base**3),
        torch.nn.ReLU(),
        torch.nn.Dropout(0.2),
        torch.nn.Linear(base**3, base**4),
        torch.nn.ReLU(),
        torch.nn.Linear(base**4, base**5),
        torch.nn.ReLU(),
        torch.nn.Dropout(0.2),
        torch.nn.Linear(base**5, base**6),
        torch.nn.Sigmoid(),
    )


# Two independent networks with the same architecture, one per player.
# They are built in the same order as before (p1, then p2), so random
# initialisation consumes the RNG identically.
p1 = _build_player_net(top)
p2 = _build_player_net(top)


# Binary cross-entropy between a network's (0, 1) outputs and a 0/1 target board.
loss_fn = torch.nn.BCELoss()


def return_result(game_result, number):
    """Count the entries of ``game_result`` that compare equal to ``number``.

    Args:
        game_result: iterable of values (e.g. a flattened grid).
        number: value to count.

    Returns:
        The number of matching entries as an int (0 for an empty iterable).
    """
    return sum(1 for cell in game_result if cell == number)

def create_player(game_board_pt, player_size):
    """Binarise a network output tensor into a 0/1 integer NumPy board.

    Every value strictly greater than 0.5 becomes 1; everything else becomes 0.

    Args:
        game_board_pt: torch tensor of values to threshold. It is detached
            before conversion, so it may be part of an autograd graph.
        player_size: unused; kept so existing callers keep working.

    Returns:
        A new integer ``np.ndarray`` with the same shape as the input. It does
        not share memory with the tensor.
    """
    board = game_board_pt.detach().numpy()
    # The comparison allocates a fresh array, so no explicit copy is needed.
    # The builtin ``int`` replaces the ``np.int`` alias, which was removed in
    # NumPy 1.24 and raises AttributeError there.
    return (board > 0.5).astype(int)


def run_game(home, away, rounds, player_size):
    """Play ``home`` against ``away`` on the global ``game`` and score the result.

    Both boards are reshaped to ``(player_size, player_size)``, handed to
    ``game.add_players``, and the game is launched with ``rounds``. The final
    grid is then flattened and the cells equal to 1 are counted.

    Args:
        home: board values reshapeable to (player_size, player_size).
        away: board values reshapeable to (player_size, player_size).
        rounds: value passed to ``game.launch``.
        player_size: side length of each player's square board.

    Returns:
        Number of cells equal to 1 in the final grid.
        NOTE(review): presumably 1 marks the home player's cells - confirm
        against GameContainer.
    """
    board_shape = (player_size, player_size)
    home_grid = np.reshape(home, board_shape)
    away_grid = np.reshape(away, board_shape)

    game.add_players(home_grid, away_grid)
    game.launch(rounds)

    cells = np.array(game.gol.grid()).flatten()
    return return_result(cells, 1)

def create_loss_board(home, away, rounds, player_size, check_order):
    """Greedy tile-by-tile search for a variant of ``home`` that scores at least as well.

    Tiles are visited in ``check_order``. Each visited tile is flipped
    (1 -> 0, anything else -> 1) on top of the best board found so far, and the
    game is replayed. The flip is kept when the score is greater than or equal
    to the best score so far; otherwise it is discarded. If the best score ends
    up equal to the original score, ``check_order`` is shuffled in place and
    'shuffle' is printed.

    Args:
        home: flat board being improved (indexed by the entries of check_order).
        away: opponent board passed unchanged to ``run_game``.
        rounds: passed to ``run_game``.
        player_size: passed to ``run_game``.
        check_order: list of tile indices giving the visiting order.

    Returns:
        Tuple ``(target, original_score, best_score, check_order)`` where
        ``target`` is the best board as a float torch tensor.
    """
    baseline = run_game(home, away, rounds, player_size)
    best_board = deepcopy(home)
    candidate = deepcopy(home)
    best_score = baseline

    for tile in check_order:
        # Flip this tile relative to the original board.
        if home[tile] == 1.0:
            candidate[tile] = 0.0
        else:
            candidate[tile] = 1.0

        candidate_score = run_game(candidate, away, rounds, player_size)
        if candidate_score < best_score:
            # Worse than the best so far: throw the flip away.
            candidate = deepcopy(best_board)
            continue

        # Equal or better: keep the flip.
        best_board[tile] = candidate[tile]
        best_score = candidate_score

    if best_score == baseline:
        # No score gain with this visiting order; try a different one next time.
        shuffle(check_order)
        print('shuffle')

    target = torch.tensor(best_board, dtype=torch.float)
    return target, baseline, best_score, check_order

# Optimiser step size shared by both players.
learning_rate = 0.0002

# Order in which create_loss_board visits the tiles of each player's board;
# one index per tile of the flattened player_size x player_size board.
check_order_p1 = list(range(player_size**2))
check_order_p2 = list(range(player_size**2))

optimizer_p1 = torch.optim.Adam(p1.parameters(), lr=learning_rate)
optimizer_p2 = torch.optim.Adam(p2.parameters(), lr=learning_rate)

# Random initial network inputs, one value per board tile.
# torch.rand(..., requires_grad=True) replaces the deprecated
# Variable(torch.tensor(torch.rand(...))) form, which copy-constructs a tensor
# from a tensor and emits a UserWarning.
start_p1 = torch.rand(player_size * player_size, requires_grad=True)
start_p2 = torch.rand(player_size * player_size, requires_grad=True)

def _improvement_weight(score, top):
    """Return the loss weight ``1 - score / top``.

    Returns 0.0 when ``top`` is 0 instead of dividing by zero. The weight is
    also 0 whenever ``score == top``, i.e. when the search found no better
    score to learn from.
    """
    return 1 - (score / top) if top else 0.0


# Training loop. Each iteration:
#   1. runs both networks on their current inputs,
#   2. thresholds the outputs into 0/1 boards,
#   3. searches for a better-scoring board for each player (600 rounds per game),
#   4. trains each network towards its improved board with a weighted BCE loss,
#   5. feeds the improved boards back in as the next iteration's inputs.
for t in range(10000):

    p1_board = p1(start_p1)
    p2_board = p2(start_p2)

    # The board search is not differentiable and needs no autograd tracking.
    with torch.no_grad():
        p1_board_np = create_player(p1_board.clone(), player_size)
        p2_board_np = create_player(p2_board.clone(), player_size)

        p1_loss_board, p1_score, p1_top, check_order_p1 = create_loss_board(p1_board_np, p2_board_np, 600, player_size, check_order_p1)
        p2_loss_board, p2_score, p2_top, check_order_p2 = create_loss_board(p2_board_np, p1_board_np, 600, player_size, check_order_p2)

    loss_p1 = tensor(_improvement_weight(p1_score, p1_top)) * loss_fn(p1_board, p1_loss_board)
    loss_p1.backward()
    optimizer_p1.step()

    loss_p2 = tensor(_improvement_weight(p2_score, p2_top)) * loss_fn(p2_board, p2_loss_board)
    loss_p2.backward()
    optimizer_p2.step()

    # Clear gradients so they do not accumulate into the next iteration.
    optimizer_p1.zero_grad()
    optimizer_p2.zero_grad()

    # The improved boards become the next inputs. clone().detach() is the
    # supported way to copy a tensor (torch.tensor(tensor) warns).
    start_p1 = p1_loss_board.clone().detach().requires_grad_(True)
    start_p2 = p2_loss_board.clone().detach().requires_grad_(True)

    print(loss_p1, p1_score, p1_top)
    print(loss_p2, p2_score, p2_top)

    writer.add_scalars('600/loss', {'loss_p1': loss_p1, 'loss_p2': loss_p2}, t)
    writer.add_scalars('600/top', {'p1_top': p1_top, 'p2_top': p2_top}, t)
    writer.add_scalars('600/score', {'p1_score': p1_score, 'p2_score': p2_score}, t)

    if t % 100 == 0:
        print("Saving players:")
        print(p1_board, p2_board)
        # Reshape with player_size rather than a hard-coded (8, 8) so the
        # export follows the configured board size.
        board_shape = (player_size, player_size)
        turn_program_into_file(np.reshape(p1_board_np, board_shape), save_dir + "p1-cycle-" + str(t) + ".rle", "RKH", "EW", "")
        turn_program_into_file(np.reshape(p2_board_np, board_shape), save_dir + "p2-cycle-" + str(t) + ".rle", "RKH", "EW", "")

tensor(0.6614) 0 451
tensor(0.6435) 0 287
Saving players:
tensor([ 0.4536,  0.5305,  0.4568,  0.5655,  0.5141,  0.4601,  0.5051,
         0.5555,  0.4806,  0.4869,  0.4422,  0.4749,  0.5150,  0.5237,
         0.5285,  0.5091,  0.5035,  0.5161,  0.4681,  0.4656,  0.4512,
         0.5230,  0.4510,  0.5228,  0.4918,  0.5101,  0.4893,  0.4606,
         0.5031,  0.4794,  0.5254,  0.5023,  0.4972,  0.4661,  0.5043,
         0.4971,  0.4814,  0.5291,  0.5092,  0.5094,  0.4764,  0.5069,
         0.4912,  0.4429,  0.4961,  0.4558,  0.4754,  0.4900,  0.5246,
         0.5326,  0.4363,  0.5318,  0.4644,  0.5206,  0.4522,  0.5008,
         0.5247,  0.5093,  0.4978,  0.4557,  0.4758,  0.4670,  0.5202,
         0.4891]) tensor([ 0.4739,  0.4775,  0.4788,  0.4399,  0.5370,  0.5277,  0.5412,
         0.5020,  0.4375,  0.4910,  0.5622,  0.4714,  0.5005,  0.5544,
         0.5158,  0.4361,  0.4588,  0.5274,  0.4818,  0.4426,  0.5563,
         0.5540,  0.5033,  0.5506,  0.4619,  0.5308,  0.4666,  0.4752,
 

tensor(0.6395) 8 414
tensor(0.5516) 43 422
tensor(0.5967) 8 389
tensor(0.5448) 43 448
tensor(0.5838) 28 392
tensor(0.5477) 44 383
tensor(0.3599) 113 262
tensor(0.3131) 128 264
tensor(0.2800) 152 280
tensor(0.4781) 62 292
tensor(0.5752) 30 348
tensor(0.5908) 14 335
tensor(0.6232) 3 293
tensor(0.5477) 43 414
tensor(1.00000e-02 *
       8.7339) 252 293
tensor(0.1339) 247 317
tensor(0.4907) 40 208
tensor(0.5452) 26 237
tensor(0.2004) 287 425
tensor(0.6663) 3 329
tensor(0.4110) 94 292
tensor(0.3058) 195 387
tensor(0.4912) 69 312
tensor(0.6119) 0 480
tensor(0.6267) 8 440
tensor(0.5392) 43 421
tensor(0.2794) 152 280
tensor(0.4814) 62 292
tensor(0.5662) 47 442
tensor(0.5193) 42 324
tensor(0.1009) 285 342
tensor(0.6105) 4 425
tensor(0.6229) 4 244
tensor(0.3139) 146 303
tensor(0.5960) 10 419
tensor(0.2212) 303 482
tensor(0.5588) 18 194
tensor(0.5289) 43 417
tensor(0.5719) 8 163
tensor(0.4981) 59 382
tensor(0.1545) 287 386
tensor(0.5880) 0 233
tensor(0.5820) 14 265
tensor(0.5478) 43 489
tensor(0.

tensor(0.4751) 17 315
tensor(0.2668) 146 346
tensor(0.3883) 15 268
tensor(0.3529) 35 572
tensor(0.3086) 0 203
tensor(0.3013) 10 226
tensor(0.4576) 0 200
tensor(0.2234) 212 463
tensor(0.3060) 0 194
tensor(0.1024) 303 414
tensor(0.4022) 15 504
tensor(0.3894) 10 324
tensor(0.3102) 0 203
tensor(0.2508) 10 226
tensor(0.3727) 15 393
tensor(0.2671) 43 288
tensor(0.2880) 43 323
tensor(0.1728) 225 421
tensor(0.2852) 10 313
tensor(0.2426) 10 324
tensor(0.2526) 0 318
tensor(0.3361) 35 414
tensor(0.3007) 7 407
tensor(0.3692) 10 324
tensor(0.3262) 0 456
tensor(1.00000e-02 *
       6.1307) 186 232
tensor(0.2094) 23 316
tensor(0.2695) 35 492
tensor(0.2576) 40 247
tensor(0.2624) 35 421
tensor(0.3475) 6 203
tensor(0.3127) 10 324
tensor(0.3075) 6 203
tensor(0.2267) 10 324
tensor(0.3450) 10 226
tensor(0.2692) 10 324
tensor(0.2727) 0 203
tensor(0.3319) 10 226
tensor(0.2644) 9 359
tensor(0.2391) 10 188
tensor(0.3195) 10 221
tensor(0.2825) 43 387
tensor(0.3683) 6 260
tensor(0.2466) 43 284
Saving players:
te

tensor(0.1516) 0 203
tensor(0.4182) 10 385
tensor(0.2304) 0 203
tensor(0.3330) 10 385
tensor(0.2548) 40 406
tensor(0.2910) 10 388
tensor(0.1472) 6 203
tensor(0.2685) 10 385
tensor(0.4148) 17 301
tensor(0.2546) 10 424
tensor(0.1441) 24 203
tensor(0.2279) 10 376
tensor(0.1632) 4 203
tensor(0.3281) 10 381
tensor(0.2736) 17 307
tensor(0.3059) 10 186
tensor(0.2047) 6 203
tensor(0.2415) 10 385
tensor(0.1948) 68 301
tensor(0.1120) 128 205
tensor(1.00000e-02 *
       9.8411) 197 350
tensor(0.4699) 10 343
tensor(0.3415) 4 307
tensor(0.3604) 10 351
tensor(0.1134) 26 203
tensor(0.4681) 10 297
tensor(0.1123) 26 203
tensor(0.4491) 10 297
tensor(0.2950) 4 307
tensor(0.3670) 10 351
tensor(0.2892) 4 307
tensor(0.3394) 10 351
tensor(0.3011) 4 307
tensor(0.3463) 10 351
tensor(0.4415) 6 464
tensor(0.3730) 4 358
tensor(0.1497) 24 203
tensor(0.2320) 10 376
tensor(0.3874) 4 367
tensor(0.2667) 10 381
tensor(0.2985) 6 203
tensor(0.2346) 10 385
tensor(0.2806) 4 307
tensor(0.3700) 10 351
tensor(0.1039) 26 203
t

tensor(0.2786) 7 278
tensor(0.4501) 24 327
tensor(0.2744) 100 391
tensor(0.3113) 0 363
tensor(0.3771) 4 454
tensor(0.3137) 10 306
shuffle
tensor(0.) 429 429
tensor(0.3006) 29 218
tensor(0.3382) 8 371
tensor(0.2602) 24 327
tensor(0.2128) 74 224
tensor(0.2718) 24 344
shuffle
tensor(0.1134) 129 241
tensor(0.) 307 307
tensor(0.3704) 4 256
tensor(0.1677) 24 233
tensor(0.3599) 4 362
tensor(0.3639) 10 223
shuffle
tensor(1.00000e-02 *
       9.7672) 129 241
tensor(0.) 307 307
tensor(0.5227) 4 362
tensor(0.2451) 10 324
tensor(0.1053) 7 357
tensor(0.1993) 10 324
tensor(1.00000e-02 *
       3.8752) 345 417
tensor(0.3519) 11 320
shuffle
tensor(0.1755) 129 241
tensor(0.) 307 307
tensor(0.3120) 4 256
tensor(0.2755) 24 458
tensor(0.1919) 100 241
tensor(0.2336) 0 234
tensor(0.4186) 4 362
tensor(0.1806) 10 438
tensor(0.3202) 4 395
tensor(0.2535) 43 308
shuffle
tensor(0.) 429 429
tensor(0.2269) 29 389
tensor(1.00000e-02 *
       8.4719) 100 318
tensor(0.2359) 0 234
shuffle
tensor(0.1990) 129 318
tensor(

tensor(0.4205) 4 380
tensor(0.1781) 10 440
tensor(0.4412) 4 380
tensor(0.2395) 10 440
tensor(0.4747) 4 380
tensor(0.4006) 24 228
tensor(0.4827) 4 380
tensor(0.1761) 10 440
tensor(1.00000e-02 *
       3.3235) 318 353
tensor(0.4637) 10 207
Saving players:
tensor([ 0.3000,  0.6712,  0.1374,  0.8358,  0.7315,  0.1676,  0.6654,
         0.7784,  0.1980,  0.4382,  0.1472,  0.2393,  0.3982,  0.3752,
         0.7273,  0.8100,  0.7367,  0.8082,  0.2384,  0.1843,  0.1955,
         0.5893,  0.4202,  0.6876,  0.3794,  0.7376,  0.6448,  0.2349,
         0.7484,  0.2154,  0.7789,  0.7565,  0.4991,  0.3072,  0.7556,
         0.1268,  0.1855,  0.7203,  0.7293,  0.6826,  0.1709,  0.7323,
         0.7047,  0.1357,  0.3638,  0.3027,  0.0916,  0.7084,  0.6712,
         0.8303,  0.1648,  0.7890,  0.2371,  0.7923,  0.1804,  0.7162,
         0.7894,  0.7385,  0.8399,  0.1662,  0.5998,  0.2718,  0.7505,
         0.3139]) tensor([ 0.4238,  0.1331,  0.1251,  0.1523,  0.9820,  0.9580,  0.8201,
         0.9344,  

tensor(0.3038) 4 380
tensor(0.3596) 0 438
tensor(0.2930) 4 380
tensor(0.1224) 10 440
tensor(0.2973) 4 380
tensor(0.1049) 10 440
tensor(0.2536) 4 380
tensor(0.3331) 0 438
tensor(0.2605) 4 380
tensor(0.3928) 0 438
tensor(0.3540) 59 419
tensor(0.4677) 0 396
tensor(0.2681) 4 380
tensor(0.3193) 0 438
tensor(0.2774) 4 380
tensor(0.1440) 10 440
tensor(0.2638) 4 380
tensor(0.4093) 0 438
tensor(0.3747) 0 404
tensor(0.3707) 0 438
tensor(0.2854) 4 380
tensor(0.3599) 0 434
tensor(0.1842) 41 218
tensor(0.1265) 10 337
tensor(0.2027) 41 221
tensor(0.2960) 0 293
tensor(0.2914) 4 380
tensor(0.3320) 0 438
tensor(0.2391) 4 380
tensor(0.3235) 0 438
tensor(0.2755) 4 380
tensor(0.2564) 6 207
tensor(0.2568) 4 380
tensor(0.3461) 0 438
tensor(0.2189) 26 268
tensor(0.1558) 10 284
tensor(0.3113) 4 380
tensor(1.00000e-02 *
       8.4083) 10 440
tensor(0.4451) 14 412
tensor(0.1128) 10 440
tensor(0.2340) 4 380
tensor(0.1006) 10 503
tensor(0.2426) 4 380
tensor(0.3247) 10 440
tensor(0.2721) 4 380
tensor(0.2881) 0 434

tensor(0.5920) 4 370
tensor(0.1755) 8 438
tensor(0.2018) 10 311
tensor(0.2764) 8 358
tensor(0.2733) 3 321
tensor(0.2574) 79 385
tensor(0.2144) 41 380
tensor(0.2653) 0 293
tensor(0.3663) 0 451
tensor(0.1434) 79 385
tensor(0.2142) 10 311
tensor(0.2373) 39 409
tensor(0.3360) 4 331
tensor(0.2176) 0 434
tensor(0.5379) 4 380
tensor(0.1678) 0 438
tensor(0.3490) 4 331
tensor(0.1803) 0 385
tensor(0.3996) 0 178
tensor(0.1601) 75 385
tensor(0.3883) 3 205
tensor(0.1835) 0 438
tensor(0.1914) 10 311
tensor(0.2787) 8 358
tensor(0.4624) 6 228
tensor(0.1609) 8 417
tensor(0.5155) 4 380
tensor(0.1882) 0 385
tensor(0.5183) 0 335
tensor(0.1606) 37 385
tensor(0.2530) 4 489
tensor(0.2713) 0 271
tensor(0.3198) 14 236
tensor(0.1960) 0 137
tensor(0.3128) 0 537
tensor(0.1097) 130 385
Saving players:
tensor([ 0.1981,  0.7902,  0.0510,  0.9174,  0.7925,  0.3482,  0.7981,
         0.9179,  0.3985,  0.1562,  0.0795,  0.1167,  0.6562,  0.5929,
         0.8113,  0.8535,  0.8729,  0.8939,  0.3793,  0.1458,  0.1285,
   

tensor(0.3450) 4 410
tensor(0.3291) 39 449
tensor(0.3351) 8 427
tensor(0.2870) 39 228
tensor(0.3667) 3 296
tensor(0.4463) 39 284
tensor(0.1302) 15 311
tensor(0.1090) 75 385
tensor(0.2389) 15 311
tensor(0.1169) 75 385
tensor(0.4132) 0 178
tensor(0.1103) 75 385
tensor(0.2657) 4 348
tensor(0.1201) 39 332
tensor(0.1780) 117 261
tensor(0.1413) 33 379
tensor(0.2882) 4 348
tensor(0.1251) 39 332
tensor(0.1150) 15 311
tensor(0.1084) 75 385
tensor(0.3494) 3 205
tensor(0.1704) 8 438
tensor(0.1571) 15 311
tensor(0.1490) 75 385
tensor(0.3047) 0 355
tensor(0.2113) 75 349
tensor(0.3704) 0 178
tensor(0.1080) 75 385
tensor(0.1245) 10 311
tensor(0.1896) 39 409
tensor(0.2043) 3 321
tensor(0.2014) 79 385
tensor(0.1505) 15 311
tensor(0.1015) 75 385
tensor(0.3971) 0 178
tensor(0.1159) 75 385
tensor(0.1628) 10 311
tensor(0.2070) 39 409
tensor(0.1901) 10 311
tensor(0.1802) 0 385
tensor(0.1055) 15 311
tensor(0.1079) 75 385
tensor(0.2706) 4 348
tensor(0.1170) 39 332
tensor(1.00000e-02 *
       9.7163) 15 311
te

tensor(0.4483) 10 270
tensor(0.1876) 39 409
tensor(0.2851) 15 289
tensor(1.00000e-02 *
       5.7599) 75 385
tensor(0.2976) 15 289
tensor(1.00000e-02 *
       6.1550) 75 385
tensor(0.2551) 15 218
tensor(1.00000e-02 *
       7.5369) 75 385
tensor(0.3917) 10 395
tensor(1.00000e-02 *
       8.6522) 13 225
tensor(0.3027) 15 289
tensor(1.00000e-02 *
       6.5721) 75 385
tensor(0.2981) 15 289
tensor(1.00000e-02 *
       8.3991) 75 385
tensor(0.2286) 28 315
tensor(1.00000e-02 *
       5.6856) 75 385
tensor(0.2729) 15 289
tensor(1.00000e-02 *
       5.1497) 75 385
tensor(0.3375) 10 270
tensor(0.2056) 39 409
tensor(0.2832) 15 289
tensor(0.1054) 75 385
tensor(0.2693) 28 315
tensor(1.00000e-02 *
       4.9326) 75 385
tensor(0.2379) 4 473
tensor(0.2776) 16 244
tensor(0.3011) 15 289
tensor(1.00000e-02 *
       5.4816) 75 385
tensor(0.2761) 43 252
tensor(1.00000e-02 *
       2.5937) 257 385
tensor(0.3513) 10 395
tensor(1.00000e-02 *
       9.2235) 13 225
tensor(0.2551) 35 370
tensor(1.00000e-02 *
 