In [1]:
import numpy as np
import random
import os
import time
import torch 
from torch import tensor
from torch.autograd import Variable
from copy import deepcopy
from random import shuffle
from tensorboardX import SummaryWriter

In [2]:
from lib.turn_program_into_file import turn_program_into_file
from game.wrappers.game_container import GameContainer

In [3]:
# --- Run configuration ------------------------------------------------------
# NOTE(review): `cpus` is only mentioned in commented-out code in the visible
# cells; no live statement reads it.
cpus = 4
# Side length of a player's square board: boards are reshaped to
# (player_size, player_size) before being handed to the game.
player_size = 8
# Shared game instance used by every simulated match.
# NOTE(review): the two 128s are presumably the grid dimensions — confirm
# against GameContainer.
game = GameContainer(128, 128)
# Each run takes 2.6 sec per cpu, 38089 = a 24h run.
# NOTE(review): `run_limit` and `save_cycle` are not read by the visible
# training loop, which hard-codes its own iteration count and save interval.
run_limit = 10
save_cycle = 2
output_dir = "./output/pyramid/"
# NOTE(review): `log_path` is not passed to SummaryWriter() below — TODO
# confirm whether the writer was meant to log there.
log_path = './output/logs/'
# Whole-second Unix timestamp, used as the name of this run's output folder.
run = int(time.time())
# Folder that the saved player files are written into.
save_dir = output_dir + str(run) + '/'
# Writer constructed with its default arguments.
writer = SummaryWriter()

In [4]:
# king_hill_ids = list(range(0, cpus))
# Create this run's output folder. `exist_ok=True` keeps the cell re-runnable:
# `run` stays fixed until the configuration cell is executed again, so running
# this cell a second time would otherwise raise FileExistsError.
os.makedirs(os.path.join(output_dir, str(run)), exist_ok=True)

In [None]:
# -*- coding: utf-8 -*-
import torch

top = 2


def _build_player_net(base):
    """Return a freshly initialised hourglass-shaped player network.

    The fully connected layers go from ``base**6`` units down to ``base**3``
    and back up to ``base**6``. Hidden layers use ReLU, with dropout (p=0.2)
    after the first, third and fifth of them, and the output layer uses a
    sigmoid so every output lies in [0, 1].
    """
    return torch.nn.Sequential(
        # Contracting half.
        torch.nn.Linear(base**6, base**5),
        torch.nn.ReLU(),
        torch.nn.Dropout(0.2),
        torch.nn.Linear(base**5, base**4),
        torch.nn.ReLU(),
        torch.nn.Linear(base**4, base**3),
        torch.nn.ReLU(),
        torch.nn.Dropout(0.2),
        # Expanding half.
        torch.nn.Linear(base**3, base**4),
        torch.nn.ReLU(),
        torch.nn.Linear(base**4, base**5),
        torch.nn.ReLU(),
        torch.nn.Dropout(0.2),
        torch.nn.Linear(base**5, base**6),
        torch.nn.Sigmoid(),
    )


# Two separate networks with the same architecture, one per player. p1 is
# built completely before p2, so the order of random weight initialisation is
# the same as when both were spelled out one after the other.
p1 = _build_player_net(top)
p2 = _build_player_net(top)


# Binary cross-entropy between a network's output and a 0/1 target board.
loss_fn = torch.nn.BCELoss()


def return_result(game_result, number):
    """Count how many entries of ``game_result`` compare equal to ``number``.

    Args:
        game_result: iterable of cell values (e.g. a flattened grid).
        number: the value to look for.

    Returns:
        int: the number of matching entries (0 for an empty iterable).
    """
    return sum(1 for cell in game_result if cell == number)

def create_player(game_board_pt, player_size):
    """Turn a network output into a 0/1 integer board.

    Every entry strictly greater than 0.5 becomes 1; everything else becomes 0.

    Args:
        game_board_pt: torch tensor of per-cell values. It is detached before
            conversion, so tensors that require grad are accepted, and it is
            not modified.
        player_size: unused; kept so existing callers keep working.

    Returns:
        numpy.ndarray: integer array with the same shape as the input.
    """
    board = game_board_pt.detach().numpy()
    # The comparison allocates a new array, so the tensor's storage (which
    # `.numpy()` shares) is never written to. The builtin `int` replaces the
    # `np.int` alias, which newer NumPy releases no longer provide.
    return (board > 0.5).astype(int)


def run_game(home, away, rounds, player_size):
    """Play one match on the module-level ``game`` and score the final grid.

    Args:
        home: flat board holding ``player_size**2`` cells.
        away: flat board holding ``player_size**2`` cells.
        rounds: value passed to ``game.launch``.
        player_size: side length both boards are reshaped to.

    Returns:
        int: how many cells of the flattened final grid equal 1.
        NOTE(review): presumably 1 marks the home player's cells — confirm
        against GameContainer.
    """
    board_shape = (player_size, player_size)
    home_board = np.reshape(home, board_shape)
    away_board = np.reshape(away, board_shape)

    game.add_players(home_board, away_board)
    game.launch(rounds)

    cells = np.array(game.gol.grid()).flatten()
    return return_result(cells, 1)

def create_loss_board(home, away, rounds, player_size, check_order):
    """Greedily search for a better home board, one tile flip at a time.

    Tiles are visited in ``check_order``. Each visited tile is set to the
    opposite of its value in the original ``home`` board and the match is
    replayed; the flip is kept when the score is at least as high as the best
    score so far, and undone otherwise.

    Args:
        home: flat 0/1 board to improve (left unmodified).
        away: flat 0/1 opponent board.
        rounds: value forwarded to ``run_game``.
        player_size: side length forwarded to ``run_game``.
        check_order: list of tile indices to try. It is shuffled IN PLACE
            (and 'shuffle' is printed) when the best score ends up equal to
            the starting score.

    Returns:
        tuple: (best board as a float torch tensor, score of the unmodified
        ``home`` board, best score reached, ``check_order``).
    """
    baseline = run_game(home, away, rounds, player_size)
    best_board = deepcopy(home)
    candidate = deepcopy(home)
    best_score = baseline

    for idx in check_order:
        # Flip this tile relative to the original home board.
        if home[idx] == 1.0:
            candidate[idx] = 0.0
        else:
            candidate[idx] = 1.0
        candidate_score = run_game(candidate, away, rounds, player_size)

        if candidate_score < best_score:
            # Worse than the best so far: drop the flip and continue from the
            # best board.
            candidate = deepcopy(best_board)
            continue

        # Equal or better: keep the flip.
        best_board[idx] = candidate[idx]
        best_score = candidate_score

    if best_score == baseline:
        # The score did not move; try the tiles in a different order next time.
        shuffle(check_order)
        print('shuffle')

    target = torch.tensor(best_board, dtype=torch.float)
    return target, baseline, best_score, check_order

# --- Training setup ---------------------------------------------------------
learning_rate = 0.0002
game_rounds = 1000        # rounds simulated per match
train_iterations = 10000  # optimisation steps per player
save_interval = 100       # write both players to disk every N iterations
tag_prefix = str(game_rounds)  # groups the TensorBoard scalars, e.g. '1000/loss'

# Order in which each player's tiles are tried by create_loss_board.
check_order_p1 = list(range(0, player_size**2))
check_order_p2 = list(range(0, player_size**2))
optimizer_p1 = torch.optim.Adam(p1.parameters(), lr=learning_rate)
optimizer_p2 = torch.optim.Adam(p2.parameters(), lr=learning_rate)


def _improvement_weight(score, top_score):
    """Return the relative headroom ``1 - score / top_score`` as a float.

    When ``top_score`` is 0, the weight is 0.0 instead of raising
    ZeroDivisionError.
    """
    if top_score == 0:
        return 0.0
    return 1 - (score / top_score)


# Random starting inputs, one per network. `requires_grad=True` is kept from
# the original code; the deprecated Variable wrapper and the extra
# torch.tensor(...) copy are dropped.
start_p1 = torch.rand(player_size * player_size, requires_grad=True)
start_p2 = torch.rand(player_size * player_size, requires_grad=True)

for t in range(train_iterations):

    p1_board = p1(start_p1)
    p2_board = p2(start_p2)

    # Binarise the outputs and search for better boards. None of this needs to
    # be tracked by autograd.
    with torch.no_grad():
        p1_board_np = create_player(p1_board.clone(), player_size)
        p2_board_np = create_player(p2_board.clone(), player_size)

        p1_loss_board, p1_score, p1_top, check_order_p1 = create_loss_board(
            p1_board_np, p2_board_np, game_rounds, player_size, check_order_p1)
        p2_loss_board, p2_score, p2_top, check_order_p2 = create_loss_board(
            p2_board_np, p1_board_np, game_rounds, player_size, check_order_p2)

    # BCE towards the improved board, scaled by how far the current score is
    # from the best score found.
    optimizer_p1.zero_grad()
    loss_p1 = torch.tensor(_improvement_weight(p1_score, p1_top)) * loss_fn(p1_board, p1_loss_board)
    loss_p1.backward()
    optimizer_p1.step()

    optimizer_p2.zero_grad()
    loss_p2 = torch.tensor(_improvement_weight(p2_score, p2_top)) * loss_fn(p2_board, p2_loss_board)
    loss_p2.backward()
    optimizer_p2.step()

    # The improved boards become the next iteration's network inputs.
    # clone().detach() is the documented way to copy an existing tensor.
    start_p1 = p1_loss_board.clone().detach().requires_grad_(True)
    start_p2 = p2_loss_board.clone().detach().requires_grad_(True)

    print(loss_p1, p1_score, p1_top)
    print(loss_p2, p2_score, p2_top)

    # Log plain Python numbers so the writer does not receive graph-attached
    # tensors.
    writer.add_scalars(tag_prefix + '/loss', {'loss_p1': loss_p1.item(), 'loss_p2': loss_p2.item()}, t)
    writer.add_scalars(tag_prefix + '/top', {'p1_top': p1_top, 'p2_top': p2_top}, t)
    writer.add_scalars(tag_prefix + '/score', {'p1_score': p1_score, 'p2_score': p2_score}, t)

    if t % save_interval == 0:
        print("Saving players:")
        print(p1_board, p2_board)
        # Reshape with player_size rather than a hard-coded (8, 8), so the save
        # step keeps working if the board size is changed.
        board_shape = (player_size, player_size)
        turn_program_into_file(np.reshape(p1_board_np, board_shape), save_dir + "p1-cycle-" + str(t) + ".rle", "RKH", "EW", "")
        turn_program_into_file(np.reshape(p2_board_np, board_shape), save_dir + "p2-cycle-" + str(t) + ".rle", "RKH", "EW", "")

tensor(0.6314) 14 372
tensor(0.6215) 19 321
Saving players:
tensor([ 0.5517,  0.4705,  0.4554,  0.4797,  0.4824,  0.4681,  0.5055,
         0.5693,  0.4624,  0.4689,  0.5605,  0.4843,  0.5067,  0.5290,
         0.4748,  0.5371,  0.5306,  0.5421,  0.4563,  0.4893,  0.5021,
         0.5561,  0.5039,  0.4768,  0.4803,  0.5185,  0.5132,  0.4736,
         0.4885,  0.5343,  0.5450,  0.4943,  0.5193,  0.4609,  0.5013,
         0.5025,  0.5425,  0.5235,  0.5052,  0.5090,  0.4642,  0.5221,
         0.4915,  0.4991,  0.4989,  0.5072,  0.4706,  0.5382,  0.5406,
         0.5443,  0.5062,  0.5357,  0.5200,  0.5230,  0.5483,  0.5342,
         0.5174,  0.5351,  0.5364,  0.4436,  0.4765,  0.4487,  0.5068,
         0.4800]) tensor([ 0.4726,  0.5079,  0.4805,  0.5197,  0.5058,  0.4765,  0.5342,
         0.4521,  0.5007,  0.4819,  0.5196,  0.4869,  0.4994,  0.5212,
         0.5103,  0.5452,  0.4974,  0.4947,  0.4885,  0.5307,  0.5142,
         0.5009,  0.4851,  0.4904,  0.4683,  0.4938,  0.4528,  0.4682,

tensor(0.6103) 8 407
tensor(0.6018) 19 407
tensor(0.5722) 60 504
tensor(0.6072) 11 197
tensor(0.5810) 60 637
tensor(0.6075) 8 306
tensor(0.4323) 119 394
tensor(0.3995) 79 217
tensor(0.6212) 8 558
tensor(0.4274) 89 281
tensor(0.6113) 8 327
tensor(0.6172) 6 195
tensor(0.5962) 8 306
tensor(0.6097) 8 435
shuffle
tensor(0.6313) 4 577
tensor(0.) 419 419
tensor(0.6000) 8 306
tensor(0.6363) 6 443
tensor(0.6066) 8 437
tensor(0.5940) 11 201
shuffle
tensor(0.6341) 4 577
tensor(0.) 419 419
tensor(0.6311) 8 324
tensor(0.5920) 18 262
tensor(0.6373) 4 343
tensor(0.2597) 170 292
tensor(0.6144) 8 558
tensor(0.4850) 89 378
tensor(0.6093) 8 322
tensor(0.5829) 31 394
tensor(0.6233) 8 558
tensor(0.4737) 89 378
tensor(0.5430) 48 387
tensor(0.6186) 11 380
tensor(0.5940) 8 306
tensor(0.5926) 27 482
tensor(0.3114) 195 382
tensor(0.6225) 4 358
tensor(0.6014) 8 327
tensor(0.6118) 6 297
shuffle
tensor(0.6257) 4 577
tensor(0.) 419 419
tensor(0.5912) 8 306
tensor(0.6162) 25 564
tensor(0.6383) 0 152
tensor(0.4503) 1

tensor(0.3593) 29 314
tensor(0.3041) 110 345
tensor(0.4809) 21 404
tensor(0.4490) 16 349
tensor(0.4574) 21 377
tensor(0.3203) 54 204
tensor(0.4289) 19 251
tensor(0.4017) 0 355
tensor(1.00000e-02 *
       8.9843) 339 436
tensor(0.5234) 0 278
tensor(0.3747) 21 311
tensor(0.5748) 4 371
tensor(0.4752) 21 373
tensor(0.4851) 8 313
tensor(0.5118) 4 326
tensor(0.4470) 0 375
tensor(0.3814) 19 273
tensor(0.3953) 41 270
tensor(0.4366) 21 311
tensor(0.4345) 4 371
tensor(0.5176) 21 772
tensor(0.4523) 0 256
tensor(0.5165) 4 316
tensor(0.3795) 86 311
tensor(0.4193) 21 625
tensor(0.4180) 6 261
tensor(0.4166) 21 229
tensor(0.2232) 105 261
tensor(0.3486) 42 287
tensor(0.4247) 0 539
tensor(0.4998) 0 350
tensor(0.4787) 12 261
tensor(0.4780) 21 373
tensor(0.3514) 13 337
Saving players:
tensor([ 0.6974,  0.3781,  0.2975,  0.2188,  0.1865,  0.0839,  0.7063,
         0.8506,  0.5961,  0.3641,  0.8045,  0.1987,  0.7769,  0.7408,
         0.3461,  0.8432,  0.7242,  0.7568,  0.1709,  0.2370,  0.7054,
         0.

tensor(0.2960) 10 817
tensor(0.2841) 6 486
tensor(0.3289) 35 393
tensor(1.00000e-02 *
       7.9700) 105 165
tensor(0.3624) 19 310
tensor(0.4249) 13 325
tensor(0.1742) 29 314
tensor(0.1357) 110 293
tensor(0.1668) 29 314
tensor(0.1826) 110 339
tensor(0.4543) 10 277
tensor(0.3161) 13 231
tensor(0.3230) 41 452
tensor(0.2294) 103 365
tensor(0.3039) 19 251
tensor(0.3892) 0 160
tensor(0.2917) 21 248
tensor(0.1692) 105 337
tensor(0.2884) 18 324
tensor(0.1855) 105 307
tensor(0.2945) 19 556
tensor(0.3029) 23 263
tensor(0.3443) 21 248
tensor(1.00000e-02 *
       8.2542) 105 173
tensor(0.3663) 21 772
tensor(0.3777) 0 315
tensor(0.3357) 19 452
tensor(1.00000e-02 *
       9.1780) 105 176
tensor(0.3220) 21 248
tensor(0.1423) 105 337
tensor(0.3358) 19 452
tensor(0.1933) 105 522
tensor(0.4065) 11 219
tensor(0.1932) 105 323
tensor(0.2574) 21 248
tensor(0.1494) 105 337
tensor(0.2699) 18 324
tensor(0.2493) 105 307
tensor(0.1785) 21 229
tensor(0.1567) 105 337
tensor(0.1772) 21 229
tensor(0.1578) 105 337
t

tensor(0.1353) 46 405
tensor(0.1364) 88 283
shuffle
tensor(0.) 447 447
tensor(1.00000e-02 *
       5.6924) 94 140
tensor(0.2818) 19 383
tensor(1.00000e-02 *
       6.5172) 105 176
tensor(0.4128) 21 472
tensor(1.00000e-02 *
       7.2706) 105 177
tensor(0.3168) 41 251
tensor(0.1084) 103 365
tensor(0.3216) 46 324
tensor(0.1289) 88 283
tensor(0.2543) 19 383
tensor(0.1428) 105 522
tensor(0.4234) 42 328
tensor(0.2415) 24 286
tensor(0.2926) 19 189
tensor(1.00000e-02 *
       6.0457) 105 176
tensor(0.1574) 43 178
tensor(0.2971) 12 244
tensor(0.3191) 46 358
tensor(0.1252) 88 283
tensor(0.5678) 0 321
tensor(0.2855) 26 374
tensor(0.3189) 46 358
tensor(0.2053) 88 373
tensor(0.1438) 57 375
tensor(0.1167) 105 284
tensor(0.2565) 41 251
tensor(0.1208) 103 365
tensor(0.2935) 19 189
tensor(1.00000e-02 *
       6.0804) 105 176
tensor(0.2675) 19 189
tensor(0.1521) 105 522
tensor(0.3010) 41 251
tensor(0.1153) 103 365
tensor(0.1419) 57 375
tensor(0.1173) 105 284
tensor(0.3913) 35 332
tensor(1.00000e-02 *
 

tensor(0.1506) 57 375
tensor(1.00000e-02 *
       8.3927) 105 342
tensor(0.1304) 57 375
tensor(1.00000e-02 *
       9.2716) 105 362
tensor(0.1092) 57 375
tensor(0.1014) 105 342
tensor(0.2273) 46 358
tensor(0.2459) 88 274
tensor(0.4212) 17 609
tensor(1.00000e-02 *
       9.3027) 105 239
tensor(0.2195) 19 383
tensor(0.2035) 105 372
tensor(0.4255) 17 609
tensor(0.1299) 105 239
tensor(0.1419) 57 375
tensor(0.1538) 105 436
tensor(0.1637) 57 375
tensor(0.1389) 105 436
tensor(0.3957) 17 609
tensor(0.2000) 105 290
tensor(0.3878) 35 332
tensor(0.1428) 105 514
Saving players:
tensor([ 0.8235,  0.1025,  0.0490,  0.0466,  0.1186,  0.0048,  0.9933,
         0.9537,  0.9578,  0.0443,  0.9802,  0.1317,  0.8840,  0.9741,
         0.0495,  0.9838,  0.9439,  0.8324,  0.0932,  0.0348,  0.9724,
         0.7720,  0.9837,  0.1542,  0.0462,  0.6369,  0.9682,  0.5467,
         0.1391,  0.8234,  0.9576,  0.7958,  0.9428,  0.0356,  0.0461,
         0.0216,  0.9436,  0.4429,  0.9662,  0.8601,  0.0612,  0.8873,
 