In [1]:
import numpy as np
import random
import os
import time
import torch 
from torch.autograd import Variable
from copy import deepcopy

In [2]:
from lib.turn_program_into_file import turn_program_into_file
from game.wrappers.game_container import GameContainer

In [3]:
# --- Run configuration ---------------------------------------------------
# Sizes and the shared game instance used by the cells below.
cpus = 4
player_size = 8
game = GameContainer(128, 128)

# Each run takes 2.6 sec per cpu, 38089 = a 24h run.
run_limit = 10
save_cycle = 2

# Output location: every execution gets its own sub-directory named after
# the integer Unix timestamp taken when this cell runs.
output_dir = "./output/pyramid/"
run = int(time.time())
save_dir = "{}{}/".format(output_dir, run)

In [4]:
# king_hill_ids = list(range(0, cpus))
# Create the per-run directory the .rle player files are saved into.
# `save_dir` is the same path as output_dir + str(run), so the directory that
# is created is exactly the one later used as the save prefix.
# exist_ok=True keeps this cell re-runnable: executing it a second time in the
# same kernel session no longer raises FileExistsError.
os.makedirs(save_dir, exist_ok=True)

In [None]:
# -*- coding: utf-8 -*-
import torch

top = 2


def _build_player_net(base, depth=6):
    """Build one player network as a stack of ``depth`` linear layers.

    Layer ``k`` maps ``base**k`` features to ``base**(k + 1)`` features.
    Every linear layer is followed by a ReLU, except the last one, which is
    followed by a Sigmoid so the outputs lie in (0, 1).
    """
    layers = []
    for level in range(depth):
        layers.append(torch.nn.Linear(base**level, base**(level + 1)))
        is_output_layer = level == depth - 1
        layers.append(torch.nn.Sigmoid() if is_output_layer else torch.nn.ReLU())
    return torch.nn.Sequential(*layers)


# Two independent networks with the same architecture, one per player.
# With top = 2 each maps a single input value to 2**6 = 64 outputs.
p1 = _build_player_net(top)
p2 = _build_player_net(top)

# Mean squared error between a network output and its target board.
loss_fn = torch.nn.MSELoss()


def return_result(game_result, number):
    """Count how many entries of ``game_result`` compare equal to ``number``.

    Args:
        game_result: iterable of values to scan.
        number: value to look for.

    Returns:
        The number of matching entries as an int (0 for an empty iterable).
    """
    return sum(1 for cell in game_result if cell == number)

def create_player(game_board_pt, player_size):
    """Binarise a network output tensor into a 0/1 integer NumPy board.

    Every value strictly greater than 0.5 becomes 1, everything else
    (including exactly 0.5) becomes 0.

    Args:
        game_board_pt: torch tensor holding the network output.
        player_size: unused; kept so existing callers keep working.

    Returns:
        A new integer NumPy array with the same shape as the input. The
        input tensor is left untouched.
    """
    # detach().numpy() shares memory with the tensor, so the threshold is
    # computed into a fresh array instead of being written back in place.
    game_board_np = game_board_pt.detach().numpy()

    # The builtin `int` replaces the `np.int` alias, which was removed from
    # NumPy; both select the same default integer dtype.
    return (game_board_np > 0.5).astype(int)


def run_game(home, away, rounds, player_size):
    """Play ``home`` against ``away`` on the module-level ``game``.

    Both flat boards are reshaped to ``(player_size, player_size)``, handed
    to ``game.add_players`` and the game is launched with ``rounds``.

    Returns:
        The number of cells equal to 1 in the flattened final grid
        (``game.gol.grid()``).
    """
    board_shape = (player_size, player_size)
    home_grid, away_grid = (np.reshape(side, board_shape) for side in (home, away))

    game.add_players(home_grid, away_grid)
    game.launch(rounds)

    final_cells = np.array(game.gol.grid()).flatten()
    return return_result(final_cells, 1)

def create_loss_board(home, away, rounds, player_size):
    """Build a target board for ``home`` by trying every single-tile flip.

    The unmodified ``home`` board is scored first with ``run_game``. Then,
    for each of the ``player_size ** 2`` tiles, a copy of ``home`` with only
    that tile flipped is scored against the same ``away`` board. Whenever the
    flipped copy scores at least as high as the baseline, the flipped value
    is written into the target board.

    Returns:
        A tuple ``(target, baseline)``: the target board as a float torch
        tensor, and the score of the unmodified ``home`` board.
    """
    baseline = run_game(home, away, rounds, player_size)
    target = deepcopy(home)

    for idx in range(player_size ** 2):
        candidate = deepcopy(home)
        if home[idx] == 1:
            candidate[idx] = 0
        else:
            candidate[idx] = 1

        # ">=" means a flip that merely ties the baseline is also adopted.
        if run_game(candidate, away, rounds, player_size) >= baseline:
            target[idx] = candidate[idx]

    return torch.tensor(target, dtype=torch.float), baseline

# One Adam optimizer per player network, both with the same learning rate.
learning_rate = 0.001
optimizer_p1 = torch.optim.Adam(p1.parameters(), lr=learning_rate)
optimizer_p2 = torch.optim.Adam(p2.parameters(), lr=learning_rate)

# Loop settings, named so they can be tuned in one place.
num_iterations = 10000  # number of optimisation steps
game_rounds = 500       # value passed as `rounds` to every simulated game
save_every = 100        # write both boards to disk every this many steps

# Both networks are always fed the same constant scalar, so it is built once
# instead of on every iteration. A plain tensor is enough: the deprecated
# Variable wrapper is dropped, and the input itself needs no gradient because
# only the network parameters are optimised.
start_p1 = torch.tensor([1.0])
start_p2 = torch.tensor([1.0])

for t in range(num_iterations):
    # Clear gradients at the start of the step. If the cell is interrupted
    # between backward() and the end of an iteration and then re-run, stale
    # gradients are no longer accumulated into the first update.
    optimizer_p1.zero_grad()
    optimizer_p2.zero_grad()

    p1_board = p1(start_p1)
    p2_board = p2(start_p2)

    # Building the binary boards and the target boards involves only NumPy
    # work and game simulation, so no autograd graph is needed here.
    with torch.no_grad():
        p1_board_np = create_player(p1_board.clone(), player_size)
        p2_board_np = create_player(p2_board.clone(), player_size)

        p1_loss_board, p1_score = create_loss_board(p1_board_np, p2_board_np, game_rounds, player_size)
        p2_loss_board, p2_score = create_loss_board(p2_board_np, p1_board_np, game_rounds, player_size)

    # Pull each network's output towards its own target board.
    loss_p1 = loss_fn(p1_board, p1_loss_board)
    loss_p1.backward()
    optimizer_p1.step()

    loss_p2 = loss_fn(p2_board, p2_loss_board)
    loss_p2.backward()
    optimizer_p2.step()

    print(loss_p1, loss_p2)
    print(p1_score, p2_score)

    if t % save_every == 0:
        print("Saving players:")
        print(p1_board, p2_board)
        # Use player_size rather than a hard-coded (8, 8) so the saved shape
        # always matches the shape the games were played with.
        board_shape = (player_size, player_size)
        turn_program_into_file(np.reshape(p1_board_np, board_shape), save_dir + "p1-cycle-" + str(t) + ".rle", "RKH", "EW", "")
        turn_program_into_file(np.reshape(p2_board_np, board_shape), save_dir + "p2-cycle-" + str(t) + ".rle", "RKH", "EW", "")

tensor(0.2665) tensor(0.2330)
4 171
Saving players:
tensor([ 0.5422,  0.4870,  0.4460,  0.4969,  0.4999,  0.5426,  0.5163,
         0.5042,  0.4899,  0.5172,  0.5705,  0.5489,  0.4609,  0.5109,
         0.5013,  0.4950,  0.4651,  0.5043,  0.4854,  0.4892,  0.4857,
         0.5017,  0.5265,  0.5180,  0.5096,  0.4846,  0.5222,  0.5253,
         0.4210,  0.4996,  0.5356,  0.5392,  0.4883,  0.5115,  0.5051,
         0.4661,  0.5161,  0.4955,  0.4909,  0.4739,  0.4964,  0.4972,
         0.5195,  0.4788,  0.4863,  0.4925,  0.4700,  0.4780,  0.4931,
         0.4968,  0.5237,  0.4890,  0.5129,  0.4921,  0.4728,  0.4630,
         0.4741,  0.4523,  0.5259,  0.4754,  0.5019,  0.5132,  0.5207,
         0.4917]) tensor([ 0.4996,  0.5038,  0.5263,  0.4953,  0.4872,  0.5405,  0.4623,
         0.4776,  0.4963,  0.5380,  0.4854,  0.4593,  0.5356,  0.4972,
         0.5102,  0.5135,  0.5221,  0.4803,  0.5126,  0.4810,  0.4748,
         0.5072,  0.4849,  0.4666,  0.5213,  0.5133,  0.4869,  0.4999,
       

tensor(1.00000e-02 *
       5.7005) tensor(1.00000e-02 *
       8.3675)
176 450
tensor(1.00000e-02 *
       5.6153) tensor(1.00000e-02 *
       8.1514)
176 450
tensor(1.00000e-02 *
       4.0930) tensor(1.00000e-02 *
       7.9354)
176 450
tensor(1.00000e-02 *
       4.0172) tensor(1.00000e-02 *
       7.7197)
176 450
tensor(1.00000e-02 *
       3.9398) tensor(1.00000e-02 *
       7.5048)
176 450
tensor(1.00000e-02 *
       3.8612) tensor(1.00000e-02 *
       7.2908)
176 450
tensor(1.00000e-02 *
       5.2498) tensor(1.00000e-02 *
       7.0780)
176 450
tensor(1.00000e-02 *
       5.1860) tensor(1.00000e-02 *
       6.8667)
176 450
tensor(1.00000e-02 *
       5.1204) tensor(1.00000e-02 *
       6.6573)
176 450
tensor(1.00000e-02 *
       4.3024) tensor(1.00000e-02 *
       6.4498)
176 450
tensor(1.00000e-02 *
       4.2555) tensor(1.00000e-02 *
       6.2446)
176 450
tensor(1.00000e-02 *
       6.6018) tensor(1.00000e-02 *
       6.7331)
177 374
tensor(1.00000e-02 *
       4.7698) tens

tensor(1.00000e-02 *
       8.3174) tensor(1.00000e-03 *
       3.1196)
112 388
tensor(1.00000e-02 *
       8.2559) tensor(1.00000e-03 *
       2.9751)
112 388
tensor(1.00000e-02 *
       9.3072) tensor(1.00000e-03 *
       2.8378)
112 388
tensor(1.00000e-02 *
       9.2357) tensor(1.00000e-03 *
       2.7073)
112 388
tensor(1.00000e-02 *
       9.1563) tensor(1.00000e-03 *
       2.5832)
112 388
tensor(1.00000e-02 *
       9.3609) tensor(1.00000e-03 *
       2.4655)
112 388
tensor(1.00000e-02 *
       7.9396) tensor(1.00000e-03 *
       2.3538)
112 388
tensor(1.00000e-02 *
       8.0886) tensor(1.00000e-03 *
       2.2478)
112 388
tensor(1.00000e-02 *
       8.0107) tensor(1.00000e-03 *
       2.1473)
112 388
tensor(1.00000e-02 *
       7.7372) tensor(1.00000e-03 *
       2.0521)
112 388
tensor(1.00000e-02 *
       9.0239) tensor(1.00000e-03 *
       1.9617)
112 388
tensor(1.00000e-02 *
       8.9494) tensor(1.00000e-03 *
       1.8761)
112 388
tensor(1.00000e-02 *
       8.5003) tens

In [None]:
 #print(loss_p1.detach().numpy(), loss_p2.detach().numpy())
    #     if t % 100:
    #         turn_program_into_file(p1_board, save_dir + "p1-cycle-" + str(t) + ".rle", "RKH", "EW", "")
    #         turn_program_into_file(p2_board, save_dir + "p2-cycle-" + str(t) + ".rle", "RKH", "EW", "")