In [1]:
import numpy as np
import random
import os
import time
import torch 
from torch.autograd import Variable
from copy import deepcopy

In [2]:
from lib.turn_program_into_file import turn_program_into_file
from game.wrappers.game_container import GameContainer

In [3]:
# --- Run configuration -------------------------------------------------------
cpus = 4
# Side length of each player's square pattern (boards are reshaped to
# player_size x player_size before being handed to the game).
player_size = 8

# Shared game instance used by every simulation in this notebook.
# NOTE(review): (128, 128) is presumably the board size -- confirm against GameContainer.
game = GameContainer(128, 128)

# Each run takes 2.6 sec per cpu, 38089 = a 24h run.
run_limit = 10
save_cycle = 2

# --- Output location ---------------------------------------------------------
# Every session writes into its own sub-directory named after the start time
# (whole seconds since the epoch).
output_dir = "./output/pyramid/"
run = int(time.time())
save_dir = "".join((output_dir, str(run), "/"))

In [4]:
# king_hill_ids = list(range(0, cpus))
# Create this session's output directory. exist_ok=True keeps the cell
# idempotent: re-running it in the same kernel (where `run` is unchanged) no
# longer raises FileExistsError.
os.makedirs(os.path.join(output_dir, str(run)), exist_ok=True)

In [None]:
# -*- coding: utf-8 -*-
import torch

# Base of the layer-width "pyramid": widths are top**0, top**1, ..., top**6.
top = 2


def _build_pyramid_net(base, depth=6):
    """Build one fully connected "pyramid" generator network.

    The network has ``depth`` Linear layers; layer ``k`` maps ``base**k``
    inputs to ``base**(k + 1)`` outputs. Every Linear layer is followed by a
    ReLU except the last one, which is followed by a Sigmoid so each output
    lies in [0, 1].

    Args:
        base: Growth factor of the layer widths.
        depth: Number of Linear layers.

    Returns:
        A ``torch.nn.Sequential`` taking ``base**0`` (= 1) input feature and
        producing ``base**depth`` outputs.
    """
    layers = []
    for level in range(depth):
        layers.append(torch.nn.Linear(base**level, base**(level + 1)))
        is_output_layer = level == depth - 1
        layers.append(torch.nn.Sigmoid() if is_output_layer else torch.nn.ReLU())
    return torch.nn.Sequential(*layers)


# Use the nn package to define our models and loss function.
# Both players share the same architecture but are built (and therefore
# initialised) independently, p1 first and p2 second.
p1 = _build_pyramid_net(top)
p2 = _build_pyramid_net(top)


loss_fn = torch.nn.MSELoss()


def return_result(game_result, number):
    """Count the entries of ``game_result`` that compare equal to ``number``.

    Args:
        game_result: Iterable of cell values (e.g. a flattened board).
        number: Value to look for.

    Returns:
        The number of matching entries as an int (0 for an empty iterable).
    """
    return sum(1 for cell in game_result if cell == number)

def create_player(game_board_pt, player_size):
    """Binarise a network output into a 0/1 integer board.

    Each value strictly greater than 0.5 becomes 1, everything else becomes 0.

    Args:
        game_board_pt: torch tensor of cell activations.
        player_size: Unused here; kept so existing callers keep working.

    Returns:
        A new integer numpy array with the same shape as ``game_board_pt``.
        The input tensor is left untouched.
    """
    activations = game_board_pt.detach().numpy()

    # Vectorised threshold. `.astype` allocates a fresh array, so the tensor's
    # storage (which `activations` shares) is never written to. The builtin
    # `int` replaces the `np.int` alias, which was removed in NumPy 1.24.
    return (activations > 0.5).astype(int)


def run_game(home, away, rounds, player_size):
    """Play one match on the shared ``game`` and score the final grid.

    Args:
        home: Flat board of the home player, ``player_size**2`` cells.
        away: Flat board of the away player, ``player_size**2`` cells.
        rounds: Value forwarded to ``game.launch``.
        player_size: Side length used to reshape both boards to 2-D.

    Returns:
        Number of cells equal to 1 in the flattened final grid.
        NOTE(review): presumably 1 marks cells owned by the home player --
        confirm against the game implementation.
    """
    board_shape = (player_size, player_size)
    home_grid = np.reshape(home, board_shape)
    away_grid = np.reshape(away, board_shape)

    game.add_players(home_grid, away_grid)
    game.launch(rounds)

    flat_cells = np.array(game.gol.grid()).flatten()
    return return_result(flat_cells, 1)

def create_loss_board(home, away, rounds, player_size):
    """Search for a target board that scores at least as well as ``home``.

    ``player_size**2`` candidates are tried. Each candidate is a fresh copy of
    ``home`` with three randomly chosen cells flipped (indices may coincide,
    in which case fewer distinct cells change). A candidate is kept when its
    score is at least as high as the best score seen so far, starting from
    the score of ``home`` itself.

    Args:
        home: Flat 0/1 board of the player being improved.
        away: Flat 0/1 board of the opponent.
        rounds: Number of rounds passed on to ``run_game``.
        player_size: Side length of the square board; also bounds the random
            cell indices and sets the number of candidates tried.

    Returns:
        Tuple ``(loss_board, org_score)``: the best board found as a float
        tensor, and the score of the unmodified ``home`` board.
    """
    org_score = run_game(home, away, rounds, player_size)
    loss_board = deepcopy(home)
    best_score = org_score
    n_cells = player_size**2

    for _ in range(n_cells):
        # Indices are bounded by the actual board size, not a hard-coded 63.
        x = random.randint(0, n_cells - 1)
        y = random.randint(0, n_cells - 1)
        z = random.randint(0, n_cells - 1)

        # Start every candidate from a clean copy of `home`. Previously one
        # board was mutated across all iterations and then aliased as the
        # result, so later (possibly worse) flips leaked into the returned
        # board.
        tile_board = deepcopy(home)
        for cell in (x, y, z):
            tile_board[cell] = 0 if home[cell] == 1 else 1

        tile_result = run_game(tile_board, away, rounds, player_size)

        # `>=` keeps score-neutral mutations, as the original comparison did.
        if tile_result >= best_score:
            best_score = tile_result
            loss_board = tile_board

    return torch.tensor(loss_board, dtype=torch.float), org_score

learning_rate = 0.001
optimizer_p1 = torch.optim.Adam(p1.parameters(), lr=learning_rate)
optimizer_p2 = torch.optim.Adam(p2.parameters(), lr=learning_rate)

for t in range(10000):
    # Each network is fed the constant input [1.0]; all learning happens in
    # the weights. A plain tensor is enough: the deprecated `Variable` wrapper
    # is a no-op and the input itself needs no gradient.
    start_p1 = torch.tensor([1.0])
    start_p2 = torch.tensor([1.0])

    p1_board = p1(start_p1)
    p2_board = p2(start_p2)

    # Building the targets involves no differentiable operations.
    with torch.no_grad():
        # Clone so binarisation can never touch the tensors used for the loss.
        p1_board_np = create_player(p1_board.clone(), player_size)
        p2_board_np = create_player(p2_board.clone(), player_size)

        p1_loss_board, p1_score = create_loss_board(p1_board_np, p2_board_np, 100, player_size)
        p2_loss_board, p2_score = create_loss_board(p2_board_np, p1_board_np, 100, player_size)

    # Clear gradients *before* each backward pass. If the cell is interrupted
    # and re-run, stale gradients from the aborted iteration are then not
    # accumulated into the next update.
    optimizer_p1.zero_grad()
    loss_p1 = loss_fn(p1_board, p1_loss_board)
    loss_p1.backward()
    optimizer_p1.step()

    optimizer_p2.zero_grad()
    loss_p2 = loss_fn(p2_board, p2_loss_board)
    loss_p2.backward()
    optimizer_p2.step()

    print(loss_p1, loss_p2)
    print(p1_score, p2_score)

    if t % 100 == 0:
        print("Saving players:")
        print(p1_board, p2_board)
        # Reshape with player_size rather than a hard-coded (8, 8) so the
        # export follows the configured board size.
        board_shape = (player_size, player_size)
        turn_program_into_file(np.reshape(p1_board_np, board_shape), save_dir + "p1-cycle-" + str(t) + ".rle", "RKH", "EW", "")
        turn_program_into_file(np.reshape(p2_board_np, board_shape), save_dir + "p2-cycle-" + str(t) + ".rle", "RKH", "EW", "")

tensor(0.2750) tensor(0.2716)
46 15
Saving players:
tensor([ 0.5105,  0.4321,  0.4605,  0.4964,  0.4901,  0.4672,  0.4767,
         0.4263,  0.4384,  0.4838,  0.4942,  0.4815,  0.4820,  0.4522,
         0.5103,  0.4653,  0.5277,  0.5371,  0.4617,  0.5386,  0.5288,
         0.5026,  0.4782,  0.4617,  0.4909,  0.5453,  0.4818,  0.5053,
         0.5205,  0.5137,  0.4776,  0.4556,  0.5189,  0.4948,  0.5261,
         0.5209,  0.5864,  0.4720,  0.5524,  0.4892,  0.5330,  0.4643,
         0.4452,  0.4908,  0.5150,  0.4991,  0.5156,  0.4909,  0.5319,
         0.5065,  0.5158,  0.4503,  0.5280,  0.5154,  0.4998,  0.5315,
         0.5594,  0.4928,  0.4935,  0.5351,  0.5122,  0.5067,  0.4776,
         0.5082]) tensor([ 0.4707,  0.5387,  0.4818,  0.4414,  0.5531,  0.5330,  0.4946,
         0.4867,  0.4799,  0.4715,  0.5183,  0.4983,  0.5387,  0.5087,
         0.5037,  0.5026,  0.5167,  0.5173,  0.5125,  0.5292,  0.4985,
         0.5354,  0.4722,  0.5269,  0.5189,  0.5050,  0.4824,  0.4419,
       

tensor(0.2405) tensor(0.2502)
190 28
tensor(0.2397) tensor(0.2502)
190 12
tensor(0.2389) tensor(0.2502)
190 78
tensor(0.2614) tensor(0.2502)
190 30
tensor(0.2373) tensor(0.2502)
190 18
tensor(0.2366) tensor(0.2502)
190 88
tensor(0.2358) tensor(0.2502)
190 62
tensor(0.2349) tensor(0.2502)
190 67
tensor(0.2648) tensor(0.2502)
190 77
tensor(0.2333) tensor(0.2502)
190 10
tensor(0.2325) tensor(0.2502)
190 24
tensor(0.2316) tensor(0.2502)
190 85
tensor(0.2306) tensor(0.2502)
190 15
tensor(0.2296) tensor(0.2502)
190 0
tensor(0.2697) tensor(0.2502)
190 77
tensor(0.2275) tensor(0.2502)
190 33
tensor(0.2266) tensor(0.2498)
190 129
tensor(0.2255) tensor(0.2503)
190 23
tensor(0.2243) tensor(0.2503)
190 9
tensor(0.2229) tensor(0.2504)
190 8
tensor(0.2215) tensor(0.2503)
190 3
tensor(0.2199) tensor(0.2503)
190 31
tensor(0.2183) tensor(0.2503)
190 17
tensor(0.2165) tensor(0.2502)
190 18
tensor(0.2145) tensor(0.2502)
190 12
tensor(0.2124) tensor(0.2502)
190 85
tensor(0.2102) tensor(0.2502)
190 5
tenso

tensor(1.00000e-02 *
       1.0022) tensor(0.2502)
190 68
Saving players:
tensor([ 0.0864,  0.9045,  0.9116,  0.0805,  0.0726,  0.9246,  0.1326,
         0.9060,  0.1332,  0.9170,  0.9096,  0.1136,  0.0983,  0.9230,
         0.1015,  0.9146,  0.0845,  0.0832,  0.9045,  0.0813,  0.0812,
         0.9042,  0.9117,  0.8907,  0.1116,  0.8832,  0.0908,  0.0934,
         0.8794,  0.8438,  0.0993,  0.8961,  0.8961,  0.1070,  0.8712,
         0.0860,  0.8855,  0.1040,  0.0803,  0.9289,  0.1077,  0.1257,
         0.1054,  0.9126,  0.8862,  0.1085,  0.9103,  0.1071,  0.8965,
         0.1006,  0.0944,  0.9307,  0.9111,  0.8965,  0.9021,  0.8748,
         0.0852,  0.9186,  0.8946,  0.8957,  0.8787,  0.9107,  0.8919,
         0.0751]) tensor([ 0.5001,  0.5001,  0.5001,  0.5000,  0.4999,  0.4995,  0.5001,
         0.5004,  0.4998,  0.4998,  0.5000,  0.4999,  0.5005,  0.4996,
         0.5002,  0.5008,  0.4999,  0.5000,  0.4999,  0.5001,  0.5000,
         0.4998,  0.4999,  0.4992,  0.4999,  0.5000,  0.

In [None]:
import random
# Scratch cell: draw three independent indices in the inclusive range 0..63.
x, y, z = (random.randint(0, 63) for _ in range(3))

In [None]:
# Scratch cell: show the current values of the module-level x, y and z.
print(x, y, z)

In [None]:
 #print(loss_p1.detach().numpy(), loss_p2.detach().numpy())
    #     if t % 100:
    #         turn_program_into_file(p1_board, save_dir + "p1-cycle-" + str(t) + ".rle", "RKH", "EW", "")
    #         turn_program_into_file(p2_board, save_dir + "p2-cycle-" + str(t) + ".rle", "RKH", "EW", "")