In [1]:
import math

from board3 import Board3, sqr_distance
from controller3 import ActionController
from heapq import heappush, heappop

In [2]:
def eval_board(board: Board3):
    """Return a hand-tuned heuristic score (a float) for ``board``.

    The score is a signed sum of terms built from:

    * the distances of the player and of the enemy to the cells ``(0, 0)``
      and ``(0, 2)`` (whichever is nearer),
    * the number of entries in ``board.mw`` and whether ``(1, 0)`` / ``(1, 2)``
      are among them,
    * the pending short / long player pushes when they are tagged ``'E'``,
    * the number of available moves around the enemy and the player,
    * whether ``board.shortest_path`` finds a path from the enemy to its
      nearer target cell.

    NOTE(review): the enemy term uses the squared distance directly while the
    player term takes its square root first -- confirm this is intentional.
    """
    py, px = board.player_position
    ey, ex = board.enemy_position

    # Distance of the enemy to each target cell; a tie selects column 2
    # because the comparison is strict.
    enemy_d0 = sqr_distance(ey, ex, 0, 0)
    enemy_d2 = sqr_distance(ey, ex, 0, 2)
    goal_x = 0 if enemy_d0 < enemy_d2 else 2

    player_distance = min(sqr_distance(py, px, 0, 0), sqr_distance(py, px, 0, 2))
    enemy_distance = min(enemy_d0, enemy_d2)

    def push_terms(push, scale):
        # (multiplier, distance from the push cell to the enemy's nearer
        # target) for a push tagged 'E'; (0, 0) for anything else.
        if len(push) > 0 and push[1] == 'E':
            multiplier = 2 / (1.1 - push[2] / scale)
            reach = math.sqrt(sqr_distance(push[0][0], push[0][1], 0, goal_x))
            return multiplier, reach
        return 0, 0

    spm, short_distance = push_terms(board.player_short_push, 1000)
    lpm, long_distance = push_terms(board.player_long_push, 1600)

    mw_1_0 = (1, 0) in board.mw
    mw_1_2 = (1, 2) in board.mw
    enemy_at = board.enemy_position
    player_at = board.player_position

    block = 10 if (mw_1_0 and enemy_at != (0, 0)) or (mw_1_2 and enemy_at != (0, 2)) else 0
    win = 10 if (mw_1_0 and enemy_at == (0, 0)) or (mw_1_2 and enemy_at == (0, 2)) else 0
    lose = 10 if (mw_1_0 and player_at == (0, 0)) or (mw_1_2 and player_at == (0, 2)) else 0

    enemy_spaces = len(board.get_pos_available_moves(ey, ex))
    player_spaces = len(board.get_pos_available_moves(py, px))

    sp = board.shortest_path(ey, ex, 0, goal_x)
    if len(sp) > 0:
        spp = 5
    else:
        # No path for the enemy: drop the bonus and quarter both push weights.
        spp = 0
        spm /= 4
        lpm /= 4

    # Accumulate in the same left-to-right order as a single expression would,
    # so the floating-point result is unchanged.
    score = 10 / (enemy_distance + 1)
    score -= 10 / (math.sqrt(player_distance) + 1)
    score -= len(board.mw)
    score -= block
    score += spm / (short_distance + 1)
    score += lpm / (long_distance + 1)
    score += 7 / (enemy_spaces + 1)
    score += spp
    score += win
    score -= lose
    score -= 5 / (player_spaces + 1)
    return score



class NT:
    """Node of a search tree over board states.

    Attributes:
        board: the board state held by this node.
        controller: an ``ActionController`` bound to ``board``.
        children: ``None`` until :meth:`expand` runs, then a list of ``NT``.
        player: the value passed in by the creator; children get ``-player``.
        a: the action that produced this node (``None`` for the root).
        p: ``eval_board`` score recorded when the node was created by
           :meth:`expand`.
        v: running value estimate.
        n: visit counter.

    ``action``, ``prob`` and ``value`` are read/write aliases for ``a``, ``p``
    and ``v`` so that code using either spelling works.
    """

    def __init__(self, board, player):
        self.board = board
        self.controller = ActionController(board)
        self.children = None
        self.player = player
        self.a = None
        self.p = 0
        self.v = 0
        self.n = 0

    # --- long-name aliases for the short attribute names -------------------

    @property
    def action(self):
        return self.a

    @action.setter
    def action(self, new_action):
        self.a = new_action

    @property
    def prob(self):
        return self.p

    @prob.setter
    def prob(self, new_prob):
        self.p = new_prob

    @property
    def value(self):
        return self.v

    @value.setter
    def value(self, new_value):
        self.v = new_value

    def get_winner(self):
        """Return 1 on a win, -1 on a loss or a block, ``None`` otherwise.

        The outcome is read from ``self.controller`` (``is_win``, ``is_lose``,
        ``is_block``), checked in that order.
        """
        if self.controller.is_win():
            return 1
        if self.controller.is_lose():
            return -1
        if self.controller.is_block():
            return -1

        return None

    def search(self):
        """Run one search iteration from this node and return its result.

        * Terminal node: bump ``n``, store and return ``-winner``.
        * Unexpanded node: call :meth:`expand` and return ``-self.v``.
        * Otherwise: descend into the child maximising
          ``v + 1.5 * p * sqrt(sum_of_child_visits / (n + 1))``, fold the
          child's result into this node's running mean ``v`` and return the
          negated result.
        """
        winner = self.get_winner()
        if winner is not None:
            self.n += 1
            self.v = -winner
            return self.v

        if self.children is None:
            self.expand()
            return -self.v

        if not self.children:
            # Expanded earlier but there were no moves: nothing to descend
            # into, so only count the visit.
            self.n += 1
            return -self.v

        cs = sum(x.n for x in self.children)
        # Use the attributes the nodes actually carry (v / p), not the
        # long names, which used to be undefined here.
        sv = max(self.children, key=lambda x: x.v + 1.5 * x.p * math.sqrt(cs / (x.n + 1)))
        res = sv.search()
        self.v = (self.v * self.n + res) / (self.n + 1)
        self.n += 1

        return -res

    def expand(self):
        """Create one child per available move and initialise this node's stats.

        Each child gets a copy of the board on which the move is executed and
        ``step(500, walk_time=200)`` is run; the child's ``p`` is the
        ``eval_board`` score of that copy, taken before ``swap_enemy()`` is
        called on it. Afterwards ``self.v`` is the mean of the children's
        ``p`` and ``self.n`` is the number of children.
        """
        self.children = []
        vs = []
        for p in self.controller.get_available_moves():
            board_copy = self.board.copy()
            nt = NT(board_copy, -self.player)
            nt.controller.execute_action(p)
            board_copy.step(500, walk_time=200)
            nt.a = p
            nt.v = 0
            nt.p = eval_board(board_copy)
            nt.n = 1
            vs.append(nt.p)
            board_copy.swap_enemy()
            self.children.append(nt)

        # With no moves there is nothing to average; use a neutral value
        # instead of dividing by zero.
        self.v = sum(vs) / len(vs) if vs else 0
        self.n = len(vs)

In [3]:
# Build the starting position used by the rest of the notebook. The grid
# printed by this cell shows E at row 3 / column 3, P at row 2 / column 2 and
# T at row 2 / column 1, i.e. the setters take (row, column).
board = Board3()

board.set_enemy(3, 3)
board.set_player(2, 2)
board.set_todd(2, 1)
print(board)
# NOTE(review): `controller` is not referenced by any other cell shown here.
controller = ActionController(board)

[97m.[0m  [33m%[0m  [97m.[0m  [33m%[0m
[97m.[0m  [33m%[0m  [97m.[0m  [33m%[0m
[97m.[0m  [97mT[0m  [32mP[0m  [97m.[0m
[97m.[0m  [97m.[0m  [97m.[0m  [31mE[0m


In [9]:



# Show the position being searched, then run 10000 search iterations from a
# fresh root node created with player = 1.
print(board)
nt = NT(board, 1)
for _ in range(10000):
    nt.search()

[97m.[0m  [33m%[0m  [97m.[0m  [33m%[0m
[97m.[0m  [33m%[0m  [97m.[0m  [33m%[0m
[97m.[0m  [97mT[0m  [32mP[0m  [97m.[0m
[97m.[0m  [97m.[0m  [97m.[0m  [31mE[0m


In [10]:
# Per-child statistics of the root, one list per field: visit count `n`,
# value estimate `v`, eval_board score `p` and action id `a`. These are the
# attribute names NT assigns in __init__ / expand.
print([x.n for x in nt.children])
print([round(x.v, 2) for x in nt.children])
print([round(x.p, 2) for x in nt.children])
print([round(x.a, 2) for x in nt.children])

# The most-visited child is taken as the move to play.
ma = max(nt.children, key=lambda x: x.n)
print(ma.n)
print(ma.v)
print(ma.controller.explain_action(ma.a))

[329, 1, 330, 409, 372, 509, 934, 494, 609, 466, 494, 509, 476, 442, 455, 469, 453, 250, 329, 329, 329, 331, 324, 329, 1, 1, 175, 1, 175, 148, 278]
[2.0, 0, 2.07, 0.95, 1.16, 1.97, 1.04, 1.96, 1.84, 1.95, 1.96, 1.97, 1.32, 1.97, 2.0, 2.28, 1.89, -2.97, 2.0, 2.0, 2.0, 2.08, 1.7, 2.0, 0, 0, 1.32, 0, 1.32, 1.53, -0.19]
[3.91, -3.59, 3.9, 4.49, 4.26, 4.86, 6.77, 4.79, 5.34, 4.66, 4.79, 4.86, 4.79, 4.53, 4.6, 4.62, 4.6, 3.91, 3.91, 3.91, 3.91, 3.91, 3.91, 3.91, -7.09, -12.34, 2.91, -1.17, 2.91, 2.66, 3.83]
[0, 2, 4, 6, 7, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 33, 34, 35, 36]
934
1.0401476819765225
Push E 0 2


In [15]:
# Play the chosen move. `bc` is another name for `board`, not a copy, so this
# cell advances the notebook-wide `board` in place and is not idempotent.
# NOTE(review): use board.copy() here if `board` is meant to stay unchanged.
bc = board
# NT stores the move that produced a node in `a`.
ActionController(bc).execute_action(ma.a)
bc.step(500, walk_time=200)
print(bc)


[35mU[0m  [33m%[0m  [97m.[0m  [33m%[0m
[31mE[0m  [33m%[0m  [97m.[0m  [33m%[0m
[34m#[0m  [32mP[0m  [97m.[0m  [97m.[0m
[97m.[0m  [97m.[0m  [97m.[0m  [97mT[0m


In [7]:
# Ask the root's controller for the human-readable description of action id 2.
print(nt.controller.explain_action(2))

Move -1 0


In [8]:
# Calls swap_enemy() on the board that `bc` refers to.
# NOTE(review): presumably this exchanges the player/enemy roles so the other
# side moves next -- confirm against Board3.
bc.swap_enemy()

In [8]:
# One-ply preview: apply every available action to its own copy of `board`
# and keep the rendered grid, the action id and the eval_board score.
grid, actions, evals = [], [], []
for a in ActionController(board).get_available_moves():
    board_copy = board.copy()
    ActionController(board_copy).execute_action(a)
    board_copy.step(200, walk_time=200)
    grid.append(str(board_copy))
    actions.append(a)
    evals.append(eval_board(board_copy))

# Print the outcomes side by side, ten per band: a header row with the action
# descriptions, the first four text lines of each grid, then the scores.
gridt = [rendered.split('\n') for rendered in grid]
for band_start in range(0, len(gridt), 10):
    band = range(band_start, min(band_start + 10, len(gridt)))
    print('\t\t'.join(f'{ActionController(board).explain_action(actions[k]): <8}' for k in band))
    for row in range(4):
        print('\t\t'.join(gridt[k][row] for k in band))
    print('\t\t'.join(f'{round(evals[k], 2): <8}' for k in band))
    print()

Skip    		Move -1 0		Move 0 1		Move 1 0		Move 1 -1		Push E 0 0		Push E 0 2		Push E 1 0		Push E 1 2		Push E 2 0
[97m.[0m  [33m%[0m  [97m.[0m  [33m%[0m		[97m.[0m  [33m%[0m  [97m.[0m  [33m%[0m		[97m.[0m  [33m%[0m  [97m.[0m  [33m%[0m		[97m.[0m  [33m%[0m  [97m.[0m  [33m%[0m		[97m.[0m  [33m%[0m  [97m.[0m  [33m%[0m		[95mo[0m  [33m%[0m  [97m.[0m  [33m%[0m		[97m.[0m  [33m%[0m  [95mo[0m  [33m%[0m		[97m.[0m  [33m%[0m  [97m.[0m  [33m%[0m		[97m.[0m  [33m%[0m  [97m.[0m  [33m%[0m		[97m.[0m  [33m%[0m  [97m.[0m  [33m%[0m
[97m.[0m  [33m%[0m  [97m.[0m  [33m%[0m		[97m.[0m  [33m%[0m  [32mP[0m  [33m%[0m		[97m.[0m  [33m%[0m  [97m.[0m  [33m%[0m		[97m.[0m  [33m%[0m  [97m.[0m  [33m%[0m		[97m.[0m  [33m%[0m  [97m.[0m  [33m%[0m		[97m.[0m  [33m%[0m  [97m.[0m  [33m%[0m		[97m.[0m  [33m%[0m  [97m.[0m  [33m%[0m		[95mo[0m  [33m%[0m  [97m.[0m  [33m%[0m		[97m.[0m  [33m%[0m  [

In [10]:
# Priority-queue exploration of board states, capped at 5000 pops.
# heapq is a min-heap, so the entry with the LOWEST eval_board score is
# expanded first.
# NOTE(review): eval_board adds `win` and subtracts `lose`; confirm that
# lowest-first is the intended order given that each successor is scored
# after swap_enemy().
# NOTE(review): entries are (score, board) tuples, so equal scores fall back
# to comparing boards, and `visited` relies on board hashing/equality --
# assumes Board3 supports both.
q = [(0, board)]
visited = set()
for _ in range(5000):
    if not q:
        # Frontier exhausted before the pop budget ran out; heappop on an
        # empty list would raise IndexError.
        break
    _, b = heappop(q)
    if b in visited:
        continue

    if ActionController(b).is_win():
        print("WIN")
        break
    visited.add(b)
    for p in ActionController(b).get_available_moves():
        board_copy = b.copy()
        ActionController(board_copy).execute_action(p)
        board_copy.step(1000, walk_time=800)
        board_copy.swap_enemy()
        heappush(q, (eval_board(board_copy), board_copy))

# Show the next entry still waiting in the queue. After a "WIN" break this is
# not the winning board, which has already been popped.
if q:
    v, b = heappop(q)
    print(v)
    print(b)
else:
    print("queue empty")

-1006.6666666666666
[97m.[0m  [33m%[0m  [97m.[0m  [33m%[0m
[34m#[0m  [33m%[0m  [34m#[0m  [33m%[0m
[32mP[0m  [31mE[0m  [34m#[0m  [34m#[0m
[34m#[0m  [34m#[0m  [97mT[0m  [34m#[0m
