In [1]:
from board3 import Board3, empty_cells
from controller3 import ActionController

import random

import torch
import torch.nn as nn
import torch.optim as optim

from torch.utils.data import DataLoader, TensorDataset
from nnl import to_emb

import matplotlib.pyplot as plt

from tqdm.auto import tqdm

In [2]:
def step(b, a, time=1600):
    """Simulate one action on a copy of the board and score the result.

    The input board ``b`` is left untouched: the action is executed through an
    ``ActionController`` bound to ``b.copy()``, after which that copy is
    advanced with ``step(time)``.

    Args:
        b: Board to start from (must provide ``copy()``).
        a: Action index passed to ``ActionController.execute_action``.
        time: Value forwarded to the copied board's ``step`` method.

    Returns:
        A ``(next_board, reward, end)`` tuple. ``reward`` is ``1`` on a win,
        ``-1`` on a loss or a block, and ``0`` otherwise; ``end`` is ``True``
        exactly when one of those three terminal conditions holds.
    """
    next_board = b.copy()
    controller = ActionController(next_board)

    controller.execute_action(a)
    next_board.step(time)

    # A win takes precedence over the losing conditions.
    if controller.is_win():
        return next_board, 1, True

    # Losing and getting blocked are penalised identically.
    if controller.is_lose() or controller.is_block():
        return next_board, -1, True

    return next_board, 0, False

In [10]:
# Build a demo board and render it.
# players_positions is a list of three (row, col) pairs; in the render below
# they show up as P at (0, 2), E at (1, 0) and T at (3, 3), in that order.
b = Board3(walk_frodo=False, walk_time=1600)
b.players_positions = [(0, 2), (1, 0), (3, 3)]

print(b)

[97m.[0m  [33m%[0m  [32mP[0m  [33m%[0m
[31mE[0m  [33m%[0m  [97m.[0m  [33m%[0m
[97m.[0m  [97m.[0m  [97m.[0m  [97m.[0m
[97m.[0m  [97m.[0m  [97m.[0m  [97mT[0m


In [11]:



def gen_init_board():
    """Return a new ``Board3`` in the fixed starting layout used for training.

    The board is created with ``walk_time=500`` and its ``players_positions``
    are overwritten with ``[(0, 2), (1, 0), (3, 3)]``.
    """
    start_positions = [(0, 2), (1, 0), (3, 3)]

    board = Board3(walk_time=500)
    board.players_positions = start_positions
    return board

In [12]:
# Tabular action-value store shared by the cells below: maps a board state to a
# list holding one value per action (rows are created lazily, 0.5 per action).
m = {}


In [13]:

# Tabular Monte Carlo training with an epsilon-greedy behaviour policy.
# Fills the shared table `m` (board state -> list of per-action values).
N_EPISODES = 200000  # number of training episodes
MAX_STEPS = 50       # cap on the number of actions per episode
STEP_TIME = 1000     # `time` argument forwarded to step()
EPS_START = 1.0      # initial exploration probability
EPS_MIN = 0.1        # exploration floor
EPS_DECAY = 0.001    # linear decrease of epsilon per episode
GAMMA = 0.85         # discount factor applied to the return
ALPHA = 0.1          # step size of the value update
INIT_VALUE = 0.5     # value given to every action of a newly seen state

# Hoisted out of the loops; assumes the action space is constant — TODO confirm.
n_actions = ActionController.get_action_space()

e = EPS_START
for i in tqdm(range(N_EPISODES)):

    # Linear decay from EPS_START to EPS_MIN, computed from the start value.
    # Subtracting `EPS_DECAY * i` from the *running* epsilon compounds (the
    # amount removed grows every episode), which drives epsilon to the floor
    # after roughly 40 episodes instead of the intended ~900.
    e = max(EPS_MIN, EPS_START - EPS_DECAY * i)

    b = gen_init_board()

    # Episode history of (state, action, reward), replayed backwards below.
    h = []

    for _ in range(MAX_STEPS):

        if b not in m:
            m[b] = [INIT_VALUE] * n_actions

        if random.random() < e:
            # Explore: uniformly random action.
            a = random.randint(0, n_actions - 1)
        else:
            # Exploit: greedy action (ties resolve to the lowest index).
            q = m[b]
            a = max(range(n_actions), key=q.__getitem__)

        bc, r, end = step(b, a, STEP_TIME)

        h.append((b.copy(), a, r))

        if end:
            break

        b = bc

    # Values are updated once per episode, not per step: walk the history from
    # the last step to the first, accumulate the discounted return, and move
    # each visited (state, action) value toward the return seen from that step.
    accr = 0
    for state, action, reward in reversed(h):
        accr = reward + GAMMA * accr
        m[state][action] += ALPHA * (accr - m[state][action])

print(len(m))


  0%|          | 0/200000 [00:00<?, ?it/s]

368257


In [14]:
def eval_m(m, n=100):
    """Play ``n`` greedy episodes from the initial board and return outcome rates.

    Each episode starts from ``gen_init_board()`` and always takes the action
    with the highest value in ``m`` for the current state, for at most 50
    steps (each simulated with ``step(..., 1000)``).

    The table is only read: states missing from ``m`` are treated as having
    0.5 for every action, but no row is inserted for them, so evaluating a
    policy does not change the table being evaluated.

    Args:
        m: Mapping from board state to a list of per-action values.
        n: Number of episodes to play.

    Returns:
        ``(wins / n, draws / n, losses / n)``. An episode counts as a win when
        it ends with reward 1, as a loss when it ends with reward -1, and as a
        draw when it ends with any other reward or does not end within the
        step limit.
    """
    n_actions = ActionController.get_action_space()
    # Stand-in row for unseen states. It matches the 0.5 initialisation used
    # when the table is built, so the greedy choice is the same as if the
    # state had been inserted (all ties -> lowest action index).
    unseen = [0.5] * n_actions

    w, d, l = 0, 0, 0
    for _ in range(n):
        b = gen_init_board()

        for _ in range(50):
            q = m.get(b, unseen)
            a = max(range(n_actions), key=q.__getitem__)

            bc, r, end = step(b, a, 1000)

            if end:
                if r == 1:
                    w += 1
                elif r == -1:
                    l += 1
                else:
                    d += 1
                break

            b = bc
        else:
            # Step limit reached without a terminal state.
            d += 1

    return w / n, d / n, l / n


# Evaluate the greedy policy read from `m` with the default episode count and
# report the win / draw / loss fractions (each in [0, 1]).
w, d, l = eval_m(m)
print(f'Wins: {w:.2f}, Draws: {d:.2f}, Loses: {l:.2f}')


Wins: 1.00, Draws: 0.00, Loses: 0.00


In [9]:
# Stress-test the greedy policy from randomised start positions and stop at the
# first episode that does not finish with a win or a loss.
#
# The counters are initialised here so the cell does not depend on (or add to)
# whatever `w`, `d`, `l` another cell left in the kernel namespace.
w, d, l = 0, 0, 0

for _ in range(100000):

    b = Board3(walk_frodo=False, walk_time=1600)

    # Two alternative layouts that differ only in the second position.
    if random.random() < 0.5:
        b.players_positions = [(2, 1), (1, 0), (3, 3)]
    else:
        b.players_positions = [(2, 1), (1, 2), (3, 3)]

    # Pick a random cell from `empty_cells` that is not the enemy's or Todd's
    # position and hand it to set_player (presumably relocating the player —
    # TODO confirm against Board3).
    b.set_player(*random.choice(list(set(empty_cells) - {b.get_enemy_position(), b.get_todd_position()})))
    sb = b.copy()  # start state, kept for the failure report

    r, end = 0, False
    for _ in range(20):

        if b not in m:
            m[b] = [0.5 for _ in range(ActionController.get_action_space())]

        a = max(range(ActionController.get_action_space()), key=lambda x: m[b][x])

        bc, r, end = step(b, a, 1600)

        if end:
            break

        b = bc

    if end and r == 1:
        w += 1
    elif end and r == -1:
        l += 1
    else:
        # Either the episode ended with a reward other than +/-1 or it did not
        # end within 20 steps: show the start state and the last state reached,
        # then abort the run.
        d += 1
        print(sb)
        print('---')
        print(b)
        raise Exception('hue')

[97m.[0m  [33m%[0m  [32mP[0m  [33m%[0m
[31mE[0m  [33m%[0m  [97m.[0m  [33m%[0m
[97m.[0m  [97m.[0m  [97m.[0m  [97m.[0m
[97m.[0m  [97m.[0m  [97m.[0m  [97mT[0m
---
[97m.[0m  [33m%[0m  [32mP[0m  [33m%[0m
[31mE[0m  [33m%[0m  [97m.[0m  [33m%[0m
[97m.[0m  [97m.[0m  [97m.[0m  [97m.[0m
[97m.[0m  [97m.[0m  [97m.[0m  [97mT[0m


Exception: hue

In [9]:
# Inspect the learned values for one hand-picked position
# (rendered below as P at (1, 2), E at (2, 2), T at (3, 3)).
b = Board3(walk_frodo=False, walk_time=1600)
b.players_positions = [(1, 2), (2, 2), (3, 3)]
print(b)
print('---')
# Greedy action for this state; max() returns the first maximum, so ties go to
# the lowest action index. Raises KeyError if the state is not in `m`.
print(ActionController.explain_action(max(range(ActionController.get_action_space()), key=lambda x: m[b][x])))
print('---')
# Per-action value listing, labelled with the controller's action description.
for i, v in enumerate(m[b]):
    print(f'{ActionController.explain_action(i):15s}: {v:.2f}')

[97m.[0m  [33m%[0m  [97m.[0m  [33m%[0m
[97m.[0m  [33m%[0m  [32mP[0m  [33m%[0m
[97m.[0m  [97m.[0m  [31mE[0m  [97m.[0m
[97m.[0m  [97m.[0m  [97m.[0m  [97mT[0m
---
Move -1 0
---
Skip           : 0.01
Move -1 -1     : 0.13
Move -1 0      : 0.50
Move -1 1      : 0.50
Move 0 1       : 0.50
Move 1 1       : 0.50
Move 1 0       : 0.50
Move 1 -1      : 0.50
Move 0 -1      : 0.50
Push E 0 0     : 0.50
Push E 0 2     : 0.50
Push E 1 0     : 0.50
Push E 1 2     : 0.50
Push E 2 0     : 0.50
Push E 2 1     : 0.50
Push E 2 2     : 0.50
Push E 2 3     : 0.50
Push E 3 0     : 0.50
Push E 3 1     : 0.50
Push E 3 2     : 0.50
Push E 3 3     : 0.50
Push T 1 2     : 0.50
Push T 2 1     : 0.50
Push T 2 2     : 0.50
Push T 2 3     : 0.50
Push T 3 1     : 0.50
Push T 3 2     : 0.50
Push T 3 3     : 0.50
MW 1 0         : 0.50
MW 1 2         : 0.50
MW 2 0         : 0.50
MW 2 1         : 0.50
MW 2 2         : 0.50
MW 2 3         : 0.50
MW 3 0         : 0.50
MW 3 1         : 0.50
MW