In [7]:
from catanatron.mcts_game import CatanGame
from typing import Any
from catanatron.state import State, apply_action
from catanatron import game
from catanatron.models.player import Color
from catanatron.models import map
from catanatron.players.weighted_random import WeightedRandomPlayer
from catanatron.models.enums import ActionPrompt

from catanatron.mcts_simple import Game as MCTS_Game
from catanatron.mcts_simple import UCT
from catanatron.models.enums import (ActionType, Action, WOOD,
    BRICK,
    SHEEP,
    WHEAT,
    ORE)
from catanatron.models.map import build_dice_probas

from catanatron.models.map import (
    BASE_MAP_TEMPLATE,
    MINI_MAP_TEMPLATE,
    NUM_NODES,
    CatanMap,
    NodeId,
)

import torch
from torch import nn

import random

In [8]:
# Mini-map board shared by the game below.
board = map.CatanMap.from_template(map.MINI_MAP_TEMPLATE)

# Two-seat game: RED and BLUE are both WeightedRandomPlayer instances.
players = [WeightedRandomPlayer(color) for color in (Color.RED, Color.BLUE)]
init_game = CatanGame(players=players, board=board)

# Build the UCT tree on the initial game (transpositions disabled) and run
# 5000 self-play iterations on it.
tree = UCT(game=init_game, allow_transpositions=False)
tree.self_play(iterations=5000)
# To inspect the result, iterate over tree.root.children and read each
# child's visit count `.n`.


Training:  49%|████▊     | 2431/5000 [00:38<00:43, 59.15it/s]

In [5]:
# Run 5000 self-play iterations on whatever `tree` currently lives in the kernel.
# NOTE(review): this cell's execution count (In [5]) is lower than that of the
# cell that creates `tree` (In [8]), so it depends on out-of-order kernel
# state — confirm whether it should stay, move, or be removed.
tree.self_play(iterations=5000)

Training: 100%|██████████| 5000/5000 [24:50<00:00,  3.35it/s]   


In [38]:
# Lookup that is indexed further down by dice total (2-12) to obtain the
# sampling weight of that total.
# NOTE(review): presumably the probability of each two-dice sum — confirm
# against catanatron.models.map.build_dice_probas.
DICE_PROBS = build_dice_probas()

# One representative (first_die, second_die) pair for every two-dice total
# from 2 to 12. The first die is kept as small as possible: totals up to 7
# use a 1 on the first die, larger totals use a 6 on the second die.
rolls = {
    total: (max(1, total - 6), min(6, total - 1))
    for total in range(2, 13)
}

# Two-component integer offset for each node direction name.
# NOTE(review): presumably (row, column) steps on the board tensor grid —
# confirm against the code that consumes these tables.
node_direction_embed = {
    "NORTH": (-1, 0),
    "SOUTH": (1, 0),
    "NORTHEAST": (-1, 2),
    "NORTHWEST": (-1, -2),
    "SOUTHEAST": (1, 2),
    "SOUTHWEST": (1, -2),
}

# Same idea for edge direction names; the diagonal offsets here move one
# column instead of two.
edge_direction_embed = {
    "EAST": (0, 2),
    "WEST": (0, -2),
    "NORTHEAST": (-1, 1),
    "NORTHWEST": (-1, -1),
    "SOUTHEAST": (1, 1),
    "SOUTHWEST": (1, -1),
}

# Parallel lists consumed by random.choices when a dice roll is sampled:
# `numbers` holds every dice total that has an entry in `rolls`, and
# `probs_list` holds the DICE_PROBS weight of the total at the same index.
# Built with a comprehension so no loop variable is left behind in the
# notebook namespace.
numbers = list(rolls)
probs_list = [DICE_PROBS[total] for total in numbers]

In [52]:
# Walk down the search tree from its root for at most `depth` steps, printing
# the action taken at each step. After the loop `root` is the node that was
# reached; later cells read `root.children`, `root.n` and `root.state`.
root = tree.root
depth = 6


# Stop early if the current node has no children.
while root.children and depth > 0:
    # Ask the node for its action with training disabled.
    # NOTE(review): presumably the most-visited / highest-value child —
    # confirm in choose_best_action.
    action = root.choose_best_action(training=False)
    if (action.action_type == ActionType.ROLL):
            # A ROLL is resolved in two hops: first step into the ROLL child,
            # then pick a concrete dice outcome among that child's children.
            root = root.children[action]
            if root.is_expanded:            
                # Sample a dice total weighted by `probs_list` and turn it into
                # an Action carrying the matching dice pair from `rolls`.
                # NOTE(review): the lookup `root.children[action]` below will
                # raise KeyError if this constructed Action is not a key of
                # the child mapping — confirm every outcome is always present.
                action = Action(action.color, action.action_type, 
                            rolls[random.choices(numbers, probs_list)[0]])
            else:
                # Node not expanded yet: let the node pick an action itself.
                action = root.choose_random_action()
    print(action)
    
    # One step consumed (a ROLL and its outcome count as a single step);
    # descend into the chosen child.
    depth -= 1
    root = root.children[action]

    #print(root.children.keys())

Action(color=<Color.RED: 'RED'>, action_type=<ActionType.BUILD_SETTLEMENT: 'BUILD_SETTLEMENT'>, value=3)
Action(color=<Color.RED: 'RED'>, action_type=<ActionType.BUILD_ROAD: 'BUILD_ROAD'>, value=(2, 3))
Action(color=<Color.BLUE: 'BLUE'>, action_type=<ActionType.BUILD_SETTLEMENT: 'BUILD_SETTLEMENT'>, value=5)
Action(color=<Color.BLUE: 'BLUE'>, action_type=<ActionType.BUILD_ROAD: 'BUILD_ROAD'>, value=(4, 5))
Action(color=<Color.BLUE: 'BLUE'>, action_type=<ActionType.BUILD_SETTLEMENT: 'BUILD_SETTLEMENT'>, value=15)
Action(color=<Color.BLUE: 'BLUE'>, action_type=<ActionType.BUILD_ROAD: 'BUILD_ROAD'>, value=(14, 15))


In [6]:
# For every action available at the current node, print the action together
# with its child's visit count divided by the node's own visit count.
parent_visits = root.n
for action in root.children:
    child_visits = root.children[action].n
    print(action, child_visits / parent_visits)

NameError: name 'root' is not defined

In [48]:
from catanatron.torch_converter import state_to_tensor

# All-zero tensor of shape (9, 7, 13) handed to state_to_tensor as its
# second argument.
blank_board = torch.zeros(9, 7, 13)

# Embedding table with 6 rows of 4-dimensional vectors; its weights are
# freshly (randomly) initialised each time this cell runs.
resources_embed = nn.Embedding(6, 4)

# Encode the state of the node reached by the tree walk.
board_embed = state_to_tensor(root.state, blank_board, resources_embed)

# Display the resulting tensor as the cell output.
board_embed






tensor([[[ 0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
           0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
         [ 0.0000,  0.0000,  0.0000,  0.0000,  1.4794,  0.0000,  0.0000,
           0.0000,  0.8890,  0.0000,  0.0000,  0.0000,  0.0000],
         [ 0.0000,  0.0000,  0.0000,  0.0000, -1.0000,  0.0000,  0.0000,
           0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
         [ 0.0000,  0.0000, -0.9866,  0.0000,  0.0000,  0.0000,  1.4794,
           0.0000,  0.0000,  0.0000, -1.7887,  0.0000,  0.0000],
         [ 0.0000,  0.0000, -1.0000,  0.0000,  0.0000,  0.0000,  1.0000,
           0.0000,  0.0000,  0.0000,  1.0000,  0.0000,  0.0000],
         [ 0.0000,  0.0000,  0.0000,  0.0000, -0.4439,  0.0000,  0.0000,
           0.0000,  0.4405,  0.0000,  0.0000,  0.0000,  0.0000],
         [ 0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
           0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000]],

        [[ 0.0000,  0.0000,  0.0

In [9]:
# Integer id for each resource name, in this fixed order; None takes the
# last id (5).
resources = {
    name: index
    for index, name in enumerate(('ORE', 'WHEAT', 'SHEEP', 'WOOD', 'BRICK', None))
}