In [None]:
!pip install -e "git+https://github.com/bcollazo/catanatron.git@master#egg=catanatron&subdirectory=catanatron_core"
!pip install -e "git+https://github.com/bcollazo/catanatron.git@master#egg=catanatron_gym&subdirectory=catanatron_gym"
!pip install -e "git+https://github.com/bcollazo/catanatron.git@master#egg=catanatron_experimental&subdirectory=catanatron_experimental"

In [1]:
import os
import time
from pprint import pprint

import tensorflow as tf
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
    
from catanatron_gym.envs.catanatron_env import ACTION_SPACE_SIZE
from catanatron_gym.features import get_feature_ordering
from catanatron_experimental.machine_learning.board_tensor_features import (
    WIDTH,
    HEIGHT,
    get_channels
)

2022-01-15 14:00:08.572585: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory
2022-01-15 14:00:08.572616: I tensorflow/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine.


In [2]:
# Ensure the training and validation datasets exist locally.
import pathlib
import urllib.request

DATA_FOLDER = "./data"
DATASET_PATH = pathlib.Path(DATA_FOLDER, "1v1-ab2s-nodiscard", "main.csv.gzip")
VALIDATION_DATASET_PATH = pathlib.Path(DATA_FOLDER, "1v1-ab2s-nodiscard-validation", "main.csv.gzip")


def _ensure_downloaded(url, destination):
    """Download ``url`` to ``destination`` unless that file already exists.

    The payload is written to a sibling ``.part`` file and renamed onto
    ``destination`` only after the transfer finishes, so an interrupted
    download is never mistaken for a complete dataset on the next run.

    Args:
        url (str): Location of the remote file.
        destination (pathlib.Path): Local path the file should end up at.
    """
    if destination.exists():
        return
    destination.parent.mkdir(parents=True, exist_ok=True)
    partial = destination.with_name(destination.name + ".part")
    try:
        urllib.request.urlretrieve(url, partial)
        partial.replace(destination)
    finally:
        # Only present here if the download or the rename failed.
        if partial.exists():
            partial.unlink()


_ensure_downloaded(
    "https://catanatron-public.s3.us-east-2.amazonaws.com/1v1-ab2s-nodiscard/main.csv.gzip",
    DATASET_PATH,
)
_ensure_downloaded(
    "https://catanatron-public.s3.us-east-2.amazonaws.com/1v1-ab2s-nodiscard-validation/main.csv.gzip",
    VALIDATION_DATASET_PATH,
)

In [3]:
SHUFFLE_SEED = 1
BATCH_SIZE = 128

# Rough row count for the compressed dataset, extrapolated from a reference
# ratio of 6345 rows per 1209734 bytes (~0.0052 rows/byte).
rows_per_bytes = 6345 / 1209734
size = DATASET_PATH.stat().st_size  # bytes
estimated_rows = int(rows_per_bytes * size)
print(estimated_rows)

1004976


In [4]:
# Load only the first rows of each file: enough to learn the column layout
# without reading the full datasets into memory.
data = pd.read_csv(DATASET_PATH, nrows=10, compression='gzip')
validation_data = pd.read_csv(VALIDATION_DATASET_PATH, nrows=10, compression='gzip')

# Compare as plain lists: an elementwise `==` between two Index objects of
# different lengths raises ValueError instead of failing this assertion.
assert list(data.columns) == list(validation_data.columns), (
    "Training and validation datasets do not share the same columns"
)
data

Unnamed: 0,F_BANK_BRICK,F_BANK_DEV_CARDS,F_BANK_ORE,F_BANK_SHEEP,F_BANK_WHEAT,F_BANK_WOOD,"F_EDGE(0, 1)_P0_ROAD","F_EDGE(0, 1)_P1_ROAD","F_EDGE(0, 20)_P0_ROAD","F_EDGE(0, 20)_P1_ROAD",...,BT_3692,BT_3693,BT_3694,BT_3695,ACTION,RETURN,DISCOUNTED_RETURN,TOURNAMENT_RETURN,DISCOUNTED_TOURNAMENT_RETURN,VICTORY_POINTS_RETURN
0,19.0,25.0,19.0,19.0,19.0,19.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,110,0.0,0.0,6.0,3.185433,5.962317
1,19.0,25.0,19.0,19.0,19.0,19.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,52,0.0,0.0,6.0,3.185433,5.962317
2,18.0,25.0,19.0,18.0,17.0,17.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,107,0.0,0.0,6.0,3.185433,5.962317
3,18.0,25.0,19.0,18.0,17.0,17.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,45,0.0,0.0,6.0,3.185433,5.962317
4,17.0,25.0,19.0,18.0,17.0,17.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0,0.0,0.0,6.0,3.185433,5.962317
5,17.0,25.0,19.0,18.0,17.0,17.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,289,0.0,0.0,6.0,3.185433,5.962317
6,18.0,25.0,19.0,18.0,17.0,17.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0,0.0,0.0,6.0,3.185433,5.962317
7,19.0,25.0,19.0,18.0,17.0,18.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,50,0.0,0.0,6.0,3.185433,5.962317
8,19.0,25.0,19.0,18.0,17.0,18.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,289,0.0,0.0,6.0,3.185433,5.962317
9,19.0,25.0,18.0,17.0,17.0,18.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0,0.0,0.0,6.0,3.185433,5.962317


In [12]:
# ===== Read Dataset
def allow_column(x):
    """Return True when column name ``x`` should be used as a model input.

    Every ``BT_`` column is accepted. ``F_`` columns are accepted unless they
    belong to one of the ``F_EDGE`` / ``F_NODE`` / ``F_PORT`` / ``F_TILE``
    families. Any other column is rejected.
    """
    if x.startswith("BT_"):
        return True
    excluded_prefixes = ("F_EDGE", "F_NODE", "F_PORT", "F_TILE")
    return x.startswith("F_") and not x.startswith(excluded_prefixes)
INPUT_COLUMNS = [column for column in data.columns if allow_column(column)]
BOARD_INPUT_COLUMNS = [f for f in INPUT_COLUMNS if f.startswith("BT_")]
NUMERIC_INPUT_COLUMNS = [f for f in INPUT_COLUMNS if f.startswith("F_")]
NUM_CHANNELS = get_channels(2)  # presumably 2 = number of players -- TODO confirm
LABEL_COLUMN = "ACTION"


def _make_dataset(path):
    """Build a batched, shuffled, single-epoch CSV dataset for the file at ``path``.

    Only the selected input columns plus the label column are read. Both the
    training and the validation pipeline are created through this helper so
    they cannot drift apart, and the shuffle seed comes from SHUFFLE_SEED
    instead of a repeated literal.
    """
    return tf.data.experimental.make_csv_dataset(
        str(path),
        batch_size=BATCH_SIZE,
        num_epochs=1,
        label_name=LABEL_COLUMN,
        select_columns=INPUT_COLUMNS + [LABEL_COLUMN],
        compression_type="GZIP",
        shuffle=True,  # shuffle will shuffle at the element level. nice.
        shuffle_seed=SHUFFLE_SEED,
        shuffle_buffer_size=1000,
        prefetch_buffer_size=100,
    )


raw_dataset = _make_dataset(DATASET_PATH)
raw_validation_dataset = _make_dataset(VALIDATION_DATASET_PATH)

In [22]:
def preprocess(batch, label):
    """Restructure one CSV batch into ``((board, numeric), label)``.

    Args:
        batch: Mapping from column name to that column's per-row values for
            the batch (presumably the dict yielded by make_csv_dataset).
        label: Label values for the batch; passed through unchanged.

    Returns:
        ``((input1, input2), label)`` where ``input1`` stacks the ``BT_``
        columns and reshapes them to ``(-1, WIDTH, HEIGHT, NUM_CHANNELS, 1)``
        and ``input2`` stacks the ``F_`` columns along axis 1.
    """
    # Index by the column list (as input2 does) so the feature order is fixed
    # by BOARD_INPUT_COLUMNS rather than by the dict's iteration order.
    board_values = tf.stack([batch[f] for f in BOARD_INPUT_COLUMNS], axis=1)
    # -1 lets the leading dimension follow the real batch size: the last batch
    # of a finite dataset can hold fewer than BATCH_SIZE rows, which would
    # make a reshape to a fixed BATCH_SIZE fail.
    input1 = tf.reshape(board_values, (-1, WIDTH, HEIGHT, NUM_CHANNELS, 1))
    input2 = tf.stack([batch[f] for f in NUMERIC_INPUT_COLUMNS], axis=1)
    return ((input1, input2), label)

# Run both raw pipelines through the same preprocessing step.
dataset, validation_dataset = (
    raw.map(preprocess) for raw in (raw_dataset, raw_validation_dataset)
)

# Sanity check: number of numeric features, then number of board channels.
print(len(NUMERIC_INPUT_COLUMNS), NUM_CHANNELS, sep="\n")

48
16


In [16]:
# Show the labels of the first batch only.
print("Printing...")
for x in dataset.take(1):
    inputs, label = x
    print(label.numpy())

def create_output(batch, label):
    """Pair ``batch`` with a two-part placeholder target.

    The incoming ``label`` is ignored for now; the returned targets are the
    constants 1 (slot for the high level action lookup) and 2 (slot for the
    variable component).
    """
    high_level_action = 1  # high level action lookup
    variable_component = 2  # variable component
    targets = (high_level_action, variable_component)
    return (batch, targets)
    
# Bind the placeholder-target pipeline to its own name. Rebinding `dataset`
# here would make every later cell that uses `dataset` (e.g. model fitting)
# run on the constant targets from create_output when the notebook is executed
# top to bottom.
output_dataset = raw_dataset.map(preprocess).map(create_output)

print("Printing...")
for x in output_dataset.take(1):
    inputs, label = x
    print(label)



Printing...
[289   0 289 289 289 289 289  30   0   0 237   0   0 289 289   0  93 286
 289 240   0 289   0 289   0  36   4 289 289   0   2   0 289 289   0 164
   0   0   0 110   0 289 165   0 103   0   0  77 235  42   0 289   0  51
 289  93 289   0   0   0 289 289 106 289 289  60 289   0   0  60 289   0
 237 149 289  37   0 289 231 289 289   0 151   0 289  19   0   0 100   0
   4 289   0 231  58 289 289 289 116   0  52 105   0   6  97 237 289   0
   0   1 289 242 237 238   0 289   0  44 242 289   0   1 289  55   0   0
   0 289]
Printing...
(<tf.Tensor: shape=(), dtype=int32, numpy=1>, <tf.Tensor: shape=(), dtype=int32, numpy=2>)


In [19]:
@tf.function
def fn1(inputs, outputs):
    """Pass a dataset element through unchanged, printing its ``outputs`` part.

    Intended as the place to derive additional columns from the inputs (and to
    set their shapes). The print shows the symbolic tensor seen while the
    function is being traced.
    """
    print(outputs)
    return (inputs, outputs)

# Map fn1 over the first batch only; building this pipeline is what triggers
# the print inside fn1.
newdataset = raw_dataset.take(1).map(fn1)

Tensor("outputs:0", shape=(None,), dtype=int32)


In [81]:
import time

from rich.progress import Progress

TOTAL = 100

# Three manually refreshed progress bars that advance at different rates.
with Progress(auto_refresh=False, refresh_per_second=1) as progress:
    task1 = progress.add_task("[red]Downloading...", total=TOTAL)
    task2 = progress.add_task("[green]Processing...", total=TOTAL)
    task3 = progress.add_task("[cyan]Cooking...", total=TOTAL)
    increments = ((task1, 1.5), (task2, 1.3), (task3, 1.9))

    for job in range(TOTAL):
        progress.console.print(f"Working on job #{job}")
        for task_id, step in increments:
            progress.update(task_id, advance=step)
        time.sleep(0.02)
        progress.refresh()

In [90]:
from ctypes import alignment
import time
import random
from rich.console import Console
from rich.table import Table
from rich.progress import Progress
from rich.progress import Progress, BarColumn, TimeRemainingColumn
from rich import box
from rich.console import Console
from rich.theme import Theme
from rich.text import Text


TOTAL = 100

# Style overrides: clear the styles for the remaining-time and percentage
# text, and draw complete / finished bars in green.
theme_styles = {
    "progress.remaining": "",
    "progress.percentage": "",
    "bar.complete": "green",
    "bar.finished": "green",
}
custom_theme = Theme(theme_styles)
console = Console(theme=custom_theme)


class CustomTimeRemainingColumn(TimeRemainingColumn):
    """Time-remaining column that individual tasks can switch off.

    A task created with the field ``show_time=False`` gets an empty cell;
    tasks without that field (or with a truthy value) get the regular
    rendering from the parent column.
    """

    def render(self, task):
        """Return the parent's rendering, or empty text when ``show_time`` is falsy."""
        if task.fields.get("show_time", True):
            return super().render(task)
        return Text("")


# Simulate TOTAL games: one overall bar plus one win-count bar per player.
progress_columns = (
    "[progress.description]{task.description}",
    BarColumn(),
    "[progress.percentage]{task.percentage:>3.0f}%",
    CustomTimeRemainingColumn(),
)
with Progress(*progress_columns, console=console) as progress:
    task1 = progress.add_task(f"Playing {TOTAL} games...", total=TOTAL)
    task2 = progress.add_task("[red]RandomPlayer:RED", total=TOTAL, show_time=False)
    task3 = progress.add_task("[blue]RandomPlayer:BLUE", total=TOTAL, show_time=False)
    for job in range(TOTAL):
        winner = random.randint(0, 1)  # 1 -> RED's bar advances, 0 -> BLUE's
        time.sleep(0.13)
        progress.update(task1, advance=1)
        progress.update(task2, advance=winner)
        progress.update(task3, advance=1 - winner)
    progress.refresh()

# ===== GAME DETAILS
table = Table(title="Last 10 Games", box=box.MINIMAL)
game_columns = (
    ("#", {"justify": "right", "no_wrap": True}),
    ("WINNER", {}),
    ("RED VPs", {"justify": "right"}),
    ("BLUE VPs", {"justify": "right"}),
    ("SEATING", {}),
    ("DURATION", {"justify": "right"}),
    ("NUM TURNS", {"justify": "right"}),
    ("LINK", {"justify": "right"}),
)
for header, options in game_columns:
    table.add_column(header, **options)

# Fill the table with randomly generated mock games.
for i in range(10):
    winner = random.randint(0, 1)
    if random.random() < 0.5:
        seating = "[red]RED[/red],[blue]BLUE"
    else:
        seating = "[blue]BLUE[/blue],[red]RED[/red]"
    x = random.random()
    winner_label = "[red]RED[/red]" if winner else "[blue]BLUE[/blue]"
    # The winner is shown with 10 points, the loser with a random 2-9.
    red_vps = 10 if winner else random.randint(2, 9)
    blue_vps = random.randint(2, 9) if winner else 10
    num_turns = random.randint(200, 400)
    table.add_row(
        str(i),
        winner_label,
        str(red_vps),
        str(blue_vps),
        seating,
        f"{x:.3f}",
        str(num_turns),
        "http://localhost:3000/games/123",
    )

console.print(table)

# ===== PLAYER SUMMARY
table = Table(title="Player Summary", box=box.MINIMAL)
table.add_column("", justify="right", no_wrap=True)
for header in (
    "TOTAL WINS",
    "AVG VPs",
    "AVG SETTLEMENTS",
    "AVG CITIES",
    "AVG ARMY",
    "AVG ROAD",
):
    table.add_column(header, justify="right")

# Mock rows: only the first three cells of each row are provided.
for summary_row in (
    ("[red]RED", "123", "4.4"),
    ("[blue]BLUE", "87", "7.8"),
):
    table.add_row(*summary_row)
console.print(table)

# ===== GAME SUMMARY
game_summary = {
    "AVG TICKS": "955.4",
    "AVG TURNS": "312.2",
    "AVG DURATION": "0.087 secs",
}
table = Table(box=box.MINIMAL, title="Game Summary")
for header in game_summary:
    table.add_column(header, justify="right")
table.add_row(*game_summary.values())
console.print(table)


In [11]:
# Things to try: Dropout. Less Learning Rate. Simpler Models. Validation Graph.
INPUT_SHAPE = (len(INPUT_COLUMNS),)
LAYERS = [16]

inputs = tf.keras.Input(shape=INPUT_SHAPE)

# NOTE(review): input_1 is defined but not connected to the model below. The
# preprocessed datasets yield a (board, numeric) input pair, while this model
# takes a single flat input of len(INPUT_COLUMNS) features -- confirm which
# input layout is intended before training.
input_1 = tf.keras.layers.Input(shape=(WIDTH, HEIGHT, NUM_CHANNELS, 1))

# Stack of fully connected hidden layers, one per entry in LAYERS.
last_layer = inputs
for num_neurons in LAYERS:
    last_layer = tf.keras.layers.Dense(num_neurons, activation="relu", dtype='float64')(last_layer)

last_layer = tf.keras.layers.Dropout(0.2)(last_layer)
# One unnormalised score (logit) per action; the loss applies the softmax.
output_dense = tf.keras.layers.Dense(ACTION_SPACE_SIZE, dtype='float64')(last_layer)
model = tf.keras.Model(inputs=inputs, outputs=output_dense)

model.compile(
    optimizer=tf.keras.optimizers.Adam(1e-4),
    loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
    # Labels are integer class ids (hence the sparse loss), so accuracy must be
    # the sparse variant; 'categorical_accuracy' expects one-hot labels. The
    # metric keeps the old name so history keys stay 'categorical_accuracy' /
    # 'val_categorical_accuracy'.
    metrics=[tf.keras.metrics.SparseCategoricalAccuracy(name="categorical_accuracy")],
)
model.summary()

Model: "model_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_2 (InputLayer)         [(None, 614)]             0         
_________________________________________________________________
dense_2 (Dense)              (None, 16)                9840      
_________________________________________________________________
dropout_1 (Dropout)          (None, 16)                0         
_________________________________________________________________
dense_3 (Dense)              (None, 290)               4930      
Total params: 14,770
Trainable params: 14,770
Non-trainable params: 0
_________________________________________________________________


In [None]:
# Shuffle at the individual element level
# SHUFFLE_BUFFER_SIZE = 20000  # each game is like 200 samples...
# dataset = dataset.unbatch().shuffle(SHUFFLE_BUFFER_SIZE).batch(BATCH_SIZE)
# validation_dataset = validation_dataset.unbatch().shuffle(SHUFFLE_BUFFER_SIZE).batch(BATCH_SIZE)

# One weight per action class, sized from ACTION_SPACE_SIZE (the same constant
# that sizes the model's output layer) instead of a repeated literal.
class_weight = {i: 1 for i in range(ACTION_SPACE_SIZE)}
# Strongly down-weight classes 0 and 289, which dominate the sampled labels.
# NOTE(review): presumably these are the roll / end-turn actions -- confirm.
class_weight[0] = 0.01
class_weight[289] = 0.01

history = model.fit(
    dataset,
    epochs=10,
    # steps_per_epoch=10,
    # validation_steps=10,
    validation_data=validation_dataset,
    class_weight=class_weight
)

Instructions for updating:
The `validate_indices` argument has no effect. Indices are always validated on CPU and never validated on GPU.
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10

In [None]:
# Summarize history for categorical_accuracy: training vs. validation per epoch.
fig, ax = plt.subplots()
# Labels are attached to the lines themselves so the legend cannot get out of
# order; the second curve comes from validation_data, so it is labelled as such.
ax.plot(history.history['categorical_accuracy'], label='train')
ax.plot(history.history['val_categorical_accuracy'], label='validation')
ax.set_title('model categorical_accuracy')
ax.set_ylabel('categorical_accuracy')
ax.set_xlabel('epoch')
ax.legend(loc='upper left')
plt.show()

In [None]:
# Summarize history for loss: training vs. validation per epoch.
fig, ax = plt.subplots()
# Labels are attached to the lines themselves so the legend cannot get out of
# order; the second curve comes from validation_data, so it is labelled as such.
ax.plot(history.history['loss'], label='train')
ax.plot(history.history['val_loss'], label='validation')
ax.set_title('model loss')
ax.set_ylabel('loss')
ax.set_xlabel('epoch')
ax.legend(loc='upper left')
plt.show()

In [None]:
# Persist the trained model under data/models so it can be reloaded later.
model.save("data/models/ab2-copycat-model")

In [None]:
from pprint import pprint
import random
import numpy as np
import tensorflow as tf

from catanatron.game import Game
from catanatron.models.player import Player, RandomPlayer, Color
from catanatron.players.weighted_random import WeightedRandomPlayer
from catanatron_experimental.play import play_batch
from catanatron_gym.envs.catanatron_env import to_action_space, ACTION_SPACE_SIZE, from_action_space
from catanatron_gym.features import create_sample_vector, create_sample


class MyPlayer(Player):
    """Player that plays whichever playable action the trained ``model`` scores highest."""

    def decide(self, game, playable_actions):
        """Should return one of the playable_actions.

        Args:
            game (Game): complete game state. read-only.
            playable_actions (Iterable[Action]): options to choose from
        Return:
            action (Action): Chosen element of playable_actions
        """
        # ===== YOUR CODE HERE =====
        action_ints = [to_action_space(a) for a in playable_actions]

        # Build the single-row model input from the current game state.
        record = create_sample(game, self.color)
        # NOTE(review): dropping the first two characters assumes every name in
        # INPUT_COLUMNS starts with "F_"; names with another prefix (e.g.
        # "BT_") would produce the wrong key -- confirm against INPUT_COLUMNS.
        keys = [name[2:] for name in INPUT_COLUMNS]  # remove the F_ prefix
        X = [[record[x] for x in keys]]

        # Invoke the model itself (not model.call) with training=False so the
        # Dropout layer is inactive while choosing a move.
        result = model(tf.convert_to_tensor(X), training=False)
        scores = np.asarray(result[0])

        # Keep the scores of playable actions only; everything else (and any
        # NaN score) becomes -inf so argmax can never select it.
        masked_scores = np.full(ACTION_SPACE_SIZE, -np.inf)
        masked_scores[action_ints] = scores[action_ints]
        masked_scores[np.isnan(masked_scores)] = -np.inf
        action_index = np.argmax(masked_scores)
        action = from_action_space(action_index, playable_actions)

        return action
        # ===== END YOUR CODE =====

# Play a short batch of two-player games: the model-driven MyPlayer (RED)
# against a RandomPlayer (WHITE). Edit MyPlayer with your logic!
players = [MyPlayer(Color.RED), RandomPlayer(Color.WHITE)]
wins, results_by_player, games = play_batch(5, players)

for summary in (wins, results_by_player):
    pprint(summary)

# Results: LAYERS = [64, 32, 32] with LR 1e-4 made a bot that won 60% of games against Random. It had 60% categorical accuracy.