In [77]:
import sys
import copy 
import random
import pandas as pd
import numpy as np
import torch
import tqdm.auto as tqdm
from pathlib import Path

from typing import *
from enum import Enum
from dataclasses import dataclass

In [2]:
%load_ext autoreload
%autoreload 2

In [3]:
import logging
logger = logging.getLogger("research")

In [4]:
import os,sys
sys.path.insert(0, str(Path.cwd().parent))

In [5]:
import utils.logging
utils.logging.setup(debug=False)

### Narde rules
https://www.bkgm.com/variants/Narde.html

In [31]:
class Game:
    class Step(Enum):
        IDLE = "idle"
        ROLL = "roll"
        TURN = "trun"
        FINISHED = "finished"

    def __init__(self, seed: Optional[int] = None):
        self.seed = random.randint(0, 2**32-1) if seed is None else seed
        logger.debug(f"new game started, seed={self.seed}")
        random.seed(self.seed)
        np.random.seed(self.seed)
        self.board = np.zeros(24, dtype=int)
        self.board[0] = 15
        self.board[12] = -15
        self.dice = [0, 0]
        self.home = [0, 0]
        self.pturn = 0
        self.t = 0
        self.step : Game.Step = Game.Step.IDLE

    def _get_dst_idx(self, src_idx: int, steps: int) -> int:
        dst_idx = src_idx + steps
        if dst_idx > 24:
            dst_idx = dst_idx % 25 + 1
        return dst_idx

    def _can_bearoff(self):
        counter = self.home[self.pturn]
        sign = 1 if self.pturn == 0 else -1
        home = range(19, 25) if self.pturn == 0 else range(7, 13)
        for idx in home:
            if sign * self.board[idx-1] > 0:
                counter += sign * self.board[idx-1]
        return counter == 15

    def _is_bearoff(self, src_idx: int, steps: int) -> bool:
        dst_idx = self._get_dst_idx(src_idx, steps)
        return (self.pturn == 0 and dst_idx < src_idx) or \
               (self.pturn == 1 and src_idx <= 12 and dst_idx > 12)
    
    def _check_move(self, src_idx: int, steps: int):
        dst_idx = self._get_dst_idx(src_idx, steps)
        sign = 1 if self.pturn == 0 else -1
        if self.step != Game.Step.TURN:
            raise RuntimeError("invalid action")
        if not (1 <= src_idx <= 24):
            raise RuntimeError("invalid position")
        if not (sign * self.board[src_idx-1] > 0):
            raise RuntimeError(f"no checkers at position {src_idx}")
        if steps not in self.dice:
            raise RuntimeError(f"no dice with value {steps}")
        if self._is_bearoff(src_idx, steps):
            if not self._can_bearoff():
                raise RuntimeError(f"not all checkers are at home")
        elif not (sign * self.board[dst_idx-1] >= 0):
            raise RuntimeError(f"can't move to position {dst_idx}")

    def _enum_valid_moves(self) -> Iterator[Tuple[int, int]]:
        eligible_moves = []
        sign = 1 if self.pturn == 0 else -1
        for src_idx in range(1, 25):
            if sign * self.board[src_idx-1] > 0:
                for steps in range(1, 7):
                    if self._is_valid_move(src_idx, steps):
                        yield (src_idx, steps)
        return eligible_moves
        
    def _is_valid_move(self, src_idx: int, steps: int) -> bool:
        try:
            self._check_move(src_idx, steps)
            return True
        except RuntimeError as e:
            return False
        
    def _can_move(self):
        try:
            next(iter(self._enum_valid_moves()))
            return True
        except StopIteration:
            return False

    def start(self, d1: int = 0, d2: int = 0) -> "Game":
        if self.step != Game.Step.IDLE:
            raise RuntimeError("invalid action")

        self.dice = [d1 or random.randint(1, 6), d2 or random.randint(1, 6)]
        while self.dice[0] == self.dice[1]:
            self.dice = [random.randint(1, 6), random.randint(1, 6)]

        self.step = Game.Step.ROLL
        if self.dice[0] > self.dice[1]:
            self.pturn = 0
        else: # self.dice[0] < self.dice[1]:
            self.pturn = 1
        return self

    def roll(self, d1: int = 0, d2: int = 0) -> "Game":
        if self.step != Game.Step.ROLL:
            raise RuntimeError("invalid action")
        self.dice = [d1 or random.randint(1, 6), d2 or random.randint(1, 6)]
        if self.dice[0] == self.dice[1]:
            self.dice += self.dice
        self.step = Game.Step.TURN
        return self

    def turn(self, src_idx: int, steps: int) -> "Game":
        dst_idx = self._get_dst_idx(src_idx, steps)
        sign = 1 if self.pturn == 0 else -1
        
        self._check_move(src_idx, steps)
        
        if self._is_bearoff(src_idx, steps):
            self.board[src_idx-1] -= sign
            self.home[self.pturn] += 1
        else:
            self.board[src_idx-1] -= sign
            self.board[dst_idx-1] += sign

        if len(self.dice) > 2:
            self.dice.pop(self.dice.index(steps, -1))
        else:
            self.dice[self.dice.index(steps)] = 0

        if self.home[self.pturn] == 15:
            self.step = Game.Step.FINISHED
        elif self.dice[0] == 0 and self.dice[1] == 0:
            self.step = Game.Step.ROLL
            self.pturn = (self.pturn + 1) % 2
            self.t += 1
        
        return self
    
    def is_finished(self):
        return self.step == Game.Step.FINISHED
    
    def skip(self) -> "Game":
        if self.step != Game.Step.TURN:
            raise RuntimeError("invalid action")
        
        if not self._can_move():
            self.dice = [0, 0]
            self.step = Game.Step.ROLL
            self.pturn = (self.pturn + 1) % 2
            self.t += 1
        
        return self

    def __repr__(self):
        template = """
        |{OHM}| 24 | 23 | 22 | 21 | 20 | 19 | {X} | 18 | 17 | 16 | 15 | 14 | 13 |  X  |
        |     |-----------------------------|     |-----------------------------|     |
        |     |{d6}|{d5}|{d4}|{d3}|{d2}|{d1}|     |{c6}|{c5}|{c4}|{c3}|{c2}|{c1}|     |
        |     |    |    |    |    |    |    |     |    |    |    |    |    |    |     |
        |     |    |    |    |    |    |    |     |    |    |    |    |    |    |     |
        |     |    |    |    |    |    |    |     |    |    |    |    |    |    |     |
        |     |    |    |    |    |    |    |     |    |    |    |    |    |    |     |
        |     |-----------------------------|{dcs}|-----------------------------|     |
        |     |    |    |    |    |    |    |     |    |    |    |    |    |    |     |
        |     |    |    |    |    |    |    |     |    |    |    |    |    |    |     |
        |     |    |    |    |    |    |    |     |    |    |    |    |    |    |     |
        |     |    |    |    |    |    |    |     |    |    |    |    |    |    |     |
        |     |{a1}|{a2}|{a3}|{a4}|{a5}|{a6}|     |{b1}|{b2}|{b3}|{b4}|{b5}|{b6}|     |
        |     |-----------------------------|     |-----------------------------|     |
        |  O  |  1 |  2 |  3 |  4 |  5 |  6 | {O} |  7 |  8 |  9 | 10 | 11 | 12 |{XHM}|
        """
        values = {}
        for c in ["a", "b", "c", "d"]:
            for n in range(1, 7):
                idx = 6 * (ord(c) - ord("a")) + n-1
                val = int(self.board[idx])
                if val < 0:
                    val = f" O{-val}".ljust(4)
                elif val > 0:
                    val = f" X{val}".ljust(4)
                else:
                    val = f"    "
                values[f"{c}{n}"] = val
        values["dcs"] = f" {self.dice[0] or ' '}:{self.dice[1] or ' '} "
        values["X"] = "   "
        values["O"] = "   "
        values["XHM"] = "     "
        values["OHM"] = "     "
        if self.step == Game.Step.ROLL:
            values["O" if self.pturn == 0 else "X"] = " * "
        elif self.step == self.step.TURN:
            values["O" if self.pturn == 0 else "X"] = f"({len([d for d in self.dice if d])})"
        if self.home[0] > 0:
            values["OHM"] = f"  {self.home[0]}".ljust(5)
        if self.home[1] > 0:
            values["XHM"] = f"  {self.home[1]}".ljust(5)
        return template.format(**values)

In [59]:
def random_move(game: Game):
    if not game.is_finished():
        moves = list(game._enum_valid_moves())
        if len(moves) > 0:
            pos, steps = random.choice(moves)
            logger.debug(f"t={game.t}, p={game.pturn} moves ({pos}+{steps})")
            game.turn(pos, steps)
            return True
        else:
            logger.debug(f"t={game.t}, p={game.pturn} has no eligible moves, skipping")
            game.skip()
    return False

def auto_turn(game: Game):
    game.roll()
    logger.debug(f"t={game.t}, p={game.pturn} rolls {game.dice}")
    for _ in range(len(game.dice)):
        if not random_move(game):
            break
    return game

def auto_rollout(game, turns: int = 100):
    for turn in range(turns):
        logger.debug(f"{turn=}")
        auto_turn(game)
        if game.is_finished():
            logger.debug(f"t={game.t}, game finished, p={game.pturn} wins")
            break
    return game

In [60]:
g = Game(seed=42)
g.start().roll().turn(1, 6).turn(7, 1).roll().turn(13, 3).turn(16, 2).roll().turn(1, 2).turn(3, 2).turn(5, 2).turn(8, 2).roll().turn(13,6).turn(18,1).roll()
g.turn(10, 6).turn(16,6).turn(1, 6) # < there's no moves for red here


        |     | 24 | 23 | 22 | 21 | 20 | 19 |     | 18 | 17 | 16 | 15 | 14 | 13 |  X  |
        |     |-----------------------------|     |-----------------------------|     |
        |     |    |    | X1 |    |    | O2 |     |    |    |    |    |    | O13|     |
        |     |    |    |    |    |    |    |     |    |    |    |    |    |    |     |
        |     |    |    |    |    |    |    |     |    |    |    |    |    |    |     |
        |     |    |    |    |    |    |    |     |    |    |    |    |    |    |     |
        |     |    |    |    |    |    |    |     |    |    |    |    |    |    |     |
        |     |-----------------------------|  :6 |-----------------------------|     |
        |     |    |    |    |    |    |    |     |    |    |    |    |    |    |     |
        |     |    |    |    |    |    |    |     |    |    |    |    |    |    |     |
        |     |    |    |    |    |    |    |     |    |    |    |    |    |    |     |
        |     |    |    |    | 

In [61]:
g = Game(seed=1761854789)
auto_rollout(g.start(), turns=100)


        |  15 | 24 | 23 | 22 | 21 | 20 | 19 |     | 18 | 17 | 16 | 15 | 14 | 13 |  X  |
        |     |-----------------------------|     |-----------------------------|     |
        |     |    |    |    |    |    |    |     |    |    |    |    |    |    |     |
        |     |    |    |    |    |    |    |     |    |    |    |    |    |    |     |
        |     |    |    |    |    |    |    |     |    |    |    |    |    |    |     |
        |     |    |    |    |    |    |    |     |    |    |    |    |    |    |     |
        |     |    |    |    |    |    |    |     |    |    |    |    |    |    |     |
        |     |-----------------------------|  :  |-----------------------------|     |
        |     |    |    |    |    |    |    |     |    |    |    |    |    |    |     |
        |     |    |    |    |    |    |    |     |    |    |    |    |    |    |     |
        |     |    |    |    |    |    |    |     |    |    |    |    |    |    |     |
        |     |    |    |    | 

In [62]:
g = Game()
auto_rollout(g.start(), turns=3)


        |     | 24 | 23 | 22 | 21 | 20 | 19 |     | 18 | 17 | 16 | 15 | 14 | 13 |  X  |
        |     |-----------------------------|     |-----------------------------|     |
        |     |    | O1 |    |    |    |    |     |    |    |    |    | O1 | O13|     |
        |     |    |    |    |    |    |    |     |    |    |    |    |    |    |     |
        |     |    |    |    |    |    |    |     |    |    |    |    |    |    |     |
        |     |    |    |    |    |    |    |     |    |    |    |    |    |    |     |
        |     |    |    |    |    |    |    |     |    |    |    |    |    |    |     |
        |     |-----------------------------|  :  |-----------------------------|     |
        |     |    |    |    |    |    |    |     |    |    |    |    |    |    |     |
        |     |    |    |    |    |    |    |     |    |    |    |    |    |    |     |
        |     |    |    |    |    |    |    |     |    |    |    |    |    |    |     |
        |     |    |    |    | 

In [63]:
auto_rollout(Game().start(), turns=500)


        |  10 | 24 | 23 | 22 | 21 | 20 | 19 |     | 18 | 17 | 16 | 15 | 14 | 13 |  X  |
        |     |-----------------------------|     |-----------------------------|     |
        |     | X5 |    |    |    |    |    |     |    |    |    |    |    |    |     |
        |     |    |    |    |    |    |    |     |    |    |    |    |    |    |     |
        |     |    |    |    |    |    |    |     |    |    |    |    |    |    |     |
        |     |    |    |    |    |    |    |     |    |    |    |    |    |    |     |
        |     |    |    |    |    |    |    |     |    |    |    |    |    |    |     |
        |     |-----------------------------|  :5 |-----------------------------|     |
        |     |    |    |    |    |    |    |     |    |    |    |    |    |    |     |
        |     |    |    |    |    |    |    |     |    |    |    |    |    |    |     |
        |     |    |    |    |    |    |    |     |    |    |    |    |    |    |     |
        |     |    |    |    | 

In [64]:
np.array([rollout(Game().start(), turns=500).pturn for _ in tqdm.trange(1000)]).mean()

100%|██████████| 1000/1000 [00:06<00:00, 147.54it/s]


np.float64(0.513)

In [78]:
class Policy:
    def make_move(self, game: Game) -> bool:
        pass

    def play_turn(self, game: Game) -> Game:
        game.roll()
        logger.debug(f"t={game.t}, p={game.pturn} rolls {game.dice}")
        for _ in range(len(game.dice)):
            if not self._make_move(game):
                break
        return game

class RandomPolicy(Policy):
    def _make_move(self, game: Game):
        if not game.is_finished():
            moves = list(game._enum_valid_moves())
            if len(moves) > 0:
                pos, steps = random.choice(moves)
                logger.debug(f"t={game.t}, p={game.pturn} moves ({pos}+{steps})")
                game.turn(pos, steps)
                return True
            else:
                logger.debug(f"t={game.t}, p={game.pturn} has no eligible moves, skipping")
                game.skip()
        return False

class LazyPolicy(Policy):
    def _first_move(self, game: Game) -> Optional[Tuple[int, int]]:
        try:
            return next(iter(game._enum_valid_moves()))
        except StopIteration:
            return None

    def _make_move(self, game: Game):
        if not game.is_finished():
            move = self._first_move(game)
            if move:
                pos, steps = move
                logger.debug(f"t={game.t}, p={game.pturn} moves ({pos}+{steps})")
                game.turn(pos, steps)
                return True
            else:
                logger.debug(f"t={game.t}, p={game.pturn} has no eligible moves, skipping")
                game.skip()
        return False

@dataclass
class Results:
    winner: int
    turns: int
    reward: int

def calc_reward(game: Game) -> int:
    opponent = 1 if game.pturn == 0 else 0
    return 2 if game.home[opponent] == 0 else 1


def tournament(game: Game, player1: Policy, player2: Policy, turns: int = 100) -> Results:
    game.start()   
    for turn in range(turns):
        if game.is_finished():
            logger.debug(f"t={game.t}, game finished, p={game.pturn} wins")
            break
        cur_player = player1 if game.pturn == 0 else player2
        cur_player.play_turn(game)
    return Results(winner=game.pturn, turns=game.t, reward=calc_reward(game))

In [None]:
p1 = LazyPolicy()
p2 = LazyPolicy()
tournament(Game(), LazyPolicy(), LazyPolicy(), turns=1000)

{'winner': 1, 'turns': 90, 'reward': 1}

In [86]:
pd.DataFrame([tournament(Game(), LazyPolicy(), LazyPolicy(), turns=1000).__dict__ for _ in tqdm.trange(10000)]).mean()

100%|██████████| 10000/10000 [00:17<00:00, 574.19it/s]


winner     0.5589
turns     91.8149
reward     1.0760
dtype: float64

In [87]:
pd.DataFrame([tournament(Game(), RandomPolicy(), RandomPolicy(), turns=1000).__dict__ for _ in tqdm.trange(10000)]).mean()

100%|██████████| 10000/10000 [01:07<00:00, 148.44it/s]


winner     0.5025
turns     94.0544
reward     1.1808
dtype: float64

In [85]:
pd.DataFrame([tournament(Game(), RandomPolicy(), LazyPolicy(), turns=1000).__dict__ for _ in tqdm.trange(10000)]).mean()

100%|██████████| 10000/10000 [00:44<00:00, 225.66it/s]


winner     0.5116
turns     92.5525
reward     1.1385
dtype: float64