# Introduction to the game API

In [1]:
# Enable IPython's autoreload extension so edits to imported modules are
# picked up without restarting the kernel (mode 2: reload all modules
# before each cell is executed).
%load_ext autoreload
%autoreload 2

In [2]:
import numpy as np
from knight_five.game import EXAMPLE_BOARD, Game

## Run the README.md example

In [11]:
import logging

# Lower the threshold of the "game.game" logger to DEBUG for the cells below.
# NOTE(review): the game code is imported from `knight_five.game`; confirm that
# "game.game" is really the logger name that module logs to, otherwise this
# setting has no effect on it.
logger = logging.getLogger(name="game.game")
logger.setLevel(level=logging.DEBUG)

In [18]:
# Replay a fixed action sequence on the bundled example board and print the
# resulting board.
game = Game(
    board=EXAMPLE_BOARD.board,
    goal=EXAMPLE_BOARD.goal,
    start=EXAMPLE_BOARD.start,
)
game.initialize()

# One entry per call to game.step(): either a 3-tuple action or None.
# Consecutive None entries are written as `*[None] * n`.
# The trailing labels are kept from the original listing (presumably the cell
# reached after the step -- TODO confirm).
steps = [
    None,  # a0
    (-2, 1, 0),  # b3
    (0, -1, 2),  # a3
    (1, 0, 2),  # a2
    (-2, 1, 0),  # b4
    *[None] * 5,
    (2, -1, 0),  # a2
    *[None] * 3,
    (1, 2, 0),  # c1
    *[None] * 2,
    (-1, -2, 0),  # a2
    (1, 2, 0),  # c1
    *[None] * 15,
    (0, 1, 2),  # d1
    None,
    (-1, 0, 2),
]

for move in steps:
    # The valid-action list is queried before every step but its result is not
    # used here. NOTE(review): confirm whether step() relies on this call.
    game.get_valid_actions(game.king_pos)
    game.step(action=move)

print(game._board)

[[11. 14. 11. 15.]
 [ 8.  6.  9. 10.]
 [-2.  4.  3.  1.]
 [ 6.  6. -2.  0.]]


## Run a game loop with a random agent

In [25]:
import random

# Seed for the random agent. None seeds from system entropy, so every run
# differs (the previous behaviour); set an int such as 42 for a reproducible
# rollout.
SEED = None
# Upper bound on T, the total number of steps whose action was None.
MAX_TIME = 200

# Dedicated generator so seeding this cell does not touch the global `random` state.
rng = random.Random(SEED)

game = Game(board=EXAMPLE_BOARD.board, goal=EXAMPLE_BOARD.goal, start=EXAMPLE_BOARD.start)
game.initialize()
ALL_ACTIONS = game.get_all_actions()

T = 0  # total number of None actions taken so far
t = 0  # number of None actions taken since the last non-None action
# Tracked explicitly so the summary below works even when the loop body never
# runs (previously `possible_actions` could be unbound -> NameError).
is_blocked = False

while game.is_on and T < MAX_TIME:
    # Entries of the valid-action list are used as indices into ALL_ACTIONS.
    possible_actions = game.get_valid_actions(game.king_pos)
    if not possible_actions:
        is_blocked = True
        break
    action_idx = rng.choice(possible_actions)

    action = ALL_ACTIONS[action_idx]
    game.step(action=action)
    if action is None:
        # Only None actions count towards the MAX_TIME budget.
        T += 1
        t += 1
    else:
        # Report how many None actions preceded this one, and the king's new cell.
        print(f"({t},{game.excel_pos(pos=game.king_pos)})")
        t = 0

# Final entry: None actions since the last non-None action, and the king's last cell.
print(f"({t},{game.excel_pos(pos=game.king_pos)})")

print("game is over")
print(f"king reaches goal: {not game.is_on}")
print(f"king is blocked: {is_blocked}")
print(f"time is {T}")
print(game.king_pos)
print(game._board)
print(game._freq)

(0,a3)
(0,b3)
(0,b2)
(0,b3)
(0,b2)
(0,b3)
(2,c1)
(3,b3)
(0,c1)
(1,b3)
(2,c1)
(0,b3)
(1,c1)
(2,d1)
(0,d1)
game is over
king reaches goal: False
king is blocked: True
time is 11
(3, 3)
[[11. 12. 11. 14.]
 [ 8.  2.  9.  9.]
 [10.  4.  5.  1.]
 [ 7.  2.  2.  0.]]
[[0. 0. 0. 0.]
 [0. 3. 0. 0.]
 [0. 0. 0. 0.]
 [0. 0. 3. 0.]]


## Reinforcement learning