# Encoding the Game

In [1]:
import json
import numpy as np

In [26]:
# --- Configuration -----------------------------------------------------------
GAME_PATH = "mygame_0.json"  # JSON game record to encode
BOARD_SIZE = 6  # used below to pick the row that receives player 0's starting pieces
DISCOUNT = 1.0  # scale applied to the reward before it is divided by the moves remaining

# JSON is UTF-8 by specification; being explicit keeps the load independent of
# the platform's default text encoding.
with open(GAME_PATH, "r", encoding="utf-8") as game_file:
    game = json.load(game_file)

# Display the metadata entry of the record (players, starting configs, winner).
game["meta"]

{'name': 'mygame_0.json',
 'player0': {'name': 'B', 'seed': 42, 'starting config': [2, 1, 3, 3, 1, 2]},
 'player1': {'name': 'W', 'seed': 346, 'starting config': [3, 3, 1, 1, 2, 2]},
 'winning player': 0}

In [27]:
# Number of top-level entries in the record: the "meta" entry plus one entry per timestep.
len(game)

7

In [28]:
# Scratch check of modular wrap-around: (8 + 1) % 8 evaluates to 1.
# NOTE(review): the two notes below are kept verbatim; they look like an
# index mapping (0 -> 7, 1 -> 1) — confirm the intent or delete this cell.
# 0 - 7
# 1 - 1
(8 + 1) % 8

1

In [38]:
# Prototype of the per-timestep encoding: for every recorded move, build the
# feature planes and print the reward plane of the active player.
BOARD_SHAPE = (8, 6)  # shape shared by every plane built in this cell
num_timesteps = len(game) - 1  # every key of `game` except "meta" is one timestep

for timestep in range(num_timesteps):
    game_step = game[str(timestep)]
    # Moves left until the end of the record, the current one included.
    moves_ahead = num_timesteps - timestep
    if timestep == 0:
        # No previous move exists: rebuild the opening position from the metadata.
        initial_board_state = np.zeros(BOARD_SHAPE)
        for player in [0, 1]:
            starting_config = game["meta"][f"player{player}"]["starting config"]
            # Player 0 is written to row BOARD_SIZE, player 1 to row 1.
            row = (BOARD_SIZE) * (1 - player) + player
            # NOTE(review): 24 looks like a normalisation constant — confirm its meaning.
            initial_board_state[row, :] = np.array(starting_config) / 24.
    else:
        # The move starts from the last board stored for the previous timestep.
        initial_board_state = np.array(game[str(timestep - 1)][-1])

    final_board_state = game_step[-1]

    # Timestep 0 is attributed to player 1, after which the players alternate.
    active_player = (timestep + 1) % 2
    print(f'Player {active_player}')

    # One plane per player; only the active player's plane is filled with ones.
    players_channels = np.zeros((2,) + BOARD_SHAPE)
    players_channels[active_player] = 1.

    # Row 0 is marked for player 0 and the last row (index -1) for player 1.
    home_row = np.zeros(BOARD_SHAPE)
    home_row[-active_player, :] = 1.

    # Keep only the pieces on the first non-empty row, scanning from row 0 for
    # player 1 (step +1) and from the last row for player 0 (step -1).
    reverse_index = active_player * 2 - 1
    valid_starting_row = np.argmax(np.any(initial_board_state[::reverse_index], axis=1))
    valid_starting_pos = np.zeros(BOARD_SHAPE)
    valid_starting_pos[valid_starting_row, :] = 1.
    valid_starting_pos = valid_starting_pos[::reverse_index]
    valid_pieces = valid_starting_pos * initial_board_state

    # +1 for the eventual winner, -5 for the loser, scaled by DISCOUNT and
    # divided by the number of moves still to come.
    reward_value = 1.0 if active_player == game["meta"]["winning player"] else -5
    reward_value *= DISCOUNT / moves_ahead
    reward = np.full(BOARD_SHAPE, reward_value)
    print(f'Reward at step {timestep}:\n{reward}')

Player 1
Reward at step 0:
[[-0.83333333 -0.83333333 -0.83333333 -0.83333333 -0.83333333 -0.83333333]
 [-0.83333333 -0.83333333 -0.83333333 -0.83333333 -0.83333333 -0.83333333]
 [-0.83333333 -0.83333333 -0.83333333 -0.83333333 -0.83333333 -0.83333333]
 [-0.83333333 -0.83333333 -0.83333333 -0.83333333 -0.83333333 -0.83333333]
 [-0.83333333 -0.83333333 -0.83333333 -0.83333333 -0.83333333 -0.83333333]
 [-0.83333333 -0.83333333 -0.83333333 -0.83333333 -0.83333333 -0.83333333]
 [-0.83333333 -0.83333333 -0.83333333 -0.83333333 -0.83333333 -0.83333333]
 [-0.83333333 -0.83333333 -0.83333333 -0.83333333 -0.83333333 -0.83333333]]
Player 0
Reward at step 1:
[[0.2 0.2 0.2 0.2 0.2 0.2]
 [0.2 0.2 0.2 0.2 0.2 0.2]
 [0.2 0.2 0.2 0.2 0.2 0.2]
 [0.2 0.2 0.2 0.2 0.2 0.2]
 [0.2 0.2 0.2 0.2 0.2 0.2]
 [0.2 0.2 0.2 0.2 0.2 0.2]
 [0.2 0.2 0.2 0.2 0.2 0.2]
 [0.2 0.2 0.2 0.2 0.2 0.2]]
Player 1
Reward at step 2:
[[-1.25 -1.25 -1.25 -1.25 -1.25 -1.25]
 [-1.25 -1.25 -1.25 -1.25 -1.25 -1.25]
 [-1.25 -1.25 -1.25 -1.

## Create the Encoder Class

In [42]:
# Scratch check: `+` between two strings concatenates them ("1" + "2" -> "12").
a = "1"
a + "2"

'12'

In [69]:
class GameEncoder:
    """Encode one recorded game into per-timestep feature planes.

    The record is a JSON object holding a ``"meta"`` entry plus one entry per
    timestep keyed ``"0"``, ``"1"``, ...  The last element of a timestep entry
    is used as the board after that move.

    Args:
        game_path: Path of the JSON game record.
        discount: Either a single number applied to every timestep, or a
            sequence / array with exactly one factor per timestep.

    Raises:
        ValueError: If ``discount`` is a sequence whose length differs from
            the number of timesteps in the record.

    Attributes (the first axis is always the timestep):
        game: The decoded JSON record.
        game_len: Number of timesteps (every key except ``"meta"``).
        discount: Float array of shape ``(game_len,)``.
        player_channels: ``(game_len, 2, 8, 6)``; the active player's plane is ones.
        home_row: ``(game_len, 8, 6)``; row 0 is ones when player 0 is active,
            the last row when player 1 is active.
        initial_board_state: ``(game_len, 8, 6)``; board before the move.
        final_board_state: ``(game_len, 8, 6)``; board after the move.
        valid_pieces: ``(game_len, 8, 6)``; the initial board restricted to the
            first non-empty row seen from the active player's scan direction.
        rewards: ``(game_len, 8, 6)``; constant plane holding the reward.
    """

    _BOARD_SHAPE = (8, 6)
    # NOTE(review): 24 looks like a normalisation constant for the starting
    # configuration — confirm its meaning.
    _CONFIG_SCALE = 24.0
    _WIN_REWARD = 1.0
    _LOSS_REWARD = -5.0

    def __init__(self, game_path, discount=1.0):
        with open(game_path, "r", encoding="utf-8") as game_file:
            self.game = json.load(game_file)

        # Every key except "meta" is one timestep.
        self.game_len = len(self.game) - 1
        print(f"Loaded game '{game_path}' with a {self.game_len} rounds game.")
        self.discount = self._expand_discount(discount, self.game_len)

        plane_stack = (self.game_len,) + self._BOARD_SHAPE
        self.player_channels = np.zeros((self.game_len, 2) + self._BOARD_SHAPE)
        self.home_row = np.zeros(plane_stack)
        self.initial_board_state = np.empty(plane_stack)
        self.final_board_state = np.empty(plane_stack)
        self.valid_pieces = np.empty(plane_stack)
        self.rewards = np.empty(plane_stack)

        winner = self.game["meta"]["winning player"]
        for timestep in range(self.game_len):
            if timestep == 0:
                board_before = self._starting_board()
            else:
                # A move starts from the last board of the previous timestep.
                board_before = np.array(self.game[str(timestep - 1)][-1])
            self.initial_board_state[timestep] = board_before
            self.final_board_state[timestep] = self.game[str(timestep)][-1]

            # Timestep 0 is attributed to player 1, then the players alternate.
            active_player = (timestep + 1) % 2
            self.player_channels[timestep, active_player] = 1.0
            # Index -0 is row 0 (player 0); index -1 is the last row (player 1).
            self.home_row[timestep, -active_player, :] = 1.0
            self.valid_pieces[timestep] = self._front_row_pieces(
                board_before, active_player
            )

            # Moves left until the end of the record, the current one included.
            moves_ahead = self.game_len - timestep
            outcome = self._WIN_REWARD if active_player == winner else self._LOSS_REWARD
            # A scalar assignment broadcasts over the whole plane.
            self.rewards[timestep] = outcome * (self.discount[timestep] / moves_ahead)

    @staticmethod
    def _expand_discount(discount, game_len):
        """Return ``discount`` as a float vector with one factor per timestep."""
        factors = np.asarray(discount, dtype=float)
        if factors.ndim == 0:
            return np.full(game_len, float(factors))
        if factors.shape != (game_len,):
            raise ValueError(
                f"discount must be a scalar or have one entry per timestep "
                f"({game_len}); got shape {factors.shape}"
            )
        return factors

    def _starting_board(self):
        """Build the opening board from the starting configs in the metadata."""
        rows = self._BOARD_SHAPE[0]
        board = np.zeros(self._BOARD_SHAPE)
        for player in (0, 1):
            config = self.game["meta"][f"player{player}"]["starting config"]
            # Player 0 goes on the second-to-last row, player 1 on row 1.
            row = (rows - 2) * (1 - player) + player
            board[row, :] = np.array(config) / self._CONFIG_SCALE
        return board

    @staticmethod
    def _front_row_pieces(board, active_player):
        """Keep only the first non-empty row of ``board`` for the active player.

        Rows are scanned from row 0 downwards for player 1 and from the last
        row upwards for player 0.
        """
        step = active_player * 2 - 1  # -1 for player 0, +1 for player 1
        first_occupied = np.argmax(np.any(board[::step], axis=1))
        mask = np.zeros(board.shape)
        mask[first_occupied, :] = 1.0
        # The row index was found in scan order; flip the mask back to board order.
        return mask[::step] * board

    def __repr__(self):
        """Return the metadata entries, one ``key:`` line followed by its value."""
        return "".join(
            f"{key}:\n  {value}\n" for key, value in self.game["meta"].items()
        )
    
# Encode the recorded game referenced by GAME_PATH.
data = GameEncoder(GAME_PATH)
# Each reward plane is constant, so cell [0, 0] is enough to read its value;
# the ::2 stride keeps the even timesteps, i.e. the moves where player 1 is active.
data.rewards[::2, 0, 0]

Loaded game 'mygame_0.json' with a 6 rounds game.


array([-0.83333333, -1.25      , -2.5       ])