In [17]:
from Game import Game
import sys 
sys.path.append("Othello")
from othello_game import OthelloGame

In [18]:
game = OthelloGame(n=8)

In [19]:
game.getInitBoard()

array([[ 0,  0,  0,  0,  0,  0,  0,  0],
       [ 0,  0,  0,  0,  0,  0,  0,  0],
       [ 0,  0,  0,  0,  0,  0,  0,  0],
       [ 0,  0,  0, -1,  1,  0,  0,  0],
       [ 0,  0,  0,  1, -1,  0,  0,  0],
       [ 0,  0,  0,  0,  0,  0,  0,  0],
       [ 0,  0,  0,  0,  0,  0,  0,  0],
       [ 0,  0,  0,  0,  0,  0,  0,  0]])

In [20]:
board = game.getInitBoard()

In [21]:
type(board)

numpy.ndarray

In [22]:
board

array([[ 0,  0,  0,  0,  0,  0,  0,  0],
       [ 0,  0,  0,  0,  0,  0,  0,  0],
       [ 0,  0,  0,  0,  0,  0,  0,  0],
       [ 0,  0,  0, -1,  1,  0,  0,  0],
       [ 0,  0,  0,  1, -1,  0,  0,  0],
       [ 0,  0,  0,  0,  0,  0,  0,  0],
       [ 0,  0,  0,  0,  0,  0,  0,  0],
       [ 0,  0,  0,  0,  0,  0,  0,  0]])

In [23]:
import numpy as np 
# Plane for the player encoded as +1: element-wise max with 0 turns every
# negative entry (the other player's -1 stones) into 0 and keeps the +1 entries.
player_a = np.maximum(board, 0)

In [24]:
# Start from a copy so that building the second player's plane never modifies `board`.
player_b = board.copy()

In [25]:
# Zero out every positive entry in place, leaving only the negative (-1) entries.
player_b[player_b > 0] = 0 

In [26]:
# Flip the sign so the remaining -1 entries become +1.
# NOTE: re-running this cell flips the sign again, so it is not idempotent.
player_b=player_b*-1

In [27]:
player_b

array([[0, 0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 1, 0, 0, 0, 0],
       [0, 0, 0, 0, 1, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0, 0]])

In [28]:
board

array([[ 0,  0,  0,  0,  0,  0,  0,  0],
       [ 0,  0,  0,  0,  0,  0,  0,  0],
       [ 0,  0,  0,  0,  0,  0,  0,  0],
       [ 0,  0,  0, -1,  1,  0,  0,  0],
       [ 0,  0,  0,  1, -1,  0,  0,  0],
       [ 0,  0,  0,  0,  0,  0,  0,  0],
       [ 0,  0,  0,  0,  0,  0,  0,  0],
       [ 0,  0,  0,  0,  0,  0,  0,  0]])

In [29]:
import torch 

# Convert the NumPy board into a float32 torch tensor (torch.tensor copies the data).
board_tensor = torch.tensor(board, dtype=torch.float32)

In [30]:
board_tensor.shape

torch.Size([8, 8])

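We want the network input to be a stack of (M·T + L) planes, each of size N×N: M feature planes per time step, T time steps of history, plus L extra constant planes.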

In [31]:
# Scratch check: accumulate the integers 0..4 into a list, one per iteration.
tmp = []
for a in range(5):
    tmp += [a]

In [None]:
"""
Author: Aryaman Pandya
File contents: Convolutional neural network that takes in board state 
and outputs expected value 
"""

from torch import nn
import numpy as np


class OthelloNN(nn.Module):
    '''
    Convolutional neural network used in the AlphaZero implementation scaled 
    for the dimensions of the othello game. 

    The network consumes a channels-first batch of shape (batch, 7, 8, 8) and
    produces a policy distribution over 64 outputs together with a scalar
    value estimate in [-1, 1] for every element of the batch.
    '''
    def __init__(self) -> None:
        '''
        Initialization of the neural network graph. 
        Contains a common body that includes 4 sequential Conv2D operations 
        followed by batch normalization and ReLU. 
        Contains two separate heads for policy and value estimation as specified 
        in the original DeepMind paper supplemental materials. 
        '''

        super().__init__()
        # Input planes follow the conventions from the paper:
        # N x N -> 8 x 8, M = 2, T = 3, L = 1 (who's playing), i.e. M*T + L = 7 planes.
        # nn.Conv2d is channels-first, so the expected input is (batch, 7, 8, 8).

        # Every body block uses a 3x3 kernel with padding=1, so the 8x8 spatial
        # size is preserved all the way through the body.
        self.conv1 = nn.Sequential(
            nn.Conv2d(in_channels=7, out_channels=128, kernel_size=3, padding=1),
            nn.BatchNorm2d(128),
            nn.ReLU(),
        )
        self.conv2 = nn.Sequential(
            nn.Conv2d(in_channels=128, out_channels=128, kernel_size=3, padding=1),
            nn.BatchNorm2d(128),
            nn.ReLU(),
        )
        self.conv3 = nn.Sequential(
            nn.Conv2d(in_channels=128, out_channels=128, kernel_size=3, padding=1),
            nn.BatchNorm2d(128),
            nn.ReLU(),
        )
        self.conv4 = nn.Sequential(
            nn.Conv2d(in_channels=128, out_channels=128, kernel_size=3, padding=1),
            nn.BatchNorm2d(128),
            nn.ReLU(),
        )

        # Policy head: 8192 = 128 channels * 8 * 8, i.e. the flattened body output.
        # NOTE(review): 64 outputs means one per board square; if the game's action
        # space also contains a "pass" move this would need 65 - confirm with the game class.
        self.policy_head = nn.Sequential(nn.Linear(8192, 64), nn.Softmax(dim=1))

        # Value head, part 1: a 1x1 convolution collapses 128 channels into one 8x8 plane.
        self.value_head_conv = nn.Sequential(
            nn.Conv2d(in_channels=128, out_channels=1, kernel_size=1),
            nn.BatchNorm2d(1),
            nn.ReLU(),
        )

        # Value head, part 2: the flattened 8x8 plane (64 features) -> scalar squashed by tanh.
        self.value_head_linear = nn.Sequential(
            nn.Linear(64, 64), nn.ReLU(), nn.Linear(64, 1), nn.Tanh()
        )

    def forward(self, state) -> "tuple[torch.Tensor, torch.Tensor]":
        '''
        Forward pass for the nn graph

        Args:
            param1: self
            param2: state- game state at time of evaluation, a float tensor of
                shape (batch, 7, 8, 8)

        Returns:
            pi (torch.tensor): policy pi[a|s], shape (batch, 64), rows sum to 1
            val (torch.tensor): value estimate from input state, shape (batch, 1),
                each entry in [-1, 1]
        '''

        s = self.conv1(state)
        s = self.conv2(s)
        s = self.conv3(s)
        s = self.conv4(s)

        # nn.Linear only transforms the last dimension, so the (batch, C, 8, 8)
        # feature maps must be flattened to (batch, C*8*8) before each linear head.
        # Flattening here (rather than adding nn.Flatten to the Sequentials) keeps
        # the parameter names in the state_dict unchanged.
        pi = self.policy_head(s.flatten(start_dim=1))

        v = self.value_head_conv(s)
        val = self.value_head_linear(v.flatten(start_dim=1))

        return pi, val


In [None]:
def update_history_frames(history: np.ndarray, new_frame: np.ndarray, m: int, history_length: int): 
    """
    Updates the history of game boards with a new frame.

    The history is treated as a channel-last array of shape (N, N, M*T + L):
    T frames of M planes each, oldest first, followed by L extra planes that
    this function never touches. The oldest frame is dropped, the remaining
    frames are shifted one slot towards the front, and the planes derived from
    `new_frame` are written into the newest slot.

    Relies on `split_player_boards`, which is defined outside this block and
    must return one 2D plane per player for the given board.

    NOTE(review): PyTorch convolutions take channels-first input, so the
    history presumably has to be transposed before it is fed to a network -
    confirm against the caller.

    Args:
        history (np.ndarray): Game frame history (NxNx(MT+L))
        new_frame (np.ndarray): 2D array representing new game state.
        m (int): Number of channels per frame.
        history_length (int): Number of frames in history.

    Returns:
        None: Updates 'history' array in place.

    Raises:
        ValueError: If `m` does not match the number of planes produced for
            one frame.
    """
    board_player_1, board_player_2 = split_player_boards(new_frame)
    # Stack on the last axis so the new planes line up with the channel-last history.
    new_planes = np.stack([board_player_1, board_player_2], axis=-1)
    if new_planes.shape[-1] != m:
        raise ValueError(
            f"m={m} does not match the {new_planes.shape[-1]} planes produced per frame"
        )

    newest_start = m * (history_length - 1)
    newest_stop = m * history_length

    # Drop the oldest frame by moving frames 1..T-1 into slots 0..T-2. Source and
    # destination overlap, hence the explicit copy. Planes at index >= newest_stop
    # (the L extra planes) are deliberately left alone.
    history[:, :, :newest_start] = history[:, :, m:newest_stop].copy()
    history[:, :, newest_start:newest_stop] = new_planes


def add_player_information(board_tensor: np.ndarray, current_player: int):
    """
    Fill the final plane of a board tensor with the current player's id.

    The slice `board_tensor[:, :, -1]` is overwritten with a constant plane in
    which every entry equals `current_player`. The write happens in place and
    the same array object is handed back for convenience.

    Args:
        board_tensor (np.ndarray): The tensor representing the game state;
            indexed here as (rows, cols, planes).
        current_player (int): The current player (e.g., 0 or 1).

    Returns:
        np.ndarray: The same `board_tensor`, with its last plane replaced.
    """
    height = board_tensor.shape[0]
    width = board_tensor.shape[1]
    # Constant plane matching the board's first two dimensions.
    board_tensor[:, :, -1] = np.full(shape=(height, width), fill_value=current_player)
    return board_tensor