In [1]:
import numpy as np

## Preprocessing Input to Convolutional Layer
The input tensor to the convolutional layer has the shape:

`(batch_size,channels,height,width)`

batch_size: The number of samples in a batch (e.g., 1 in your case).
channels: The number of input channels (3, from one-hot encoding).
height and width: The spatial dimensions of the input


In [10]:
BOARD_SIZE = 2

board = np.random.choice([0, 1, 2], size=(BOARD_SIZE, BOARD_SIZE))
board

array([[0, 2],
       [1, 2]])

We then one hot encode the board

In [11]:
one_hot_board = np.eye(3)[board]
print(one_hot_board.shape)
one_hot_board

(2, 2, 3)


array([[[1., 0., 0.],
        [0., 0., 1.]],

       [[0., 1., 0.],
        [0., 0., 1.]]])

In [12]:
one_hot_board = np.transpose(one_hot_board, (2, 0, 1))  # Shape: (3, board_size, board_size)
print(one_hot_board.shape)
one_hot_board

(3, 2, 2)


array([[[1., 0.],
        [0., 0.]],

       [[0., 0.],
        [1., 0.]],

       [[0., 1.],
        [0., 1.]]])

In [13]:
input_tensor = np.expand_dims(one_hot_board, axis=0)  # Shape: (1, 3, board_size, board_size) (1 => batch, 3 => channels)
input_tensor

array([[[[1., 0.],
         [0., 0.]],

        [[0., 0.],
         [1., 0.]],

        [[0., 1.],
         [0., 1.]]]])

## Coming up with Policy Network

### Using Convolution Layer

In [None]:
import torch
import torch.nn.functional as F
from torch import nn

class PolicyNetwork(nn.Module):
    def __init__(self, board_size, action_space):
        super(PolicyNetwork, self).__init__()
        self.conv_layers = nn.Sequential(
            nn.Conv2d(3, 32, kernel_size=board_size),  # For a 2x2 board, kernel_size can be 2
            nn.ReLU(),
        )
        self.fc_layers = nn.Sequential(
            nn.Linear(32 * 1 * 1, 256),  # Output size after conv layers
            nn.ReLU(),
            nn.Linear(256, action_space)
        )

    def forward(self, x):
        x = self.conv_layers(x)
        print(x)
        x = x.view(x.size(0), -1)  # Flatten
        x = self.fc_layers(x)
        return x

policy_network = PolicyNetwork(board_size=BOARD_SIZE, action_space=BOARD_SIZE * BOARD_SIZE + 1)


```
kernel_size - Size of the filter. Can be a single number (e.g., 3 for a 3x3 kernel) or a tuple (e.g., (3, 5) for a 3x5 kernel).
```

In [7]:
policy_network.forward(torch.tensor(input_tensor, dtype=torch.float32))

tensor([[[[0.0000]],

         [[0.0000]],

         [[0.8041]],

         [[0.0000]],

         [[0.3165]],

         [[0.0000]],

         [[0.2495]],

         [[0.0000]],

         [[0.1351]],

         [[0.0000]],

         [[0.1968]],

         [[0.0000]],

         [[0.0000]],

         [[0.1028]],

         [[0.0000]],

         [[0.0000]],

         [[0.3365]],

         [[0.5596]],

         [[0.0000]],

         [[0.0000]],

         [[0.5377]],

         [[0.0000]],

         [[0.0000]],

         [[0.1831]],

         [[0.0000]],

         [[0.0000]],

         [[0.0000]],

         [[0.3725]],

         [[0.0000]],

         [[0.0000]],

         [[0.1588]],

         [[0.4451]]]], grad_fn=<ReluBackward0>)


tensor([[-0.0792,  0.0025,  0.1203, -0.0674,  0.2408]],
       grad_fn=<AddmmBackward0>)