In [1]:
import torch


class SmallBlock(torch.nn.Module):
    """3x3 convolution followed by batch normalisation.

    The convolution uses stride 1 and padding 1, so the two trailing
    (spatial) dimensions of the input are preserved; only the channel
    count changes from ``in_channels`` to ``out_channels``.
    """

    def __init__(self, in_channels, out_channels):
        super().__init__()
        # Keep the layers positional inside the Sequential so parameter
        # names stay "model.0.*" (conv) and "model.1.*" (batch norm).
        layers = [
            torch.nn.Conv2d(
                in_channels, out_channels, kernel_size=3, stride=1, padding=1
            ),
            torch.nn.BatchNorm2d(out_channels),
        ]
        self.model = torch.nn.Sequential(*layers)

    def forward(self, X):
        """Run ``X`` through the conv + batch-norm stack and return the result."""
        out = self.model(X)
        return out


class ResnetBlock(torch.nn.Module):
    """Residual block: SmallBlock -> ReLU -> SmallBlock, plus a skip connection.

    The inner stack maps ``in_channels -> mid_channels -> in_channels`` so
    its output can be added element-wise to the block's input before the
    final ReLU.
    """

    def __init__(self, in_channels, mid_channels):
        super().__init__()
        widen = SmallBlock(in_channels, mid_channels)
        narrow = SmallBlock(mid_channels, in_channels)
        self.model = torch.nn.Sequential(widen, torch.nn.ReLU(), narrow)

    def forward(self, X):
        """Return ``relu(model(X) + X)``."""
        summed = self.model(X) + X
        return torch.relu(summed)


class DropoutBlock(torch.nn.Module):
    """Fully connected block: Linear -> BatchNorm1d -> ReLU -> Dropout.

    Args:
        in_units: Number of input features of the linear layer.
        out_units: Number of output features of the linear layer (also the
            feature count given to ``BatchNorm1d``).
        dropout: Dropout probability. When left as ``None`` the value is
            read from the notebook-level ``args.dropout``, which keeps the
            previous two-argument call style working unchanged.
    """

    def __init__(self, in_units, out_units, dropout=None):
        super(DropoutBlock, self).__init__()
        # Only fall back to the global ``args`` when no rate was passed, so the
        # block can be built without that hidden notebook state. ``is None``
        # (not truthiness) so an explicit 0.0 is respected.
        if dropout is None:
            dropout = args.dropout
        self.model = torch.nn.Sequential(
            torch.nn.Linear(in_units, out_units),
            torch.nn.BatchNorm1d(out_units),
            torch.nn.ReLU(),
            torch.nn.Dropout(p=dropout),
        )

    def forward(self, X):
        """Apply the block to ``X`` and return the result."""
        return self.model(X)


class CRNet(torch.nn.Module):
    """Convolutional residual network with a value head and a policy head.

    Pipeline: a 7-channel 3x3 conv stem, five ``ResnetBlock``s, a flatten,
    two ``DropoutBlock``s, then two small MLP heads on the shared features.

    Args:
        H: Two-element sequence with the widths of the two dropout blocks.
            Only ``H[0]`` and ``H[1]`` are read. The default is a tuple
            rather than a list so it is not a shared mutable default.
        num_channels: Number of channels used by the conv stem and the
            residual blocks.
        M, N: The two trailing dimensions of the input. When left as
            ``None`` they are read from the notebook-level ``args.M`` /
            ``args.N``, so existing ``CRNet()`` calls behave as before.

    Input shape: ``batch_size x 7 x M x N``.
    """

    def __init__(self, H=(200, 100), num_channels=32, M=None, N=None):

        # input shape: batch_size x 7 x M x N

        super(CRNet, self).__init__()
        self.epoch = None

        # Resolve the grid size once and keep it on the instance so that
        # ``forward`` does not depend on the global ``args`` at call time.
        if M is None:
            M = args.M
        if N is None:
            N = args.N
        self.M = M
        self.N = N

        self.initial_block = torch.nn.Sequential(
            torch.nn.Conv2d(7, num_channels, 3, stride=1, padding=1),
            torch.nn.BatchNorm2d(num_channels),
            torch.nn.ReLU(),
        )
        self.middle_blocks = torch.nn.Sequential(
            *[ResnetBlock(num_channels, num_channels) for _ in range(5)]
        )
        self.dropout_blocks = torch.nn.Sequential(
            DropoutBlock(num_channels * M * N, H[0]), DropoutBlock(H[0], H[1])
        )

        # The three stages above are also registered under ``self.model``;
        # both attribute paths are kept so existing state_dict keys still match.
        self.model = torch.nn.Sequential(
            self.initial_block,
            self.middle_blocks,
            torch.nn.Flatten(start_dim=1),
            self.dropout_blocks,
        )

        # Scalar output per sample, squashed by Tanh.
        self.value_head = torch.nn.Sequential(
            torch.nn.Linear(H[1], H[1]),
            torch.nn.ReLU(),
            torch.nn.Linear(H[1], 1),
            torch.nn.Tanh(),
        )

        # One softmax-normalised entry per grid cell (M * N in total).
        self.my_policy_head = torch.nn.Sequential(
            torch.nn.Linear(H[1], H[1]),
            torch.nn.ReLU(),
            torch.nn.Linear(H[1], M * N),
            torch.nn.Softmax(dim=-1),
        )

    def forward(self, X):
        """Compute the value and policy outputs for ``X``.

        A 3-D input gets a leading axis of size 1 added before it is fed
        to the network.

        Returns:
            dict with ``"value"`` (output of the value head) and
            ``"policy"`` (policy-head output reshaped to ``(-1, M, N)``).
        """
        if X.dim() == 3:
            X = X.unsqueeze(0)
        Y = self.model(X)
        v = self.value_head(Y)
        my_p = self.my_policy_head(Y).reshape(-1, self.M, self.N)
        return {"value": v, "policy": my_p}

In [2]:
# Example 7x7 board as a list of rows. Cells hold 0, 1 or 2; the helper
# functions below treat 1 and 2 as the two markers they keep or drop.
board = [
    [1, 1, 1, 0, 0, 0, 0],
    [0, 0, 0, 0, 0, 0, 0],
    [0, 0, 0, 0, 0, 0, 2],
    [0, 0, 0, 0, 0, 0, 2],
    [2, 0, 0, 0, 0, 0, 2],
    [0, 0, 0, 0, 0, 0, 0],
    [1, 0, 0, 0, 0, 0, 0],
]


def player_one(board):
    """Return a copy of ``board`` with every cell equal to 2 replaced by 0.

    All other cell values are kept as they are. The input is left
    untouched: a new list of new rows is returned, so calling this on a
    notebook-level board does not alter it for later cells.

    Args:
        board: Iterable of rows, each an iterable of cell values.

    Returns:
        list[list]: The transformed board.
    """
    return [[0 if cell == 2 else cell for cell in row] for row in board]


def player_two(board):
    """Return a copy of ``board`` with 1s cleared and 2s relabelled as 1.

    Cells equal to 1 become 0, cells equal to 2 become 1, and every other
    value is kept. The input is left untouched: a new list of new rows is
    returned, so the caller's board stays available for further use.

    Args:
        board: Iterable of rows, each an iterable of cell values.

    Returns:
        list[list]: The transformed board.
    """
    return [
        [0 if cell == 1 else 1 if cell == 2 else cell for cell in row]
        for row in board
    ]


# Apply player_two to the example board and show the result as the cell output.
player_two_board = player_two(board)

player_two_board

[[0, 0, 0, 0, 0, 0, 0],
 [0, 0, 0, 0, 0, 0, 0],
 [0, 0, 0, 0, 0, 0, 1],
 [0, 0, 0, 0, 0, 0, 1],
 [1, 0, 0, 0, 0, 0, 1],
 [0, 0, 0, 0, 0, 0, 0],
 [0, 0, 0, 0, 0, 0, 0]]