In [32]:
!pip install colorama



In [33]:
import torch
import torch.nn as nn
import torch.optim as optim
import torch.utils.data as data_utils
import numpy as np
from sklearn.model_selection import train_test_split
from colorama import Fore, Style
from copy import deepcopy
import random
import sys
import torch.nn.functional as F


In [34]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [4]:
def read_dataset(file_path):
    with open(file_path, 'r') as file:
        lines = file.readlines()
    return lines

def parse_dataset(lines):
    X = []
    y = []

    for line in lines:
        if line.startswith('S'):
            game_data = line.strip().split('S')[1]
            moves = game_data[:-1]
            winner = game_data[-1]

            if winner == 'A':
                y.append(0)
            elif winner == 'B':
                y.append(1)
            else:
                y.append(2)

            X.append(moves)

    return X, y

file_path = '/content/drive/MyDrive/Colab/UWR/UWR_NeuralNetworks/Class4/games3.txt'
lines = read_dataset(file_path)
X_moves, y = parse_dataset(lines)

In [5]:
def convert_to_board(seq):
    board = np.full((6,7), -1) # an empty Connect Four board all elements set to -1.
    curr_empty = [5,5,5,5,5,5,5] # the index of the next empty row for each column on the board
    for i in range(len(seq)):
        if i%2 == 0: # detect which player's turn
            player = 0
        else:
            player = 1
        move = int(seq[i])
        board[curr_empty[move]][move] = player # update board with which player's turn
        curr_empty[move] -= 1 # update curr_empty where player's move replaced to the board
    return board

board = []
for move in X_moves:
    board.append(convert_to_board(move))

In [35]:
X_train, X_test, y_train, y_test = train_test_split(board, y, test_size=0.2)
X_train = torch.FloatTensor(X_train)
X_test = torch.FloatTensor(X_test)
y_train = torch.LongTensor(y_train)
y_test = torch.LongTensor(y_test)

class CNN(nn.Module):
    def __init__(self, input_channels=6):
        super(CNN, self).__init__()
        self.conv1 = nn.Conv1d(input_channels, 32, kernel_size=3, padding=0)
        self.conv2 = nn.Conv1d(32, 64, kernel_size=3, padding=0)
        self.fc1 = nn.Linear(64, 128)
        self.fc2 = nn.Linear(128, 3)

    def forward(self, x):
        x = torch.relu(self.conv1(x))
        x = torch.relu(self.conv2(x))
        x = torch.max_pool1d(x, kernel_size=2)
        x = x.view(x.size(0), -1)
        x = torch.relu(self.fc1(x))
        x = self.fc2(x)
        return x

model = CNN()

criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters())

num_epochs = 100
for epoch in range(num_epochs):
    model.train()
    optimizer.zero_grad()
    outputs = model(X_train)
    loss = criterion(outputs, y_train)
    loss.backward()
    optimizer.step()

model.eval()

with torch.no_grad():
    outputs = model(X_test)
    _, predicted = torch.max(outputs, 1)

correct = (predicted == y_test).sum().item()
total = y_test.size(0)
accuracy = correct / total

print(f'Accuracy: {accuracy:.4f}')


Accuracy: 0.7668


In [30]:
DX = 7
DY = 6
STRENGTH = 10
LEVEL = 3
GAMMA = 0.999


coins = [Fore.BLUE + '⬤', Fore.RED + '⬤']

directions = [ (1,0), (0,1), (1,-1), (1,1) ]

EMPTY = 0

class AgentMC:
    def __init__(self, n_of_rollouts):
        self.n_of_rollouts = n_of_rollouts
        self.name = f'MC({self.n_of_rollouts})'

    def best_move(self, b):
        ms = b.moves()
        return b.best_move_rollouts(ms, self.n_of_rollouts)

class AgentRandom:
    def __init__(self):
        self.name = 'RND'

    def best_move(self, b):
        return b.random_move()

class AgentCNN:
    def __init__(self, model, n_of_rollouts):
        self.model = model
        self.n_of_rollouts = n_of_rollouts
        self.name = f'CNN({self.n_of_rollouts})'

    def best_move(self, b):
        ms = b.moves()
        return b.best_move_rollouts(ms, self.n_of_rollouts)

    def evaluate_board(self, b):
        board_tensor = torch.FloatTensor(b.board)
        board_tensor = board_tensor.unsqueeze(0)
        outputs = self.model(board_tensor)
        outputs = outputs.detach().numpy().flatten()
        return outputs

class AgentNN:
    def __init__(self, model):
        self.model = model
        self.name = 'NN'

    def best_move(self, b):
        valid_moves = b.moves()
        best_move = valid_moves[0]
        highest_score = float('-inf')

        for move in valid_moves:
            b.apply_move(move)
            board_state = torch.FloatTensor(b.board).view(1, -1)

            # Use the model to predict the score for the new board state
            output = self.model(board_state)
            _, predicted = torch.max(output.data, 1)
            score = predicted.item()

            if score > highest_score:
                best_move = move
                highest_score = score

            b.undo_move(move)

        return best_move

class AgentCNN_Policy:
    def __init__(self, model):
        self.model = model

    def best_move(self, board):
        board_tensor = torch.FloatTensor(board.board).unsqueeze(0)
        with torch.no_grad():
            policy_logits, value_estimate = self.model(board_tensor)
            move_probabilities = F.softmax(policy_logits, dim=1).numpy()
        best_move = np.argmax(move_probabilities)
        if isinstance(best_move, np.ndarray):
            best_move = int(best_move[0])
        else:
            best_move = int(best_move)
        return best_move, value_estimate.item()


class AgentMinMaxMC:
    def __init__(self, level, n_of_rollouts):
        self.level = level
        self.n_of_rollouts = n_of_rollouts
        self.name = f'MM_MC({self.level}, {self.n_of_rollouts})'

    def best_move(self, b):
        return b.best_move(self.level, self.n_of_rollouts)


class Board:
    def __init__(self):
        self.board = [DX * [0] for y in range(DY)]
        self.hs = DX * [0]
        self.who = +1
        self.last_moves = []
        self.move_number = 0
        self.result = '?'

    def moves(self):
        return [n for n in range(DX) if self.hs[n] < DY]

    def apply_move(self, m):
        h = self.hs[m]
        self.board[h][m] = self.who
        self.hs[m] += 1
        self.who = -self.who
        self.last_moves.append(m)
        self.move_number += 1

    def undo_move(self, m):
        h = self.hs[m]
        self.board[h-1][m] = EMPTY

        self.hs[m] -= 1
        self.who = -self.who
        self.last_moves.pop()
        self.move_number -=1


    def print(self):
        for raw in self.board[::-1]:
            for x in range(DX):
                if raw[x] == EMPTY:
                    print ('  ', end='')
                else:
                    r = (raw[x] + 1) // 2
                    print (coins[r] + ' ', end='')
            print ()
        print (Fore.LIGHTYELLOW_EX + 2 * DX*'‒')
        for i in range(DX):
            if self.last_moves and i == self.last_moves[-1]:
                style = Style.BRIGHT
            else:
                style = Style.NORMAL
            print (style + str(i+1), end=' ')

        print ()
        print ()

    def random_move(self):
        ms = self.moves()
        for m in ms:
            if self.is_winning(m):
                return m
        return random.choice(ms)

    def rollout(self, m):
        while True:
            if self.is_winning(m):
                return self.who
            self.apply_move(m)
            ms = self.moves()
            if ms == []:
                return 99
            m = self.random_move()


    def move_value(self, m, n_of_rollouts):
        value = 0
        who_is_playing = self.who
        for i in range(n_of_rollouts):
            state = (self.who, self.last_moves[:], self.hs[:], deepcopy(self.board))

            r = self.rollout(m)
            if r == who_is_playing:
                value += 1
            if r == -who_is_playing:
                value -= 1

            self.who, self.last_moves, self.hs, self.board = state

        return value

    def best_move_rollouts(self, ms,  n_of_rollouts):
        #return random.choice(ms)
        return max(ms, key=lambda x:self.move_value(x,  n_of_rollouts))


    def best_moves(self, level):
        #minimax
        ms = self.moves()

        vms = []
        for m in ms:
            if self.is_winning(m):
                return [m]
            self.apply_move(m)
            vms.append( (self.mini_max(level), m))
            self.undo_move(m)

        if self.who == 1:
            min_max = max
        else:
            min_max = min

        v_max,m = min_max(vms)

        good_moves = [m for (v,m) in vms if v == v_max]
        return good_moves

    def best_move(self, level, n_of_rollouts):
        ms = self.best_moves(level)
        return self.best_move_rollouts(ms, n_of_rollouts)

    def mini_max(self, level):
        if level == 0:
            return 0
        ms = self.moves()
        if not ms:
            return 0

        vals = []
        for m in ms:
            if self.is_winning(m):
               return self.who * (GAMMA ** self.move_number)
            self.apply_move(m)

            vals.append(self.mini_max(level-1))
            self.undo_move(m)
        if self.who == +1:
            return max(vals)
        return min(vals)

    def last_move_was_winning(self):
        return self.was_winning(self.last_moves[-1])

    def end(self):
        if not self.last_moves:
            return False
        if self.last_move_was_winning():
            if len(self.last_moves) % 2 == 0:
                self.result = -1
            else:
                self.result = +1
            return True
        if len(self.last_moves) == DX*DY:
                self.result = 0
                return True
        return False

    def vertical_winning(self):
        return self.was_vertical_winning(self.last_moves[-1])

    def was_winning(self, m):
        for dx, dy in directions:
            x,y = m, self.hs[m]-1  # after applying move
            score = 0

            while self.board[y][x] == -self.who:
                score += 1
                x += dx
                y += dy
                if not (0<=x<DX and 0<=y<DY):
                    break

            x,y = m, self.hs[m]-1
            dx = -dx
            dy = -dy

            while self.board[y][x] == -self.who:
                score += 1
                x += dx
                y += dy
                if not (0<=x<DX and 0<=y<DY):
                    break
            score -= 1

            if score >= 4:
                return True

        return False

    def was_vertical_winning(self, m):
        for dx, dy in [(0,1)]:
            x,y = m, self.hs[m]-1  # after applying move
            score = 0

            while self.board[y][x] == -self.who:
                score += 1
                x += dx
                y += dy
                if not (0<=x<DX and 0<=y<DY):
                    break

            x,y = m, self.hs[m]-1
            dx = -dx
            dy = -dy

            while self.board[y][x] == -self.who:
                score += 1
                x += dx
                y += dy
                if not (0<=x<DX and 0<=y<DY):
                    break
            score -= 1

            if score >= 4:
                return True

        return False



    def is_winning(self, m):
        for dx, dy in directions:
            x,y = m, self.hs[m]
            score = 0

            while True:
                x += dx
                y += dy
                if not (0<=x<DX and 0<=y<DY):
                    break

                if self.board[y][x] == self.who:
                    score += 1
                else:
                    break


            x,y = m, self.hs[m]
            dx = -dx
            dy = -dy

            while True:
                x += dx
                y += dy
                if not (0<=x<DX and 0<=y<DY):
                    break

                if self.board[y][x] == self.who:
                    score += 1
                else:
                    break

            score += 1

            if score >= 4:
                return True

        return False

In [None]:
def game(agent_a, agent_b):
    b = Board()
    agents = [agent_a, agent_b]
    moves = []

    who = 0

    while not b.end():
        m = agents[who].best_move(b)
        b.apply_move(m)

        who = 1-who

    b.print()
    print (b.result)
    print ()

    return b.result

def duel(agent_a, agent_b, N):
    score = {1:0, -1:0, 0:0}

    for i in range(N):
        r1 = game(agent_a, agent_b)
        score[r1] += 1
        r2 = game(agent_b, agent_a)
        score[-r2] += 1

    s = sum(score.values())

    for k in score:
        score[k] /= s
    print (f'{agent_a.name}: {score[+1]}, {agent_b.name}: {score[-1]}, Draw: {score[0]}')

In [36]:
if __name__ == '__main__':
    A = AgentRandom()
    B = AgentCNN(model, 10)

    duel(A, B, 10)

[34m⬤ [31m⬤ [31m⬤         
[34m⬤ [31m⬤ [31m⬤         
[34m⬤ [34m⬤ [34m⬤         
[34m⬤ [31m⬤ [34m⬤   [34m⬤ [31m⬤   
[31m⬤ [34m⬤ [34m⬤   [31m⬤ [34m⬤   
[34m⬤ [31m⬤ [31m⬤ [31m⬤ [34m⬤ [31m⬤ [31m⬤ 
[93m‒‒‒‒‒‒‒‒‒‒‒‒‒‒
[1m1 [22m2 [22m3 [22m4 [22m5 [22m6 [22m7 

-1

              
              
              
[34m⬤             
[31m⬤       [34m⬤     
[31m⬤ [31m⬤ [31m⬤ [31m⬤ [34m⬤   [34m⬤ 
[93m‒‒‒‒‒‒‒‒‒‒‒‒‒‒
[22m1 [1m2 [22m3 [22m4 [22m5 [22m6 [22m7 

1

              
              
      [34m⬤       
      [34m⬤   [31m⬤   
      [34m⬤   [31m⬤   
[31m⬤     [34m⬤   [31m⬤   
[93m‒‒‒‒‒‒‒‒‒‒‒‒‒‒
[22m1 [22m2 [22m3 [1m4 [22m5 [22m6 [22m7 

-1

              
              
              
              
    [34m⬤         
[31m⬤ [31m⬤ [31m⬤ [31m⬤ [34m⬤   [34m⬤ 
[93m‒‒‒‒‒‒‒‒‒‒‒‒‒‒
[1m1 [22m2 [22m3 [22m4 [22m5 [22m6 [22m7 

1

              
[34m⬤             
[34m⬤ [31m⬤   [31m⬤       
[34m⬤ [34m⬤   [34m⬤       


In [None]:
if __name__ == '__main__':
    A = AgentMinMaxMC(3, 50)
    B = AgentCNN(model, 10)

    duel(A, B, 10)

[34m⬤ [34m⬤   [34m⬤ [31m⬤     
[34m⬤ [31m⬤   [31m⬤ [31m⬤     
[31m⬤ [31m⬤   [31m⬤ [34m⬤ [31m⬤   
[34m⬤ [31m⬤   [34m⬤ [31m⬤ [34m⬤   
[34m⬤ [34m⬤ [31m⬤ [31m⬤ [34m⬤ [34m⬤   
[31m⬤ [34m⬤ [34m⬤ [31m⬤ [34m⬤ [31m⬤ [31m⬤ 
[93m‒‒‒‒‒‒‒‒‒‒‒‒‒‒
[22m1 [22m2 [1m3 [22m4 [22m5 [22m6 [22m7 

1

              
              
[31m⬤   [31m⬤         
[34m⬤ [34m⬤ [34m⬤ [34m⬤       
[31m⬤ [34m⬤ [34m⬤ [34m⬤ [31m⬤     
[31m⬤ [31m⬤ [34m⬤ [31m⬤ [34m⬤ [31m⬤ [31m⬤ 
[93m‒‒‒‒‒‒‒‒‒‒‒‒‒‒
[22m1 [1m2 [22m3 [22m4 [22m5 [22m6 [22m7 

-1

[31m⬤   [34m⬤ [31m⬤       
[34m⬤ [31m⬤ [34m⬤ [34m⬤   [31m⬤   
[31m⬤ [34m⬤ [31m⬤ [34m⬤   [34m⬤ [31m⬤ 
[31m⬤ [34m⬤ [31m⬤ [31m⬤   [31m⬤ [34m⬤ 
[31m⬤ [31m⬤ [31m⬤ [34m⬤   [34m⬤ [31m⬤ 
[34m⬤ [31m⬤ [34m⬤ [31m⬤ [34m⬤ [34m⬤ [34m⬤ 
[93m‒‒‒‒‒‒‒‒‒‒‒‒‒‒
[22m1 [1m2 [22m3 [22m4 [22m5 [22m6 [22m7 

1

              
      [31m⬤       
      [34m⬤       
  [31m⬤ [34m⬤ [34m⬤ [34m⬤ [34m⬤   
  [3

In [None]:
if __name__ == '__main__':
    A = AgentMC(10)
    B = AgentCNN(model, 10)

    duel(A, B, 10)

[31m⬤ [34m⬤   [34m⬤ [34m⬤ [31m⬤ [31m⬤ 
[34m⬤ [34m⬤   [31m⬤ [31m⬤ [34m⬤ [31m⬤ 
[31m⬤ [31m⬤   [34m⬤ [34m⬤ [31m⬤ [34m⬤ 
[34m⬤ [31m⬤   [34m⬤ [31m⬤ [34m⬤ [31m⬤ 
[34m⬤ [31m⬤ [34m⬤ [34m⬤ [31m⬤ [34m⬤ [31m⬤ 
[34m⬤ [34m⬤ [31m⬤ [31m⬤ [31m⬤ [34m⬤ [31m⬤ 
[93m‒‒‒‒‒‒‒‒‒‒‒‒‒‒
[22m1 [22m2 [1m3 [22m4 [22m5 [22m6 [22m7 

-1

[34m⬤ [34m⬤ [31m⬤ [31m⬤ [34m⬤ [31m⬤ [34m⬤ 
[31m⬤ [31m⬤ [31m⬤ [34m⬤ [31m⬤ [31m⬤ [31m⬤ 
[34m⬤ [34m⬤ [34m⬤ [31m⬤ [34m⬤ [31m⬤ [34m⬤ 
[34m⬤ [31m⬤ [34m⬤ [31m⬤ [31m⬤ [34m⬤ [34m⬤ 
[34m⬤ [31m⬤ [34m⬤ [34m⬤ [31m⬤ [34m⬤ [31m⬤ 
[31m⬤ [34m⬤ [31m⬤ [31m⬤ [31m⬤ [34m⬤ [34m⬤ 
[93m‒‒‒‒‒‒‒‒‒‒‒‒‒‒
[22m1 [22m2 [22m3 [22m4 [22m5 [22m6 [1m7 

0

[34m⬤ [34m⬤   [31m⬤ [34m⬤ [34m⬤ [34m⬤ 
[31m⬤ [34m⬤   [34m⬤ [34m⬤ [31m⬤ [31m⬤ 
[34m⬤ [31m⬤   [31m⬤ [31m⬤ [31m⬤ [34m⬤ 
[31m⬤ [31m⬤   [31m⬤ [31m⬤ [34m⬤ [31m⬤ 
[34m⬤ [31m⬤ [34m⬤ [34m⬤ [34m⬤ [34m⬤ [31m⬤ 
[31m⬤ [34m⬤ [31m⬤ [31m⬤ [31m⬤ [34m⬤ 

In [None]:
def extract_features(board):
    features = []

    # Number of horizontal triplets for each player
    for player in range(2):
        horizontal_triplets = 0
        for row in range(6):
            for col in range(4):
                if board[row][col] == player and board[row][col+1] == player and board[row][col+2] == player:
                    horizontal_triplets += 1
        features.append(horizontal_triplets)

    # Example feature: Is it currently player A's turn? (Binary feature)
    is_player_a_turn = 1 if sum(board[0]) % 2 == 0 else 0
    features.append(is_player_a_turn)

    return features

In [None]:
X_features = [extract_features(b) for b in board]

class SimpleNN(nn.Module):
    def __init__(self, input_size):
        super(SimpleNN, self).__init__()
        self.input_size = input_size
        self.fc1 = nn.Linear(input_size, 64)
        self.fc2 = nn.Linear(64, 32)
        self.fc3 = nn.Linear(32, 3)

    def forward(self, x):
        x = x.view(x.size(0), -1)
        x = torch.relu(self.fc1(x))
        x = torch.relu(self.fc2(x))
        x = self.fc3(x)
        return x

X_train_features, X_test_features, y_train, y_test = train_test_split(board, y, test_size=0.2)
X_train_features = torch.FloatTensor(X_train_features)
X_test_features = torch.FloatTensor(X_test_features)
y_train = torch.LongTensor(y_train)
y_test = torch.LongTensor(y_test)

NN_model = SimpleNN(7*6)

criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(NN_model.parameters())

num_epochs = 100
for epoch in range(num_epochs):
    NN_model.train()
    optimizer.zero_grad()
    outputs = NN_model(X_train_features)
    loss = criterion(outputs, y_train)
    loss.backward()
    optimizer.step()

NN_model.eval()

with torch.no_grad():
    outputs = NN_model(X_test_features)
    _, predicted = torch.max(outputs, 1)

correct = (predicted == y_test).sum().item()
total = y_test.size(0)
accuracy = correct / total

print(f'Accuracy: {accuracy:.4f}')

Accuracy: 0.7925


In [None]:
if __name__ == '__main__':
    A = AgentRandom()
    B = AgentNN(NN_model)

    duel(A, B, 10)

              
              
[34m⬤             
[34m⬤   [31m⬤       [31m⬤ 
[34m⬤   [34m⬤       [31m⬤ 
[34m⬤ [31m⬤ [31m⬤     [31m⬤ [34m⬤ 
[93m‒‒‒‒‒‒‒‒‒‒‒‒‒‒
[1m1 [22m2 [22m3 [22m4 [22m5 [22m6 [22m7 

-1

[31m⬤             
[31m⬤             
[34m⬤             
[31m⬤             
[31m⬤ [31m⬤           
[31m⬤ [34m⬤ [34m⬤ [34m⬤ [34m⬤   [34m⬤ 
[93m‒‒‒‒‒‒‒‒‒‒‒‒‒‒
[22m1 [22m2 [22m3 [22m4 [1m5 [22m6 [22m7 

-1

              
              
              
              
[34m⬤ [31m⬤           
[34m⬤ [34m⬤ [31m⬤ [31m⬤ [31m⬤ [31m⬤ [34m⬤ 
[93m‒‒‒‒‒‒‒‒‒‒‒‒‒‒
[22m1 [22m2 [1m3 [22m4 [22m5 [22m6 [22m7 

1

              
              
[31m⬤             
[31m⬤             
[31m⬤             
[31m⬤     [34m⬤ [34m⬤ [34m⬤   
[93m‒‒‒‒‒‒‒‒‒‒‒‒‒‒
[1m1 [22m2 [22m3 [22m4 [22m5 [22m6 [22m7 

1

              
              
              
              
[34m⬤             
[34m⬤   [31m⬤ [31m⬤ [31m⬤ [31m⬤ [34m⬤ 
[93m‒‒‒‒‒‒‒‒‒‒‒‒‒‒


In [None]:
if __name__ == '__main__':
    A = AgentMinMaxMC(3, 10)
    B = AgentNN(NN_model)

    duel(A, B, 10)

              
              
[31m⬤             
[34m⬤ [34m⬤           
[34m⬤ [34m⬤       [31m⬤   
[34m⬤ [31m⬤ [31m⬤ [31m⬤ [31m⬤ [34m⬤ [31m⬤ 
[93m‒‒‒‒‒‒‒‒‒‒‒‒‒‒
[22m1 [22m2 [22m3 [22m4 [1m5 [22m6 [22m7 

1

[31m⬤             
[31m⬤             
[34m⬤             
[31m⬤             
[31m⬤             
[31m⬤ [34m⬤ [34m⬤ [34m⬤ [34m⬤     
[93m‒‒‒‒‒‒‒‒‒‒‒‒‒‒
[22m1 [1m2 [22m3 [22m4 [22m5 [22m6 [22m7 

-1

              
              
              
              
              
[34m⬤ [34m⬤ [31m⬤ [31m⬤ [31m⬤ [31m⬤ [34m⬤ 
[93m‒‒‒‒‒‒‒‒‒‒‒‒‒‒
[22m1 [22m2 [1m3 [22m4 [22m5 [22m6 [22m7 

1

[31m⬤ [31m⬤           
[34m⬤ [31m⬤           
[31m⬤ [34m⬤           
[31m⬤ [31m⬤           
[34m⬤ [34m⬤ [34m⬤ [34m⬤       
[31m⬤ [31m⬤ [34m⬤ [34m⬤       
[93m‒‒‒‒‒‒‒‒‒‒‒‒‒‒
[22m1 [22m2 [1m3 [22m4 [22m5 [22m6 [22m7 

-1

              
              
[31m⬤     [31m⬤ [34m⬤     
[34m⬤     [31m⬤ [31m⬤     
[34m⬤ [34m⬤   [31m⬤ [3

In [None]:
if __name__ == '__main__':
    A = AgentMC(10)
    B = AgentNN(NN_model)

    duel(A, B, 10)

[31m⬤ [34m⬤ [31m⬤         
[34m⬤ [31m⬤ [31m⬤         
[31m⬤ [34m⬤ [31m⬤ [34m⬤       
[31m⬤ [34m⬤ [31m⬤ [34m⬤       
[34m⬤ [34m⬤ [34m⬤ [31m⬤       
[34m⬤ [31m⬤ [31m⬤ [31m⬤   [34m⬤   
[93m‒‒‒‒‒‒‒‒‒‒‒‒‒‒
[22m1 [22m2 [1m3 [22m4 [22m5 [22m6 [22m7 

1

[31m⬤ [31m⬤           
[31m⬤ [34m⬤           
[34m⬤ [31m⬤           
[31m⬤ [34m⬤           
[31m⬤ [34m⬤ [34m⬤         
[31m⬤ [31m⬤ [34m⬤ [34m⬤ [34m⬤     
[93m‒‒‒‒‒‒‒‒‒‒‒‒‒‒
[22m1 [22m2 [1m3 [22m4 [22m5 [22m6 [22m7 

-1

[31m⬤ [31m⬤ [31m⬤         
[34m⬤ [31m⬤ [34m⬤         
[31m⬤ [34m⬤ [31m⬤         
[34m⬤ [31m⬤ [34m⬤ [34m⬤       
[34m⬤ [34m⬤ [34m⬤ [31m⬤       
[34m⬤ [34m⬤ [31m⬤ [31m⬤ [31m⬤ [31m⬤   
[93m‒‒‒‒‒‒‒‒‒‒‒‒‒‒
[22m1 [22m2 [22m3 [22m4 [22m5 [1m6 [22m7 

1

[34m⬤ [31m⬤           
[31m⬤ [34m⬤           
[31m⬤ [31m⬤           
[31m⬤ [34m⬤           
[34m⬤ [31m⬤           
[31m⬤ [31m⬤ [34m⬤ [34m⬤ [34m⬤ [34m⬤   
[93m‒‒‒‒‒‒‒‒‒‒‒‒‒‒
[22m1 [2

# Problem 3

In [23]:
import copy

def generate_training_data(lines):
    X = []
    y_moves = []
    y_values = []
    outcomes = {'A': 1.0, 'B': -1.0}

    for line in lines:
        if line.startswith('S'):
            game_data = line.strip().split('S')[1]
            moves = game_data[:-1]
            winner = game_data[-1]
            current_value = outcomes.get(winner, 0)

            for i in range(len(moves)):
                seq = moves[:i+1]
                board = convert_to_board(seq)

                X.append(board)
                y_moves.append(int(moves[i]))
                y_values.append(current_value * (0.9 ** (len(moves) - i - 1)))

    return X, y_moves, y_values


X, y_train_moves, y_train_values = generate_training_data(lines)


In [24]:
X = torch.FloatTensor(X)
y_train_moves = torch.LongTensor(y_train_moves)
y_train_values = torch.FloatTensor(y_train_values)

class CNN(nn.Module):
    def __init__(self, input_channels=6):
        super(CNN, self).__init__()
        self.conv1 = nn.Conv1d(input_channels, 32, kernel_size=3, padding=0)
        self.conv2 = nn.Conv1d(32, 64, kernel_size=3, padding=0)

        # Policy head: outputs a probability distribution over all possible moves
        self.policy = nn.Sequential(
            nn.Linear(64, 128),
            nn.ReLU(),
            nn.Linear(128, 7)
        )

        # Value head: estimating the value of the current board state
        self.value = nn.Sequential(
            nn.Linear(64, 128),
            nn.ReLU(),
            nn.Linear(128, 1),
            nn.Tanh()
        )

    def forward(self, x):
        x = F.relu(self.conv1(x))
        x = F.relu(self.conv2(x))
        x = F.max_pool1d(x, kernel_size=2)
        x = x.view(x.size(0), -1)
        policy_logits = self.policy(x)
        value_estimate = self.value(x)
        return policy_logits, value_estimate

model=CNN()
criterion_policy = nn.CrossEntropyLoss()
criterion_value = nn.MSELoss()

optimizer = torch.optim.Adam(model.parameters())

for epoch in range(num_epochs):
    model.train()
    optimizer.zero_grad()

    policy_logits, value_estimate = model(X)

    loss_policy = criterion_policy(policy_logits, y_train_moves)
    loss_value = criterion_value(value_estimate.squeeze(), y_train_values)

    loss = loss_policy + loss_value
    loss.backward()
    optimizer.step()


In [31]:
if __name__ == '__main__':
    A = AgentRandom()
    B = AgentCNN_Policy(model)

    duel(A, B, 10)

TypeError: list indices must be integers or slices, not tuple

In [None]:
if __name__ == '__main__':
    A = AgentMinMaxMC(3, 10)
    B = AgentCNN_Policy(model)

    duel(A, B, 10)

In [None]:
if __name__ == '__main__':
    A = AgentMC(10)
    B = AgentCNN_Policy(model)

    duel(A, B, 10)