<a href="https://colab.research.google.com/github/dbischoffhashem/Minesweeper-Neural-Network-Bot/blob/main/Minesweeper_Bot.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Minesweeper: Logic Bot vs Neural Network Bot

### Generating Data

In [None]:
import random
import numpy as np

class Minesweeper:
    """Minesweeper board with randomly placed mines and per-cell clue numbers.

    Coordinates are ``(x, y)`` where ``x`` indexes rows (``0 <= x < height``)
    and ``y`` indexes columns (``0 <= y < width``).  A cell value of ``-1`` is
    a mine; any other value is the number of mines among the up-to-eight
    surrounding cells.  ``-2`` is the placeholder used for hidden cells.
    """

    def __init__(self, width, height, num_mines):
        """Build the board, place ``num_mines`` mines and compute all clues.

        Raises:
            ValueError: if ``num_mines`` is negative or larger than the
                number of cells (the board could never be filled).
        """
        if not 0 <= num_mines <= width * height:
            raise ValueError(
                f"num_mines must be between 0 and {width * height}, got {num_mines}"
            )
        self.width = width
        self.height = height
        self.board = [[-2 for _ in range(width)] for _ in range(height)]  # -2 for hidden cells
        self.revealed = [[False] * width for _ in range(height)]
        self.cells_remaining = {(x, y) for x in range(height) for y in range(width)}
        self.inferred_safe = set()
        self.inferred_mines = set()
        self.clue_numbers = {}
        self._place_mines(num_mines)
        self._calculate_clues()

    def _place_mines(self, num_mines):
        """Mark ``num_mines`` distinct, uniformly chosen cells as mines (-1)."""
        all_cells = [(x, y) for x in range(self.height) for y in range(self.width)]
        # Sampling without replacement always terminates; retrying random
        # cells until enough free ones are hit never ends on an over-full board.
        for x, y in random.sample(all_cells, num_mines):
            self.board[x][y] = -1  # -1 for mines

    def _calculate_clues(self):
        """Replace every non-mine cell with its count of neighbouring mines."""
        for x in range(self.height):
            for y in range(self.width):
                if self.board[x][y] == -1:
                    continue
                count = 0
                for dx in (-1, 0, 1):
                    for dy in (-1, 0, 1):
                        nx, ny = x + dx, y + dy
                        if 0 <= nx < self.height and 0 <= ny < self.width and self.board[nx][ny] == -1:
                            count += 1
                self.board[x][y] = count

    def reveal(self, x, y):
        """Reveal cell ``(x, y)`` and return its value (``-1`` for a mine).

        Revealing a cell that is already revealed is harmless and returns the
        same value again.
        """
        self.revealed[x][y] = True
        # discard() rather than remove(): a repeated reveal must not raise.
        self.cells_remaining.discard((x, y))
        value = self.board[x][y]
        if value != -1:
            self.clue_numbers[(x, y)] = value
        return value

    def get_visible_board(self):
        """Return a new grid showing revealed values and ``-2`` for hidden cells."""
        return [
            [self.board[x][y] if self.revealed[x][y] else -2 for y in range(self.width)]
            for x in range(self.height)
        ]

    def get_full_board(self):
        """Return a copy of the complete board, mines included."""
        return [list(row) for row in self.board]

In [None]:
def _neighbor_cells(x, y, height, width):
    """Return the in-bounds coordinates of the up-to-eight cells around (x, y)."""
    return [
        (x + dx, y + dy)
        for dx in (-1, 0, 1)
        for dy in (-1, 0, 1)
        if (dx != 0 or dy != 0) and 0 <= x + dx < height and 0 <= y + dy < width
    ]


def _apply_clue(clue, neighbors, cells_remaining, inferred_safe, inferred_mines):
    """Use one revealed clue to classify its still-unknown neighbours in place.

    ``cells_remaining`` holds the cells that are neither revealed nor
    classified yet.  If every unknown neighbour must be a mine, or every one
    must be safe, the cells are moved into the matching set.
    """
    unknown = [pos for pos in neighbors if pos in cells_remaining]
    if not unknown:
        return
    known_mines = sum(1 for pos in neighbors if pos in inferred_mines)
    if clue - known_mines == len(unknown):
        # All mines still owed by the clue fit exactly into the unknown cells.
        inferred_mines.update(unknown)
        cells_remaining.difference_update(unknown)
    elif known_mines == clue:
        # Every mine of this clue is already accounted for, so the rest are safe.
        inferred_safe.update(unknown)
        cells_remaining.difference_update(unknown)


def play_minesweeper(width, height, num_mines):
    """Play one game with the rule-based logic bot.

    The bot opens on a random zero-clue cell when the board has one (it looks
    at the hidden board to find it), otherwise on a random cell.  It then
    reveals cells it has inferred to be safe and falls back to a random
    unclassified cell when it has no safe inference left.

    Returns:
        tuple: ``(full_board, visible_boards, won)`` where ``full_board`` is
        the complete board as an array, ``visible_boards`` is a list with one
        array per safe reveal (the visible board right after that reveal),
        and ``won`` is True when every cell ended up revealed or inferred to
        be a mine.  If the opening move hits a mine, ``visible_boards`` is
        empty and ``won`` is False.
    """
    game = Minesweeper(width, height, num_mines)

    # Bot-side bookkeeping, kept separately from the game's own state.
    cells_remaining = {(x, y) for x in range(game.height) for y in range(game.width)}
    inferred_safe = set()
    inferred_mines = set()
    clue_numbers = {}

    # Initial reveal: choose a cell with a '0' clue if possible
    possible_starts = [(x, y) for x in range(game.height) for y in range(game.width) if game.board[x][y] == 0]
    if possible_starts:
        x, y = random.choice(possible_starts)
    else:
        x, y = random.randint(0, game.height - 1), random.randint(0, game.width - 1)

    result = game.reveal(x, y)
    cells_remaining.remove((x, y))
    if result == -1:
        # Without a zero cell the random opening can land on a mine; that is
        # a loss, not a clue to keep playing from.
        return np.array(game.get_full_board()), [], False

    clue_numbers[(x, y)] = result
    visible_boards = [game.get_visible_board()]
    _apply_clue(result, _neighbor_cells(x, y, game.height, game.width),
                cells_remaining, inferred_safe, inferred_mines)

    won = False
    while True:
        if inferred_safe:
            x, y = inferred_safe.pop()
        elif cells_remaining:
            x, y = random.choice(list(cells_remaining))
            cells_remaining.remove((x, y))
        else:
            # Nothing left to reveal: every cell is revealed or a known mine.
            won = True
            break

        result = game.reveal(x, y)
        if result == -1:  # Hit a mine, game ends
            break

        clue_numbers[(x, y)] = result
        visible_boards.append(game.get_visible_board())

        # Re-examine every known clue once with the updated knowledge.
        for (cx, cy), clue in clue_numbers.items():
            _apply_clue(clue, _neighbor_cells(cx, cy, game.height, game.width),
                        cells_remaining, inferred_safe, inferred_mines)

    return np.array(game.get_full_board()), [np.array(board) for board in visible_boards], won

Testing Logic Bot Performance

In [None]:
from scipy.stats import norm

def test_logic_bot_performance(total_games, width, height, num_mines):
    """Play ``total_games`` logic-bot games and print win rate and step statistics.

    A game's step count is the number of visible boards returned by
    ``play_minesweeper``.  Steps are averaged over every game played, won or
    lost, so the reported mean and the divisor refer to the same set of games.

    Raises:
        ValueError: if ``total_games`` is not positive.
    """
    if total_games <= 0:
        raise ValueError("total_games must be a positive integer")

    games_won = 0
    steps_per_game = []  # one entry per game, used for the mean and its CI

    for _ in range(total_games):
        _, visible_boards, won = play_minesweeper(width, height, num_mines)
        steps_per_game.append(len(visible_boards))
        if won:
            games_won += 1

    win_rate = games_won / total_games * 100
    average_steps = sum(steps_per_game) / total_games

    confidence_level = 0.95
    z_value = norm.ppf(1 - (1 - confidence_level) / 2)

    # Confidence interval for win rate (normal approximation to the binomial)
    win_rate_std_error = np.sqrt((win_rate / 100) * (1 - win_rate / 100) / total_games)
    win_rate_margin_of_error = z_value * win_rate_std_error * 100  # Convert back to percentage
    win_rate_ci = (win_rate - win_rate_margin_of_error, win_rate + win_rate_margin_of_error)

    # Confidence interval for average steps; needs at least two samples
    if len(steps_per_game) > 1:
        steps_std_error = np.std(steps_per_game, ddof=1) / np.sqrt(len(steps_per_game))
        steps_margin_of_error = z_value * steps_std_error
        average_steps_ci = (average_steps - steps_margin_of_error, average_steps + steps_margin_of_error)
    else:
        average_steps_ci = (average_steps, average_steps)

    print(f"Games Won: {games_won}")
    print(f"Win Rate: {win_rate:.2f}% (95% CI: {win_rate_ci[0]:.2f}% - {win_rate_ci[1]:.2f}%)")
    print(f"Average Steps Survived: {average_steps:.2f}")
    # Same wording as the network-bot evaluation cells, so results line up.
    print(f"Average Steps 95% Confidence Interval: ({average_steps_ci[0]:.2f}, {average_steps_ci[1]:.2f})")

In [None]:
# Easy difficulty: 500 logic-bot games on a 9x9 board with 10 mines.
# Arguments are (total_games, width, height, num_mines).
total_games = 500
test_logic_bot_performance(total_games, 9, 9, 10)

Games Won: 387
Win Rate: 77.40% (95% CI: 73.73% - 81.07%)
Average Steps Survived: 54.95


In [None]:
# Medium difficulty: 500 logic-bot games on a 16x16 board with 40 mines.
total_games = 500
test_logic_bot_performance(total_games, 16, 16, 40)

Games Won: 245
Win Rate: 49.00% (95% CI: 44.62% - 53.38%)
Average Steps Survived: 105.84


In [None]:
# Hard difficulty: 500 logic-bot games on a board of width 30 and height 16
# with 99 mines.
total_games = 500
test_logic_bot_performance(total_games, 30, 16, 99)

Games Won: 4
Win Rate: 0.80% (95% CI: 0.02% - 1.58%)
Average Steps Survived: 3.05


Generating Training Data

I will only include winning games in the data I use to train the network. This ensures the network only learns from cell selections that were successful.

In [None]:
import torch
import numpy as np

def generate_training_data(num_games, width, height, num_mines):
    """Build (board state, safety map) training pairs from games the logic bot won.

    Every visible board recorded during a won game becomes one sample; its
    label is that game's safety map (1 for a safe cell, 0 for a mine).

    Returns:
        tuple: ``(game_states, safety_boards)`` float tensors of shape
        ``(N, 1, height, width)`` and ``(N, height, width)``, where ``N`` is
        the total number of recorded states (0 if no game was won).
    """
    game_states_list = []
    safety_boards_list = []

    for _ in range(num_games):
        full_board, game_steps, won = play_minesweeper(width, height, num_mines)
        if not won:
            continue

        # Convert the full board to a safety board
        safety_board = (np.array(full_board) != -1).astype(int)

        for state in game_steps:
            # Boards are stored row-major as (height, width); the original
            # (width, height) order only worked because the boards were square.
            game_states_list.append(np.array(state).reshape(1, height, width))
            safety_boards_list.append(safety_board)  # same safety board for all steps of the game

    # Stack into single arrays; the explicit reshape keeps a well-formed
    # (0, ...) shape when no game was won.
    game_states_array = np.array(game_states_list).reshape(-1, 1, height, width)
    safety_boards_array = np.array(safety_boards_list).reshape(-1, height, width)

    # Convert arrays to tensors
    game_states_tensor = torch.FloatTensor(game_states_array)
    safety_boards_tensor = torch.FloatTensor(safety_boards_array)

    return game_states_tensor, safety_boards_tensor

### Neural Network

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim

class MinesweeperCNN(nn.Module):
    """Three-layer convolutional network producing one sigmoid score per cell.

    All convolutions are padded so the output has the same spatial size as
    the input.  ``board_size`` is accepted but not used by the layers.
    """

    def __init__(self, board_size):
        super().__init__()
        self.conv1 = nn.Conv2d(1, 32, kernel_size=5, padding=2)
        self.conv2 = nn.Conv2d(32, 64, kernel_size=3, padding=1)
        self.conv3 = nn.Conv2d(64, 1, kernel_size=3, padding=1)

        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        """Apply two ReLU convolutions, then a convolution followed by a sigmoid."""
        features = x
        for conv in (self.conv1, self.conv2):
            features = torch.relu(conv(features))
        return self.sigmoid(self.conv3(features))

### Training

In [None]:
def train_model(data_loader, model, criterion, optimizer, num_epochs=10):
    """Train ``model`` on ``data_loader`` and print the mean batch loss per epoch.

    Args:
        data_loader: iterable of ``(game_states, safety_boards)`` batches.
            Labels may have the same shape as the model output or lack the
            channel dimension; both are accepted.
        model: network mapping a batch of game states to per-cell scores.
        criterion: loss called as ``criterion(outputs, targets)``.
        optimizer: optimizer over ``model``'s parameters.
        num_epochs: number of passes over ``data_loader``.

    Raises:
        ValueError: if ``data_loader`` yields no batches.
    """
    model.train()  # Set the model to training mode
    for epoch in range(num_epochs):
        running_loss = 0.0
        num_batches = 0
        for game_states, safety_boards in data_loader:
            optimizer.zero_grad()
            outputs = model(game_states)
            targets = safety_boards
            # Add the channel axis only when the labels lack it; labels that
            # already match the output shape are used unchanged.
            if targets.dim() == outputs.dim() - 1:
                targets = targets.unsqueeze(1)
            loss = criterion(outputs, targets)
            loss.backward()
            optimizer.step()
            running_loss += loss.item()
            num_batches += 1
        if num_batches == 0:
            raise ValueError("data_loader produced no batches")
        print(f'Epoch {epoch+1}, Average Loss: {running_loss / num_batches}')

In [None]:
# Generate training pairs from 5000 logic-bot games on a 9x9 board with 10 mines.
game_states_tensor, safety_boards_tensor = generate_training_data(5000, 9, 9, 10)

from torch.utils.data import DataLoader, TensorDataset

# shuffle=False: batches are drawn in the order the samples were generated.
dataset = TensorDataset(game_states_tensor, safety_boards_tensor)
data_loader = DataLoader(dataset, batch_size=32, shuffle=False)


# Initialize model
board_size = (9, 9)
model = MinesweeperCNN(board_size)

# Loss function and optimizer: binary cross-entropy on the per-cell sigmoid
# outputs, optimized with Adam.
criterion = nn.BCELoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Train for 8 epochs; train_model prints the average loss of each epoch.
train_model(data_loader, model, criterion, optimizer, num_epochs=8)


Epoch 1, Average Loss: 0.1906108399832834
Epoch 2, Average Loss: 0.16908026615074911
Epoch 3, Average Loss: 0.16560277229707918
Epoch 4, Average Loss: 0.16373534183477542
Epoch 5, Average Loss: 0.16256728621473565
Epoch 6, Average Loss: 0.16174256027047812
Epoch 7, Average Loss: 0.16111271540702954
Epoch 8, Average Loss: 0.16064061955845496


### Testing (with Agent)

Note: For testing I will start by selecting a 0 on the board (if there is one) and from there use the network to determine the rest of the moves.

In [None]:
# Network Bot
import torch
import numpy as np
import random
from math import sqrt
from scipy import stats

def play_game_with_bot(model, game, num_mines, randomize_first_move=False):
    """Play ``game`` to completion, letting ``model`` pick every move after the first.

    With ``randomize_first_move`` set, the opening move is a random zero-clue
    cell when the board has one; otherwise (or when it has none) it is a
    uniformly random cell.  Later moves reveal the hidden cell with the
    highest model score.

    Returns:
        tuple: ``(won, turn_count)``.  ``won`` is False when a mine was
        revealed and True once exactly ``num_mines`` cells remain hidden.
    """

    def opening_cell():
        # Optionally look at the full board to open on a zero clue.
        if randomize_first_move:
            board = game.get_full_board()
            zeros = [
                (r, c)
                for r in range(game.height)
                for c in range(game.width)
                if board[r][c] == 0
            ]
            if zeros:
                return random.choice(zeros)  # Choose a random zero cell
        return random.randint(0, game.height - 1), random.randint(0, game.width - 1)

    def network_cell():
        grid = np.array(game.get_visible_board()).reshape(1, 1, game.height, game.width)
        scores = model(torch.FloatTensor(grid)).squeeze().numpy()
        scores[np.array(game.revealed)] = -1  # Ignore already revealed cells
        return np.unravel_index(np.argmax(scores), scores.shape)

    with torch.no_grad():
        model.eval()
        turn_count = 0

        while True:
            row, col = opening_cell() if turn_count == 0 else network_cell()
            result = game.reveal(row, col)
            turn_count += 1

            if result == -1:
                return False, turn_count

            # Check if all non-mine cells have been revealed
            hidden = sum(line.count(-2) for line in game.get_visible_board())
            if hidden == num_mines:
                return True, turn_count

In [None]:
# Evaluate the trained network bot on 500 fresh 9x9 boards with 10 mines.
total_games = 500
games_won = 0
total_steps = 0
steps_list = []  # turn count of every game, won or lost

for _ in range(total_games):
    game = Minesweeper(width=9, height=9, num_mines=10)  # Initialize a new game for each iteration
    # randomize_first_move=True opens on a random zero-clue cell when one exists.
    won, steps = play_game_with_bot(model, game, num_mines=10, randomize_first_move=True)
    if won:
        games_won += 1
    total_steps += steps
    steps_list.append(steps)

# Win Rate: normal-approximation 95% interval for a proportion
win_rate = games_won / total_games
win_rate_variance = win_rate * (1 - win_rate) / total_games
win_rate_ci = stats.norm.interval(0.95, loc=win_rate, scale=sqrt(win_rate_variance))

# Average Steps: 95% t-interval around the sample mean
average_steps = total_steps / total_games
steps_variance = np.var(steps_list, ddof=1)
steps_std = sqrt(steps_variance)
steps_ci = stats.t.interval(0.95, df=total_games-1, loc=average_steps, scale=steps_std/sqrt(total_games))

# Output Results
print(f"Games Played: {total_games}")
print(f"Games Won: {games_won}")
print(f"Win Rate: {win_rate * 100:.2f}%")
print(f"Win Rate with 95% Confidence Interval: ({win_rate_ci[0] * 100:.2f}%, {win_rate_ci[1] * 100:.2f}%)")
print(f"Average Steps Survived: {average_steps:.2f}")
print(f"Average Steps 95% Confidence Interval: ({steps_ci[0]:.2f}, {steps_ci[1]:.2f})")


Games Played: 500
Games Won: 436
Win Rate: 87.20%
Win Rate with 95% Confidence Interval: (84.27%, 90.13%)
Average Steps Survived: 69.25
Average Steps 95% Confidence Interval: (68.48, 70.02)


### Trying to Incorporate Self-Attention into Model

Now I will try training the same way, but use a different neural network that uses self attention to see if I can capture more complex trends in the boards.

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim

class SelfAttention(nn.Module):
    """Self-attention over all spatial positions of a feature map.

    Queries and keys are 1x1 convolutions down to ``in_channels // 8``
    channels; values keep ``in_channels``.  The attended features are scaled
    by the learnable ``gamma`` (initialised to zero) and added to the input.
    """

    def __init__(self, in_channels):
        super().__init__()
        reduced_channels = in_channels // 8
        self.query_conv = nn.Conv2d(in_channels, reduced_channels, kernel_size=1)
        self.key_conv = nn.Conv2d(in_channels, reduced_channels, kernel_size=1)
        self.value_conv = nn.Conv2d(in_channels, in_channels, kernel_size=1)
        self.gamma = nn.Parameter(torch.zeros(1))
        self.softmax = nn.Softmax(dim=-1)

    def forward(self, x):
        """Return ``gamma * attention(x) + x`` with the same shape as ``x``."""
        n_batch, n_channels, dim_a, dim_b = x.size()
        n_positions = dim_a * dim_b

        # Flatten the two spatial axes so each position becomes one token.
        queries = self.query_conv(x).view(n_batch, -1, n_positions).transpose(1, 2)
        keys = self.key_conv(x).view(n_batch, -1, n_positions)
        values = self.value_conv(x).view(n_batch, -1, n_positions)

        # weights[b, i, j]: how much position i attends to position j.
        weights = self.softmax(torch.bmm(queries, keys))
        attended = torch.bmm(values, weights.transpose(1, 2))
        attended = attended.view(n_batch, n_channels, dim_a, dim_b)
        return self.gamma * attended + x

class MinesweeperAttentionCNN(nn.Module):
    """Convolutional network with a self-attention layer before the output convolution.

    Produces one sigmoid score per cell at the input's spatial size.
    ``board_size`` is accepted but not used by the layers.
    """

    def __init__(self, board_size):
        super().__init__()
        self.conv1 = nn.Conv2d(1, 32, kernel_size=5, padding=2)
        self.conv2 = nn.Conv2d(32, 64, kernel_size=3, padding=1)
        self.attention = SelfAttention(64)
        self.conv3 = nn.Conv2d(64, 1, kernel_size=3, padding=1)

        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        """Run two ReLU convolutions, self-attention, then the sigmoid output convolution."""
        features = x
        for conv in (self.conv1, self.conv2):
            features = torch.relu(conv(features))
        attended = self.attention(features)
        return self.sigmoid(self.conv3(attended))

In [None]:
# Generate a fresh training set: 5000 logic-bot games on a 9x9 board with 10 mines.
game_states_tensor, safety_boards_tensor = generate_training_data(5000, 9, 9, 10)

from torch.utils.data import DataLoader, TensorDataset

# shuffle=False: batches are drawn in the order the samples were generated.
dataset = TensorDataset(game_states_tensor, safety_boards_tensor)
data_loader = DataLoader(dataset, batch_size=32, shuffle=False)

# Initialize the model; this rebinds `model` to the attention network.
board_size = (9, 9)
model = MinesweeperAttentionCNN(board_size)

# Loss function and optimizer
criterion = nn.BCELoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Train with the same train_model function and settings as the plain CNN.
train_model(data_loader, model, criterion, optimizer, num_epochs=8)


Epoch 1, Average Loss: 0.1866335010518322
Epoch 2, Average Loss: 0.16916693315187997
Epoch 3, Average Loss: 0.16553650869569117
Epoch 4, Average Loss: 0.16352293354010905
Epoch 5, Average Loss: 0.16241917312839335
Epoch 6, Average Loss: 0.16137535993045357
Epoch 7, Average Loss: 0.16069654787765125
Epoch 8, Average Loss: 0.1602048155317014


Now I will test the model using a network bot to determine how well it performs compared to the logic bot and to the other network that doesn't use attention.

In [None]:
# Evaluate the attention model (currently bound to `model`) on 500 fresh
# 9x9 boards with 10 mines.
total_games = 500
games_won = 0
total_steps = 0
steps_list = []  # turn count of every game, won or lost

for _ in range(total_games):
    game = Minesweeper(width=9, height=9, num_mines=10)  # Initialize a new game for each iteration
    won, steps = play_game_with_bot(model, game, num_mines=10, randomize_first_move=True)
    if won:
        games_won += 1
    total_steps += steps
    steps_list.append(steps)

# Win Rate: normal-approximation 95% interval for a proportion
win_rate = games_won / total_games
win_rate_variance = win_rate * (1 - win_rate) / total_games
win_rate_ci = stats.norm.interval(0.95, loc=win_rate, scale=sqrt(win_rate_variance))

# Average Steps: 95% t-interval around the sample mean
average_steps = total_steps / total_games
steps_variance = np.var(steps_list, ddof=1)
steps_std = sqrt(steps_variance)
steps_ci = stats.t.interval(0.95, df=total_games-1, loc=average_steps, scale=steps_std/sqrt(total_games))

# Output Results
print(f"Games Played: {total_games}")
print(f"Games Won: {games_won}")
print(f"Win Rate: {win_rate * 100:.2f}%")
print(f"Win Rate with 95% Confidence Interval: ({win_rate_ci[0] * 100:.2f}%, {win_rate_ci[1] * 100:.2f}%)")
print(f"Average Steps Survived: {average_steps:.2f}")
print(f"Average Steps 95% Confidence Interval: ({steps_ci[0]:.2f}, {steps_ci[1]:.2f})")


Games Played: 500
Games Won: 420
Win Rate: 84.00%
Win Rate with 95% Confidence Interval: (80.79%, 87.21%)
Average Steps Survived: 69.29
Average Steps 95% Confidence Interval: (68.56, 70.01)


From these results, this model seems to perform better than the logic bot in terms of win rate and average steps survived. It performed comparably to the network with just CNN layers in terms of average steps survived, but slightly worse in terms of win rate.

Now, I will train the network for medium level problems.

### Training & Testing for Medium Difficulty

In [None]:
# Medium difficulty training set: 1000 logic-bot games on a 16x16 board with 40 mines.
game_states_tensor, safety_boards_tensor = generate_training_data(1000, 16, 16, 40)

from torch.utils.data import DataLoader, TensorDataset

# shuffle=False: batches are drawn in the order the samples were generated.
dataset = TensorDataset(game_states_tensor, safety_boards_tensor)
data_loader = DataLoader(dataset, batch_size=64, shuffle=False)

# Initialize the model under its own name so the 9x9 `model` is kept.
board_size = (16, 16)
medium_model = MinesweeperAttentionCNN(board_size)

# Loss function and optimizer
criterion = nn.BCELoss()
optimizer = optim.Adam(medium_model.parameters(), lr=0.001)

# Train for 4 epochs with the same train_model function.
train_model(data_loader, medium_model, criterion, optimizer, num_epochs=4)

Epoch 1, Average Loss: 0.24450843217546575
Epoch 2, Average Loss: 0.2197058246497727
Epoch 3, Average Loss: 0.2133476423872231
Epoch 4, Average Loss: 0.20980224105607428


This loss would have probably continued to decrease if I trained for more epochs, but training took a really long time, so I did not continue.

In [None]:
# Network Bot
import torch
import numpy as np
import random
from math import sqrt
from scipy import stats

def play_game_with_bot(model, game, num_mines, randomize_first_move=False):
    """Play one game where ``model`` chooses every move after the opening one.

    The opening move is a random zero-clue cell when ``randomize_first_move``
    is set and such a cell exists, otherwise a uniformly random cell.  Each
    later move reveals the hidden cell with the highest model score.

    Returns:
        tuple: ``(won, turn_count)``; ``won`` is False if a mine was
        revealed, True once exactly ``num_mines`` cells are still hidden.
    """
    with torch.no_grad():
        model.eval()
        turn_count = 0

        while True:
            if turn_count > 0:
                # Score the visible board and take the best hidden cell.
                board_tensor = torch.FloatTensor(
                    np.array(game.get_visible_board()).reshape(1, 1, game.height, game.width)
                )
                predictions = model(board_tensor).squeeze().numpy()
                predictions[np.array(game.revealed)] = -1  # Ignore already revealed cells
                x, y = np.unravel_index(np.argmax(predictions), predictions.shape)
            else:
                zero_cells = []
                if randomize_first_move:
                    full_board = game.get_full_board()
                    zero_cells = [
                        (r, c)
                        for r in range(game.height)
                        for c in range(game.width)
                        if full_board[r][c] == 0
                    ]
                if zero_cells:
                    x, y = random.choice(zero_cells)
                else:
                    x, y = random.randint(0, game.height - 1), random.randint(0, game.width - 1)

            result = game.reveal(x, y)
            turn_count += 1

            if result == -1:
                return False, turn_count

            # Check if all non-mine cells have been revealed
            hidden_cells = 0
            for board_row in game.get_visible_board():
                for cell in board_row:
                    if cell == -2:
                        hidden_cells += 1

            if hidden_cells == num_mines:
                return True, turn_count

In [None]:
# Evaluate medium_model on 500 fresh 16x16 boards with 40 mines.
total_games = 500
games_won = 0
total_steps = 0
steps_list = []  # turn count of every game, won or lost

for _ in range(total_games):
    game = Minesweeper(width=16, height=16, num_mines=40)
    won, steps = play_game_with_bot(medium_model, game, num_mines=40, randomize_first_move=True)
    if won:
        games_won += 1
    total_steps += steps
    steps_list.append(steps)

# Win Rate: normal-approximation 95% interval for a proportion
win_rate = games_won / total_games
win_rate_variance = win_rate * (1 - win_rate) / total_games
win_rate_ci = stats.norm.interval(0.95, loc=win_rate, scale=sqrt(win_rate_variance))

# Average Steps: 95% t-interval around the sample mean
average_steps = total_steps / total_games
steps_variance = np.var(steps_list, ddof=1)
steps_std = sqrt(steps_variance)
steps_ci = stats.t.interval(0.95, df=total_games-1, loc=average_steps, scale=steps_std/sqrt(total_games))

# Output Results
print(f"Games Played: {total_games}")
print(f"Games Won: {games_won}")
print(f"Win Rate: {win_rate * 100:.2f}%")
print(f"Win Rate with 95% Confidence Interval: ({win_rate_ci[0] * 100:.2f}%, {win_rate_ci[1] * 100:.2f}%)")
print(f"Average Steps Survived: {average_steps:.2f}")
print(f"Average Steps 95% Confidence Interval: ({steps_ci[0]:.2f}, {steps_ci[1]:.2f})")

Games Played: 500
Games Won: 305
Win Rate: 61.00%
Win Rate with 95% Confidence Interval: (56.72%, 65.28%)
Average Steps Survived: 111.80
Average Steps 95% Confidence Interval: (106.37, 117.24)


After using the network bot to test how well the model performs, we can see from the results that it performs better than the logic bot, both in terms of the win rate and the average steps survived. This may be because the network is able to recognize deeper patterns through the CNN layers and self attention. The logic bot, on the other hand, can only make decisions based on explicit clues and rules derived from the revealed cells, which limits what it can do in certain situations.



Now I will print the decisions the network bot makes throughout one game to see how it compares to what the logic bot would click on.

In [None]:
# COMPARE DECISIONS OF LOGIC BOT VS NETWORK BOT
import torch
import numpy as np
import random

def play_game_with_bot(model, game, num_mines=None, randomize_first_move=True, verbose=False):
    """Play one game with the network bot, optionally printing every move.

    This definition replaces the earlier ``play_game_with_bot`` in the
    notebook namespace, so it keeps that call shape (``num_mines`` keyword,
    ``(won, turn_count)`` return value); cells that run after this one and
    call it that way keep working.

    Args:
        model: network mapping a ``(1, 1, height, width)`` board tensor to
            per-cell scores.
        game: the ``Minesweeper`` instance to play.
        num_mines: number of mines on the board.  When None it is counted
            from ``game.get_full_board()`` rather than assumed.
        randomize_first_move: open on a random zero-clue cell when one
            exists; otherwise (or when False) on a uniformly random cell.
        verbose: print each move and the final outcome.

    Returns:
        tuple: ``(won, turn_count)``.
    """
    if num_mines is None:
        num_mines = sum(cell == -1 for row in game.get_full_board() for cell in row)

    with torch.no_grad():
        model.eval()
        turn_count = 0

        while True:
            if turn_count == 0:
                zero_cells = []
                if randomize_first_move:
                    full_board = game.get_full_board()
                    zero_cells = [(x, y) for x in range(game.height) for y in range(game.width) if full_board[x][y] == 0]

                if zero_cells:
                    x, y = random.choice(zero_cells)
                else:
                    x, y = random.randint(0, game.height-1), random.randint(0, game.width-1)

                result = game.reveal(x, y)
                if verbose:
                    print(f"Turn {turn_count}: First move at ({x}, {y}) - Result: {result}")
            else:
                current_state = np.array(game.get_visible_board()).reshape(1, 1, game.height, game.width)
                current_state_tensor = torch.FloatTensor(current_state)
                predictions = model(current_state_tensor).squeeze().numpy()
                mask = np.array(game.revealed)
                predictions[mask] = -1  # Ignore already revealed cells

                x, y = np.unravel_index(np.argmax(predictions), predictions.shape)
                result = game.reveal(x, y)
                if verbose:
                    print(f"Turn {turn_count}: Choosing cell ({x}, {y}) - Result: {result}")

            turn_count += 1

            if result == -1:
                if verbose:
                    print(f"Game Over: Hit a mine at cell ({x}, {y}) after {turn_count} turns")
                return False, turn_count

            # Check if all non-mine cells have been revealed
            visible_board = game.get_visible_board()
            unrevealed_count = sum(cell == -2 for row in visible_board for cell in row)

            if unrevealed_count == num_mines:
                if verbose:
                    print("Game Won: All safe cells revealed")
                return True, turn_count


# Play a single medium-difficulty game and print every decision of the bot.
game = Minesweeper(16, 16, 40)
print(game.get_full_board())
result, _ = play_game_with_bot(medium_model, game, num_mines=40, randomize_first_move=True, verbose=True)

print("Testing result:", "Won" if result else "Lost")

[[0, 0, 1, 2, -1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [0, 0, 1, -1, 3, 2, 1, 0, 0, 1, 2, 2, 2, 2, 2, 1], [1, 1, 3, 3, 4, -1, 1, 0, 0, 2, -1, -1, 3, -1, -1, 1], [1, -1, 2, -1, -1, 2, 1, 0, 0, 2, -1, 4, -1, 3, 2, 1], [1, 1, 2, 2, 2, 1, 0, 0, 0, 1, 2, 3, 2, 1, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, -1, 1, 1, 1, 1], [0, 0, 0, 0, 1, 2, 2, 1, 0, 0, 1, 1, 1, 1, -1, 1], [0, 0, 0, 0, 2, -1, -1, 1, 0, 1, 1, 1, 0, 1, 1, 1], [2, 2, 1, 0, 2, -1, 3, 1, 0, 1, -1, 1, 0, 0, 0, 0], [-1, -1, 3, 2, 3, 4, 3, 1, 0, 1, 1, 1, 0, 0, 0, 0], [4, -1, -1, 3, -1, -1, -1, 1, 0, 0, 0, 0, 0, 0, 0, 0], [-1, 3, 2, 4, -1, 5, 2, 1, 0, 0, 0, 0, 0, 0, 0, 0], [1, 2, 1, 3, -1, 3, 1, 2, 1, 2, 1, 1, 0, 0, 1, 1], [0, 2, -1, 3, 1, 2, -1, 3, -1, 2, -1, 1, 0, 1, 2, -1], [1, 3, -1, 3, 1, 2, 3, -1, 3, 2, 1, 1, 0, 1, -1, 3], [-1, 2, 1, 2, -1, 1, 2, -1, 2, 0, 0, 0, 0, 1, 2, -1]]
Turn 0: First move at (6, 2) - Result: 0
Turn 1: Choosing cell (6, 1) - Result: 0
Turn 2: Choosing cell (7, 2) - Result: 0
Turn 3: Choosing cell (7, 1) - Resul

Looking at the moves the network bot is making, they seem similar to the decisions the logic bot would make. The cells that are chosen consecutively tend to be touching or close to each other. The logic bot would sometimes have to choose a cell at random, which may be far away from the previous cells it selected, when inferred_safe was empty. However, I do not see many instances of this with the network bot. Turns 177-179 are interesting to look at in this game:



Turn 177: Choosing cell (11, 2) - Result: 2

Turn 178: Choosing cell (7, 0) - Result: 0

Turn 179: Choosing cell (11, 3) - Result: 4


Even though (11, 2) and (11, 3) are close to each other, they are not clicked consecutively. Instead, (7, 0) is clicked in between them. This might demonstrate that the network bot is not making decisions based solely on immediate spatial proximity but is also identifying patterns or associations across different areas of the board. By incorporating spatial awareness through convolutional layers and self-attention, the network can infer probable safe cells based on global board patterns rather than just adjacent tiles.

### Training & Testing for Hard Difficulty

In [None]:
# Adjusting how data is formatted since it is no longer square
import torch
import numpy as np

def generate_training_data(num_games, width, height, num_mines, only_wins=False):
    """Build (board state, safety map) training pairs for boards of any shape.

    Every visible board recorded during a game becomes one sample; its label
    is that game's safety map (1 for a safe cell, 0 for a mine).

    Args:
        num_games: number of logic-bot games to play.
        width, height, num_mines: board configuration for each game.
        only_wins: when True, keep only states from games the logic bot won.
            The default keeps states from every game.

    Returns:
        tuple: ``(game_states, safety_boards)`` float tensors, both of shape
        ``(N, 1, height, width)``, where ``N`` is the number of kept states.
    """
    game_states_list = []
    safety_boards_list = []

    for _ in range(num_games):
        full_board, game_steps, won = play_minesweeper(width, height, num_mines)
        if only_wins and not won:
            continue

        # Convert the full board to a safety board
        safety_board = (np.array(full_board) != -1).astype(int).reshape(1, height, width)

        # Prepare data for each game state
        for state in game_steps:
            game_states_list.append(np.array(state).reshape(1, height, width))  # Reshape to (1, height, width)
            safety_boards_list.append(safety_board)  # Same safety board for all steps of the game

    # The explicit reshape keeps a well-formed (0, ...) shape when nothing was kept.
    game_states_array = np.array(game_states_list).reshape(-1, 1, height, width)
    safety_boards_array = np.array(safety_boards_list).reshape(-1, 1, height, width)

    # Convert arrays to tensors
    game_states_tensor = torch.FloatTensor(game_states_array)
    safety_boards_tensor = torch.FloatTensor(safety_boards_array)

    return game_states_tensor, safety_boards_tensor

In [None]:
# Hard difficulty training set: 2000 logic-bot games on a board of width 30
# and height 16 with 99 mines.
game_states_tensor, safety_boards_tensor = generate_training_data(2000, 30, 16, 99)

from torch.utils.data import DataLoader, TensorDataset

# shuffle=False: batches are drawn in the order the samples were generated.
dataset = TensorDataset(game_states_tensor, safety_boards_tensor)
data_loader = DataLoader(dataset, batch_size=128, shuffle=False)

# Written as (width, height) here; the model's layers do not use this value.
board_size = (30, 16)
hard_model = MinesweeperAttentionCNN(board_size)

criterion = nn.BCELoss()
optimizer = optim.Adam(hard_model.parameters(), lr=0.001)

# Train for 6 epochs.
train_model(data_loader, hard_model, criterion, optimizer, num_epochs=6)

Epoch 1, Average Loss: 0.37362768
Epoch 2, Average Loss: 0.35092143
Epoch 3, Average Loss: 0.33278416
Epoch 4, Average Loss: 0.31890762
Epoch 5, Average Loss: 0.30543258
Epoch 6, Average Loss: 0.29674325
Epoch 7, Average Loss: 0.29152319


Testing with Network Bot

In [None]:
# Evaluate hard_model on 500 fresh boards of width 30 and height 16 with 99 mines.
total_games = 500
games_won = 0
total_steps = 0
steps_list = []  # turn count of every game, won or lost

for _ in range(total_games):
    game = Minesweeper(width=30, height=16, num_mines=99)  # Initialize a new game for each iteration
    won, steps = play_game_with_bot(hard_model, game, num_mines=99, randomize_first_move=True)
    if won:
        games_won += 1
    total_steps += steps
    steps_list.append(steps)

# Win Rate: normal-approximation 95% interval for a proportion
win_rate = games_won / total_games
win_rate_variance = win_rate * (1 - win_rate) / total_games
win_rate_ci = stats.norm.interval(0.95, loc=win_rate, scale=sqrt(win_rate_variance))

# Average Steps: 95% t-interval around the sample mean
average_steps = total_steps / total_games
steps_variance = np.var(steps_list, ddof=1)
steps_std = sqrt(steps_variance)
steps_ci = stats.t.interval(0.95, df=total_games-1, loc=average_steps, scale=steps_std/sqrt(total_games))

# Output Results
print(f"Games Played: {total_games}")
print(f"Games Won: {games_won}")
print(f"Win Rate: {win_rate * 100:.2f}%")
print(f"Win Rate with 95% Confidence Interval: ({win_rate_ci[0] * 100:.2f}%, {win_rate_ci[1] * 100:.2f}%)")
print(f"Average Steps Survived: {average_steps:.2f}")
print(f"Average Steps 95% Confidence Interval: ({steps_ci[0]:.2f}, {steps_ci[1]:.2f})")

Games Played: 500
Games Won: 46
Win Rate: 9.20%
Win Rate with 95% Confidence Interval: (6.67%, 11.73%)
Average Steps Survived: 21.23
Average Steps 95% Confidence Interval: (20.80, 21.66)


Once again, the network performed better than the logic bot in terms of both win rate and average steps survived.

### Trying without maintaining order

I will shuffle the individual board states (the data loader shuffles them across all games, not just within each game) so that they are no longer presented in order.

In [None]:
# Shuffling experiment: 5000 logic-bot games on a 9x9 board with 10 mines.
game_states_tensor, safety_boards_tensor = generate_training_data(5000, 9, 9, 10)

from torch.utils.data import DataLoader, TensorDataset

# shuffle=True: samples are drawn in a new random order every epoch, mixing
# states from all games.
dataset = TensorDataset(game_states_tensor, safety_boards_tensor)
data_loader = DataLoader(dataset, batch_size=128, shuffle=True)

# Rebinds `model` to a freshly initialised attention network.
board_size = (9, 9)
model = MinesweeperAttentionCNN(board_size)

criterion = nn.BCELoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Train for 6 epochs.
train_model(data_loader, model, criterion, optimizer, num_epochs=6)

Epoch 1, Average Loss: 0.410543
Epoch 2, Average Loss: 0.400721
Epoch 3, Average Loss: 0.397842
Epoch 4, Average Loss: 0.395973
Epoch 5, Average Loss: 0.394886
Epoch 6, Average Loss: 0.393917


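From these results, it looks like shuffling causes the loss to get stuck after only a few epochs, and the loss at the end of training is higher than previously, suggesting that the data should be kept sequential to get the best results. Note, however, that this run also differs from the earlier 9x9 runs in batch size (128 instead of 32), in the number of epochs (6 instead of 8), and, because `generate_training_data` was redefined for the hard difficulty, in including board states from lost games, so the losses are not directly comparable.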