# Minesweeper AI

In [1]:
import math
import numpy as np
import copy
import tensorflow as tf



In [2]:
class MineSweeper:
    def __init__(self, height = 9, width = 9, mines = 9):
        self.height = height
        self.width = width
        self.num_mines = mines
        self.reset()
        
    def reset(self):
        self.board = np.array([[' ' for i in range(self.width+2)] for j in range(self.height+2)])
        self.done = False
        self.reward = 0
        self.moves = [i for i in range(self.height * self.width)]
        self.mines = []
    
    # Find number of mines adjacent to current square
    def find_mines(self, row, col):
        
        # Return -1 if we hit a mine
        if row*self.width + col in self.mines:
            return -1
        
        num_mines = 0
        for r_offset in [-1, 0, 1]:
            for c_offset in [-1, 0, 1]:
                if (row + r_offset) * self.width + (col + c_offset) in self.mines:
                    num_mines += 1
        
        return num_mines
        
    def step(self, move):
        # Setup
        if move not in self.moves:
            print('Invalid move, picking a random move...')
            move = self.sample()

        self.moves.remove(move) # Remove move if already stepped
        
        # Make random mine positions after the first move
        if len(self.moves) == self.height*self.width-1:
            self.mines = np.random.choice(self.moves, self.num_mines, replace = False)
    
        row, col = move//self.width, move%self.width
        num_mines = self.find_mines(row, col)
        
        if num_mines == -1:
            self.board[row+1, col+1] = '*'
            self.reward = 0
            self.done = True
            
        # Recursive call opens adjacent cells if they are 0
        else:
            self.board[row+1, col+1] = str(num_mines)
            # Check for 0s and fill in board
            if num_mines == 0:
                for r_offset in [-1, 0, 1]:
                    for c_offset in [-1, 0, 1]:
                        if (r_offset, c_offset) == (0, 0): continue
                        if row+r_offset+1 in [0, self.height+1] or col+c_offset+1 in [0, self.width+1]: continue
                        if (row+r_offset)*self.width + (col+c_offset) in self.moves:
                            self.step((row+r_offset)*self.width + (col+c_offset))
            
            # Check whether the gameboard is solved
            if len(self.moves) == self.num_mines:
                self.reward = 1
                self.done = True
                
        return
            
    def sample(self):
        return np.random.choice(self.moves)
    
    # One hot enconding
    def get_network_state(self):
        miniboard = self.board[1:-1, 1:-1]
        myboard = [np.array(miniboard == ' ', dtype = int)]
        
        for i in range(9):
            myboard.append(np.array(miniboard==str(i), dtype = int))
        return tf.convert_to_tensor(myboard)
        

        
    def render(self):
        for rows in self.board[1:-1]:
            print('|'.join(rows))
    
    def render_with_mines(self):
        self.mineboard = copy.deepcopy(self.board)
        for mine in self.mines:
            self.mineboard[mine//self.height + 1, mine % self.height + 1] = '*'
        for rows in self.mineboard[1:-1]:
            print('|'.join(rows))

In [3]:
env = MineSweeper(9, 9, 9)

In [4]:
env.get_network_state()

Metal device set to: Apple M2


<tf.Tensor: shape=(10, 9, 9), dtype=int32, numpy=
array([[[1, 1, 1, 1, 1, 1, 1, 1, 1],
        [1, 1, 1, 1, 1, 1, 1, 1, 1],
        [1, 1, 1, 1, 1, 1, 1, 1, 1],
        [1, 1, 1, 1, 1, 1, 1, 1, 1],
        [1, 1, 1, 1, 1, 1, 1, 1, 1],
        [1, 1, 1, 1, 1, 1, 1, 1, 1],
        [1, 1, 1, 1, 1, 1, 1, 1, 1],
        [1, 1, 1, 1, 1, 1, 1, 1, 1],
        [1, 1, 1, 1, 1, 1, 1, 1, 1]],

       [[0, 0, 0, 0, 0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0, 0, 0, 0, 0]],

       [[0, 0, 0, 0, 0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0, 0, 0, 0, 0],
        [0, 0, 0, 0, 

In [5]:
# Randomly choose from available moves/squares
def RandomAgent(state, moves):
    return np.random.choice(moves)

<tf.Tensor: shape=(1, 81), dtype=float32, numpy=
array([[0.01263031, 0.01273597, 0.01357459, 0.01250613, 0.01196085,
        0.01193194, 0.01281563, 0.01321252, 0.01230667, 0.01216088,
        0.01352779, 0.01244297, 0.01246126, 0.01340252, 0.0121316 ,
        0.0131472 , 0.01241942, 0.01203362, 0.01196798, 0.01243432,
        0.01256222, 0.01225402, 0.01249026, 0.01129467, 0.01247313,
        0.01280869, 0.01139038, 0.01293176, 0.01217573, 0.01202166,
        0.01132068, 0.01231079, 0.01243375, 0.01276378, 0.01272475,
        0.01193036, 0.01112977, 0.01163646, 0.012295  , 0.01271566,
        0.01209599, 0.01225223, 0.0127981 , 0.01223605, 0.01239357,
        0.0126732 , 0.01251283, 0.01312136, 0.01236694, 0.01246249,
        0.01158658, 0.01278333, 0.01250359, 0.01273395, 0.01300522,
        0.01263874, 0.01353951, 0.01147776, 0.0118647 , 0.01185411,
        0.01224627, 0.01195361, 0.0123708 , 0.01202848, 0.0126832 ,
        0.01193082, 0.01210242, 0.01241682, 0.01206955, 0.01232726,

In [12]:
def neural_network(state, moves):
    logits = nn_model(tf.reshape(env.get_network_state(), (1, 10, 9, 9))).numpy().flatten()
    logits[moves] = -1
    return tf.argmax(logits)

SyntaxError: '[' was never closed (984623720.py, line 2)

In [13]:
def play(env, agent = RandomAgent, episodes=100, verbose = False):
    total_reward = 0
    for game in range(episodes):
        env.reset()
        while not env.done:
            move = agent(env.get_network_state(), env.moves)
            env.step(move)
            
            if verbose:
                print('Move: ', move//9, move%9)
                env.render()
                print()
        
        total_reward += env.reward

    print(f'Number of wins in {episodes} games: {total_reward}')

In [14]:
play(env = MineSweeper(9, 9, 9), agent = RandomAgent, episodes = 100)

Number of wins in 100 games: 0


# Neural Network Approach to Solving Minesweeper

Input: Tensor state of the minesweeper board

Output: Probabilities of what cells to choose/expand

Loss: Crossentropy loss

In [15]:
from tensorflow import keras
from tensorflow.keras import layers

In [16]:
MINES = 9
HEIGHT = 9
WIDTH = 9

In [17]:
env = MineSweeper(HEIGHT, WIDTH, MINES)
nn_model = keras.Sequential(
    [
        keras.Input(shape=(10, HEIGHT, WIDTH)),
        layers.Conv2D(32, kernel_size=(3, 3), activation="relu"),
        layers.Conv2D(64, kernel_size=(3, 3), activation="relu"),
        layers.Flatten(),
        layers.Dense(HEIGHT*WIDTH, activation="softmax"),
    ]
)

In [18]:
nn_model.summary()

Model: "sequential_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d_2 (Conv2D)           (None, 8, 7, 32)          2624      
                                                                 
 conv2d_3 (Conv2D)           (None, 6, 5, 64)          18496     
                                                                 
 flatten_1 (Flatten)         (None, 1920)              0         
                                                                 
 dense_1 (Dense)             (None, 81)                155601    
                                                                 
Total params: 176721 (690.32 KB)
Trainable params: 176721 (690.32 KB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________


In [19]:
# Acquire training data for network
def get_samples(model, samples = 100):
    x, y = [], []
    env = MineSweeper(HEIGHT, WIDTH, MINES)
    for game in range(samples):
        env.reset()
        while not env.done:
            current_state = env.get_network_state()
            move = RandomAgent(current_state, env.moves)
            env.step(move)
            # Exclude samples that fail
            if env.done and env.reward == 0:
                continue
            else:
                x.append(current_state)
                y.append([i if i == move else 0 for i in range(HEIGHT*WIDTH)])
            
    return tf.convert_to_tensor(x), tf.convert_to_tensor(y)

In [20]:
x_train, y_train = get_samples(nn_model, samples = 100)

In [21]:
x_train.shape, y_train.shape

(TensorShape([412, 10, 9, 9]), TensorShape([412, 81]))

In [22]:
# Compile and fit model
nn_model.compile(loss="categorical_crossentropy", optimizer="adam", metrics=["accuracy"])
nn_model.fit(x_train, y_train, batch_size=16, epochs=100, validation_split=0.1)

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100


Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78/100
Epoch 79/100
Epoch 80/100
Epoch 81/100
Epoch 82/100
Epoch 83/100
Epoch 84/100
Epoch 85/100
Epoch 86/100
Epoch 87/100
Epoch 88/100
Epoch 89/100
Epoch 90/100
Epoch 91/100
Epoch 92/100
Epoch 93/100
Epoch 94/100
Epoch 95/100
Epoch 96/100
Epoch 97/100
Epoch 98/100
Epoch 99/100
Epoch 100/100


<keras.src.callbacks.History at 0x17ef39b70>

In [24]:
play(env = MineSweeper(HEIGHT, WIDTH, MINES), agent = nn_model, episodes = 100)

ValueError: Input 0 of layer "sequential_1" is incompatible with the layer: expected shape=(None, 10, 9, 9), found shape=(10, 9, 9)