In [3]:
from termcolor import cprint

def print_board(board, bolded=None):
    """A debugging function to print you board in a pretty way"""
    n = len(board)
    # For every row but the last
    for row_idx, row in enumerate(board[:-1]):
        # Print the row as a string with a line below
        if bolded and row_idx==bolded[0]:
            cprint("|".join(row[:bolded[1]]), None, attrs=["underline"], end='|' if bolded[1] != 0 else '')
            cprint(row[bolded[1]], None, attrs=["underline", "bold"], end='')
            if bolded[1] != len(row) - 1:
                cprint("|" + "|".join(row[bolded[1]+1:]), None, attrs=["underline"], end='')
            print()
        else:
            cprint("|".join(row), None, attrs=["underline"])
    row = board[-1]
    if bolded and bolded[0] == len(board) - 1:
        print("|".join(row[:bolded[1]]), end='|' if bolded[1] != 0 else '')
        cprint(row[bolded[1]], None, attrs=["bold"], end='')
        if bolded[1] != len(row) - 1:
            cprint("|" + "|".join(row[bolded[1]+1:]), None, end='')
        print()
    else:
        print("|".join(row))


def other_stone(stone):
    return "X" if stone == "O" else "O"

def consecutive_k(row, k, stone):
    desired_row = [stone] * k
    return sum(desired_row == row[i:i+k] for i in range(len(row) - k + 1))

def live_k(row, k, stone):
    desired_row = ['-'] + [stone] * k + ['-']
    return sum(desired_row == row[i:i+k + 2] for i in range(len(row) - k - 1))

def dead_k(row, k, stone):
    full_open = ['-'] + [stone] * k + ['-']
    count = 0
    for i in range(len(row) - k):
        row_slice = row[i:i+k+1]
        count += (row_slice.count('-') == 1 and row_slice.count(stone) == k)
    
    return count

def get_downdiag(board, row_idx, col_idx):
    return [board[row_idx + i][col_idx + i] for i in range(min(len(board) - row_idx, len(board) - col_idx))]

def get_updiag(board, row_idx, col_idx):
    return [board[row_idx - i][col_idx + i] for i in range(min(row_idx + 1, len(board) - col_idx))]

def winner_stone(board, stone):
    k_count = 0
    k = 5
    for row in board:
        row = list(row)
        k_count += consecutive_k(row, k, stone)

    for col_idx in range(len(board)):
        bl = [row[col_idx] for row in board]
        k_count += consecutive_k(bl, k, stone)

    for row_idx in range(len(board)):
        bl = get_updiag(board, row_idx, 0)
        k_count += consecutive_k(bl, k, stone)
        bl = get_downdiag(board, row_idx, 0)
        k_count += consecutive_k(bl, k, stone)

    for col_idx in range(len(board[0])):
        bl = get_updiag(board, len(board) - 1, col_idx)
        k_count += consecutive_k(bl, k, stone)
        bl = get_downdiag(board, 0, col_idx)
        k_count += consecutive_k(bl, k, stone)
    
    return k_count > 0

def complete_board(board):
    return winner_stone(board, "X") or winner_stone(board, "O") or not any('-' in row for row in board)


In [24]:
import random
import heapq
import numpy as np
from tqdm.notebook import tqdm
import copy

class Strategy:
    def __init__(self, stone, max_depth=1):
        self.stone = stone
        self.opponent_stone = other_stone(stone)
        self.max_depth = max_depth
        self.nodes = 0
        self.pruned = 0
    
    def eval_board(self, board, stone):
        retval = 0
        consecutive_5 = 0
        live_dict = {2: 10, 3: 100, 4: 10000, 5: 50000}
        dead_dict = {2: 5, 3: 50, 4: 10000, 5: 50000}
        live_count = {2: 0, 3: 0, 4: 0, 5: 0}
        dead_count = {2: 0, 3: 0, 4: 0, 5: 0}
        stone_p = other_stone(stone)
        for k in range(2, 6):
            for row in board:
                row = list(row)
                live_count[k]  += live_k(row, k, stone)
                dead_count[k] += dead_k(row, k, stone)
                consecutive_5 += consecutive_k(row, 5, stone)
                
            for col_idx in range(len(board)):
                bl = [row[col_idx] for row in board]
                live_count[k]  += live_k(bl, k, stone)
                dead_count[k] += dead_k(bl, k, stone)
                consecutive_5 += consecutive_k(bl, 5, stone)
                
            for row_idx in range(len(board)):
                bl = get_updiag(board, row_idx, 0)
                live_count[k]  += live_k(bl, k, stone)
                dead_count[k] += dead_k(bl, k, stone)
                consecutive_5 += consecutive_k(bl, 5, stone)
                bl = get_downdiag(board, row_idx, 0)
                live_count[k]  += live_k(bl, k, stone)
                dead_count[k] += dead_k(bl, k, stone)
                consecutive_5 += consecutive_k(bl, 5, stone)
            
            for col_idx in range(len(board[0])):
                bl = get_updiag(board, len(board) - 1, col_idx)
                live_count[k]  += live_k(bl, k, stone)
                dead_count[k] += dead_k(bl, k, stone)
                consecutive_5 += consecutive_k(bl, 5, stone)
                bl = get_downdiag(board, 0, col_idx)
                live_count[k]  += live_k(bl, k, stone)
                dead_count[k] += dead_k(bl, k, stone)
                consecutive_5 += consecutive_k(bl, 5, stone)
                
            if live_count[k]:
                retval += live_dict[k]
            if dead_count[k]:
                retval += dead_dict[k]
        #print(live_count, dead_count)
        if consecutive_5:
             return 100000000
        live_dict = {2: 10, 3: 100, 4: 10000, 5: 50000}
        dead_dict = {2: 5, 3: 50, 4: 10000, 5: 50000}
        live_count = {2: 0, 3: 0, 4: 0, 5: 0}
        dead_count = {2: 0, 3: 0, 4: 0, 5: 0}
        for k in range(2, 5):
            for row in board:
                row = list(row)
                live_count[k]  += live_k(row, k, stone_p)
                dead_count[k] += dead_k(row, k, stone_p)
                consecutive_5 += consecutive_k(row, 5, stone_p)
            
            for col_idx in range(len(board)):
                bl = [row[col_idx] for row in board]
                live_count[k]  += live_k(bl, k, stone_p)
                dead_count[k] += dead_k(bl, k, stone_p)
                consecutive_5 += consecutive_k(bl, 5, stone_p)
            
            for row_idx in range(len(board)):
                bl = get_updiag(board, row_idx, 0)
                live_count[k]  += live_k(bl, k, stone_p)
                dead_count[k] += dead_k(bl, k, stone_p)
                consecutive_5 += consecutive_k(bl, 5, stone_p)
                bl = get_downdiag(board, row_idx, 0)
                live_count[k]  += live_k(bl, k, stone_p)
                dead_count[k] += dead_k(bl, k, stone_p)
                consecutive_5 += consecutive_k(bl, 5, stone_p)

            for col_idx in range(len(board[0])):
                bl = get_updiag(board, len(board) - 1, col_idx)
                live_count[k]  += live_k(bl, k, stone_p)
                dead_count[k] += dead_k(bl, k, stone_p)
                consecutive_5 += consecutive_k(row, 5, stone_p)
                bl = get_downdiag(board, 0, col_idx)
                live_count[k]  += live_k(bl, k, stone_p)
                dead_count[k] += dead_k(bl, k, stone_p)
                consecutive_5 += consecutive_k(bl, 5, stone_p)
            
            if live_count[k]:
                retval -= live_dict[k]
            if dead_count[k]:
                retval -= dead_dict[k]
        if consecutive_5:
                return -10000000
        
        return retval #+ random.random()
    
    def minmax_search(self, board, depth, stone, alpha=0, beta=0):
        #print_board(board)
        if depth == 0 or complete_board(board):
            score = self.eval_board(board, stone)
            score = -score if stone == self.opponent_stone else score
            return score, (None, None)
        
        row_arr, col_arr = np.where(board == '-')
        open_spaces = zip(row_arr, col_arr)
        
        if stone == self.stone:
            best_score = -np.infty
            best_move = (0,0)
            for row, col in open_spaces:
                new_board = copy.deepcopy(board)
                new_board[row][col] = stone
                score, move = self.minmax_search(new_board, depth-1, other_stone(stone))
                if score >= best_score:
                    best_move = (row, col)
                    best_score = score
        else:
            best_score = np.infty
            best_move = (0,0)
            for row, col in open_spaces:
                new_board = copy.deepcopy(board)
                new_board[row][col] = stone
                score, move = self.minmax_search(new_board, depth-1, other_stone(stone))
                if score <= best_score:
                    best_move = (row, col)
                    best_score = score
        return best_score, best_move
        
        
    def alphabeta_search(self, board, depth, stone, alpha, beta):
        if depth == 0 or complete_board(board):
            score = self.eval_board(board, stone)
            score = -score if stone == self.opponent_stone else score
            self.nodes += 1
            return score, (None, None)
        
        row_arr, col_arr = np.where(board == '-')
        open_spaces = list(zip(row_arr, col_arr))
        if stone == self.stone:
            best_score = -np.infty
            best_move = (None, None)
            lop = len(open_spaces)
            c = 0
            for row, col in tqdm(open_spaces) if depth == 3 else open_spaces:
                new_board = copy.deepcopy(board)
                new_board[row][col] = stone
                score, move = self.alphabeta_search(new_board, depth-1, other_stone(stone), alpha, beta)
                if depth==3:
                    print(f"{row},{col} yields {score}, alpha={alpha}, beta={beta}")
                best_score = max(score, best_score)
                if best_score >= beta:
                    self.pruned += (lop - c)**depth
                    break
                elif best_score > alpha:
                    best_move = (row, col)
                    alpha = best_score
        else:
            best_score = np.infty
            best_move = (None, None)
            lop = len(open_spaces)
            c = 0
            for row, col in open_spaces:
                c += 1
                new_board = copy.deepcopy(board)
                new_board[row][col] = stone
                score, move = self.alphabeta_search(new_board, depth-1, other_stone(stone), alpha, beta)
                best_score = min(score, best_score)
                #print(f"O: New score={score}, Best score={best_score}, alpha={alpha}, beta={beta}")
                if best_score <= alpha:
                    self.pruned += (lop - c)**depth
                    break
                elif best_score < beta:
                    best_move = (row, col)
                    beta = best_score
                
        #print(f"Returning {best_score}")
        self.nodes += 1
        return best_score, best_move
        
                    
    def get_move(self, board, max_nodes = 100):
        score, move = self.minmax_search(board, self.max_depth, self.stone, -np.inf, np.inf)
        return move
            


In [25]:
            
x = Strategy("X", max_depth=3)
o = Strategy("O", max_depth=1)
board = np.array([['-'] * 8 for _ in range(8)])


board = np.array([list(x) for x in """
O|-|-|-|-|-|-|-
-|-|X|-|X|-|O|O
-|-|-|X|O|-|O|-
-|-|X|-|X|O|-|-
-|X|-|-|X|-|O|-
-|-|-|O|-|-|-|-""".replace("|", "").split('\n')[1:]])

print(o.eval_board(board, "X"))
print(x.get_move(board))
print(x.nodes)
print(x.pruned)

20150
(5, 7)
0
0


In [7]:
def play_game(board, strategy_1, strategy_2):
    player1_turn = True
    turn_count = 0
    n = len(board)
    m = len(board[0])
    while not complete_board(board) and turn_count < m*n:
        if player1_turn:
            row, col = strategy_1.get_move(board)
            board[row][col] = "X"
        else:
            row, col = strategy_2.get_move(board)
            board[row][col] = "O"
        turn_count += 1
        print(turn_count, "X" if player1_turn else "O", f"{row},{col}")
        print_board(board, (row, col))
        board[row][col] = "X" if player1_turn else "O"
        player1_turn = not player1_turn
    return complete_board(board)


board = np.array([list(x) for x in """
-|-|-|-|-|-|-|-
-|-|X|-|X|-|O|O
-|-|-|-|-|-|O|-
-|-|X|-|X|O|-|-
-|-|-|-|-|-|-|-
-|-|-|O|-|-|-|-
-|-|-|-|-|-|-|-
-|-|-|-|-|-|O|X""".replace("|", "").split('\n')[1:]])
n = 8
p1 = Strategy("X", max_depth=3)
p2 = Strategy("O", max_depth=3)
play_game(board, p1, p2)

HBox(children=(FloatProgress(value=0.0, max=53.0), HTML(value='')))


1 X 4,4
[4m-|-|-|-|-|-|-|-[0m
[4m-|-|X|-|X|-|O|O[0m
[4m-|-|-|-|-|-|O|-[0m
[4m-|-|X|-|X|O|-|-[0m
[4m-|-|-|-[0m|[1m[4mX[0m[4m|-|-|-[0m
[4m-|-|-|O|-|-|-|-[0m
[4m-|-|-|-|-|-|-|-[0m
-|-|-|-|-|-|O|X


HBox(children=(FloatProgress(value=0.0, max=52.0), HTML(value='')))


2 O 2,4
[4m-|-|-|-|-|-|-|-[0m
[4m-|-|X|-|X|-|O|O[0m
[4m-|-|-|-[0m|[1m[4mO[0m[4m|-|O|-[0m
[4m-|-|X|-|X|O|-|-[0m
[4m-|-|-|-|X|-|-|-[0m
[4m-|-|-|O|-|-|-|-[0m
[4m-|-|-|-|-|-|-|-[0m
-|-|-|-|-|-|O|X


HBox(children=(FloatProgress(value=0.0, max=51.0), HTML(value='')))


3 X 2,3
[4m-|-|-|-|-|-|-|-[0m
[4m-|-|X|-|X|-|O|O[0m
[4m-|-|-[0m|[1m[4mX[0m[4m|O|-|O|-[0m
[4m-|-|X|-|X|O|-|-[0m
[4m-|-|-|-|X|-|-|-[0m
[4m-|-|-|O|-|-|-|-[0m
[4m-|-|-|-|-|-|-|-[0m
-|-|-|-|-|-|O|X


HBox(children=(FloatProgress(value=0.0, max=50.0), HTML(value='')))


4 O 4,6
[4m-|-|-|-|-|-|-|-[0m
[4m-|-|X|-|X|-|O|O[0m
[4m-|-|-|X|O|-|O|-[0m
[4m-|-|X|-|X|O|-|-[0m
[4m-|-|-|-|X|-[0m|[1m[4mO[0m[4m|-[0m
[4m-|-|-|O|-|-|-|-[0m
[4m-|-|-|-|-|-|-|-[0m
-|-|-|-|-|-|O|X


HBox(children=(FloatProgress(value=0.0, max=49.0), HTML(value='')))


5 X 4,1
[4m-|-|-|-|-|-|-|-[0m
[4m-|-|X|-|X|-|O|O[0m
[4m-|-|-|X|O|-|O|-[0m
[4m-|-|X|-|X|O|-|-[0m
[4m-[0m|[1m[4mX[0m[4m|-|-|X|-|O|-[0m
[4m-|-|-|O|-|-|-|-[0m
[4m-|-|-|-|-|-|-|-[0m
-|-|-|-|-|-|O|X


HBox(children=(FloatProgress(value=0.0, max=48.0), HTML(value='')))


6 O 0,0
[4m[0m[1m[4mO[0m[4m|-|-|-|-|-|-|-[0m
[4m-|-|X|-|X|-|O|O[0m
[4m-|-|-|X|O|-|O|-[0m
[4m-|-|X|-|X|O|-|-[0m
[4m-|X|-|-|X|-|O|-[0m
[4m-|-|-|O|-|-|-|-[0m
[4m-|-|-|-|-|-|-|-[0m
-|-|-|-|-|-|O|X


HBox(children=(FloatProgress(value=0.0, max=47.0), HTML(value='')))


7 X 0,1
[4mO[0m|[1m[4mX[0m[4m|-|-|-|-|-|-[0m
[4m-|-|X|-|X|-|O|O[0m
[4m-|-|-|X|O|-|O|-[0m
[4m-|-|X|-|X|O|-|-[0m
[4m-|X|-|-|X|-|O|-[0m
[4m-|-|-|O|-|-|-|-[0m
[4m-|-|-|-|-|-|-|-[0m
-|-|-|-|-|-|O|X


HBox(children=(FloatProgress(value=0.0, max=46.0), HTML(value='')))


8 O 0,2
[4mO|X[0m|[1m[4mO[0m[4m|-|-|-|-|-[0m
[4m-|-|X|-|X|-|O|O[0m
[4m-|-|-|X|O|-|O|-[0m
[4m-|-|X|-|X|O|-|-[0m
[4m-|X|-|-|X|-|O|-[0m
[4m-|-|-|O|-|-|-|-[0m
[4m-|-|-|-|-|-|-|-[0m
-|-|-|-|-|-|O|X


HBox(children=(FloatProgress(value=0.0, max=45.0), HTML(value='')))


9 X 0,5
[4mO|X|O|-|-[0m|[1m[4mX[0m[4m|-|-[0m
[4m-|-|X|-|X|-|O|O[0m
[4m-|-|-|X|O|-|O|-[0m
[4m-|-|X|-|X|O|-|-[0m
[4m-|X|-|-|X|-|O|-[0m
[4m-|-|-|O|-|-|-|-[0m
[4m-|-|-|-|-|-|-|-[0m
-|-|-|-|-|-|O|X


True