In [1]:
import random
import math
from tqdm import tqdm

class MENACE:
    def __init__(self, learning_rate=0.1, discount_factor=0.9, exploration_rate=0.1, exploration_rate_decay=0.9995):
        self.matchboxes = {}
        self.moves_made = []
        self.learning_rate = learning_rate
        self.discount_factor = discount_factor
        self.exploration_rate = exploration_rate
        self.exploration_rate_decay = exploration_rate_decay

    def get_move(self, board):
        if board not in self.matchboxes:
            self.matchboxes[board] = {i: 0 for i in range(9) if board[i] == '-'}

        moves = self.matchboxes[board]
        if random.random() < self.exploration_rate:
            move = random.choice(list(moves.keys()))
        else:
            move = max(moves, key=moves.get)

        self.moves_made.append((board, move))
        return move

    def learn(self, final_reward):
        for i in range(len(self.moves_made) - 1, -1, -1):
            board, move = self.moves_made[i]
            next_board = self.moves_made[i+1][0] if i+1 < len(self.moves_made) else None

            if next_board:
                next_max_q = max(self.matchboxes[next_board].values()) if self.matchboxes[next_board] else 0
                reward = self.calculate_reward(board, move)
                new_q = (1 - self.learning_rate) * self.matchboxes[board][move] + \
                        self.learning_rate * (reward + self.discount_factor * next_max_q)
            else:
                new_q = final_reward

            self.matchboxes[board][move] = new_q

        self.moves_made = []
        self.exploration_rate *= self.exploration_rate_decay  # Decay the exploration rate

    def calculate_reward(self, board, move):
        new_board = board[:move] + 'X' + board[move+1:]
        if check_win(new_board):
            return 1  # Immediate win
        elif check_win(new_board.replace('X', 'O').replace('-', 'X')):
            return 0.5  # Blocked opponent's win
        else:
            return 0.1  # Neither win nor block

def check_win(board):
    winning_combinations = [
        (0, 1, 2), (3, 4, 5), (6, 7, 8),  # Rows
        (0, 3, 6), (1, 4, 7), (2, 5, 8),  # Columns
        (0, 4, 8), (2, 4, 6)  # Diagonals
    ]
    for a, b, c in winning_combinations:
        if board[a] == board[b] == board[c] != '-':
            return True
    return False

class MinimaxPlayer:
    def __init__(self, max_depth=5):
        self.max_depth = max_depth

    def get_move(self, board):
        best_move = None
        best_score = -math.inf

        for move in range(9):
            if board[move] == '-':
                new_board = board[:move] + 'O' + board[move+1:]
                score = self.minimax(new_board, 0, False)
                if score > best_score:
                    best_score = score
                    best_move = move

        return best_move

    def minimax(self, board, depth, is_maximizing):
        if check_win(board):
            return -1 if is_maximizing else 1
        elif '-' not in board:
            return 0

        if is_maximizing:
            best_score = -math.inf
            for move in range(9):
                if board[move] == '-':
                    new_board = board[:move] + 'O' + board[move+1:]
                    score = self.minimax(new_board, depth + 1, False)
                    best_score = max(best_score, score)
            return best_score
        else:
            best_score = math.inf
            for move in range(9):
                if board[move] == '-':
                    new_board = board[:move] + 'X' + board[move+1:]
                    score = self.minimax(new_board, depth + 1, True)
                    best_score = min(best_score, score)
            return best_score

def play_game(menace, opponent):
    board = '-' * 9
    current_player = menace

    while True:
        move = current_player.get_move(board)
        board = board[:move] + ('X' if current_player == menace else 'O') + board[move+1:]

        if check_win(board):
            return 1 if current_player == menace else -1
        if '-' not in board:
            return 0

        current_player = opponent if current_player == menace else menace

def train_menace(menace, opponent, episodes):
    for episode in tqdm(range(episodes), desc="Training MENACE"):
        result = play_game(menace, opponent)
        menace.learn(result)

# Training
menace = MENACE()
opponent = MinimaxPlayer(max_depth=5)

print("Training MENACE...")
train_menace(menace, opponent, 5000)

print("\nTraining completed!")

# Test game
board = '-' * 9
current_player = menace

print("\nLet's play a game against MENACE!")
while True:
    print(f"\nCurrent board:\n{board[:3]}\n{board[3:6]}\n{board[6:]}")

    if current_player == menace:
        move = menace.get_move(board)
        print(f"MENACE plays: {move}")
    else:
        move = int(input("Your move (0-8): "))
        while board[move] != '-':
            move = int(input("Invalid move. Try again (0-8): "))

    board = board[:move] + ('X' if current_player == menace else 'O') + board[move+1:]

    if check_win(board):
        print(f"\nFinal board:\n{board[:3]}\n{board[3:6]}\n{board[6:]}")
        print("MENACE wins!" if current_player == menace else "You win!")
        break
    if '-' not in board:
        print(f"\nFinal board:\n{board[:3]}\n{board[3:6]}\n{board[6:]}")
        print("It's a draw!")
        break

    current_player = opponent if current_player == menace else menace

# Final learning step
if current_player == menace:
    menace.learn(-1)  # MENACE lost
else:
    menace.learn(1)  # MENACE won

Training MENACE...


Training MENACE: 100%|██████████| 5000/5000 [08:45<00:00,  9.52it/s]



Training completed!

Let's play a game against MENACE!

Current board:
---
---
---
MENACE plays: 0

Current board:
X--
---
---
Your move (0-8): 3

Current board:
X--
O--
---
MENACE plays: 1

Current board:
XX-
O--
---
Your move (0-8): 2

Current board:
XXO
O--
---
MENACE plays: 4

Current board:
XXO
OX-
---
Your move (0-8): 8

Current board:
XXO
OX-
--O
MENACE plays: 5

Current board:
XXO
OXX
--O
Your move (0-8): 7

Current board:
XXO
OXX
-OO
MENACE plays: 6

Final board:
XXO
OXX
XOO
It's a draw!
