In [2]:
import random
import math
import numpy as np
import pickle

def gameBoard():
    """Create the starting state for a new game.

    Returns:
        A 3-tuple ``(board, player_mark, minimax_mark)`` where ``board`` is a
        fresh dict mapping the cell numbers 1..9 to a single space (empty
        cell), ``player_mark`` is ``'X'`` and ``minimax_mark`` is ``'O'``.
    """
    empty_cells = {cell: ' ' for cell in range(1, 10)}
    return empty_cells, 'X', 'O'

def printGame(gamegrid,plr):
    """Print whose turn it is followed by the 3x3 board.

    Args:
        gamegrid: dict mapping cell numbers 1..9 to one-character strings.
        plr: label of the player to move; shown as ``"<plr>'s turn:"``.
    """
    print("\n")
    print(f"{plr}'s turn:")

    # Build one text line per board row, then glue the rows with a dashed rule.
    row_lines = []
    for row_start in range(1, 10, 3):
        row_cells = [gamegrid[row_start + offset] for offset in range(3)]
        row_lines.append(" | ".join(row_cells))
    rule = "\n" + "-"*9 + "\n"

    print(rule.join(row_lines))
    print()


def movefirst():
    """Randomly return 1 or 2 to decide which side opens the game."""
    return random.choice([1, 2])

def isLegal(gamegrid, move):
    """Return True when ``move`` is a key of ``gamegrid`` whose cell is empty.

    A move that is not a key of the board (out of range, ``None``, ...) is
    reported as illegal rather than raising.
    """
    return move in gamegrid and gamegrid[move] == ' '


def isDraw(gamegrid):
    """Return True when the board is full and no line has been completed."""
    # Any remaining empty cell means the game cannot be a draw yet.
    if ' ' in gamegrid.values():
        return False
    return not isSuccess(gamegrid)

def isSuccess(gamegrid):
    """Return True when any row, column or diagonal holds three equal marks.

    Lines made of empty cells (single spaces) do not count as a win.
    """
    lines = (
        (1, 2, 3), (4, 5, 6), (7, 8, 9),   # rows
        (1, 4, 7), (2, 5, 8), (3, 6, 9),   # columns
        (1, 5, 9), (7, 5, 3),              # diagonals
    )
    return any(
        gamegrid[a] == gamegrid[b] == gamegrid[c] != ' '
        for a, b, c in lines
    )

def isSuccessMark(gamegrid, mark):
    """Return True when ``mark`` fills an entire row, column or diagonal.

    Args:
        gamegrid: dict mapping cell numbers 1..9 to marks.
        mark: the mark to look for (compared with ``==`` against each cell).
    """
    lines = (
        (1, 2, 3), (4, 5, 6), (7, 8, 9),   # rows
        (1, 4, 7), (2, 5, 8), (3, 6, 9),   # columns
        (1, 5, 9), (7, 5, 3),              # diagonals
    )
    return any(
        all(gamegrid[cell] == mark for cell in line)
        for line in lines
    )

def new_turns(gamegrid):
    """Pick a random empty cell on the board.

    The previous implementation drew random cells and recursed until it hit a
    legal one, which never terminates (RecursionError) on a full board and
    needs an unbounded number of retries on a nearly full one. The legal
    cells are now collected once and a single uniform draw is made.

    Args:
        gamegrid: dict mapping cell numbers 1..9 to marks; a single space
            marks an empty cell.

    Returns:
        The number (1..9) of a randomly chosen empty cell, or ``None`` when
        the board has no empty cell left.
    """
    # Same legality rule as isLegal: the cell must exist and hold a space.
    open_cells = [
        cell for cell in range(1, 10)
        if cell in gamegrid and gamegrid[cell] == ' '
    ]
    if not open_cells:
        return None
    return random.choice(open_cells)

# Exploration probability read by qlBestpos: a random move is chosen whenever
# random.random() < epsilon, so the value 1.0 makes that function always explore.
epsilon = 1.0
# Q-table: keyed by the board tuple produced by fetchPos; qLearnVal fills in a
# zero vector of length 9 for unseen boards. Rebound wholesale by loadModel.
qlearnstinfo = {}

def fetchPos(crr_grid):
    """Convert the board dict into a hashable 3x3 tuple of row tuples.

    Cells 1-3 form the first row, 4-6 the second and 7-9 the third.
    """
    rows = []
    for row_start in (1, 4, 7):
        rows.append((
            crr_grid[row_start],
            crr_grid[row_start + 1],
            crr_grid[row_start + 2],
        ))
    return tuple(rows)


def qLearnVal(crr_grid, crr_pos):
    """Look up the Q-value of playing cell ``crr_pos`` on board ``crr_grid``.

    Boards not yet present in ``qlearnstinfo`` are inserted with a zero
    vector of length 9 as a side effect of the lookup.

    Args:
        crr_grid: dict mapping cell numbers 1..9 to marks.
        crr_pos: 1-based cell number; stored at index ``crr_pos - 1``.
    """
    state_key = fetchPos(crr_grid)
    if state_key in qlearnstinfo:
        return qlearnstinfo[state_key][crr_pos - 1]

    # First visit to this board: register an all-zero row and read from it.
    fresh_row = np.zeros(9)
    qlearnstinfo[state_key] = fresh_row
    return fresh_row[crr_pos - 1]

def qlBestpos(crr_grid, availablemoves):
    """Choose a move epsilon-greedily from ``availablemoves``.

    With probability ``epsilon`` (module-level) a uniformly random available
    move is returned; otherwise the move with the highest Q-value for the
    current board is returned.
    """
    explore = random.random() < epsilon
    if explore:
        return random.choice(availablemoves)
    return max(availablemoves, key=lambda move: qLearnVal(crr_grid, move))

def loadModel(filename="tictac_QLmodel.pickle"):
    """Replace the module-level Q-table with the object pickled in ``filename``.

    NOTE(review): ``pickle.load`` can execute arbitrary code embedded in the
    file, so only load model files that come from a trusted source.

    Args:
        filename: path of the pickle file to read, opened in binary mode.
    """
    global qlearnstinfo
    with open(filename, "rb") as model_file:
        loaded_table = pickle.load(model_file)
    qlearnstinfo = loaded_table

def minMax(game_grid, minMaxMark, qlmark, rlevel=0.1):
    """Choose a move for the minimax player.

    With probability ``rlevel`` a uniformly random empty cell is played;
    otherwise every empty cell is tried and scored with ``minMax_eval`` and
    the first cell achieving the highest score is returned.

    A full board now yields ``None`` on every path. Previously only the
    random branch did so, while the deterministic branch fell through to
    ``new_turns``, which recurses without end when no cell is free.

    Args:
        game_grid: dict mapping cell numbers 1..9 to marks; a single space is
            an empty cell. It is modified while candidate moves are tried and
            restored before returning.
        minMaxMark: mark placed by the minimax player.
        qlmark: mark placed by the opponent.
        rlevel: probability in [0, 1] of playing a random move instead.

    Returns:
        The chosen cell number, or ``None`` when the board has no empty cell.
    """
    positions = [pos for pos in range(1, 10) if game_grid[pos] == ' ']
    if not positions:
        return None

    if random.random() < rlevel:
        # Random Move
        return random.choice(positions)

    bestscr = -math.inf
    bestpos = None

    for candidate in positions:
        game_grid[candidate] = minMaxMark
        try:
            crr_scr = minMax_eval(
                game_grid, minMaxMark, qlmark, False, -math.inf, math.inf)
        finally:
            # Always undo the trial move, even if evaluation raises.
            game_grid[candidate] = ' '

        if crr_scr > bestscr:
            bestscr = crr_scr
            bestpos = candidate

    # Defensive fallback: only reachable if no candidate scored above -inf.
    return bestpos if bestpos is not None else random.choice(positions)

def minMax_eval(game_grid, minMaxMark, qlmark, ismovem, alpha, beta):
    """Score a position for the minimax player using alpha-beta search.

    Args:
        game_grid: dict mapping cell numbers to marks; trial moves are written
            into it and reset to a space after each recursive call.
        minMaxMark: mark of the maximising (minimax) player.
        qlmark: mark of the minimising opponent.
        ismovem: True when the maximising player is to move.
        alpha: best score the maximiser is already assured of.
        beta: best score the minimiser is already assured of.

    Returns:
        1 if ``minMaxMark`` has a completed line, -1 if ``qlmark`` has one,
        0 for a draw, otherwise the best score found among the empty cells
        for the side to move.
    """
    # Terminal positions are scored from the minimax player's point of view.
    if isSuccessMark(game_grid, minMaxMark):
        return 1
    if isSuccessMark(game_grid, qlmark):
        return -1
    if isDraw(game_grid):
        return 0

    if ismovem:
        best = -math.inf
        for cell in game_grid:
            if game_grid[cell] != ' ':
                continue

            game_grid[cell] = minMaxMark
            score = minMax_eval(game_grid, minMaxMark, qlmark, False, alpha, beta)
            game_grid[cell] = ' '

            best = max(best, score)
            alpha = max(alpha, best)
            # The minimiser already has a better option elsewhere: prune.
            if alpha >= beta:
                break
        return best

    best = math.inf
    for cell in game_grid:
        if game_grid[cell] != ' ':
            continue

        game_grid[cell] = qlmark
        score = minMax_eval(game_grid, minMaxMark, qlmark, True, alpha, beta)
        game_grid[cell] = ' '

        best = min(best, score)
        beta = min(beta, best)
        # The maximiser already has a better option elsewhere: prune.
        if alpha >= beta:
            break
    return best

def qlearnBlock(crr_grid, availablemoves, qlmark):
    """Choose the Q-learning player's move with win/block shortcuts.

    Priority order:
      1. a move that completes a line for ``qlmark``;
      2. a move that occupies a cell where the opponent would complete a line;
      3. the available move with the highest Q-value for the current board.

    The opponent mark used to be hard-coded as ``'O'``, so a caller passing
    ``qlmark='O'`` re-checked its own mark in step 2 and never blocked. It is
    now derived from ``qlmark``; for any ``qlmark`` other than ``'O'`` the
    opponent is still ``'O'``, exactly as before.

    Args:
        crr_grid: dict mapping cell numbers 1..9 to marks (not modified;
            trial moves are made on copies).
        availablemoves: iterable of candidate cell numbers.
        qlmark: mark of the Q-learning player.

    Returns:
        The chosen cell number, or ``None`` when ``availablemoves`` is empty.
    """
    opponent_mark = 'X' if qlmark == 'O' else 'O'

    # Pass 1 looks for an immediate win, pass 2 for an opponent win to block.
    for mark in (qlmark, opponent_mark):
        for position in availablemoves:
            trial_grid = crr_grid.copy()
            trial_grid[position] = mark
            if isSuccessMark(trial_grid, mark):
                return position

    return max(availablemoves, key=lambda x: qLearnVal(crr_grid, x), default=None)

def play_game_random(num_games):
    """Play ``num_games`` games in which the opening side is chosen at random.

    The minimax player uses ``'O'`` and the Q-learning player uses ``'X'``.
    ``movefirst()`` returning 1 lets minimax open; otherwise Q-learning opens.
    After that the two sides alternate until one completes a line or the
    board is drawn.

    Args:
        num_games: number of games to play.

    Returns:
        Tuple ``(win_mm, win_ql, Draw)`` with the minimax wins, Q-learning
        wins and draws that were counted.
    """
    win_mm = win_ql = Draw = 0

    for _ in range(num_games):
        board = gameBoard()[0]
        mm_to_move = movefirst() == 1

        while True:
            open_cells = [cell for cell in range(1, 10) if isLegal(board, cell)]

            if mm_to_move:
                # Minimax facing a board with no free cell is counted as a draw.
                if not open_cells:
                    Draw += 1
                    break
                mover = 'O'
                move = minMax(board, 'O', 'X')
            else:
                # Q-learning facing a board with no free cell ends the game uncounted.
                if not open_cells:
                    break
                mover = 'X'
                move = qlearnBlock(board, open_cells, 'X')

            if isLegal(board, move):
                board[move] = mover

            if isSuccessMark(board, mover):
                if mm_to_move:
                    win_mm += 1
                else:
                    win_ql += 1
                break

            if isDraw(board):
                Draw += 1
                break

            # Hand the turn to the other side.
            mm_to_move = not mm_to_move

    return win_mm, win_ql, Draw

def play_game_minMax(num_games):
    """Play ``num_games`` games in which the minimax player always opens.

    Minimax plays ``'O'`` and the Q-learning player plays ``'X'``.

    Args:
        num_games: number of games to play.

    Returns:
        Tuple ``(win_mm, win_ql, Draw)`` with the minimax wins, Q-learning
        wins and draws that were counted.
    """
    win_mm = win_ql = Draw = 0

    for _ in range(num_games):
        board = gameBoard()[0]

        while True:
            # --- minimax half-turn ---
            mm_move = minMax(board, 'O', 'X')
            if mm_move is None:
                if isDraw(board):
                    Draw += 1
                break
            board[mm_move] = 'O'
            if isSuccessMark(board, 'O'):
                win_mm += 1
                break
            if isDraw(board):
                Draw += 1
                break

            # --- Q-learning half-turn ---
            ql_moves = [cell for cell in range(1, 10) if isLegal(board, cell)]
            if not ql_moves:
                if isDraw(board):
                    Draw += 1
                break
            ql_move = qlearnBlock(board, ql_moves, 'X')
            board[ql_move] = 'X'
            if isSuccessMark(board, 'X'):
                win_ql += 1
                break
            if isDraw(board):
                Draw += 1
                break

    return win_mm, win_ql, Draw

def play_game_qlearn(num_games):
    """Play ``num_games`` games in which the Q-learning player always opens.

    The Q-learning player plays ``'X'`` and minimax plays ``'O'``.

    Args:
        num_games: number of games to play.

    Returns:
        Tuple ``(win_mm, win_ql, Draw)`` with the minimax wins, Q-learning
        wins and draws that were counted.
    """
    win_mm = win_ql = Draw = 0

    for _ in range(num_games):
        board = gameBoard()[0]

        while True:
            # --- Q-learning half-turn ---
            ql_moves = [cell for cell in range(1, 10) if isLegal(board, cell)]
            if not ql_moves:
                if isDraw(board):
                    Draw += 1
                break
            ql_move = qlearnBlock(board, ql_moves, 'X')
            board[ql_move] = 'X'
            if isSuccessMark(board, 'X'):
                win_ql += 1
                break
            if isDraw(board):
                Draw += 1
                break

            # --- minimax half-turn ---
            mm_moves = [cell for cell in range(1, 10) if isLegal(board, cell)]
            if not mm_moves:
                if isDraw(board):
                    Draw += 1
                break
            mm_move = minMax(board, 'O', 'X')
            board[mm_move] = 'O'
            if isSuccessMark(board, 'O'):
                win_mm += 1
                break
            if isDraw(board):
                Draw += 1
                break

    return win_mm, win_ql, Draw



In [3]:
# Experiment 1: reload the pickled Q-table from its default path, then play
# games where movefirst() picks the opening side for each game.
# No random seed is set, so the counts printed below vary from run to run.
loadModel()
num_games = 100
MinMaxRandomWin, QLearningRandomWin, RandomDraw = play_game_random(num_games)
print(f"Results after {num_games} games with random first move:")
print(f"MinMax wins: {MinMaxRandomWin}")
print(f"QLearning wins: {QLearningRandomWin}")
print(f"Draws: {RandomDraw}")


Results after 100 games with random first move:
MinMax wins: 44
QLearning wins: 10
Draws: 46


In [4]:
# Experiment 2: reload the Q-table so it starts from the saved file again,
# then play games in which the minimax player ('O') always moves first.
# No random seed is set, so the counts printed below vary from run to run.
loadModel()
num_games = 50
MinMaxwin_mm, win_ql_mm, MinMaxMinMaxDraw = play_game_minMax(num_games)
print(f"\nResults after {num_games} games with MinMax starting first:")
print(f"MinMax wins: {MinMaxwin_mm}")
print(f"QLearning wins: {win_ql_mm}")
print(f"Draws: {MinMaxMinMaxDraw}")


Results after 50 games with MinMax starting first:
MinMax wins: 40
QLearning wins: 4
Draws: 6


In [4]:
# Experiment 3: reload the Q-table so it starts from the saved file again,
# then play games in which the Q-learning player ('X') always moves first.
# No random seed is set, so the counts printed below vary from run to run.
loadModel()
num_games = 2000
MinMaxwin_ql, QLearningwin_ql, QLearningQLearningDraw = play_game_qlearn(num_games)
print(f"\nResults after {num_games} games with QLearning starting first:")
print(f"MinMax wins: {MinMaxwin_ql}")
print(f"QLearning wins: {QLearningwin_ql}")
print(f"Draws: {QLearningQLearningDraw}")


Results after 2000 games with QLearning starting first:
MinMax wins: 104
QLearning wins: 329
Draws: 1567
