In [1]:
import random
import pickle
import numpy as np

def gameBoard():
    """Create an empty tic-tac-toe board.

    Returns:
        dict: A new dictionary whose keys are the cell numbers 1 through 9
        (in that order) and whose values are all a single space ``' '``,
        the marker this notebook uses for an unoccupied cell.
    """
    return dict.fromkeys(range(1, 10), ' ')

def printGame(gamegrid):
    """Print the board as three rows separated by dashed rules.

    Args:
        gamegrid: Mapping from cell numbers 1..9 to one-character strings.

    The output starts with two blank lines, then the rows (cells joined by
    ``" | "``) with a nine-dash rule between rows, then one blank line.
    """
    print("\n")

    rows = []
    for first_cell in (1, 4, 7):
        cells = [gamegrid[first_cell + offset] for offset in (0, 1, 2)]
        rows.append(" | ".join(cells))

    rule = "\n" + "-" * 9 + "\n"
    print(rule.join(rows))
    print()

def new_turn():
    """Pick 1 or 2 uniformly at random.

    Returns:
        int: Either 1 or 2, drawn with ``random.choice``. Callers in this
        notebook treat 1 as "the default (random) player moves first".
    """
    options = (1, 2)
    return random.choice(options)

def isLegal(gamegrid, mv):
    """Tell whether cell ``mv`` is still free.

    Args:
        gamegrid: Mapping from cell number to its current mark.
        mv: Cell number to check; it must be a key of ``gamegrid``
            (a missing key raises ``KeyError``).

    Returns:
        bool: True when the cell holds the empty marker ``' '``.
    """
    occupant = gamegrid[mv]
    return occupant == ' '

def isDraw(gamegrid):
    """Tell whether the board has no empty cell left.

    Note that this only checks for a full board; it does not look for a
    winner. Callers in this notebook test ``isSuccess`` first.

    Args:
        gamegrid: Mapping from cell number to its current mark.

    Returns:
        bool: True when no value in ``gamegrid`` equals ``' '``.
    """
    return ' ' not in gamegrid.values()

def isSuccess(gamegrid, mk):
    """Tell whether mark ``mk`` occupies a complete winning line.

    Args:
        gamegrid: Mapping from cell numbers 1..9 to their current marks.
        mk: The mark to test for (``'X'`` or ``'O'`` in this notebook).

    Returns:
        bool: True if all three cells of any row, column or diagonal
        equal ``mk``; False otherwise.
    """
    winning_lines = (
        (1, 2, 3), (4, 5, 6), (7, 8, 9),   # rows
        (1, 4, 7), (2, 5, 8), (3, 6, 9),   # columns
        (1, 5, 9), (7, 5, 3),              # diagonals
    )
    return any(
        all(gamegrid[cell] == mk for cell in line)
        for line in winning_lines
    )

def fetchPos(crr_grid):
    """Convert a board dict into a hashable 3x3 tuple of marks.

    Args:
        crr_grid: Mapping from cell numbers 1..9 to their current marks.

    Returns:
        tuple: Three row tuples (cells 1-3, 4-6, 7-9), suitable for use
        as a dictionary key.
    """
    rows = []
    for first_cell in (1, 4, 7):
        rows.append((
            crr_grid[first_cell],
            crr_grid[first_cell + 1],
            crr_grid[first_cell + 2],
        ))
    return tuple(rows)

def qLearnVal(qlearnstinfo, crr_grid, crr_pos):
    """Look up the Q-value stored for playing ``crr_pos`` on ``crr_grid``.

    Args:
        qlearnstinfo: Q-table dict keyed by ``fetchPos(board)``; each value
            is a length-9 NumPy array indexed by ``cell - 1``.
        crr_grid: Board dict describing the state.
        crr_pos: Cell number (1-based) of the action.

    Returns:
        The stored value for that state/action pair.

    Side effects:
        A state seen for the first time is inserted into ``qlearnstinfo``
        with nine zeros.
    """
    state_key = fetchPos(crr_grid)
    try:
        action_values = qlearnstinfo[state_key]
    except KeyError:
        # First visit: start every action of this state at zero.
        action_values = qlearnstinfo[state_key] = np.zeros((9,))
    return action_values[crr_pos - 1]

def qlBestpos(qlearnstinfo, crr_grid, availablemoves, epsilon):
    """Choose a move epsilon-greedily.

    Args:
        qlearnstinfo: Q-table dict passed through to ``qLearnVal``.
        crr_grid: Board dict describing the current state.
        availablemoves: Non-empty list of legal cell numbers.
        epsilon: Probability of picking a uniformly random legal move
            instead of the highest-valued one.

    Returns:
        One element of ``availablemoves``. In the greedy case ties go to
        the earliest move in the list (the behaviour of ``max``).
    """
    if random.random() < epsilon:
        # Explore.
        return random.choice(availablemoves)

    def stored_value(move):
        return qLearnVal(qlearnstinfo, crr_grid, move)

    # Exploit.
    return max(availablemoves, key=stored_value)

def modifyQl(qlearnstinfo, crr_grid, crr_pos, reward, nextgrid, availablemoves,
             alpha=0.1, gamma=0.99):
    """Apply one tabular Q-learning update to ``Q(crr_grid, crr_pos)``.

    The update is::

        Q(s, a) <- Q(s, a) + alpha * (reward + gamma * max_a' Q(s', a') - Q(s, a))

    Args:
        qlearnstinfo: Q-table dict, read and written through ``qLearnVal``.
        crr_grid: Board dict for the state ``s`` in which the action was taken.
        crr_pos: Cell number (1-based) of the action ``a``.
        reward: Immediate reward for the transition.
        nextgrid: Board dict for the successor state ``s'``.
        availablemoves: Legal cell numbers in ``nextgrid``. Pass an empty
            list for a terminal successor; the bootstrap term is then 0.
        alpha: Learning rate (default 0.1, the value previously hard-coded).
        gamma: Discount factor (default 0.99, the value previously hard-coded).

    Returns:
        None. ``qlearnstinfo`` is updated in place.
    """
    # Best value reachable from the successor state. The lookup must use each
    # candidate next action; using crr_pos here would evaluate the same
    # (already played) cell once per candidate.
    next_best = max(
        (qLearnVal(qlearnstinfo, nextgrid, next_action) for next_action in availablemoves),
        default=0,
    )

    current = qLearnVal(qlearnstinfo, crr_grid, crr_pos)
    updated = current + alpha * ((reward + gamma * next_best) - current)

    # qLearnVal above guarantees that the entry for crr_grid exists.
    qlearnstinfo[fetchPos(crr_grid)][crr_pos - 1] = updated

def dumpModel(qlearnstinfo):
    """Pickle the Q-table to ``tictac_QLmodel.pickle`` in the working directory.

    Args:
        qlearnstinfo: The object to serialise (the Q-table dict).

    Any exception raised while writing is caught and reported on stdout
    rather than propagated, so a failed save does not stop the notebook.
    """
    target = "tictac_QLmodel.pickle"
    try:
        with open(target, "wb") as sink:
            pickle.dump(qlearnstinfo, sink)
        print("Model saved successfully.")
    except Exception as err:
        print("Error while saving the model:", err)


def loadModel():
    """Read back the object stored in ``tictac_QLmodel.pickle``.

    Returns:
        Whatever object was pickled into the file (the Q-table dict when
        the file was written by ``dumpModel``).

    Raises:
        Any error from ``open`` or ``pickle.load`` propagates to the caller,
        e.g. ``FileNotFoundError`` when the file does not exist.
    """
    source = "tictac_QLmodel.pickle"
    # NOTE(review): unpickling can execute arbitrary code; only load a file
    # that this notebook itself produced.
    with open(source, "rb") as stream:
        restored = pickle.load(stream)
    return restored

def play_game(qlearnstinfo, defaultfirst, explore_rate=None):
    """Play one printed game: Q-learning agent ('X') versus a random player ('O').

    Every move is announced and the board is printed with ``printGame``.

    Args:
        qlearnstinfo: Q-table dict used by ``qlBestpos`` to pick the
            agent's moves.
        defaultfirst: When truthy the random ("default") player makes the
            first move; otherwise the Q-learning agent does.
        explore_rate: Epsilon passed to ``qlBestpos`` for the agent's
            moves. When None (the default) the notebook-level variable
            ``epsilon`` is read at call time, which is what this function
            always did before the parameter existed.

    Returns:
        str: ``'QLearning Wins'``, ``'Default Player Wins'`` or ``'Draw'``.
    """
    defaultmark = 'O'
    qlmark = 'X'

    if explore_rate is None:
        explore_rate = epsilon  # notebook-level global

    playgrid = gameBoard()
    default_to_move = bool(defaultfirst)

    # Both players share one loop; only move selection, the banner and the
    # win label differ between them.
    while True:
        availablemoves = [i for i in range(1, 10) if isLegal(playgrid, i)]

        if default_to_move:
            mark = defaultmark
            pos = random.choice(availablemoves)
            banner = "Default Players move:"
            win_result = 'Default Player Wins'
        else:
            mark = qlmark
            pos = qlBestpos(qlearnstinfo, playgrid, availablemoves, explore_rate)
            banner = "QLearning Player move:"
            win_result = 'QLearning Wins'

        playgrid[pos] = mark
        print(banner)
        printGame(playgrid)

        # Check for a win before the full-board test: the last move can
        # both fill the board and complete a line.
        if isSuccess(playgrid, mark):
            return win_result
        if isDraw(playgrid):
            return 'Draw'

        default_to_move = not default_to_move

# --- Training -----------------------------------------------------------
# The agent plays 'X' against a uniformly random 'O' opponent. Each update
# credits the agent's own (state, action) pair, where the state is the
# board the agent saw BEFORE moving and the successor is the board it sees
# next, i.e. after the opponent has replied. These are the same keys that
# qlBestpos looks up when it chooses a move.
epsilon = 1.0       # exploration rate; decayed after every episode
qlearnstinfo = {}   # Q-table: fetchPos(board) -> array of 9 action values

num_episode = 2000000
for ep in range(num_episode):
    playgrid = gameBoard()

    # Let the random opponent open about half of the episodes so that the
    # agent also learns the positions it meets when it moves second.
    if new_turn() == 1:
        playgrid[random.randint(1, 9)] = 'O'

    while True:
        # The board is never full here: every path that fills it breaks
        # out of the loop below.
        qlmoves = [i for i in range(1, 10) if isLegal(playgrid, i)]

        # Snapshot of the state the decision is made in; playgrid itself is
        # mutated by both players below.
        state = dict(playgrid)
        qlpos = qlBestpos(qlearnstinfo, playgrid, qlmoves, epsilon)
        playgrid[qlpos] = 'X'

        if isSuccess(playgrid, 'X'):
            # Terminal: the agent won with this move.
            modifyQl(qlearnstinfo, state, qlpos, 1, playgrid, [])
            break
        if isDraw(playgrid):
            # Terminal: the agent filled the last cell without winning.
            modifyQl(qlearnstinfo, state, qlpos, 0, playgrid, [])
            break

        # Opponent's reply.
        defmove = [i for i in range(1, 10) if isLegal(playgrid, i)]
        defaultpos = random.choice(defmove)
        playgrid[defaultpos] = 'O'

        if isSuccess(playgrid, 'O'):
            # Terminal: the agent's last move allowed the opponent to win.
            modifyQl(qlearnstinfo, state, qlpos, -1, playgrid, [])
            break
        if isDraw(playgrid):
            modifyQl(qlearnstinfo, state, qlpos, 0, playgrid, [])
            break

        # Non-terminal: bootstrap from the position the agent faces next.
        availmove = [i for i in range(1, 10) if isLegal(playgrid, i)]
        modifyQl(qlearnstinfo, state, qlpos, 0, playgrid, availmove)

    # Exponential decay with a floor of 0.1.
    epsilon = max(epsilon * 0.999, 0.1)

dumpModel(qlearnstinfo)

# Reload from disk so the evaluation cells use exactly what was saved, and
# fix the exploration rate used by play_game during evaluation.
qlearnstinfo = loadModel()
epsilon = 0.1




Model saved successfully.


In [2]:

# Evaluation: a coin flip decides who opens each game.
num_games = 2000

num_qlwin = 0
num_defwin = 0
drawgame = 0

for _ in range(num_games):
    defaultfirst = new_turn() == 1

    # play_game runs one complete game on its own board and returns the
    # outcome, so no inner loop or local board is needed here.
    gamewinai = play_game(qlearnstinfo, defaultfirst)

    if gamewinai == 'QLearning Wins':
        print("QLearning Wins")
        num_qlwin += 1
    elif gamewinai == 'Default Player Wins':
        print("Default Wins")
        num_defwin += 1
    elif gamewinai == 'Draw':
        print("Draw")
        drawgame += 1
    else:
        # Fail loudly instead of silently dropping (or endlessly replaying)
        # a game whose result label is not recognised.
        raise ValueError("Unexpected game result: {!r}".format(gamewinai))

print("QLearning Player Wins:", num_qlwin)
print("Default Player Wins:", num_defwin)
print("Draw Game:", drawgame)


QLearningv Players move:


X |   |  
---------
  |   |  
---------
  |   |  

Default Players move:


X | O |  
---------
  |   |  
---------
  |   |  

QLearningv Players move:


X | O | X
---------
  |   |  
---------
  |   |  

Default Players move:


X | O | X
---------
  |   | O
---------
  |   |  

QLearningv Players move:


X | O | X
---------
X |   | O
---------
  |   |  

Default Players move:


X | O | X
---------
X | O | O
---------
  |   |  

QLearningv Players move:


X | O | X
---------
X | O | O
---------
X |   |  

QLearning Wins
QLearningv Players move:


X |   |  
---------
  |   |  
---------
  |   |  

Default Players move:


X |   |  
---------
  |   |  
---------
O |   |  

QLearningv Players move:


X |   |  
---------
  |   | X
---------
O |   |  

Default Players move:


X |   |  
---------
  |   | X
---------
O | O |  

QLearningv Players move:


X | X |  
---------
  |   | X
---------
O | O |  

Default Players move:


X | X |  
---------
  | O | X
---------


In [2]:

# Evaluation: the random ("default") player opens every game.
num_game = 1000

num_qlwin = 0
num_defwin = 0
draws = 0

for _ in range(num_game):
    defaultfirst = True

    # play_game runs one complete game on its own board and returns the
    # outcome, so no inner loop or local board is needed here.
    gamewinai = play_game(qlearnstinfo, defaultfirst)

    if gamewinai == 'QLearning Wins':
        print("QLearning Wins")
        num_qlwin += 1
    elif gamewinai == 'Default Player Wins':
        print("Default Wins")
        num_defwin += 1
    elif gamewinai == 'Draw':
        print("Draw")
        draws += 1
    else:
        # Fail loudly instead of silently dropping (or endlessly replaying)
        # a game whose result label is not recognised.
        raise ValueError("Unexpected game result: {!r}".format(gamewinai))

print("QLearning wins:", num_qlwin)
print("Default Player wins:", num_defwin)
print("Draws:", draws)


Default Players move:


  |   |  
---------
  |   |  
---------
O |   |  

QLearning Player move:


X |   |  
---------
  |   |  
---------
O |   |  

Default Players move:


X |   |  
---------
  |   |  
---------
O |   | O

QLearning Player move:


X | X |  
---------
  |   |  
---------
O |   | O

Default Players move:


X | X |  
---------
  |   |  
---------
O | O | O

Default Wins
Default Players move:


  |   | O
---------
  |   |  
---------
  |   |  

QLearning Player move:


X |   | O
---------
  |   |  
---------
  |   |  

Default Players move:


X | O | O
---------
  |   |  
---------
  |   |  

QLearning Player move:


X | O | O
---------
X |   |  
---------
  |   |  

Default Players move:


X | O | O
---------
X | O |  
---------
  |   |  

QLearning Player move:


X | O | O
---------
X | O |  
---------
  |   | X

Default Players move:


X | O | O
---------
X | O |  
---------
O |   | X

Default Wins
Default Players move:


  |   | O
---------
  |   |  
---------
  |  

In [3]:

# Evaluation: the Q-learning agent opens every game.
# The game count is defined in this cell under the name the loop actually
# reads, so the cell does not depend on a value left over from another cell.
num_game = 1000

num_qlwin = 0
num_defwin = 0
draws = 0

for _ in range(num_game):
    defaultfirst = False

    # play_game runs one complete game on its own board and returns the
    # outcome, so no inner loop or local board is needed here.
    gamewinai = play_game(qlearnstinfo, defaultfirst)

    if gamewinai == 'QLearning Wins':
        print("QLearning Win")
        num_qlwin += 1
    elif gamewinai == 'Default Player Wins':
        print("Default Player Wins")
        num_defwin += 1
    elif gamewinai == 'Draw':
        print("Draw")
        draws += 1
    else:
        # Fail loudly instead of silently dropping (or endlessly replaying)
        # a game whose result label is not recognised.
        raise ValueError("Unexpected game result: {!r}".format(gamewinai))

print("QLearning wins:", num_qlwin)
print("Default Player Wins:", num_defwin)
print("Draws:", draws)


QLearningv Players move:


  |   |  
---------
  |   |  
---------
  | X |  

Default Players move:


  |   |  
---------
  |   |  
---------
O | X |  

QLearningv Players move:


X |   |  
---------
  |   |  
---------
O | X |  

Default Players move:


X |   |  
---------
O |   |  
---------
O | X |  

QLearningv Players move:


X | X |  
---------
O |   |  
---------
O | X |  

Default Players move:


X | X | O
---------
O |   |  
---------
O | X |  

QLearningv Players move:


X | X | O
---------
O | X |  
---------
O | X |  

QLearning Win
QLearningv Players move:


X |   |  
---------
  |   |  
---------
  |   |  

Default Players move:


X |   |  
---------
O |   |  
---------
  |   |  

QLearningv Players move:


X | X |  
---------
O |   |  
---------
  |   |  

Default Players move:


X | X |  
---------
O |   |  
---------
  |   | O

QLearningv Players move:


X | X | X
---------
O |   |  
---------
  |   | O

QLearning Win
QLearningv Players move:


X |   |  
---------
  | 