In [2]:
import numpy as np
import random
import pickle

def gameBoard(rows=6, columns=7):
    return np.zeros((rows, columns))

def isLegalturn(gamegrid, column):
    return gamegrid[len(gamegrid)-1][column] == 0

def fetchRow(gamegrid, column):
    return next((row for row in range(len(gamegrid)) if gamegrid[row][column] == 0), None)

def fetchturn(gamegrid):
    return [column for column in range(gamegrid.shape[1]) if isLegalturn(gamegrid, column)]

def fetchavailablePos(gamegrid, marker):
    rows, cols = gamegrid.shape
    for row in range(rows):
        for col in range(cols - 3):
            if all(gamegrid[row][col + i] == marker for i in range(4)):
                return row, col
    for row in range(rows - 3):
        for col in range(cols):
            if all(gamegrid[row + i][col] == marker for i in range(4)):
                return row, col
    for row in range(rows - 3):
        for col in range(cols - 3):
            if all(gamegrid[row + i][col + i] == marker for i in range(4)):
                return row, col
    for row in range(3, rows):
        for col in range(cols - 3):
            if all(gamegrid[row - i][col + i] == marker for i in range(4)):
                return row, col
    return -1, -1

def check_OP(gamegrid, marker):
    rows, cols = gamegrid.shape
    for row in range(rows):
        for col in range(cols - 3):
            if all(gamegrid[row][col + i] == marker for i in range(4)):
                return True
    for row in range(rows - 3):
        for col in range(cols):
            if all(gamegrid[row + i][col] == marker for i in range(4)):
                return True
    for row in range(rows - 3):
        for col in range(cols - 3):
            if all(gamegrid[row + i][col + i] == marker for i in range(4)):
                return True
    for row in range(3, rows):
        for col in range(cols - 3):
            if all(gamegrid[row - i][col + i] == marker for i in range(4)):
                return True
    return False

def checkturn(gamegrid, defaultmarker, mm_marker):
    return any(check_OP(gamegrid, marker) for marker in (defaultmarker, mm_marker)) or not fetchturn(gamegrid)

def defaultPlayer(gamegrid, defaultmarker, mm_marker):
    if checkturn(gamegrid, defaultmarker, mm_marker):
        defrow, defcol = fetchavailablePos(gamegrid, defaultmarker)
        if defrow != -1:
            return defrow, defcol
    return randMove(gamegrid)

def randMove(gamegrid):
    avaialablemoves = fetchturn(gamegrid)
    rndro = fetchRow(gamegrid, random.choice(avaialablemoves)) 
    rndcl = random.choice(avaialablemoves)
    return rndro, rndcl

def loadModel(file_path="connect4_QL_500k.pickle"):
    try:
        with open(file_path, "rb") as file:
            ql_states = pickle.load(file)
            return ql_states
    except FileNotFoundError:
        print("Error: Could not find the Q-learning model file.")
        return None

def playConnect(defualtfirst, defaultPlayer):
    qlmark = 1
    defmark = 2
    gamegrid = gameBoard()

    while True:
        if defualtfirst:
            if len(fetchturn(gamegrid)) == 0:
                return "Draw"

            defr, defc = defaultPlayer(gamegrid, defmark, qlmark)
            gamegrid[defr][defc] = defmark
            
            if check_OP(gamegrid, defmark): 
                return "Default Player Wins"

            if check_OP(gamegrid, qlmark):
                return "QLearning Wins"

            if len(fetchturn(gamegrid)) == 0:
                return "Draw"
            
            qlcol = random.choice(fetchturn(gamegrid))
            qlrow = fetchRow(gamegrid, qlcol)
            gamegrid[qlrow][qlcol] = qlmark
            
            if check_OP(gamegrid, defmark): 
                return "Default Player Wins"

            if check_OP(gamegrid, qlmark):
                return "QLearning Wins"

            if len(fetchturn(gamegrid)) == 0:
                return "Draw"
            
        else:
            if len(fetchturn(gamegrid)) == 0:
                return "Draw"
                
            qlcol = random.choice(fetchturn(gamegrid))
            qlrow = fetchRow(gamegrid, qlcol)
            gamegrid[qlrow][qlcol] = qlmark
            
            if check_OP(gamegrid, defmark): 
                return "Default Player Wins"

            if check_OP(gamegrid, qlmark):
                return "QLearning Wins"

            if len(fetchturn(gamegrid)) == 0:
                return "Draw"

            if len(fetchturn(gamegrid)) == 0:
                return "Draw"

            defr, defc = defaultPlayer(gamegrid, defmark, qlmark)
            gamegrid[defr][defc] = defmark
            
            if check_OP(gamegrid, defmark): 
                return "Default Player Wins"

            if check_OP(gamegrid, qlmark):
                return "QLearning Wins"

            if len(fetchturn(gamegrid)) == 0:
                return "Draw"

def print_results(defwins, qlwins, Draw):
    print("Results:")
    print(f"Default Player wins: {defwins}")
    print(f"Q-Learning wins: {qlwins}")
    print(f"Draws: {Draw}")


In [3]:
def simulate_game(games, defaultPlayer):
    defwins = qlwins = Draw = 0
    for _ in range(games):
        defualtfirst = random.choice([True, False])
        algowin = playConnect(defualtfirst, defaultPlayer)

        if algowin == 'QLearning Wins':
            qlwins += 1
        elif algowin == 'Default Player Wins':
            defwins += 1
        else:
            Draw += 1

    return defwins, qlwins, Draw




In [4]:

games = 10
ql_states = loadModel()
print(f"Current Q Learning model has {len(ql_states)} states")
print("Simulation when Default Player starts first:")
defwins, qlwins, Draw = simulate_game(games, defaultPlayer)
print_results(defwins, qlwins, Draw)



Current Q Learning model has 45882275 states
Simulation when Default Player starts first:
Results:
Default Player wins: 3
Q-Learning wins: 7
Draws: 0


In [None]:
games = 5
ql_states = loadModel()
print("Simulation when Q-Learning Agent starts first:")
defwins, qlwins, Draw = simulate_game(games, defaultPlayer)
print_results(defwins, qlwins, Draw)

In [3]:
games = 1000
ql_states = loadModel()
print("Simulation when first player is random:")
defwins, qlwins, Draw = simulate_game(games, defaultPlayer)
print_results(defwins, qlwins, Draw)


Simulation when first player is random:
Results:
Default Player wins: 320
Q-Learning wins: 664
Draws: 16
