In [1]:
import random
import math
import numpy as np
import pickle

In [2]:
def gameBoard(rows, columns):
    """Build an empty board: a ``rows`` x ``columns`` NumPy array filled with zeros."""
    board_shape = (rows, columns)
    return np.zeros(board_shape)

def checkt(gamegrid, column):
    """Tell whether ``column`` can still take a piece.

    The column counts as open while its cell in the last row of ``gamegrid``
    still holds 0.
    """
    last_row_cell = gamegrid[-1][column]
    return last_row_cell == 0

def fetchRow(gamegrid, column):
    """Return the smallest row index whose cell in ``column`` is 0.

    Returns None when no cell in that column is 0.
    """
    for row_index, row_cells in enumerate(gamegrid):
        if row_cells[column] == 0:
            return row_index
    return None

def legalMoves(gamegrid):
    """List, in ascending order, the column indices that ``checkt`` reports as open."""
    open_columns = []
    for col_index in range(gamegrid.shape[1]):
        if checkt(gamegrid, col_index):
            open_columns.append(col_index)
    return open_columns


def checkWins(gamegrid, marker):
    """Return True if ``marker`` fills four consecutive cells in any line.

    Every horizontal, vertical and diagonal (both slopes) window of four cells
    that fits on the board is examined; False is returned when none is filled.
    """
    n_rows, n_cols = gamegrid.shape

    # Each entry: (row step, column step, start rows, start columns). The start
    # ranges are chosen so that all four cells of a window stay on the board.
    scans = (
        (0, 1, range(n_rows), range(n_cols - 3)),        # horizontal
        (1, 0, range(n_rows - 3), range(n_cols)),        # vertical
        (1, 1, range(n_rows - 3), range(n_cols - 3)),    # diagonal, row index increasing
        (-1, 1, range(3, n_rows), range(n_cols - 3)),    # diagonal, row index decreasing
    )

    for row_step, col_step, start_rows, start_cols in scans:
        for r in start_rows:
            for c in start_cols:
                if all(gamegrid[r + row_step * k][c + col_step * k] == marker for k in range(4)):
                    return True

    return False


def isFint(gamegrid, defmark, mmmark):
    """Report whether the game is over.

    The game is finished when either marker has a winning line according to
    ``checkWins`` or when ``legalMoves`` finds no open column.
    """
    for marker in (defmark, mmmark):
        if checkWins(gamegrid, marker):
            return True
    return not legalMoves(gamegrid)

def fetchpos(positions):
    """Encode a board as one integer key.

    Each cell of ``positions.flatten()`` is truncated to an int, the digits are
    concatenated in that order, and the resulting string is parsed as a base-10
    integer (so leading zero cells add no digits to the key).
    """
    return int(''.join([str(int(position)) for position in positions.flatten()]))

def qlearnval(ql_states, crr_brd, crr_pos):
    """Return the Q-value stored for playing column ``crr_pos`` on ``crr_brd``.

    Args:
        ql_states: dict keyed by ``(board key, column)`` pairs; mutated in place.
        crr_brd: board array, turned into its key with ``fetchpos``.
        crr_pos: column index of the move being evaluated.

    Returns:
        The stored value, or 0 for a pair that was not in the table yet (that
        pair is then recorded with value 0).
    """
    state_action = (fetchpos(crr_brd), crr_pos)
    # Membership must be tested with the full (board key, column) pair. Testing
    # the bare board key never matched a tuple-keyed table, so every lookup
    # used to overwrite the learned value with 0.
    return ql_states.setdefault(state_action, 0)

def qlearnBestPos(ql_states, crr_brd, availablemoves, epsilon):
    """Choose a column epsilon-greedily.

    With probability ``epsilon`` a uniformly random entry of ``availablemoves``
    is returned; otherwise the move with the highest ``qlearnval`` is returned
    (the earliest such move when several tie).
    """
    if random.random() < epsilon:
        return random.choice(availablemoves)

    scored_moves = [(qlearnval(ql_states, crr_brd, move), move) for move in availablemoves]
    _, best_move = max(scored_moves, key=lambda pair: pair[0])
    return best_move

def loadModel(file_path="connect4_QL_500k.pickle"):
    """Unpickle and return the object stored at ``file_path``.

    NOTE: ``pickle.load`` can execute arbitrary code while loading, so only
    point this at a file from a trusted source.
    """
    with open(file_path, "rb") as model_file:
        return pickle.load(model_file)
	
def _scoreWindow(window, marker, othermark):
    """Score one four-cell window from ``marker``'s point of view."""
    own = window.count(marker)
    empty = window.count(0)

    score = 0
    if own == 4:
        score += 1000
    elif own == 3 and empty == 1:
        score += 100
    elif own == 2 and empty == 2:
        score += 10

    # An opposing three-with-a-gap is penalised independently of the ladder above.
    if window.count(othermark) == 3 and empty == 1:
        score -= 10
    return score


def getScore(gamegrid, marker, defaultmark, mm_marker):
    """Heuristically score ``gamegrid`` for ``marker``.

    Every horizontal, vertical and diagonal window of four cells is scored:
    +1000 for four own pieces, +100 for three own pieces plus one empty cell,
    +10 for two own pieces plus two empty cells, and -10 when the other marker
    holds three cells with the fourth empty.

    Args:
        gamegrid: 2-D NumPy board; cells are truncated to int before counting.
        marker: the marker the score is computed for.
        defaultmark, mm_marker: the two markers in play; the one that is not
            ``marker`` is treated as the opponent.

    Returns:
        The summed integer score over all windows.
    """
    othermark = mm_marker if marker == defaultmark else defaultmark
    rows, cols = gamegrid.shape
    cells = gamegrid.astype(int).tolist()

    windows = []

    # Horizontal windows: every row, every start column that fits.
    for r in range(rows):
        for c in range(cols - 3):
            windows.append(cells[r][c:c + 4])

    # Vertical windows: iterate over *columns* (not rows), so the last column
    # is scored too and boards with more rows than columns do not overflow.
    for c in range(cols):
        for r in range(rows - 3):
            windows.append([cells[r + k][c] for k in range(4)])

    # Both diagonal directions, anchored at the same top-left start cell.
    for r in range(rows - 3):
        for c in range(cols - 3):
            windows.append([cells[r + k][c + k] for k in range(4)])
            windows.append([cells[r + 3 - k][c + k] for k in range(4)])

    return sum(_scoreWindow(window, marker, othermark) for window in windows)

def minMax(gamegrid, crrdepth, ismovem, mm_marker, defaultmark, alpha, beta):
    """Depth-limited minimax search with alpha-beta pruning.

    Args:
        gamegrid: board array; it is copied before every simulated move and
            never modified here.
        crrdepth: remaining plies to search; at 0 the board is scored with
            ``getScore`` from ``mm_marker``'s point of view.
        ismovem: True when the maximizing player (``mm_marker``) is to move,
            False when the opponent (``defaultmark``) is to move.
        mm_marker: marker of the maximizing player.
        defaultmark: marker of the opponent.
        alpha: best score the maximizer is already guaranteed.
        beta: best score the minimizer is already guaranteed.

    Returns:
        ``(column, score)``. ``column`` is None on finished boards and at
        depth 0; otherwise it is the chosen column.
    """
    if isFint(gamegrid, defaultmark, mm_marker):

        if checkWins(gamegrid, mm_marker):
            return (None, float('inf'))

        elif checkWins(gamegrid, defaultmark):
            return (None, -float('inf'))

        else:
            # No winner and no open column: a draw.
            return (None, 0)

    if crrdepth == 0:
        return (None, getScore(gamegrid, mm_marker, defaultmark, mm_marker))

    availablemoves = legalMoves(gamegrid)

    if ismovem:
        best_scr = -math.inf
        # Fallback when no child beats -inf (every line loses).
        best_pos = random.choice(availablemoves)

        for position in availablemoves:
            randr = fetchRow(gamegrid, position)
            new_gg = gamegrid.copy()
            new_gg[randr][position] = mm_marker
            curr_mmscore = minMax(new_gg, crrdepth - 1, False, mm_marker, defaultmark, alpha, beta)[1]

            if curr_mmscore > best_scr:
                best_scr = curr_mmscore
                best_pos = position

            alpha = max(best_scr, alpha)

            if alpha >= beta:
                break

        return best_pos, best_scr

    else:
        best_scr = math.inf
        # Fallback when no child is below +inf (every line wins for the maximizer).
        best_pos = random.choice(availablemoves)

        for position in availablemoves:
            randr = fetchRow(gamegrid, position)
            new_gg = gamegrid.copy()
            # The minimizing ply is the opponent's move, so the opponent's
            # marker is placed (this used to place mm_marker by mistake).
            new_gg[randr][position] = defaultmark
            curr_mmscore = minMax(new_gg, crrdepth - 1, True, mm_marker, defaultmark, alpha, beta)[1]

            if curr_mmscore < best_scr:
                best_scr = curr_mmscore
                best_pos = position

            beta = min(beta, best_scr)

            if alpha >= beta:
                break

        return best_pos, best_scr


def playConnect(mmplayfirst, qlplay, mm_marker, qlmarkr, boardg, depth=8, epsilon=0.1):
    """Play one game between the MinMax player and the Q-learning player.

    The players alternate, with MinMax moving first when ``mmplayfirst`` is
    true. ``boardg`` is modified in place as pieces are dropped.

    Args:
        mmplayfirst: whether the MinMax player makes the first move.
        qlplay: Q-table handed to ``qlearnBestPos``.
        mm_marker: marker placed by the MinMax player.
        qlmarkr: marker placed by the Q-learning player.
        boardg: board array the game is played on.
        depth: search depth passed to ``minMax``.
        epsilon: exploration rate passed to ``qlearnBestPos``.

    Returns:
        "MinMax Algo Wins", "QLearning Algo Wins" or "Draw".
    """
    minmax_to_move = mmplayfirst

    while True:
        # A full board before a move means nobody won.
        open_columns = legalMoves(boardg)
        if not open_columns:
            return "Draw"

        if minmax_to_move:
            column = minMax(boardg, depth, True, mm_marker, qlmarkr, -math.inf, math.inf)[0]
            piece = mm_marker
            verdict = "MinMax Algo Wins"
        else:
            column = qlearnBestPos(qlplay, boardg, open_columns, epsilon)
            piece = qlmarkr
            verdict = "QLearning Algo Wins"

        landing_row = fetchRow(boardg, column)
        boardg[landing_row][column] = piece
        if checkWins(boardg, piece):
            return verdict

        minmax_to_move = not minmax_to_move



In [3]:
def simGames(games, rows, columns, first_Player):
    """Play ``games`` MinMax-vs-Q-learning games and print the tally.

    Args:
        games: number of games to play.
        rows, columns: board dimensions used for every game.
        first_Player: "random" picks the starter per game with a coin flip,
            "minmax" lets MinMax start; any other value lets Q-learning start.

    The Q-table is loaded once through ``loadModel()`` and reused for all games.
    Nothing is returned; the results are printed.
    """
    model = loadModel()
    mm_marker, qlmarkr = 2, 1
    mmwinss, qlwinss, Draw = 0, 0, 0

    for _ in range(games):
        if first_Player == "random":
            mmplayfirst = random.choice([True, False])
        else:
            mmplayfirst = first_Player == "minmax"

        fresh_board = gameBoard(rows, columns)
        outcome = playConnect(mmplayfirst, model, mm_marker, qlmarkr, fresh_board, depth=8, epsilon=0.1)

        if outcome == 'MinMax Algo Wins':
            mmwinss += 1
        elif outcome == 'QLearning Algo Wins':
            qlwinss += 1
        else:
            Draw += 1

    print(f"Results when {'MinMax' if first_Player == 'minmax' else 'QLearning' if first_Player == 'qlearning' else 'either player'} starts first:")
    print(f"MinMax wins: {mmwinss}")
    print(f"QLearning wins: {qlwinss}")
    print(f"Draws: {Draw}")


In [4]:

# Short run: two games on a 6x7 board, with the starting player picked at
# random for each game.
games = 2
simGames(games, 6, 7, "random") 


Results when either player starts first:
MinMax wins: 2
QLearning wins: 0
Draws: 0


In [None]:
# 100 games on a 6x7 board with the MinMax player always moving first.
games=100    
simGames(games, 6, 7, "minmax")  


In [None]:
# 100 games on a 6x7 board with the Q-learning player always moving first.
games=100
simGames(games, 6, 7, "qlearning")  