In [202]:
import numpy as np
import math
from collections import Counter

# Board geometry: number of columns and number of cells in each column.
NUM_COLUMNS = 7
COLUMN_HEIGHT = 6
# Length of the line of same-player discs that four_in_a_row looks for.
FOUR = 4
# Depth at which minmax stops recursing and scores the position with mc_simulate.
MINMAX_DEPTH = 2
# Number of random playouts run by every mc_simulate call.
MONTECARLO_SAMPLES = 5
# Number of select/expand iterations performed by montecarlo_wrap.
MONTECARLO_STEPS = 10
# Strategy used by eval_board: 0 = minmax + Monte Carlo, 1 = Monte Carlo only.
EVAL_MODE = 1 #0 for minmax + montecarlo, 1 for montecarlo only

# Board can be initialized with `board = np.zeros((NUM_COLUMNS, COLUMN_HEIGHT), dtype=np.byte)`
# Nota bene: Connect 4 "columns" are actually NumPy "rows"


Basic Functions

In [203]:
def valid_moves(board):
    """List the column indices that can still accept a disc."""
    top_cell = COLUMN_HEIGHT - 1
    playable = []
    for column in range(NUM_COLUMNS):
        # A column is open for as long as its topmost cell is empty.
        if board[column, top_cell] == 0:
            playable.append(column)
    return playable


def play(board, column, player):
    """Updates `board` in place as `player` drops a disc in `column`.

    The disc is written into the first empty (zero) cell of the column.

    Raises:
        ValueError: if `column` has no empty cell left.
    """
    empty_cells = np.flatnonzero(board[column] == 0)
    if empty_cells.size == 0:
        # A bare StopIteration (what next() used to raise here) is easily
        # swallowed or turned into a RuntimeError when it crosses a generator,
        # so report the illegal move explicitly instead.
        raise ValueError(f"column {column} is full")
    board[column, empty_cells[0]] = player


def take_back(board, column):
    """Clear, in place, the highest occupied cell of `column` in `board`."""
    occupied = np.flatnonzero(board[column])
    # The last non-zero position is the most recently dropped disc.
    top = occupied[-1]
    board[column, top] = 0


def four_in_a_row(board, player):
    """Tell whether `player` owns FOUR consecutive cells along any line.

    Every window of FOUR cells is inspected inside each column, across
    adjacent columns at the same height, and along both diagonal directions.
    """
    owned = board == player
    offsets = np.arange(FOUR)
    column_starts = range(NUM_COLUMNS - FOUR + 1)
    height_starts = range(COLUMN_HEIGHT - FOUR + 1)

    # Windows stacked inside a single column.
    for col in range(NUM_COLUMNS):
        for low in height_starts:
            if owned[col, low + offsets].all():
                return True

    # Windows spanning FOUR adjacent columns at the same height.
    for height in range(COLUMN_HEIGHT):
        for left in column_starts:
            if owned[left + offsets, height].all():
                return True

    # Diagonal windows: height increasing, then height decreasing, as the
    # column index grows.
    for left in column_starts:
        for low in height_starts:
            if owned[left + offsets, low + offsets].all():
                return True
            if owned[left + offsets, low + FOUR - 1 - offsets].all():
                return True

    return False

Encode/decode a board

MinMax

In [204]:



def minmax(board, player, depth, a,b):
    """Depth-limited minimax search with cut-offs; returns `(move, score, depth)`.

    `player` is the side to move: a positive player maximises the score, a
    negative one minimises it. `a` and `b` are the caller's current bounds and
    are only used to decide when to stop exploring further moves. `board` is
    modified during the search, but every move is taken back before returning.

    The returned `move` is -1 when the position is scored directly: a line is
    already complete (score is the winner, 1 or -1), the depth limit
    MINMAX_DEPTH is reached (score comes from mc_simulate), or no move is left
    (score 0). `depth` is the depth at which the reported score was produced.
    """
    # The move leading to this position was made by -player, so that is the
    # side that may just have completed a line.
    if four_in_a_row(board, -player):
        return (-1, -player, depth)

    if depth >= MINMAX_DEPTH:
        score = mc_simulate(board, player)[0]
        return (-1, score, depth)

    moves = valid_moves(board)
    if not moves:
        # Board is full and nobody won: score it as a draw instead of letting
        # max()/min() fail on an empty list.
        return (-1, 0, depth)

    evaluations = list()
    # Bounds collected at this node and handed down to the children.
    cur_a = -math.inf
    cur_b = math.inf
    for m in moves:
        play(board, m, player)
        val = minmax(board, -player, depth + 1, cur_a, cur_b)
        evaluations.append((m, val[1], val[2]))
        take_back(board, m)
        # Stop as soon as this value already falls outside the caller's bounds.
        if alpha_beta_stop(a, b, player, val[1]):
            break
        cur_a, cur_b = alpha_beta_update(cur_a, cur_b, player, val[1])

    # The score dominates the key; the depth term only separates equal scores,
    # favouring the smaller depth for both sides.
    if player > 0:
        return max(evaluations, key=lambda k: k[1] * 100 - k[2])
    else:
        return min(evaluations, key=lambda k: k[1] * 100 + k[2])

def alpha_beta_update(a,b, player, cur):
    """Return the `(a, b)` bounds after taking the value `cur` into account.

    A positive `player` raises `a` to `cur` when `cur` is larger; a negative
    `player` lowers `b` to `cur` when `cur` is smaller. In every other case
    the bounds come back unchanged.
    """
    if player > 0:
        if cur > a:
            a = cur
    elif player < 0:
        if cur < b:
            b = cur
    return a, b

def alpha_beta_stop(a, b, player, cur):
    """Tell whether `cur` lets `player` stop exploring further moves.

    A positive `player` stops once `cur` reaches `b`; a negative `player`
    stops once `cur` drops to `a`. Any other `player` value never stops.
    """
    if player > 0:
        return bool(cur >= b)
    if player < 0:
        return bool(cur <= a)
    return False





Montecarlo

In [205]:
class node:
    """One position of the Monte Carlo search tree.

    `player` is the side that made the move leading to this position: it is
    the side `terminal` is checked for and the side whose results `num`
    accumulates. The next disc on `board` is therefore dropped by `-player`.
    `num / den` is the share of playouts scored for `player` (a draw counts
    as half a win).
    """

    def __init__(self, board, num, den, player, parent):
        self.board = np.copy(board)  # private copy, safe to play/take back on
        self.num = num
        self.den = den
        self.player = player
        self.parent = parent
        self.children = list()
        # True when `player` already has a complete line on this board.
        self.terminal = four_in_a_row(board, player)

    def simulate(self):
        """Score this position with MONTECARLO_SAMPLES playouts and
        propagate the result to this node and all of its ancestors."""
        if self.terminal:
            # `player` has already won here: every sample is a win for that
            # side only, never for its opponent.
            draws = 0
            winmax = MONTECARLO_SAMPLES if self.player > 0 else 0
            winmin = MONTECARLO_SAMPLES if self.player < 0 else 0
        else:
            # `player` has just moved, so the random playouts start with the
            # opponent.
            _, draws, winmax, winmin = mc_simulate(self.board, -self.player)

        self.backprop(winmax, winmin, draws, MONTECARLO_SAMPLES)

    def backprop(self, winmax, winmin, draws, den):
        """Add `den` playouts (`winmax` wins for the positive side, `winmin`
        for the negative side, `draws` draws) to this node and its ancestors."""
        if self.player > 0:
            self.num += winmax + 0.5 * draws
        else:
            self.num += winmin + 0.5 * draws

        self.den += den
        if self.parent:
            self.parent.backprop(winmax, winmin, draws, den)

    def expand(self):
        """Create and simulate one child per legal move; returns a copy of
        the children list."""
        mover = -self.player  # the side whose turn it is in this position
        for m in valid_moves(self.board):
            play(self.board, m, mover)
            c = node(self.board, 0, 0, mover, self)
            c.simulate()
            self.children.append(c)
            take_back(self.board, m)

        return self.children.copy()

    def winrate(self):
        """Share of recorded playouts scored for `player` (simplified UCT).

        Returns the neutral value 0.5 while no playout has been recorded, so
        that a node without any legal move does not divide by zero.
        """
        if self.den == 0:
            return 0.5
        return self.num / self.den
         

def mc_select(node):
    """Walk down the tree from `node`, always following the non-terminal
    child with the highest winrate, and return the node where the walk stops.

    The walk stops at a node without children, or at a node whose children
    are all terminal.
    """
    if not node.children:
        return node

    # Materialise the candidates: a filter object is always truthy, so it
    # cannot be used to test for "no candidate left", and max() raises
    # ValueError on an empty iterable.
    candidates = [c for c in node.children if c.terminal == False]
    if not candidates:
        return node

    cur_best = max(candidates, key=lambda n: n.winrate())
    return mc_select(cur_best)

def _mc(board, player):
    """Play uniformly random moves on `board` (modified in place), starting
    with `player` and alternating sides.

    Returns the side that completes a line first, or 0 when the board fills
    up without a winner.
    """
    mover = player
    while True:
        options = valid_moves(board)
        if not options:
            return 0
        column = np.random.choice(options)
        play(board, column, mover)
        if four_in_a_row(board, mover):
            return mover
        mover = -mover



def mc_simulate(board, player):
    """Run MONTECARLO_SAMPLES random playouts from `board` with `player`
    moving first.

    Returns `(score, draws, wins_of_1, wins_of_minus_1)` where `score` is
    `(wins_of_1 - wins_of_minus_1) / MONTECARLO_SAMPLES`.
    """
    outcomes = Counter()
    for _ in range(MONTECARLO_SAMPLES):
        # Every playout runs on its own copy, so `board` is left untouched.
        outcomes[_mc(np.copy(board), player)] += 1

    wins_max, wins_min, draws = outcomes[1], outcomes[-1], outcomes[0]
    score = (wins_max - wins_min) / MONTECARLO_SAMPLES
    return score, draws, wins_max, wins_min

def montecarlo_wrap(board, player):
    """Estimate the winrate of `player` on `board` with a small tree search.

    Returns 1.0 straight away when `player` already has a complete line on
    `board`; otherwise returns the winrate recorded on the root node after
    MONTECARLO_STEPS rounds of selection and expansion.
    """
    root = node(board, 0, 0, player, None)
    if root.terminal:
        return 1.0

    # First ply: one simulated child per legal move.
    root.expand()

    for _ in range(MONTECARLO_STEPS):
        # Selection follows the best winrates down the tree; expanding the
        # selected node also simulates its children and backpropagates.
        mc_select(root).expand()

    return root.winrate()



Evaluation wrapper

In [206]:
def montecarlo_only(board, player):
    """Score every legal move of `player` with montecarlo_wrap and return the
    `(move, winrate)` pair with the highest winrate.

    Each move is played on `board`, evaluated, taken back and reported on
    standard output.
    """
    def try_move(m):
        play(board, m, player)
        winrate = montecarlo_wrap(board, player)
        take_back(board, m)
        print(f"{m} gives {winrate} winrate")
        return m, winrate

    scored_moves = [try_move(m) for m in valid_moves(board)]
    return max(scored_moves, key=lambda pair: pair[1])
        

def eval_board(board, player):
    """Choose a move for `player` on `board`; returns `(move, score)`.

    With EVAL_MODE == 0 (minmax + Monte Carlo) the score lies in [-1, 1] and
    is expressed from the point of view of player 1. With EVAL_MODE == 1
    (Monte Carlo only) the score is the winrate of `player`, in [0, 1].

    `move` is -1 when there is nothing to play: the game is already won, the
    board is full, or EVAL_MODE is not a known mode.
    """
    # Game already decided: report who won on the scale of the active mode,
    # so that a win of Bob (-1) is not reported like a win of Alice (1).
    for winner in (1, -1):
        if four_in_a_row(board, winner):
            if EVAL_MODE == 0:
                return -1, winner
            return -1, (1 if winner == player else 0)

    # Full board without a winner: a draw, neutral on either scale.
    if not valid_moves(board):
        return -1, (0 if EVAL_MODE == 0 else 0.5)

    # Not terminal, let's simulate...
    if EVAL_MODE == 0:
        result = minmax(board, player, 0, -math.inf, math.inf)
    elif EVAL_MODE == 1:
        result = montecarlo_only(board, player)
    else:
        print("wrong evaluation mode selected, check the EVAL_MODE constant")
        return -1, 0
    return result[0], result[1]


Example

In [207]:


# Player 1 holds the bottom cells of columns 3, 4 and 5; player -1 has two
# discs stacked in column 0. Ask for the best move of player 1.
board = np.zeros((NUM_COLUMNS, COLUMN_HEIGHT), dtype=np.byte)
play(board, 3, 1)
play(board, 0, -1)
play(board, 4, 1)
play(board, 0, -1)
play(board, 5, 1)
eval_board(board, 1)



0 gives 0.9506493506493506 winrate
1 gives 0.9454545454545454 winrate
2 gives 1.0 winrate
3 gives 0.9710526315789474 winrate
4 gives 0.8727272727272727 winrate
5 gives 0.8987012987012987 winrate
6 gives 1.0 winrate


(2, 1.0)