In [21]:
from collections import Counter
import numpy as np
import time

In [22]:

NUM_COLUMNS = 7
COLUMN_HEIGHT = 6
FOUR = 4

# Board can be initiatilized with `board = np.zeros((NUM_COLUMNS, COLUMN_HEIGHT), dtype=np.byte)`
# Notez Bien: Connect 4 "columns" are actually NumPy "rows"

In [23]:
def valid_moves(board):
    """Returns columns where a disc may be played"""
    return [n for n in range(NUM_COLUMNS) if board[n, COLUMN_HEIGHT - 1] == 0]


def play(board, column, player):
    """Updates `board` as `player` drops a disc in `column`"""
    (index,) = next((i for i, v in np.ndenumerate(board[column]) if v == 0))
    board[column, index] = player


def take_back(board, column):
    """Updates `board` removing top disc from `column`"""
    (index,) = [i for i, v in np.ndenumerate(board[column]) if v != 0][-1]
    board[column, index] = 0


def four_in_a_row(board, player):
    """Checks if `player` has a 4-piece line"""
    return (
        any(
            all(board[c, r] == player)
            for c in range(NUM_COLUMNS)
            for r in (list(range(n, n + FOUR)) for n in range(COLUMN_HEIGHT - FOUR + 1))
        )
        or any(
            all(board[c, r] == player)
            for r in range(COLUMN_HEIGHT)
            for c in (list(range(n, n + FOUR)) for n in range(NUM_COLUMNS - FOUR + 1))
        )
        or any(
            np.all(board[diag] == player)
            for diag in (
                (range(ro, ro + FOUR), range(co, co + FOUR))
                for ro in range(0, NUM_COLUMNS - FOUR + 1)
                for co in range(0, COLUMN_HEIGHT - FOUR + 1)
            )
        )
        or any(
            np.all(board[diag] == player)
            for diag in (
                (range(ro, ro + FOUR), range(co + FOUR - 1, co - 1, -1))
                for ro in range(0, NUM_COLUMNS - FOUR + 1)
                for co in range(0, COLUMN_HEIGHT - FOUR + 1)
            )
        )
    )

In [24]:
def _mc(board, player):
    p = -player
    while valid_moves(board):
        p = -p
        c = np.random.choice(valid_moves(board))
        play(board, c, p)
        if four_in_a_row(board, p):
            return p
    return 0


def montecarlo(board, player):
    montecarlo_samples = 100
    cnt = Counter(_mc(np.copy(board), player) for _ in range(montecarlo_samples))
    return (cnt[1] - cnt[-1]) / montecarlo_samples


def eval_board(board, player):
    if four_in_a_row(board, 1):
        # Alice won
        return 1
    elif four_in_a_row(board, -1):
        # Bob won
        return -1
    else:
        # Not terminal, let's simulate...
        return montecarlo(board, player)

In [9]:
class minmax:
    def __init__(self):
        self.initialize_game()

    def initialize_game(self):
        self.board = np.zeros((NUM_COLUMNS, COLUMN_HEIGHT), dtype=np.byte)
        self.player_turn = -1
    
    def check_end(self):
        if four_in_a_row(self.board, 1):
            # Alice won
            return 1
        if four_in_a_row(self.board, -1):
            # Bob won
            return -1
        if self.board.all():
            #IT IS A TIE
            return 0
        return None

    def max(self, depth):
        result = self.check_end()
        if result != None:
            if result == 1:
                return (1/depth, 0)
            elif result == -1:
                return (-1/depth, 0)
            elif result == 0:
                return (0, 0)
        max_value = -2
        max_idx = None
        for i in valid_moves(self.board):
            play(self.board, i, 1)
            (value, min_idx) = self.min(depth+1)
            if value > max_value:
                max_value = value
                max_idx = i
            take_back(self.board, i)
        return (max_value, max_idx)
            
    
    def min(self, depth):
        result = self.check_end()
        if result != None:
            if result == 1:
                return (1/depth, 0)
            elif result == -1:
                return (-1/depth, 0)
            elif result == 0:
                return (0, 0)
        min_value = 2
        min_idx = None
        for i in valid_moves(self.board):
            play(self.board, i, -1)
            (value, max_idx) = self.max(depth+1)
            if value < min_value:
                min_value = value
                min_idx = i
            take_back(self.board, i)
        return (min_value, min_idx)
    
    def play(self):
        while True:
            print(self.board)
            self.result = self.check_end()
            if self.result != None:
                if self.result == 1:
                    print('The winner is Player 1')
                elif self.result == -1:
                    print('The winner is Player -1')
                elif self.result == 0:
                    print("It's a tie!")
                self.initialize_game()
                return
            if self.player_turn == -1:
                (m, index) = self.min(1)
                play(self.board, index, -1)
                self.player_turn = 1

            # If it's AI's turn
            else:
                (m, index) = self.max(1)
                play(self.board, index, 1)
                self.player_turn = -1


In [10]:
g = minmax()
g.play()

[[ 1 -1  1 -1  1 -1]
 [ 1 -1  1 -1  1 -1]
 [-1  1 -1  1 -1  1]
 [ 0  0  0  0  0  0]
 [ 0  0  0  0  0  0]
 [ 1 -1  1 -1  1 -1]
 [ 1 -1  1 -1  1 -1]]
[[ 1 -1  1 -1  1 -1]
 [ 1 -1  1 -1  1 -1]
 [-1  1 -1  1 -1  1]
 [-1  0  0  0  0  0]
 [ 0  0  0  0  0  0]
 [ 1 -1  1 -1  1 -1]
 [ 1 -1  1 -1  1 -1]]
[[ 1 -1  1 -1  1 -1]
 [ 1 -1  1 -1  1 -1]
 [-1  1 -1  1 -1  1]
 [-1  1  0  0  0  0]
 [ 0  0  0  0  0  0]
 [ 1 -1  1 -1  1 -1]
 [ 1 -1  1 -1  1 -1]]
[[ 1 -1  1 -1  1 -1]
 [ 1 -1  1 -1  1 -1]
 [-1  1 -1  1 -1  1]
 [-1  1 -1  0  0  0]
 [ 0  0  0  0  0  0]
 [ 1 -1  1 -1  1 -1]
 [ 1 -1  1 -1  1 -1]]
[[ 1 -1  1 -1  1 -1]
 [ 1 -1  1 -1  1 -1]
 [-1  1 -1  1 -1  1]
 [-1  1 -1  1  0  0]
 [ 0  0  0  0  0  0]
 [ 1 -1  1 -1  1 -1]
 [ 1 -1  1 -1  1 -1]]
[[ 1 -1  1 -1  1 -1]
 [ 1 -1  1 -1  1 -1]
 [-1  1 -1  1 -1  1]
 [-1  1 -1  1 -1  0]
 [ 0  0  0  0  0  0]
 [ 1 -1  1 -1  1 -1]
 [ 1 -1  1 -1  1 -1]]
[[ 1 -1  1 -1  1 -1]
 [ 1 -1  1 -1  1 -1]
 [-1  1 -1  1 -1  1]
 [-1  1 -1  1 -1  1]
 [ 0  0  0  0  0  0]
 [ 1 -1

In [11]:
class minmax_alphabeta:
    def __init__(self):
        self.initialize_game()

    def initialize_game(self):
        self.board = np.zeros((NUM_COLUMNS, COLUMN_HEIGHT), dtype=np.byte)
        self.player_turn = -1
    
    def check_end(self):
        if four_in_a_row(self.board, 1):
            # Alice won
            return 1
        elif four_in_a_row(self.board, -1):
            # Bob won
            return -1
        elif self.board.all():
            #IT IS A TIE
            return 0
        return None

    def max(self, alpha, beta, depth):
        result = self.check_end()
        if result != None:
            if result == 1:
                return (1/depth, 0)
            elif result == -1:
                return (-1/depth, 0)
            elif result == 0:
                return (0, 0)
        max_value = -2
        max_idx = None
        for i in valid_moves(self.board):
            play(self.board, i, 1)
            (value, min_idx) = self.min(alpha, beta, depth+1)
            if value > max_value:
                max_value = value
                max_idx = i
            take_back(self.board, i)
            if max_value >= beta:
                return (max_value, max_idx)
            if max_value > alpha:
                alpha = max_value
        return (max_value, max_idx)
            
    
    def min(self, alpha, beta, depth):
        result = self.check_end()
        if result != None:
            if result == 1:
                return (1/depth, 0)
            elif result == -1:
                return (-1/depth, 0)
            elif result == 0:
                return (0, 0)
        min_value = 2
        min_idx = None
        for i in valid_moves(self.board):
            play(self.board, i, -1)
            (value, max_idx) = self.max(alpha, beta, depth+1)
            if value < min_value:
                min_value = value
                min_idx = i
            take_back(self.board, i)
            if min_value <= alpha:
                return (min_value, min_idx)
            if min_value < beta:
                beta = min_value
        return (min_value, min_idx)
    
    def play(self):
        while True:
            print(self.board)
            self.result = self.check_end()
            if self.result != None:
                if self.result == 1:
                    print('The winner is Player 1')
                elif self.result == -1:
                    print('The winner is Player -1')
                elif self.result == 0:
                    print("It's a tie!")
                self.initialize_game()
                return
            if self.player_turn == -1:
                (m, index) = self.min(-2, 2, 1)
                play(self.board, index, -1)
                self.player_turn = 1

            # If it's AI's turn
            else:
                (m, index) = self.max(-2, 2, 1)
                play(self.board, index, 1)
                self.player_turn = -1

In [12]:
g = minmax_alphabeta()
g.play()

[[ 1 -1  1 -1  1 -1]
 [ 1 -1  1 -1  1 -1]
 [-1  1 -1  1 -1  1]
 [ 0  0  0  0  0  0]
 [ 0  0  0  0  0  0]
 [ 1 -1  1 -1  1 -1]
 [ 0  0  0  0  0  0]]
[[ 1 -1  1 -1  1 -1]
 [ 1 -1  1 -1  1 -1]
 [-1  1 -1  1 -1  1]
 [-1  0  0  0  0  0]
 [ 0  0  0  0  0  0]
 [ 1 -1  1 -1  1 -1]
 [ 0  0  0  0  0  0]]
[[ 1 -1  1 -1  1 -1]
 [ 1 -1  1 -1  1 -1]
 [-1  1 -1  1 -1  1]
 [-1  1  0  0  0  0]
 [ 0  0  0  0  0  0]
 [ 1 -1  1 -1  1 -1]
 [ 0  0  0  0  0  0]]
[[ 1 -1  1 -1  1 -1]
 [ 1 -1  1 -1  1 -1]
 [-1  1 -1  1 -1  1]
 [-1  1 -1  0  0  0]
 [ 0  0  0  0  0  0]
 [ 1 -1  1 -1  1 -1]
 [ 0  0  0  0  0  0]]
[[ 1 -1  1 -1  1 -1]
 [ 1 -1  1 -1  1 -1]
 [-1  1 -1  1 -1  1]
 [-1  1 -1  1  0  0]
 [ 0  0  0  0  0  0]
 [ 1 -1  1 -1  1 -1]
 [ 0  0  0  0  0  0]]
[[ 1 -1  1 -1  1 -1]
 [ 1 -1  1 -1  1 -1]
 [-1  1 -1  1 -1  1]
 [-1  1 -1  1 -1  0]
 [ 0  0  0  0  0  0]
 [ 1 -1  1 -1  1 -1]
 [ 0  0  0  0  0  0]]
[[ 1 -1  1 -1  1 -1]
 [ 1 -1  1 -1  1 -1]
 [-1  1 -1  1 -1  1]
 [-1  1 -1  1 -1  1]
 [ 0  0  0  0  0  0]
 [ 1 -1

In [27]:
class minmax_alphabeta_mc:
    def __init__(self):
        self.initialize_game()

    def initialize_game(self):
        self.board = np.zeros((NUM_COLUMNS, COLUMN_HEIGHT), dtype=np.byte)
        self.player_turn = 1
    
    def check_end(self):
        if four_in_a_row(self.board, 1):
            # Alice won
            return 1
        elif four_in_a_row(self.board, -1):
            # Bob won
            return -1
        elif self.board.all():
            #IT IS A TIE
            return 0
        return None

    def max(self, alpha, beta, depth):
        result = self.check_end()
        if result != None or depth<=0:
            if result == 1:
                return (1, 0)
            elif result == -1:
                return (-1, 0)
            elif result == 0:
                return (0, 0)
            else:
                return (eval_board(self.board, 1), 0)
        max_value = -2
        max_idx = None
        for i in valid_moves(self.board):
            play(self.board, i, 1)
            (value, min_idx) = self.min(alpha, beta, depth-1)
            if value > max_value:
                max_value = value
                max_idx = i
            take_back(self.board, i)
            if max_value >= beta:
                return (max_value, max_idx)
            if max_value > alpha:
                alpha = max_value
        return (max_value, max_idx)
            
    
    def min(self, alpha, beta, depth):
        result = self.check_end()
        if result != None or depth<=0:
            if result == 1:
                return (1, 0)
            elif result == -1:
                return (-1, 0)
            elif result == 0:
                return (0, 0)
            else:
                return (eval_board(self.board, -1), 0)
        min_value = 2
        min_idx = None
        for i in valid_moves(self.board):
            play(self.board, i, -1)
            (value, max_idx) = self.max(alpha, beta, depth-1)
            if value < min_value:
                min_value = value
                min_idx = i
            take_back(self.board, i)
            if min_value <= alpha:
                return (min_value, min_idx)
            if min_value < beta:
                beta = min_value
        return (min_value, min_idx)

    def play(self, depth):
        while True:
            print(self.board)
            print('Player turn: ',self.player_turn)
            self.result = self.check_end()
            if self.result != None:
                if self.result == 1:
                    print('The winner is Player 1')
                elif self.result == -1:
                    print('The winner is Player -1')
                elif self.result == 0:
                    print("It's a tie!")
                self.initialize_game()
                return
            if self.player_turn == -1:
                (m, index) = self.min(-2, 2, depth)
                play(self.board, index, -1)
                self.player_turn = 1

            # If it's AI's turn
            else:
                (m, index) = self.max(-2, 2, depth)
                play(self.board, index, 1)
                self.player_turn = -1

In [48]:
g = minmax_alphabeta_mc()
g.play(2)

[[0 0 0 0 0 0]
 [0 0 0 0 0 0]
 [0 0 0 0 0 0]
 [0 0 0 0 0 0]
 [0 0 0 0 0 0]
 [0 0 0 0 0 0]
 [0 0 0 0 0 0]]
Player turn:  -1
[[ 0  0  0  0  0  0]
 [ 0  0  0  0  0  0]
 [ 0  0  0  0  0  0]
 [-1  0  0  0  0  0]
 [ 0  0  0  0  0  0]
 [ 0  0  0  0  0  0]
 [ 0  0  0  0  0  0]]
Player turn:  1
[[ 0  0  0  0  0  0]
 [ 0  0  0  0  0  0]
 [ 1  0  0  0  0  0]
 [-1  0  0  0  0  0]
 [ 0  0  0  0  0  0]
 [ 0  0  0  0  0  0]
 [ 0  0  0  0  0  0]]
Player turn:  -1
[[ 0  0  0  0  0  0]
 [ 0  0  0  0  0  0]
 [ 1  0  0  0  0  0]
 [-1 -1  0  0  0  0]
 [ 0  0  0  0  0  0]
 [ 0  0  0  0  0  0]
 [ 0  0  0  0  0  0]]
Player turn:  1
[[ 0  0  0  0  0  0]
 [ 0  0  0  0  0  0]
 [ 1  0  0  0  0  0]
 [-1 -1  1  0  0  0]
 [ 0  0  0  0  0  0]
 [ 0  0  0  0  0  0]
 [ 0  0  0  0  0  0]]
Player turn:  -1
[[ 0  0  0  0  0  0]
 [ 0  0  0  0  0  0]
 [ 1 -1  0  0  0  0]
 [-1 -1  1  0  0  0]
 [ 0  0  0  0  0  0]
 [ 0  0  0  0  0  0]
 [ 0  0  0  0  0  0]]
Player turn:  1
[[ 0  0  0  0  0  0]
 [ 0  0  0  0  0  0]
 [ 1 -1  0  0

In [49]:
#Monte Carlo Tree Search
from collections import defaultdict
from copy import deepcopy

class Node():
    def __init__(self):
        self.initialize_game()

    def initialize_game(self):
        self.board = np.zeros((NUM_COLUMNS, COLUMN_HEIGHT), dtype=np.byte)
        self.player_turn = 1
    
    def check_end(self):
        if four_in_a_row(self.board, 1):
            # Alice won
            return 1
        elif four_in_a_row(self.board, -1):
            # Bob won
            return -1
        elif self.board.all():
            #IT IS A TIE
            return 0
        return None

    def is_terminal(self):
        if self.check_end() != None:
            return True
        else :
            return False
    def valid_moves(self): 
        return [n for n in range(NUM_COLUMNS) if self.board[n, COLUMN_HEIGHT - 1] == 0]

    def play(self, column):
        (index,) = next((i for i, v in np.ndenumerate(self.board[column]) if v == 0))
        new_node = deepcopy(self)
        new_node.board[column, index] = self.player_turn
        new_node.player_turn = -new_node.player_turn
        return new_node

class MonteCarloTreeSearchNode():
    def __init__(self, state, parent=None, parent_action=None):
        self.state = state
        self.parent = parent
        self.parent_action = parent_action
        self.children = []
        self._number_of_visits = 0
        self._results = defaultdict(int)
        self._results[1] = 0
        self._results[-1] = 0
        self._untried_actions = None
        self._untried_actions = self.untried_actions()
        return

    def untried_actions(self):
        self._untried_actions = self.state.valid_moves()
        return self._untried_actions

    def q(self):
        wins = self._results[1]
        loses = self._results[-1]
        return wins - loses

    def n(self):
        return self._number_of_visits

    def expand(self):
	
        action = self._untried_actions.pop()
        next_state = self.state.play(action)
        child_node = MonteCarloTreeSearchNode(
		    next_state, parent=self, parent_action=action)

        self.children.append(child_node)
        return child_node 

    def is_terminal_node(self):
        return self.state.is_terminal()

    def rollout(self):
        current_rollout_state = self.state
    
        while not current_rollout_state.is_terminal():
        
            possible_moves = current_rollout_state.valid_moves()
        
            action = self.rollout_policy(possible_moves)
            current_rollout_state = current_rollout_state.play(action)
        return current_rollout_state.check_end()

    def backpropagate(self, result):
        self._number_of_visits += 1.
        self._results[result] += 1.
        if self.parent:
            self.parent.backpropagate(result)

    def is_fully_expanded(self):
        return len(self._untried_actions) == 0

    def best_child(self, c_param=0.1):
    
        choices_weights = [(c.q() / c.n()) + c_param * np.sqrt((2 * np.log(self.n()) / c.n())) for c in self.children]
        return self.children[np.argmax(choices_weights)]

    def rollout_policy(self, possible_moves):
    
        return possible_moves[np.random.randint(len(possible_moves))]

    def _tree_policy(self):

        current_node = self
        while not current_node.is_terminal_node():
        
            if not current_node.is_fully_expanded():
                return current_node.expand()
            else:
                current_node = current_node.best_child()
        return current_node

    def best_action(self):
        simulation_no = 100
	
	
        for i in range(simulation_no):
		
            v = self._tree_policy()
            reward = v.rollout()
            v.backpropagate(reward)
	
        return self.best_child(c_param=0.)

def playmc():
    initial_node = Node()
    root = MonteCarloTreeSearchNode(initial_node)
    selected_node = root.best_action()
    print(selected_node.state.board)
    while not selected_node.is_terminal_node():
        selected_node = selected_node.best_action()
        print(selected_node.state.board)
playmc()

[[1 0 0 0 0 0]
 [0 0 0 0 0 0]
 [0 0 0 0 0 0]
 [0 0 0 0 0 0]
 [0 0 0 0 0 0]
 [0 0 0 0 0 0]
 [0 0 0 0 0 0]]
[[ 1 -1  0  0  0  0]
 [ 0  0  0  0  0  0]
 [ 0  0  0  0  0  0]
 [ 0  0  0  0  0  0]
 [ 0  0  0  0  0  0]
 [ 0  0  0  0  0  0]
 [ 0  0  0  0  0  0]]
[[ 1 -1  0  0  0  0]
 [ 0  0  0  0  0  0]
 [ 1  0  0  0  0  0]
 [ 0  0  0  0  0  0]
 [ 0  0  0  0  0  0]
 [ 0  0  0  0  0  0]
 [ 0  0  0  0  0  0]]
[[ 1 -1 -1  0  0  0]
 [ 0  0  0  0  0  0]
 [ 1  0  0  0  0  0]
 [ 0  0  0  0  0  0]
 [ 0  0  0  0  0  0]
 [ 0  0  0  0  0  0]
 [ 0  0  0  0  0  0]]
[[ 1 -1 -1  0  0  0]
 [ 0  0  0  0  0  0]
 [ 1  0  0  0  0  0]
 [ 1  0  0  0  0  0]
 [ 0  0  0  0  0  0]
 [ 0  0  0  0  0  0]
 [ 0  0  0  0  0  0]]
[[ 1 -1 -1  0  0  0]
 [ 0  0  0  0  0  0]
 [ 1  0  0  0  0  0]
 [ 1  0  0  0  0  0]
 [ 0  0  0  0  0  0]
 [ 0  0  0  0  0  0]
 [-1  0  0  0  0  0]]
[[ 1 -1 -1  0  0  0]
 [ 1  0  0  0  0  0]
 [ 1  0  0  0  0  0]
 [ 1  0  0  0  0  0]
 [ 0  0  0  0  0  0]
 [ 0  0  0  0  0  0]
 [-1  0  0  0  0  0]]
