In [None]:
!pip install kaggle_environments
import kaggle_environments
from kaggle_environments import make, evaluate, utils
import numpy as np
import random
import time

In [3]:
def get_win_percentages(agent1, agent2, n_rounds=10):
    # Use default Connect Four setup
    import numpy as np
    config = {'rows': 6, 'columns': 7, 'inarow': 4}
    # Agent 1 goes first (roughly) half the time          
    outcomes = evaluate("connectx", [agent1, agent2], config, [], n_rounds//2)
    # Agent 2 goes first (roughly) half the time      
    outcomes += [[b,a] for [a,b] in evaluate("connectx", [agent2, agent1], config, [], n_rounds-n_rounds//2)]
    print("Agent 1 Win Percentage:", np.round(outcomes.count([1,-1])/len(outcomes), 2))
    print("Agent 2 Win Percentage:", np.round(outcomes.count([-1,1])/len(outcomes), 2))
    print("Number of Invalid Plays by Agent 1:", outcomes.count([None, 0]))
    print("Number of Invalid Plays by Agent 2:", outcomes.count([0, None]))

# Versions

##### Pseudocode

Minimax Pseudocode:
```
function minimax(node, depth, maximizingPlayer) is
    if depth = 0 or node is a terminal node then
        return the heuristic value of node
    if maximizingPlayer then
        value := −∞
        for each child of node do
            value := max(value, minimax(child, depth − 1, FALSE))
        return value
    else (* minimizing player *)
        value := +∞
        for each child of node do
            value := min(value, minimax(child, depth − 1, TRUE))
        return value 

### Initial call:        
### alphabeta(origin, depth, TRUE)
```
[minimax algorithm](https://en.wikipedia.org/wiki/Minimax#Pseudocode) 

##### Implementation

In [None]:
#@title pre-Heuristic Agent (working)
def preheuristic(obs, config):
    #config is dict: {'rows': 6, 'columns': 7, 'inarow': 4}

    ################################
    # Imports and helper functions #
    ################################

    import numpy as np
    import random

    # constants
    ROWS = config.rows
    COLUMNS = config.columns
    CNCTX = config.inarow
    A = 2       #2 threes
    B = 200     #10 fours
    C = -1      #-1 opp-threes
    D = -100    #-10opp-fours   

    # lookahead depth:
    N_STEPS =  4#@param {type: "integer"}

    # Gets board at next step if agent drops piece in selected column
    def drop_piece(grid, col, mark, config):
        next_grid = grid.copy()
        for row in range(ROWS-1, -1, -1):       ###row in range(0,ROWS)??
            if next_grid[row][col] == 0:
                break
        next_grid[row][col] = mark
        return next_grid

    # Helper function for get_heuristic: checks if window satisfies heuristic conditions
    def check_window(window, num_discs, piece, config):
        return (window.count(piece) == num_discs and window.count(0) == CNCTX-num_discs)

    # Helper function for get_heuristic: counts number of windows satisfying specified heuristic conditions
    def count_windows(grid, num_discs, piece, config):
        num_windows = 0
        # horizontal
        for row in range(ROWS):
            for col in range(COLUMNS-(CNCTX-1)):
                window = list(grid[row, col:col+CNCTX])
                if check_window(window, num_discs, piece, config):
                    num_windows += 1
        # vertical
        for row in range(ROWS-(CNCTX-1)):
            for col in range(COLUMNS):
                window = list(grid[row:row+CNCTX, col])
                if check_window(window, num_discs, piece, config):
                    num_windows += 1
        # positive diagonal
        for row in range(ROWS-(CNCTX-1)):
            for col in range(COLUMNS-(CNCTX-1)):
                window = list(grid[range(row, row+CNCTX), range(col, col+CNCTX)])
                if check_window(window, num_discs, piece, config):
                    num_windows += 1
        # negative diagonal
        for row in range(CNCTX-1, ROWS):
            for col in range(COLUMNS-(CNCTX-1)):
                window = list(grid[range(row, row-CNCTX, -1), range(col, col+CNCTX)])
                if check_window(window, num_discs, piece, config):
                    num_windows += 1
        return num_windows

    # Helper function for minimax: calculates value of heuristic for grid
    def get_heuristic(grid, mark, config, ):
        num_threes = count_windows(grid, 3, mark, config) #A
        num_fours = count_windows(grid, 4, mark, config)  #B
        num_threes_opp = count_windows(grid, 3, mark%2+1, config) #C
        num_fours_opp = count_windows(grid, 4, mark%2+1, config)  #D

        score = A*num_threes + B*num_fours + C*num_threes_opp + D*num_fours_opp
        return score

    # Helper function for minimax: checks if agent or opponent has four in a row in the window
    def is_terminal_window(window, config):
        return window.count(1) == CNCTX or window.count(2) == CNCTX

    # Helper function for minimax: checks if game has ended
    def is_terminal_node(grid, config):
        # Check for draw 
        if list(grid[0, :]).count(0) == 0:
            return True
        # Check for win: horizontal, vertical, or diagonal
        # horizontal 
        for row in range(ROWS):
            for col in range(COLUMNS-(CNCTX-1)):
                window = list(grid[row, col:col+CNCTX])
                if is_terminal_window(window, config):
                    return True
        # vertical
        for row in range(ROWS-(CNCTX-1)):
            for col in range(COLUMNS):
                window = list(grid[row:row+CNCTX, col])
                if is_terminal_window(window, config):
                    return True
        # positive diagonal
        for row in range(ROWS-(CNCTX-1)):
            for col in range(COLUMNS-(CNCTX-1)):
                window = list(grid[range(row, row+CNCTX), range(col, col+CNCTX)])
                if is_terminal_window(window, config):
                    return True
        # negative diagonal
        for row in range(CNCTX-1, ROWS):
            for col in range(COLUMNS-(CNCTX-1)):
                window = list(grid[range(row, row-CNCTX, -1), range(col, col+CNCTX)])
                if is_terminal_window(window, config):
                    return True
        return False

    # Minimax implementation was here:
    def minimax(node, depth, maximizingPlayer, mark, config):
        if depth == 0:
            return get_heuristic(node, mark, config)
        if is_terminal_node(node, config):
            return get_heuristic(node, mark, config)
        
        valid_moves = [c for c in range(COLUMNS) if node[0][c] == 0]
        if maximizingPlayer:
            value = -np.Inf
            for col in valid_moves:
                child = drop_piece(node, col, mark, config)
                value = max(value, minimax(child, depth-1, False, mark, config))
            return value
        
        else: #minimizing player
            value = np.Inf
            for col in valid_moves:
                child = drop_piece(node, col, mark%2+1, config)
                value = min(value, minimax(child, depth-1, True, mark, config))
            return value

    # Uses minimax to calculate value of dropping piece in selected column
    def score_move(grid, col, mark, config, nsteps):
        next_grid = drop_piece(grid, col, mark, config)
        score = minimax(next_grid, nsteps-1, False, mark, config) 
        return score

    #########################
    # Agent makes selection #
    #########################

    # Get list of valid moves
    valid_moves = [c for c in range(COLUMNS) if obs.board[c] == 0]

    # Convert the board to a 2D grid
    ########## ENTER OBS:
    grid = np.asarray(obs.board).reshape(ROWS, COLUMNS)

    # Use the heuristic to assign a score to each possible board in the next step
    scores = dict(zip(valid_moves, [score_move(grid, col, obs.mark, config, N_STEPS) for col in valid_moves]))

    # Get a list of columns (moves) that maximize the heuristic
    max_cols = [key for key in scores.keys() if scores[key] == max(scores.values())]

    # Select at random from the maximizing columns
    return random.choice(max_cols)

In [None]:
#@title Heuristic TEST Agent (not working)
def test(obs, config):
    #config is dict: {'rows': 6, 'columns': 7, 'inarow': 4}

    ################################
    # Imports and helper functions #
    ################################

    import numpy as np
    import random

    # constants
    ROWS = config.rows
    COLUMNS = config.columns
    CNCTX = config.inarow
    A = 2       #2 threes
    B = 200     #10 fours
    C = -1      #-1 opp-threes
    D = -100    #-10opp-fours   

    # lookahead depth:
    N_STEPS = 3

    # Gets board at next step if agent drops piece in selected column
    def drop_piece(grid, col, mark):
        next_grid = grid.copy()
        for row in range(ROWS-1, -1, -1):       ###row in range(0,ROWS)??
            if next_grid[row][col] == 0:
                break
        next_grid[row][col] = mark
        return next_grid

    # Helper function for get_heuristic: checks if window satisfies heuristic conditions
    def check_window(window, num_discs, piece):
        return (window.count(piece) == num_discs and window.count(0) == CNCTX-num_discs)
    
    def check_for_CNCT4(window):
        return window.count(1) == CNCTX or window.count(2) == CNCTX
    
    # Helper function for get_heuristic: counts number of windows satisfying specified heuristic conditions
    def count_windows(grid, num_discs, piece):

        is_terminal = list(grid[0, :]).count(0) == 0
        
        num_windows = 0
        # horizontal
        for row in range(ROWS):
            for col in range(COLUMNS-(CNCTX-1)):
                window = list(grid[row, col:col+CNCTX])
                is_terminal = check_for_CNCT4(window)
                if check_window(window, num_discs, piece):
                    a += 1
        # vertical
        for row in range(ROWS-(CNCTX-1)):
            for col in range(COLUMNS):
                window = list(grid[row:row+CNCTX, col])
                if check_window(window, num_discs, piece):
                    b += 1
        # positive diagonal
        for row in range(ROWS-(CNCTX-1)):
            for col in range(COLUMNS-(CNCTX-1)):
                window = list(grid[range(row, row+CNCTX), range(col, col+CNCTX)])
                if check_window(window, num_discs, piece):
                    c += 1
        # negative diagonal
        for row in range(CNCTX-1, ROWS):
            for col in range(COLUMNS-(CNCTX-1)):
                window = list(grid[range(row, row-CNCTX, -1), range(col, col+CNCTX)])
                if check_window(window, num_discs, piece):
                    d += 1
        
        return is_terminal, variables

    # Helper function for minimax: calculates value of heuristic for grid
    def get_heuristic(variables):
        score = A*variables[0] + B*variables[1] + C*variables[2] + D*variables[3]
        return score

 # Helper function for minimax: checks if game has ended
    def is_terminal_node(grid, piece):
        # Check for draw 
        if list(grid[0, :]).count(0) == 0:
            return True
        # Check for win: horizontal, vertical, or diagonal
        # horizontal 
        for row in range(ROWS):
            for col in range(COLUMNS-(CNCTX-1)):
                window = list(grid[row, col:col+CNCTX])
                if is_terminal_window(window):#check_window(window, 0, piece):
                    return True
        # vertical
        for row in range(ROWS-(CNCTX-1)):
            for col in range(COLUMNS):
                window = list(grid[row:row+CNCTX, col])
                if is_terminal_window(window):#check_window(window, 0, piece):
                    return True
        # positive diagonal
        for row in range(ROWS-(CNCTX-1)):
            for col in range(COLUMNS-(CNCTX-1)):
                window = list(grid[range(row, row+CNCTX), range(col, col+CNCTX)])
                if is_terminal_window(window):#check_window(window, 0, piece):
                    return True
        # negative diagonal
        for row in range(CNCTX-1, ROWS):
            for col in range(COLUMNS-(CNCTX-1)):
                window = list(grid[range(row, row-CNCTX, -1), range(col, col+CNCTX)])
                if is_terminal_window(window):#check_window(window, 0, piece):
                    return True
        return False

    # Minimax implementation was here:
    def minimax(node, depth, alpha, beta, maximizingPlayer, mark):
        terminal, score = count_windows(node, mark)
        if depth == 0 or bool(terminal):
            return get_heuristic(score)
        
        valid_moves = [c for c in range(COLUMNS) if node[0][c] == 0]
        if maximizingPlayer:
            value = -np.Inf
            for col in valid_moves:
                child = drop_piece(node, col, mark)
                value = max(value, minimax(child, depth-1, alpha, beta, False, mark))
                alpha = max(alpha, value)
                if alpha >= beta:
                    break
            return value
        
        else: #minimizing player
            value = np.Inf
            for col in valid_moves:
                child = drop_piece(node, col, mark%2+1)
                value = min(value, minimax(child, depth-1, alpha, beta, True, mark))
                beta = min(beta,value)
                if alpha >= beta:
                    break
            return value

    # Uses minimax to calculate value of dropping piece in selected column
    def score_move(grid, col, mark, nsteps):
        next_grid = drop_piece(grid, col, mark)
        alpha = -np.Inf
        beta = np.Inf
        score = minimax(next_grid, nsteps-1, alpha, beta, False, mark) 
        return score

    #########################
    # Agent makes selection #
    #########################

    # Get list of valid moves
    valid_moves = [c for c in range(COLUMNS) if obs.board[c] == 0]

    # Convert the board to a 2D grid
    ########## ENTER OBS:
    grid = np.asarray(obs.board).reshape(ROWS, COLUMNS)

    # Use the heuristic to assign a score to each possible board in the next step
    scores = dict(zip(valid_moves, [score_move(grid, col, obs.mark, N_STEPS) for col in valid_moves]))

    # Get a list of columns (moves) that maximize the heuristic
    max_cols = [key for key in scores.keys() if scores[key] == max(scores.values())]

    # Select at random from the maximizing columns
    return random.choice(max_cols)

In [None]:
#@title AltHeuristic Test Agent (working)
def altheuristic(obs, config):
    # config is dict: {'rows': 6, 'columns': 7, 'inarow': 4}
    # obs.board is last move of opponent, obs.mark is current player
    # return column that max's next grid's score

    ################################
    # Imports and helper functions #
    ################################

    import numpy as np
    import random

    # constants
    ROWS = config.rows
    COLUMNS = config.columns
    CNCTX = config.inarow
    A = 10       #2 threes
    B = 1000     #10 fours
    C = -1      #-1 opp-threes
    D = -100    #-10opp-fours   

    # lookahead depth:
    N_STEPS = 3

    # Gets board at next step if agent drops piece in selected column
    def drop_piece(grid, col, mark):
        next_grid = grid.copy()
        for row in range(ROWS-1, -1, -1):       ###row in range(0,ROWS)??
            if next_grid[row][col] == 0:
                break
        next_grid[row][col] = mark
        return next_grid

    # Helper function for get_heuristic: checks if window satisfies heuristic conditions
    def check_window(window, num_discs, piece):
        return (window.count(piece) == num_discs and window.count(0) == CNCTX-num_discs)

    # Helper function for get_heuristic: counts number of windows satisfying specified heuristic conditions
    def count_windows(grid, num_discs, piece):
        num_windows = 0
        # horizontal
        for row in range(ROWS):
            for col in range(COLUMNS-(CNCTX-1)):
                window = list(grid[row, col:col+CNCTX])
                if check_window(window, num_discs, piece):
                    num_windows += 1
        # vertical
        for row in range(ROWS-(CNCTX-1)):
            for col in range(COLUMNS):
                window = list(grid[row:row+CNCTX, col])
                if check_window(window, num_discs, piece):
                    num_windows += 1
        # positive diagonal
        for row in range(ROWS-(CNCTX-1)):
            for col in range(COLUMNS-(CNCTX-1)):
                window = list(grid[range(row, row+CNCTX), range(col, col+CNCTX)])
                if check_window(window, num_discs, piece):
                    num_windows += 1
        # negative diagonal
        for row in range(CNCTX-1, ROWS):
            for col in range(COLUMNS-(CNCTX-1)):
                window = list(grid[range(row, row-CNCTX, -1), range(col, col+CNCTX)])
                if check_window(window, num_discs, piece):
                    num_windows += 1
        return num_windows

    # Helper function for minimax: calculates value of heuristic for grid
    def get_heuristic(grid, mark):
        num_threes = count_windows(grid, 3, mark) #A
        num_fours = count_windows(grid, 4, mark)  #B
        num_threes_opp = count_windows(grid, 3, mark%2+1) #C
        num_fours_opp = count_windows(grid, 4, mark%2+1)  #D

        score = A*num_threes + B*num_fours + C*num_threes_opp + D*num_fours_opp
        return score

    # Helper function for minimax: checks if agent or opponent has four in a row in the window
    def is_terminal_window(window):
        return window.count(1) == CNCTX or window.count(2) == CNCTX

    # Helper function for minimax: checks if game has ended
    def is_terminal_node(grid):
        # Check for draw 
        if list(grid[0, :]).count(0) == 0:
            return True
        # Check for win: horizontal, vertical, or diagonal
        # horizontal 
        for row in range(ROWS):
            for col in range(COLUMNS-(CNCTX-1)):
                window = list(grid[row, col:col+CNCTX])
                if is_terminal_window(window):
                    return True
        # vertical
        for row in range(ROWS-(CNCTX-1)):
            for col in range(COLUMNS):
                window = list(grid[row:row+CNCTX, col])
                if is_terminal_window(window):
                    return True
        # positive diagonal
        for row in range(ROWS-(CNCTX-1)):
            for col in range(COLUMNS-(CNCTX-1)):
                window = list(grid[range(row, row+CNCTX), range(col, col+CNCTX)])
                if is_terminal_window(window):
                    return True
        # negative diagonal
        for row in range(CNCTX-1, ROWS):
            for col in range(COLUMNS-(CNCTX-1)):
                window = list(grid[range(row, row-CNCTX, -1), range(col, col+CNCTX)])
                if is_terminal_window(window):
                    return True
        return False

    # Minimax implementation was here:
    def minimax(node, depth, maximizingPlayer, mark):
        if depth == 0:
            return get_heuristic(node, mark)
        if is_terminal_node(node):
            return get_heuristic(node, mark)
        
        valid_moves = [c for c in range(COLUMNS) if node[0][c] == 0]
        if maximizingPlayer:
            value = -np.Inf
            for col in valid_moves:
                child = drop_piece(node, col, mark)
                value = max(value, minimax(child, depth-1, False, mark))
            return value
        
        else: #minimizing player
            value = np.Inf
            for col in valid_moves:
                child = drop_piece(node, col, mark%2+1)
                value = min(value, minimax(child, depth-1, True, mark))
            return value

    # Uses minimax to calculate value of dropping piece in selected column
    def score_move(grid, col, mark, nsteps):
        next_grid = drop_piece(grid, col, mark)
        score = minimax(next_grid, nsteps-1, False, mark) 
        return score

    #########################
    # Agent makes selection #
    #########################

    # Get list of valid moves
    valid_moves = [c for c in range(COLUMNS) if obs.board[c] == 0]

    # Convert the board to a 2D grid
    ########## ENTER OBS:
    grid = np.asarray(obs.board).reshape(ROWS, COLUMNS)

    # Use the heuristic to assign a score to each possible board in the next step
    scores = dict(zip(valid_moves, [score_move(grid, col, obs.mark, N_STEPS) for col in valid_moves]))

    # Get a list of columns (moves) that maximize the heuristic
    max_cols = [key for key in scores.keys() if scores[key] == max(scores.values())]

    # Select at random from the maximizing columns
    return random.choice(max_cols)

In [None]:
#@title Heuristic Agent
def heuristic(obs, config):
    #config is dict: {'rows': 6, 'columns': 7, 'inarow': 4}
    # obs.board is last move of opponent, obs.mark is current player
    # return column that max's next grid's score

    ################################
    # Imports and helper functions #
    ################################

    import numpy as np
    import random

    # constants
    ROWS = config.rows
    COLUMNS = config.columns
    CNCTX = config.inarow
    A = 10       #2 threes
    B = 1000     #10 fours
    C = -1      #-1 opp-threes
    D = -100    #-10opp-fours   

    # lookahead depth:
    N_STEPS =  4#@param {type: "integer"}

    # Gets board at next step if agent drops piece in selected column
    def drop_piece(grid, col, mark):
        next_grid = grid.copy()
        for row in range(ROWS-1, -1, -1):       ###row in range(0,ROWS)??
            if next_grid[row][col] == 0:
                break
        next_grid[row][col] = mark
        return next_grid

    # Helper function for get_heuristic: checks if window satisfies heuristic conditions
    def check_window(window, num_discs, piece):
        return (window.count(piece) == num_discs and window.count(0) == CNCTX-num_discs)

    # Helper function for get_heuristic: counts number of windows satisfying specified heuristic conditions
    def count_windows(grid, num_discs, piece):
        num_windows = 0
        # horizontal
        for row in range(ROWS):
            for col in range(COLUMNS-(CNCTX-1)):
                window = list(grid[row, col:col+CNCTX])
                if check_window(window, num_discs, piece):
                    num_windows += 1
        # vertical
        for row in range(ROWS-(CNCTX-1)):
            for col in range(COLUMNS):
                window = list(grid[row:row+CNCTX, col])
                if check_window(window, num_discs, piece):
                    num_windows += 1
        # positive diagonal
        for row in range(ROWS-(CNCTX-1)):
            for col in range(COLUMNS-(CNCTX-1)):
                window = list(grid[range(row, row+CNCTX), range(col, col+CNCTX)])
                if check_window(window, num_discs, piece):
                    num_windows += 1
        # negative diagonal
        for row in range(CNCTX-1, ROWS):
            for col in range(COLUMNS-(CNCTX-1)):
                window = list(grid[range(row, row-CNCTX, -1), range(col, col+CNCTX)])
                if check_window(window, num_discs, piece):
                    num_windows += 1
        return num_windows

    # Helper function for minimax: calculates value of heuristic for grid
    def get_heuristic(grid, mark):
        num_threes = count_windows(grid, 3, mark) #A
        num_fours = count_windows(grid, 4, mark)  #B
        num_threes_opp = count_windows(grid, 3, mark%2+1) #C
        num_fours_opp = count_windows(grid, 4, mark%2+1)  #D

        score = A*num_threes + B*num_fours + C*num_threes_opp + D*num_fours_opp
        return score

    # Helper function for minimax: checks if agent or opponent has four in a row in the window
    def is_terminal_window(window):
        return window.count(1) == CNCTX or window.count(2) == CNCTX

    # Helper function for minimax: checks if game has ended
    def is_terminal_node(grid):
        # Check for draw 
        if list(grid[0, :]).count(0) == 0:
            return True
        # Check for win: horizontal, vertical, or diagonal
        # horizontal 
        for row in range(ROWS):
            for col in range(COLUMNS-(CNCTX-1)):
                window = list(grid[row, col:col+CNCTX])
                if is_terminal_window(window):
                    return True
        # vertical
        for row in range(ROWS-(CNCTX-1)):
            for col in range(COLUMNS):
                window = list(grid[row:row+CNCTX, col])
                if is_terminal_window(window):
                    return True
        # positive diagonal
        for row in range(ROWS-(CNCTX-1)):
            for col in range(COLUMNS-(CNCTX-1)):
                window = list(grid[range(row, row+CNCTX), range(col, col+CNCTX)])
                if is_terminal_window(window):
                    return True
        # negative diagonal
        for row in range(CNCTX-1, ROWS):
            for col in range(COLUMNS-(CNCTX-1)):
                window = list(grid[range(row, row-CNCTX, -1), range(col, col+CNCTX)])
                if is_terminal_window(window):
                    return True
        return False

    # Minimax implementation was here:
    def minimax(node, depth, maximizingPlayer, mark):
        if depth == 0:
            return get_heuristic(node, mark)
        if is_terminal_node(node):
            return get_heuristic(node, mark)
        
        valid_moves = [c for c in range(COLUMNS) if node[0][c] == 0]
        if maximizingPlayer:
            value = -np.Inf
            for col in valid_moves:
                child = drop_piece(node, col, mark)
                value = max(value, minimax(child, depth-1, False, mark))
            return value
        
        else: #minimizing player
            value = np.Inf
            for col in valid_moves:
                child = drop_piece(node, col, mark%2+1)
                value = min(value, minimax(child, depth-1, True, mark))
            return value

    # Uses minimax to calculate value of dropping piece in selected column
    def score_move(grid, col, mark, nsteps):
        next_grid = drop_piece(grid, col, mark)
        score = minimax(next_grid, nsteps-1, False, mark) 
        return score

    #########################
    # Agent makes selection #
    #########################

    # Get list of valid moves
    valid_moves = [c for c in range(COLUMNS) if obs.board[c] == 0]

    # Convert the board to a 2D grid
    grid = np.asarray(obs.board).reshape(ROWS, COLUMNS)

    # Use the heuristic to assign a score to each possible board in the next step
    scores = dict(zip(valid_moves, [score_move(grid, col, obs.mark, N_STEPS) for col in valid_moves]))

    # Get a list of columns (moves) that maximize the heuristic
    max_cols = [key for key in scores.keys() if scores[key] == max(scores.values())]

    # Select at random from the maximizing columns
    return random.choice(max_cols)

### Alpha-Beta pruning

##### Pseudocode

AlphaBeta Pruner Pseudocode:

```
function alphabeta(node, depth, α, β, maximizingPlayer) is
    if depth = 0 or node is a terminal node then
        return the heuristic value of node
    if maximizingPlayer then
        value := −∞
        for each child of node do
            value := max(value, alphabeta(child, depth − 1, α, β, FALSE))
            α := max(α, value)
            if α ≥ β then
                break (* β cutoff *)
        return value
    else
        value := +∞
        for each child of node do
            value := min(value, alphabeta(child, depth − 1, α, β, TRUE))
            β := min(β, value)
            if β ≤ α then
                break (* α cutoff *)
        return value

### Initial call:        
### alphabeta(origin, depth, −∞, +∞, TRUE)
```
[alpha-beta pruning](https://en.wikipedia.org/wiki/Alpha%E2%80%93beta_pruning#Pseudocode) 

##### Implementation

In [None]:
#@title AlphaBeta = OLD pruner (working, slow, depth=4)
def abprune(obs, config):
#### AlphaBeta Agent

    ################################
    # Imports and helper functions #
    ################################
    
    import numpy as np
    import random
    
    # constants
    ROWS = config.rows
    COLUMNS = config.columns
    CNCTX = config.inarow

    # lookahead depth:
    N_STEPS = 4
    
    #heuristic:    
    A = 10   #2 threes
    B = 1000  #10 fours
    C = -1  #-1 opp-threes
    D = -100 #-10opp-fours

    # Gets board at next step if agent drops piece in selected column
    def drop_piece(grid, col, mark, config):
        next_grid = grid.copy()
        for row in range(ROWS-1, -1, -1):
            if next_grid[row][col] == 0:
                break
        next_grid[row][col] = mark
        return next_grid

    # Helper function for get_heuristic: checks if window satisfies heuristic conditions
    def check_window(window, num_discs, piece, config):
        return (window.count(piece) == num_discs and window.count(0) == CNCTX-num_discs)

    # Helper function for get_heuristic: counts number of windows satisfying specified heuristic conditions
    def count_windows(grid, num_discs, piece, config):
        num_windows = 0
        # horizontal
        for row in range(ROWS):
            for col in range(COLUMNS-(CNCTX-1)):
                window = list(grid[row, col:col+CNCTX])
                if check_window(window, num_discs, piece, config):
                    num_windows += 1
        # vertical
        for row in range(ROWS-(CNCTX-1)):
            for col in range(COLUMNS):
                window = list(grid[row:row+CNCTX, col])
                if check_window(window, num_discs, piece, config):
                    num_windows += 1
        # positive diagonal
        for row in range(ROWS-(CNCTX-1)):
            for col in range(COLUMNS-(CNCTX-1)):
                window = list(grid[range(row, row+CNCTX), range(col, col+CNCTX)])
                if check_window(window, num_discs, piece, config):
                    num_windows += 1
        # negative diagonal
        for row in range(CNCTX-1, ROWS):
            for col in range(COLUMNS-(CNCTX-1)):
                window = list(grid[range(row, row-CNCTX, -1), range(col, col+CNCTX)])
                if check_window(window, num_discs, piece, config):
                    num_windows += 1
        return num_windows
    
    # Helper function for minimax: calculates value of heuristic for grid
    def get_heuristic(grid, mark, config):
        num_threes = count_windows(grid, 3, mark, config) #A
        num_fours = count_windows(grid, 4, mark, config)  #B
        num_threes_opp = count_windows(grid, 3, mark%2+1, config) #C
        num_fours_opp = count_windows(grid, 4, mark%2+1, config)  #D
        score = A*num_threes + B*num_fours + C*num_threes_opp + D*num_fours_opp
        return score

    # Helper function for minimax: checks if agent or opponent has four in a row in the window
    def is_terminal_window(window, config):
        return window.count(1) == CNCTX or window.count(2) == CNCTX

    # Helper function for minimax: checks if game has ended
    def is_terminal_node(grid, config):
        # Check for draw 
        if list(grid[0, :]).count(0) == 0:
            return True
        # Check for win: horizontal, vertical, or diagonal
        # horizontal 
        for row in range(ROWS):
            for col in range(COLUMNS-(CNCTX-1)):
                window = list(grid[row, col:col+CNCTX])
                if is_terminal_window(window, config):
                    return True
        # vertical
        for row in range(ROWS-(CNCTX-1)):
            for col in range(COLUMNS):
                window = list(grid[row:row+CNCTX, col])
                if is_terminal_window(window, config):
                    return True
        # positive diagonal
        for row in range(ROWS-(CNCTX-1)):
            for col in range(COLUMNS-(CNCTX-1)):
                window = list(grid[range(row, row+CNCTX), range(col, col+CNCTX)])
                if is_terminal_window(window, config):
                    return True
        # negative diagonal
        for row in range(CNCTX-1, ROWS):
            for col in range(COLUMNS-(CNCTX-1)):
                window = list(grid[range(row, row-CNCTX, -1), range(col, col+CNCTX)])
                if is_terminal_window(window, config):
                    return True
        return False

    # Minimax implementation with pruning:
    def alphabeta(node, depth, alpha, beta, maximizingPlayer, mark, config):
        is_terminal = is_terminal_node(node, config)
        valid_moves = [c for c in range(COLUMNS) if node[0][c] == 0]
        if depth == 0 or is_terminal:
            return get_heuristic(node, mark, config)

        if maximizingPlayer:
            value = -np.Inf
            for col in valid_moves:
                child = drop_piece(node, col, mark, config)
                value = max(value, alphabeta(child, depth-1, alpha, beta, False, mark, config))
                alpha = max(alpha, value)
                if alpha >= beta:
                    break
            return value
        
        else:
            value = np.Inf
            for col in valid_moves:
                child = drop_piece(node, col, mark%2+1, config)
                value = min(value, alphabeta(child, depth-1, alpha, beta, True, mark, config))
                beta = min(beta,value)
                if alpha >= beta:
                    break 
            return value
    
    # Uses alphabeta to calculate value of dropping piece in selected column
    def score_move(grid, col, mark, config, nsteps):
        next_grid = drop_piece(grid, col, mark, config)
        score = alphabeta(next_grid, nsteps-1, -np.Inf, np.Inf, False, mark, config)
        return score
    
    #########################
    # Agent makes selection #
    #########################
    
    # Get list of valid moves
    valid_moves = [c for c in range(COLUMNS) if obs.board[c] == 0]
    
    # Convert the board to a 2D grid
    grid = np.asarray(obs.board).reshape(ROWS, COLUMNS)
    
    # Use the heuristic to assign a score to each possible board in the next step
    scores = dict(zip(valid_moves, [score_move(grid, col, obs.mark, config, N_STEPS) for col in valid_moves]))
    
    # Get a list of columns (moves) that maximize the heuristic
    max_cols = [key for key in scores.keys() if scores[key] == max(scores.values())]
    
    # Select at random from the maximizing columns
    return random.choice(max_cols)

In [None]:
#@title Pruning Agent "pruner" (working)
def old_pruner(obs, config):
    #config is dict: {'rows': 6, 'columns': 7, 'inarow': 4}
    # obs.board is last move of opponent, obs.mark is current player
    # return column that max's next grid's score

    ################################
    # Imports and helper functions #
    ################################

    import numpy as np
    import random

    # constants
    ROWS = config.rows
    COLUMNS = config.columns
    CNCTX = config.inarow
    A = 10       #2 threes
    B = 1000     #10 fours
    C = -1      #-1 opp-threes
    D = -100    #-10opp-fours   

    # lookahead depth:
    N_STEPS =   4#@param {type: "number"}

    # Gets board at next step if agent drops piece in selected column
    def drop_piece(grid, col, mark):
        next_grid = grid.copy()
        for row in range(ROWS-1, -1, -1):       ###row in range(0,ROWS)??
            if next_grid[row][col] == 0:
                break
        next_grid[row][col] = mark
        return next_grid

    # Helper function for get_heuristic: checks if window satisfies heuristic conditions
    def check_window(window, num_discs, piece):
        return (window.count(piece) == num_discs and window.count(0) == CNCTX-num_discs)

    # Helper function for get_heuristic: counts number of windows satisfying specified heuristic conditions
    def count_windows(grid, num_discs, piece):
        num_windows = 0
        # horizontal
        for row in range(ROWS):
            for col in range(COLUMNS-(CNCTX-1)):
                window = list(grid[row, col:col+CNCTX])
                if check_window(window, num_discs, piece):
                    num_windows += 1
        # vertical
        for row in range(ROWS-(CNCTX-1)):
            for col in range(COLUMNS):
                window = list(grid[row:row+CNCTX, col])
                if check_window(window, num_discs, piece):
                    num_windows += 1
        # positive diagonal
        for row in range(ROWS-(CNCTX-1)):
            for col in range(COLUMNS-(CNCTX-1)):
                window = list(grid[range(row, row+CNCTX), range(col, col+CNCTX)])
                if check_window(window, num_discs, piece):
                    num_windows += 1
        # negative diagonal
        for row in range(CNCTX-1, ROWS):
            for col in range(COLUMNS-(CNCTX-1)):
                window = list(grid[range(row, row-CNCTX, -1), range(col, col+CNCTX)])
                if check_window(window, num_discs, piece):
                    num_windows += 1
        return num_windows

    # Helper function for minimax: calculates value of heuristic for grid
    def get_heuristic(grid, mark):
        num_threes = count_windows(grid, 3, mark) #A
        num_fours = count_windows(grid, 4, mark)  #B
        num_threes_opp = count_windows(grid, 3, mark%2+1) #C
        num_fours_opp = count_windows(grid, 4, mark%2+1)  #D

        score = A*num_threes + B*num_fours + C*num_threes_opp + D*num_fours_opp
        return score

    # Helper function for minimax: checks if agent or opponent has four in a row in the window
    def is_terminal_window(window):
        return window.count(1) == CNCTX or window.count(2) == CNCTX

    # Helper function for minimax: checks if game has ended
    def is_terminal_node(grid):
        # Check for draw 
        if list(grid[0, :]).count(0) == 0:
            return True
        # Check for win: horizontal, vertical, or diagonal
        # horizontal 
        for row in range(ROWS):
            for col in range(COLUMNS-(CNCTX-1)):
                window = list(grid[row, col:col+CNCTX])
                if is_terminal_window(window):
                    return True
        # vertical
        for row in range(ROWS-(CNCTX-1)):
            for col in range(COLUMNS):
                window = list(grid[row:row+CNCTX, col])
                if is_terminal_window(window):
                    return True
        # positive diagonal
        for row in range(ROWS-(CNCTX-1)):
            for col in range(COLUMNS-(CNCTX-1)):
                window = list(grid[range(row, row+CNCTX), range(col, col+CNCTX)])
                if is_terminal_window(window):
                    return True
        # negative diagonal
        for row in range(CNCTX-1, ROWS):
            for col in range(COLUMNS-(CNCTX-1)):
                window = list(grid[range(row, row-CNCTX, -1), range(col, col+CNCTX)])
                if is_terminal_window(window):
                    return True
        return False

    # Minimax implementation was here:
    def alphabeta(node, depth, alpha, beta, maximizingPlayer, mark):
        if depth == 0:
            return get_heuristic(node, mark)
        if is_terminal_node(node):
            return get_heuristic(node, mark)
        
        valid_moves = [c for c in range(COLUMNS) if node[0][c] == 0]
        if maximizingPlayer:
            value = -np.Inf
            for col in valid_moves:
                child = drop_piece(node, col, mark)
                value = max(value, alphabeta(child, depth-1, alpha, beta, False, mark))
                alpha = max(alpha, value)
                if alpha >= beta:
                    break
            return value
        
        else: #minimizing player
            value = np.Inf
            for col in valid_moves:
                child = drop_piece(node, col, mark%2+1)
                value = min(value, alphabeta(child, depth-1, alpha, beta, True, mark))
                beta = min(beta, value)
                if alpha >= beta:
                    break
            return value

    # Uses minimax to calculate value of dropping piece in selected column
    def score_move(grid, col, mark, nsteps):
        next_grid = drop_piece(grid, col, mark)
        score = alphabeta(next_grid, nsteps-1, -np.Inf, np.Inf, False, mark) 
        return score

    #########################
    # Agent makes selection #
    #########################

    # Get list of valid moves
    valid_moves = [c for c in range(COLUMNS) if obs.board[c] == 0]

    # Convert the board to a 2D grid
    ########## ENTER OBS:
    grid = np.asarray(obs.board).reshape(ROWS, COLUMNS)

    # Use the heuristic to assign a score to each possible board in the next step
    scores = dict(zip(valid_moves, [score_move(grid, col, obs.mark, N_STEPS) for col in valid_moves]))

    # Get a list of columns (moves) that maximize the heuristic
    max_cols = [key for key in scores.keys() if scores[key] == max(scores.values())]

    # Select at random from the maximizing columns
    return random.choice(max_cols)

In [100]:
#@title Current Pruning Agent "pruner" >>> my_agent
def my_agent(obs, config):
    #config is dict: {'rows': 6, 'columns': 7, 'inarow': 4}
    # obs.board is last move of opponent, obs.mark is current player
    # return column that max's next grid's score

    ################################
    # Imports and helper functions #
    ################################

    import numpy as np
    import random

    # constants
    ROWS = config.rows
    COLUMNS = config.columns
    CNCTX = config.inarow
    A = 10       #2 threes
    B = 1000     #10 fours
    C = -1      #-1 opp-threes
    D = -100    #-10opp-fours   

    # lookahead depth:
    N_STEPS =   4#@param {type: "number"}

    # Gets board at next step if agent drops piece in selected column
    def drop_piece(grid, col, mark):
        next_grid = grid.copy()
        for row in range(ROWS-1, -1, -1):       ###row in range(0,ROWS)??
            if next_grid[row][col] == 0:
                break
        next_grid[row][col] = mark
        return next_grid

    # Helper function for get_heuristic: checks if window satisfies heuristic conditions
    def check_window(window, num_discs, piece):
        return (window.count(piece) == num_discs and window.count(0) == CNCTX-num_discs)

    # Helper function for get_heuristic: counts number of windows satisfying specified heuristic conditions
    def count_windows(grid, num_discs, piece):
        num_windows = 0
        # horizontal
        for row in range(ROWS):
            for col in range(COLUMNS-(CNCTX-1)):
                window = list(grid[row, col:col+CNCTX])
                if check_window(window, num_discs, piece):
                    num_windows += 1
        # vertical
        for row in range(ROWS-(CNCTX-1)):
            for col in range(COLUMNS):
                window = list(grid[row:row+CNCTX, col])
                if check_window(window, num_discs, piece):
                    num_windows += 1
        # positive diagonal
        for row in range(ROWS-(CNCTX-1)):
            for col in range(COLUMNS-(CNCTX-1)):
                window = list(grid[range(row, row+CNCTX), range(col, col+CNCTX)])
                if check_window(window, num_discs, piece):
                    num_windows += 1
        # negative diagonal
        for row in range(CNCTX-1, ROWS):
            for col in range(COLUMNS-(CNCTX-1)):
                window = list(grid[range(row, row-CNCTX, -1), range(col, col+CNCTX)])
                if check_window(window, num_discs, piece):
                    num_windows += 1
        return num_windows

    # Helper function for minimax: calculates value of heuristic for grid
    def get_score(grid, mark):

        num_threes = count_windows(grid, 3, mark) #A
        num_fours = count_windows(grid, 4, mark)  #B
        num_threes_opp = count_windows(grid, 3, mark%2+1) #C
        num_fours_opp = count_windows(grid, 4, mark%2+1)  #D

        score = A*num_threes + B*num_fours + C*num_threes_opp + D*num_fours_opp
        is_terminal = (not num_fours == 0) or (not num_fours_opp == 0) or (list(grid[0, :]).count(0) == 0)
        return score, is_terminal

    # Alpha Beta Pruning of MiniMax algorithm:
    def alphabeta(node, depth, alpha, beta, maximizingPlayer, mark):
        node_score, is_terminal = get_score(node, mark)
        if depth == 0 or is_terminal:
            return node_score
 
        valid_moves = [c for c in range(COLUMNS) if node[0][c] == 0]
        if maximizingPlayer:
            value = -np.Inf
            for col in valid_moves:
                child = drop_piece(node, col, mark)
                value = max(value, alphabeta(child, depth-1, alpha, beta, False, mark))
                alpha = max(alpha, value)
                if alpha >= beta or value >= 800:
                    break
            return value
        
        else: #minimizing player
            value = np.Inf
            for col in valid_moves:
                child = drop_piece(node, col, mark%2+1)
                value = min(value, alphabeta(child, depth-1, alpha, beta, True, mark))
                beta = min(beta, value)
                if alpha >= beta or value <= -80:
                    break
            return value

    # Uses minimax to calculate value of dropping piece in selected column
    def score_move(grid, col, mark, nsteps):
        next_grid = drop_piece(grid, col, mark)
        score = alphabeta(next_grid, nsteps-1, -np.Inf, np.Inf, False, mark) 
        return score

    #########################
    # Agent makes selection #
    #########################

    # Get list of valid moves
    valid_moves = [c for c in range(COLUMNS) if obs.board[c] == 0]

    # Convert the board to a 2D grid
    ########## ENTER OBS:
    grid = np.asarray(obs.board).reshape(ROWS, COLUMNS)

    # Use the heuristic to assign a score to each possible board in the next step
    scores = dict(zip(valid_moves, [score_move(grid, col, obs.mark, N_STEPS) for col in valid_moves]))

    # Get a list of columns (moves) that maximize the heuristic
    max_cols = [key for key in scores.keys() if scores[key] == max(scores.values())]

    # Select at random from the maximizing columns
    return random.choice(max_cols)

#Tests

In [None]:
#agent1
930//60

15

In [None]:
start_time = time.time()
n_rounds =   11#@param {type:"integer"}
agent1 = heuristic #@param
agent2 = heuristic #@param
get_win_percentages(agent1=agent1, agent2=agent2, n_rounds=n_rounds)
print ("Total time taken: {} seconds (per round: {} seconds)".format(round(time.time() - start_time, 1), 
                                                                     round((time.time() - start_time)/n_rounds,3)))

Agent 1 Win Percentage: 0.36
Agent 2 Win Percentage: 0.45
Number of Invalid Plays by Agent 1: 0
Number of Invalid Plays by Agent 2: 0
Total time taken: 930.3 seconds (per round: 84.57 seconds)


In [None]:
start_time = time.time()
n_rounds =   11#@param {type:"integer"}
agent1 = heuristic #@param
agent2 =  "random"#@param
get_win_percentages(agent1=agent1, agent2=agent2, n_rounds=n_rounds)
print ("Total time taken: {} seconds (per round: {} seconds)".format(round(time.time() - start_time, 1), 
                                                                     round((time.time() - start_time)/n_rounds,3)))

Agent 1 Win Percentage: 1.0
Agent 2 Win Percentage: 0.0
Number of Invalid Plays by Agent 1: 0
Number of Invalid Plays by Agent 2: 0
Total time taken: 320.4 seconds (per round: 29.13 seconds)


In [None]:
start_time = time.time()
n_rounds =   11#@param {type:"integer"}
agent1 = heuristic #@param
agent2 = pruner #@param
get_win_percentages(agent1=agent1, agent2=agent2, n_rounds=n_rounds)
print ("Total time taken: {} seconds (per round: {} seconds)".format(round(time.time() - start_time, 1), 
                                                                     round((time.time() - start_time)/n_rounds,3)))

Agent 1 Win Percentage: 0.27
Agent 2 Win Percentage: 0.73
Number of Invalid Plays by Agent 1: 0
Number of Invalid Plays by Agent 2: 0
Total time taken: 205.6 seconds (per round: 18.687 seconds)


In [64]:
start_time = time.time()
n_rounds =   2#@param {type:"integer"}
agent1 = pruner #@param
agent2 = "random" #@param
get_win_percentages(agent1=agent1, agent2=agent2, n_rounds=n_rounds)
print ("Total time taken: {} seconds (per round: {} seconds)".format(round(time.time() - start_time, 1), 
                                                                     round((time.time() - start_time)/n_rounds,3)))

Agent 1 Win Percentage: 1.0
Agent 2 Win Percentage: 0.0
Number of Invalid Plays by Agent 1: 0
Number of Invalid Plays by Agent 2: 0
Total time taken: 18.0 seconds (per round: 8.999 seconds)


In [65]:
start_time = time.time()
n_rounds =   2#@param {type:"integer"}
agent1 =  pruneX#@param
agent2 = "random" #@param
get_win_percentages(agent1=agent1, agent2=agent2, n_rounds=n_rounds)
print ("Total time taken: {} seconds (per round: {} seconds)".format(round(time.time() - start_time, 1), 
                                                                     round((time.time() - start_time)/n_rounds,3)))

Agent 1 Win Percentage: 0.5
Agent 2 Win Percentage: 0.5
Number of Invalid Plays by Agent 1: 0
Number of Invalid Plays by Agent 2: 0
Total time taken: 18.1 seconds (per round: 9.027 seconds)


In [66]:
start_time = time.time()
n_rounds =   2#@param {type:"integer"}
agent1 = pruneX #@param
agent2 = pruner #@param
get_win_percentages(agent1=agent1, agent2=agent2, n_rounds=n_rounds)
print ("Total time taken: {} seconds (per round: {} seconds)".format(round(time.time() - start_time, 1), 
                                                                     round((time.time() - start_time)/n_rounds,3)))

Agent 1 Win Percentage: 0.0
Agent 2 Win Percentage: 1.0
Number of Invalid Plays by Agent 1: 0
Number of Invalid Plays by Agent 2: 0
Total time taken: 65.1 seconds (per round: 32.574 seconds)


In [None]:
#@title Replay
start_time = time.time()
n_rounds =   11#@param {type:"integer"}
agent1 = pruneX #@param
agent2 = pruner #@param
get_win_percentages(agent1=agent1, agent2=agent2, n_rounds=n_rounds)
print ("Total time taken: {} seconds (per round: {} seconds)".format(round(time.time() - start_time, 1), 
                                                                     round((time.time() - start_time)/n_rounds,3)))

Agent 1 Win Percentage: 0.36
Agent 2 Win Percentage: 0.64
Number of Invalid Plays by Agent 1: 0
Number of Invalid Plays by Agent 2: 0
Total time taken: 391.3 seconds (per round: 35.576 seconds)


In [None]:
start_time = time.time()
n_rounds =   11#@param {type:"integer"}
agent1 =  abprune#@param
agent2 =  "random"#@param
get_win_percentages(agent1=agent1, agent2=agent2, n_rounds=n_rounds)
print ("Total time taken: {} seconds (per round: {} seconds)".format(round(time.time() - start_time, 1), 
                                                                     round((time.time() - start_time)/n_rounds,3)))

Agent 1 Win Percentage: 1.0
Agent 2 Win Percentage: 0.0
Number of Invalid Plays by Agent 1: 0
Number of Invalid Plays by Agent 2: 0
Total time taken: 138.7 seconds (per round: 12.609 seconds)


In [None]:
start_time = time.time()
n_rounds =   11#@param {type:"integer"}
agent1 =  abprune#@param
agent2 = pruner #@param
get_win_percentages(agent1=agent1, agent2=agent2, n_rounds=n_rounds)
print ("Total time taken: {} seconds (per round: {} seconds)".format(round(time.time() - start_time, 1), 
                                                                     round((time.time() - start_time)/n_rounds,3)))

Agent 1 Win Percentage: 0.55
Agent 2 Win Percentage: 0.36
Number of Invalid Plays by Agent 1: 0
Number of Invalid Plays by Agent 2: 0
Total time taken: 402.7 seconds (per round: 36.61 seconds)


In [60]:
pruner = my_agent

In [61]:
#@title Replay
start_time = time.time()
n_rounds =   1#@param {type:"integer"}
agent1 =  pruneX#@param
agent2 = pruner #@param
get_win_percentages(agent1=agent1, agent2=agent2, n_rounds=n_rounds)
print ("Total time taken: {} seconds (per round: {} seconds)".format(round(time.time() - start_time, 1), 
                                                                     round((time.time() - start_time)/n_rounds,3)))

Agent 1 Win Percentage: 0.0
Agent 2 Win Percentage: 1.0
Number of Invalid Plays by Agent 1: 0
Number of Invalid Plays by Agent 2: 0
Total time taken: 22.8 seconds (per round: 22.788 seconds)


#### TO DO: decide which is best, submit it

In [None]:
### >>> go with pruneX

In [None]:
# Create the game environment
env = make("connectx")

# Two random agents play one game round
env.run([no_config, heuristic])

# Show the game
env.render(mode="ipython")

In [None]:
#env.play([None,no_config])

#Experimental

In [98]:
#@title Time and Score Comparisons
start_time = time.time()
n_rounds =   1#@param {type:"integer"}
agent1 = pruneX #@param
agent2 = pruneX #@param
get_win_percentages(agent1=agent1, agent2=agent2, n_rounds=n_rounds)
print ("Total time taken: {} seconds (per round: {} seconds)".format(round(time.time() - start_time, 1), 
                                                                     round((time.time() - start_time)/n_rounds,3)))

Agent 1 Win Percentage: 0.0
Agent 2 Win Percentage: 1.0
Number of Invalid Plays by Agent 1: 0
Number of Invalid Plays by Agent 2: 0
Total time taken: 1.5 seconds (per round: 1.546 seconds)


In [99]:
# @title Experimental Pruning Agent "pruneX"
def pruneX(obs, config):
    #config is dict: {'rows': 6, 'columns': 7, 'inarow': 4}
    # obs.board is last move of opponent, obs.mark is current player
    # return column that max's next grid's score

    ################################
    # Imports and helper functions #
    ################################

    import numpy as np
    import random
    ########################### Regular pruner ################
    # constants (given by game)
    ROWS = 6
    COLUMNS = 7
    CNCTX = 4
    ## coefficients (weights on variable future outcomes)
    A = 10       #2 threes
    B = 1000     #10 fours
    C = -1      #-1 opp-threes
    D = -100     #-10 opp-fours   

        # lookahead depth:
    N_STEPS = 4#@param {type:"integer"}

    # Gets board at next step if agent drops piece in selected column
    def drop_piece(grid, col, mark):
        next_grid = grid.copy()
        for row in range(ROWS-1, -1, -1):       ###row in range(0,ROWS)??
            if next_grid[row][col] == 0:
                break
        next_grid[row][col] = mark
        return next_grid

    # Helper function for get_heuristic: checks if window satisfies heuristic conditions
    def check_window(window, num_discs, piece):
        return (window.count(piece) == num_discs and window.count(0) == CNCTX-num_discs)

    # Helper function for get_heuristic: counts number of windows satisfying specified heuristic conditions
    def count_windows(grid, num_discs, piece):
        num_windows = 0
        # horizontal
        for row in range(ROWS):
            for col in range(COLUMNS-(CNCTX-1)):
                window = list(grid[row, col:col+CNCTX])
                if check_window(window, num_discs, piece):
                    num_windows += 1
        # vertical
        for row in range(ROWS-(CNCTX-1)):
            for col in range(COLUMNS):
                window = list(grid[row:row+CNCTX, col])
                if check_window(window, num_discs, piece):
                    num_windows += 1
        # positive diagonal
        for row in range(ROWS-(CNCTX-1)):
            for col in range(COLUMNS-(CNCTX-1)):
                window = list(grid[range(row, row+CNCTX), range(col, col+CNCTX)])
                if check_window(window, num_discs, piece):
                    num_windows += 1
        # negative diagonal
        for row in range(CNCTX-1, ROWS):
            for col in range(COLUMNS-(CNCTX-1)):
                window = list(grid[range(row, row-CNCTX, -1), range(col, col+CNCTX)])
                if check_window(window, num_discs, piece):
                    num_windows += 1
        return num_windows

    # Helper function for minimax: calculates value of heuristic for grid
    def get_score(grid, mark):
        num_threes = count_windows(grid, 3, mark) #A
        num_fours = count_windows(grid, 4, mark)  #B
        num_threes_opp = count_windows(grid, 3, mark%2+1) #C
        num_fours_opp = count_windows(grid, 4, mark%2+1)  #D

        score = A*num_threes + B*num_fours + C*num_threes_opp + D*num_fours_opp
        is_terminal = (not num_fours == 0) or (not num_fours_opp == 0) or (list(grid[0, :]).count(0) == 0)
        return score, is_terminal

    # Minimax implementation was here:
    def alphabeta(node, depth, alpha, beta, maximizingPlayer, mark):
        node_score, is_terminal = get_score(node, mark)
        if depth == 0 or is_terminal:
            return node_score
        
        valid_moves = [c for c in range(COLUMNS) if node[0][c] == 0]
        
        if maximizingPlayer:
            value = -np.Inf
            for col in valid_moves:
                child = drop_piece(node, col, mark)
                value = max(value, alphabeta(child, depth-1, alpha, beta, False, mark))
                alpha = max(alpha, value)
                if alpha >= beta:
                    break
            return value
        
        else: #minimizing player
            value = np.Inf
            for col in valid_moves:
                child = drop_piece(node, col, mark%2+1)
                value = min(value, alphabeta(child, depth-1, alpha, beta, True, mark))
                beta = min(beta, value)
                if alpha >= beta:
                    break
            return value

    # Uses minimax to calculate value of dropping piece in selected column
    def score_move(grid, col, mark, nsteps):
        next_grid = drop_piece(grid, col, mark)
        score = alphabeta(next_grid, nsteps-1, -np.Inf, np.Inf, False, mark)     
        return score
    #########################
    # Agent makes selection #
    #########################

    # Get list of valid moves
    valid_moves = [c for c in range(COLUMNS) if obs.board[c] == 0]

    # Convert the board to a 2D grid
    grid = np.asarray(obs.board).reshape(ROWS, COLUMNS)

    # Use the heuristic to assign a score to each possible board in the next step
    #scores = dict(zip(valid_moves, [score_move(grid, col, obs.mark, N_STEPS) for col in valid_moves]))
    returning_scores = []
    for col in valid_moves:
        col_score = score_move(grid, col, mark, N_STEPS)
        returning_scores.append(col_score)
        if col_score >= B:# or col_score <= D:
            break
    scores = dict(zip(valid_moves, returning_scores))   

    # Get a list of columns (moves) that maximize the heuristic
    max_cols = [key for key in scores.keys() if scores[key] == max(scores.values())]

    # Select at random from the maximizing columns
    return random.choice(max_cols)

**WITH FINAL PRUNE**

* Total time taken: 1.6 seconds (per round: 1.584 seconds)
 * two deep
* Total time taken: 2.9 seconds (per round: 2.904 seconds)
 * three deep
* Total time taken: 19.8 seconds (per round: 19.799 seconds)
 * four deep
*

**LAST TIME**

* Total time taken: 0.2 seconds (per round: 0.224 seconds)
 * previously
* Total time taken: 2.7 seconds (per round: 2.739 seconds)
 * two deep
* Total time taken: 3.0 seconds (per round: 2.991 seconds)
 * three deep
* Total time taken: 39.6 seconds (per round: 39.607 seconds)
 * four deep
*

In [None]:
import numpy as np
import random
import pandas as pd


########################### Regular pruner ################
# constants (given by game)
ROWS = 6
COLUMNS = 7
CNCTX = 4
## coefficients (weights on variable future outcomes)
A = 10       #2 threes
B = 1000     #10 fours
C = -1      #-1 opp-threes
D = -100     #-10 opp-fours   

    # lookahead depth:
N_STEPS = 4

# Gets board at next step if agent drops piece in selected column
def drop_piece(grid, col, mark):
    next_grid = grid.copy()
    for row in range(ROWS-1, -1, -1):       ###row in range(0,ROWS)??
        if next_grid[row][col] == 0:
            break
    next_grid[row][col] = mark
    print ("Dropped", mark,"into column", col, ", row:", row)
    return next_grid

# Helper function for get_heuristic: checks if window satisfies heuristic conditions
def check_window(window, num_discs, piece):
    return (window.count(piece) == num_discs and window.count(0) == CNCTX-num_discs)

# Helper function for get_heuristic: counts number of windows satisfying specified heuristic conditions
def count_windows(grid, num_discs, piece):
    num_windows = 0
    # horizontal
    for row in range(ROWS):
        for col in range(COLUMNS-(CNCTX-1)):
            window = list(grid[row, col:col+CNCTX])
            if check_window(window, num_discs, piece):
                num_windows += 1
    # vertical
    for row in range(ROWS-(CNCTX-1)):
        for col in range(COLUMNS):
            window = list(grid[row:row+CNCTX, col])
            if check_window(window, num_discs, piece):
                num_windows += 1
    # positive diagonal
    for row in range(ROWS-(CNCTX-1)):
        for col in range(COLUMNS-(CNCTX-1)):
            window = list(grid[range(row, row+CNCTX), range(col, col+CNCTX)])
            if check_window(window, num_discs, piece):
                num_windows += 1
    # negative diagonal
    for row in range(CNCTX-1, ROWS):
        for col in range(COLUMNS-(CNCTX-1)):
            window = list(grid[range(row, row-CNCTX, -1), range(col, col+CNCTX)])
            if check_window(window, num_discs, piece):
                num_windows += 1
    return num_windows

# Helper function for minimax: calculates value of heuristic for grid
def get_heuristic(grid, mark):
    num_threes = count_windows(grid, 3, mark) #A
    num_fours = count_windows(grid, 4, mark)  #B
    num_threes_opp = count_windows(grid, 3, mark%2+1) #C
    num_fours_opp = count_windows(grid, 4, mark%2+1)  #D
    score = A*num_threes + B*num_fours + C*num_threes_opp + D*num_fours_opp
    return score

# Helper function for minimax: checks if agent or opponent has four in a row in the window
def is_terminal_window(window):
    return window.count(1) == CNCTX or window.count(2) == CNCTX

# Helper function for minimax: checks if game has ended
def is_terminal_node(grid):
    # Check for draw 
    if list(grid[0, :]).count(0) == 0:
        return True
    # Check for win: horizontal, vertical, or diagonal
    # horizontal 
    for row in range(ROWS):
        for col in range(COLUMNS-(CNCTX-1)):
            window = list(grid[row, col:col+CNCTX])
            if is_terminal_window(window):
                return True
    # vertical
    for row in range(ROWS-(CNCTX-1)):
        for col in range(COLUMNS):
            window = list(grid[row:row+CNCTX, col])
            if is_terminal_window(window):
                return True
    # positive diagonal
    for row in range(ROWS-(CNCTX-1)):
        for col in range(COLUMNS-(CNCTX-1)):
            window = list(grid[range(row, row+CNCTX), range(col, col+CNCTX)])
            if is_terminal_window(window):
                return True
    # negative diagonal
    for row in range(CNCTX-1, ROWS):
        for col in range(COLUMNS-(CNCTX-1)):
            window = list(grid[range(row, row-CNCTX, -1), range(col, col+CNCTX)])
            if is_terminal_window(window):
                return True
    return False

# Minimax implementation was here:
def alphabeta(node, depth, alpha, beta, maximizingPlayer, mark):
    print ("~ Entering Pruner Loop ~")
    print ("depth: {} \talpha: {} \tbeta: {}".format(depth,
                                                    round(alpha,2),
                                                    round(beta,2)) )
    if depth == 0:
        node_score = get_heuristic(node, mark)
        print ("\n~~ Depth terminal grid ~~\n", node,"\nnode score:", node_score,"\n")
        return node_score
    if is_terminal_node(node): 
        node_score = get_heuristic(node, mark)       
        print ("Winning terminal grid\n", node,"\nterminal score:", node_score,"\n")
        return node_score
    
    valid_moves = [c for c in range(COLUMNS) if node[0][c] == 0]
    
    if maximizingPlayer:
        value = -np.Inf
        for col in valid_moves:
            child = drop_piece(node, col, mark)
            value = max(value, alphabeta(child, depth-1, alpha, beta, False, mark))
            alpha = max(alpha, value)
            if alpha >= beta:
                print ("\t\t<<<< BETA cut-off >>>>")
                print ("\t\t<< alpha:",alpha,">= beta:",beta,">>")
                break
        return value
    
    else: #minimizing player
        value = np.Inf
        for col in valid_moves:
            child = drop_piece(node, col, mark%2+1)
            value = min(value, alphabeta(child, depth-1, alpha, beta, True, mark))
            beta = min(beta, value)
            if alpha >= beta:
                print ("\t\t<<<< ALPHA cut-off >>>>")
                print ("\t\t<< alpha:",alpha,">= beta:",beta,">>")
                break
        return value

# Uses minimax to calculate value of dropping piece in selected column
def score_move(grid, col, mark, nsteps):
    print("\nv v v v v v v v v v v v v v v v v v\nGet best score for column:",col)
    next_grid = drop_piece(grid, col, mark)
    score = alphabeta(next_grid, nsteps-1, -np.Inf, np.Inf, False, mark)     
    print("Return score:",score, "for column", col)
    print("\n^ ^ ^ ^ ^ ^ ^ ^ ^ ^ ^ ^ ^ ^ ^ ^ ^ ^ \n")
    return score

#########################
# Agent makes selection #
#########################
# with:                 #
start_time = time.time()#
mark = 1                #
#grid = np.zeros((ROWS,COLUMNS))
#########################
grid = np.asarray([ [0, 0, 1, 0, 0, 0, 0],
                    [0, 0, 2, 0, 0, 0, 0],
                    [0, 0, 2, 0, 0, 0, 2],
                    [0, 1, 1, 0, 0, 0, 2],
                    [0, 1, 1, 1, 0, 2, 2],
                    [0, 1, 2, 2, 2, 1, 1] ])

valid_moves = [c for c in range(COLUMNS) if grid[0,c] == 0] # or grid[C-1,c]?

print("start")# grid\n", grid)

# Use the heuristic to assign a score to each possible board in the next step
scores = dict(zip(valid_moves, [score_move(grid, col, mark, N_STEPS) for col in valid_moves]))

# Get a list of columns (moves) that maximize the heuristic
max_cols = [key for key in scores.keys() if scores[key] == max(scores.values())]
max_choice = random.choice(max_cols)
print ("\n++++++++++++\nFinal scores:", [value for value in scores.values()])
print ("max col =", max_choice, "\tvalue:", scores[max_choice])
print ("++++++++++++\nstop")# grid\n", grid)

run_time = time.time() - start_time
print ("Total time taken: {} minutes and {} seconds".format(run_time//60, round(run_time%60,3)) )

standard AB pruner values and times
```
++++++++++++
Final scores: [-192, 1018, -182, 1020, -192, -192, -192]
max col = 3 	value: 1020
++++++++++++
Total time taken: 0.0 minutes and 1.713 seconds
```



In [94]:
import numpy as np
import random
import pandas as pd


########################### Minimalist  pruner 


# constants (given by game)
ROWS = 6
COLUMNS = 7
CNCTX = 4
## coefficients (weights on variable future outcomes)
A = 10       #2 threes
B = 1000     #10 fours
C = -1       #-1 opp-threes
D = -100     #-10 opp-fours   

    # lookahead depth:
N_STEPS = 4

# Gets board at next step if agent drops piece in selected column
def drop_piece(grid, col, mark):
    next_grid = grid.copy()
    for row in range(ROWS-1, -1, -1):       ###row in range(0,ROWS)??
        if next_grid[row][col] == 0:
            break
    next_grid[row][col] = mark
    print ("Dropped", mark,"into column", col, ", row:", row)
    return next_grid

# Helper function for get_heuristic: checks if window satisfies heuristic conditions
def check_window(window, num_discs, piece):
    return (window.count(piece) == num_discs and window.count(0) == CNCTX-num_discs)

# Helper function for get_heuristic: counts number of windows satisfying specified heuristic conditions
def count_windows(grid, num_discs, piece):
    num_windows = 0
    # horizontal
    for row in range(ROWS):
        for col in range(COLUMNS-(CNCTX-1)):
            window = list(grid[row, col:col+CNCTX])
            if check_window(window, num_discs, piece):
                num_windows += 1
    # vertical
    for row in range(ROWS-(CNCTX-1)):
        for col in range(COLUMNS):
            window = list(grid[row:row+CNCTX, col])
            if check_window(window, num_discs, piece):
                num_windows += 1
    # positive diagonal
    for row in range(ROWS-(CNCTX-1)):
        for col in range(COLUMNS-(CNCTX-1)):
            window = list(grid[range(row, row+CNCTX), range(col, col+CNCTX)])
            if check_window(window, num_discs, piece):
                num_windows += 1
    # negative diagonal
    for row in range(CNCTX-1, ROWS):
        for col in range(COLUMNS-(CNCTX-1)):
            window = list(grid[range(row, row-CNCTX, -1), range(col, col+CNCTX)])
            if check_window(window, num_discs, piece):
                num_windows += 1
    return num_windows

# Helper function for minimax: calculates value of heuristic for grid
def get_score(grid, mark):
    num_threes = count_windows(grid, 3, mark) #A
    num_fours = count_windows(grid, 4, mark)  #B
    num_threes_opp = count_windows(grid, 3, mark%2+1) #C
    num_fours_opp = count_windows(grid, 4, mark%2+1)  #D

    score = A*num_threes + B*num_fours + C*num_threes_opp + D*num_fours_opp
    is_terminal = (not num_fours == 0) or (not num_fours_opp == 0) or (list(grid[0, :]).count(0) == 0)
    return score, is_terminal

# Minimax implementation was here:
def alphabeta(node, depth, alpha, beta, maximizingPlayer, mark):
    print ("~ Entering Pruner Loop ~")    
    print ("depth: {} \talpha: {} \tbeta: {}".format(depth,
                                                round(alpha,2),
                                                round(beta,2)) )
    #if alpha >= 20: 
    #    return alpha
    #if beta <= -10:
    #    return beta

    node_score, is_terminal = get_score(node, mark)
    if depth == 0 or is_terminal:
        print ("\n~~ Terminal grid ~~\n", node,"\nnode score:", node_score,"\n")
        return node_score
     
    valid_moves = [c for c in range(COLUMNS) if node[0][c] == 0]
    
    if maximizingPlayer:
        value = -np.Inf
        for col in valid_moves:
            child = drop_piece(node, col, mark)
            value = max(value, alphabeta(child, depth-1, alpha, beta, False, mark))
            alpha = max(alpha, value)
            if alpha >= beta or (value >= (B + 2*C)):
                print ("\t\t<<<< BETA cut-off >>>>")
                print ("\t\t<< alpha:",alpha,">= beta:",beta,">>")
                print ("\t\t<<< value:",value,">=",B + 2*C," >>>")
                break
        return value
    
    else: #minimizing player
        value = np.Inf
        for col in valid_moves:
            child = drop_piece(node, col, mark%2+1)
            value = min(value, alphabeta(child, depth-1, alpha, beta, True, mark))
            beta = min(beta, value)
            if alpha >= beta or (value <= (D + 2*A)):
                print ("\t\t<<<< ALPHA cut-off >>>>")
                print ("\t\t<< alpha:",alpha,">= beta:",beta,">>")
                print ("\t\t<<< value:",value,"<=",D + 2*A," >>>")
                break
        return value

# Uses minimax to calculate value of dropping piece in selected column
def score_move(grid, col, mark, nsteps):
    print("\nv v v v v v v v v v v v v v v v v v\nGet best score for column:",col)
    next_grid = drop_piece(grid, col, mark)
    score = alphabeta(next_grid, nsteps-1, -np.Inf, np.Inf, False, mark)     
    print("Return score:",score, "for column", col)
    print("\n^ ^ ^ ^ ^ ^ ^ ^ ^ ^ ^ ^ ^ ^ ^ ^ ^ ^ \n")
    return score

#########################
# Agent makes selection #
#########################
# with:                 #
start_time = time.time()#
mark = 1                #
#grid = np.zeros((ROWS,COLUMNS))
#########################
grid = np.asarray([ [0, 0, 0, 0, 0, 0, 0],
                    [0, 0, 0, 0, 0, 0, 0],
                    [0, 0, 0, 0, 0, 0, 0],
                    [0, 0, 0, 2, 1, 0, 2],
                    [0, 1, 0, 2, 1, 0, 2],
                    [0, 1, 1, 2, 1, 1, 2] ])

valid_moves = [c for c in range(COLUMNS) if grid[0,c] == 0] # or grid[C-1,c]?

print("start")# grid\n", grid)

# Use the heuristic to assign a score to each possible board in the next step
#scores = dict(zip(valid_moves, [score_move(grid, col, mark, N_STEPS) for col in valid_moves]))
returning_scores = []
for col in valid_moves:
    col_score = score_move(grid, col, mark, N_STEPS)
    returning_scores.append(col_score)
    if col_score >= A:# or col_score <= D:
        break
scores = dict(zip(valid_moves, returning_scores))   

# Get a list of columns (moves) that maximize the heuristic
max_cols = [key for key in scores.keys() if scores[key] == max(scores.values())]
max_choice = random.choice(max_cols)
print ("\n++++++++++++\nFinal scores:", [value for value in scores.values()])
print ("max col =", max_choice, "\tvalue:", scores[max_choice])
print ("++++++++++++\nstop")# grid\n", grid)

run_time = time.time() - start_time
print ("Total time taken: {} minutes and {} seconds".format(run_time//60, round(run_time%60,3)) )

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
depth: 0 	alpha: -inf 	beta: inf

~~ Terminal grid ~~
 [[0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0]
 [2 0 0 0 0 0 0]
 [1 0 0 2 1 0 2]
 [2 1 0 2 1 0 2]
 [1 1 1 2 1 1 2]] 
node score: 8 

Dropped 2 into column 1 , row: 3
~ Entering Pruner Loop ~
depth: 0 	alpha: -inf 	beta: 8

~~ Terminal grid ~~
 [[0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0]
 [1 2 0 2 1 0 2]
 [2 1 0 2 1 0 2]
 [1 1 1 2 1 1 2]] 
node score: 8 

Dropped 2 into column 2 , row: 4
~ Entering Pruner Loop ~
depth: 0 	alpha: -inf 	beta: 8

~~ Terminal grid ~~
 [[0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0]
 [1 0 0 2 1 0 2]
 [2 1 2 2 1 0 2]
 [1 1 1 2 1 1 2]] 
node score: 8 

Dropped 2 into column 3 , row: 2
~ Entering Pruner Loop ~
depth: 0 	alpha: -inf 	beta: 8

~~ Terminal grid ~~
 [[0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0]
 [0 0 0 2 0 0 0]
 [1 0 0 2 1 0 2]
 [2 1 0 2 1 0 2]
 [1 1 1 2 1 1 2]] 
node score: -92 

		<<<< ALPHA cut-off >>>>
		<< alpha: -inf >= beta: -92 >>
		<<< 



```
First model:

Final scores: [-3, 27, 1, 23, -5, 22]
max col = 1 	value: 27
++++++++++++
stop
Total time taken: 0.0 minutes and 0.81 seconds
```




Minimalist Pruner
```
++++++++++++
Final scores: [-192, 1018, -182, 1020, -192, -192, -192]
max col = 3 	value: 1020
++++++++++++
Total time taken: 0.0 minutes and 0.692 seconds
```



In [93]:
# @ title Experimental Pruning Agent "pruneX"
#def pruneTTT(obs, config):
    #config is dict: {'rows': 6, 'columns': 7, 'inarow': 4}
    # obs.board is last move of opponent, obs.mark is current player
    # return column that max's next grid's score

    ################################
    # Imports and helper functions #
    ################################

import numpy as np
import random

########################### Regular pruner ################
# constants (given by game)
ROWS = 6
COLUMNS = 7
CNCTX = 4
## coefficients (weights on variable future outcomes)
A = 10       #2 threes
B = 1000     #10 fours
C = -1      #-1 opp-threes
D = -100     #-10 opp-fours   

    # lookahead depth:
N_STEPS = 4#@ param {type:"integer"}

# Gets board at next step if agent drops piece in selected column
def drop_piece(grid, col, mark):
    next_grid = grid.copy()
    for row in range(ROWS-1, -1, -1):       ###row in range(0,ROWS)??
        if next_grid[row][col] == 0:
            break
    next_grid[row][col] = mark
    return next_grid

# Helper function for get_heuristic: checks if window satisfies heuristic conditions
def check_window(window, num_discs, piece):
    return (window.count(piece) == num_discs and window.count(0) == CNCTX-num_discs)

# Helper function for get_heuristic: counts number of windows satisfying specified heuristic conditions
def count_windows(grid, num_discs, piece):
    num_windows = 0
    # horizontal
    for row in range(ROWS):
        for col in range(COLUMNS-(CNCTX-1)):
            window = list(grid[row, col:col+CNCTX])
            if check_window(window, num_discs, piece):
                num_windows += 1
    # vertical
    for row in range(ROWS-(CNCTX-1)):
        for col in range(COLUMNS):
            window = list(grid[row:row+CNCTX, col])
            if check_window(window, num_discs, piece):
                num_windows += 1
    # positive diagonal
    for row in range(ROWS-(CNCTX-1)):
        for col in range(COLUMNS-(CNCTX-1)):
            window = list(grid[range(row, row+CNCTX), range(col, col+CNCTX)])
            if check_window(window, num_discs, piece):
                num_windows += 1
    # negative diagonal
    for row in range(CNCTX-1, ROWS):
        for col in range(COLUMNS-(CNCTX-1)):
            window = list(grid[range(row, row-CNCTX, -1), range(col, col+CNCTX)])
            if check_window(window, num_discs, piece):
                num_windows += 1
    return num_windows

# Helper function for minimax: calculates value of heuristic for grid
def get_score(grid, mark):
    num_threes = count_windows(grid, 3, mark) #A
    num_fours = count_windows(grid, 4, mark)  #B
    num_threes_opp = count_windows(grid, 3, mark%2+1) #C
    num_fours_opp = count_windows(grid, 4, mark%2+1)  #D

    score = A*num_threes + B*num_fours + C*num_threes_opp + D*num_fours_opp
    is_terminal = (not num_fours == 0) or (not num_fours_opp == 0) or (list(grid[0, :]).count(0) == 0)
    return score, is_terminal

# Minimax implementation was here:
def alphabeta(node, depth, alpha, beta, maximizingPlayer, mark):
    print ("~ Entering Pruner Loop ~")    
    print ("depth: {} \talpha: {} \tbeta: {}".format(depth,
                                            round(alpha,2),
                                            round(beta,2)) )

    node_score, is_terminal = get_score(node, mark)
    if depth == 0 or is_terminal:
        print ("\n~~ Terminal grid ~~\n", node,"\nnode score:", node_score,"\n")
        return node_score
        
    valid_moves = [c for c in range(COLUMNS) if node[0][c] == 0]

    if maximizingPlayer:
        value = -np.Inf
        for col in valid_moves:
            child = drop_piece(node, col, mark)
            value = max(value, alphabeta(child, depth-1, alpha, beta, False, mark))
            alpha = max(alpha, value)
            if alpha >= beta or (value >= (B + 2*C)):
                break
        return value

    else: #minimizing player
        value = np.Inf
        for col in valid_moves:
            child = drop_piece(node, col, mark%2+1)
            value = min(value, alphabeta(child, depth-1, alpha, beta, True, mark))
            beta = min(beta, value)
            if alpha >= beta or (value <= (D + 2*A)):
                break
        return value

# Uses minimax to calculate value of dropping piece in selected column
def score_move(grid, col, mark, nsteps):
    next_grid = drop_piece(grid, col, mark)
    score = alphabeta(next_grid, nsteps-1, -np.Inf, np.Inf, False, mark)     
    return score

#########################
# Agent makes selection #
#########################
# with:                 #
start_time = time.time()#
mark = 1                #
#grid = np.zeros((ROWS,COLUMNS))
#########################
grid = np.asarray([ [0, 0, 0, 0, 0, 0, 0],
                    [0, 0, 0, 0, 0, 0, 0],
                    [0, 0, 0, 0, 0, 0, 0],
                    [0, 0, 0, 2, 1, 0, 2],
                    [0, 1, 0, 2, 1, 0, 2],
                    [0, 1, 1, 2, 1, 1, 2] ])

valid_moves = [c for c in range(COLUMNS) if grid[0,c] == 0] # or grid[C-1,c]?

print("start")# grid\n", grid)

# Use the heuristic to assign a score to each possible board in the next step
#scores = dict(zip(valid_moves, [score_move(grid, col, mark, N_STEPS) for col in valid_moves]))
returning_scores = []
for col in valid_moves:
    col_score = score_move(grid, col, mark, N_STEPS)
    returning_scores.append(col_score)
    if col_score >= A:# or col_score <= D:
        break
scores = dict(zip(valid_moves, returning_scores))   

# Get a list of columns (moves) that maximize the heuristic
max_cols = [key for key in scores.keys() if scores[key] == max(scores.values())]
max_choice = random.choice(max_cols)
print ("\n++++++++++++\nFinal scores:", [value for value in scores.values()])
print ("max col =", max_choice, "\tvalue:", scores[max_choice])
print ("++++++++++++\nstop")# grid\n", grid)

run_time = time.time() - start_time
print ("Total time taken: {} minutes and {} seconds".format(run_time//60, round(run_time%60,3)) )

start
~ Entering Pruner Loop ~
depth: 3 	alpha: -inf 	beta: inf
~ Entering Pruner Loop ~
depth: 2 	alpha: -inf 	beta: inf
~ Entering Pruner Loop ~
depth: 1 	alpha: -inf 	beta: inf
~ Entering Pruner Loop ~
depth: 0 	alpha: -inf 	beta: inf

~~ Terminal grid ~~
 [[0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0]
 [2 0 0 0 0 0 0]
 [1 0 0 2 1 0 2]
 [2 1 0 2 1 0 2]
 [1 1 1 2 1 1 2]] 
node score: 8 

~ Entering Pruner Loop ~
depth: 0 	alpha: -inf 	beta: 8

~~ Terminal grid ~~
 [[0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0]
 [1 2 0 2 1 0 2]
 [2 1 0 2 1 0 2]
 [1 1 1 2 1 1 2]] 
node score: 8 

~ Entering Pruner Loop ~
depth: 0 	alpha: -inf 	beta: 8

~~ Terminal grid ~~
 [[0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0]
 [1 0 0 2 1 0 2]
 [2 1 2 2 1 0 2]
 [1 1 1 2 1 1 2]] 
node score: 8 

~ Entering Pruner Loop ~
depth: 0 	alpha: -inf 	beta: 8

~~ Terminal grid ~~
 [[0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0]
 [0 0 0 2 0 0 0]
 [1 0 0 2 1 0 2]
 [2 1 0 2 1 0 2]
 [1 1 1 2 1 1 2]] 
node score: -92 

~ Entering Pruner Loop ~