Reused code

In [1]:
import random
import numpy as np

# Gets board at next step if agent drops piece in selected column
def drop_piece(grid, col, mark, config):
    next_grid = grid.copy()
    for row in range(config.rows-1, -1, -1):
        if next_grid[row][col] == 0:
            break
    next_grid[row][col] = mark
    return next_grid

# Helper function for get_heuristic: checks if window satisfies heuristic conditions
def check_window(window, num_discs, piece, config):
    return (window.count(piece) == num_discs and window.count(0) == config.inarow-num_discs)
    
# Helper function for get_heuristic: counts number of windows satisfying specified heuristic conditions
def count_windows(grid, num_discs, piece, config):
    num_windows = 0
    # horizontal
    for row in range(config.rows):
        for col in range(config.columns-(config.inarow-1)):
            window = list(grid[row, col:col+config.inarow])
            if check_window(window, num_discs, piece, config):
                num_windows += 1
    # vertical
    for row in range(config.rows-(config.inarow-1)):
        for col in range(config.columns):
            window = list(grid[row:row+config.inarow, col])
            if check_window(window, num_discs, piece, config):
                num_windows += 1
    # positive diagonal
    for row in range(config.rows-(config.inarow-1)):
        for col in range(config.columns-(config.inarow-1)):
            window = list(grid[range(row, row+config.inarow), range(col, col+config.inarow)])
            if check_window(window, num_discs, piece, config):
                num_windows += 1
    # negative diagonal
    for row in range(config.inarow-1, config.rows):
        for col in range(config.columns-(config.inarow-1)):
            window = list(grid[range(row, row-config.inarow, -1), range(col, col+config.inarow)])
            if check_window(window, num_discs, piece, config):
                num_windows += 1
    return num_windows

In [6]:
# Helper function for score_move: gets board at next step if agent drops piece in selected column
def drop_piece(grid, col, mark, config):
    next_grid = grid.copy()
    
    for row in range(config.rows-1, -1, -1):
        if next_grid[row][col] == 0:
            break
            
    next_grid[row][col] = mark
    return next_grid

In [25]:
def get_win_percentages(agent1, agent2, n_rounds=100):
    # Use default Connect 4 setup
    config = {'rows': 6, 'columns': 7, 'inarow': 4}
    
    # Agent 1 goes first (roughly) half the time
    outcomes = evaluate('connectx', [agent1, agent2], config, [], n_rounds//2)
    
    # Agent 2 goes first (roughly) half the time
    outcomes += [[b, a] for [a, b] in evaluate('connectx', [agent2, agent1], config, [], n_rounds-n_rounds//2)]
    
    print('Agent 1 Win Percent:', np.round(outcomes.count([1, -1])/len(outcomes), 2))
    print('Agent 2 Win Percent:', np.round(outcomes.count([-1, 1])/len(outcomes), 2))

# Adjust old code to account for opponent winning the game

In [4]:
# Helper function for score_move: calculates value of heuristic for grid
def get_heuristic(grid, mark, config):
    num_threes, num_fours, num_threes_opp, num_fours_opp = count_windows(grid, 4, mark, config)
    
    score = num_threes - 1e2 * num_threes_opp + 1e6 * num_fours - 1e4 * num_fours_opp
    return score
    
# Helper function for get_heuristic: counts number of windows satisfying specified heuristic conditions
def count_windows(grid, num_discs, piece, config):
    opp_mark = piece % 2 + 1
    num_windows = 0
    
    # Changed to add new condition to check
    windows_dict = {'p4':0, 'p3':0, 'o3':0, 'o4':0}
    
    # horizontal
    for row in range(config.rows):
        for col in range(config.columns - (config.inarow - 1)):
            window = list(grid[row, col:col + config.inarow])
            windows_dict = check_windows(window, num_discs, piece, config, windows_dict)
            
    # vertical
    for row in range(config.rows - (config.inarow - 1)):
        for col in range(config.columns):
            window = list(grid[row:row + config.inarow, col])
            windows_dict = check_windows(window, num_discs, piece, config, windows_dict)
                
    # positive diagonal
    for row in range(config.rows - (config.inarow - 1)):
        for col in range(config.columns - (config.inarow - 1)):
            window = list(grid[range(row, row + config.inarow), range(col, col + config.inarow)])
            windows_dict = check_windows(window, num_discs, piece, config, windows_dict)
                
    # negative diagonal
    for row in range(config.inarow - 1, config.rows):
        for col in range(config.columns-(config.inarow-1)):
            window = list(grid[range(row, row - config.inarow, -1), range(col, col + config.inarow)])
            windows_dict = check_windows(window, num_discs, piece, config, windows_dict)
                
    return windows_dict['p3'], windows_dict['p4'], windows_dict['o3'], windows_dict['o4']

In [5]:
# Check windows
def check_windows(window, num_discs, piece, config, windows_dict):
    # Check for player winning matches
    if check_window(window, num_discs, piece, config):
        windows_dict['p4'] += 1
            
    # Check for player almost winning matches
    if check_window(window, num_discs - 1, piece, config):
        windows_dict['p3'] += 1
            
    # Check for opponent almost winning matches
    if check_window(window, num_discs - 1, piece % 2 + 1, config):
        windows_dict['o3'] += 1
        
    # Check for opponent winning matches
    if check_window(window, num_discs, piece % 2 + 1, config):
        windows_dict['o4'] += 1
        
    return windows_dict

# Helper function for get_heuristic: checks if window satisfies heuristic conditions
def check_window(window, num_discs, piece, config):
    return (window.count(piece) == num_discs and window.count(0) == config.inarow-num_discs)

# New code

In [19]:
# Calculates score if agent drops piece in selected column
def score_move(grid, col, mark, config, nsteps):
    next_grid = drop_piece(grid, col, mark, config)
    #score = get_heuristic(next_grid, mark, config)
    score = minimax(next_grid, nsteps - 1, False, mark, config)
    return score

# Helper function for minimax: checks if agent or opponent has four in a row in the window
def is_terminal_window(window, config):
    return window.count(1) == config.inarow or window.count(2) == config.inarow

# Helper function for minimax: checks if game has ended
def is_terminal_node(grid, config):
    # Check for draw
    if list(grid[0, :]).count(0) == 0:
        return True
    
    # Check for win: horizontal, vertical, or diagonal
    # horizontal 
    for row in range(config.rows):
        for col in range(config.columns-(config.inarow-1)):
            window = list(grid[row, col:col+config.inarow])
            if is_terminal_window(window, config):
                return True
            
    # vertical
    for row in range(config.rows-(config.inarow-1)):
        for col in range(config.columns):
            window = list(grid[row:row+config.inarow, col])
            if is_terminal_window(window, config):
                return True
            
    # positive diagonal
    for row in range(config.rows-(config.inarow-1)):
        for col in range(config.columns-(config.inarow-1)):
            window = list(grid[range(row, row+config.inarow), range(col, col+config.inarow)])
            if is_terminal_window(window, config):
                return True
            
    # negative diagonal
    for row in range(config.inarow-1, config.rows):
        for col in range(config.columns-(config.inarow-1)):
            window = list(grid[range(row, row-config.inarow, -1), range(col, col+config.inarow)])
            if is_terminal_window(window, config):
                return True
            
    return False

In [3]:
# Minimax implementation
def minimax(node, depth, maximizingPlayer, mark, config):
    is_terminal = is_terminal_node(node, config)
    valid_moves = [c for c in range(config.columns) if node[0][c] == 0]
    
    if depth == 0 or is_terminal:
        return get_heuristic(node, mark, config)
    
    if maximizingPlayer:
        value = -np.Inf
        for col in valid_moves:
            child = drop_piece(node, col, mark, config)
            value = max(value, minimax(child, depth - 1, False, mark, config))
            
            return value
    else:
        value = np.Inf
        for col in valid_moves:
            child = drop_piece(node, col, mark % 2 + 1, config)
            value = min(value, minimax(child, depth - 1, True, mark, config))
            
            return value

In [13]:
# How deep to make hte game tree: higher values take longer to run!
N_STEPS = 3

def agent_Nsteps(obs, config):
    # Get list of valid moves
    valid_moves = [c for c in range(config.columns) if obs.board[c] == 0]
    
    # Convert the board to a 2D grid
    grid = np.asarray(obs.board).reshape(config.rows, config.columns)
    
    # Use the heuristic to assign a score to each possible board in the next step
    scores = dict(zip(valid_moves, [score_move(grid, col, obs.mark, config, N_STEPS) for col in valid_moves]))
    
    # Get a list of columns (moves) that maximize the heuristic
    max_cols = [key for key in scores.keys() if scores[key] == max(scores.values())]
    
    # Select at random from the maximizing columns
    return random.choice(max_cols)

In [39]:
from kaggle_environments import make, evaluate

env = make('connectx', debug=True)

env.run([agent_onestep, 'random'])

env.render(mode='ipython')

Traceback (most recent call last):
  File "C:\Users\Danny\anaconda3\envs\dsi\lib\site-packages\kaggle_environments\agent.py", line 151, in act
    action = self.agent(*args)
  File "<ipython-input-33-e1f3a2542152>", line 88, in agent_onestep
    scores = dict(zip(valid_moves, [score_move1(grid, col, obs.mark, config) for col in valid_moves]))
  File "<ipython-input-33-e1f3a2542152>", line 88, in <listcomp>
    scores = dict(zip(valid_moves, [score_move1(grid, col, obs.mark, config) for col in valid_moves]))
  File "<ipython-input-33-e1f3a2542152>", line 4, in score_move1
    score = get_heuristic1(next_grid, mark, config)
  File "<ipython-input-33-e1f3a2542152>", line 21, in get_heuristic1
    num_threes, num_fours, num_threes_opp = count_windows(grid, 4, mark, config)
ValueError: too many values to unpack (expected 3)
Error: ['Traceback (most recent call last):\n', '  File "C:\\Users\\Danny\\anaconda3\\envs\\dsi\\lib\\site-packages\\kaggle_environments\\agent.py", line 151, in act\n  

In [37]:
get_win_percentages(agent_Nsteps, 'random')

Agent 1 Win Percent: 0.98
Agent 2 Win Percent: 0.02


In [24]:
%timeit get_win_percentages(agent_Nsteps, 'random')

Agent 1 Win Percent: 0.99
Agent 2 Win Percent: 0.01
Number of invalid plays by Agent 1: 0
Number of invalid plays by Agent 2: 0
Agent 1 Win Percent: 0.95
Agent 2 Win Percent: 0.05
Number of invalid plays by Agent 1: 0
Number of invalid plays by Agent 2: 0
Agent 1 Win Percent: 0.94
Agent 2 Win Percent: 0.06
Number of invalid plays by Agent 1: 0
Number of invalid plays by Agent 2: 0
Agent 1 Win Percent: 1.0
Agent 2 Win Percent: 0.0
Number of invalid plays by Agent 1: 0
Number of invalid plays by Agent 2: 0
Agent 1 Win Percent: 1.0
Agent 2 Win Percent: 0.0
Number of invalid plays by Agent 1: 0
Number of invalid plays by Agent 2: 0
Agent 1 Win Percent: 0.99
Agent 2 Win Percent: 0.01
Number of invalid plays by Agent 1: 0
Number of invalid plays by Agent 2: 0
Agent 1 Win Percent: 0.99
Agent 2 Win Percent: 0.01
Number of invalid plays by Agent 1: 0
Number of invalid plays by Agent 2: 0
Agent 1 Win Percent: 0.98
Agent 2 Win Percent: 0.02
Number of invalid plays by Agent 1: 0
Number of invalid 

# Compare 1 step to 3 step

In [40]:
# Calculates score if agent drops piece in selected column
def score_move1(grid, col, mark, config):
    next_grid = drop_piece1(grid, col, mark, config)
    score = get_heuristic1(next_grid, mark, config)
    return score

# Helper function for score_move: gets board at next step if agent drops piece in selected column
def drop_piece1(grid, col, mark, config):
    next_grid = grid.copy()
    
    for row in range(config.rows-1, -1, -1):
        if next_grid[row][col] == 0:
            break
            
    next_grid[row][col] = mark
    return next_grid

# Helper function for score_move: calculates value of heuristic for grid
# Optimization idea: configure count windows to count all conditions at once
def get_heuristic1(grid, mark, config):
    num_threes, num_fours, num_threes_opp = count_windows1(grid, 4, mark, config)
    
    score = num_threes - 1e2 * num_threes_opp + 1e6 * num_fours
    return score
    
# Helper function for get_heuristic: counts number of windows satisfying specified heuristic conditions
def count_windows1(grid, num_discs, piece, config):
    opp_mark = piece % 2 + 1
    num_windows = 0
    windows_dict = {'p4':0, 'p3':0, 'o3':0}
    
    # horizontal
    for row in range(config.rows):
        for col in range(config.columns - (config.inarow - 1)):
            window = list(grid[row, col:col + config.inarow])
            windows_dict = check_windows1(window, num_discs, piece, config, windows_dict)
            
    # vertical
    for row in range(config.rows - (config.inarow - 1)):
        for col in range(config.columns):
            window = list(grid[row:row + config.inarow, col])
            windows_dict = check_windows1(window, num_discs, piece, config, windows_dict)
                
    # positive diagonal
    for row in range(config.rows - (config.inarow - 1)):
        for col in range(config.columns - (config.inarow - 1)):
            window = list(grid[range(row, row + config.inarow), range(col, col + config.inarow)])
            windows_dict = check_windows1(window, num_discs, piece, config, windows_dict)
                
    # negative diagonal
    for row in range(config.inarow - 1, config.rows):
        for col in range(config.columns-(config.inarow-1)):
            window = list(grid[range(row, row - config.inarow, -1), range(col, col + config.inarow)])
            windows_dict = check_windows1(window, num_discs, piece, config, windows_dict)
                
    return windows_dict['p3'], windows_dict['p4'], windows_dict['o3']

# Check windows
def check_windows1(window, num_discs, piece, config, windows_dict):

    # Check for player winning matches
    if check_window1(window, num_discs, piece, config):
        windows_dict['p4'] += 1
            
    # Check for player almost winning matches
    if check_window1(window, num_discs - 1, piece, config):
        windows_dict['p3'] += 1
            
    # Check for opponent almost winning matches
    if check_window1(window, num_discs - 1, piece % 2 + 1, config):
        windows_dict['o3'] += 1
        
    return windows_dict

# Helper function for get_heuristic: checks if window satisfies heuristic conditions
def check_window1(window, num_discs, piece, config):
    return (window.count(piece) == num_discs and window.count(0) == config.inarow-num_discs)

# The agent is always implemented as a Python function that accepts two arguments: obs and config
def agent_onestep(obs, config):
    # Get list of valid moves
    valid_moves = [c for c in range(config.columns) if obs.board[c] == 0]
    
    # Convert the board to a 2D grid
    grid = np.asarray(obs.board).reshape(config.rows, config.columns)
    
    # Use the heuristic to assign a score to each possible board in the next turn
    scores = dict(zip(valid_moves, [score_move1(grid, col, obs.mark, config) for col in valid_moves]))
    
    # Get a list of columns (moves) that maximize the heuristic
    max_cols = [key for key in scores.keys() if scores[key] == max(scores.values())]
    
    # Select at random from the maximizing columns
    return random.choice(max_cols)

In [42]:
get_win_percentages(agent_Nsteps, agent_onestep)

Agent 1 Win Percent: 0.5
Agent 2 Win Percent: 0.5


Wow, 50/50 despite the improvements? Hmmm