Development by https://www.kaggle.com/mrgeislinger

# Install kaggle-environments

In [None]:
# 1. Enable Internet in the Kernel (Settings side pane)

# 2. The curl cache may need to be purged if v0.1.6 cannot be found (uncomment if needed).
# !curl -X PURGE https://pypi.org/simple/kaggle-environments

# ConnectX environment was defined in v0.1.6
!pip install 'kaggle-environments>=0.1.6'

In [None]:
# Notebook-wide switch: when True, the test loop, the training loop and the
# evaluation cell below actually play games; when False they are skipped.
debug = False

# Create ConnectX Environment

In [None]:
import numpy as np

from kaggle_environments import evaluate, make, utils
# Since utils.get_last_callable moved to agent.get_last_callable
# See https://github.com/Kaggle/kaggle-environments/blob/e4a5651a3a0775b823fc27fe2c24b55cbd340420/kaggle_environments/agent.py#L37
from kaggle_environments import agent as kaggle_env_agent

In [None]:
# Build the ConnectX environment used by the cells below and render it
env = make("connectx", debug=True)
env.render()

# Create an Agent

To create the submission, an agent function should be fully encapsulated (no external dependencies).  

When your agent is being evaluated against others, it will not have access to the Kaggle docker image.  Only the following can be imported: Python Standard Library Modules, gym, numpy, scipy, pytorch (1.3.1, cpu only), and more may be added later.



In [None]:
# This agent uses an N-step lookahead (minimax) to choose among the columns that are not full
# kaggle.com/alexisbcook/one-step-lookahead
def my_agent(observation, configuration, N_STEPS=2, cutoff_time=None, cutoff_time_offset=0.6, debug=True):
    '''
    Pick a ConnectX column with a time-limited N-step minimax lookahead.

    Every free column is scored by dropping the agent's piece there and
    running minimax on the resulting board. Among the best-scoring columns
    the centre column is preferred, then its direct neighbours, otherwise
    one is picked at random.

    Args:
        observation: Object exposing ``board`` (flat list of cells, 0 = empty)
            and ``mark`` (this agent's piece value; the opponent's value is
            derived as ``mark % 2 + 1``).
        configuration: Object exposing ``rows``, ``columns``, ``inarow`` and a
            dict-style ``get`` that is queried for ``'actTimeout'``.
        N_STEPS: Lookahead depth. Replaced by 3 once the board holds at
            least 11 pieces of player 1.
        cutoff_time: Total search budget in seconds. When ``None`` it is the
            configured ``'actTimeout'`` minus ``cutoff_time_offset``.
        cutoff_time_offset: Seconds held back from the action timeout when
            the budget is derived from the configuration.
        debug: When True, print per-turn and per-column diagnostics.

    Returns:
        The chosen column index.

    Raises:
        IndexError: If no column is free (raised by ``random.choice``).
    '''
    import numpy as np
    import random
    import time

    # Per-move limit (seconds) assumed when the configuration has no
    # 'actTimeout' entry; without it the subtraction below was None - float.
    FALLBACK_ACT_TIMEOUT = 2.0

    # Number of player-1 pieces on the board, used as the turn counter
    turn = observation.board.count(1)

    # Modify to 3 step lookahead after about half the board is filled
    if turn >= 11:
        N_STEPS = 3

    # Parameters for keeping track of time while searching deeply
    START_TIME = time.time()

    # Use the configuration's action timeout as the basis of the cutoff time
    if cutoff_time is None:
        act_timeout = configuration.get('actTimeout')
        if act_timeout is None:
            act_timeout = FALLBACK_ACT_TIMEOUT
        cutoff_time = act_timeout - cutoff_time_offset

    if debug:
        print(f'###### Turn {turn:02} ######')
        # Just to check for debugging (first 2 turns)
        if turn <= 1:
            print(f'"configuration":{configuration}')
        print('Cutoff Time:', cutoff_time)
        print(f'Using {N_STEPS} step lookahead')

    # Helper function for score_move/minimax: gets board at next step if a piece is dropped in selected column
    def drop_piece(grid, col, mark, config):
        next_grid = grid.copy()
        # Scan from the bottom row upwards for the first empty cell
        for row in range(config.rows-1, -1, -1):
            if next_grid[row][col] == 0:
                break
        next_grid[row][col] = mark
        return next_grid

    # Yields every window of config.inarow cells (horizontal, vertical, both diagonals) as a list
    def iter_windows(grid, config):
        n = config.inarow
        # horizontal
        for row in range(config.rows):
            for col in range(config.columns-(n-1)):
                yield list(grid[row, col:col+n])
        # vertical
        for row in range(config.rows-(n-1)):
            for col in range(config.columns):
                yield list(grid[row:row+n, col])
        # positive diagonal
        for row in range(config.rows-(n-1)):
            for col in range(config.columns-(n-1)):
                yield list(grid[range(row, row+n), range(col, col+n)])
        # negative diagonal
        for row in range(n-1, config.rows):
            for col in range(config.columns-(n-1)):
                yield list(grid[range(row, row-n, -1), range(col, col+n)])

    # Helper function for get_heuristic: checks if window satisfies heuristic conditions
    def check_window(window, num_discs, piece, config):
        return (window.count(piece) == num_discs and window.count(0) == config.inarow-num_discs)

    # Helper function for get_heuristic: counts number of windows satisfying specified heuristic conditions
    def count_windows(grid, num_discs, piece, config):
        return sum(1 for window in iter_windows(grid, config)
                   if check_window(window, num_discs, piece, config))

    # Helper function for minimax: calculates value of heuristic for grid
    def get_heuristic(grid, mark, config):
        opp_mark = mark % 2 + 1
        # Complete lines and lines one piece short; these are the "fours" and
        # "threes" of a standard inarow=4 game.
        num_wins = count_windows(grid, config.inarow, mark, config)
        num_near = count_windows(grid, config.inarow-1, mark, config)
        num_wins_opp = count_windows(grid, config.inarow, opp_mark, config)
        num_near_opp = count_windows(grid, config.inarow-1, opp_mark, config)
        score = 1e6*num_wins + 1e0*num_near - 1e2*num_near_opp - 1e4*num_wins_opp
        return score

    # Uses minimax to calculate value of dropping piece in selected column
    def score_move(grid, col, mark, config, nsteps):
        next_grid = drop_piece(grid, col, mark, config)
        # Each column gets an equal slice of the budget; this is the
        # cumulative deadline (seconds since START_TIME) for this column.
        time_to_search_col = (cutoff_time/config.columns)*(col+1)
        if (time.time() - START_TIME) >= time_to_search_col:
            # If time is getting close, stop everything!
            if debug:
                print('timeout!!')
            # Score the board AFTER the move so timed-out columns still differ
            score = get_heuristic(next_grid, mark, config)
        else:
            score = minimax(next_grid, nsteps-1, False, mark, config, time_to_search_col)
        if debug:
            summary_stats = {
                'column': col,
                'score': score,
                'nsteps_to_take': nsteps,
                'time_to_search_col': time_to_search_col,
                'time_elapsed': time.time() - START_TIME
            }
            print(f'"summary_stats":{summary_stats}')
        return score

    # Helper function for minimax: checks if agent or opponent has a full line in the window
    def is_terminal_window(window, config):
        return window.count(1) == config.inarow or window.count(2) == config.inarow

    # Helper function for minimax: checks if game has ended
    def is_terminal_node(grid, config):
        # Check for draw (top row completely filled)
        if list(grid[0, :]).count(0) == 0:
            return True
        # Check for win: horizontal, vertical, or diagonal
        return any(is_terminal_window(window, config) for window in iter_windows(grid, config))

    # Minimax implementation
    def minimax(node, depth, maximizingPlayer, mark, config, timeout_elapsed):
        # Out of time, out of depth, or game over: evaluate this node as-is.
        # The cheap checks come first so the board scan is skipped when possible.
        elapsed_time = time.time() - START_TIME
        if elapsed_time >= timeout_elapsed or depth == 0 or is_terminal_node(node, config):
            return get_heuristic(node, mark, config)
        valid_moves = [c for c in range(config.columns) if node[0][c] == 0]
        if maximizingPlayer:
            # np.inf rather than np.Inf: the capitalised alias is gone in NumPy 2.0
            value = -np.inf
            for col in valid_moves:
                child = drop_piece(node, col, mark, config)
                value = max(value, minimax(child, depth-1, False, mark, config, timeout_elapsed))
        else:
            value = np.inf
            for col in valid_moves:
                child = drop_piece(node, col, mark%2+1, config)
                value = min(value, minimax(child, depth-1, True, mark, config, timeout_elapsed))
        return value

    # Get list of valid moves (columns whose top cell is empty)
    valid_moves = [c for c in range(configuration.columns) if observation.board[c] == 0]
    # Convert the board to a 2D grid
    grid = np.asarray(observation.board).reshape(configuration.rows, configuration.columns)
    # Use the heuristic to assign a score to each possible board in the next step
    scores = {col: score_move(grid, col, observation.mark, configuration, N_STEPS) for col in valid_moves}
    # Get a list of columns (moves) that maximize the heuristic
    best_score = max(scores.values(), default=None)
    max_cols = [col for col, score in scores.items() if score == best_score]
    # Select at random from the maximizing columns
    chosen_col = random.choice(max_cols)

    # Try to pick the middle column if it is a maximal choice
    mid_col = configuration.columns // 2
    if mid_col in max_cols:
        chosen_col = mid_col
    # Otherwise choose a direct neighbour of the center column
    elif (mid_col + 1) in max_cols:
        chosen_col = mid_col + 1
        if (mid_col - 1) in max_cols:
            chosen_col = random.choice([mid_col-1, mid_col+1])
    elif (mid_col - 1) in max_cols:
        chosen_col = mid_col - 1

    if debug:
        print(f'Total time: {time.time()- START_TIME}')

    return chosen_col

# Test your Agent

In [None]:
env.reset()
# Play against the "negamax" agent until my agent loses
test_agent = debug  # Set to True to test

def debug_agent(observation, configuration):
    '''Run my_agent with its debug output forced on.'''
    return my_agent(observation, configuration, debug=True)

# Step count of a game that filled the whole board (43 on the 6x7 board)
full_board_steps = env.configuration.rows * env.configuration.columns + 1

while test_agent:
    env.reset()
    env.run([debug_agent, 'negamax'])
    # Don't count ties as losses
    if len(env.steps) == full_board_steps:
        print('tie')
    elif len(env.steps) % 2 == 1:
        # An odd step count is reported as a loss for the first player (my agent)
        print('lost')
        break
    else:
        print('won')
    print('=======')
env.render(mode="ipython", width=500, height=450)

# Debug/Train your Agent

In [None]:
# Play as first position against the "negamax" agent.
trainer = env.train([None, "negamax"])

observation = trainer.reset()

# Step through one episode move by move; skipped entirely unless debug is set
while debug and not env.done:
    # my_agent is called with its defaults, so its own debug printing is on
    my_action = my_agent(observation, env.configuration)
    print("My Action", my_action)
    observation, reward, done, info = trainer.step(my_action)
    env.render(mode="ipython", width=100, height=90, header=False, controls=False)
env.render()

# Evaluate your Agent

In [None]:
def mean_reward(rewards):
    '''Return the average of the first entry of every item in ``rewards``.'''
    first_entries = [episode[0] for episode in rewards]
    episode_count = float(len(rewards))
    return sum(first_entries) / episode_count

# Run multiple episodes against each opponent to estimate performance.
if debug:
    matchups = (
        ("random", "My Agent vs Random Agent:"),
        ("negamax", "My Agent vs Negamax Agent:"),
    )
    for opponent, label in matchups:
        episode_rewards = evaluate("connectx", [my_agent, opponent], num_episodes=10)
        print(label, mean_reward(episode_rewards))

# Play your Agent
Click on any column to place a checker there ("manually select action").

In [None]:
# "None" represents which agent you'll manually play as (first or second player).
# Here None comes first in the list, so the human plays first and my_agent second.
env.play([None, my_agent], width=500, height=450)

# Write Submission File



In [None]:
import inspect
import os

def write_agent_to_file(function, file):
    '''Write the source code of ``function`` to ``file``.

    The source is appended when ``file`` already exists; otherwise the file
    is created. A confirmation line is printed after writing.
    '''
    mode = "w"
    if os.path.exists(file):
        mode = "a"
    with open(file, mode) as handle:
        handle.write(inspect.getsource(function))
        print(function, "written to", file)

In [None]:
# Write my_agent's source to submission.py (appended if the file already exists)
submission_file = 'submission.py'
write_agent_to_file(my_agent, submission_file)

# Validate Submission
Play your submission against itself.  This is the first episode the competition will run to weed out erroneous agents.

Why validate? This roughly verifies that your submission is fully encapsulated and can be run remotely.

In [None]:
# Note: Stdout replacement is a temporary workaround.
import sys
out = sys.stdout
try:
    # Read the written submission and take the last callable it defines as the agent
    submission = utils.read_file("/kaggle/working/submission.py")
    agent = kaggle_env_agent.get_last_callable(submission)
finally:
    # Put stdout back even if loading fails, so later cells keep printing
    sys.stdout = out

In [None]:
# Fresh environment for the self-play validation run
env = make("connectx", debug=True)
env.run([agent, agent])
# The run counts as a success only when both players end with status "DONE"
print("Success!" if env.state[0].status == env.state[1].status == "DONE" else "Failed...")

# Submit to Competition

1. Save this kernel.
2. View the committed version.
3. Go to "Data" section and find submission.py file.
4. Click "Submit to Competition"
5. Go to [My Submissions](https://kaggle.com/c/connectx/submissions) to view your score and episodes being played.