Derived from a public notebook by https://www.kaggle.com/mrgeislinger

In [22]:
# Notebook-wide debug switch. It starts off here and is set to True by a later cell;
# `opp` and the test / training / evaluation cells read it.
debug = False

# TODO: Custom Env Policy

# Install kaggle-environments

In [None]:
# 1. Enable Internet in the Kernel (Settings side pane)

# 2. The cache may need to be purged with curl if v0.1.6 cannot be found (uncomment if needed).
# !curl -X PURGE https://pypi.org/simple/kaggle-environments

# The ConnectX environment was defined in v0.1.6
!pip install 'kaggle-environments>=0.1.6'

# Create ConnectX Environment

In [24]:
import numpy as np
import time

from kaggle_environments import evaluate, make, utils
# Since utils.get_last_callable moved to agent.get_last_callable
# See https://github.com/Kaggle/kaggle-environments/blob/e4a5651a3a0775b823fc27fe2c24b55cbd340420/kaggle_environments/agent.py#L37
from kaggle_environments import agent as kaggle_env_agent

In [25]:
# Create the ConnectX environment used by the test, render and training cells below.
env = make("connectx", debug=True)
#env.render()

In [None]:
# Install the pinned TensorFlow 1.15.0 build and import it
!pip install 'tensorflow==1.15.0'
import tensorflow as tf
#tf.__version__
# System packages are installed before the stable-baselines[mpi] install below
!apt-get update
!apt-get install -y cmake libopenmpi-dev python3-dev zlib1g-dev
!pip install "stable-baselines[mpi]==2.9.0"
from gym import spaces
from stable_baselines import PPO1 
from stable_baselines.common.policies import CnnPolicy

# Load the saved PPO1 model from a hard-coded path.
# NOTE(review): `model` is not referenced by any later cell shown here; `opp` loads the same file itself.
model = PPO1.load('/content/given_random.zip', verbose=0)

# Create an Agent

To create the submission, an agent function should be fully encapsulated (no external dependencies).  

When your agent is being evaluated against others, it will not have access to the Kaggle docker image.  Only the following can be imported: Python Standard Library Modules, gym, numpy, scipy, pytorch (1.3.1, cpu only), and more may be added later.



In [27]:
#@ title Agent to debug
def my_agent(obs, config, START_TIME=None, N_STEPS=2, cutoff_time=None, debug=False):
    """Choose a ConnectX column with a depth-limited alpha-beta search.

    The function is kept fully self-contained (imports and helpers live inside
    it) because the submission file is generated from this function's source
    alone.

    Args:
        obs: Observation exposing ``board`` (flat row-major list, 0 = empty
            cell) and ``mark`` (this agent's piece). The opponent's piece is
            derived as ``mark % 2 + 1``.
        config: Game configuration exposing ``rows``, ``columns`` and
            ``inarow`` as attributes plus a dict-style ``get`` (read for
            ``'actTimeout'``).
        START_TIME: ``time.time()`` value the time budget is measured from.
            Defaults to the moment of the call; pass a per-turn timestamp,
            not a game-wide one, or later turns will always time out.
        N_STEPS: Kept for backward compatibility only. The passed value is
            overwritten: the search depth is 2 while at least half of the
            board is empty and 3 afterwards.
        cutoff_time: Seconds after ``START_TIME`` at which columns stop being
            searched and are scored with the static heuristic instead.
            Defaults to ``actTimeout - 0.2``; when the configuration has no
            ``actTimeout`` no overall cutoff is applied.
        debug: When True, print the configuration (first turn only) and
            per-column timing / score statistics.

    Returns:
        int: Index of the chosen column, taken from the columns whose top
        cell is empty.
    """
    import numpy as np
    import random
    import time

    ########################### Regular pruner ################
    # constants (given by game)
    ROWS = config.rows
    COLUMNS = config.columns
    CNCTX = config.inarow
    ## coefficients (weights on variable future outcomes)
    A = 1       # my windows two short of a win ("twos" when inarow is 4)
    B = 100     # my windows one short of a win ("threes")
    C = 10000   # my completed windows ("fours")
    D = -10     # opponent windows one short of a win
    E = -1000   # opponent completed windows
    # time limits (seconds)
    COLUMN_TIME_LIMIT = 1.0    # search budget for a single root column
    CUTOFF_TIME_OFFSET = 0.2   # safety margin kept below the action timeout

    # vary lookahead depth according to state of play:
    if obs.board.count(0) >= ROWS*COLUMNS//2:
        N_STEPS = 2
    else:
        N_STEPS = 3  # deeper search after half the board is filled

    if START_TIME is None:
        START_TIME = time.time()
    if cutoff_time is None:
        act_timeout = config.get('actTimeout')
        if act_timeout is None:
            # No action timeout configured: do not impose an overall cutoff.
            cutoff_time = float('inf')
        else:
            cutoff_time = act_timeout - CUTOFF_TIME_OFFSET

    if debug:
        if obs.board.count(1) == 0:
            print(f'"configuration":{config}')
        print(f'\n###### Agent Turn {obs.board.count(1):02} ######')
        print(f'Using {N_STEPS} step lookahead')

    # Gets board at next step if agent drops piece in selected column
    def drop_piece(grid, col, mark):
        next_grid = grid.copy()
        # Scan upwards from the bottom row for the first empty cell.
        # Callers only pass columns whose top cell is empty.
        for row in range(ROWS-1, -1, -1):
            if next_grid[row][col] == 0:
                break
        next_grid[row][col] = mark
        return next_grid

    # Helper for get_score: yields every window of CNCTX consecutive cells as a list
    def iter_windows(grid):
        # horizontal
        for row in range(ROWS):
            for col in range(COLUMNS-(CNCTX-1)):
                yield list(grid[row, col:col+CNCTX])
        # vertical
        for row in range(ROWS-(CNCTX-1)):
            for col in range(COLUMNS):
                yield list(grid[row:row+CNCTX, col])
        # positive diagonal
        for row in range(ROWS-(CNCTX-1)):
            for col in range(COLUMNS-(CNCTX-1)):
                yield list(grid[range(row, row+CNCTX), range(col, col+CNCTX)])
        # negative diagonal
        for row in range(CNCTX-1, ROWS):
            for col in range(COLUMNS-(CNCTX-1)):
                yield list(grid[range(row, row-CNCTX, -1), range(col, col+CNCTX)])

    # Helper function for alphabeta: calculates value of heuristic for grid
    # from the point of view of `mark`, visiting every window exactly once.
    def get_score(grid, mark):
        opp_mark = mark%2+1
        num_twos = num_threes = num_fours = 0
        num_threes_opp = num_fours_opp = 0
        for window in iter_windows(grid):
            empties = window.count(0)
            mine = window.count(mark)
            theirs = window.count(opp_mark)
            # A window only counts when it holds one player's pieces plus empty cells.
            if mine > 0 and mine + empties == CNCTX:
                if mine == CNCTX:
                    num_fours += 1
                elif mine == CNCTX-1:
                    num_threes += 1
                elif mine == CNCTX-2:
                    num_twos += 1
            elif theirs > 0 and theirs + empties == CNCTX:
                if theirs == CNCTX:
                    num_fours_opp += 1
                elif theirs == CNCTX-1:
                    num_threes_opp += 1
        score = A*num_twos + B*num_threes + C*num_fours + D*num_threes_opp + E*num_fours_opp
        # Terminal when either side has a complete window or the top row is full.
        is_terminal = num_fours > 0 or num_fours_opp > 0 or (list(grid[0, :]).count(0) == 0)
        return score, is_terminal

    # Minimax with alpha-beta pruning; scores are always from `mark`'s point of view
    def alphabeta(node, depth, alpha, beta, maximizingPlayer, mark, column_time):
        node_score, is_terminal = get_score(node, mark)
        # check the time spent on the current root column
        elapsed_time = time.time() - column_time
        if depth == 0 or is_terminal or (elapsed_time >= COLUMN_TIME_LIMIT):
            return node_score

        valid_moves = [c for c in range(COLUMNS) if node[0][c] == 0]
        if maximizingPlayer:
            value = -np.inf
            for col in valid_moves:
                child = drop_piece(node, col, mark)
                value = max(value, alphabeta(child, depth-1, alpha, beta, False, mark, column_time))
                alpha = max(alpha, value)
                if alpha >= beta:
                    break
            return value

        else: #minimizing player
            value = np.inf
            for col in valid_moves:
                child = drop_piece(node, col, mark%2+1)
                value = min(value, alphabeta(child, depth-1, alpha, beta, True, mark, column_time))
                beta = min(beta, value)
                if alpha >= beta:
                    break
            return value

    # Uses alphabeta pruning to calculate value
    # of dropping piece in selected column
    def score_move(grid, col, mark, depth):
        column_time = time.time()
        next_grid = drop_piece(grid, col, mark)
        # "If time is getting close, stop everything!"
        elapsed_time = (column_time - START_TIME)
        time_left = cutoff_time - elapsed_time
        if elapsed_time >= cutoff_time:
            if debug:
                print('\n*** TIMEOUT ***\n')
            # Out of time: fall back to the static score of the board AFTER the
            # move, so columns can still be told apart.
            score, _ = get_score(next_grid, mark)
        else:
            score = alphabeta(next_grid, depth-1, -np.inf, np.inf, False, mark, column_time)
        column_time = time.time() - column_time
        if debug:
            summary_stats = {
                'column': col,
                'score': score,
                'column_time': round(column_time, 4),
                'time_left': round(time_left, 3),
                'time_elapsed': round(time.time() - START_TIME, 3)
            }
            print(f'"summary_stats":{summary_stats}')
        return score

    #########################
    # Agent makes selection #
    #########################

    # Get list of valid moves (columns whose top cell is empty)
    valid_moves = [c for c in range(COLUMNS) if obs.board[c] == 0]

    # Convert the board to a 2D grid
    grid = np.asarray(obs.board).reshape(ROWS, COLUMNS)

    # Use the heuristic to assign a score to each possible board in the next step
    scores = {col: score_move(grid, col, obs.mark, N_STEPS) for col in valid_moves}

    # Get a list of columns (moves) that maximize the heuristic
    best_score = max(scores.values())
    max_cols = [col for col, score in scores.items() if score == best_score]

    # Select column in order of preference: centre, right of centre, left of
    # centre (3, 4, 2 on a 7-column board); otherwise pick a best column at random.
    center = COLUMNS//2
    for pref in (center, center+1, center-1):
        if pref in max_cols:
            choice = pref
            break
    else:
        choice = random.choice(max_cols)
    if debug:
        print("Chosen column:", choice)

    return choice

In [28]:
def opp(obs, config):
    """Opponent agent that plays the column predicted by the saved PPO1 model.

    Args:
        obs: Observation exposing ``board`` (flat list, 0 = empty cell) both
            by key and by attribute.
        config: Game configuration exposing ``rows`` and ``columns``.

    Returns:
        int: The predicted column when its top cell is empty, otherwise a
        random column whose top cell is empty.

    Relies on the notebook-level names ``np``, ``env`` and ``debug``.
    """
    # `random` is not imported at notebook level, so bring it in here for the fallback move.
    import random

    #Import saved model trained on Agent Heuristic
    from stable_baselines import PPO1
    # NOTE(review): the model is reloaded from disk on every call; the path and
    # the env argument are kept exactly as in the original cell.
    trained_model = PPO1.load('/content/given_random.zip', env=env,verbose=0)

    # Use the trained model to select a column; the board is shaped from the
    # configuration (6 x 7 x 1 for the default game).
    board = np.array(obs['board']).reshape(config.rows, config.columns, 1)
    col, _ = trained_model.predict(board)
    # Check if selected column is valid (top cell still empty)
    is_valid = (obs['board'][int(col)] == 0)

    if debug:
        print("Model predicted column:", col)#, "Valid =",is_valid)

    # If not valid, select random move.
    if is_valid:
        return int(col)
    else:
        return random.choice([c for c in range(config.columns) if obs['board'][c] == 0])

# Test your Agent

In [29]:
#from prunerBD import prunerBD as opp
# Switch debug mode on: the test, training and evaluation cells below only run when `debug` is true.
debug = True

In [31]:
env.reset()
# Wall-clock start of the whole game; only used for the "Game time" report below.
START_TIME = time.time()
# Play one game against the opponent and report the outcome for my_agent
test_agent = debug # Set to True to test
# Let my_agent start its own clock on every turn. Passing the game-wide
# START_TIME made the elapsed time grow across turns, so every move fell into
# the agent's timeout branch once the game had lasted longer than one action budget.
debug_agent = lambda obs, config: my_agent(obs, config, debug=True)

if test_agent:
    env.reset()
    env.run([debug_agent, opp])
    # A completely filled board: one step per cell plus one extra step
    # (43 on the default 6 x 7 board).
    full_board_steps = env.configuration.rows * env.configuration.columns + 1
    # Don't count ties as losses
    if len(env.steps) == full_board_steps:
        print('tie')
    elif len(env.steps) % 2 == 1:
        print('--- my_agent lost ---')
    else:
        print('+++ my_agent won +++')

print('###### Game Over ######')
print(f'Game time: {round(time.time()-START_TIME,3)}')
#env.render(mode="ipython", width=500, height=450)

"configuration":{'episodeSteps': 1000, 'agentTimeout': 16, 'actTimeout': 8, 'runTimeout': 1200, 'isProduction': False, 'columns': 7, 'rows': 6, 'inarow': 4, 'timeout': 8}

###### Agent Turn 00 ######
Using 2 step lookahead
"summary_stats":{'column': 0, 'score': 0, 'column_time': 0.0214, 'time_left': 7.79, 'time_elapsed': 0.032}
"summary_stats":{'column': 1, 'score': 0, 'column_time': 0.0167, 'time_left': 7.768, 'time_elapsed': 0.048}
"summary_stats":{'column': 2, 'score': 0, 'column_time': 0.0165, 'time_left': 7.752, 'time_elapsed': 0.065}
"summary_stats":{'column': 3, 'score': 0, 'column_time': 0.0184, 'time_left': 7.735, 'time_elapsed': 0.083}
"summary_stats":{'column': 4, 'score': 0, 'column_time': 0.0165, 'time_left': 7.717, 'time_elapsed': 0.1}
"summary_stats":{'column': 5, 'score': 0, 'column_time': 0.017, 'time_left': 7.7, 'time_elapsed': 0.117}
"summary_stats":{'column': 6, 'score': 0, 'column_time': 0.0166, 'time_left': 7.683, 'time_elapsed': 0.134}
Chosen column: 3
Traceback 

In [None]:
# Render the current state of `env` inline in the notebook.
env.render(mode="ipython", width=290, height=300)

# Debug/Train your Agent

In [None]:
# Play as first position (the None seat is driven by the loop below) against the "negamax" agent.
trainer = env.train([None, "negamax"])

observation = trainer.reset()

# Runs only in debug mode. my_agent is called with its default arguments, so its own debug prints stay off.
while debug and not env.done:
    my_action = my_agent(observation, env.configuration)
    print("My Action", my_action)
    observation, reward, done, info = trainer.step(my_action)
    env.render(mode="ipython", width=100, height=90, header=False, controls=False)
env.render()

# Evaluate your Agent

In [None]:
from prunerBD import prunerBD as opp2

In [None]:
def mean_reward(rewards):
    """Return the average of the first entry of every item in ``rewards``."""
    first_entry_total = 0
    for episode in rewards:
        first_entry_total += episode[0]
    return first_entry_total / float(len(rewards))

# Run multiple episodes to estimate its performance.
# mean_reward averages the first entry of each result, so the second line reports
# the reward of the first listed agent ("negamax") — presumably not my_agent's; verify against evaluate().
if debug:
    print("Debug Agent vs Opp Agent:", mean_reward(evaluate("connectx", [my_agent, "negamax"], num_episodes=10)))
    print("Opp Agent vs Debug Agent:", mean_reward(evaluate("connectx", ["negamax", my_agent], num_episodes=10)))

# Play your Agent
Click on any column to place a checker there ("manually select action").

In [None]:
# "None" represents which agent you'll manually play as (first or second player).
# my_agent takes the second seat and is called with its default arguments (debug prints off).
env.play([None, my_agent], width=500, height=450)

# Write Submission File



In [None]:
import inspect
import os

def write_agent_to_file(function, file):
    """Write the source code of ``function`` to ``file`` and print a confirmation.

    The source is appended when ``file`` already exists; otherwise the file is created.
    """
    mode = "w"
    if os.path.exists(file):
        mode = "a"
    with open(file, mode) as handle:
        source = inspect.getsource(function)
        handle.write(source)
        print(function, "written to", file)

In [None]:
# Write my_agent's source to submission.py. If the file already exists the source is
# appended, so re-running this cell adds another copy of the function.
submission_file = 'submission.py'
write_agent_to_file(my_agent, submission_file)

# Validate Submission
Play your submission against itself.  This is the first episode the competition will run to weed out erroneous agents.

Why validate? This roughly verifies that your submission is fully encapsulated and can be run remotely.

In [None]:
# Note: Stdout replacement is a temporary workaround.
# The current stdout is saved before the submission is loaded and restored afterwards.
import sys
out = sys.stdout
submission = utils.read_file("submission.py")
# Take the last callable found in the submission source as the agent.
agent = kaggle_env_agent.get_last_callable(submission)
sys.stdout = out

In [None]:
# Play the loaded submission against itself in a fresh environment and
# report success only if both agents end with status "DONE".
env = make("connectx", debug=True)
env.run([agent, agent])
print("Success!" if env.state[0].status == env.state[1].status == "DONE" else "Failed...")

# Submit to Competition

1. Save this kernel.
2. View the committed version.
3. Go to "Data" section and find submission.py file.
4. Click "Submit to Competition"
5. Go to [My Submissions](https://kaggle.com/c/connectx/submissions) to view your score and episodes being played.