Derived from a public notebook by https://www.kaggle.com/mrgeislinger

In [None]:
# Notebook-wide switch: a later cell copies it into `test_run` to decide whether
# the verbose test game is played. Set to True to enable that run.
debug = False

# Install kaggle-environments

# Create ConnectX Environment

In [None]:
import numpy as np
import time

!pip install 'kaggle-environments>=0.1.6'
from kaggle_environments import evaluate, make, utils
# Since utils.get_last_callable moved to agent.get_last_callable
# See https://github.com/Kaggle/kaggle-environments/blob/e4a5651a3a0775b823fc27fe2c24b55cbd340420/kaggle_environments/agent.py#L37
from kaggle_environments import agent as kaggle_env_agent

In [None]:
# Create the ConnectX environment (with the environment's own debug mode on);
# later cells reset, run and render this same `env` object.
env = make("connectx", debug=True)
#env.render()

In [None]:
# Check version of tensorflow
!pip install 'tensorflow==1.15.0'
import tensorflow as tf
#tf.__version__
!apt-get update
!apt-get install -y cmake libopenmpi-dev python3-dev zlib1g-dev
!pip install "stable-baselines[mpi]==2.9.0"
from gym import spaces
#For Trained Agent
from stable_baselines import PPO1 
from stable_baselines.common.policies import CnnPolicy

In [None]:
#@title Trained Agent
def trained_agent(obs, config, model=None, debug=False):
    """Pick a ConnectX column with a trained model, falling back to a random legal move.

    Args:
        obs: Observation exposing the flat board under the ``'board'`` key
            (read row-major; index ``c`` of the first row is the top cell of
            column ``c``, and 0 marks an empty cell).
        config: Game configuration exposing ``rows`` and ``columns``.
        model: Optional object with a ``predict(observation)`` method that
            returns ``(action, state)``. When omitted, the saved PPO1 model is
            loaded from ``/content/trained.zip`` on every call.
        debug: When true, print the predicted column and the elapsed time.

    Returns:
        int: The predicted column if it is in range and not full, otherwise a
        randomly chosen column whose top cell is empty.
    """
    # Imported here because no earlier cell brings `random` into scope.
    import random

    start = time.time()

    # Import saved model trained on Agent Heuristic.
    # `PPO1` is the name the notebook imports from stable_baselines; the bare
    # `stable_baselines` module name is never bound.
    if model is None:
        trained_model = PPO1.load('/content/trained.zip', env=None, verbose=0)
    else:
        trained_model = model

    board = obs['board']

    # Use the trained model to select a column. The board is reshaped to
    # (rows, columns, 1), which equals the original fixed (6, 7, 1) on the
    # standard board.
    col, _ = trained_model.predict(np.array(board).reshape(config.rows, config.columns, 1))
    predicted = int(col)

    # A column is playable when it exists and its top cell is still empty.
    is_valid = 0 <= predicted < config.columns and board[predicted] == 0

    if debug:
        print("\nTrained model predicted column:", col)
        print("Time taken =", time.time() - start)

    if is_valid:
        return predicted

    # If not valid, select a random legal move instead.
    legal_columns = [c for c in range(config.columns) if board[c] == 0]
    return random.choice(legal_columns)

# Create an Agent

To create the submission, an agent function should be fully encapsulated (no external dependencies).  

When your agent is being evaluated against others, it will not have access to the Kaggle docker image.  Only the following can be imported: Python Standard Library Modules, gym, numpy, scipy, pytorch (1.3.1, cpu only), and more may be added later.



In [None]:
def debug_agent(obs, config, START_TIME=None, N_STEPS=2, cutoff_time=None, debug=False):
    """Choose a ConnectX column using a time-limited alpha-beta search.

    The agent first checks whether any legal move ends the game immediately
    and plays it. Otherwise every legal column is scored with a depth-limited
    minimax search (alpha-beta pruning) over a window-counting heuristic, and
    the best column is returned, preferring columns 3, 4, 2 among ties.

    Args:
        obs: Observation with ``board`` (flat list, top row first, 0 = empty)
            and ``mark`` (this agent's piece, 1 or 2).
        config: Configuration with ``rows``, ``columns``, ``inarow`` and a
            dict-style ``get``; ``actTimeout`` is read when ``cutoff_time`` is
            not supplied.
        START_TIME: Wall-clock time at which the turn started; defaults to now.
        N_STEPS: Accepted for interface compatibility. The value is always
            overwritten: 3 plies while at least half the board is empty, 4 after.
        cutoff_time: Seconds allowed for the whole turn. Defaults to
            ``actTimeout`` minus a 0.2 s safety margin.
        debug: When true, print search diagnostics. It never changes the move.

    Returns:
        int: The chosen column.

    Note:
        The heuristic counts windows of 2, 3 and 4 discs, so it assumes
        ``config.inarow == 4``.
    """
    import numpy as np
    import random
    import time

    ########################### Regular pruner ################
    # constants (given by game)
    ROWS = config.rows
    COLUMNS = config.columns
    CNCTX = config.inarow
    ## coefficients (weights on variable future outcomes)
    A = 2     #my twos
    B = 20    #my threes
    C = 200   #my fours
    D = -1    #opp-twos
    E = -10    #opp-threes
    F = -100   #opp-fours

    # Used only when the configuration carries no 'actTimeout'.
    # NOTE(review): 2 seconds is assumed to be the ConnectX default - confirm.
    DEFAULT_ACT_TIMEOUT = 2.0

    # vary lookahead depth according to state of play:
    if obs.board.count(0) >= ROWS*COLUMNS//2:
        N_STEPS = 3
    else:
        N_STEPS = 4  # deeper search after half the board is filled

    cutoff_time_offset = 0.2
    if START_TIME is None:
        START_TIME = time.time()
    if cutoff_time is None:
        # Falling back to a number (not None) keeps the subtraction valid when
        # the configuration has no 'actTimeout' entry.
        act_timeout = config.get('actTimeout', None)
        if act_timeout is None:
            act_timeout = DEFAULT_ACT_TIMEOUT
        cutoff_time = act_timeout - cutoff_time_offset

    # Each column gets an equal share of the turn budget (cutoff_time/7 on the
    # standard 7-column board).
    column_budget = cutoff_time / COLUMNS

    if debug:
        if obs.board.count(1) == 0:
            print(f'"configuration":{config}')
        print(f'\n###### Agent Turn {obs.board.count(1):02} ######')
        print(f'Using {N_STEPS} step lookahead')

    # Gets board at next step if agent drops piece in selected column.
    # Callers only pass columns whose top cell is empty.
    def drop_piece(grid, col, mark):
        next_grid = grid.copy()
        for row in range(ROWS-1, -1, -1):
            if next_grid[row][col] == 0:
                break
        next_grid[row][col] = mark
        return next_grid

    # Helper function for get_score: checks if window satisfies heuristic conditions
    def check_window(window, num_discs, piece):
        return (window.count(piece) == num_discs and window.count(0) == CNCTX-num_discs)

    # Helper function for get_score: counts number of windows satisfying specified heuristic conditions
    def count_windows(grid, num_discs, piece):
        num_windows = 0
        # horizontal
        for row in range(ROWS):
            for col in range(COLUMNS-(CNCTX-1)):
                window = list(grid[row, col:col+CNCTX])
                if check_window(window, num_discs, piece):
                    num_windows += 1
        # vertical
        for row in range(ROWS-(CNCTX-1)):
            for col in range(COLUMNS):
                window = list(grid[row:row+CNCTX, col])
                if check_window(window, num_discs, piece):
                    num_windows += 1
        # positive diagonal
        for row in range(ROWS-(CNCTX-1)):
            for col in range(COLUMNS-(CNCTX-1)):
                window = list(grid[range(row, row+CNCTX), range(col, col+CNCTX)])
                if check_window(window, num_discs, piece):
                    num_windows += 1
        # negative diagonal
        for row in range(CNCTX-1, ROWS):
            for col in range(COLUMNS-(CNCTX-1)):
                window = list(grid[range(row, row-CNCTX, -1), range(col, col+CNCTX)])
                if check_window(window, num_discs, piece):
                    num_windows += 1
        return num_windows

    # Helper function for minimax: calculates value of heuristic for grid and
    # reports whether the grid is terminal (someone has four, or the top row is full).
    def get_score(grid, mark):
        opp = mark%2+1
        num_twos = count_windows(grid, 2, mark) #A
        num_threes = count_windows(grid, 3, mark)  #B
        num_fours = count_windows(grid, 4, mark)   #C
        num_twos_opp = count_windows(grid, 2, opp) #D (was counted with 3 discs)
        num_threes_opp = count_windows(grid, 3, opp) #E
        num_fours_opp = count_windows(grid, 4, opp)  #F
        score = A*num_twos + B*num_threes + C*num_fours + D*num_twos_opp + E*num_threes_opp + F*num_fours_opp
        is_terminal = (not num_fours == 0) or (not num_fours_opp == 0) or (list(grid[0, :]).count(0) == 0)
        return score, is_terminal

    # Minimax with alpha-beta pruning. `column_time` is the wall-clock time at
    # which scoring of the current root column started; the search stops
    # descending once that column's share of the budget is used up.
    def alphabeta(node, depth, alpha, beta, maximizingPlayer, mark, column_time):
        node_score, is_terminal = get_score(node, mark)
        time_left = column_time + column_budget - time.time()
        if depth == 0 or is_terminal or time_left <= 0:
            if debug:
                print ("score is: {}\tdepth is {}\tis_terminal is {}\ttime_left is {}".format(node_score, depth, is_terminal, round(time_left,3)))
            return node_score
        valid_moves = [c for c in range(COLUMNS) if node[0][c] == 0]
        if maximizingPlayer:
            value = -np.inf
            for col in valid_moves:
                child = drop_piece(node, col, mark)
                value = max(value, alphabeta(child, depth-1, alpha, beta, False, mark, column_time))
                alpha = max(alpha, value)
                if alpha >= beta:
                    break
            return value

        else: #minimizing player
            value = np.inf
            for col in valid_moves:
                child = drop_piece(node, col, mark%2+1)
                value = min(value, alphabeta(child, depth-1, alpha, beta, True, mark, column_time))
                beta = min(beta, value)
                if alpha >= beta:
                    break
            return value

    # Uses alphabeta pruning to calculate value
    # of dropping piece in selected column
    def score_move(grid, col, mark, depth):
        column_start = time.time()
        next_grid = drop_piece(grid, col, mark)
        elapsed_time = column_start - START_TIME
        if elapsed_time >= cutoff_time:
            if debug:
                print('\t*** TIMEOUT at column {} ***'.format(col))
            depth = 1 # out of time: only evaluate the grid right after this move
        score = alphabeta(next_grid, depth-1, -np.inf, np.inf, False, mark, column_start)
        column_duration = time.time() - column_start
        if debug:
            summary_stats = {
                'column': col,
                'score': score,
                'column_time': round(column_duration, 4),
                'time_left': round(START_TIME + column_budget - time.time(), 3),
                'time_elapsed': round(time.time() - START_TIME, 3)
            }
            print(f'"summary_stats":{summary_stats}')
        return score

    # Returns `col` when dropping there produces a terminal grid, otherwise -1.
    def first_pass(grid, col, mark):
        next_grid = drop_piece(grid, col, mark)
        score, is_terminal = get_score(next_grid, mark)
        if is_terminal:
            return col
        else:
            return -1

    #########################
    # Agent makes selection #
    #########################

    # Get list of valid moves
    valid_moves = [c for c in range(COLUMNS) if obs.board[c] == 0]

    # Convert the board to a 2D grid
    grid = np.asarray(obs.board).reshape(ROWS, COLUMNS)

    # Do a quick pass at depth zero to see if there is a positive terminal node.
    # This runs regardless of `debug`, so the flag cannot influence the move,
    # and column 0 counts as a hit (first_pass signals "no hit" with -1).
    picked = False
    quick_pick = -1
    for col in valid_moves:
        quick_pick = first_pass(grid, col, obs.mark)
        if quick_pick >= 0:
            picked = True
            break

    if picked:
        if debug:
            print("Column {} is terminal.".format(quick_pick))
        choice = quick_pick
    else:
        # Use the heuristic to assign a score to each possible board in the next step
        scores = dict(zip(valid_moves, [score_move(grid, col, obs.mark, N_STEPS) for col in valid_moves]))

        # Get a list of columns (moves) that maximize the heuristic
        best_score = max(scores.values())
        max_cols = [key for key in scores.keys() if scores[key] == best_score]

        # select column in order of preference; fall back to a random best
        # column only when none of the preferred ones is among the best
        for pref in [3,4,2]:
            if pref in max_cols:
                choice = pref
                break
        else:
            choice = random.choice(max_cols)
        if debug:
            print("Chosen column:", choice)

    return choice


# Test your Agent

In [None]:
#from test_agent_v1 import my_agent as test_agent_v1
from pruner_v7 import my_agent as pruner
from heuristic_v8 import my_agent as heuristic
from test_agent_v4 import my_agent as self_agent

In [None]:
import random
# Play one verbose game between debug_agent and the imported heuristic agent
# (only when the notebook-level `debug` flag is set), then report the duration.
env.reset()
START_TIME = time.time()

test_run = debug # Set debug to True to test
agent1 = lambda x,y: debug_agent(x,y, debug=True)
agent2 = heuristic

# At most a single game is played: every outcome ends the run.
if test_run:
    env.reset()
    # Seat order is decided by a coin flip.
    if random.choice([True, False]):
        env.run([agent1, agent2])
        print("Agent order: [debug_agent, opponent]")
    else:
        env.run([agent2, agent1])
        print("Agent order: [opponent, debug_agent]")

    # Don't count ties as losses: 43 recorded steps is reported as a tie,
    # any other odd count as a loss, an even count as a win.
    print(
        'tie' if len(env.steps) == 43
        else '--- b1ue agent lost ---' if len(env.steps) % 2 == 1
        else '+++ b1ue agent won +++'
    )

print('\n###### Game Over ######')
print(f'Game time: {round(time.time()-START_TIME,3)}')
#env.render(mode="ipython", width=500, height=450)

In [None]:
#@title Configurable Test Agent
def config_test_agent(obs, config, debug=False):
    """Baseline test agent: play a random legal column.

    The heuristic and alpha-beta helpers are defined but not used by the
    current selection rule; they are kept so a search-based selection (see the
    comment near the end) can be switched back in when experimenting.

    Args:
        obs: Observation with ``board`` (flat list, top row first, 0 = empty)
            and ``mark`` (this agent's piece).
        config: Configuration with ``rows``, ``columns`` and ``inarow``.
        debug: When true, print the turn counters, the nominal lookahead depth
            and the chosen column.

    Returns:
        int: A randomly chosen column whose top cell is empty. If no column is
        playable, a random column index is returned.
    """
    import numpy as np
    import random
    import time

    # constants (given by game)
    ROWS = config.rows
    COLUMNS = config.columns
    CNCTX = config.inarow
    ## coefficients (weights on variable future outcomes)
    A = 1     #my twos
    B = 100    #my threes
    C = 10000   #my fours
    D = -10    #opp-threes
    E = -1000   #opp-fours

    # vary lookahead depth according to state of play
    # (only reported in debug output while the search below is disabled):
    if obs.board.count(1) < 2:
        N_STEPS =      1
    elif obs.board.count(1) <= ROWS*COLUMNS//(2*3): # 2 up to one third
        N_STEPS =      2
    elif obs.board.count(1) <= 3*ROWS*COLUMNS//(2*4): # 3 up to three fourths
        N_STEPS =      3
    else:                                             # 4 last 25% of game
        N_STEPS =      4

    if debug:
        if obs.board.count(1) == 0:
            print(f'"configuration":{config}')
        print(f'\n###### Agent mark {obs.board.count(1):02} ######')
        print(f'###### Game marks {(obs.board.count(2) + obs.board.count(1)):02} ######')
        print(f'Using {N_STEPS} step lookahead')

    # Gets board at next step if agent drops piece in selected column
    def drop_piece(grid, col, mark):
        next_grid = grid.copy()
        for row in range(ROWS-1, -1, -1):
            if next_grid[row][col] == 0:
                break
        next_grid[row][col] = mark
        return next_grid

    # Helper function for get_score: checks if window satisfies heuristic conditions
    def check_window(window, num_discs, piece):
        return (window.count(piece) == num_discs and window.count(0) == CNCTX-num_discs)

    # Helper function for get_score: counts number of windows satisfying specified heuristic conditions
    def count_windows(grid, num_discs, piece):
        num_windows = 0
        # horizontal
        for row in range(ROWS):
            for col in range(COLUMNS-(CNCTX-1)):
                window = list(grid[row, col:col+CNCTX])
                if check_window(window, num_discs, piece):
                    num_windows += 1
        # vertical
        for row in range(ROWS-(CNCTX-1)):
            for col in range(COLUMNS):
                window = list(grid[row:row+CNCTX, col])
                if check_window(window, num_discs, piece):
                    num_windows += 1
        # positive diagonal
        for row in range(ROWS-(CNCTX-1)):
            for col in range(COLUMNS-(CNCTX-1)):
                window = list(grid[range(row, row+CNCTX), range(col, col+CNCTX)])
                if check_window(window, num_discs, piece):
                    num_windows += 1
        # negative diagonal
        for row in range(CNCTX-1, ROWS):
            for col in range(COLUMNS-(CNCTX-1)):
                window = list(grid[range(row, row-CNCTX, -1), range(col, col+CNCTX)])
                if check_window(window, num_discs, piece):
                    num_windows += 1
        return num_windows

    # Helper function for minimax: calculates value of heuristic score
    # for grid and checks if the grid is terminal
    def get_score(grid, mark):
        num_twos = count_windows(grid, 2, mark) #A
        num_threes = count_windows(grid, 3, mark)  #B
        num_fours = count_windows(grid, 4, mark)   #C
        num_threes_opp = count_windows(grid, 3, mark%2+1) #D
        num_fours_opp = count_windows(grid, 4, mark%2+1)  #E
        score = A*num_twos + B*num_threes + C*num_fours + D*num_threes_opp + E*num_fours_opp
        is_terminal = (not num_fours == 0) or (not num_fours_opp == 0) or (list(grid[0, :]).count(0) == 0)
        return score, is_terminal

    # Minimax algorithm with alphabeta pruning implementation:
    def alphabeta(node, depth, alpha, beta, maximizingPlayer, mark):
        node_score, is_terminal = get_score(node, mark)
        if depth == 0 or is_terminal:
            return node_score

        valid_moves = [c for c in range(COLUMNS) if node[0][c] == 0]
        if maximizingPlayer:
            value = -np.inf
            for col in valid_moves:
                child = drop_piece(node, col, mark)
                value = max(value, alphabeta(child, depth-1, alpha, beta, False, mark))
                alpha = max(alpha, value)
                if alpha >= beta:
                    break
            return value

        else: #minimizing player
            value = np.inf
            for col in valid_moves:
                child = drop_piece(node, col, mark%2+1)
                value = min(value, alphabeta(child, depth-1, alpha, beta, True, mark))
                beta = min(beta, value)
                if alpha >= beta:
                    break
            return value

    # Uses alphabeta pruning to calculate value
    # of dropping piece in selected column
    def score_move(grid, col, mark, depth):
        if debug:
            column_time = time.time()
        next_grid = drop_piece(grid, col, mark)
        score = alphabeta(next_grid, depth-1, -np.inf, np.inf, False, mark)
        if debug:
            column_time = time.time() - column_time
            summary_stats = {
                'column': col,
                'score': score,
                'column_time': round(column_time, 4),
            }
            print(f'"summary_stats":{summary_stats}')
        return score

    #########################
    # Agent makes selection #
    #########################

    # Get list of valid moves
    valid_moves = [c for c in range(COLUMNS) if obs.board[c] == 0]

    # Convert the board to a 2D grid
    grid = np.asarray(obs.board).reshape(ROWS, COLUMNS)

    # Search-based selection, currently disabled:
    #scores = dict(zip(valid_moves, [score_move(grid, col, obs.mark, N_STEPS) for col in valid_moves]))
    #max_cols = [key for key in scores.keys() if scores[key] == max(scores.values())]
    #for pref in [3,4,2,6,0,5,1]:
        #if pref in max_cols:
        #    choice = pref
        #    break

    # Random baseline: draw only from playable columns so the agent never
    # selects a full column, and so boards that are not 7 wide also work.
    choice = random.choice(valid_moves if valid_moves else list(range(COLUMNS)))

    if debug:
        print("Chosen column:", choice)

    return choice

In [None]:
#@title experimental_agent

def experimental_agent(obs, config, ON_TIME=None, N_STEPS=2, cutoff_time=None, debug=False):
    """Choose a ConnectX column with a deadline-aware alpha-beta search.

    Every legal column is scored with a depth-limited minimax search
    (alpha-beta pruning) over a window-counting heuristic. Each column gets an
    equal share of the turn budget; when that share is nearly used up the
    search stops deepening and only evaluates the next ply.

    Args:
        obs: Observation with ``board`` (flat list, top row first, 0 = empty)
            and ``mark`` (this agent's piece, 1 or 2).
        config: Configuration with ``rows``, ``columns``, ``inarow`` and a
            dict-style ``get``; ``actTimeout`` is read when ``cutoff_time`` is
            not supplied.
        ON_TIME: Wall-clock time at which the turn started; defaults to now.
        N_STEPS: Accepted for interface compatibility. The value is always
            overwritten with a depth of 3 to 6 based on how many ``1`` discs
            are on the board.
        cutoff_time: Seconds allowed for the whole turn. Defaults to
            ``actTimeout`` minus a 0.2 s safety margin.
        debug: When true, print search diagnostics.

    Returns:
        int: The chosen column.

    Note:
        The heuristic counts windows of 2, 3 and 4 discs, so it assumes
        ``config.inarow == 4``.
    """
    import numpy as np
    import random
    import time

    # constants (given by game)
    ROWS = config.rows
    COLUMNS = config.columns
    CNCTX = config.inarow
    ## coefficients (weights on variable future outcomes)
    A = 1     #my twos
    B = 100    #my threes
    C = 10000   #my fours
    D = -10    #opp-threes
    E = -1000   #opp-fours

    # Used only when the configuration carries no 'actTimeout'.
    # NOTE(review): 2 seconds is assumed to be the ConnectX default - confirm.
    DEFAULT_ACT_TIMEOUT = 2.0

    cutoff_time_offset = 0.2
    if ON_TIME is None:
        ON_TIME = time.time()
    if cutoff_time is None:
        # Falling back to a number (not None) keeps the subtraction valid when
        # the configuration has no 'actTimeout' entry.
        act_timeout = config.get('actTimeout', None)
        if act_timeout is None:
            act_timeout = DEFAULT_ACT_TIMEOUT
        cutoff_time = act_timeout - cutoff_time_offset

    # Share of the turn budget given to each column (a divisor of 7 on the
    # standard board).
    column_budget = (cutoff_time - cutoff_time_offset) / COLUMNS

    #vary lookahead depth according to state of play:
    if obs.board.count(1) < 2:
        N_STEPS =      3
    elif obs.board.count(1) <= ROWS*COLUMNS//(2*4):   # 3 up to one fourth
        N_STEPS =      3
    elif obs.board.count(1) <= ROWS*COLUMNS//(2*2): # 4 up to one half
        N_STEPS =      4
    elif obs.board.count(1) <= 3*ROWS*COLUMNS//(2*4): # 5 up to three fourths
        N_STEPS =      5
    else:
        N_STEPS =      6

    if debug:
        if obs.board.count(1) == 0:
            print(f'"configuration":{config}')
        print(f'\n###### Agent marks {obs.board.count(1):02} ######')
        print(f'###### Total marks {(obs.board.count(2) + obs.board.count(1)):02} ######')
        print(f'Using {N_STEPS} step lookahead')

    # Gets board at next step if agent drops piece in selected column.
    # Callers only pass columns whose top cell is empty.
    def drop_piece(grid, col, mark):
        next_grid = grid.copy()
        for row in range(ROWS-1, -1, -1):
            if next_grid[row][col] == 0:
                break
        next_grid[row][col] = mark
        return next_grid

    # Helper function for get_score: checks if window satisfies heuristic conditions
    def check_window(window, num_discs, piece):
        return (window.count(piece) == num_discs and window.count(0) == CNCTX-num_discs)

    # Helper function for get_score: counts number of windows satisfying specified heuristic conditions
    def count_windows(grid, num_discs, piece):
        num_windows = 0
        # horizontal
        for row in range(ROWS):
            for col in range(COLUMNS-(CNCTX-1)):
                window = list(grid[row, col:col+CNCTX])
                if check_window(window, num_discs, piece):
                    num_windows += 1
        # vertical
        for row in range(ROWS-(CNCTX-1)):
            for col in range(COLUMNS):
                window = list(grid[row:row+CNCTX, col])
                if check_window(window, num_discs, piece):
                    num_windows += 1
        # positive diagonal
        for row in range(ROWS-(CNCTX-1)):
            for col in range(COLUMNS-(CNCTX-1)):
                window = list(grid[range(row, row+CNCTX), range(col, col+CNCTX)])
                if check_window(window, num_discs, piece):
                    num_windows += 1
        # negative diagonal
        for row in range(CNCTX-1, ROWS):
            for col in range(COLUMNS-(CNCTX-1)):
                window = list(grid[range(row, row-CNCTX, -1), range(col, col+CNCTX)])
                if check_window(window, num_discs, piece):
                    num_windows += 1
        return num_windows

    # Helper function for minimax: calculates value of heuristic score
    # for grid and checks if the grid is terminal
    def get_score(grid, mark):
        num_twos = count_windows(grid, 2, mark) #A
        num_threes = count_windows(grid, 3, mark)  #B
        num_fours = count_windows(grid, 4, mark)   #C
        num_threes_opp = count_windows(grid, 3, mark%2+1) #D
        num_fours_opp = count_windows(grid, 4, mark%2+1)  #E
        score = A*num_twos + B*num_threes + C*num_fours + D*num_threes_opp + E*num_fours_opp
        is_terminal = (not num_fours == 0) or (not num_fours_opp == 0) or (list(grid[0, :]).count(0) == 0)
        return score, is_terminal

    # Minimax algorithm with alphabeta pruning implementation.
    # `deadline` is an absolute wall-clock time and is handed down unchanged,
    # so the remaining time is always measured against the clock. (Passing a
    # relative duration and subtracting time.time() from it again at every
    # level drives the value far below zero after the first ply.)
    def alphabeta(node, depth, alpha, beta, maximizingPlayer, mark, deadline):
        node_score, is_terminal = get_score(node, mark)
        if depth == 0 or is_terminal:
            return node_score
        if deadline - time.time() <= cutoff_time_offset/5:
            depth = 1  # nearly out of time: evaluate the children and stop
        valid_moves = [c for c in range(COLUMNS) if node[0][c] == 0]
        if maximizingPlayer:
            value = -np.inf
            for col in valid_moves:
                child = drop_piece(node, col, mark)
                value = max(value, alphabeta(child, depth-1, alpha, beta, False, mark, deadline))
                alpha = max(alpha, value)
                if alpha >= beta:
                    break
            return value

        else: #minimizing player
            value = np.inf
            for col in valid_moves:
                child = drop_piece(node, col, mark%2+1)
                value = min(value, alphabeta(child, depth-1, alpha, beta, True, mark, deadline))
                beta = min(beta, value)
                if alpha >= beta:
                    break
            return value

    # Uses alphabeta pruning to calculate value
    # of dropping piece in selected column
    def score_move(grid, col, mark, depth):
        column_start = time.time()
        deadline = column_start + column_budget
        next_grid = drop_piece(grid, col, mark)
        # Behind schedule for this column: only evaluate the grid right after the move.
        if time.time()-ON_TIME > (col+1)*column_budget:
            depth = 1
        score = alphabeta(next_grid, depth-1, -np.inf, np.inf, False, mark, deadline)
        if debug:
            summary_stats = {
                'column': col,
                'score': score,
                'column_time': round(time.time() - column_start, 4),
                'col_time_remaining': round(deadline - time.time(), 4),
                'tot_time_elapsed': round(time.time() - ON_TIME, 3)
            }
            print(f'"summary_stats":{summary_stats}')
        return score

    #########################
    # Agent makes selection #
    #########################

    # Get list of valid moves
    valid_moves = [c for c in range(COLUMNS) if obs.board[c] == 0]

    # Convert the board to a 2D grid
    grid = np.asarray(obs.board).reshape(ROWS, COLUMNS)

    # Use the heuristic to assign a score to each possible board in the next step
    scores = dict(zip(valid_moves, [score_move(grid, col, obs.mark, N_STEPS) for col in valid_moves]))

    # Get a list of columns (moves) that maximize the heuristic
    best_score = max(scores.values())
    max_cols = [key for key in scores.keys() if scores[key] == best_score]

    # select column in order of preference; the random fallback keeps `choice`
    # defined when no preferred column is among the best (boards wider than 7)
    for pref in [3,4,2,6,0,5,1]:
        if pref in max_cols:
            choice = pref
            break
    else:
        choice = random.choice(max_cols)

    if debug: print("Chosen column:", choice)
    return choice


In [None]:
# Switch the notebook-wide flag on: the mean-reward evaluation and the
# step-through loop further down only run when `debug` is true.
debug = True

In [None]:
# Show the current state of `env` inline in the notebook.
env.render(mode="ipython", width=244, height=250)

# Evaluate your Agent

In [None]:
def get_win_percentages(agent1, agent2, n_rounds=10):
    """Play ``n_rounds`` ConnectX games between two agents and print a summary.

    The rounds are split so that each agent moves first in roughly half of
    them. Results of the games where ``agent2`` moved first are flipped, so
    every outcome is ordered ``[agent1 result, agent2 result]``.

    Args:
        agent1: First agent (callable or built-in agent name).
        agent2: Second agent (callable or built-in agent name).
        n_rounds: Total number of games to play.

    Returns:
        list: One ``[agent1 result, agent2 result]`` pair per game.
    """
    import numpy as np

    # Use default Connect Four setup
    config = {'rows': 6, 'columns': 7, 'inarow': 4}
    rounds_agent1_first = n_rounds // 2
    rounds_agent2_first = n_rounds - rounds_agent1_first

    # Agent 1 goes first (roughly) half the time
    outcomes = evaluate("connectx", [agent1, agent2], config, [], rounds_agent1_first)
    # Agent 2 goes first (roughly) half the time; swap each pair back into agent1-first order
    swapped = evaluate("connectx", [agent2, agent1], config, [], rounds_agent2_first)
    outcomes += [[second, first] for [first, second] in swapped]

    agent1_wins = outcomes.count([1,-1])
    agent2_wins = outcomes.count([-1,1])
    print("Agent 1 Win Percentage:", np.round(agent1_wins/len(outcomes), 3))
    print("Agent 2 Win Percentage:", np.round(agent2_wins/len(outcomes), 3))
    print("Number of Invalid Plays by Agent 1:", outcomes.count([None, 0]))
    print("Number of Invalid Plays by Agent 2:", outcomes.count([0, None]))
    return outcomes

In [None]:
import time
# Benchmark: debug_agent against the imported `heuristic` agent.
# Prints the win summary, the total wall-clock time and the average time per game.
num_episodes = 77
start = time.time()
outcomes = get_win_percentages(debug_agent, heuristic, num_episodes)
end = time.time()
print ("Total time:",round(end-start,3),"\tAvg game time:",round((end-start)/num_episodes,3))


In [None]:
import time
# Benchmark: the imported `self_agent` against the imported `heuristic` agent.
# Prints the win summary, the total wall-clock time and the average time per game.
num_episodes = 77
start = time.time()
outcomes = get_win_percentages(self_agent, heuristic, num_episodes)
end = time.time()
print ("Total time:",round(end-start,3),"\tAvg game time:",round((end-start)/num_episodes,3))


In [None]:
import time
# Benchmark: the imported `self_agent` against debug_agent (111 games).
# Prints the win summary, the total wall-clock time and the average time per game.
num_episodes = 111
start = time.time()
outcomes = get_win_percentages(self_agent, debug_agent, num_episodes)
end = time.time()
print ("Total time:",round(end-start,3),"\tAvg game time:",round((end-start)/num_episodes,3))


In [None]:
# Scratch arithmetic. NOTE(review): presumably converts a 305-second run into
# minutes - confirm, or delete this cell.
305/60



*   Agent 1 Win Percentage: 0.48
*   Agent 2 Win Percentage: 0.52
*   Number of Invalid Plays by Agent 1: 0
*   Number of Invalid Plays by Agent 2: 0
*   Total time: 224.808 	Avg game time: 6.812



In [None]:
def mean_reward(rewards):
    """Return the average of the first entry of every reward pair.

    Args:
        rewards: Non-empty sequence of per-episode reward sequences; only
            element 0 of each one is used.

    Returns:
        float: Sum of the first entries divided by the number of episodes.
    """
    first_agent_total = 0
    for episode_rewards in rewards:
        first_agent_total = first_agent_total + episode_rewards[0]
    return first_agent_total / float(len(rewards))

# Run multiple episodes to estimate its performance.
num_episodes = 33
agent1 = debug_agent
agent2 = self_agent   #  "negamax"
if debug:
    # The episode count must be passed by keyword: the third positional
    # parameter of evaluate() is the configuration, followed by the steps
    # (compare the evaluate(..., config, [], n) calls in get_win_percentages).
    print("Debug Agent goes first:", mean_reward(evaluate("connectx", [agent1, agent2], num_episodes=num_episodes)))
    print("Opp Agent goes first:", mean_reward(evaluate("connectx", [agent2, agent1], num_episodes=num_episodes)))

# Step through your Agent
Click on any column to place a checker there ("manually select action").

In [None]:
# Play as first position against the built-in "negamax" agent.
# The `None` slot is the seat whose actions are supplied through trainer.step().
trainer = env.train([None, "negamax"])

observation = trainer.reset()

# Step the game one move at a time (only when `debug` is on), letting `agent1`
# - whatever an earlier cell last bound to that name - choose each action.
while debug and not env.done:
    my_action = agent1(observation, env.configuration)
    print("My Action", my_action)
    observation, reward, done, info = trainer.step(my_action)
    env.render(mode="ipython", width=100, height=90, header=False, controls=False)
env.render()

# Write Submission File



In [None]:
import inspect
import os

def write_agent_to_file(function, file):
    """Write the source code of ``function`` to ``file`` and report it.

    An existing file is appended to, so running this repeatedly adds another
    copy of the source each time; a missing file is created.

    Args:
        function: Object whose source ``inspect.getsource`` can retrieve.
        file: Path of the file to write.
    """
    mode = "w"
    if os.path.exists(file):
        mode = "a"
    with open(file, mode) as handle:
        source_code = inspect.getsource(function)
        handle.write(source_code)
        print(function, "written to", file)

In [None]:
# Write the agent's source code into the submission file.
# NOTE(review): no visible cell binds the name `my_agent` (the imports above
# bring it in only under the aliases pruner / heuristic / self_agent) -
# confirm which agent function is meant to be submitted.
submission_file = 'submission.py'
write_agent_to_file(my_agent, submission_file)

# Validate Submission
Play your submission against itself.  This is the first episode the competition will run to weed out erroneous agents.

Why validate? This roughly verifies that your submission is fully encapsulated and can be run remotely.

In [None]:
# Note: Stdout replacement is a temporary workaround.
# sys.stdout is saved before the submission is loaded and restored afterwards.
import sys
out = sys.stdout
# Read the submission's source text and take the last callable it defines as the agent.
submission = utils.read_file("submission.py")
agent = kaggle_env_agent.get_last_callable(submission)
sys.stdout = out

In [None]:
# Play the loaded submission against itself in a fresh environment; the run
# counts as a success only when both players finish with status "DONE".
env = make("connectx", debug=True)
env.run([agent, agent])
print("Success!" if env.state[0].status == env.state[1].status == "DONE" else "Failed...")

# Submit to Competition

1. Save this kernel.
2. View the committed version.
3. Go to "Data" section and find submission.py file.
4. Click "Submit to Competition"
5. Go to [My Submissions](https://kaggle.com/c/connectx/submissions) to view your score and episodes being played.