# Kaggle - Connect X
https://www.kaggle.com/c/connectx

Version history:
- v1.0 Initial version

For this competition we will work with an agent that will recive these elements:
- configuration
 + columns - The number of columns on the board
 + rows - The number of rows on the board
 + inarow - The number of checkers in a row required to win
- observation
 + board - array [rows x columns] beginning from top left and ending at bottom right. Each element has three possible values:
    * 0 = Empty
    * 1 = Player1
    * 2 = Player2
 + mark - Which player is the agent. 1 or 2

The agent should return which column to place a checker in

In [5]:
# Additional documentation (especially interfaces) can be found on all public functions:
from kaggle_environments import make
help(make)
env = make("connectx")
dir(env)
help(env.specification)

Help on function make in module kaggle_environments.core:

make(environment, configuration={}, info={}, steps=[], logs=[], debug=False, state=None)
    Creates an instance of an Environment.
    
    Args:
        environment (str|Environment):
        configuration (dict, optional):
        info (dict, optional):
        steps (list, optional):
        debug (bool=False, optional):
    
    Returns:
        Environment: Instance of a specific environment.

Help on Struct in module kaggle_environments.utils object:

class Struct(builtins.dict)
 |  Struct(**entries)
 |  
 |  dict() -> new empty dictionary
 |  dict(mapping) -> new dictionary initialized from a mapping object's
 |      (key, value) pairs
 |  dict(iterable) -> new dictionary initialized as if via:
 |      d = {}
 |      for k, v in iterable:
 |          d[k] = v
 |  dict(**kwargs) -> new dictionary initialized with the name=value pairs
 |      in the keyword argument list.  For example:  dict(one=1, two=2)
 |  
 |  Method 

First simple example

In [1]:
from kaggle_environments import make
env = make("connectx", {"rows": 10, "columns": 8, "inarow": 5})
# observation.board es un array que va de arriba a abajo!!! board[0] --> celda de arriba a la izquierda
def agent(observation, configuration):
    print(observation) # {board: [...], mark: 1}
    print(configuration) # {rows: 10, columns: 8, inarow: 5}
    if (observation.board[0] == 0):
        return 0
    else:
        return 3
#    return 3 # Action: always place a mark in the 3rd column.

# Run an episode using the agent above vs the default random agent.
env.run([agent, "random"])
env.render(mode="ipython", width=500, height=450)

# Print schemas from the specification.
print("observation:")
print(env.specification.observation)
print("configuration:")
print(env.specification.configuration)
print("action:")
print(env.specification.action)

observation:
{'board': {'description': 'Serialized grid (rows x columns). 0 = Empty, 1 = P1, 2 = P2', 'type': 'array', 'shared': True, 'default': []}, 'mark': {'defaults': [1, 2], 'description': 'Which checkers are the agents.', 'enum': [1, 2]}}
configuration:
{'episodeSteps': {'description': 'Maximum number of steps in the episode.', 'type': 'integer', 'minimum': 1, 'default': 1000}, 'agentTimeout': {'description': 'Maximum runtime (seconds) to initialize an agent.', 'type': 'number', 'minimum': 0, 'default': 16}, 'actTimeout': {'description': 'Maximum runtime (seconds) to obtain an action from an agent.', 'type': 'number', 'minimum': 0, 'default': 8}, 'runTimeout': {'description': 'Maximum runtime (seconds) of an episode (not necessarily DONE).', 'type': 'number', 'minimum': 0, 'default': 1200}, 'isProduction': {'description': "Whether this episode is running in Kaggle's production evaluation system. Undefined behavior when set to True locally.", 'type': 'boolean', 'default': False},

Environment in JSON format

In [2]:
env.toJSON()

{'id': 'c39a315e-698c-11eb-adae-a45d367e5e68',
 'name': 'connectx',
 'title': 'ConnectX',
 'description': 'Classic Connect in a row but configurable.',
 'version': '1.0.1',
 'configuration': {'rows': 10,
  'columns': 8,
  'inarow': 5,
  'episodeSteps': 1000,
  'agentTimeout': 16,
  'actTimeout': 8,
  'runTimeout': 1200,
  'isProduction': False,
  'timeout': 8},
 'specification': {'action': {'description': 'Column to drop a checker onto the board.',
   'type': 'integer',
   'minimum': 0,
   'default': 0},
  'agents': [2],
  'configuration': {'episodeSteps': {'description': 'Maximum number of steps in the episode.',
    'type': 'integer',
    'minimum': 1,
    'default': 1000},
   'agentTimeout': {'description': 'Maximum runtime (seconds) to initialize an agent.',
    'type': 'number',
    'minimum': 0,
    'default': 16},
   'actTimeout': {'description': 'Maximum runtime (seconds) to obtain an action from an agent.',
    'type': 'number',
    'minimum': 0,
    'default': 8},
   'runTime

Description of element observation:

In [5]:
env.specification.observation

{'board': {'description': 'Serialized grid (rows x columns). 0 = Empty, 1 = P1, 2 = P2',
  'type': 'array',
  'shared': True,
  'default': []},
 'mark': {'defaults': [1, 2],
  'description': 'Which checkers are the agents.',
  'enum': [1, 2]}}

Description of element configuration:

In [6]:
env.specification.configuration

{'episodeSteps': {'description': 'Maximum number of steps in the episode.',
  'type': 'integer',
  'minimum': 1,
  'default': 1000},
 'agentTimeout': {'description': 'Maximum runtime (seconds) to initialize an agent.',
  'type': 'number',
  'minimum': 0,
  'default': 16},
 'actTimeout': {'description': 'Maximum runtime (seconds) to obtain an action from an agent.',
  'type': 'number',
  'minimum': 0,
  'default': 8},
 'runTimeout': {'description': 'Maximum runtime (seconds) of an episode (not necessarily DONE).',
  'type': 'number',
  'minimum': 0,
  'default': 1200},
 'isProduction': {'description': "Whether this episode is running in Kaggle's production evaluation system. Undefined behavior when set to True locally.",
  'type': 'boolean',
  'default': False},
 'columns': {'description': 'The number of columns on the board',
  'type': 'integer',
  'default': 7,
  'minimum': 1},
 'rows': {'description': 'The number of rows on the board',
  'type': 'integer',
  'default': 6,
  'minimum'

Description of element action:

In [7]:
env.specification.action

{'description': 'Column to drop a checker onto the board.',
 'type': 'integer',
 'minimum': 0,
 'default': 0}

In [5]:
env.specification.agents

[2]

In [2]:
env.agents

{'random': <function kaggle_environments.envs.connectx.connectx.random_agent(obs, config)>,
 'negamax': <function kaggle_environments.envs.connectx.connectx.negamax_agent(obs, config)>}

In [11]:
help(env.run)

Help on method run in module kaggle_environments.core:

run(agents) method of kaggle_environments.core.Environment instance
    Steps until the environment is "done" or the runTimeout was reached.
    
    Args:
        agents (list of any): List of agents to obtain actions from.
    
    Returns:
        tuple of:
            list of list of dict: The agent states of all steps executed.
            list of list of dict: The agent logs of all steps executed.



In [6]:
import numpy as np

from kaggle_environments import make
env = make("connectx", {"rows": 10, "columns": 8, "inarow": 5}, debug=True)

# given a board, the position (index_row, index_column) and the size of the board (rows x columns), returns the status of the
# cell --> 0 (empty), 1 (player 1), 2 (player 2), -1 (error)
# Cell with position (0, 0) is top left
def get_mark_board_list(board, index_row, index_col, rows, columns):
    # check if rows*columns fits with the list size 
    if (len(board) != rows * columns):
        return -1
    # check if position (index_row, index_col) fits with the list size
    if (len(board) < index_row * index_col):
        return -1
    return board[index_row*columns+index_col]

def agent(observation, configuration):
    
    ################################## START NESTED FUNCTIONS ###########################################
    
    # Given a board and a column, returns the index of the first row available
    def first_row_avail(board,col,num_rows):
        """
        Given a board and a column, returns the index of the first row available
        """
        for i in range(num_rows-1,-1,-1): # from bottom to top
            if board[i,col] == 0:
                return i
        return -1

    # Evaluate the given board. Useful for a minimax approach or for reinforcement learning
    def get_reward(board, in_a_row, mark):
        """
        Navigate through the board (bottom to top, left to right) reviewing the number of checkers "in a row"
        An array with 8 elements will be created in this review. Each element will store this number of checkers:
        0. Horizontal by me
           X 1 1 0
        1. Vertical by me
           0
           1
           1
           X
        2. Diagonal by me (/)
           X X 0
           X 1 X
           1 X X
        3. Diagonal by me (\)
           0 X X
           X 1 X
           X X 1
        4. Horizontal by other player
        5. Vertical by other player
        6. Diagonal by other player (/)
        7. Diagonal by other player (\)
        """
        MAX_REWARD = 1000000 # Wining board --> idea: Que sea potencia de in_a_row??
        max_reward = 1*10**(in_a_row + 2) # Wining board
        if (len(board.shape) != 2): # board is NOT a bi-dimensional array
            print("get_reward: ERROR - board is NOT a bi-dimensional array")
            return -1 * max_reward
        num_rows = board.shape[0]
        num_cols = board.shape[1]
        board_review = np.zeros(shape=(num_rows,num_cols,8), dtype=np.int8)
        reward = 0
        # If we have a wining board finish the evaluation
        for i in range(num_rows-1,-1,-1): # from bottom to top - rows
            for j in range(num_cols): # columns
                if board[i,j]==mark: # Me
                    if j != 0: # We are not in the first column (from left to right)
                        # HORIZONTAL
                        board_review[i,j,0] = board_review[i,j-1,0] + 1
                        if board_review[i,j,0] == in_a_row: return max_reward # end if it's a wining movement
                        # Update the reward if there is a number of checkers together and they can increase (tail)
                        if (j != num_cols-1): # We need two if statements (lazy evaluation)
                            if (board[i,j+1] == 0): reward += 10**board_review[i,j,0]
                        # Update the reward if there is a number of checkers together and they can increase (head)
                        if (j - board_review[i,j,0] >= 0): # We need two if statements (lazy evaluation)
                            if (board[i,j-board_review[i,j,0]] == 0): reward += 10**board_review[i,j,0]
                    else:
                        board_review[i,j,0] = 1
                        # Update the reward if the number of checkers together can increase (tail)
                        if (j != num_cols-1): # We need two if statements (lazy evaluation)
                            if (board[i,j+1] == 0): reward += 10**board_review[i,j,0]
                        board_review[i,j,2] = 1
                    if i != num_rows-1: # We are not in the first row (from bottom to top)
                        # VERTICAL
                        board_review[i,j,1] = board_review[i+1,j,1] + 1
                        if board_review[i,j,1] == in_a_row: return max_reward # end if it's a wining movement
                        # Update the reward if there is a number of checkers together and they can increase
                        if (i != 0) and (board[i-1,j] == 0): reward += 10**board_review[i,j,1]
                        # DIAGONAL (/)
                        if j != 0: # We are not in the first column (from left to right)
                            board_review[i,j,2] = board_review[i+1,j-1,2] + 1
                            if board_review[i,j,2] == in_a_row: return max_reward # end if it's a wining movement
                            # Update the reward if there is a number of checkers together and they can increase
                            if (i != 0) and (j != num_cols-1) and (board[i-1,j+1] == 0): reward += 10**board_review[i,j,2]
                        # DIAGONAL (\)
                        if j != num_cols-1: # We are not in the last column (from left to right)
                            board_review[i,j,3] = board_review[i+1,j+1,3] + 1
                            if board_review[i,j,3] == in_a_row: return max_reward # end if it's a wining movement
                            # Update the reward if there is a number of checkers together and they can increase
                            if (i != 0) and (j != 0) and (board[i-1,j-1] == 0): reward += 10**board_review[i,j,3]
                        else: # We are in the last column (from left to right), so this is the first element
                            board_review[i,j,3] = 1
                    else:
                        board_review[i,j,1] = 1
                        board_review[i,j,2] = 1
                        board_review[i,j,3] = 1
                elif board[i,j]!=0: # Other player
                    if j != 0: # We are not in the first column (from left to right)
                        board_review[i,j,4] = board_review[i,j-1,4] + 1
                        # If the other player is about to win (in the next movement), avoid it
                        if (board_review[i,j,4] == in_a_row - 1):
                            # The number of checkers can increase in the next movement (tail)
                            if (get_mark_board(board, i, j+1) == 0) and (get_mark_board(board, i+1, j+1) != 0):
                                return (-1 * max_reward) -1
                            # The number of checkers can increase in the next movement (head)
                            if (get_mark_board(board, i, j-in_a_row) == 0) and (get_mark_board(board, i+1, j-in_a_row) != 0):
                                return (-1 * max_reward) -2
                    else:
                        board_review[i,j,4] = 1
                        board_review[i,j,6] = 1
                    if i != num_rows-1: # We are not in the first row (from bottom to top)
                        board_review[i,j,5] = board_review[i+1,j,5] + 1
                        #print("DEBUG - Row:"+str(i)+" Column:"+str(j)+" TOT VERT:"+str(board_review[i,j,5]))
                        # If the other player is about to win (in the next movement), avoid it
                        if (board_review[i,j,5] == in_a_row - 1) and get_mark_board(board, i-1, j) == 0:
                            return (-1 * max_reward) -3
                        if j != 0: # We are not in the first column (from left to right)
                            board_review[i,j,6] = board_review[i+1,j-1,6] + 1
                        # If the other player is about to win (in the next movement), avoid it
                        if (board_review[i,j,6] == in_a_row - 1):
                            # The number of checkers can increase in the next movement (tail)
                            if (get_mark_board(board, i-1, j+1) == 0) and (get_mark_board(board, i, j+1) != 0):
                                return (-1 * max_reward)
                            # The number of checkers can increase in the next movement (head)
                            if (get_mark_board(board, i+in_a_row, j-in_a_row) == 0) and (get_mark_board(board, i+in_a_row+1, j-in_a_row) != 0):
                                return (-1 * max_reward)
                        if (j != num_cols-1): # We are not in the last column (from left to right)
                            board_review[i,j,7] = board_review[i+1,j+1,7] + 1
                        else: # We are in the last column (from left to right), so this is the first element
                            board_review[i,j,7] = 1
                        # If the other player is about to win (in the next movement), avoid it
                        if (board_review[i,j,7] == in_a_row - 1):
                            # The number of checkers can increase in the next movement (tail)
                            if (get_mark_board(board, i-1, j-1) == 0) and (get_mark_board(board, i, j-1) != 0):
                                return (-1 * max_reward)
                            # The number of checkers can increase in the next movement (head)
                            if (get_mark_board(board, i+in_a_row, j+in_a_row) == 0) and (get_mark_board(board, i+in_a_row+1, j+in_a_row) != 0):
                                return (-1 * max_reward)
                    else:
                        board_review[i,j,5] = 1
                        board_review[i,j,6] = 1
                        board_review[i,j,7] = 1
        return reward

    # Given a board and the position (index_row, index_column), returns the status of the cell:
    # 0 (empty), 1 (player 1), 2 (player 2), -1 (error)
    # Cell with position (0, 0) is top left
    def get_mark_board(board, index_row, index_col):
        """
        Given a board and the position (index_row, index_column), returns the status of the cell:
        0 (empty), 1 (player 1), 2 (player 2), -1 (error)
        Cell with position (0, 0) is top left
        Parameters:
        - board: Bi-dimensional array
        - index_row, index_col: position of the element to be accessed
        """
        # Check size of the array
        if (len(board.shape) != 2): # board is NOT a bi-dimensional array
            print("get_mark_board: ERROR - board is NOT a bi-dimensional array")
            return -1
        num_rows = board.shape[0]
        num_cols = board.shape[1]
        if (index_row >= num_rows): # index_row out of the size of the array
            print("get_mark_board: ERROR - Row " + str(index_row) + " is out of the size of the array. Rows:[0," + str(num_rows-1) + "]. Col " + str(index_col))
            return -1
        if (index_col >= num_cols): # index_col out of the size of the array
            print("get_mark_board: ERROR - Row " + str(index_row) + " Col " + str(index_col) + " is out of the size of the array. Cols:[0," + str(num_cols-1) + "]")
            return -1
        return board[index_row,index_col]
    
    
    ################################## END NESTED FUNCTIONS ###########################################
    
    
    print(configuration) # {rows: 10, columns: 8, inarow: 5}
    # Number of rows on the board
    num_rows = configuration.rows
    # Number of columns on the board
    num_cols = configuration.columns
    # Number of checkers "in a row" needed to win
    in_a_row = configuration.inarow
    print(observation) # {board: [...], mark: 1}
    # The current serialized board (rows x columns) - array [rows x columns] with top row first
    board = observation.board
    # Which player the agent is playing as (1 or 2).
    mark = observation.mark
    # If the board is empty, put the first checker in the middle column
    if max(board)==0:
        return round(num_cols/2)
    # List of available columns
    available_cols = [col for col in range(num_cols) if board[col] == 0]
    # Convert the list board into a two-dimensional array
    board_array=np.array(board)
    board_array=board_array.reshape((num_rows,num_cols))
    # Evaluate the target board with a checker in each available column, and get the best one
    reward_list=[] # List with rewards of each movement
    for target_col in available_cols:
        target_board = board_array.copy()
        target_row = first_row_avail(board_array,target_col,num_rows)
        target_board[target_row,target_col] = mark
        reward = get_reward(target_board, in_a_row, mark)
        reward_list.append(reward)
        print("Row:"+str(target_row)+" Column:"+str(target_col)+" Reward:"+str(reward))
    # Get the highest reward
    max_reward = max (reward_list)
    # Return the column with the highest reward
    return available_cols[reward_list.index(max_reward)]

# Run an episode using the agent above vs the default random agent.
#env.run([agent, "random"])
env.run([agent, "negamax"])
env.render(mode="ipython", width=500, height=450)
env.state

{'rows': 10, 'columns': 8, 'inarow': 5, 'episodeSteps': 1000, 'agentTimeout': 16, 'actTimeout': 8, 'runTimeout': 1200, 'isProduction': False, 'timeout': 8}
{'board': [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], 'mark': 1}
{'rows': 10, 'columns': 8, 'inarow': 5, 'episodeSteps': 1000, 'agentTimeout': 16, 'actTimeout': 8, 'runTimeout': 1200, 'isProduction': False, 'timeout': 8}
{'board': [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 1, 0, 0, 0], 'mark': 1}
Row:9 Column:0 Reward:20
Row:9 Column:1 Reward:30
Row:9 Column:2 Reward:20
Row:8 Column:3 Reward:150
Row:8 Column:4 Reward:150
Row:9 Column:5 Reward:100
Row:9 Col

{'rows': 10, 'columns': 8, 'inarow': 5, 'episodeSteps': 1000, 'agentTimeout': 16, 'actTimeout': 8, 'runTimeout': 1200, 'isProduction': False, 'timeout': 8}
{'board': [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 2, 0, 0, 2, 0, 2, 2, 0, 1, 0, 0, 1, 1, 1, 1, 0, 2, 0, 0, 1, 1, 1, 2, 0, 2, 0, 0, 1, 1, 1, 1, 2, 2, 0, 0, 1, 2, 1, 2, 2, 2], 'mark': 1}
Row:9 Column:0 Reward:46490
Row:9 Column:1 Reward:46580
Row:4 Column:2 Reward:46530
Row:5 Column:3 Reward:55660
Row:4 Column:4 Reward:46520
Row:3 Column:5 Reward:46530
Row:7 Column:6 Reward:46390
Row:3 Column:7 Reward:46500
{'rows': 10, 'columns': 8, 'inarow': 5, 'episodeSteps': 1000, 'agentTimeout': 16, 'actTimeout': 8, 'runTimeout': 1200, 'isProduction': False, 'timeout': 8}
{'board': [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 2, 0, 2, 0, 0, 2, 1, 2, 2, 0, 1, 0, 0, 1, 1, 1, 1, 0, 2, 0, 0, 1, 1, 1, 2, 0, 2

[{'action': 1,
  'reward': 1,
  'info': {},
  'observation': {'board': [0,
    0,
    2,
    2,
    2,
    2,
    0,
    1,
    2,
    0,
    1,
    1,
    1,
    2,
    0,
    2,
    2,
    0,
    1,
    1,
    1,
    2,
    0,
    2,
    1,
    0,
    2,
    2,
    1,
    1,
    0,
    2,
    1,
    0,
    1,
    2,
    1,
    2,
    0,
    2,
    2,
    0,
    2,
    1,
    2,
    2,
    0,
    1,
    1,
    0,
    1,
    1,
    1,
    1,
    0,
    2,
    2,
    0,
    1,
    1,
    1,
    2,
    0,
    2,
    1,
    1,
    1,
    1,
    1,
    1,
    2,
    2,
    2,
    1,
    1,
    2,
    1,
    2,
    2,
    2],
   'mark': 1},
  'status': 'DONE'},
 {'action': 0,
  'reward': -1,
  'info': {},
  'observation': {'mark': 2},
  'status': 'DONE'}]

In [3]:
help(env.step)

Help on method step in module kaggle_environments.core:

step(actions, logs=None) method of kaggle_environments.core.Environment instance
    Execute the environment interpreter using the current state and a list of actions.
    
    Args:
        actions (list): Actions to pair up with the current agent states.
        logs (list): Logs to pair up with each agent for the current step.
    
    Returns:
        list of dict: The agents states after the step.



You can initialize the environment from a prior state (episode resume), which is useful for debugging.

In [13]:
from kaggle_environments import make

env = make("connectx", {"rows": 10, "columns": 8, "inarow": 5}, steps=[[{'action': 0,
    'reward': 0,
    'info': {},
    'observation': {'board': [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 2, 2, 0, 1, 1, 0, 0, 2, 1, 1, 0, 2, 2, 0, 2, 1, 1, 1, 0, 2, 2, 2, 1, 1, 1, 1, 2, 2, 2, 1, 2, 1, 1, 1, 1, 2, 2],
     'mark': 1},
    'status': 'ACTIVE'},
   {'action': 0,
    'reward': 0,
    'info': {},
    'observation': {'mark': 2},
    'status': 'INACTIVE'}]], debug=True)

env.specification.observation.board = [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 2, 2, 0, 1, 1, 0, 0, 2, 1, 1, 0, 2, 2, 0, 2, 1, 1, 1, 0, 2, 2, 2, 1, 1, 1, 1, 2, 2, 2, 1, 2, 1, 1, 1, 1, 2, 2]
env.specification.observation.mark = 1
print("Obs: ",env.specification.observation)
print(type(env.specification.observation.board))
print("Board: ",env.specification.observation.board)
print("Conf: ",env.configuration)
agent(env.specification.observation, env.configuration)



Obs:  {'board': [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 2, 2, 0, 1, 1, 0, 0, 2, 1, 1, 0, 2, 2, 0, 2, 1, 1, 1, 0, 2, 2, 2, 1, 1, 1, 1, 2, 2, 2, 1, 2, 1, 1, 1, 1, 2, 2], 'mark': 1}
<class 'list'>
Board:  [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 2, 2, 0, 1, 1, 0, 0, 2, 1, 1, 0, 2, 2, 0, 2, 1, 1, 1, 0, 2, 2, 2, 1, 1, 1, 1, 2, 2, 2, 1, 2, 1, 1, 1, 1, 2, 2]
Conf:  {'rows': 10, 'columns': 8, 'inarow': 5, 'episodeSteps': 1000, 'agentTimeout': 16, 'actTimeout': 8, 'runTimeout': 1200, 'isProduction': False, 'timeout': 8}
{'rows': 10, 'columns': 8, 'inarow': 5, 'episodeSteps': 1000, 'agentTimeout': 16, 'actTimeout': 8, 'runTimeout': 1200, 'isProduction': False, 'timeout': 8}
{'board': [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 2, 2, 0,

4

You can setup a lightweight training environment for a single agent. It is a starting point which can be integrated with other frameworks (i.e. gym, stable-baselines).

In [15]:
from kaggle_environments import make

env = make("connectx", debug=True)

# Training agent in first position (player 1) against the default random agent.
trainer = env.train([None, "random"])

obs = trainer.reset()
for _ in range(100):
    env.render()
    action = 0 # Action for the agent being trained.
    obs, reward, done, info = trainer.step(action)
    if done:
        obs = trainer.reset()

Invalid Action: Invalid column: 0
Invalid Action: Invalid column: 0
Invalid Action: Invalid column: 0
Invalid Action: Invalid column: 0
Invalid Action: Invalid column: 0
Invalid Action: Invalid column: 0
Invalid Action: Invalid column: 0


In [3]:
help(env.train)

Help on method train in module kaggle_environments.core:

train(agents=[]) method of kaggle_environments.core.Environment instance
    Setup a lightweight training environment for a single agent.
    Note: This is designed to be a lightweight starting point which can
          be integrated with other frameworks (i.e. gym, stable-baselines).
          The reward returned by the "step" function here is a diff between the
          current and the previous step.
    
    Example:
        env = make("tictactoe")
        # Training agent in first position (player 1) against the default random agent.
        trainer = env.train([None, "random"])
    
        obs = trainer.reset()
        done = False
        while not done:
            action = 0 # Action for the agent being trained.
            obs, reward, done, info = trainer.step(action)
        env.render()
    
    Args:
        agents (list): List of agents to obtain actions from while training.
                       The agent to tr

Another way of training your agent.

In [22]:
# Play as first position against random agent.
trainer = env.train([None, "random"])

observation = trainer.reset()

while not env.done:
    my_action = agent(observation, env.configuration)
    print("My Action:", my_action)
    observation, reward, done, info = trainer.step(my_action)
    print ("Observation:", observation)
    print ("Reward:", reward)
    print ("Done:", done)
    print ("Info:", info)
    env.render(mode="ipython", width=100, height=90, header=False, controls=False)
#env.render(mode="ipython", width=445, height=400)#, header=False, controls=False)

{'episodeSteps': 1000, 'agentTimeout': 16, 'actTimeout': 8, 'runTimeout': 1200, 'isProduction': False, 'columns': 7, 'rows': 6, 'inarow': 4, 'timeout': 8}
{'board': [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], 'mark': 1}
My Action: 4
Observation: {'board': [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 2], 'mark': 1}
Reward: 0
Done: False
Info: {}


{'episodeSteps': 1000, 'agentTimeout': 16, 'actTimeout': 8, 'runTimeout': 1200, 'isProduction': False, 'columns': 7, 'rows': 6, 'inarow': 4, 'timeout': 8}
{'board': [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 2], 'mark': 1}
Row:5 Column:0 Reward:30
Row:5 Column:1 Reward:40
Row:5 Column:2 Reward:40
Row:5 Column:3 Reward:210
Row:4 Column:4 Reward:160
Row:5 Column:5 Reward:110
Row:4 Column:6 Reward:40
My Action: 3
Observation: {'board': [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 1, 1, 0, 2], 'mark': 1}
Reward: 0
Done: False
Info: {}


{'episodeSteps': 1000, 'agentTimeout': 16, 'actTimeout': 8, 'runTimeout': 1200, 'isProduction': False, 'columns': 7, 'rows': 6, 'inarow': 4, 'timeout': 8}
{'board': [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 1, 1, 0, 2], 'mark': 1}
Row:5 Column:0 Reward:220
Row:5 Column:1 Reward:230
Row:5 Column:2 Reward:2110
Row:4 Column:3 Reward:440
Row:4 Column:4 Reward:440
Row:5 Column:5 Reward:1110
Row:3 Column:6 Reward:230
My Action: 2
Observation: {'board': [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 2, 0, 0, 1, 1, 1, 0, 2], 'mark': 1}
Reward: 0
Done: False
Info: {}


{'episodeSteps': 1000, 'agentTimeout': 16, 'actTimeout': 8, 'runTimeout': 1200, 'isProduction': False, 'columns': 7, 'rows': 6, 'inarow': 4, 'timeout': 8}
{'board': [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 2, 0, 0, 1, 1, 1, 0, 2], 'mark': 1}
Row:5 Column:0 Reward:2120
Row:5 Column:1 Reward:1000000
Row:4 Column:2 Reward:2340
Row:4 Column:3 Reward:2420
Row:3 Column:4 Reward:2160
Row:5 Column:5 Reward:1000000
Row:3 Column:6 Reward:2130
My Action: 1
Observation: {'board': [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 2, 0, 1, 1, 1, 1, 0, 2], 'mark': 1}
Reward: 1
Done: True
Info: {}


The function 'evaluate' returns a list of lists (one list per episode).  
These lists per episode contains two elements (one per player), and these elements have these possible values:
- 1: This player wins
- -1: This player losts
- 0: Draw (I guess there will be a value for draw)  
Example:  
[[1, -1], # First episode: Player 1 won  
 [1, -1], # Second episode: Player 1 won  
 [1, -1], # Third episode: Player 1 won  
 [1, -1], # ...  
 [1, -1],  
 [1, -1],  
 [-1, 1],  
 [1, -1],  
 [1, -1],  
 [-1, 1]]

In [8]:
from kaggle_environments import evaluate
help(evaluate)

Help on function evaluate in module kaggle_environments.core:

evaluate(environment, agents=[], configuration={}, steps=[], num_episodes=1)
    Evaluate and return the rewards of one or more episodes (environment and agents combo).
    
    Args:
        environment (str|Environment):
        agents (list):
        configuration (dict, optional):
        steps (list, optional):
        num_episodes (int=1, optional): How many episodes to execute (run until done).
    
    Returns:
        list of list of int: List of final rewards for all agents for all episodes.



In [18]:
evaluate("connectx", [agent, "random"], num_episodes=10)
#evaluate("connectx", [agent, "negamax"], num_episodes=10)

[[1, -1],
 [1, -1],
 [1, -1],
 [1, -1],
 [1, -1],
 [1, -1],
 [1, -1],
 [1, -1],
 [1, -1],
 [1, -1]]

Test our agent against random and negamax agent.  
- The closer the value is to 1 the better is our agent (wins more times than loses)
- The closer the value is to -1 the worst is our agent (loses more times than wins)

In [9]:
def mean_reward(rewards):
    return sum(r[0] for r in rewards) / float(len(rewards))

# Run multiple episodes to estimate its performance.
print("My Agent vs Random Agent:", mean_reward(evaluate("connectx", [agent, "random"], num_episodes=10)))
print("My Agent vs Negamax Agent:", mean_reward(evaluate("connectx", [agent, "negamax"], num_episodes=10)))

My Agent vs Random Agent: 1.0
My Agent vs Negamax Agent: 0.4


Play manually against your agent (it seems that it doesn't work...)

In [23]:
env.play([agent, None], width=500, height=450)

{'episodeSteps': 1000, 'agentTimeout': 16, 'actTimeout': 8, 'runTimeout': 1200, 'isProduction': False, 'columns': 7, 'rows': 6, 'inarow': 4, 'timeout': 8}
{'board': [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], 'mark': 1}


Put our agent in a Python file

In [3]:
import inspect
import os

def write_agent_to_file(function, file):
    with open(file, "a" if os.path.exists(file) else "w") as f:
        f.write("import numpy as np\n\n") # We need numpy library in our agent
        f.write(inspect.getsource(function))
        print(function, "written to", file)

#write_line_to_file("import numpy as np","submission.py")
write_agent_to_file(agent, "submission.py")

<function agent at 0x0000002729BDB510> written to submission.py


#### Validate Submission  
Play your submission against itself. This is the first episode the competition will run to weed out erroneous agents.

In [14]:
from kaggle_environments import utils
from kaggle_environments import agent

# Note: Stdout replacement is a temporary workaround.
import sys
out = sys.stdout
#submission = utils.read_file("C:/Mis cosas/IA/Kaggle/20200906_ConnectX/submission.py")
submission = utils.read_file("submission.py")
agent = agent.get_last_callable(submission)
sys.stdout = out

env = make("connectx", debug=True)
env.run([agent, agent])
print("Success!" if env.state[0].status == env.state[1].status == "DONE" else "Failed...")

{'board': [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], 'mark': 1}
{'episodeSteps': 1000, 'agentTimeout': 16, 'actTimeout': 8, 'runTimeout': 1200, 'isProduction': False, 'columns': 7, 'rows': 6, 'inarow': 4, 'timeout': 8}
{'mark': 2, 'board': [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0]}
{'episodeSteps': 1000, 'agentTimeout': 16, 'actTimeout': 8, 'runTimeout': 1200, 'isProduction': False, 'columns': 7, 'rows': 6, 'inarow': 4, 'timeout': 8}
Row:5 Column:0 Reward:10
Row:5 Column:1 Reward:20
Row:5 Column:2 Reward:20
Row:5 Column:3 Reward:10
Row:4 Column:4 Reward:50
Row:5 Column:5 Reward:10
Row:5 Column:6 Reward:10
{'board': [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 1, 0, 0], 'mark': 1}
{'episodeSteps': 1000, 'agentTimeout': 16, 'actTimeout': 8, 'runTime

#### Results
v1
- 15/Mar/2021 --> Position 300 out of 516 (58.13%). Score: 707.5