# ConnectX Kaggle Competition

https://www.kaggle.com/c/connectx/overview/environment-rules

In [1]:
from platform import python_version
python_version()

'3.7.7'

## Load and Test Kaggle Environment

In [2]:
from kaggle_environments import evaluate, make, utils

env = make("connectx", debug=True)
env.render()

+---+---+---+---+---+---+---+
| 0 | 0 | 0 | 0 | 0 | 0 | 0 |
+---+---+---+---+---+---+---+
| 0 | 0 | 0 | 0 | 0 | 0 | 0 |
+---+---+---+---+---+---+---+
| 0 | 0 | 0 | 0 | 0 | 0 | 0 |
+---+---+---+---+---+---+---+
| 0 | 0 | 0 | 0 | 0 | 0 | 0 |
+---+---+---+---+---+---+---+
| 0 | 0 | 0 | 0 | 0 | 0 | 0 |
+---+---+---+---+---+---+---+
| 0 | 0 | 0 | 0 | 0 | 0 | 0 |
+---+---+---+---+---+---+---+



## Load my Game Environment

In [3]:
import numpy as np
from gamelearner import RandomPlayer, HumanPlayer, GameController
from connectx import Connect4, Connect4BasicPlayer

In [4]:
game = Connect4()
while not game.game_over:
    role = game.turn
    moves = game.available_moves()
    move = np.random.choice(moves)
    game.make_move((role, move))
game.show_state()
print(f"Winner: {game.winner}")

X _ _ _ _ _ _
O _ _ _ _ _ _
O _ _ _ _ _ _
X _ O _ _ _ X
O X X O O O O
X X O X X O X
Winner: 2


## Competition Submission File

In [5]:
def my_agent(observation, configuration, run_tests=False):

    import numpy as np

    # Game parameters
    _connect = 4
    _shape = (6, 7)
    
    # Algorithm parameters
    _terminal_values = {'win': 1, 'loss': -1, 'draw':0}

    # Data objects for analyzing board
    _steps = {
        'u': (1, 0),
        'd': (-1, 0),
        'r': (0, 1),
        'l': (0, -1),
        'ur': (1, 1),
        'dr': (-1, 1),
        'ul': (1, -1),
        'dl': (-1, -1)
    }

    # Function used by _check_positions method
    _fcum = lambda x1, x2: (x1 + x2)*x2

    def empty_board_state():
        """Initialize board_full and state."""
        # board_full has a border set to -1
        board_full = -np.ones(np.array(_shape) + (2, 2), dtype='int8')
        state = state_from_board_full(board_full)
        state[:] = 0
        return board_full, state

    def get_fill_levels(state):
        # Note: This assumes proper filling!
        return (state > 0).sum(axis=0)

    def state_from_board_full(board_full):
        return board_full[1:1+_shape[0], 1:1+_shape[1]]

    def available_moves(fill_levels):
        """Returns list of available (empty) moves (slots)."""
        spaces_left = fill_levels < _shape[0]
        return np.nonzero(spaces_left)[0]

    def chain_in_direction(position, direction, role, board_full):
        """Finds number of matching discs in one direction."""
        step = _steps[direction]
        for i in range(_connect):
            position = (step[0]+position[0], step[1]+position[1])
            x = board_full[position]
            if x != role:
                break
        return i

    def check_game_state_from_position(position, role, board_full):
        results = {}
        for direction, step in _steps.items():
            n = chain_in_direction(position, direction, role, board_full)
            if n == _connect - 1:
                return True
            results[direction] = n
        for d1, d2 in [('u', 'd'), ('l', 'r'), ('ul', 'dr'), ('dl', 'ur')]:
            if results[d1] + results[d2] >= _connect-1:
                return True
        return False

    def check_game_state_after_move(move, board_full, state=None):
        if state is None:
            state = state_from_board_full(board_full)
        fill_levels = get_fill_levels(state)
        role, column = move
        level = fill_levels[column]
        assert level < _shape[0]
        position = (level+1, column+1)
        assert board_full[position] == 0
        return check_game_state_from_position(position, role, 
                                              board_full=board_full)

    def wins_from_next_move(role, board_full, state=None, moves=None):
        if state is None:
            state = state_from_board_full(board_full)
        fill_levels = get_fill_levels(state)
        if moves is None:
            moves = available_moves(fill_levels)
        wins = {}
        for col in moves:
            pos_fb = (fill_levels[col]+1, col+1)
            win = check_game_state_from_position(pos_fb, role, 
                                                 board_full=board_full)
            wins[col] = win
        return wins

    def check_for_obvious_move(role, board_full, state=None, 
                               fill_levels=None,
                               terminal_values=_terminal_values, depth=1):
        """Analyses the current board state (board_full) from the 
        perspective of the player role.

        Returns
            value, positions (float, list): value of the current
                state if it is a terminal state (from terminal_values) 
                else None, and a list of best positions (columns) to 
                play on next move.
        """
        if state is None:
            state = state_from_board_full(board_full)
        if fill_levels is None:
            fill_levels = get_fill_levels(state)

        # 0. Check if board full (draw):
        if fill_levels.sum() == _shape[0]*_shape[1]:
            print("Check_for_obvious_move called with full board")
            breakpoint()
            #raise ValueError("No available moves")

        opponent = role ^ 3
        win_value, loss_value = (terminal_values['win'], 
                                 terminal_values['loss'])

        # 1. Check for a win by role on next move
        possible_moves = wins_from_next_move(role, board_full=board_full, 
                                             state=state)
        n_wins = sum(possible_moves.values())
        if n_wins > 0:
            winning_moves = [col for col, win in possible_moves.items() if win]
            return win_value, winning_moves

        if len(possible_moves) == 1:
            # 2. Check if draw (last move but no win)
            if fill_levels.sum() == _shape[0]*_shape[1] - 1:
                return terminal_values['draw'], list(possible_moves.keys())

        #TODO: Could continue deeper search if only one move possible
        if depth > 0:
            # 3. Check what opponent could do next for each possible move
            bf2 = board_full.copy()  # TODO: Could remove if sure
            state = state_from_board_full(bf2)
            fill_levels = get_fill_levels(state)
            opp_wins = {}
            opp_losses = {}
            other_moves = []
            opp_move_values = {}
            for col in possible_moves:
                assert state[fill_levels[col], col] == 0  # TODO: delete later
                state[fill_levels[col], col] = role  # Next state after move
                fill_levels[col] += 1
                value, moves = check_for_obvious_move(opponent, board_full=bf2, 
                                                      state=state, 
                                                      fill_levels=fill_levels, 
                                                      depth=depth-1)
                opp_move_values[col] = value
                if value == win_value:
                    opp_wins[col] = len(moves)
                elif value == loss_value:
                    opp_losses[col] = len(moves)
                else:
                    other_moves.append(col)
                # Reverse move
                fill_levels[col] -= 1
                state[fill_levels[col], col] = 0

            # 4. Take any move where opponent will definitely lose
            if len(opp_losses) > 0:
                return win_value, [col for col, value in opp_move_values.items()
                                   if value == loss_value]

            # 5. If opponent will possibly win for all moves, assume defeat
            if len(opp_wins) == len(possible_moves):
                fewest = [col for col, n_win_moves in opp_wins.items()
                          if n_win_moves == min(opp_wins.values())]
                return loss_value, fewest

            # 6. Avoid any move where opponent will definitely win
            if len(opp_wins) > 0:
                return None, other_moves

        # Otherwise, return no value
        return None, list(possible_moves.keys())

    def tests():

        # Tests
        board_full, state = empty_board_state()
        assert np.array_equal(state, np.zeros(_shape, dtype='int8'))
        fill_levels = get_fill_levels(state)
        assert np.all(fill_levels == 0)
        assert (available_moves(fill_levels).tolist()
                == [0, 1, 2, 3, 4, 5, 6])
        role = 1
        assert (list(wins_from_next_move(role, board_full=board_full).keys())
                == list(range(7)))
        assert not any(wins_from_next_move(role, board_full=board_full).values())
        assert not any(wins_from_next_move(role, board_full=board_full, 
                                           state=state).values())
        moves = check_for_obvious_move(role, board_full, state=state)
        assert moves == (None, [0, 1, 2, 3, 4, 5, 6])
        moves = check_for_obvious_move(role, board_full, 
                                       fill_levels=fill_levels, depth=3)
        assert moves == (None, [0, 1, 2, 3, 4, 5, 6])
        state[:] = np.array([
            [0, 0, 1, 1, 2, 0, 2],
            [0, 0, 1, 1, 1, 0, 2],
            [0, 0, 2, 1, 0, 0, 2],
            [0, 0, 0, 2, 0, 0, 1],
            [0, 0, 0, 0, 0, 0, 0],
            [0, 0, 0, 0, 0, 0, 0]
        ])
        role = 2
        moves = check_for_obvious_move(role, board_full, state=state, depth=0)
        assert moves == (None, [0, 1, 2, 3, 4, 5, 6])
        moves = check_for_obvious_move(role, board_full, state=state, depth=1)
        assert moves == (None, [0, 2, 3, 4, 6])
        moves = check_for_obvious_move(role, board_full, state=state, depth=2)
        assert moves == (None, [0, 2, 3, 4, 6])
        moves = check_for_obvious_move(role, board_full, state=state, depth=3)
        assert moves == (-1, [1, 5])
        print("Tests complete")
    
    if run_tests:
        tests()

    else:
        columns = configuration.columns
        rows = configuration.rows
        inarow = configuration.inarow
        role = observation.mark
        assert columns == _shape[1], "columns not valid"
        assert rows == _shape[0], "rows not valid"
        assert inarow == _connect, "inarow not valid"
        assert role in (1, 2), "role not valid"
        board = np.array(observation.board).reshape(_shape)

        board_full, state = empty_board_state()
        state[:] = np.flip(board, axis=0)
        value, cols = check_for_obvious_move(role, board_full, 
                                             state=state, depth=3)
        col = np.random.choice(cols)
        assert col in range(columns), "col not valid"

        return int(col)


my_agent(None, None, run_tests=True)

Tests complete


## Test on Kaggle Environment

In [6]:
env = make("connectx", debug=True)

# Play as first position against random agent.
trainer = env.train([None, "random"])

observation = trainer.reset()

while not env.done:
    my_action = my_agent(observation, env.configuration)
    print("My Action", my_action)
    observation, reward, done, info = trainer.step(my_action)
    # env.render(mode="ipython", width=100, height=90, header=False, controls=False)

env.render()

My Action 3
My Action 0
My Action 3
My Action 0
My Action 0
My Action 0
+---+---+---+---+---+---+---+
| 0 | 0 | 0 | 0 | 0 | 0 | 0 |
+---+---+---+---+---+---+---+
| 0 | 0 | 0 | 0 | 0 | 0 | 0 |
+---+---+---+---+---+---+---+
| 1 | 0 | 0 | 0 | 0 | 0 | 0 |
+---+---+---+---+---+---+---+
| 1 | 0 | 0 | 0 | 0 | 0 | 0 |
+---+---+---+---+---+---+---+
| 1 | 0 | 0 | 1 | 2 | 0 | 0 |
+---+---+---+---+---+---+---+
| 1 | 2 | 0 | 1 | 2 | 2 | 2 |
+---+---+---+---+---+---+---+



In [7]:
# Evaluate against random agent.
rewards = evaluate("connectx", [my_agent, "random"], num_episodes=10)
print(rewards)
np.mean([r[0] for r in rewards])

[[1, -1], [1, -1], [1, -1], [1, -1], [1, -1], [1, -1], [1, -1], [1, -1], [1, -1], [1, -1]]


1.0

In [8]:
# Evaluate against random agent.
rewards = evaluate("connectx", ["random", my_agent], num_episodes=10)
print(rewards)
np.mean([r[0] for r in rewards])

[[-1, 1], [-1, 1], [-1, 1], [-1, 1], [-1, 1], [-1, 1], [-1, 1], [-1, 1], [-1, 1], [-1, 1]]


-1.0

In [9]:
# Evaluate against negamax agent.
rewards = evaluate("connectx", [my_agent, "negamax"], num_episodes=10)
print(rewards)
np.mean([r[0] for r in rewards])

[[1, -1], [1, -1], [0, 0], [1, -1], [1, -1], [-1, 1], [1, -1], [1, -1], [1, -1], [1, -1]]


0.7

In [10]:
# Depth 3 (100 episodes):
# random: 1.0
# negamax: 0.22 - 0.25

# Depth 4 (100 episodes):
# random: 1.0
# negamax: 0.47

In [11]:
# Play against itself
for _ in range(10):
    env = make("connectx", debug=True)
    env.run([my_agent, my_agent])
    env.render()

+---+---+---+---+---+---+---+
| 1 | 2 | 0 | 1 | 0 | 1 | 2 |
+---+---+---+---+---+---+---+
| 2 | 2 | 0 | 2 | 0 | 1 | 1 |
+---+---+---+---+---+---+---+
| 1 | 2 | 2 | 1 | 0 | 2 | 2 |
+---+---+---+---+---+---+---+
| 2 | 1 | 1 | 1 | 1 | 2 | 2 |
+---+---+---+---+---+---+---+
| 2 | 2 | 1 | 1 | 1 | 2 | 2 |
+---+---+---+---+---+---+---+
| 2 | 1 | 1 | 2 | 1 | 1 | 1 |
+---+---+---+---+---+---+---+

+---+---+---+---+---+---+---+
| 0 | 0 | 0 | 0 | 0 | 0 | 0 |
+---+---+---+---+---+---+---+
| 0 | 0 | 0 | 2 | 0 | 0 | 0 |
+---+---+---+---+---+---+---+
| 0 | 0 | 1 | 1 | 1 | 2 | 2 |
+---+---+---+---+---+---+---+
| 0 | 0 | 1 | 2 | 2 | 1 | 2 |
+---+---+---+---+---+---+---+
| 0 | 0 | 1 | 1 | 2 | 2 | 1 |
+---+---+---+---+---+---+---+
| 2 | 1 | 1 | 1 | 2 | 1 | 2 |
+---+---+---+---+---+---+---+

+---+---+---+---+---+---+---+
| 2 | 1 | 2 | 0 | 0 | 1 | 0 |
+---+---+---+---+---+---+---+
| 1 | 2 | 2 | 1 | 1 | 1 | 1 |
+---+---+---+---+---+---+---+
| 1 | 2 | 1 | 1 | 2 | 2 | 2 |
+---+---+---+---+---+---+---+
| 1 | 2 

+---+---+---+---+---+---+---+
| 1 | 1 | 2 | 2 | 1 | 1 | 2 |
+---+---+---+---+---+---+---+
| 1 | 2 | 2 | 1 | 1 | 2 | 2 |
+---+---+---+---+---+---+---+
| 2 | 1 | 1 | 2 | 2 | 1 | 1 |
+---+---+---+---+---+---+---+
| 2 | 2 | 1 | 1 | 2 | 2 | 1 |
+---+---+---+---+---+---+---+
| 2 | 2 | 1 | 2 | 2 | 1 | 2 |
+---+---+---+---+---+---+---+
| 1 | 1 | 2 | 1 | 1 | 2 | 1 |
+---+---+---+---+---+---+---+

+---+---+---+---+---+---+---+
| 0 | 0 | 0 | 0 | 0 | 0 | 0 |
+---+---+---+---+---+---+---+
| 0 | 0 | 0 | 1 | 0 | 0 | 0 |
+---+---+---+---+---+---+---+
| 0 | 0 | 2 | 1 | 0 | 0 | 0 |
+---+---+---+---+---+---+---+
| 0 | 0 | 2 | 2 | 0 | 0 | 0 |
+---+---+---+---+---+---+---+
| 2 | 1 | 1 | 1 | 2 | 0 | 0 |
+---+---+---+---+---+---+---+
| 1 | 1 | 2 | 1 | 2 | 2 | 0 |
+---+---+---+---+---+---+---+

+---+---+---+---+---+---+---+
| 1 | 1 | 2 | 2 | 2 | 1 | 2 |
+---+---+---+---+---+---+---+
| 1 | 2 | 1 | 1 | 2 | 2 | 1 |
+---+---+---+---+---+---+---+
| 1 | 1 | 2 | 2 | 1 | 1 | 2 |
+---+---+---+---+---+---+---+
| 2 | 2 

+---+---+---+---+---+---+---+
| 1 | 0 | 0 | 0 | 0 | 0 | 0 |
+---+---+---+---+---+---+---+
| 2 | 0 | 1 | 2 | 0 | 0 | 0 |
+---+---+---+---+---+---+---+
| 2 | 0 | 2 | 1 | 0 | 0 | 0 |
+---+---+---+---+---+---+---+
| 1 | 0 | 2 | 2 | 0 | 0 | 0 |
+---+---+---+---+---+---+---+
| 1 | 1 | 1 | 2 | 2 | 2 | 1 |
+---+---+---+---+---+---+---+
| 1 | 2 | 1 | 1 | 1 | 2 | 2 |
+---+---+---+---+---+---+---+

+---+---+---+---+---+---+---+
| 0 | 0 | 0 | 0 | 0 | 0 | 0 |
+---+---+---+---+---+---+---+
| 0 | 0 | 0 | 0 | 0 | 0 | 1 |
+---+---+---+---+---+---+---+
| 0 | 1 | 0 | 0 | 2 | 1 | 2 |
+---+---+---+---+---+---+---+
| 0 | 1 | 0 | 0 | 2 | 2 | 1 |
+---+---+---+---+---+---+---+
| 0 | 2 | 1 | 1 | 2 | 2 | 2 |
+---+---+---+---+---+---+---+
| 1 | 2 | 1 | 1 | 2 | 2 | 1 |
+---+---+---+---+---+---+---+

+---+---+---+---+---+---+---+
| 0 | 0 | 0 | 0 | 0 | 1 | 0 |
+---+---+---+---+---+---+---+
| 2 | 1 | 0 | 0 | 1 | 1 | 0 |
+---+---+---+---+---+---+---+
| 1 | 2 | 2 | 1 | 2 | 2 | 1 |
+---+---+---+---+---+---+---+
| 2 | 1 

+---+---+---+---+---+---+---+
| 2 | 1 | 2 | 1 | 1 | 1 | 2 |
+---+---+---+---+---+---+---+
| 2 | 2 | 2 | 1 | 1 | 2 | 1 |
+---+---+---+---+---+---+---+
| 1 | 1 | 1 | 2 | 2 | 1 | 2 |
+---+---+---+---+---+---+---+
| 2 | 2 | 1 | 1 | 2 | 1 | 2 |
+---+---+---+---+---+---+---+
| 2 | 1 | 2 | 1 | 1 | 1 | 2 |
+---+---+---+---+---+---+---+
| 2 | 2 | 1 | 1 | 2 | 2 | 1 |
+---+---+---+---+---+---+---+

+---+---+---+---+---+---+---+
| 0 | 0 | 0 | 0 | 0 | 0 | 0 |
+---+---+---+---+---+---+---+
| 0 | 0 | 0 | 0 | 2 | 2 | 0 |
+---+---+---+---+---+---+---+
| 0 | 0 | 0 | 0 | 2 | 2 | 0 |
+---+---+---+---+---+---+---+
| 0 | 0 | 2 | 0 | 1 | 1 | 0 |
+---+---+---+---+---+---+---+
| 2 | 0 | 2 | 1 | 1 | 1 | 1 |
+---+---+---+---+---+---+---+
| 2 | 0 | 1 | 2 | 1 | 1 | 1 |
+---+---+---+---+---+---+---+

+---+---+---+---+---+---+---+
| 1 | 2 | 1 | 1 | 2 | 1 | 2 |
+---+---+---+---+---+---+---+
| 2 | 1 | 1 | 2 | 2 | 1 | 1 |
+---+---+---+---+---+---+---+
| 1 | 2 | 1 | 1 | 1 | 2 | 1 |
+---+---+---+---+---+---+---+
| 2 | 1 

+---+---+---+---+---+---+---+
| 0 | 0 | 0 | 0 | 0 | 0 | 0 |
+---+---+---+---+---+---+---+
| 0 | 0 | 0 | 0 | 0 | 0 | 2 |
+---+---+---+---+---+---+---+
| 2 | 2 | 0 | 0 | 0 | 2 | 1 |
+---+---+---+---+---+---+---+
| 1 | 1 | 0 | 0 | 0 | 1 | 2 |
+---+---+---+---+---+---+---+
| 1 | 1 | 1 | 1 | 2 | 1 | 2 |
+---+---+---+---+---+---+---+
| 2 | 2 | 1 | 1 | 2 | 1 | 2 |
+---+---+---+---+---+---+---+

+---+---+---+---+---+---+---+
| 0 | 0 | 1 | 1 | 0 | 0 | 0 |
+---+---+---+---+---+---+---+
| 1 | 0 | 1 | 2 | 2 | 0 | 0 |
+---+---+---+---+---+---+---+
| 2 | 0 | 1 | 2 | 1 | 0 | 0 |
+---+---+---+---+---+---+---+
| 2 | 2 | 2 | 1 | 2 | 1 | 0 |
+---+---+---+---+---+---+---+
| 2 | 1 | 2 | 1 | 1 | 1 | 2 |
+---+---+---+---+---+---+---+
| 1 | 1 | 2 | 2 | 1 | 2 | 2 |
+---+---+---+---+---+---+---+

+---+---+---+---+---+---+---+
| 0 | 0 | 0 | 0 | 2 | 0 | 2 |
+---+---+---+---+---+---+---+
| 0 | 0 | 0 | 0 | 1 | 0 | 1 |
+---+---+---+---+---+---+---+
| 2 | 1 | 0 | 1 | 1 | 0 | 1 |
+---+---+---+---+---+---+---+
| 1 | 1 

## Log moves for Debugging 

In [12]:
env = make("connectx", debug=True)

# Play as first position against other agent.
trainer = env.train([None, "negamax"])
_shape = (env.configuration.rows, env.configuration.columns)

def get_board_as_flipped_array(observation):
    return np.flip(np.array(observation.board).reshape(_shape), axis=0)

my_role = observation.mark
opp_role = observation.mark ^ 3
observation = trainer.reset()
prev_state = get_board_as_flipped_array(observation)
moves = []
while not env.done:
    my_action = my_agent(observation, env.configuration)
    observation, reward, done, info = trainer.step(my_action)
    new_state = get_board_as_flipped_array(observation)
    changes = new_state - prev_state
    new_moves = []
    for role in (1, 2):
        if role in changes:
            rows, cols = np.nonzero(changes == role)
            assert len(rows) == 1
            new_moves.append((role, cols[0]))
    print(new_moves)
    moves = moves + new_moves
    prev_state = new_state

assert env.done
env.render()
state = np.array(env.state[0]['observation']['board']).reshape((6,7))
if reward == 0:
    print("Draw")
else:
    winner = my_role if reward == 1 else opp_role
    print("Winner", winner)

[(1, 3), (2, 6)]
[(1, 4), (2, 2)]
[(1, 6), (2, 5)]
[(1, 2), (2, 5)]
[(1, 6), (2, 5)]
[(1, 5), (2, 3)]
[(1, 4), (2, 3)]
[(1, 4), (2, 4)]
[(1, 5), (2, 4)]
[(1, 6), (2, 6)]
[(1, 1), (2, 5)]
[(1, 2), (2, 0)]
[(1, 2), (2, 2)]
[(1, 1), (2, 6)]
[(1, 1), (2, 1)]
[(1, 0), (2, 4)]
[(1, 0), (2, 0)]
[(1, 1), (2, 1)]
[(1, 0), (2, 2)]
[(1, 0), (2, 3)]
[(1, 3)]
+---+---+---+---+---+---+---+
| 1 | 2 | 2 | 0 | 2 | 2 | 2 |
+---+---+---+---+---+---+---+
| 1 | 1 | 2 | 1 | 2 | 1 | 2 |
+---+---+---+---+---+---+---+
| 2 | 2 | 1 | 2 | 2 | 1 | 1 |
+---+---+---+---+---+---+---+
| 1 | 1 | 1 | 2 | 1 | 2 | 1 |
+---+---+---+---+---+---+---+
| 1 | 1 | 1 | 2 | 1 | 2 | 1 |
+---+---+---+---+---+---+---+
| 2 | 1 | 2 | 1 | 1 | 2 | 2 |
+---+---+---+---+---+---+---+

Winner 1


In [13]:
len(moves)

41

In [14]:
game = Connect4(moves=moves)
game.show_state()
if game.game_over:
    if game.winner:
        print("\nWinner:", game.winner)
    else:
        print("\nDraw")

X O O _ O O O
X X O X O X O
O O X O O X X
X X X O X O X
X X X O X O X
O X O X X O O

Winner: 1
