# Linear TD($\lambda$) Agent
As a first-pass at constructing an agent to play Connect 4 in the Kaggle [ConnectX tournament](https://www.kaggle.com/c/connectx), I'll construct an agent that acts greedily via a linear value function that is approximated using coarse coding and the TD($\lambda$) algorithm. First, we'll build a class for ConnectX to train our model with. 

In [1]:
import numpy as np

class ConnectX:
    """Implements a simple environment for Connect X training.
    """

    def __init__(self):
        self.config = {'columns':None, 'rows': None, 'inarow': None}
        self.state = {'board': None, 'mark': None}

        self.termination = None
        self.count = 0
        self.reward = -1

    def start_game(self, rows = 6, columns = 7, inarow = 4, mark = 1):
        """Setup for the environment called when an episode first starts.
        Note:
            Initialize a tuple with the reward, first state observation, boolean
            indicating if it's terminal.
        """

        self.config['columns'] = columns
        self.config['rows'] = rows
        self.config['inarow'] = inarow

        self.state['mark'] = mark
        self.state['board'] = [0 for i in range(columns) for j in range(rows)]

        self.termination = False
        self.reward = -1
        self.count = 0

    def render(self):

        columns = self.config['columns']
        rows = self.config['rows']
        board = self.state['board']

        def print_row(values, delim="|"):
            return f"{delim} " + f" {delim} ".join(str(v) for v in values) + f" {delim}\n"

        row_bar = "+" + "+".join(["---"] * columns) + "+\n"
        out = row_bar
        for r in range(rows):
            out = out + \
            print_row(board[r * columns: r * columns + columns]) + row_bar

        print(out)
    
    def play(self, column):
        
        board = self.state['board']
        mark = self.state['mark']
        
        columns = self.config['columns']
        rows = self.config['rows']
        
        row = max([r for r in range(rows) if board[column + (r * columns)] == 0])
        board[column + (row * columns)] = mark
        
        self.count +=1
        

    def is_win(self, column, has_played=True):
        
        board = self.state['board']
        config = self.config
        mark = self.state['mark']
        
        columns = config['columns']
        rows = config['rows']
        inarow = config['inarow'] - 1
        row = (min([r for r in range(rows) if board[column + (r * columns)] == mark]) if has_played
        else max([r for r in range(rows) if board[column + (r * columns)] == 0])
            )

        def count(offset_row, offset_column):
            for i in range(1, inarow + 1):
                r = row + offset_row * i
                c = column + offset_column * i
                if (
                    r < 0
                    or r >= rows
                    or c < 0
                    or c >= columns
                    or board[c + (r * columns)] != mark
                ):
                    return i - 1
            return inarow

        return (
            count(1, 0) >= inarow  # vertical.
            or (count(0, 1) + count(0, -1)) >= inarow  # horizontal.
            or (count(-1, -1) + count(1, 1)) >= inarow  # top left diagonal.
            or (count(-1, 1) + count(1, -1)) >= inarow  # top right diagonal.
        )

    def step(self, column):
        
        mark = self.state['mark']
        
        #play move
        self.play(column)
        
        if self.is_win(column):
            self.reward = 30
            self.termination = True
            
            return (self.reward, self.termination, self.state)
            
        # Check for a tie.
        if all(mark != 0 for mark in self.state['board']):
            self.reward = 0
            self.termination = True
            
            return (self.reward, self.termination, self.state)
        
        return (self.reward, self.termination, self.state)
            
        

In [3]:
game = ConnectX()
game.start_game()
game.render()

reward, done, state = game.step(0,1)
game.render()
print(reward, done)

reward, done, state = game.step(0,1)
game.render()
print(reward, done)

reward, done, state = game.step(0,1)
game.render()
print(reward, done)

reward, done, state = game.step(0,2)
game.render()
print(reward, done)

+---+---+---+---+---+---+---+
| 0 | 0 | 0 | 0 | 0 | 0 | 0 |
+---+---+---+---+---+---+---+
| 0 | 0 | 0 | 0 | 0 | 0 | 0 |
+---+---+---+---+---+---+---+
| 0 | 0 | 0 | 0 | 0 | 0 | 0 |
+---+---+---+---+---+---+---+
| 0 | 0 | 0 | 0 | 0 | 0 | 0 |
+---+---+---+---+---+---+---+
| 0 | 0 | 0 | 0 | 0 | 0 | 0 |
+---+---+---+---+---+---+---+
| 0 | 0 | 0 | 0 | 0 | 0 | 0 |
+---+---+---+---+---+---+---+

+---+---+---+---+---+---+---+
| 0 | 0 | 0 | 0 | 0 | 0 | 0 |
+---+---+---+---+---+---+---+
| 0 | 0 | 0 | 0 | 0 | 0 | 0 |
+---+---+---+---+---+---+---+
| 0 | 0 | 0 | 0 | 0 | 0 | 0 |
+---+---+---+---+---+---+---+
| 0 | 0 | 0 | 0 | 0 | 0 | 0 |
+---+---+---+---+---+---+---+
| 0 | 0 | 0 | 0 | 0 | 0 | 0 |
+---+---+---+---+---+---+---+
| 1 | 0 | 0 | 0 | 0 | 0 | 0 |
+---+---+---+---+---+---+---+

0 False
+---+---+---+---+---+---+---+
| 0 | 0 | 0 | 0 | 0 | 0 | 0 |
+---+---+---+---+---+---+---+
| 0 | 0 | 0 | 0 | 0 | 0 | 0 |
+---+---+---+---+---+---+---+
| 0 | 0 | 0 | 0 | 0 | 0 | 0 |
+---+---+---+---+---+---+---+


In [4]:
#random game
from random import choice

def random_agent(obs, config):
    return choice([c for c in range(config['columns']) if obs['board'][c] == 0])

game.start_game()

mark = 1

while game.termination != True:
    action = random_agent(game.state, game.config)
    reward, done, state = game.step(action, mark)
    game.render()
    print(reward, done)
    
    #switch turns
    mark = 1 + mark%2

+---+---+---+---+---+---+---+
| 0 | 0 | 0 | 0 | 0 | 0 | 0 |
+---+---+---+---+---+---+---+
| 0 | 0 | 0 | 0 | 0 | 0 | 0 |
+---+---+---+---+---+---+---+
| 0 | 0 | 0 | 0 | 0 | 0 | 0 |
+---+---+---+---+---+---+---+
| 0 | 0 | 0 | 0 | 0 | 0 | 0 |
+---+---+---+---+---+---+---+
| 0 | 0 | 0 | 0 | 0 | 0 | 0 |
+---+---+---+---+---+---+---+
| 0 | 0 | 0 | 0 | 1 | 0 | 0 |
+---+---+---+---+---+---+---+

0 False
+---+---+---+---+---+---+---+
| 0 | 0 | 0 | 0 | 0 | 0 | 0 |
+---+---+---+---+---+---+---+
| 0 | 0 | 0 | 0 | 0 | 0 | 0 |
+---+---+---+---+---+---+---+
| 0 | 0 | 0 | 0 | 0 | 0 | 0 |
+---+---+---+---+---+---+---+
| 0 | 0 | 0 | 0 | 0 | 0 | 0 |
+---+---+---+---+---+---+---+
| 0 | 0 | 0 | 0 | 0 | 0 | 0 |
+---+---+---+---+---+---+---+
| 0 | 0 | 2 | 0 | 1 | 0 | 0 |
+---+---+---+---+---+---+---+

0 False
+---+---+---+---+---+---+---+
| 0 | 0 | 0 | 0 | 0 | 0 | 0 |
+---+---+---+---+---+---+---+
| 0 | 0 | 0 | 0 | 0 | 0 | 0 |
+---+---+---+---+---+---+---+
| 0 | 0 | 0 | 0 | 0 | 0 | 0 |
+---+---+---+---+---+-

In [2]:
def argmax(q_values):
    top = float("-inf")
    ties = []
    
    for i in range(len(q_values)):
        if q_values[i] > top:
            top = q_values[i]
            ties = []

        if q_values[i] == top:
            ties.append(i)
            
    return np.random.choice(ties)


#linear TD(lambda) Agent
class TDAgent:
    """
    Initialization of TD(lambda) Agent. All values are set to None so they can
    be initialized in the agent_init method.
    """
    def __init__(self):
        self.lambd = None
        self.gamma = None
        self.delta = None
        self.w = None
        self.alpha = None
        self.prev_features = None
        self.columns = None
        self.rows = None
        self.inarow = None
        self.z = None
        self.epsilon = None

    def agent_init(self, agent_info={}):
        """Setup for the agent called when the experiment first starts."""
        self.lambd = agent_info.get("lambda", 0.7)
        self.gamma = agent_info.get("gamma", 0.5)
        self.alpha = agent_info.get("alpha", 0.1)
        self.epsilon = agent_info.get("epsilon", 0.2)
        self.columns = agent_info.get("columns", 7)
        self.rows = agent_info.get("rows", 6)
        self.inarow = agent_info.get("inarow", 4)
        num_features = 2 * (self.columns + self.rows + 2 * (self.rows + self.columns - 1))
        self.w = agent_info.get("w", np.random.normal( 50.0,25.0, size = num_features ) )
        self.z = np.zeros(num_features)
        self.delta = 0.0

    def select_action(self, state):
        """
        Selects an action using epsilon greedy
        Args:
        tiles - np.array, an array of active tiles
        Returns:
        (chosen_action, action_value) - (int, float), tuple of the chosen action
                                        and it's value
        """
        
        board = state['board']
        columns = self.columns
        rows = self.rows
        mark = state['mark']
        
        possible_actions = [c for c in range(columns) if board[c] == 0]
        action_values = []
        possible_features = []
        
        def test_drop(board, column):
            test_board = np.copy(board)
            row = max([r for r in range(rows) if board[column + (r * columns)] == 0])
            test_board[column + (row * columns)] = mark
        
            return test_board
        
        for c in possible_actions:
            possible_board = test_drop(board, c)
            X = coarse_code_board(possible_board, mark, rows, columns)
            possible_features.append(X)
            action_values.append( np.dot(self.w,X) )
        
        if np.random.random() < self.epsilon:
            action = np.random.choice(possible_actions)
            features = possible_features[possible_actions.index(action)]
        
        else:
            action = possible_actions[argmax(action_values)]
            features = possible_features[argmax(action_values)]
        
        self.prev_features = features
        
        return action
    
    def agent_update(self, reward, state):
        """The first method called when the experiment starts, called after
        the environment starts.
        Args:
            state (Numpy array): the state observation from the
                environment's evn_start function.
        Returns:
            The first action the agent takes.
        """
        board = state['board']
        columns = self.columns
        rows = self.rows
        mark = state['mark']
        
        #get the new features
        current_features = coarse_code_board(board, mark, rows, columns)
        current_v = np.dot(self.w, current_features )
        
        #update agent weights
        self.z = self.gamma * self.lambd * self.z + self.prev_features
        self.delta = reward + self.gamma * current_v - np.dot(self.w, self.prev_features)
        self.w += self.alpha * self.delta * self.z
        
    def last_agent_update(self, reward):

        """The first method called when the experiment starts, called after
        the environment starts.
        Args:
            state (Numpy array): the state observation from the
                environment's evn_start function.
        Returns:
            The first action the agent takes.
        """
        
        #update agent weights
        self.z = self.gamma * self.lambd * self.z + self.prev_features
        self.delta = reward - np.dot(self.w, self.prev_features)
        self.w = self.w + self.alpha * self.delta * self.z
        
    

In [3]:
def coarse_code_board(board, mark, rows, cols):
    
    board_grid = np.reshape(board, (rows, cols))
    
    my_marks = board_grid == mark
    opp_marks = np.multiply(board_grid != mark, board_grid > 0)
    
    my_X = np.hstack( (np.sum(my_marks,axis = 1)/rows , np.sum(my_marks, axis = 0)/cols) )
    opp_X = np.hstack( (np.sum(opp_marks,axis = 1)/rows , np.sum(opp_marks, axis = 0)/cols) )
    
    right_diags = []
    for d in range(-rows+1,cols):
        right_diags.append( np.average(np.diag(my_marks,d)) )
    
    left_diags = []
    for d in range(cols-1,-rows,-1):
        left_diags.append( np.average(np.diag(np.flip(my_marks, axis = 1),d)) )
    
    my_diags = np.hstack( (right_diags, left_diags) )
    my_X = np.hstack( (my_X,right_diags, left_diags) )
    
    right_diags = []
    for d in range(-rows+1,cols):
        right_diags.append( np.average(np.diag(opp_marks,d)) )
    
    left_diags = []
    for d in range(cols-1,-rows,-1):
        left_diags.append( np.average(np.diag(np.flip(opp_marks, axis = 1),d)) )
    
    opp_diags = np.hstack( (right_diags, left_diags) )
    opp_X = np.hstack( (opp_X,right_diags, left_diags) )
    
    X = np.hstack( (my_X,opp_X) )
    
    return X

In [4]:
board = [0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0,
              0, 0, 0, 2, 2, 0, 0, 0, 0, 2, 1, 2, 0, 
              0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 2, 1, 2, 
              0, 2, 0]

mark = 1
columns = 7
rows = 6
inarow = 4
features = coarse_code_board(board, mark, rows, columns)
print(features)

[0.         0.33333333 0.         0.16666667 0.5        0.16666667
 0.         0.14285714 0.57142857 0.28571429 0.         0.
 0.         0.         0.         0.66666667 0.25       0.4
 0.         0.16666667 0.2        0.         0.         0.
 0.         0.         0.         0.         0.25       0.2
 0.33333333 0.16666667 0.4        0.         0.         0.
 0.         0.         0.         0.33333333 0.33333333 0.
 0.5        0.         0.28571429 0.14285714 0.42857143 0.
 0.14285714 0.         0.         0.5        0.         0.5
 0.         0.5        0.16666667 0.         0.         0.
 0.         0.         0.         0.         0.         0.
 0.4        0.16666667 0.33333333 0.         0.25       0.
 0.5        0.        ]


In [4]:
class RandomAgent:
    
    def __init__(self):
        
        self.columns = None
        self.rows = None
        
    def agent_init(self, agent_info={}):
        
        self.columns = agent_info.get("columns", 7)
        self.rows = agent_info.get("rows", 6)
    
    def select_action(self, state):
        """
        Selects an action using epsilon greedy
        Args:
        tiles - np.array, an array of active tiles
        Returns:
        (chosen_action, action_value) - (int, float), tuple of the chosen action
                                        and it's value
        """
        
        board = state['board']
        columns = self.columns
        rows = self.rows
        mark = state['mark']
        
        return choice([c for c in range(columns) if board[c] == 0])
    
    def agent_update(self, reward, state):
        pass
        
    def last_agent_update(self, reward):
        pass

You can use the player above to view the game in detail: every move is captured and can be replayed. Try this now!

As you'll soon see, this information will prove incredibly useful for brainstorming ways to improve our agents.

# Defining Agents

To participate in the competition, you'll create your own agents.

Your agent should be implemented as a Python function that accepts two arguments: obs and config. It returns an integer with the selected column, where indexing starts at zero. So, the returned value is one of 0-6, inclusive.

We'll start with a few examples, to provide some context. In the code cell below:

* The first agent behaves identically to the "random" agent above.
* The second agent always selects the middle column, whether it's valid or not! Note that if any agent selects an invalid move, it loses the game.
* The third agent selects the leftmost valid column.

In [5]:
from random import choice

agent1 = TDAgent()
agent1.agent_init()

agent2 = TDAgent()
agent2.agent_init()

players = [agent1, agent2]

win_record = []

for episode in range(10000):
    agent1.z = np.zeros(74)
    agent2.z = np.zeros(74)
    first_play = choice([1,2])

    game = ConnectX()
    game.start_game(mark = first_play)
  
    while game.termination != True:
        action = players[game.state['mark']-1].select_action(game.state)
        reward, done, state = game.step(action)
        if done:
            players[game.state['mark']-1].last_agent_update(reward)
            if reward >0:
                win_record.append(game.state['mark'])
            else:
                win_record.append(0)
        else:
            players[game.state['mark']-1].agent_update(reward, state)
        #switch turns
        game.state['mark'] = 1 + game.state['mark']%2

In [6]:
print(np.average( np.asarray(win_record) ==1 ) )
print(win_record)

0.458
[1, 2, 2, 1, 2, 1, 2, 1, 2, 2, 2, 1, 2, 2, 2, 1, 2, 2, 1, 1, 2, 1, 1, 2, 2, 1, 1, 1, 2, 1, 2, 1, 2, 1, 1, 2, 1, 1, 2, 1, 1, 1, 2, 1, 1, 2, 2, 1, 2, 1, 2, 1, 2, 2, 1, 1, 2, 2, 0, 1, 2, 1, 1, 1, 1, 2, 2, 1, 1, 2, 2, 1, 2, 1, 2, 1, 1, 1, 1, 1, 2, 2, 2, 1, 2, 1, 1, 1, 1, 2, 1, 1, 1, 2, 1, 2, 2, 1, 2, 2, 2, 1, 1, 2, 1, 1, 1, 1, 2, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, 1, 2, 1, 2, 1, 1, 1, 1, 1, 1, 2, 1, 2, 1, 1, 1, 1, 1, 1, 2, 2, 1, 2, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 1, 1, 1, 2, 2, 2, 1, 1, 2, 2, 2, 1, 2, 1, 2, 2, 1, 2, 1, 2, 2, 1, 2, 1, 2, 1, 2, 1, 2, 2, 1, 2, 1, 2, 1, 1, 1, 2, 2, 2, 2, 2, 2, 1, 2, 1, 1, 1, 2, 1, 1, 1, 2, 1, 1, 1, 2, 1, 2, 1, 1, 2, 2, 2, 1, 2, 2, 2, 1, 1, 2, 2, 2, 1, 2, 2, 2, 2, 1, 2, 2, 2, 1, 2, 2, 2, 1, 1, 1, 1, 1, 2, 2, 2, 1, 1, 1, 2, 2, 2, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, 1, 1, 2, 1, 2, 1, 2, 2, 1, 1, 2, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 2, 2, 1, 1, 2, 2, 1, 2, 2, 2, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 2, 2, 2, 1, 2, 2, 2, 2, 

In [7]:
print(agent1.w)
print(agent2.w)

[-20.57204108  -6.37090741  68.96297124   1.78063306  -9.03055496
 -16.42366691   1.24289462  14.97352051   9.52213671  10.06766456
  11.41473222   0.71293106 -42.42541834  -3.14450753  -6.81302825
  -7.66539553 -12.52027404   0.64886489 -23.29401126  21.05150062
   6.31610531  29.5589523   21.61320528  20.59591555  33.88101201
  18.33430844  -2.71825221   1.47567517   4.55726736  15.28735159
   3.89161822   7.83756001  -9.2946297    9.44637516  -8.19591571
   9.82597411   0.6106773   66.20437064  -5.1059798  -11.97706846
 -71.9078885  -13.98646929  -0.44195062  -5.67477957   2.56559316
   7.93955168  18.83497508  15.46671021 -10.79607337  24.33355542
   2.11696167  -0.76073021   5.95715292  -9.53813742   1.84949401
 -31.13959545   7.6378486  -21.56446465 -21.38233288   9.42274245
 -18.87900069  19.22892952  11.91315041  -0.82544539  22.30051987
   7.1961058   15.88511271 -14.2366737   -2.08432208 -12.31822672
  -8.51911281 -10.25984768   4.19189136  -4.80722391]
[  2.06752123 -19.1306

In [11]:
from random import choice

eval1 = TDAgent()
eval1.agent_init(agent_info={"w": np.copy(agent1.w), "epsilon": 0.0})

eval2 = RandomAgent()
eval2.agent_init()

players = [eval1, eval2]

win_record = []

for episode in range(1000):
    eval1.z = np.zeros(74)
    #agent2.z = np.zeros(74)
    first_play = choice([1,2])

    game = ConnectX()
    game.start_game(mark = first_play)
  
    while game.termination != True:
        action = players[game.state['mark']-1].select_action(game.state)
        reward, done, state = game.step(action)
        if done:
            #players[game.state['mark']-1].last_agent_update(reward)
            if reward >0:
                win_record.append(game.state['mark'])
            else:
                win_record.append(0)
            #players[game.state['mark']-1].agent_update(reward, state)
        #switch turns
        game.state['mark'] = 1 + game.state['mark']%2

In [12]:
print(np.average( np.asarray(win_record) ==1 ) )
print(win_record)

0.716
[1, 2, 1, 1, 2, 1, 1, 2, 2, 1, 1, 1, 1, 2, 2, 1, 1, 1, 1, 2, 1, 2, 1, 1, 2, 2, 2, 1, 1, 1, 1, 1, 1, 2, 1, 2, 2, 1, 2, 1, 1, 1, 2, 2, 2, 1, 2, 1, 1, 2, 2, 2, 1, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 1, 2, 1, 1, 1, 2, 1, 1, 1, 1, 2, 1, 1, 2, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, 1, 2, 1, 1, 2, 1, 1, 1, 1, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 2, 1, 2, 1, 1, 1, 1, 2, 1, 1, 1, 1, 1, 1, 2, 1, 2, 2, 2, 1, 1, 1, 2, 1, 1, 2, 2, 1, 1, 2, 2, 1, 2, 1, 1, 1, 1, 1, 1, 1, 2, 1, 2, 2, 2, 2, 2, 2, 1, 2, 1, 1, 1, 1, 2, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 1, 2, 1, 2, 1, 1, 1, 1, 1, 1, 1, 2, 2, 1, 1, 2, 1, 2, 2, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 2, 1, 1, 1, 2, 2, 1, 1, 1, 1, 1, 2, 1, 2, 1, 1, 2, 1, 2, 2, 1, 1, 1, 1, 1, 2, 2, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 2, 2, 1, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 1, 1, 1, 2, 1, 2, 2, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 2, 

In [31]:
from random import choice

stepthrough1 = TDAgent()
stepthrough1.agent_init(agent_info={"w": np.copy(agent2.w), "epsilon": 0.0})

eval2 = RandomAgent()
eval2.agent_init()

players = [stepthrough1, eval2]

first_play = choice([1,2])
game = ConnectX()
game.start_game(mark = first_play)
  
while game.termination != True:
    action = players[game.state['mark']-1].select_action(game.state)
    reward, done, state = game.step(action)
    game.render()
    
    game.state['mark'] = 1 + game.state['mark']%2

+---+---+---+---+---+---+---+
| 0 | 0 | 0 | 0 | 0 | 0 | 0 |
+---+---+---+---+---+---+---+
| 0 | 0 | 0 | 0 | 0 | 0 | 0 |
+---+---+---+---+---+---+---+
| 0 | 0 | 0 | 0 | 0 | 0 | 0 |
+---+---+---+---+---+---+---+
| 0 | 0 | 0 | 0 | 0 | 0 | 0 |
+---+---+---+---+---+---+---+
| 0 | 0 | 0 | 0 | 0 | 0 | 0 |
+---+---+---+---+---+---+---+
| 1 | 0 | 0 | 0 | 0 | 0 | 0 |
+---+---+---+---+---+---+---+

+---+---+---+---+---+---+---+
| 0 | 0 | 0 | 0 | 0 | 0 | 0 |
+---+---+---+---+---+---+---+
| 0 | 0 | 0 | 0 | 0 | 0 | 0 |
+---+---+---+---+---+---+---+
| 0 | 0 | 0 | 0 | 0 | 0 | 0 |
+---+---+---+---+---+---+---+
| 0 | 0 | 0 | 0 | 0 | 0 | 0 |
+---+---+---+---+---+---+---+
| 0 | 0 | 0 | 0 | 0 | 0 | 0 |
+---+---+---+---+---+---+---+
| 1 | 0 | 0 | 0 | 2 | 0 | 0 |
+---+---+---+---+---+---+---+

+---+---+---+---+---+---+---+
| 0 | 0 | 0 | 0 | 0 | 0 | 0 |
+---+---+---+---+---+---+---+
| 0 | 0 | 0 | 0 | 0 | 0 | 0 |
+---+---+---+---+---+---+---+
| 0 | 0 | 0 | 0 | 0 | 0 | 0 |
+---+---+---+---+---+---+---+
| 0 | 0 