In [1]:
from hexapawn import State, INF, X, O, BLANK, N, DOWN, UP
from ai_moves import ab_pruning_move, TDLeaf_move, TD_move, random_move, ab_pruning
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
import seaborn as sns

games = []

In [18]:
N = 3

def make_expert_game(turn=None):
    if turn is None:
        turn = np.random.choice([X, O])
        
    s = State(turn, N=N)
    states = [s.board]
    
    print(s)
    while s.terminal_test() is None:
        print('\n', list(enumerate(s.actions())), sep='')
        i = int(input())
        s.move(s.actions()[i])
        states.append(s.board)

        print(s)
    print('\nGame Over!')
    games.append((states, s.turn))
    
make_expert_game()

_______
|X|X|X|   0  1  2
| | | |   3  4  5
|O|O|O|   6  7  8

[(0, (6, 3)), (1, (7, 4)), (2, (8, 5))]
1
_______
|X|X|X|   0  1  2
| |O| |   3  4  5
|O| |O|   6  7  8

[(0, (0, 3)), (1, (0, 4)), (2, (2, 5)), (3, (2, 4))]
0
_______
| |X|X|   0  1  2
|X|O| |   3  4  5
|O| |O|   6  7  8

[(0, (4, 2)), (1, (8, 5))]
0
_______
| |X|O|   0  1  2
|X| | |   3  4  5
|O| |O|   6  7  8

Game Over!


In [27]:
flipped = []
for game in games:
    g = game[0]
    f = []
    for b in g:
        b = b[::-1]
        flip = ''
        for piece in b:
            if piece == 'X':
                flip += 'O'
            elif piece == 'O':
                flip += 'X'
            else:
                flip += ' '
        f.append(flip)

    turn = 'X' if game[1] == 'O' else 'O'
    flipped.append((f, turn))
games += flipped
games

[(['XXX   OOO',
   'XXX  OOO ',
   'X X  XOO ',
   'X X  OO  ',
   '  XX OO  ',
   '  XX OO  '],
  'O'),
 (['XXX   OOO',
   ' XXX  OOO',
   ' XXXO O O',
   ' X XX O O',
   ' X XO O  ',
   ' X XO O  '],
  'X'),
 (['XXX   OOO',
   'XX   XOOO',
   'XX   OO O',
   'X    XO O',
   'X  O X  O',
   'X  O X  O'],
  'X'),
 (['XXX   OOO', 'XXX  OOO ', ' XXX OOO ', ' OXX  OO ', ' OXX  OO '], 'X'),
 (['XXX   OOO', 'X X X OOO', 'X XOX  OO', 'X XO   OX', 'X XO   OX'], 'O'),
 (['XXX   OOO', 'XXX O O O', ' XXXO O O', ' XOX  O O', ' XOX  O O'], 'X'),
 (['XXX   OOO',
   ' XXX  OOO',
   ' XXO  O O',
   '  XX  O O',
   '  XX OO  ',
   '  XX OO  '],
  'X'),
 (['XXX   OOO',
   'XXX  OOO ',
   'X X XOOO ',
   'X X OO O ',
   '  X XO O ',
   '  X XO O '],
  'O'),
 (['XXX   OOO',
   'XXXO   OO',
   'X XX   OO',
   'X XO    O',
   'X  O X  O',
   'X  O X  O'],
  'O'),
 (['XXX   OOO', ' XXX  OOO', ' XXX OOO ', ' XX  OOX ', ' XX  OOX '], 'O'),
 (['XXX   OOO', 'XXX O O O', 'XX  OXO O', 'OX   XO O', 'OX   XO O'], '

In [296]:
def get_features(b):
    return np.array([1.0, pieces(b, X), pieces(b, O), n_actions(b, X), 
                n_pieces_can_attack(b, X), n_opp_can_be_attacked(b, X)] + \
                [row(b, r, X) for r in range(N-1)] + \
                [row(b, r, O) for r in range(1, N)] + \
                [end_in_sight(b, col, player) for col in range(N) for player in [X, O]] + \
                [furthest_piece(b, col, player) for col in range(N) for player in [X, O]])

def pieces(board, player):
    print(board, player)
    return board.count(player)

def row(board, r, player):
    return board[N*r:N*(r+1)].count(player)

def n_actions(board, player):
    s = State(player, board, N=N)
    return len(s.actions())

def end_in_sight(board, column, player):
    ''' Doesnt account for situation XOX '''
    direction = DOWN if player == X else UP
    opponent = O if player == X else X
    col = [board[i] for i in range(column, N**2, N)][::direction]
    if player in col:
        if opponent in col and col.index(player) < col.index(opponent):
            return 0
        return 1
    return -1

def furthest_piece(board, column, player):
    direction = DOWN if player == X else UP
    col = [board[i] for i in range(column, N**2, N)][::-direction]
    if player in col:
        return N - col.index(player) - 1
    return -1

def n_pieces_can_attack(board, player):
    ''' Number of players pieces that can attack opponent '''
    s = State(player, board, N=N)

    attacks = []
    for a in s.actions():
        # If not vertical
        if not(a[0] + N == a[1] or a[0] - N == a[1]):
            attacks.append(a[0])

    return len(set(attacks))  

def n_opp_can_be_attacked(board, player):
    ''' Number of opponents pieces that can be attacked '''
    s = State(player, board, N=N)

    attacks = []
    for a in s.actions():
        # If not vertical
        if not(a[0] + N == a[1] or a[0] - N == a[1]):
            attacks.append(a[1])

    return len(set(attacks))

In [297]:
train = []

for game in games:
    s = State(game[1], game[0][-1], N=N)
    rn = 1 if s.terminal_test() == 'X' else -1
    for board in game[0][:-1]:
        print(game, board)
        train.append((get_features(board), rn))

(['XXX   OOO', 'XXX  OOO ', 'X X  XOO ', 'X X  OO  ', '  XX OO  '], 'O') XXX   OOO
XXX   OOO X
XXX   OOO O
(['XXX   OOO', 'XXX  OOO ', 'X X  XOO ', 'X X  OO  ', '  XX OO  '], 'O') XXX  OOO 
XXX  OOO  X
XXX  OOO  O
(['XXX   OOO', 'XXX  OOO ', 'X X  XOO ', 'X X  OO  ', '  XX OO  '], 'O') X X  XOO 
X X  XOO  X
X X  XOO  O
(['XXX   OOO', 'XXX  OOO ', 'X X  XOO ', 'X X  OO  ', '  XX OO  '], 'O') X X  OO  
X X  OO   X
X X  OO   O
(['XXX   OOO', ' XXX  OOO', ' XXXO O O', ' X XX O O', ' X XO O  '], 'X') XXX   OOO
XXX   OOO X
XXX   OOO O
(['XXX   OOO', ' XXX  OOO', ' XXXO O O', ' X XX O O', ' X XO O  '], 'X')  XXX  OOO
 XXX  OOO X
 XXX  OOO O
(['XXX   OOO', ' XXX  OOO', ' XXXO O O', ' X XX O O', ' X XO O  '], 'X')  XXXO O O
 XXXO O O X
 XXXO O O O
(['XXX   OOO', ' XXX  OOO', ' XXXO O O', ' X XX O O', ' X XO O  '], 'X')  X XX O O
 X XX O O X
 X XX O O O
(['XXX   OOO', 'XX   XOOO', 'XX   OO O', 'X    XO O', 'X  O X  O'], 'X') XXX   OOO
XXX   OOO X
XXX   OOO O
(['XXX   OOO', 'XX   XOOO', 'XX   OO 

In [298]:
x = []
y = []
for r in train:
    x.append(r[0])
    y.append(r[1])
x = np.array(x)
y = np.array(y)

In [299]:
w = np.dot(np.dot(np.linalg.inv(np.dot(np.transpose(x), x)), np.transpose(x)), y)

In [306]:
s = State(O, N=3)
s.move((8, 5))
s

_______
|X|X|X|   0  1  2
| | |O|   3  4  5
|O|O| |   6  7  8

In [307]:
from math import tanh

s.actions()
for a in s.actions():
    res = State(X, s, N=3)
    res.move(a)
    print(a, tanh(np.dot(get_features(res.board), w)))

 XXX OOO  X
 XXX OOO  O
(0, 3) -1.0
X X XOOO  X
X X XOOO  O
(1, 4) -1.0
X X  XOO  X
X X  XOO  O
(1, 5) 1.0


In [304]:
s.move((1,4))
s.move()
s

_______
|X| |X|   0  1  2
| |X| |   3  4  5
|O|O|O|   6  7  8