In [None]:
import pickle
import re
from multiprocessing import Pool
from time import time

import numpy as np
import pandas as pd
import seaborn as sns
from matplotlib import pyplot as plt
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.metrics import accuracy_score, roc_auc_score
from sklearn.model_selection import train_test_split

from tic_tac_toe import UltimateBoard, ClassicBoard
# Apply the ggplot style sheet to figures drawn by matplotlib.
plt.style.use('ggplot')
# NOTE(review): %pylab star-imports numpy and matplotlib.pyplot names into the
# interactive namespace, which hides where names come from. No visible cell
# relies on those names; `%matplotlib inline` looks like a safe replacement,
# but confirm against any cells outside this view first.
%pylab inline

# Single-entry colour list; not referenced by any cell visible in this view.
colors=['#547fc4']

In [None]:
class UltimateBoard():
    '''
    State of one game of ultimate tic-tac-toe.

    Nine ClassicBoard sub-boards (keys 0-8 of `boards`) are tracked together
    with a tenth ClassicBoard, `meta_board`, on which a square is claimed
    whenever the matching sub-board is won.
    '''

    def __init__(self, x_turn=True):
        '''
        Create an empty game.

        x_turn: True if X moves first, False if O moves first.
        Raises ValueError if x_turn is not a bool.
        '''
        if not isinstance(x_turn, bool):
            raise ValueError("x_turn must be boolean")
        self.boards = {i:ClassicBoard(i) for i in range(9)}
        self.meta_board = ClassicBoard('meta_board')
        self.game_over = False
        # True only when a player has won the meta board (never for a draw).
        self.won = False
        # 'X', 'O' or 'Cat' once the game is over; None while in progress.
        self.winner = None
        # Indices of the sub-boards the next move may be played on.
        self.active_boards = set(range(9))
        self.x_turn = x_turn
        # One (board, pos, player) tuple per move, player 1 = X and 2 = O.
        self.history = []
        # Flattened meta-board snapshot taken after each move.
        self.meta_history = []

    def play(self, board, pos, label):
        '''
        Place `label` ('X' or 'O') on square `pos` of sub-board `board`.

        pos may be an int 0-8 or a (row, col) tuple.
        Raises ValueError if the board is not currently playable or if it is
        the other player's turn; errors from ClassicBoard.play propagate and
        leave this object's state untouched.
        '''
        if board not in self.active_boards:
            raise ValueError("That board is not in play")

        if isinstance(pos, tuple):
            pos = 3*pos[0] + pos[1]

        if ((self.x_turn and label != 'X')
            or (not self.x_turn and label != 'O')):
            raise ValueError("It is the other player's turn")

        self.boards[board].play(pos, label)
        self._update_state(board, pos, label)

    def _next_active_boards(self, target):
        '''
        Return the set of boards playable after a move into square `target`.

        The move sends the opponent to sub-board `target`; if that board is
        already finished, every unfinished board becomes playable instead.
        '''
        if self.boards[target].game_over:
            return {i for i in range(9) if not self.boards[i].game_over}
        return {target}

    def _update_state(self, board, pos, label):
        '''
        Update game state after a successful play on a sub-board.
        '''
        sub_board = self.boards[board]
        self.history.append((board, pos, {'X':1, 'O':2}[label]))

        # A sub-board that has just been won claims its square on the meta board.
        if sub_board.game_over and sub_board.won:
            self.meta_board.play(board, label)

        self.active_boards = self._next_active_boards(pos)

        # Decide the outcome once only, so that a move played after the end of
        # the game cannot overwrite the recorded winner.
        if not self.game_over:
            if self.meta_board.game_over and self.meta_board.won:
                self.winner = label
                self.won = True
                self.game_over = True
            elif self.meta_board.game_over or len(self.active_boards) == 0:
                # Drawn meta board, or no sub-board left to play on.
                self.winner = 'Cat'
                self.game_over = True

        self.meta_history.append(self.meta_board.board.flatten())
        self._flip_turn()

    def get_history(self):
        '''
        Return one feature row (a list) per move played so far.

        Each row holds the 81 sub-board squares after that move (0 empty,
        1 = X, 2 = O, indexed board*9 + pos), followed by the flattened
        meta-board snapshot taken after that move. Once the game is over,
        every row additionally ends with the final `winner` label.
        '''
        big_board = np.zeros(81)

        final_res = []

        # mv is a tuple (board (int), pos (int), label (int))
        for i, mv in enumerate(self.history):
            big_board[mv[0]*9 + mv[1]] = mv[2]
            app_list = list(big_board) + list(self.meta_history[i])
            # Label every finished game, draws included.
            if self.game_over:
                app_list.append(self.winner)
            final_res.append(app_list)

        return final_res

    def undo_move(self):
        '''
        Take back the most recent move and restore the state before it.

        Raises ValueError if no move has been played.
        Assumes ClassicBoard.undo_move() reverts that board's latest play,
        including its game_over / won flags -- TODO confirm in tic_tac_toe.
        '''
        if len(self.history) == 0:
            raise ValueError("No moves to undo")

        board, cell, player = self.history.pop()
        # Keep the snapshots aligned with `history`; get_history pairs them by index.
        if self.meta_history:
            self.meta_history.pop()

        sub_board = self.boards[board]
        # A finished board is never playable again, so if this board is won
        # then the move being undone is the one that claimed its meta square.
        claimed_meta_square = sub_board.game_over and sub_board.won
        sub_board.undo_move()
        if claimed_meta_square:
            self.meta_board.undo_move()

        # The turn goes back to whoever made the undone move.
        self.x_turn = (player == 1)

        if self.history:
            self.active_boards = self._next_active_boards(self.history[-1][1])
        else:
            self.active_boards = {i for i in range(9)
                                  if not self.boards[i].game_over}

        # Reopen the game unless it had already ended before the undone move.
        if not self.meta_board.game_over and len(self.active_boards) > 0:
            self.game_over = False
            self.won = False
            self.winner = None

    def _flip_turn(self):
        '''Hand the turn to the other player.'''
        self.x_turn = not self.x_turn

    def __str__(self):
        '''
        Render the nine sub-boards as a 3x3 grid of grids.

        Sub-boards in the same band are joined with '|' line by line, and
        bands are separated by a rule of 17 dashes.
        '''
        board_strs = [str(self.boards[i]) for i in range(9)]

        bands = []
        for start in range(0, 9, 3):
            band_lines = [text.split('\n') for text in board_strs[start:start+3]]
            merged = ['|'.join(lines[row] for lines in band_lines)
                      for row in range(3)]
            bands.append('\n'.join(merged))

        return ('\n' + '-'*17 + '\n').join(bands)

In [None]:
game = UltimateBoard()

In [None]:
game.play(4, 0, 'X')
game.play(0, 5, 'O')
game.play(5, 3, 'X')
game.play(3, 8, 'O')

In [None]:
print(game)

In [None]:
import pickle
model = pickle.load(open('gbm_x_wins','rb'))

In [None]:
model.predict_proba(np.array(game.get_history()[-1]).reshape(1,-1))

In [None]:
for i in range(0,81,9):
    print(np.array(h1[i:i+9]).reshape((3,3)))

In [None]:
h1 = game.get_history()[-1]
# game.undo_move()
# h2 = game.

In [None]:
# Take back the most recent move.
game.undo_move()

In [None]:
# Board after the undo.
print(game)

In [None]:
# Play O on square 8 of board 3 (the same move as the last opening move above).
game.play(3, 8, 'O')

In [None]:
# Board after replaying the move.
print(game)

In [None]:
new_game = UltimateBoard(True)

for board, square, player in game_hist:
    new_game.play(board, square, 'X' if player == 1 else 'O')

In [None]:
model.predict_proba(np.array(new_game.get_history()[-1]).reshape(1,-1))

In [None]:
new_game.get_history()[-1][81:]

In [None]:
pd.DataFrame(new_game.get_history())

In [None]:
new_game.x_turn

In [None]:
print(new_game)

In [None]:
game_hist = [(4, 0, 1), (0, 5, 2), (5, 3, 1), 
             (3, 8, 2), (8, 6, 1), (6, 4, 2), 
             (4, 4, 1), (4, 6, 2), (6, 2, 1), 
             (2, 4, 2), (4, 8, 1), (8, 3, 2), 
             (3, 3, 1), (3, 1, 2), (1, 2, 1), 
             (2, 0, 2), (0, 6, 1), (6, 3, 2), 
             (3, 0, 1), (0, 1, 2), (1, 1, 1), 
             (1, 7, 2), (7, 8, 1)]

In [None]:
def sim_game(i, seed=None):
    '''
    Play one game of uniformly random legal moves and return its history.

    i: non-negative game id; even ids start with X, odd ids with O. It is
       stored in the 'game' column of the result.
    seed: optional seed for this game's random generator; defaults to `i`.

    Returns a DataFrame with one row per move (see UltimateBoard.get_history)
    plus the 'game' column.
    '''
    # Each game gets its own generator. Worker processes created by fork start
    # with identical copies of numpy's global random state, so drawing from
    # np.random directly makes different workers replay the same move
    # sequences. Seeding per game also makes every game reproducible.
    rng = np.random.RandomState(i if seed is None else seed)
    board = UltimateBoard(bool(i % 2 == 0))

    while not board.game_over:
        # The board already tracks whose turn it is; no shadow copy needed.
        lab = 'X' if board.x_turn else 'O'
        # Sort for a stable candidate order; int() strips the numpy scalar
        # type so history holds plain Python ints.
        brd = int(rng.choice(sorted(board.active_boards)))
        pos = int(rng.choice(sorted(board.boards[brd].remaining_squares)))

        board.play(brd, pos, lab)

    df = pd.DataFrame(board.get_history())
    df['game'] = i
    return df

In [None]:
from multiprocessing import Pool
from time import time

In [None]:
tic = time()
p = Pool(4)
res = p.map(sim_game, range(10000))
print(time() - tic)

In [None]:
full_res = pd.concat(res)

In [None]:
full_res.head()

In [None]:
winner_map = {'Cat':0, 'X':1, 'O':2}
full_res['winner'] = full_res[90].map(winner_map)
full_res['x_wins'] = (full_res[90] == 'X').astype(int)

In [None]:
full_res_xo = full_res[full_res[90].isin(['O','X'])]

In [None]:
train_games, val_test_games = train_test_split(list(range(10000)), test_size=.25)
val_games, test_games = train_test_split(val_test_games, test_size=.5)

In [None]:
len(train_games).intersectiolen(val_games) + len(test_games)

In [None]:
train = full_res_xo[full_res_xo['game'].isin(train_games)].copy().reset_index(drop=True)
val = full_res_xo[full_res_xo['game'].isin(val_games)].copy().reset_index(drop=True)
test = full_res_xo[full_res_xo['game'].isin(test_games)].copy().reset_index(drop=True)

In [None]:
gbm = GradientBoostingClassifier(verbose=3, n_estimators=25, subsample=.33, max_depth=10)
gbm.fit(train[list(range(90))], train['x_wins'])

In [None]:
set(train['game']).intersection(set(val['game']))

In [None]:
train, val_test = [df.copy() for df in train_test_split(full_res_xo, test_size=.3, random_state=42)]
val, test = [df.copy() for df in train_test_split(val_test, test_size=.5, random_state=21)]

In [None]:
train.shape, val.shape, test.shape

In [None]:
val['pred_x'] = gbm.predict_proba(val[list(range(90))])[:,1]
train['pred_x'] = gbm.predict_proba(train[list(range(90))])[:,1]

In [None]:
roc_auc_score(val['x_wins'], val['pred_x']), roc_auc_score(train['x_wins'], train['pred_x'])

In [None]:
roc_auc_score(val['x_wins'], val['pred_x']), roc_auc_score(train['x_wins'], train['pred_x'])

In [None]:
import pickle
# The context manager flushes and closes the file; the bare open() left that
# to garbage collection.
with open('gbm_x_wins', 'wb') as model_file:
    pickle.dump(gbm, model_file)

In [None]:
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import roc_auc_score, accuracy_score

In [None]:
gbm = model

In [None]:
game = UltimateBoard(True)
while not game.game_over:
    b, s, l = input("Next Move\n").split()
    b = int(b)
    s = int(s)

    print(game.active_boards)
    game.play(b,s,l)
    print(game)
    for b in game.active_boards:
        for s in game.boards[b].remaining_squares:
            game.play(b, s, 'X' if game.x_turn else 'O')
#             g = game.get_history()[-1]
#             g[b*9 + s] = 1 if l == 'O' else 2
            print(b,s,gbm.predict_proba(np.array(game.get_history()[-1]).reshape(1,-1)))
            game.undo_move()

#     print(b,s,l)

In [None]:
game_hist = [(4, 0, 1), (0, 5, 2), (5, 3, 1), 
             (3, 8, 2), (8, 6, 1), (6, 4, 2), 
             (4, 4, 1), (4, 6, 2), (6, 2, 1), 
             (2, 4, 2), (4, 8, 1), (8, 3, 2), 
             (3, 3, 1), (3, 1, 2), (1, 2, 1), 
             (2, 0, 2), (0, 6, 1), (6, 3, 2), 
             (3, 0, 1), (0, 1, 2), (1, 1, 1), 
             (1, 7, 2), (7, 8, 1)]

In [None]:
(8, 0, 2)

In [None]:
game = UltimateBoard(True)
for gh in game_hist:
    game.play(gh[0], gh[1], 'X' if gh[2] == 1 else 'O')

In [None]:
# Continue the existing `game`. After each typed move, every legal reply is
# scored by patching a copy of the current feature row rather than playing
# the move.
while not game.game_over:
    b, s, l = input("Next Move\n").split()
    b = int(b)
    s = int(s)

    print(game.active_boards)
    game.play(b,s,l)
    print(game)
    if game.game_over:
        break

    # The row is the same for every candidate, so build it once. Only the
    # first 90 entries are model inputs.
    base_row = game.get_history()[-1][:90]
    # The reply belongs to the opponent of the player who just moved.
    next_mark = 1 if l == 'O' else 2
    for cand_board in game.active_boards:
        for cand_square in game.boards[cand_board].remaining_squares:
            # Only the played square is patched; the meta-board entries
            # (indices 81-89) stay as they were before the reply.
            g = list(base_row)
            g[cand_board*9 + cand_square] = next_mark
            print(cand_board, cand_square, gbm.predict_proba(np.array(g).reshape(1,-1)))

In [None]:
train.head()

In [None]:
pd.DataFrame([(x,y) for x,y in zip(list(range(90)), gbm.feature_importances_)]).sort_values(1, ascending=False)

In [None]:
# NOTE(review): leftover post-mortem debugger call; it only does something
# right after a cell has raised, and it blocks an unattended Run All.
%debug

In [None]:
import re

# NOTE(review): `s` is not defined in this cell. The cells above bind `s` to
# an integer square index, which re.findall rejects; presumably a raw
# "<board> <square> <X|O>" input string was intended -- confirm.
re.findall(r'[0-9] [0-9] [OX]', s)

In [None]:
game.play(1, 2, 'X')

In [None]:
game.play(2,2,'O')

In [None]:
for b in game.active_boards:
    for s in game.boards[b].remaining_squares:
        g = game.get_history()[-1]
        g[b*9 + s] = 1
        print(b,s,gbm.predict_proba(np.array(g).reshape(1,-1)))
        

In [None]:
# predict() needs a 2-D (n_samples, n_features) input. Take exactly the 90
# model inputs: dropping "the last element" only gave 90 features when a
# winner label had been appended to the row.
gbm.predict(np.array(game.get_history()[-1][:90]).reshape(1, -1))

#### print(game)

In [None]:
# Bucket validation rows into ten quantile bins of the predicted X-win
# probability, labelled 0 (lowest predictions) to 9 (highest).
val['decile'] = pd.qcut(val['pred_x'], 10, labels=range(10))