In [None]:
import pandas as pd
import numpy as np
import tensorflow as tf
import keras
from keras.models import load_model
import numpy
from agent import BackendAgent, HumanAgent
import matplotlib.pyplot
import time
import concurrent
import enum
import itertools
import logging
import matplotlib
import numpy
import sys
import time
import traceback
import abc
import subprocess

# Column letters used in move notation; the letter 'i' is skipped, which
# leaves exactly 15 letters for the 15 columns.
POS_TO_LETTER = 'abcdefghjklmnop'
# Reverse lookup: column letter -> zero-based column index.
LETTER_TO_POS = dict(zip(POS_TO_LETTER, range(len(POS_TO_LETTER))))
# Board dimensions and the run length needed to win.
width = height = 15
shape = (width, height)
line_length = 5

def to_move(pos):
    """Convert a ``(row, column)`` board index into move notation.

    The column becomes a letter from ``POS_TO_LETTER`` and the row becomes
    a one-based number, e.g. ``(0, 0)`` -> ``'a1'``.
    """
    column_letter = POS_TO_LETTER[pos[1]]
    row_number = pos[0] + 1
    return '{}{}'.format(column_letter, row_number)

def to_pos(move):
    """Convert move notation such as ``'h8'`` into a ``(row, column)`` tuple.

    The first character is looked up in ``LETTER_TO_POS`` to get the column;
    the remaining characters are parsed as the one-based row number.
    """
    row = int(move[1:]) - 1
    column = LETTER_TO_POS[move[0]]
    return row, column

def list_positions(board, player):
    """Return an ``(N, ndim)`` array of the indices where ``board == player``.

    Each row of the result is one matching cell index, in the order
    produced by ``numpy.nonzero``.
    """
    matches = numpy.nonzero(board == player)
    return numpy.transpose(matches)

def sequence_length(board, I, J, value):
    """Count the leading cells along a path that hold ``value``.

    ``I`` and ``J`` are iterables of row and column indices that are walked
    in lockstep; counting stops at the first cell whose content differs
    from ``value`` or when either iterable is exhausted.
    """
    matching_prefix = itertools.takewhile(
        lambda cell: board[cell] == value,
        zip(I, J),
    )
    return sum(1 for _ in matching_prefix)

class Agent(metaclass=abc.ABCMeta):
    """Abstract base class every playing agent must implement.

    Both methods are instance methods; the abstract declarations take
    ``self`` so that their signatures match the concrete overrides.
    """

    @abc.abstractmethod
    def policy(self, game):
        '''Return probability matrix of possible actions'''

    @abc.abstractmethod
    def name(self):
        '''Return name of agent'''

class Player(enum.IntEnum):
    """Cell owner / side to move; the two real players are negatives of each other."""

    NONE = 0
    BLACK = -1
    WHITE = 1

    def another(self):
        """Return the opposing player (``NONE`` maps to itself)."""
        return Player(-self.value)

    def __repr__(self):
        """Return the lower-case colour name, or ``'none'`` for ``NONE``."""
        labels = {Player.BLACK.value: 'black', Player.WHITE.value: 'white'}
        return labels.get(self.value, 'none')

    def __str__(self):
        """Same text as ``__repr__``."""
        return repr(self)


class Game:
    """State of a single game: board, move history, side to move and winner.

    The board is an ``int8`` array holding ``Player`` values. Black moves
    first. A ``Game`` instance is truthy while the game can continue.
    """

    width, height = 15, 15
    shape = (width, height)
    line_length = 5

    def __init__(self):
        self._positions = []
        self._board = numpy.full(self.shape, Player.NONE, dtype=numpy.int8)
        self._player = Player.BLACK
        self._result = Player.NONE

    def __bool__(self):
        """Return True while nobody has won and the board is not full."""
        if self.result() != Player.NONE:
            return False
        return len(self._positions) < self.width * self.height

    def move_n(self):
        """Return the number of moves played so far."""
        return len(self._positions)

    def player(self):
        """Return the player whose turn it is (the winner once the game is won)."""
        return self._player

    def result(self):
        """Return the winner, or ``Player.NONE`` if there is none."""
        return self._result

    def board(self):
        """Return the underlying board array (not a copy)."""
        return self._board

    def positions(self, player=Player.NONE):
        """Return the move history, optionally restricted to one player.

        With ``Player.NONE`` the internal list of all moves is returned;
        otherwise every second move, starting at index 0 for black and
        index 1 for white.
        """
        if not player:
            return self._positions

        offset = 1
        if player == Player.BLACK:
            offset = 0
        return self._positions[offset::2]

    def dumps(self):
        """Serialise the move history as space-separated move notation."""
        return ' '.join(to_move(pos) for pos in self._positions)

    @staticmethod
    def loads(dump):
        """Rebuild a game by replaying a string produced by ``dumps``."""
        game = Game()
        for token in dump.split():
            game.move(to_pos(token))
        return game

    def is_posible_move(self, pos):
        """Return a truthy value if ``pos`` is on the board and the cell is empty."""
        if not 0 <= pos[0] < self.height:
            return False
        if not 0 <= pos[1] < self.width:
            return False
        return not self._board[pos]

    def move(self, pos):
        """Place the current player's stone at ``pos`` and advance the game.

        If the move completes a winning line the result is recorded and the
        side to move is left unchanged; otherwise the turn passes to the
        other player.
        """
        assert self.is_posible_move(pos), 'impossible pos: {pos}'.format(pos=pos)

        self._positions.append(pos)
        self._board[pos] = self._player

        # Only look for a winning line while no winner has been recorded.
        wins = not self._result and check(self._board, pos)
        if wins:
            self._result = self._player
        else:
            self._player = self._player.another()

def number_shift(n):
    """Return the ``(x, y)`` offset used to centre the label ``n`` on a stone.

    The horizontal offset grows with the number of digits (1, 2, or 3+);
    the vertical offset is always 0.15.
    """
    for threshold, horizontal in ((100, 0.32), (10, 0.22)):
        if n >= threshold:
            return (horizontal, 0.15)
    return (0.10, 0.15)

class PyPlotUI:
    """Interactive matplotlib view of a game.

    Draws black and white stones as scatter points, numbers every stone
    with its move index, and shows the last agent's move scores as a red
    heat map underneath.
    """

    def __init__(self, black='black', white='white'):
        """Create the figure; ``black`` and ``white`` are names shown in the title."""
        matplotlib.pyplot.ion()
        self._board = matplotlib.pyplot.figure(figsize=(8, 8))

        self._ax = self._board.add_subplot(111)
        self._ax.set_navigate(False)

        self._ax.set_title('{black} vs {white}'.format(black=black, white=white))

        self._ax.set_xlim(-1, Game.width)
        self._ax.set_ylim(-1, Game.height)

        self._ax.set_xticks(numpy.arange(0, Game.width))
        self._ax.set_xticklabels(POS_TO_LETTER)

        self._ax.set_yticks(numpy.arange(0, Game.height))
        self._ax.set_yticklabels(numpy.arange(1, Game.height + 1))

        self._ax.grid(zorder=2)

        self._black = self._ax.scatter(
            (), (),
            color='black',
            s=500,
            edgecolors='black',
            zorder=3
        )
        self._white = self._ax.scatter(
            (), (),
            color='white',
            s=500,
            edgecolors='black',
            zorder=3
        )

        self._probs = self._ax.imshow(
            numpy.zeros(Game.shape),
            cmap='Reds',
            interpolation='none',
            vmin=0.0,
            vmax=1.0,
            zorder=1
        )

        self._board.show()

    def update(self, game, probs):
        """Redraw stones, move numbers and the score heat map.

        ``probs`` may be any array with one entry per board cell (for
        example ``(15, 15)``, ``(1, 225)`` or ``(225, 1)``); it is reshaped
        to the board shape before display. Returns ``self``.
        """
        board = game.board()

        # Scatter offsets are (x, y), i.e. (column, row), hence the swap.
        black_positions = list_positions(board, Player.BLACK)
        self._black.set_offsets(black_positions[:, (1, 0)])

        white_positions = list_positions(board, Player.WHITE)
        self._white.set_offsets(white_positions[:, (1, 0)])

        # Remove the old labels one by one: assigning to ``ax.texts`` is not
        # supported by current matplotlib releases.
        for label in list(self._ax.texts):
            label.remove()

        for n, (i, j) in enumerate(game.positions(), 1):
            shift = number_shift(n)
            self._ax.text(
                j - shift[0],
                i - shift[1],
                str(n),
                color='white' if n % 2 else 'black',
                fontsize=10,
                zorder=4
            )

        probs = numpy.asarray(probs)
        if probs.size == Game.width * Game.height:
            probs = probs.reshape(Game.shape)

        # Scale so the best move maps to 0.5 of the colour range; the 1e-6
        # floor guards the division when every score is zero.
        self._probs.set_data(probs / (2 * max(probs.max(), 1e-6)))

        self._board.canvas.draw()

        return self

def loop(game, black, white, timeout=None):
    """Generator that plays ``black`` against ``white`` on ``game``.

    First yields ``(game, zeros)`` for the untouched game, then after every
    move yields ``(game, probs)`` where ``probs`` is the array returned by
    the moving agent's ``policy``. The chosen move is the arg-max of
    ``probs``. Each ``policy`` call runs in a worker thread and must finish
    within ``timeout`` seconds (``None`` waits forever); on timeout the
    exception raised by ``Future.result`` propagates to the caller.
    """
    # ``import concurrent`` alone does not load the ``futures`` submodule,
    # so import it explicitly here.
    import concurrent.futures

    with concurrent.futures.ThreadPoolExecutor(max_workers=1) as executor:
        yield game, numpy.zeros(game.shape)

        for agent in itertools.cycle([black, white]):
            if not game:
                break

            future = executor.submit(agent.policy, game)
            probs = future.result(timeout=timeout)

            # argmax works on the flattened array, so any layout with one
            # entry per cell maps back to a (row, column) index.
            pos = numpy.unravel_index(probs.argmax(), game.shape)
            game.move(pos)

            yield game, probs

def run_test(black, white, timeout=None):
    """Play one game with a live ``PyPlotUI`` and return the winner.

    If an agent raises (including a timeout or an illegal move), the error
    and its traceback are printed and the opponent of the player to move is
    returned as the winner. ``KeyboardInterrupt`` and ``SystemExit`` are
    not intercepted, so the run can still be interrupted.
    """
    game = Game()
    ui = PyPlotUI(black.name(), white.name())

    try:
        for game, probs in loop(game, black, white, timeout):
            ui.update(game, probs)

    except Exception as error:
        print(error)
        traceback.print_tb(error.__traceback__)
        return game.player().another()

    return game.result()

def run(black, white, max_move_n=60, timeout=10):
    """Play one headless game and return ``(winner, dump)``.

    The game stops once it is decided or after ``max_move_n`` moves. Every
    position is logged at DEBUG level. If an agent raises (including a
    timeout or an illegal move), the error is logged and the opponent of
    the player to move is returned as the winner. ``KeyboardInterrupt``
    and ``SystemExit`` are not intercepted.
    """
    game = Game()

    try:
        for game, _ in loop(game, black, white, timeout):
            logging.debug('%s\n%s', game.dumps(), game.board())
            if game.move_n() >= max_move_n:
                break

    except Exception:
        logging.error('Error!', exc_info=True, stack_info=True)
        return game.player().another(), game.dumps()

    return game.result(), game.dumps()

def wait_for_game_update():
    """Read one line from stdin and rebuild the game it describes.

    The line is read as bytes, stripped of trailing whitespace, decoded and
    passed to ``Game.loads``.
    """
    raw_line = sys.stdin.buffer.readline()
    dump = raw_line.rstrip().decode()
    return Game.loads(dump)

def move(move):
    """Write ``move`` followed by a newline to stdout as bytes and flush."""
    payload = b''.join((move.encode(), b'\n'))
    sys.stdout.buffer.write(payload)
    sys.stdout.flush()

def check_horizontal(board, pos):
    """Return True if the stone at ``pos`` is part of a winning row.

    Counts equal stones to the right and to the left of ``pos`` (at most
    ``Game.line_length - 1`` in each direction, clipped to the board) and
    compares the total run with ``Game.line_length``. An empty cell never
    wins.
    """
    stone = board[pos]
    if not stone:
        return False

    row, col = pos
    reach = Game.line_length

    rightwards = range(col + 1, min(col + reach, Game.width))
    leftwards = range(col - 1, max(col - reach, -1), -1)

    run = 1
    run += sequence_length(board, itertools.repeat(row), rightwards, stone)
    run += sequence_length(board, itertools.repeat(row), leftwards, stone)

    return run >= reach

def check_vertical(board, pos):
    """Return True if the stone at ``pos`` is part of a winning column.

    Counts equal stones towards higher and lower row indices (at most
    ``Game.line_length - 1`` each way, clipped to the board) and compares
    the total run with ``Game.line_length``. An empty cell never wins.
    """
    stone = board[pos]
    if not stone:
        return False

    row, col = pos
    reach = Game.line_length

    increasing_rows = range(row + 1, min(row + reach, Game.height))
    decreasing_rows = range(row - 1, max(row - reach, -1), -1)

    run = 1
    run += sequence_length(board, increasing_rows, itertools.repeat(col), stone)
    run += sequence_length(board, decreasing_rows, itertools.repeat(col), stone)

    return run >= reach

def check_main_diagonal(board, pos):
    """Return True if the stone at ``pos`` is part of a winning main diagonal.

    The main diagonal is the one on which row and column indices grow
    together. Equal stones are counted in both directions (at most
    ``Game.line_length - 1`` each way, clipped to the board) and the total
    run is compared with ``Game.line_length``. An empty cell never wins.
    """
    player = board[pos]
    if not player:
        return False

    i, j = pos
    length = 1

    length += sequence_length(
        board,
        range(i + 1, min(i + Game.line_length, Game.height)),
        range(j + 1, min(j + Game.line_length, Game.width)),
        player
    )

    # Use Game.line_length here as well, so the check reads the same
    # constant as the other direction checks instead of a module global.
    length += sequence_length(
        board,
        range(i - 1, max(i - Game.line_length, -1), -1),
        range(j - 1, max(j - Game.line_length, -1), -1),
        player
    )

    return length >= Game.line_length

def check_side_diagonal(board, pos):
    """Return True if the stone at ``pos`` is part of a winning anti-diagonal.

    The side diagonal is the one on which the row index falls while the
    column index grows. Equal stones are counted in both directions (at
    most ``Game.line_length - 1`` each way, clipped to the board) and the
    total run is compared with ``Game.line_length``. An empty cell never
    wins.
    """
    stone = board[pos]
    if not stone:
        return False

    row, col = pos
    reach = Game.line_length

    rows_down = range(row - 1, max(row - reach, -1), -1)
    cols_up = range(col + 1, min(col + reach, Game.width))
    rows_up = range(row + 1, min(row + reach, Game.height))
    cols_down = range(col - 1, max(col - reach, -1), -1)

    run = 1
    run += sequence_length(board, rows_down, cols_up, stone)
    run += sequence_length(board, rows_up, cols_down, stone)

    return run >= reach

def check(board, pos):
    """Return True if the stone at ``pos`` completes a line in any direction.

    The vertical, horizontal, main-diagonal and side-diagonal checks are
    tried in that order and evaluation stops at the first success. An
    empty cell never wins.
    """
    if not board[pos]:
        return False

    direction_checks = (
        check_vertical,
        check_horizontal,
        check_main_diagonal,
        check_side_diagonal,
    )
    return any(direction(board, pos) for direction in direction_checks)

def choose_best(rewards, visits, probs):
    """Score every move for tree-search selection (higher is better).

    The score is the sum of:

    * 1 if ``rewards > 0.85 * visits``,
    * 1 if ``rewards > 0.25 * visits``,
    * the mean reward ``rewards / (1 + visits)``,
    * an exploration term ``10 * probs / (1 + visits)``.

    The two threshold tests are converted to floats before being added.
    Adding two boolean NumPy arrays directly is a logical OR, which would
    cap the combined bonus at 1 and make the 0.85 threshold a no-op.
    Inputs may be scalars or broadcast-compatible arrays.
    """
    strong_bonus = (rewards > visits * 0.85) * 1.0
    decent_bonus = (rewards > 0.25 * visits) * 1.0
    x = (strong_bonus + decent_bonus + (rewards / (1 + visits))
         + 10 * probs / (1 + visits))
    return x

def one_to_two(number):
    """Split a flat cell index into ``(number // 15, number % 15)``."""
    return divmod(number, 15)

class Node():
    """One node of the search tree, with per-move statistics for 225 cells.

    Attributes are indexed by flat cell index: ``children`` holds child
    nodes (``None`` until created), ``probs`` prior scores, ``visit`` visit
    counts and ``rewds`` accumulated rewards. ``color`` is the side to move
    at this node.
    """

    def __init__(self, color):
        self.color = color
        self.n_children = 0
        self.children = [None] * 225
        self.probs = np.zeros(225)
        self.visit = np.zeros(225)
        self.rewds = np.zeros(225)
        self.position = np.zeros((15, 15))

    def add_child(self, number):
        """Attach a fresh child of the opposite colour under move ``number``."""
        self.children[number] = Node(-self.color)
        self.n_children += 1


class GodPlayer(Agent):
    """Tree-search agent guided by a policy network and a rollout network.

    Each ``*_model`` argument is a ``(model, graph)`` pair; the model's
    ``predict`` is always called inside ``graph.as_default()``.

    Internally the board is the negated ``game.board()``, so black stones
    are ``+1`` and white stones are ``-1``; ``self.color`` follows the same
    convention (``1`` for black, ``-1`` for white).
    """

    def __init__(self, name, color, r_model, w_model, b_model):
        """Store the agent name and pick the policy network for ``color``.

        ``color == 'white'`` selects ``w_model``; any other value selects
        ``b_model``. ``r_model`` is used for rollouts by both colours.
        """
        print("INIT_PLAYER")
        self._name = name
        self.position = np.zeros((15, 15))
        if color == 'white':
            self._polic = w_model[0]
            self._gpolic = w_model[1]
            self.color = -1
        else:
            self._polic = b_model[0]
            self._gpolic = b_model[1]
            self.color = 1
        self._rollout = r_model[0]
        self._grollout = r_model[1]

    def name(self):
        """Return the agent name given at construction."""
        return self._name

    def deeper_pls(self):
        """Run one simulation from the root and return ``(path, reward)``.

        ``path`` is the list of flat move indices taken from the root.
        ``reward`` is the colour that completed a line (``1`` or ``-1``),
        the rollout result, or ``0`` when the depth limit of 10 is reached.

        NOTE(review): neither selection step masks occupied cells, so a
        simulated move may overwrite an existing stone.
        """
        cur_node = self.node
        cur_deep = 0
        max_deep = 10
        cur_pose = self.position.copy()
        path = []
        reward = 0
        while cur_deep < max_deep:
            if cur_node.n_children == 0:
                # Leaf: ask the policy network, expand its best move, then
                # score the position with a rollout.
                with self._gpolic.as_default():
                    cur_node.probs = self._polic.predict(cur_pose.reshape(1, 15, 15, 1))

                best_move_ever = np.argmax(cur_node.probs)
                x, y = one_to_two(best_move_ever)
                if cur_deep == 0:
                    print(to_move([best_move_ever // 15, best_move_ever % 15]))
                cur_pose[x][y] = cur_node.color
                path.append(best_move_ever)
                cur_node.add_child(best_move_ever)
                if check(cur_pose, one_to_two(best_move_ever)):
                    return path, cur_node.color
                return path, self.make_rollout(cur_pose, -cur_node.color)
            else:
                # Inner node: follow the move with the best selection score.
                best_move_ever = np.argmax(choose_best(cur_node.rewds.reshape((1, 225)),
                                                       cur_node.visit.reshape((1, 225)),
                                                       cur_node.probs))
                x, y = one_to_two(best_move_ever)
                cur_pose[x][y] = cur_node.color
                path.append(best_move_ever)
                if cur_node.visit[best_move_ever] == 0:
                    cur_node.add_child(best_move_ever)
                if check(cur_pose, one_to_two(best_move_ever)):
                    return path, cur_node.color
                cur_node = cur_node.children[best_move_ever]
                cur_deep += 1
        return path, reward

    def make_rollout(self, position, node_color):
        """Play up to 15 greedy rollout-network moves on ``position``.

        ``position`` is modified in place. Returns the colour that
        completes a line, or ``0`` if nobody does within the limit.
        """
        color = node_color
        cur_deep = 0
        max_deep = 15
        while cur_deep < max_deep:
            with self._grollout.as_default():
                next_move = np.argmax(self._rollout.predict(position.reshape(1, 15, 15, 1)))

            x, y = one_to_two(next_move)
            position[x][y] = color
            if check(position, one_to_two(next_move)):
                return color
            color *= -1
            cur_deep += 1
        return 0

    def make_move(self, position):
        """Search from ``position`` for about 2.8 seconds and score the root moves.

        Returns a ``(1, 225)`` array: the mean reward of moves visited more
        than 50 times when the best such mean exceeds 0.8, otherwise the
        raw visit counts.
        """
        self.position = position
        self.node = Node(self.color)
        self.node.position = position
        start_time = time.time()
        max_time = 2.8
        while time.time() - start_time < max_time:
            path, reward = self.deeper_pls()
            cur_node = self.node
            # Back up the result along the path, from each node's own
            # point of view.
            for move in path:
                cur_node.rewds[move] += cur_node.color * reward  # TODO: normalize the rewards
                cur_node.visit[move] += 1
                cur_node = cur_node.children[move]

        visits = self.node.visit.reshape((1, 225))
        big_values = (visits > 50) * self.node.rewds.reshape((1, 225)) / (1 + visits)
        if np.max(big_values) > 0.8:
            return big_values
        return visits

    def policy(self, game):
        """Return a ``(1, 225)`` score array with occupied cells zeroed.

        If the search leaves no positive score on a free cell, the masked
        policy-network priors are returned instead, and as a last resort
        the availability mask itself, so the arg-max is always a legal
        move while any cell is free.
        """
        position = -game.board()
        arr = self.make_move(position)

        available = numpy.zeros((1, 225))
        positions = list_positions(game.board(), Player.NONE)

        for pos in positions:
            available[0][pos[0] * 15 + pos[1]] = 1
        arr = arr * available

        if arr.max() <= 0:
            # Without this fallback argmax would return the first zero
            # entry, which may be an occupied cell.
            priors = np.asarray(self.node.probs).reshape((1, 225)) * available
            arr = priors if priors.max() > 0 else available

        code_move = numpy.argmax(arr)
        print(self._name + ':', to_move([code_move // 15, code_move % 15]))
        return arr

# Board letters ('i' is skipped), used by make_number and make_string.
letters = 'abcdefghjklmnop'


def make_number(string):
    """Convert a move such as ``'b3'`` into ``letter_index * 15 + number - 1``.

    NOTE(review): here the letter is the major index, whereas the agents in
    this file flatten ``to_pos`` results as ``row * 15 + column`` with the
    letter as the column -- confirm which layout callers expect.
    An unknown letter is not rejected: ``str.find`` yields ``-1``.
    """
    letter_index = letters.find(string[0])
    one_based = int(string[1:])
    return letter_index * 15 + one_based - 1

def make_string(number):
    """Convert a flat index into a letter plus a one-based number.

    The letter is ``letters[number // 15]`` and the number is
    ``number % 15 + 1``.
    """
    letter_index, offset = divmod(number, 15)
    return letters[letter_index] + str(offset + 1)

class CnnAgent(Agent):
    """Agent that plays the arg-max of a single network's prediction.

    ``model`` is a ``(model, graph)`` pair; ``predict`` is called inside
    ``graph.as_default()``.
    """

    def __init__(self, color, name, model):
        self._name = name
        self.color = color
        self._model = model[0]
        self._graph = model[1]

    def name(self):
        """Return the agent name given at construction."""
        return self._name

    def policy(self, game):
        """Return the network scores with occupied cells zeroed.

        The network is fed the negated board reshaped to
        ``(1, 15, 15, 1)``.
        """
        with self._graph.as_default():
            predictions = self._model.predict(-game.board().reshape(1, 15, 15, 1))

        available = numpy.zeros((1, 225))
        positions = list_positions(game.board(), Player.NONE)

        for pos in positions:
            available[0][pos[0] * 15 + pos[1]] = 1

        # Mask the network output itself; the previous code referenced an
        # undefined name here and never used ``predictions``.
        # Assumes predictions broadcast against (1, 225) -- TODO confirm
        # the model's output shape.
        arr = predictions * available

        code_move = numpy.argmax(arr)
        print(self._name + ':', to_move([code_move // 15, code_move % 15]))
        return arr
    
class TestAgent(Agent):
    """Agent that plays the arg-max of a model's output plus a bonus for free cells.

    Unlike the masking agents, occupied cells are not zeroed: free cells
    merely receive an extra 0.1.
    """

    def __init__(self, color, name, model):
        self._model = model
        self.color = color
        self._name = name

    def name(self):
        """Return the agent name given at construction."""
        return self._name

    def policy(self, game):
        """Return a ``(225, 1)`` score array for the current position."""
        net_input = -game.board().reshape(1, 15, 15, 1)
        predictions = self._model.predict(net_input)

        # Mark every free cell in a flat column vector.
        free_cells = list_positions(game.board(), Player.NONE)
        available = numpy.zeros((225, 1))
        available[free_cells[:, 0] * 15 + free_cells[:, 1]] = 1

        arr = predictions.T + available * 0.1

        code_move = numpy.argmax(arr)
        print(self._name + ':', to_move([code_move // 15, code_move % 15]))
        return arr

class HumanAgent(Agent):
    """Agent whose moves are typed in by a person, e.g. ``h8``."""

    def __init__(self, name='Human'):
        self._name = name

    def name(self):
        """Return the agent name given at construction."""
        return self._name

    def policy(self, game):
        """Read one move from standard input and return it as a one-hot array.

        The result has the board's shape, with 1.0 at the entered cell.
        """
        chosen = to_pos(input())

        probs = numpy.zeros(game.shape)
        probs[chosen] = 1.0
        return probs

In [None]:
%matplotlib notebook

# Load the networks. The black and white policy models come from the same
# file; each model is paired with the default graph current after its load.
black_model = load_model('model-2.h5')
black_model_graph = tf.get_default_graph()
white_model = load_model('model-2.h5')
white_model_graph = tf.get_default_graph()
rollout_model = load_model('rot-2-128.h5')
rollout_model_graph = tf.get_default_graph()

# Both search agents share the same (model, graph) pairs and differ only
# in name and colour.
_shared_networks = dict(
    r_model=(rollout_model, rollout_model_graph),
    w_model=(white_model, white_model_graph),
    b_model=(black_model, black_model_graph),
)

model_0b = GodPlayer(color='black', name='Naruto_Black', **_shared_networks)
model_0w = GodPlayer(color='white', name='Naruto_White', **_shared_networks)
Sanya = HumanAgent('Sanya')

In [None]:
# Run this cell if you want to move first: the human plays black against
# the white search agent.
run_test(Sanya, model_0w)

In [None]:
# Run this cell if you want to move second: the black search agent opens
# and the human plays white.
run_test(model_0b, Sanya)