In [1]:
import numpy as np
import copy
# Number of rows and of columns of the square board that Environ creates by
# default and bounds-checks against. The JavaScript front end in this notebook
# hard-codes 8 separately, so changing this value alone does not resize the UI.
board_size = 8

In [2]:
class Environ:
    """Board state and move rules for a Reversi/Othello-style game.

    ``self.board`` is a list of rows indexed as ``board[y][x]``. A cell holds
    ``0`` when empty, or the id of the player owning the stone (``1`` or
    ``-1``; the opponent of ``player`` is always ``-player``).

    Positions are passed as a single ``(x, y)`` pair. The methods unpack the
    pair in the body instead of in the signature, because tuple parameters are
    a syntax error on Python 3; call sites are unchanged.
    """

    # Unit steps for the eight neighbouring directions, as (dx, dy).
    _DIRECTIONS = ((-1, -1), (-1, 0), (-1, 1),
                   (0, -1), (0, 1),
                   (1, -1), (1, 0), (1, 1))

    def __init__(self, board=None):
        """Wrap ``board`` (used as-is, not copied) or build the opening position.

        When ``board`` is omitted (or empty) a ``board_size`` x ``board_size``
        grid is created with four stones in the centre: player ``1`` on one
        diagonal and player ``-1`` on the other.
        """
        if board:
            self.board = board
        else:
            self.board = [[0 for x in range(board_size)] for y in range(board_size)]
            # Centre placement; for board_size == 8 these are cells 3 and 4.
            mid = board_size // 2
            self.board[mid - 1][mid - 1] = 1
            self.board[mid][mid] = 1
            self.board[mid - 1][mid] = -1
            self.board[mid][mid - 1] = -1

    def _in_bounds(self, x, y):
        """Return True when (x, y) addresses a cell of the current board."""
        board = self.board
        return 0 <= y < len(board) and 0 <= x < len(board[y])

    def _captures(self, x, y, player):
        """Return the (x, y) cells ``player`` would flip by playing at (x, y).

        For each direction, walks away from (x, y) collecting a run of
        opponent stones; the run counts only when it is terminated by one of
        ``player``'s own stones before an empty cell or the board edge.
        """
        board = self.board
        opponent = -player
        flipped = []
        for dx, dy in self._DIRECTIONS:
            run = []
            posx, posy = x + dx, y + dy
            while self._in_bounds(posx, posy):
                cell = board[posy][posx]
                if cell == player and run:
                    # Run is bracketed by our own stone: all of it flips.
                    flipped.extend(run)
                    break
                elif cell == opponent:
                    run.append((posx, posy))
                else:
                    # Empty cell, or our own stone with nothing in between.
                    break
                posx += dx
                posy += dy
        return flipped

    def is_allowed(self, pos, player):
        """Return True when ``player`` may place a stone at ``pos``.

        ``pos`` is an ``(x, y)`` pair. A move is allowed when the cell is on
        the board, empty, and the move flips at least one opponent stone.
        """
        x, y = pos
        # Reject off-board coordinates explicitly; a negative index would
        # otherwise silently wrap around to the far side of the list.
        if not self._in_bounds(x, y):
            return False
        if self.board[y][x] != 0:
            return False
        return len(self._captures(x, y, player)) > 0

    def get_candidates(self, player):
        """Return every allowed ``(x, y)`` move for ``player``, row by row."""
        return [(x, y)
                for y in range(len(self.board))
                for x in range(len(self.board[y]))
                if self.is_allowed((x, y), player)]

    def update_state(self, pos, player):
        """Play ``pos`` for ``player``, flipping the captured stones in place.

        ``pos`` is an ``(x, y)`` pair. Returns the number of opponent stones
        flipped (always at least 1 for a legal move), or ``None`` without
        touching the board when the move is not allowed.
        """
        x, y = pos
        if not self._in_bounds(x, y) or self.board[y][x] != 0:
            return None
        flipped = self._captures(x, y, player)
        if not flipped:
            return None
        self.board[y][x] = player
        for (x0, y0) in flipped:
            self.board[y0][x0] = player
        return len(flipped)

In [3]:
class Agent:
    """Chooses a move by looking one opponent reply ahead."""

    # mini-max method
    def get_action(self, qnet, env, player):
        """Return the (x, y) move for ``player``, or None when there is none.

        Every legal move is played on a copy of ``env.board``; the move is
        then scored by the worst outcome over the opponent's replies, as
        rated by ``qnet.get_values``. When the opponent has no reply, the
        position right after our move is rated instead. Ratings are
        multiplied by ``player`` so that a larger number is better for the
        mover; the move with the highest score is returned.
        """
        moves = env.get_candidates(player)
        if not moves:
            return None

        def play(board, move, who):
            # Apply `move` for `who` on a private copy and return that copy.
            sim = Environ(board=copy.deepcopy(board))
            sim.update_state(move, who)
            return sim.board

        scores = []
        for move in moves:
            after_mine = play(env.board, move, player)
            replies = Environ(board=after_mine).get_candidates(-player)
            if not replies:
                # The opponent has to pass, so rate the position as it stands.
                # Note that the meaning of 'value' depends on the player
                scores.append(player * qnet.get_values([after_mine])[0][0])
                continue
            after_reply = [play(after_mine, reply, -player) for reply in replies]
            # Assume the opponent picks the reply that is worst for us.
            scores.append(min(player * qnet.get_values(after_reply))[0])

        return moves[np.argmax(scores)]

In [4]:
class SimpleMiniMax:
    """Board evaluator that scores a position by stone difference."""

    def get_values(self, boards):
        """Rate each board in ``boards`` and return the ratings as an array.

        Each board is a list of row lists. Its rating is the number of cells
        equal to 1 minus the number of cells equal to -1, as a float, i.e.
        the value is defined from the player=1's point of view. The result
        has one single-element row per input board.
        """
        ratings = []
        for board in boards:
            cells = sum(board, [])  # concatenate the rows into one flat list
            ratings.append([float(cells.count(1) - cells.count(-1))])
        return np.array(ratings)

In [5]:
def flat_board():
    """Serialise the global ``env.board`` as text for the JavaScript front end.

    The cells are emitted row after row, each formatted as an integer and
    followed by a comma (so the string ends with a trailing comma).
    """
    return ''.join('%d,' % cell for cell in sum(env.board, []))

def run_opponent():
    """Let the global ``agent`` play one move as player -1 on the global ``env``.

    Does nothing when the agent reports that it has no move available.
    """
    move = agent.get_action(SimpleMiniMax(), env, player=-1)
    if not move:
        return
    env.update_state(move, player=-1)
            
# HTML container for the game canvas. The id "enchant-stage" is presumably the
# element enchant.js attaches its stage to -- TODO confirm against enchant.js.
# NOTE(review): the div is 360x360 while the script below creates a 621x621
# Core; verify that the intended scaling/clipping is what is wanted.
input_form = """
<body>
    <div id="enchant-stage" style="width:360px;height:360px;"></div>
</body>
"""

# Browser-side game UI, injected into the notebook output as a <script>.
#
# - Loads enchant.js v0.8.3 from a CDN and preloads 'goban.png' (background)
#   and 'goishi.png' (stone sprites); both images must be reachable from the
#   notebook page.
# - Builds an 8x8 grid of 64px Piece sprites offset by 54px. The 8 is
#   hard-coded here and does not follow the Python-side board_size.
# - refresh_board() runs `flat_board()` on the kernel and maps each cell of
#   the returned text to a sprite frame (-1 -> frame 1, 1 -> frame 0,
#   anything else -> frame 2 plus a 'touchstart' handler).
# - put_stone() runs `env.update_state((x,y), player=1) or -1` on the kernel.
#   An output of -1 (move rejected) just releases the lock; otherwise the
#   board is redrawn and, one second later, `run_opponent()` is executed and
#   the board is redrawn again.
# - Talks to the kernel through the global `IPython.notebook.kernel`, which
#   presumably only exists in the classic Notebook front end -- TODO confirm.
#
# NOTE(review): touchFunc is created anew on every refresh_board() call, so
# the removeEventListener() calls receive a function object that earlier
# refreshes never registered. Presumably the old handlers therefore stay
# attached; the `lock` flag and the -1 reply for rejected moves appear to keep
# this harmless, but confirm against enchant.js listener semantics.
javascript = """
<script type="text/javascript" src="https://cdn.rawgit.com/uei/enchant.js-builds/v0.8.3/build/enchant.js"></script>
<script type="text/javascript">
enchant();

var core = new Core(621, 621);
core.preload('goban.png', 'goishi.png');
core.fps = 10;

var psize = 64;

var Piece = Class.create(Sprite, {
    initialize: function(x, y, num) {
        Sprite.call(this, psize, psize);
        this.x = x * psize + 54;
        this.y = y * psize + 54;
        this.frame = num;
        this.image = core.assets['goishi.png'];
        core.rootScene.addChild(this);
    }
})

var kernel = IPython.notebook.kernel;

core.onload = function() {
    var bg = new Sprite(621, 621);
    bg.image = core.assets['goban.png'];
    bg.x = 0;
    bg.y = 0;
    core.rootScene.addChild(bg);

    var lock = 0;
    var board = [];

    var refresh_board = function() {
        var touchFunc = function() { put_stone(this) }
        var show_board = function(out) {
            var res = out.content.data['text/plain'];
            var result = res.substring(1).split(',');
            
            for (var y = 0; y < 8; y++) {
                for (var x = 0; x < 8; x++) {
                    if (result[y*8+x] == -1) {
                        board[y][x].frame = 1;
                        board[y][x].removeEventListener("touchstart", touchFunc);
                    } else if (result[y*8+x] == 1) {
                        board[y][x].frame = 0;
                        board[y][x].removeEventListener("touchstart", touchFunc);
                    } else {
                        board[y][x].frame = 2;
                        board[y][x].on('touchstart', touchFunc);
                    }
                } 
            }
        };
        
        kernel.execute(
            'flat_board()',
            {'iopub': {"output": show_board}}, {silent:false}
        );
    };
    
    var put_stone = function(piece) {
        var check_score = function(out) {
            var score = out.content.data['text/plain'];
            if (score == -1) {
                lock = 0;
                return;
            }

            refresh_board();
            var cb = function() {
                kernel.execute('run_opponent()');
                refresh_board();
                lock = 0;
            };
            setTimeout(cb, 1000); 
        };
        if (lock == 1) return;
        lock = 1;
        var x = (piece.x - 54) / psize;
        var y = (piece.y - 54) / psize;
        var command = 'env.update_state((' + x + ',' + y + '), player=1) or -1';
        kernel.execute(command, {'iopub': {"output": check_score}}, {silent:false});
    };

    for (var y = 0; y < 8; y++) {
        board[y] = [];
        for (var x = 0; x < 8; x++) {
            board[y][x] = new Piece(x, y, 2);
        }
    }
    refresh_board();

};
core.start();
</script>
"""

In [6]:
# Module-level game state: flat_board() and run_opponent() read these names
# as globals, and the JavaScript front end drives `env` through the kernel.
# Re-running this cell therefore resets the game to the opening position.
env = Environ()
agent = Agent()
from IPython.display import HTML
# Last expression of the cell: renders the board container and the game script.
HTML(input_form + javascript)