In [1]:
import numpy as np
import random
from chess.pgn import read_game
from IPython.display import SVG
import tensorflow as tf
import itertools
from copy import copy
import datetime

In [2]:
class TicTacToe(object):
    def __init__(self):
        self.board = np.zeros((3,3,2))
        self.turn = False
        
    def reset(self):
        self.board = np.zeros((3,3,2))
        self.turn = False
        return self.board, self.reward()
    
    def reward(self, board=None):
        if board is None:
            board = self.board
        if any(board[:,:,0].sum(axis=0)==3) or any(board[:,:,0].sum(axis=1)==3) or board[:,:,0][np.eye(3)==1].sum()==3 or board[:,:,0][np.rot90(np.eye(3))==1].sum()==3:
            return 1
        elif any(board[:,:,1].sum(axis=0)==3) or any(board[:,:,1].sum(axis=1)==3) or board[:,:,1][np.eye(3)==1].sum()==3 or board[:,:,1][np.rot90(np.eye(3))==1].sum()==3:
            return -1
        elif board.sum()==9:
            return 0
        else:
            return None
    
    def step(self, board):
        self.board = board
        self.turn = not self.turn
    
    def get_candidate_boards(self, board=None):
        if board is None:
            board = self.board
            
        candidate_boards = []
        if self.reward() is None:
            empty_xs, empty_ys = np.where(board.sum(axis=2)==0)
            for candidate_action in zip(empty_xs, empty_ys):
                candidate_board = copy(board)
                candidate_board[candidate_action[0], candidate_action[1], int(self.turn)] = 1
                candidate_boards.append(candidate_board)
        return candidate_boards
    
    def print(self):
        s=''
        for i in range(3):
            for j in range(3):
                if self.board[i,j,0]==1:
                    s+='X'
                elif self.board[i,j,1]==1:
                    s+='O'
                else:
                    s+='-'
            s+='\n'
        print(s)

In [3]:
boards_placeholder = tf.placeholder(tf.float32, shape=[None, 3, 3, 2], name='candidate_boards')
turn_placeholder = tf.placeholder(tf.float32, shape = [None, 1], name='turn')
reshaped_candidate_boards = tf.reshape(boards_placeholder, [tf.shape(boards_placeholder)[0], 18])
feature_vectors = tf.concat(1, [reshaped_candidate_boards, turn_placeholder], name='feature_vectors')
W_1 = tf.Variable(tf.truncated_normal([19,100], stddev=0.1), name='W_1')
b_1 = tf.Variable(tf.constant(0.0, shape=[100]), name='b_1')
layer_1 = tf.nn.relu_layer(feature_vectors, W_1, b_1, name='layer_1')
W_2 = tf.Variable(tf.truncated_normal([100,1], stddev=0.1), name='W_2')
b_2 = tf.Variable(tf.constant(0.0, shape=[1]), name='b_2')
J = tf.nn.tanh(tf.matmul(layer_1, W_2) + b_2, name='J')
J_next = tf.placeholder('float', [1, 1], name='J_next')

turn_number = tf.Variable(tf.constant(0.0), name='turn_number', trainable=False)
turn_number_op = turn_number.assign_add(1.0)

loss_sum = tf.Variable(tf.constant(0.0), name='loss_sum', trainable=False)
loss_op = tf.reduce_mean(tf.square(J_next - J), name='loss')
loss_sum_op = loss_sum.assign_add(loss_op)
loss_avg_op = loss_sum / tf.maximum(turn_number, 1.0)
tf.scalar_summary('loss_avg', loss_avg_op)

tvars = tf.trainable_variables()
grads = tf.gradients(J, tvars)

for grad, var in zip(grads, tvars):
    tf.histogram_summary(var.name, var)
    tf.histogram_summary(var.name + '/gradient', grad)

turn_number_reset_op = turn_number.assign(0.0)
loss_sum_reset_op = loss_sum.assign(0.0)
reset_op = tf.group(*[loss_sum_reset_op, turn_number_reset_op])

summaries_op = tf.merge_all_summaries()

delta_op = tf.reduce_sum(J_next - J, name='delta')

lamda = 0.7
alpha = .00001

apply_gradients = []
with tf.variable_scope('apply_gradients'):
    for grad, var in zip(grads, tvars):
        with tf.variable_scope('trace'):
            # e-> = lambda * e-> + <grad of output w.r.t weights>
            trace = tf.Variable(tf.zeros(grad.get_shape()), trainable=False, name='trace')
            trace_op = trace.assign((lamda * trace) + grad)

        # grad with trace = alpha * delta * e
        grad_trace = alpha * delta_op * trace_op
        grad_apply = var.assign_add(grad_trace)
        apply_gradients.append(grad_apply)
    
with tf.control_dependencies([loss_op]):
    train_op = tf.group(*apply_gradients, name='train')
        
init = tf.initialize_all_variables()

In [7]:
num_epochs = 20000
with tf.Session() as sess:
    env = TicTacToe()
    sess.run(init)
    writer = tf.train.SummaryWriter('/Users/adam/Documents/projects/chess_deep_learning/log/%s'%str(datetime.datetime.today()), graph=sess.graph)
    for epoch in range(num_epochs):
        if epoch>0 and epoch%100==0:
            print('epoch', epoch)
        while env.reward() is None:
            candidate_boards = env.get_candidate_boards()
            Js = sess.run(J, feed_dict={boards_placeholder: np.array(candidate_boards), turn_placeholder: float(not env.turn)*np.ones((len(candidate_boards), 1))})

            if len(next_candidate_boards):
                next_Js = sess.run(J, feed_dict={boards_placeholder: np.array(next_candidate_boards)})
                if env.turn:
                    next_J = np.min(Js)
                    next_idx = np.argmin(Js)
                else:
                    next_J = np.max(Js)
                    next_idx = np.argmax(Js)
            else:
                next_J = env.reward()
            sess.run([train_op, loss_sum_op, turn_number_op], feed_dict={boards_placeholder: np.array([env.board]), J_next: np.array([[next_J]])})
            env.step(best_board)
#             env.print()
        summary = sess.run(summaries_op, feed_dict={boards_placeholder: np.array([env.board])})
        writer.add_summary(summary, epoch)
        sess.run(reset_op)
        env.reset()

InvalidArgumentError: You must feed a value for placeholder tensor 'turn' with dtype float
	 [[Node: turn = Placeholder[dtype=DT_FLOAT, shape=[], _device="/job:localhost/replica:0/task:0/cpu:0"]()]]
Caused by op 'turn', defined at:
  File "/Users/adam/anaconda/lib/python3.5/runpy.py", line 170, in _run_module_as_main
    "__main__", mod_spec)
  File "/Users/adam/anaconda/lib/python3.5/runpy.py", line 85, in _run_code
    exec(code, run_globals)
  File "/Users/adam/anaconda/lib/python3.5/site-packages/ipykernel/__main__.py", line 3, in <module>
    app.launch_new_instance()
  File "/Users/adam/anaconda/lib/python3.5/site-packages/traitlets/config/application.py", line 596, in launch_instance
    app.start()
  File "/Users/adam/anaconda/lib/python3.5/site-packages/ipykernel/kernelapp.py", line 442, in start
    ioloop.IOLoop.instance().start()
  File "/Users/adam/anaconda/lib/python3.5/site-packages/zmq/eventloop/ioloop.py", line 162, in start
    super(ZMQIOLoop, self).start()
  File "/Users/adam/anaconda/lib/python3.5/site-packages/tornado/ioloop.py", line 883, in start
    handler_func(fd_obj, events)
  File "/Users/adam/anaconda/lib/python3.5/site-packages/tornado/stack_context.py", line 275, in null_wrapper
    return fn(*args, **kwargs)
  File "/Users/adam/anaconda/lib/python3.5/site-packages/zmq/eventloop/zmqstream.py", line 440, in _handle_events
    self._handle_recv()
  File "/Users/adam/anaconda/lib/python3.5/site-packages/zmq/eventloop/zmqstream.py", line 472, in _handle_recv
    self._run_callback(callback, msg)
  File "/Users/adam/anaconda/lib/python3.5/site-packages/zmq/eventloop/zmqstream.py", line 414, in _run_callback
    callback(*args, **kwargs)
  File "/Users/adam/anaconda/lib/python3.5/site-packages/tornado/stack_context.py", line 275, in null_wrapper
    return fn(*args, **kwargs)
  File "/Users/adam/anaconda/lib/python3.5/site-packages/ipykernel/kernelbase.py", line 276, in dispatcher
    return self.dispatch_shell(stream, msg)
  File "/Users/adam/anaconda/lib/python3.5/site-packages/ipykernel/kernelbase.py", line 228, in dispatch_shell
    handler(stream, idents, msg)
  File "/Users/adam/anaconda/lib/python3.5/site-packages/ipykernel/kernelbase.py", line 391, in execute_request
    user_expressions, allow_stdin)
  File "/Users/adam/anaconda/lib/python3.5/site-packages/ipykernel/ipkernel.py", line 199, in do_execute
    shell.run_cell(code, store_history=store_history, silent=silent)
  File "/Users/adam/anaconda/lib/python3.5/site-packages/IPython/core/interactiveshell.py", line 2723, in run_cell
    interactivity=interactivity, compiler=compiler, result=result)
  File "/Users/adam/anaconda/lib/python3.5/site-packages/IPython/core/interactiveshell.py", line 2825, in run_ast_nodes
    if self.run_code(code, result):
  File "/Users/adam/anaconda/lib/python3.5/site-packages/IPython/core/interactiveshell.py", line 2885, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-3-607ca387e717>", line 2, in <module>
    turn_placeholder = tf.placeholder(tf.float32, shape = [None, 1], name='turn')
  File "/Users/adam/anaconda/lib/python3.5/site-packages/tensorflow/python/ops/array_ops.py", line 1212, in placeholder
    name=name)
  File "/Users/adam/anaconda/lib/python3.5/site-packages/tensorflow/python/ops/gen_array_ops.py", line 1530, in _placeholder
    name=name)
  File "/Users/adam/anaconda/lib/python3.5/site-packages/tensorflow/python/framework/op_def_library.py", line 703, in apply_op
    op_def=op_def)
  File "/Users/adam/anaconda/lib/python3.5/site-packages/tensorflow/python/framework/ops.py", line 2317, in create_op
    original_op=self._default_original_op, op_def=op_def)
  File "/Users/adam/anaconda/lib/python3.5/site-packages/tensorflow/python/framework/ops.py", line 1239, in __init__
    self._traceback = _extract_stack()


In [5]:
np.ones(5)

array([ 1.,  1.,  1.,  1.,  1.])