In [1]:
import numpy as np
import random
from chess.pgn import read_game
from IPython.display import SVG
import tensorflow as tf
import itertools
from copy import copy
import time

In [3]:
class TicTacToe(object):
    def __init__(self):
        self.board = np.zeros((3,3,2))
        self.turn = False
        
    def reset(self):
        self.board = np.zeros((3,3,2))
        self.turn = False
        return self.board, self.reward()
    
    def reward(self, board=None):
        if board is None:
            board = self.board
        if any(board[:,:,0].sum(axis=0)==3) or any(board[:,:,0].sum(axis=1)==3) or board[:,:,0][np.eye(3)==1].sum()==3 or board[:,:,0][np.rot90(np.eye(3))==1].sum()==3:
            return 1
        elif any(board[:,:,1].sum(axis=0)==3) or any(board[:,:,1].sum(axis=1)==3) or board[:,:,1][np.eye(3)==1].sum()==3 or board[:,:,1][np.rot90(np.eye(3))==1].sum()==3:
            return -1
        elif board.sum()==9:
            return 0
        else:
            return None
    
    def step(self, board):
        self.board = board
        self.turn = not self.turn
    
    def get_candidate_boards(self, board=None):
        if board is None:
            board = self.board
            
        candidate_boards = []
        if self.reward() is None:
            empty_xs, empty_ys = np.where(board.sum(axis=2)==0)
            for candidate_action in zip(empty_xs, empty_ys):
                candidate_board = copy(board)
                candidate_board[candidate_action[0], candidate_action[1], int(self.turn)] = 1
                candidate_boards.append(candidate_board)
        return candidate_boards
    
    def print(self):
        s=''
        for i in range(3):
            for j in range(3):
                if self.board[i,j,0]==1:
                    s+='X'
                elif self.board[i,j,1]==1:
                    s+='O'
                else:
                    s+='-'
            s+='\n'
        print(s)
    
    def play(self, players):
        while self.reward() is None:
            candidate_boards = self.get_candidate_boards()
            player = players[int(self.turn)]
            selected_board = player.select_board(candidate_boards)
            self.step(selected_board)
            self.print()
        return self.reward()

class RandomAgent(object):
    def select_board(self, boards):
        selected_board = random.choice(boards)
        return selected_board

In [3]:
class NeuralNetAgent(object):
    def __init__(self, sess, model_path, summary_path, checkpoint_path, restore=False):
        self.sess = sess
        self.model_path = model_path
        self.checkpoint_path = checkpoint_path
        self.summary_path = summary_path
        
        self.boards_placeholder = tf.placeholder(tf.float32, shape=[None, 3, 3, 2], name='candidate_boards')
        self.turn_placeholder = tf.placeholder(tf.float32, shape=[None,1], name='turn')
        reshaped_candidate_boards = tf.reshape(self.boards_placeholder, [tf.shape(self.boards_placeholder)[0], 18])
        feature_vectors = tf.concat(1,[reshaped_candidate_boards, self.turn_placeholder])
        W_1 = tf.Variable(tf.truncated_normal([19,100], stddev=0.1), name='W_1')
        b_1 = tf.Variable(tf.constant(0.0, shape=[100]), name='b_1')
        layer_1 = tf.nn.relu(tf.matmul(feature_vectors, W_1) + b_1, name='layer1')
        W_2 = tf.Variable(tf.truncated_normal([100,1], stddev=0.1), name='W_2')
        b_2 = tf.Variable(tf.constant(0.0, shape=[1]), name='b_2')
        self.J = tf.nn.tanh(tf.matmul(layer_1, W_2) + b_2, name='J')

        self.J_next = tf.placeholder('float', [1, 1], name='J_next')
        delta_op = tf.reduce_sum(self.J_next - self.J, name='delta')

        turn_count = tf.Variable(tf.constant(0.0), name='turn_number', trainable=False)
        self.turn_count_op = turn_count.assign_add(1.0)
        self.turn_count_reset_op = turn_count.assign(0.0)

        loss_sum = tf.Variable(tf.constant(0.0), name='loss_sum', trainable=False)
        loss_op = tf.reduce_mean(tf.square(self.J_next - self.J), name='loss')
        self.loss_sum_op = loss_sum.assign_add(loss_op)
        loss_avg_op = loss_sum / tf.maximum(turn_count, 1.0)
        self.loss_sum_reset_op = loss_sum.assign(0.0)
        tf.scalar_summary('loss_avg', loss_avg_op)
        tf.scalar_summary('turn_count', turn_count)
        
        lamda = 0.7

        update_traces = []
        update_trace_sums = []
        reset_trace_sums = []
        reset_traces = []
        trace_sums = []

        tvars = tf.trainable_variables()
        opt = tf.train.GradientDescentOptimizer(learning_rate=1)
    
        grads_and_vars = opt.compute_gradients(self.J, var_list=tvars, )

        with tf.variable_scope('update_traces'):
            for grad, var in grads_and_vars:
                if grad is None:
                    grad = tf.zeros_like(var)
                with tf.variable_scope('trace'):
                    trace = tf.Variable(tf.zeros(var.get_shape()), trainable=False, name='trace')            
                    update_trace_op = trace.assign( delta_op * ( (lamda * trace) + grad) )
                    update_traces.append(update_trace_op)

                    reset_trace_op = trace.assign(tf.zeros_like(trace))
                    reset_traces.append(reset_trace_op)

                trace_sum = tf.Variable(tf.zeros(var.get_shape()), trainable=False, name='trace_sum')
                trace_sums.append(trace_sum)

                update_trace_sum_op = trace_sum.assign_add(trace)
                update_trace_sums.append(update_trace_sum_op)
                reset_trace_sum_op = trace_sum.assign(tf.zeros_like(trace_sum))
                reset_trace_sums.append(reset_trace_sum_op)

        self.apply_gradients_op = opt.apply_gradients(zip([-ts/turn_count for ts in trace_sums], tvars)) #, global_step=global_step)

        for tvar, ts in zip(tvars, trace_sums):
            tf.histogram_summary(tvar.name, tvar)
            tf.histogram_summary(tvar.name + '/trace_sums', ts)

        self.summaries_op = tf.merge_all_summaries()

        self.update_traces_op = tf.group(*update_traces, name='update_traces')
        self.update_trace_sums_op = tf.group(*update_trace_sums, name='update_trace_sums')
        self.reset_trace_sums_op = tf.group(*reset_trace_sums, name='reset_trace_sums')
        self.reset_traces_op = tf.group(*reset_traces, name='reset_traces')
                
        self.saver = tf.train.Saver(max_to_keep=1)

        self.sess.run(tf.global_variables_initializer())
        
        if restore:
            self.restore()
    
    def train(self, num_epochs, batch_size, epsilon):
        env = TicTacToe()
        tf.train.write_graph(self.sess.graph_def, self.model_path, 'td_tictactoe.pb', as_text=False)
        summary_writer = tf.train.SummaryWriter('{0}{1}'.format(self.summary_path, int(time.time()), self.sess.graph_def))
        saver = tf.train.Saver(max_to_keep=1)
        for epoch in range(num_epochs):
            if epoch>0 and epoch%1==0:
#                 print('epoch', epoch)
                saver.save(self.sess, self.model_path)
            for episode in range(batch_size):
                while True:
                    candidate_boards = env.get_candidate_boards()
                    turns = float(not env.turn)*np.ones((len(candidate_boards), 1))
                    candidate_Js = self.sess.run(self.J, feed_dict={self.boards_placeholder: np.array(candidate_boards),
                                                                    self.turn_placeholder: turns})

                    if np.random.rand() < epsilon:
                        next_idx = np.random.choice(range(len(candidate_Js)))
                        next_J = env.reward(candidate_boards[next_idx])
                        if next_J is None:
                            next_J = candidate_Js[next_idx][0]
                    else:
                        if env.turn:
                            next_idx = np.argmin(candidate_Js)
                            next_J = env.reward(candidate_boards[next_idx])
                            if next_J is None:
                                next_J = np.max(candidate_Js)

                        else:
                            next_idx = np.argmax(candidate_Js)
                            next_J = env.reward(candidate_boards[next_idx])
                            if next_J is None:
                                next_J = np.min(candidate_Js)
                                
                    self.sess.run([self.update_traces_op, self.loss_sum_op, self.turn_count_op], 
                                  feed_dict={self.boards_placeholder: np.array([env.board]), 
                                             self.turn_placeholder: np.array([[env.turn]]),
                                             self.J_next: np.array([[next_J]])})
                    env.step(candidate_boards[next_idx])
                    if env.reward() is not None:
                        break

                self.sess.run([self.update_trace_sums_op])
                self.sess.run([self.reset_traces_op])
                env.reset()

#             print('final loss avg:', self.sess.run(self.loss_avg_op))
            self.sess.run(self.apply_gradients_op)

            summary = self.sess.run(self.summaries_op)
            summary_writer.add_summary(summary, epoch)
            self.sess.run([self.loss_sum_reset_op, 
                           self.reset_trace_sums_op, 
                           self.turn_count_reset_op])
        summary_writer.close()
        
    def restore(self):
        latest_checkpoint_path = tf.train.latest_checkpoint(self.checkpoint_path)
        if latest_checkpoint_path:
            print('Restoring checkpoint: {0}'.format(latest_checkpoint_path))
            self.saver.restore(self.sess, latest_checkpoint_path)
            
    def select_board(self, boards):
        turns = float(not env.turn)*np.ones((len(boards), 1))
        Js = self.sess.run(self.J, feed_dict={self.boards_placeholder: np.array(boards),
                                              self.turn_placeholder: turns})
        board_idx = np.random.choice(range(len(Js)))
        return boards[board_idx]

In [4]:
sess = tf.InteractiveSession()

In [6]:
nn_agent = NeuralNetAgent(sess, 
                          '/Users/adam/Documents/projects/chess_deep_learning/model/',
                          '/Users/adam/Documents/projects/chess_deep_learning/log/',
                          '/Users/adam/Documents/projects/chess_deep_learning/checkpoints/')
nn_agent.train(1, 1, 0.1)
    
#     random_agent = RandomAgent()
#     env = TicTacToe()
#     env.play([nn_agent,random_agent])

InvalidArgumentError: Duplicate tag loss_avg found in summary inputs
	 [[Node: MergeSummary_1/MergeSummary = MergeSummary[N=28, _device="/job:localhost/replica:0/task:0/cpu:0"](ScalarSummary, ScalarSummary_1, HistogramSummary, HistogramSummary_1, HistogramSummary_2, HistogramSummary_3, HistogramSummary_4, HistogramSummary_5, HistogramSummary_6, HistogramSummary_7, ScalarSummary_2, ScalarSummary_3, HistogramSummary_8, HistogramSummary_9, HistogramSummary_10, HistogramSummary_11, HistogramSummary_12, HistogramSummary_13, HistogramSummary_14, HistogramSummary_15, HistogramSummary_16, HistogramSummary_17, HistogramSummary_18, HistogramSummary_19, HistogramSummary_20, HistogramSummary_21, HistogramSummary_22, HistogramSummary_23)]]

Caused by op 'MergeSummary_1/MergeSummary', defined at:
  File "/Users/adam/anaconda/lib/python3.5/runpy.py", line 170, in _run_module_as_main
    "__main__", mod_spec)
  File "/Users/adam/anaconda/lib/python3.5/runpy.py", line 85, in _run_code
    exec(code, run_globals)
  File "/Users/adam/anaconda/lib/python3.5/site-packages/ipykernel/__main__.py", line 3, in <module>
    app.launch_new_instance()
  File "/Users/adam/anaconda/lib/python3.5/site-packages/traitlets/config/application.py", line 596, in launch_instance
    app.start()
  File "/Users/adam/anaconda/lib/python3.5/site-packages/ipykernel/kernelapp.py", line 442, in start
    ioloop.IOLoop.instance().start()
  File "/Users/adam/anaconda/lib/python3.5/site-packages/zmq/eventloop/ioloop.py", line 162, in start
    super(ZMQIOLoop, self).start()
  File "/Users/adam/anaconda/lib/python3.5/site-packages/tornado/ioloop.py", line 883, in start
    handler_func(fd_obj, events)
  File "/Users/adam/anaconda/lib/python3.5/site-packages/tornado/stack_context.py", line 275, in null_wrapper
    return fn(*args, **kwargs)
  File "/Users/adam/anaconda/lib/python3.5/site-packages/zmq/eventloop/zmqstream.py", line 440, in _handle_events
    self._handle_recv()
  File "/Users/adam/anaconda/lib/python3.5/site-packages/zmq/eventloop/zmqstream.py", line 472, in _handle_recv
    self._run_callback(callback, msg)
  File "/Users/adam/anaconda/lib/python3.5/site-packages/zmq/eventloop/zmqstream.py", line 414, in _run_callback
    callback(*args, **kwargs)
  File "/Users/adam/anaconda/lib/python3.5/site-packages/tornado/stack_context.py", line 275, in null_wrapper
    return fn(*args, **kwargs)
  File "/Users/adam/anaconda/lib/python3.5/site-packages/ipykernel/kernelbase.py", line 276, in dispatcher
    return self.dispatch_shell(stream, msg)
  File "/Users/adam/anaconda/lib/python3.5/site-packages/ipykernel/kernelbase.py", line 228, in dispatch_shell
    handler(stream, idents, msg)
  File "/Users/adam/anaconda/lib/python3.5/site-packages/ipykernel/kernelbase.py", line 391, in execute_request
    user_expressions, allow_stdin)
  File "/Users/adam/anaconda/lib/python3.5/site-packages/ipykernel/ipkernel.py", line 199, in do_execute
    shell.run_cell(code, store_history=store_history, silent=silent)
  File "/Users/adam/anaconda/lib/python3.5/site-packages/IPython/core/interactiveshell.py", line 2723, in run_cell
    interactivity=interactivity, compiler=compiler, result=result)
  File "/Users/adam/anaconda/lib/python3.5/site-packages/IPython/core/interactiveshell.py", line 2825, in run_ast_nodes
    if self.run_code(code, result):
  File "/Users/adam/anaconda/lib/python3.5/site-packages/IPython/core/interactiveshell.py", line 2885, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-6-1e325e940baf>", line 4, in <module>
    '/Users/adam/Documents/projects/chess_deep_learning/checkpoints/')
  File "<ipython-input-3-cf5a3013964c>", line 73, in __init__
    self.summaries_op = tf.merge_all_summaries()
  File "/Users/adam/anaconda/lib/python3.5/site-packages/tensorflow/python/ops/logging_ops.py", line 246, in merge_all_summaries
    return merge_summary(summary_ops)
  File "/Users/adam/anaconda/lib/python3.5/site-packages/tensorflow/python/ops/logging_ops.py", line 225, in merge_summary
    val = gen_logging_ops._merge_summary(inputs=inputs, name=name)
  File "/Users/adam/anaconda/lib/python3.5/site-packages/tensorflow/python/ops/gen_logging_ops.py", line 227, in _merge_summary
    result = _op_def_lib.apply_op("MergeSummary", inputs=inputs, name=name)
  File "/Users/adam/anaconda/lib/python3.5/site-packages/tensorflow/python/framework/op_def_library.py", line 759, in apply_op
    op_def=op_def)
  File "/Users/adam/anaconda/lib/python3.5/site-packages/tensorflow/python/framework/ops.py", line 2240, in create_op
    original_op=self._default_original_op, op_def=op_def)
  File "/Users/adam/anaconda/lib/python3.5/site-packages/tensorflow/python/framework/ops.py", line 1128, in __init__
    self._traceback = _extract_stack()

InvalidArgumentError (see above for traceback): Duplicate tag loss_avg found in summary inputs
	 [[Node: MergeSummary_1/MergeSummary = MergeSummary[N=28, _device="/job:localhost/replica:0/task:0/cpu:0"](ScalarSummary, ScalarSummary_1, HistogramSummary, HistogramSummary_1, HistogramSummary_2, HistogramSummary_3, HistogramSummary_4, HistogramSummary_5, HistogramSummary_6, HistogramSummary_7, ScalarSummary_2, ScalarSummary_3, HistogramSummary_8, HistogramSummary_9, HistogramSummary_10, HistogramSummary_11, HistogramSummary_12, HistogramSummary_13, HistogramSummary_14, HistogramSummary_15, HistogramSummary_16, HistogramSummary_17, HistogramSummary_18, HistogramSummary_19, HistogramSummary_20, HistogramSummary_21, HistogramSummary_22, HistogramSummary_23)]]


In [2]:
from collections import Counter


In [3]:
l=[1,0,1,-1,1,1,1,0,-1]
c = Counter(l)

In [4]:
c

Counter({-1: 2, 0: 2, 1: 5})

In [8]:
c.update([1])

In [9]:
c

Counter({-1: 2, 0: 2, 1: 6})

In [4]:
ttt= TicTacToe()

In [5]:
ttt.turn

False

In [25]:
c = tf.placeholder(tf.float32,[1],name='c')
x = tf.Variable(tf.constant(1.0, shape=[1]), name='x')
f = c*x*x
# g = tf.gradients(f,x)
opt = tf.train.GradientDescentOptimizer(1.0)
g = opt.compute_gradients(f,[x])
a = opt.apply_gradients(g)
init = tf.global_variables_initializer()
with tf.Session() as sess:
    sess.run(init)
    xval = sess.run(x)
    print(xval)
    grad = sess.run(g, feed_dict={c:[1]})
    print(grad)
    sess.run(a, feed_dict={c:[1]})
    xval = sess.run(x)
    print(xval)

[ 1.]
[(array([ 2.], dtype=float32), array([ 1.], dtype=float32))]
[-1.]


In [1]:
2**2

4

In [16]:
import numpy as np
board = np.zeros((3, 3, 2))
board[0,0,0]=1
board[1,1,0]=1
board[2,2,0]=1

board[:, :, 0][np.eye(3) == 1]

array([ 1.,  1.,  1.])