In [1]:
import tensorflow as tf
import numpy as np
import math
import random
import os
import copy
import time
from IPython import display

In [2]:
# Action space: 6 pieces per player x 9 destination cells.
num_actions = 6 * 9
# State vector: 3 stacked 3x3 layers (27) + two 6-slot hands (12) + turn flag (1) = 40.
state_size = 3 * 3 * 3 + 6 * 2 + 1

# Checkpoint locations. `transfer_path` is the scratch file used to copy
# weights from the online network (`dqn`) into the target network (`dqn_target`).
checkpoint_path = "training3/cp/cp-{epoch:04d}.ckpt"
checkpoint_dir = os.path.dirname(checkpoint_path)
transfer_path = "training3/transfer_weights.ckpt"
transfer_dir = os.path.dirname(transfer_path)

# NOTE(review): this callback is created here but none of the `fit` calls
# visible in this notebook pass `callbacks=[cp_callback]`, so it appears to be
# unused -- confirm whether periodic checkpointing was intended.
cp_callback = tf.keras.callbacks.ModelCheckpoint(
    filepath=checkpoint_path,
    save_weights_only=True,
    verbose=1,
    save_freq=1000
)

# Online Q-network: state vector in, one Q-value per action out (linear head).
dqn = tf.keras.Sequential([
    tf.keras.layers.Dense(50, activation='relu'),
    tf.keras.layers.Dense(50, activation='relu'),
    tf.keras.layers.Dense(num_actions),
])

# NOTE(review): the output is a vector of Q-values (regression), so the
# 'accuracy' metric is presumably not meaningful here; a learning rate of 0.4
# is also unusually large for Adam -- TODO confirm both are intentional.
dqn.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=0.4),
    loss=tf.keras.losses.MeanAbsoluteError(),
    metrics=['accuracy'],
    run_eagerly=True
)

# Target Q-network: same architecture as `dqn`. In the visible code it is only
# used via `predict` and `load_weights`, never `fit`.
dqn_target = tf.keras.Sequential([
    tf.keras.layers.Dense(50, activation='relu'),
    tf.keras.layers.Dense(50, activation='relu'),
    tf.keras.layers.Dense(num_actions),
])

dqn_target.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=0.2),
    loss=tf.keras.losses.MeanAbsoluteError(),
    metrics=['accuracy'],
    run_eagerly=True
)

# Build the online network so its weights exist, then copy them into the target
# network through the transfer file. The order (build -> save -> load) matters.
dqn.build((1, state_size))
dqn.save_weights(transfer_path)
dqn_target.load_weights(transfer_path)

# Master switch for file logging; while False, fprint() does nothing.
file_print = False

def fprint(string):
    """Append `string` plus a newline to 'chess_log.txt' when `file_print` is enabled."""
    if not file_print:
        return
    with open('chess_log.txt', 'a') as log_file:
        log_file.write(string + '\n')

Metal device set to: Apple M1 Pro


2022-01-11 14:08:11.762888: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:305] Could not identify NUMA node of platform GPU ID 0, defaulting to 0. Your kernel may not have been built with NUMA support.
2022-01-11 14:08:11.763060: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:271] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 0 MB memory) -> physical PluggableDevice (device: 0, name: METAL, pci bus id: <undefined>)


In [3]:
# Shared game state: `state` is the (1, state_size) network input and `board`
# is a view of its only row, so writes to `board` are visible through `state`.
board_size = 9
state = np.zeros((1, state_size), dtype=np.uint8)
board = state[0]

# location[p - 1] is the index inside `board` where piece p currently sits.
# Every piece starts in its hand slot, directly after the three board layers.
location = [board_size * 3 + i for i in range(12)]
for i in range(12):
    board[location[i]] = i + 1

# Player 1 moves first; the last element of the state vector stores the turn.
turn = 1
board[state_size - 1] = turn


#board layout: top layer (9) - middle layer (9) - bottom layer (9) - player 1 hand (6) - player 2 hand (6) - turn (1)
#pieces: 1 2 3 4 5 6 (player 1) / 7 8 9 10 11 12 (player 2)
#turn: 1 / 2

def initialize():
    """Reset the shared game state to the starting position.

    Rebinds the globals `state`, `board` and `turn`, and updates the global
    `location` list in place so that every piece is back in its hand slot.
    """
    global state, board, turn
    state = np.zeros((1, state_size), dtype=np.uint8)
    board = state[0]
    hand_start = board_size * 3
    for offset in range(12):
        slot = hand_start + offset
        board[slot] = offset + 1
        location[offset] = slot
    turn = 1
    board[state_size - 1] = turn

def getSize(piece):
    """Return the size class of a piece id (ids 1-2 and 7-8 -> 1, 3-4 and 9-10 -> 2, 5-6 and 11-12 -> 3)."""
    # Ids 7..12 are folded onto 1..6 so both id ranges share one size table.
    rank = piece - 6 if 7 <= piece <= 12 else piece
    return (rank + 1) // 2

def getColor(piece):
    """Return the owner of a piece id: 1 for ids 1-6, 2 for ids 7-12, 0 otherwise (e.g. an empty cell)."""
    if 1 <= piece < 7:
        return 1
    if 7 <= piece < 13:
        return 2
    return 0

def getIndex(piece):
    """Return the 0-based position (0..5) of a piece id within its owner's hand."""
    zero_based = piece - 1
    return zero_based % 6

def getRowCol(loc):
    """Convert a board index from any layer into its (row, col) on the 3x3 grid."""
    # Each layer holds 9 cells, so the layer offset is removed with a modulo.
    cell = loc % 9
    row = cell // 3
    col = cell % 3
    return row, col

def getTopPiece(row, col):
    """Return the visible piece at (row, col) on the shared board, or 0 if the cell is empty.

    Layers are scanned in storage order (offsets 0, board_size, 2 * board_size);
    the first non-zero entry is the piece on top.
    """
    cell = row * 3 + col
    for layer in range(3):
        occupant = board[cell + board_size * layer]
        if occupant != 0:
            return occupant
    return 0

def validHandMove(index, row, col):
    """Try to play the current player's hand piece `index` onto cell (row, col).

    On success the shared `board` and `location` are updated and True is
    returned. Returns False (leaving the state untouched) when the hand slot is
    empty, the cell's top layer is already used, or the piece is not strictly
    larger than the piece it would cover.
    """
    # Player 2's hand is stored right after player 1's six slots.
    if turn == 2:
        index += 6
    hand_slot = board_size * 3 + index
    piece = board[hand_slot]
    if piece == 0:
        return False

    top = row * 3 + col
    middle = top + board_size
    bottom = top + board_size * 2

    if board[top] != 0:
        return False
    if board[middle] != 0:
        if getSize(board[middle]) >= getSize(piece):
            return False
        target = top
    elif board[bottom] != 0:
        if getSize(board[bottom]) >= getSize(piece):
            return False
        target = middle
    else:
        target = bottom

    board[target] = piece
    location[int(piece) - 1] = target
    board[hand_slot] = 0
    return True



def validBoardMove(row1, col1, row2, col2):
    """Try to move the top piece of cell (row1, col1) onto cell (row2, col2).

    The piece moved is whatever is visible on the source cell; it must belong
    to the current player (`turn`). On success the shared `board` and
    `location` are updated and True is returned. Returns False without changing
    anything when the source is empty or not owned by the mover, the
    destination's top layer is already used, or the moved piece is not strictly
    larger than the piece it would cover.
    """
    source_top = row1 * 3 + col1
    piece = 0
    remove_index = -1
    # Locate the visible piece on the source cell.
    for layer in range(3):
        slot = source_top + board_size * layer
        if board[slot] != 0:
            piece = board[slot]
            remove_index = slot
            break

    if piece == 0 or getColor(piece) != turn:
        return False

    top = row2 * 3 + col2
    middle = top + board_size
    bottom = top + board_size * 2

    if board[top] != 0:
        return False
    if board[middle] != 0:
        if getSize(board[middle]) >= getSize(piece):
            return False
        target = top
    elif board[bottom] != 0:
        if getSize(board[bottom]) >= getSize(piece):
            return False
        target = middle
    else:
        target = bottom

    board[target] = piece
    location[int(piece) - 1] = target
    board[remove_index] = 0
    return True

def checkLine():
    """Return the color (1 or 2) owning a completed line on the shared board, or 0 if there is none.

    Lines are examined in a fixed order: rows, then columns, then the two
    diagonals. Only the visible (top) piece of each cell counts.

    A line of three empty cells is skipped. Previously three empty cells
    compared equal (0 == 0 == 0) and the function returned 0 immediately,
    so any winning line located after an empty one was never detected.
    """
    lines = []
    lines.extend([(row, 0), (row, 1), (row, 2)] for row in range(3))
    lines.extend([(0, col), (1, col), (2, col)] for col in range(3))
    lines.append([(0, 0), (1, 1), (2, 2)])
    lines.append([(0, 2), (1, 1), (2, 0)])

    for cells in lines:
        color0, color1, color2 = (getColor(getTopPiece(row, col)) for row, col in cells)
        # color0 != 0 rules out the "three empty cells" false match.
        if color0 != 0 and color0 == color1 and color0 == color2:
            return color0

    return 0

def showBoard(b, file=False):
    """Print a human-readable dump of a state vector.

    Parameters
    ----------
    b : sequence of length `state_size`
        State vector: 27 board cells (three 3x3 layers), player 1's hand
        (indices 27..32), player 2's hand (indices 33..38) and the turn flag
        as the last element.
    file : bool, optional
        When True the text goes through `fprint` (the log file) instead of
        `print`. Defaults to False, which matches the previous behavior where
        this flag was a hard-coded local and the file branch could never run.
    """
    t = b[state_size - 1]
    b = np.array(b)
    emit = fprint if file else print

    emit('turn: ' + str(t))
    emit('board:\n' + str(np.reshape(b[0:27], (-1, 3, 3))))

    # "my hand" is always the hand of the player whose turn it is.
    if t == 1:
        my_hand = b[27:33]
        enemy_hand = b[33:39]
    else:
        my_hand = b[33:39]
        enemy_hand = b[27:33]

    emit('my hand:' + str(my_hand))
    emit('enemy hand:' + str(enemy_hand))

In [4]:
class Bot:
    """Scripted opponent: plays an immediately winning action when one exists, otherwise a random one.

    The bot works on private copies of the shared `board` / `location`, so it
    can try moves without modifying the real game state. Call `observe()` to
    refresh those copies before asking for an action.
    """

    def __init__(self, turn):
        """Snapshot the shared game state; `turn` (1 or 2) is the side this bot plays."""
        self.board = copy.deepcopy(board)
        self.location = copy.deepcopy(location)
        self.turn = turn

    def botGetTopPiece(self, row, col):
        """Return the visible piece at (row, col) on the bot's private board, or 0 if empty."""
        cell = row * 3 + col
        for layer in range(3):
            piece = self.board[cell + board_size * layer]
            if piece != 0:
                return piece
        return 0

    def _botPlace(self, piece, row, col):
        """Put `piece` on top of cell (row, col) of the private board if the stacking rule allows it.

        Returns True after updating `self.board` and `self.location`; returns
        False (no change) when the top layer is already used or `piece` is not
        strictly larger than the piece it would cover.
        """
        top = row * 3 + col
        middle = top + board_size
        bottom = top + board_size * 2

        if self.board[top] != 0:
            return False
        if self.board[middle] != 0:
            if getSize(self.board[middle]) >= getSize(piece):
                return False
            target = top
        elif self.board[bottom] != 0:
            if getSize(self.board[bottom]) >= getSize(piece):
                return False
            target = middle
        else:
            target = bottom

        self.board[target] = piece
        self.location[int(piece) - 1] = target
        return True

    def botValidHandMove(self, index, row, col):
        """Try to play hand piece `index` (0..5) of the bot's side onto (row, col) on the private board.

        Returns True when the move was applied, False when it is illegal.
        """
        # Use the bot's own side rather than the global `turn`, so simulation
        # does not depend on whose move it currently is in the real game.
        if self.turn == 2:
            index += 6
        hand_slot = board_size * 3 + index
        piece = self.board[hand_slot]
        if piece == 0:
            return False
        if not self._botPlace(piece, row, col):
            return False
        self.board[hand_slot] = 0
        return True

    def botValidBoardMove(self, row1, col1, row2, col2):
        """Try to move the top piece of (row1, col1) onto (row2, col2) on the private board.

        The moved piece must belong to the bot's side. Returns True when the
        move was applied, False when it is illegal.
        """
        source_top = row1 * 3 + col1
        for layer in range(3):
            remove_index = source_top + board_size * layer
            piece = self.board[remove_index]
            if piece != 0:
                break
        else:
            # No piece on any layer of the source cell.
            return False

        if getColor(piece) != self.turn:
            return False
        if not self._botPlace(piece, row2, col2):
            return False
        self.board[remove_index] = 0
        return True

    def botCheckLine(self):
        """Return the color (1 or 2) owning a completed line on the private board, or 0 if none.

        Rows are checked first, then columns, then both diagonals. Lines made of
        three empty cells are skipped; previously they matched (0 == 0 == 0)
        and ended the search early, hiding real wins further down the list.
        """
        lines = []
        lines.extend([(row, 0), (row, 1), (row, 2)] for row in range(3))
        lines.extend([(0, col), (1, col), (2, col)] for col in range(3))
        lines.append([(0, 0), (1, 1), (2, 2)])
        lines.append([(0, 2), (1, 1), (2, 0)])

        for cells in lines:
            color0, color1, color2 = (
                getColor(self.botGetTopPiece(row, col)) for row, col in cells
            )
            if color0 != 0 and color0 == color1 and color0 == color2:
                return color0
        return 0

    def observe(self):
        """Refresh the private board/location copies from the shared game state."""
        self.board = copy.deepcopy(board)
        self.location = copy.deepcopy(location)

    def randomAction(self):
        """Return a uniformly random action id in [0, num_actions)."""
        return random.randrange(num_actions)

    def getAction(self):
        """Return an action id (piece_index * 9 + destination_cell).

        Every action is simulated on the private board; the first one that is
        legal and completes a line for the bot's side is returned. If there is
        none, a random action is returned (which may be illegal).
        """
        beforeBoard = copy.deepcopy(self.board)
        beforeLocation = copy.deepcopy(self.location)
        win = -1
        for action in range(num_actions):
            piece = action // 9 + 1
            # Actions address pieces 0..5 of the acting side; player 2's piece
            # ids start at 7, matching how the game loop decodes actions.
            if self.turn == 2:
                piece += 6
            loc = self.location[piece - 1]
            action_to = action % 9
            row_to, col_to = (action_to // 3), (action_to % 3)
            if loc >= board_size * 3:
                moved = self.botValidHandMove(getIndex(piece), row_to, col_to)
            else:
                row_from, col_from = getRowCol(loc)
                moved = self.botValidBoardMove(row_from, col_from, row_to, col_to)

            # Only a move that was actually applied can be a winning move.
            is_win = moved and self.botCheckLine() == self.turn

            # Undo the trial move before looking at the next candidate.
            self.board = copy.deepcopy(beforeBoard)
            self.location = copy.deepcopy(beforeLocation)

            if is_win:
                win = action
                break

        if win == -1:
            return self.randomAction()
        return win

In [5]:
memory_size = 500   # capacity of the replay ring buffer
batch_size = 50     # maximum number of transitions sampled per training step
discount = 0.9      # discount factor applied to the bootstrapped next-state value

class Replay:
    """Fixed-size ring buffer of (state, action, reward, next_state, game_over) transitions."""

    def __init__(self):
        self.states = np.empty((memory_size, state_size), dtype=np.float32)
        self.actions = np.zeros(memory_size, dtype=np.uint8)
        self.rewards = np.empty(memory_size, dtype=np.float32)
        # Built-in bool: the `np.bool` alias was removed in NumPy 1.24.
        self.game_over = np.empty(memory_size, dtype=bool)
        self.next_states = np.empty((memory_size, state_size), dtype=np.float32)
        self.memory_length = 0  # number of valid entries stored so far
        self.current = 0        # next slot to write (wraps around)

    def remember(self, current_state, action, reward, next_state, game_over):
        """Store one transition, overwriting the oldest entry once the buffer is full."""
        self.states[self.current] = current_state
        self.actions[self.current] = action
        self.rewards[self.current] = reward
        self.game_over[self.current] = game_over
        self.next_states[self.current] = next_state
        self.memory_length = max(self.memory_length, self.current + 1)
        self.current = (self.current + 1) % memory_size

    def getBatch(self):
        """Sample a training batch and build its Q-learning targets.

        Returns
        -------
        inputs : ndarray, shape (k, state_size)
        targets : ndarray, shape (k, num_actions)
            k = min(batch_size, memory_length); both are empty when nothing has
            been stored yet.

        Each target row is the online network's current prediction, with the
        entry of the taken action replaced by
        ``reward`` for terminal transitions, or
        ``reward + discount * clip(Q_target(s', argmax_a Q_online(s', a)), -1, 1)``
        otherwise. Transitions are sampled with replacement.

        The networks are evaluated once per batch rather than up to three
        times per sampled transition.
        """
        current_batch_size = min(batch_size, self.memory_length)
        inputs = np.zeros((current_batch_size, state_size))
        targets = np.zeros((current_batch_size, num_actions))
        if current_batch_size == 0:
            return inputs, targets

        picks = [random.randrange(0, self.memory_length) for _ in range(current_batch_size)]
        batch_states = self.states[picks]
        batch_actions = self.actions[picks].astype(np.intp)
        batch_rewards = self.rewards[picks]
        batch_done = self.game_over[picks]
        rows = np.arange(current_batch_size)

        inputs[:] = batch_states
        targets[:] = dqn.predict(batch_states)
        # Terminal (or invalid-move) transitions keep just the reward.
        targets[rows, batch_actions] = batch_rewards

        alive = ~batch_done
        if alive.any():
            alive_next = self.next_states[picks][alive]
            # Online network picks the action, target network rates it.
            best_actions = np.argmax(dqn.predict(alive_next), axis=1)
            target_q = dqn_target.predict(alive_next)
            next_max_q = target_q[np.arange(len(best_actions)), best_actions]
            next_max_q = np.clip(next_max_q, -1, 1)
            targets[rows[alive], batch_actions[alive]] += discount * next_max_q

        return inputs, targets

In [6]:
# isPlayer[t - 1] is True when side t is driven by the human/bot branch of the
# game loop, False when it is driven by the DQN.
isPlayer = [True, False]
# Exploration rate for the DQN side. It starts at epsilon_min, so the decay
# step in the game loop (`if epsilon > epsilon_min`) never triggers as configured.
epsilon = 0.1
epsilon_min = 0.1
# Countdown (in finished games) used by the game loop to decide when to copy
# the online network's weights into the target network.
ddqn = 20
ddqn_cnt = ddqn

# Bookkeeping for building replay transitions: the state/action of the move
# being made and of the move before it.
current_state = copy.deepcopy(board)
current_action = 0
last_state = copy.deepcopy(board)
last_action = 0

# Fixed probe states (each shaped (1, state_size)); the game loop prints the
# network's Q-values for them after every finished game.
states = [
    [[0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,2,0,11,12,0,0,0,0,0,0,3,4,5,6,7,8,9,10,0,0,1]],
    [[0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,2,0,7,8,0,0,0,0,0,0,3,4,5,6,0,0,9,10,11,12,1]],
    [[0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,2,0,0,0,0,0,11,0,0,0,3,4,5,6,7,8,9,10,0,12,1]],
    [[0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,2,3,4,5,6,7,8,9,10,11,12,1]]
]

# One row of num_actions (54) values per probe state above.
# NOTE(review): `ttt` is not referenced anywhere else in the visible notebook;
# presumably these are hand-written expected Q-values for the probe states -- TODO confirm.
ttt = [
    [-1,-1,0,-1,-1,0,0,0,0, -1,-1,0,-1,-1,0,0,0,0, 0,0,1,-1,-1,0,0,0,0, 0,0,1,-1,-1,0,0,0,0, 0,0,1,-1,-1,0,0,0,0, 0,0,1,-1,-1,0,0,0,0,],
    [-1,-1,0,-1,-1,0,0,0,0, -1,-1,0,-1,-1,0,0,0,0, 0,0,1,0.5,0.5,0,0,0,0, 0,0,1,0.5,0.5,0,0,0,0, 0,0,1,0.5,0.5,0,0,0,0, 0,0,1,0.5,0.5,0,0,0,0,],
    [-1,-1,0,0,0,0,0,-1,0, -1,-1,0,0,0,0,0,-1,0, 0,0,1,0,0,0,0,-1,0, 0,0,1,0,0,0,0,-1,0, 0,0,1,0,0,0,0,-1,0, 0,0,1,0,0,0,0,-1,0,],
    [0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0, ]
]

replay = Replay()
# Scripted opponent playing side 1.
bot = Bot(1)
# True: the isPlayer side is played by `bot`; False: it reads moves from input().
isBot = True
# Loop iteration counter (counts attempted moves, not games).
n = 0
# True until the first valid move of a game; there is no previous transition to store before that.
first_move = True
# Rolling statistics, printed and reset every 100 iterations of the game loop.
invalid = []
win1 = []
win2 = []
# Report whether a previous checkpoint exists. Loading is currently disabled
# (see the commented line), so the message says "found" rather than "restored".
latest_checkpoint = tf.train.latest_checkpoint(checkpoint_dir)
if latest_checkpoint is not None:
    print('checkpoint found (not loaded):', latest_checkpoint)
#     dqn.load_weights(latest_checkpoint)

checkpoint restored


In [7]:
## Game Iteration
# Self-play training loop. Each iteration is one attempted move: the side to
# move picks an action, the move is applied if legal, transitions are written
# to the replay buffer, and the online network is fitted on a sampled batch.
# NOTE(review): `while True` has no exit condition, so the loop only stops on
# an interrupt and the two prints after it are not reached.
while True:
    # Every 100 iterations: print and reset the rolling statistics.
    if n % 100 == 0:
        print('n:',n)
        print('time:', time.ctime())
        if len(invalid) == 0:
            print('invalid: 0')
        else:
            print('invalid:', sum(invalid) / len(invalid))
        print('win1:', sum(win1))
        print('win2:', sum(win2))
        if (sum(win1) + sum(win2)) != 0:
            print('winrate of 2:', sum(win2) / (sum(win1) + sum(win2)))
        else:
            print('winrate of 2: 0')
        invalid = []
        win1 = []
        win2 = []
    n += 1
    
    if isPlayer[turn - 1]:
        # --- "player" side: either interactive input or the scripted bot ---
        if not isBot:
            print('turn: ' + str(turn))
            print('board:\n', np.reshape(board[0:27], (-1, 3, 3)))
            if turn == 1:
                my_hand = board[27:33]
                enemy_hand = board[33:39]
            else:
                my_hand = board[33:39]
                enemy_hand = board[27:33]
            print('my hand:', my_hand)
            print('enemy hand:', enemy_hand)

            # First input line: piece number 1..6; 0 dumps the previous state
            # and its Q-values instead of making a move.
            piece = int(input())
            if piece == 0:
                showBoard(last_state)
                print(np.reshape(dqn.predict(np.reshape(last_state, (1, state_size))), (6, 3,3 )))
                continue
            if not (piece >= 1 and piece <= 6):
                print('invalid piece')
                continue
            if turn == 2:
                piece += 6
            loc = location[piece - 1]

            # Second input line: "row col" of the destination cell.
            valid = False
            if loc >= board_size * 3:
                index = getIndex(piece)
                row_to, col_to = input().split()
                row_to = int(row_to)
                col_to = int(col_to)
                action = getIndex(piece) * 9 + (row_to * 3 + col_to)
                valid = validHandMove(index, row_to, col_to)
            else:
                row_from, col_from = getRowCol(loc)
                row_to, col_to = input().split()
                row_to = int(row_to)
                col_to = int(col_to)
                action = getIndex(piece) * 9 + (row_to * 3 + col_to)
                valid = validBoardMove(row_from, col_from, row_to, col_to)
        if isBot:
            # Scripted opponent: refresh its private copy, then decode its
            # action id into (piece, destination cell).
            bot.observe()
            action = bot.getAction()
            piece = action // 9 + 1
            if turn == 2:
                piece += 6
            loc = location[piece - 1]
            action_to = action % 9
            valid = False
            # A location past the three board layers means the piece is still in hand.
            if loc >= board_size * 3:
                index = getIndex(piece)
                row_to, col_to = (action_to // 3), (action_to % 3)
                fprint('action1:' + str(piece) + str(row_to) + str(col_to))
                valid = validHandMove(index, row_to, col_to)
            else:
                # NOTE(review): validBoardMove moves whatever piece is visible
                # on the source cell. If the selected piece is covered by
                # another piece of the same side, the piece actually moved
                # differs from the one encoded in `action` -- confirm intended.
                row_from, col_from = getRowCol(loc)
                row_to, col_to = (action_to // 3), (action_to % 3)
                fprint('action1:' + str(piece) + str(row_to) + str(col_to))
                valid = validBoardMove(row_from, col_from, row_to, col_to)
        
    else:
        # --- DQN side: epsilon-greedy over the online network's Q-values ---
        actions = dqn.predict(state)
        action = np.argmax(actions)
        if random.random() < epsilon:
            action = random.randrange(0, num_actions)
        if epsilon > epsilon_min:
            epsilon -= 0.001
        piece = action // 9 + 1
        if turn == 2:
            piece += 6
        loc = location[piece - 1]
        action_to = action % 9
        valid = False
        if loc >= board_size * 3:
            index = getIndex(piece)
            row_to, col_to = (action_to // 3), (action_to % 3)
            fprint('action2:' + str(piece) + str(row_to) + str(col_to))
            valid = validHandMove(index, row_to, col_to)
        else:
            # Same covered-piece caveat as in the bot branch above.
            row_from, col_from = getRowCol(loc)
            row_to, col_to = (action_to // 3), (action_to % 3)
            fprint('action2:' + str(piece) + str(row_to) + str(col_to))
            valid = validBoardMove(row_from, col_from, row_to, col_to)
        # Track the share of illegal moves chosen by the DQN side.
        if not valid:
            invalid.append(1)
        else:
            invalid.append(0)
    
    current_action = action
    
    if valid:
        winner = checkLine()
        if winner != 0:
            # Game finished: print diagnostics (final board, Q-values for the
            # probe states and for one fixed arbitrary vector).
            print('winner:',winner)
            showBoard(board)
            for s in states:
                print(dqn.predict(s)[0])
            ss = [[1,2,3,4,5,6,7,8,9,9,8,7,6,5,4,3,2,1,5,2,3,4,7,2,9,1,3,4,1,2,6,2,0,4,4,6,2,6,3,4]]
            print(dqn.predict(ss)[0])
            # The move just made gets reward +1 (terminal) ...
            replay.remember(current_state, current_action, 1, board, True)
            
            # ... and the previous move (made by the other side) gets reward -1 (terminal).
            replay.remember(last_state, last_action, -1, current_state, True)

            initialize()
            first_move = True
            last_state = copy.deepcopy(board)
            last_action = 0
            if winner == 1:
                win1.append(1)
            elif winner == 2:
                win2.append(1)
            # Periodically copy the online weights into the target network.
            # With the `< 0` test the copy happens every ddqn + 2 finished games.
            if ddqn_cnt < 0:
                ddqn_cnt = ddqn
                dqn.save_weights(transfer_path)
                dqn_target.load_weights(transfer_path)
                print('copied to target dqn')
            else:
                ddqn_cnt -= 1
        else:
            # Game continues: store the previous move with reward 0; its
            # next-state is the state the current move was made from.
            if not first_move:
                replay.remember(last_state, last_action, 0, current_state, False)
            first_move = False
            # Hand the move to the other side and mirror it into the state vector.
            if turn == 1:
                turn = 2
            else:
                turn = 1
            board[state_size - 1] = turn
        # Shift the (state, action) history by one move.
        last_state = copy.deepcopy(current_state)
        last_action = current_action
        current_state = copy.deepcopy(board)
        current_action = action
    else:
        # Illegal move: stored as a terminal transition with reward -1; the
        # board and the turn are unchanged, so the same side tries again.
        fprint('not valid')
        replay.remember(current_state, current_action, -1, current_state, True)
        
    # One training step per iteration on a freshly sampled batch.
    inputs, targets = replay.getBatch()
    if len(inputs) > 0:
        dqn.fit(inputs, targets, verbose=False, epochs=10)
        

print('done')
print(np.reshape(dqn.predict(state), (6, 3,3 )))

n: 0
time: Tue Jan 11 14:08:11 2022
invalid: 0
win1: 0
win2: 0
winrate of 2: 0


2022-01-11 14:08:11.975027: W tensorflow/core/platform/profile_utils/cpu_utils.cc:128] Failed to get CPU frequency: 0 Hz


winner: 1
turn: 1
board:
[[[ 0  0  0]
  [ 0  0  0]
  [ 0  0  0]]

 [[ 0  0  0]
  [ 0  0  0]
  [ 0  0  0]]

 [[ 4  0 11]
  [ 5  8  9]
  [ 1  0  2]]]
my hand:[0 0 3 0 0 6]
enemy hand:[ 7  0  0 10  0 12]
[ 0.38820565  1.006716    1.1868575  -0.66561425 -2.5283546   2.8026633
  2.6009324  -2.5160465  -2.3971596  -1.6835861  -0.94331044  4.260965
  4.129904   -1.1714891  -0.34828627 -0.909489   -0.40751004 -1.1289935
 -0.45237768 -1.1471174  -0.45658767  3.172246    4.11403     1.0267628
 -0.48664868  1.1786827   2.8033657  -0.61434793 -0.5295726  -0.49863732
  0.43851888  3.1166062  -0.40518856  1.9072787  -0.46909082  0.29384243
 -1.2578119   3.0144906  -0.5293884  -2.4423766   3.806803    0.42626953
 -0.46311402 -0.9902999   2.4963732  -1.0027548   0.25217068 -3.7586184
  3.775689   -0.00899231  1.3290975  -3.4142532  -0.0607208   3.08495   ]
[-1.3898311  -0.66551626 -0.44358677  1.140807   -0.85940796  1.1136966
  0.8299072  -0.8473755  -0.77633846 -0.06514263  0.69855314  2.585567
  2.

winner: 1
turn: 1
board:
[[[ 0  0  0]
  [ 0  0  0]
  [ 0  0  0]]

 [[ 0  0  0]
  [ 0  0  0]
  [ 0  0  0]]

 [[ 3  2  1]
  [ 0  0 10]
  [ 0  9  0]]]
my hand:[0 0 0 4 5 6]
enemy hand:[ 7  8  0  0 11 12]
[-1.5963858  -1.1355866  -1.0236171   1.7337333  -0.5360307   0.821419
  0.8163662  -0.59842676 -0.8547865   0.57305366  0.93414253  1.3051424
  1.1010735   0.9431972   1.5588368   0.89814526  1.6454202   1.162924
  1.5783241  -1.5782493   0.30320522  0.8046758   0.5041514  -0.1780351
  1.5761664   1.6960052   0.7638609   1.1296036   1.5139962   1.5790871
 -1.6402652   0.73204935  1.7464218  -0.41227034  1.5783241  -1.6957084
  0.5927763   0.73204935  1.361962   -0.90108526  1.5638121  -1.5761664
  1.5588368   1.3177571   0.4614891  -1.5955484  -2.0565574  -1.7927533
  1.7339437   0.44969073 -0.6085499  -1.383998   -2.2866507   1.0786262 ]
[-1.5963858  -1.1355866  -1.0236171   1.7337333  -0.5360307   0.821419
  0.8163662  -0.59842676 -0.8547865   0.57305366  0.93414253  1.3051424
  1.1010

KeyboardInterrupt: 

In [None]:
# # Game Play
# isPlayer = [True, False]
# n = 0
# showQ = False
# initialize()
# while True:
#     if n % 100 == 0:
#         print('n:',n)
#         print('time:', time.ctime())
#     n += 1
    
#     if isPlayer[turn - 1]:
#         print('turn: ' + str(turn))
#         print('board:\n', np.reshape(board[0:27], (-1, 3, 3)))
#         if turn == 1:
#             my_hand = board[27:33]
#             enemy_hand = board[33:39]
#         else:
#             my_hand = board[33:39]
#             enemy_hand = board[27:33]
#         print('my hand:', my_hand)
#         print('enemy hand:', enemy_hand)
        
#         piece = int(input())
#         if piece == 0:
#             showQ = not showQ
#             continue
#         if not (piece >= 1 and piece <= 6):
#             print('invalid piece')
#             continue
#         if turn == 2:
#             piece += 6
#         loc = location[piece - 1]
        
#         valid = False
#         if loc >= board_size * 3:
#             index = getIndex(piece)
#             row_to, col_to = input().split()
#             row_to = int(row_to)
#             col_to = int(col_to)
#             action = getIndex(piece) * 9 + (row_to * 3 + col_to)
#             valid = validHandMove(index, row_to, col_to)
#         else:
#             row_from, col_from = getRowCol(loc)
#             row_to, col_to = input().split()
#             row_to = int(row_to)
#             col_to = int(col_to)
#             action = getIndex(piece) * 9 + (row_to * 3 + col_to)
#             valid = validBoardMove(row_from, col_from, row_to, col_to)
            
#     else:
#         if showQ:
#             print(np.reshape(dqn.predict(state), (6, 3,3 )))
#         actions = dqn.predict(state)
#         action = np.argmax(actions)
#         piece = action // 9 + 1
#         if turn == 2:
#             piece += 6
#         loc = location[piece - 1]
#         action_to = action % 9
#         valid = False
#         if loc >= board_size * 3:
#             index = getIndex(piece)
#             row_to, col_to = (action_to // 3), (action_to % 3)
#             print(index, row_to, col_to)
#             valid = validHandMove(index, row_to, col_to)
#         else:
#             row_from, col_from = getRowCol(loc)
#             row_to, col_to = (action_to // 3), (action_to % 3)
#             print(row_from, col_from, row_to, col_to)
#             valid = validBoardMove(row_from, col_from, row_to, col_to)
    
#     if valid:
#         #display.clear_output(wait=False)
#         winner = checkLine()
#         if winner != 0:
#             initialize()
#         else:
#             if turn == 1:
#                 turn = 2
#             else:
#                 turn = 1
#             board[state_size - 1] = turn
#     else:
#         print('not valid move')
#         break
        

# print('done')

In [None]:
# Quick inspection: newest checkpoint of the "training2" run, the current
# state vector, and the online network's Q-values as a 6 x 3 x 3 grid
# (piece index x destination row x destination column).
newest_checkpoint = tf.train.latest_checkpoint("training2/cp")
print(newest_checkpoint)
state_row = state[0]
print(state_row)
q_grid = np.reshape(dqn.predict(state), (6, 3,3 ))
print(q_grid)

In [None]:
# dqn.load_weights("training2/cp/cp-0005.ckpt")