<a href="https://colab.research.google.com/github/alpacaYiChun/ML/blob/master/PVG_Eager.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
from google.colab import drive
drive.mount('/content/gdrive')

Mounted at /content/gdrive


In [None]:
import numpy as np
from keras.layers import Conv2D, Flatten, Dense, Input, BatchNormalization, Add
from keras.models import Model
from keras.optimizers import Adam
from keras.regularizers import l2
import pickle
import keras.backend as K
import copy
import math
from unittest.mock import MagicMock
from keras.models import load_model
import matplotlib.pyplot as plt
import seaborn as sns
import concurrent.futures
import time
!pip install objgraph
import objgraph
import gc


Collecting objgraph
  Downloading objgraph-3.6.0-py2.py3-none-any.whl (17 kB)
Installing collected packages: objgraph
Successfully installed objgraph-3.6.0


In [None]:
class PolicyValueNet():
    """Policy-value network backed by a Keras model.

    The model has two outputs: a softmax over the
    ``board_width * board_height`` cells (policy head) and a single tanh
    unit (value head).

    Args:
        board_width: board width used to size the input and policy head.
        board_height: board height used to size the input and policy head.
        gpu: when True the network is built channels-first, i.e. with input
            shape (3, board_width, board_height); otherwise channels-last.
        model_file: path of a saved Keras model to load.  When None the
            checkpoint in ``DEFAULT_MODEL_FILE`` is loaded, which is what
            this class always did before the argument was honoured.
    """

    # Checkpoint loaded when the caller does not pass ``model_file``.
    DEFAULT_MODEL_FILE = '/content/gdrive/My Drive/885_cpu/qiguai_2740.h5'

    def __init__(self, board_width, board_height, gpu = False, model_file=None):
        self.board_width = board_width
        self.board_height = board_height
        self.gpu = gpu
        self.l2_const = 1e-4  # coef of l2 penalty
        # ``model_file`` used to be ignored; honour it and keep the historic
        # checkpoint as the fallback so existing callers behave the same.
        if model_file is None:
            model_file = self.DEFAULT_MODEL_FILE
        self.create_policy_value_net(model_file)
        self._loss_train_op()

    def create_policy_value_net(self, path = None):
        """Load the model from ``path`` or, when ``path`` is None, build it.

        Sets ``self.model``.  Only the build path also sets
        ``self.policy_net`` and ``self.value_net`` (the two output tensors).
        """
        if path is not None:
            self.model = load_model(path)
            return

        # Channels-first layers need an explicit data_format; the
        # channels-last variant relies on the Conv2D default, as before.
        fmt = {"data_format": "channels_first"} if self.gpu else {}

        def conv(filters, kernel_size, **extra):
            # ReLU convolution with the shared L2 penalty.
            return Conv2D(filters=filters, kernel_size=kernel_size, activation="relu",
                          kernel_regularizer=l2(self.l2_const), **fmt, **extra)

        def dense(units, **extra):
            # Fully connected layer with the shared L2 penalty.
            return Dense(units, kernel_regularizer=l2(self.l2_const), **extra)

        if self.gpu:
            in_x = Input((3, self.board_width, self.board_height))
        else:
            in_x = Input((self.board_width, self.board_height, 3))

        # Shared trunk: three 3x3 convolutions plus a convolutional shortcut
        # taken straight from the input and added before the last BatchNorm.
        # NOTE(review): BatchNormalization() is used with its default axis in
        # both layouts; confirm that is intended for the channels-first case.
        network = conv(32, (3, 3), padding="same")(in_x)
        network = BatchNormalization()(network)
        network = conv(64, (3, 3), padding="same")(network)
        network = BatchNormalization()(network)
        network = conv(128, (3, 3), padding="same")(network)
        shortcut = conv(128, (3, 3), padding="same")(in_x)
        network = Add()([network, shortcut])
        network = BatchNormalization()(network)

        # action policy layers
        policy_net = conv(4, (1, 1))(network)
        policy_net = BatchNormalization()(policy_net)
        policy_net = Flatten()(policy_net)
        self.policy_net = dense(self.board_width*self.board_height, activation="softmax")(policy_net)

        # state value layers
        value_net = conv(2, (1, 1))(network)
        value_net = BatchNormalization()(value_net)
        value_net = Flatten()(value_net)
        value_net = dense(64)(value_net)
        self.value_net = dense(1, activation="tanh")(value_net)

        self.model = Model(in_x, [self.policy_net, self.value_net])

    def _loss_train_op(self):
        """Compile the model and attach ``self.train_step``.

        Three loss terms:
        loss = (z - v)^2 - pi^T * log(p) + c||theta||^2

        The first two are the compiled 'mean_squared_error' and
        'categorical_crossentropy' losses; the third comes from the L2
        kernel regularizers attached when the network is built.
        """

        # get the train op
        opt = Adam()
        losses = ['categorical_crossentropy', 'mean_squared_error']
        self.model.compile(optimizer=opt, loss=losses)

        def self_entropy(probs):
            # Mean entropy of the rows of ``probs``; 1e-10 guards log(0).
            return -np.mean(np.sum(probs * np.log(probs + 1e-10), axis=1))

        def train_step(state_input, old_probs, mcts_probs, winner, learning_rate):
            """Run one full-batch fit and report diagnostics.

            Returns ``(loss[0], loss[1], entropy, kl)`` where ``loss`` is the
            result of ``model.evaluate`` taken BEFORE the fit, ``entropy`` is
            the mean entropy of the post-fit policy and ``kl`` is the mean
            KL divergence from ``old_probs`` to the post-fit policy.
            """
            state_input_union = np.array(state_input)
            print(state_input_union.shape)
            mcts_probs_union = np.array(mcts_probs)
            winner_union = np.array(winner)
            # NOTE(review): for a two-output Keras model evaluate() is
            # expected to return [total, policy, value]; if so, loss[0] and
            # loss[1] below are the total and policy losses - confirm against
            # how the caller labels them.
            loss = self.model.evaluate(state_input_union, [mcts_probs_union, winner_union], batch_size=len(state_input), verbose=0)

            K.set_value(self.model.optimizer.lr, learning_rate)
            self.model.fit(state_input_union, [mcts_probs_union, winner_union], batch_size=len(state_input), verbose=0)
            action_probs, _ = self.model.predict_on_batch(state_input_union)
            entropy = self_entropy(action_probs)
            kl = np.mean(np.sum(old_probs * (np.log(old_probs + 1e-10) - np.log(action_probs + 1e-10)),axis=1))
            return loss[0], loss[1], entropy, kl

        self.train_step = train_step

In [None]:
# Hyper-parameters shared by the search, self-play and training code below.
C_GREED = 5  # exploration weight; passed to row() as `c` and stored as TreeNode.C
H = 8  # board height (rows)
W = 8  # board width (columns)
LINE = 5  # line length passed to Board as `line`
SIM = 400  # simulations per move; passed to row() as `s` and stored as TreeNode.S
TEMP = 1.0  # temperature applied to the visit counts in TreeNode.go
LEARNING_RATE = 2e-3  # initial learning rate in row(); lr is kept within 10x of it
CADENCE = 1  # row() trains on iterations where i % CADENCE == 0
EPOCH = 5  # maximum number of train_step calls per training round
GPU = False  # True selects the channels-first board encoding
KL_TAR = 0.02  # KL target used for early stopping and lr adaptation in row()
DIRICH = 0.3  # Dirichlet concentration of the move-selection noise
NOISE_RATE = 0.25  # weight of the Dirichlet noise mixed into the move probabilities

def softmax(x):
    """Return the softmax of ``x`` taken over all of its elements.

    The maximum is subtracted before exponentiating so that large inputs
    do not overflow.
    """
    exps = np.exp(x - np.max(x))
    return exps / np.sum(exps)

In [None]:
from numpy.random.mtrand import dirichlet
def headDraw(matrix):
  """Show ``matrix`` as an annotated seaborn heat map titled 'Probability Heatmap'.

  The horizontal axis is labelled 'j' and the vertical axis 'i'.
  Returns nothing; the figure is displayed with ``plt.show()``.
  """
  plt.figure(figsize=(10, 7))
  # annot=True writes each cell's value into the cell.
  sns.heatmap(matrix, annot=True, cmap="YlGnBu", cbar_kws={'label': 'Probability'})
  plt.title('Probability Heatmap')
  plt.xlabel('j')
  plt.ylabel('i')
  plt.show()
# The eight (row, column) unit steps: orthogonal and diagonal, both ways.
dirs = [[1,0],[0,1],[1,1],[-1,0],[0,-1],[-1,-1],[1,-1],[-1,1]]
def maxLine(nits, h, w):
  """Return the length of the longest straight run of stones in ``nits``.

  Args:
    nits: collection of (row, column) stones.
    h: number of rows; only cells with 0 <= row < h are considered.
    w: number of columns; only cells with 0 <= column < w are considered.

  Returns:
    The largest number of consecutive stones found along any direction in
    ``dirs``, or 0 when there is no stone on the board.
  """
  try:
    # One set build gives O(1) membership tests in the inner loop.
    stones = set(nits)
  except TypeError:
    # Unhashable entries: fall back to scanning the collection as given.
    stones = nits
  longest = 0
  for i in range(h):
    for j in range(w):
      if (i, j) not in stones:
        continue
      for di, dj in dirs:
        count = 0
        ii, jj = i, j
        # Walk away from (i, j) while still on the board and on a stone.
        while 0 <= ii < h and 0 <= jj < w and (ii, jj) in stones:
          count += 1
          ii += di
          jj += dj
        if count > longest:
          longest = count
  return longest
class Board():
  """Immutable-style position on an ``h`` x ``w`` board.

  ``nits_me`` and ``nits_op`` are lists of (row, column) stones.
  ``take_move`` swaps the two lists in the board it returns, so the stone
  just played always ends up in the new board's ``nits_op``.  ``last`` is
  the most recent move (or None) and ``line`` is the target line length.
  """
  def __init__(self, h, w, line, nits_me, nits_op, last):
    self.h, self.w = h, w
    self.line = line
    self.nits_me, self.nits_op = nits_me, nits_op
    self.last = last

  def getGrid(self, gpu = False):
    """Encode the position as three 0/1 planes.

    Plane 0 marks ``nits_me``, plane 1 marks ``nits_op`` and plane 2 marks
    ``last``.  The result has shape (3, h, w) when ``gpu`` is true and
    (h, w, 3) otherwise.
    """
    grid = np.zeros((3, self.h, self.w) if gpu else (self.h, self.w, 3))

    def mark(plane, cell):
      # Set one cell of one plane, honouring the chosen layout.
      if gpu:
        grid[plane, cell[0], cell[1]] = 1
      else:
        grid[cell[0], cell[1], plane] = 1

    for stone in self.nits_me:
      mark(0, stone)
    for stone in self.nits_op:
      mark(1, stone)
    if self.last is not None:
      mark(2, self.last)
    return grid

  def take_move(self, move):
    """Return the board after ``move`` is played; this board is not modified."""
    played_side = copy.deepcopy(self.nits_me)
    played_side.append(move)
    other_side = copy.deepcopy(self.nits_op)
    # Sides are swapped: the side that just moved becomes ``nits_op``.
    return Board(self.h, self.w, self.line, other_side, played_side, move)

  def getPossibleMoves(self):
    """Return every empty cell as (row, column), in row-major order."""
    return [
        (i, j)
        for i in range(self.h)
        for j in range(self.w)
        if not ((i, j) in self.nits_me or (i, j) in self.nits_op)
    ]

  def draw_grid(self, m):
    """Plot both sides' stones; ``m == 0`` draws ``nits_me`` in blue, else red."""
    plt.figure(figsize=(self.w, self.h))

    plt.grid(True, which='both', color='black', linewidth=1)

    def coordinates(nits):
      # First and second components of the stones, or empty arrays.
      if len(nits) > 0:
        points = np.array(nits)
        return points[:, 0], points[:, 1]
      return np.empty(0), np.empty(0)

    # Draw the dots
    me_x_coords, me_y_coords = coordinates(self.nits_me)
    op_x_coords, op_y_coords = coordinates(self.nits_op)
    if m == 0:
      c_me, c_op = 'blue', 'red'
    else:
      c_me, c_op = 'red', 'blue'
    plt.scatter(me_x_coords, me_y_coords, color=c_me, marker='o', s=300)
    plt.scatter(op_x_coords, op_y_coords, color=c_op, marker='o', s=300)

    plt.axis('on')
    plt.gca().set_aspect('equal')

    plt.xlim(-0.5, self.w-0.5)
    plt.ylim(-0.5, self.h-0.5)

    plt.show()
    plt.close()

  def getConclusion(self):
    """Return the result from the ``nits_me`` side's point of view.

    1.0 / -1.0 when ``nits_me`` / ``nits_op`` has a longest run of exactly
    ``line``; a longest run above ``line`` counts against its owner.
    Returns 0 when the board is full and None when the game is undecided.
    """
    mine = maxLine(self.nits_me, self.h, self.w)
    theirs = maxLine(self.nits_op, self.h, self.w)
    # Exact lines are checked for both sides before any over-long line.
    if mine == self.line:
      return 1.0
    if theirs == self.line:
      return -1.0
    if mine > self.line:
      return -1.0
    if theirs > self.line:
      return 1.0
    board_full = len(self.nits_op) + len(self.nits_me) == self.w * self.h
    return 0 if board_full else None
def test_board():
  """Print-based smoke test for Board.take_move, getGrid and getConclusion.

  Starts from a 6x6 board with a target line of 4, plays (1,1) and then
  (2,5), printing both stone lists and the channels-first grid after each
  move and finally the conclusion of the last position.
  """
  # Starting position, one plane per side (the array that used to be built
  # here was never read, so it is kept only as a picture):
  #   nits_me            nits_op
  #   0 1 0 1 1 0        0 0 0 0 0 0
  #   0 0 0 0 0 0        0 0 0 0 0 0
  #   0 0 1 0 0 0        0 0 0 0 0 0
  #   0 0 0 0 0 0        0 0 0 0 0 1
  #   0 0 0 0 0 0        0 0 0 0 0 1
  #   0 0 0 0 0 0        0 0 0 1 1 1
  t_board = Board(6,6,4,[(0,1),(0,3),(0,4),(2,2)], [(3,5),(4,5),(5,5),(5,4),(5,3)], (5,3))
  for move in ((1,1), (2,5)):
    t_board = t_board.take_move(move)
    print(t_board.nits_me)
    print(t_board.nits_op)
    print(t_board.getGrid(gpu=True))
  print(t_board.getConclusion())
test_board()

class TreeNode():
  """One position in the search tree.

  Attributes:
    parent: the node this one was expanded from, or None for a root.
    board: the position; must provide getConclusion(), getPossibleMoves(),
      take_move(), getGrid(), ``last``, ``h`` and ``w``.
    model: object whose predict_on_batch returns (policies, values).
    v: value estimate of this position; replaced by the board's conclusion
      when the position is already decided.
    P: prior probability of the move that led here.
    N: number of times a backup has passed through this node.
    Q: running mean of the backed-up values, from this node's point of view.
    dist: policy vector for this position, indexed by row * board.w + column.
    mcts: move distribution produced by the last go() call (else None).
    C: exploration weight.  S: number of simulations per go() call.
    children: list of child nodes, or None until expand() has run.
    conclusion: board.getConclusion() at construction time.
  """
  def __init__(self, parent, board, model, P, v, dist, C, S):
    self.parent:TreeNode = parent
    self.board:Board = board
    self.model:Model = model

    # Raw model estimation
    self.v = v
    # The probability of my being selected by my parent
    self.P = P
    # How many times I have been visited
    self.N = 0
    # The estimated win chance of me
    self.Q = 0
    # The action distribution from here
    self.dist = dist
    self.mcts = None

    self.C = C
    self.S = S

    self.children = None
    self.notified = False

    # A decided position overrides whatever the model estimated.
    self.conclusion = self.board.getConclusion()
    if self.conclusion is not None:
      self.v = self.conclusion

  def depth(self):
    """Return the number of nodes on the longest path from here to a leaf."""
    if self.isLeaf():
      return 1
    return 1 + max([c.depth() for c in self.children])

  def isLeaf(self):
    """Return True while expand() has not been called on this node."""
    return self.children is None

  # Greedy value in the eyes of the parent node (opponent)
  def getGreedyWalkValue(self):
    """Return the selection score of this node as seen by its parent.

    Raises:
      Exception: when called on a node without a parent.
    """
    if self.parent is None:
      raise Exception("Don't try to select a root!")
    if self.conclusion is not None:
      # Decided positions get a fixed score scaled by (1 + C).
      return -1 * self.conclusion * (1 + self.C)
    # Negated Q (the parent is the other side) plus the exploration bonus.
    return  -1 * self.Q + self.C * self.P * np.sqrt(self.parent.N) / (self.N + 1)

  # Think greedily to the leaf of both sides
  def greedyWalk(self):
    """Follow the best-scoring child repeatedly and return the leaf reached."""
    if self.isLeaf():
      return self
    greed_values = np.array([c.getGreedyWalkValue() for c in self.children])
    return self.children[np.argmax(greed_values)].greedyWalk()

  def expand(self):
    """Create one child per legal move, evaluated in a single model batch.

    Raises:
      Exception: when the node has already been expanded.
    """
    if self.children is not None:
      raise Exception("Don't try to revirgin the node!")
    moves = self.board.getPossibleMoves()
    next_Ps = [self.dist[move[0] * self.board.w + move[1]] for move in moves]
    next_boards = [self.board.take_move(move) for move in moves]
    conclusions = [board.getConclusion() for board in next_boards]
    # NOTE(review): the layout comes from the module-level GPU flag because
    # the node itself does not store one.
    network_input = np.array([board.getGrid(GPU) for board in next_boards])
    dists, vs = self.model.predict_on_batch(network_input)
    self.children = []
    for i, (next_board, conclusion) in enumerate(zip(next_boards, conclusions)):
      # Decided children take their conclusion as value and an all-zero policy.
      v_use = vs[i][0] if conclusion is None else conclusion
      dist_use = dists[i] if conclusion is None else np.zeros(dists[i].shape)
      child = TreeNode(self, next_board, self.model, next_Ps[i], v_use, dist_use, self.C, self.S)
      self.children.append(child)

  def update(self):
    """Back ``self.v`` up to the root, flipping its sign at every level.

    Raises:
      Exception: when an undecided node is backed up a second time.
    """
    if self.notified and self.conclusion is None:
      raise Exception("Don't try to cheat the value!")
    self.notified = True
    now:TreeNode = self
    attitude = 1.0
    while now is not None:
      w = now.N * now.Q + self.v * attitude
      now.N += 1
      now.Q = w / now.N
      now = now.parent
      attitude *= -1

  def force(self, i, j, gpu):
    """Play (i, j) and return a fresh root for the resulting position."""
    force_board = self.board.take_move((i,j))
    return createFreshNodeFromBoard(force_board, self.model, gpu, self.C, self.S)

  def go(self, show_decision_map = False, show_depth = False):
    """Run ``S`` simulations, store ``self.mcts`` and return the chosen child.

    ``self.mcts`` has ``board.h * board.w`` entries indexed by
    row * board.w + column (the same indexing ``expand`` uses for ``dist``).
    If any child has conclusion -1 the choice is uniform among those
    children and no noise is added; otherwise the temperature-scaled visit
    counts are mixed with Dirichlet noise before sampling.

    Args:
      show_decision_map: also draw the sampling and mcts heat maps.
      show_depth: also print the depth of the tree after the simulations.
    """
    for _ in range(self.S):
      # Simulation
      leaf = self.greedyWalk()
      # Expand
      if leaf.conclusion is None:
        leaf.expand()
      # Update
      leaf.update()

    if show_depth:
      print(f'Depth of search: {self.depth()}')

    # Select.  Board dimensions come from the board itself rather than the
    # module-level H/W so that boards of other sizes get a correct vector.
    board_h, board_w = self.board.h, self.board.w
    winning = [k for k, c in enumerate(self.children) if c.conclusion == -1]
    eager = len(winning) > 0
    if eager:
      # Eager win: spread the probability evenly over the winning moves.
      weights = np.zeros(len(self.children))
      weights[winning] = 1.0 / len(winning)
    else:
      # No eager win
      Ns = np.array([child.N for child in self.children])
      weights = softmax(1.0 / TEMP * np.log(Ns + 1e-10))

    mcts = np.zeros(board_h * board_w)
    for k, child in enumerate(self.children):
      last = child.board.last
      mcts[last[0] * board_w + last[1]] = weights[k]
    self.mcts = mcts

    p = weights
    if not eager:
      p = (1 - NOISE_RATE) * weights + NOISE_RATE * np.random.dirichlet(DIRICH * np.ones(len(weights)))

    if show_decision_map:
      # Indexed [column][row] as before, so the array is allocated (w, h).
      heat_mx = np.zeros((board_w, board_h))
      for k, child in enumerate(self.children):
        move = child.board.last
        heat_mx[move[1]][move[0]] = p[k]
      headDraw(heat_mx)
      headDraw(np.reshape(mcts, (board_h, board_w)))

    return np.random.choice(self.children, p=p)

def testNode(h,w,line):
  """Run one search from an empty ``h`` x ``w`` board with a mocked model.

  The mock returns a uniform policy and a value of 0.5 for every position.
  After ``root.go()`` the visit counts and backed-up values of every
  expanded node are checked against its children (mismatches are printed)
  and the depth and node count of the tree are printed.
  """
  def mock_predict(states):
    batch = states.shape[0]
    uniform_policy = np.ones((batch, h*w)) / (h*w)
    flat_value = np.ones((batch,1)) * 0.5
    return uniform_policy, flat_value

  def examine_visit(node:TreeNode):
    # An expanded node is visited once itself plus once per child visit.
    if node.children is None:
      return
    for child in node.children:
      examine_visit(child)
    sum_N = sum(child.N for child in node.children)
    if sum_N + 1 != node.N:
      print(f"fuck! sum of children is {sum_N}, but me is {node.N}")

  def examine_value(node:TreeNode):
    # Total value of a node = its own estimate minus its children's totals.
    if node.children is None:
      return
    for child in node.children:
      examine_value(child)
    sum_W = -1 * sum(child.N*child.Q for child in node.children) + node.v
    my_W = node.Q * node.N
    if not math.isclose(sum_W, my_W, rel_tol=1e-9):
      print(f'Fuck value: {sum_W} vs {my_W}')

  def size_tree(root:TreeNode):
    # Returns (depth, number of nodes) of the subtree under ``root``.
    if root.isLeaf():
      return 1,1
    sizes = [size_tree(child) for child in root.children]
    deepest = max((d for d, _ in sizes), default=0)
    node_count = sum(t for _, t in sizes)
    return deepest + 1, node_count + 1

  mock_model = MagicMock()
  mock_model.predict_on_batch.side_effect = mock_predict

  empty_board = Board(h,w,line,[],[],None)
  uniform_dist = np.ones(h*w)/(h*w)
  root = TreeNode(None, empty_board, mock_model, 0.5, 0.5, uniform_dist, 5, 2000)
  root.go()

  examine_visit(root)
  examine_value(root)

  depth, total = size_tree(root)
  print(f'depth={depth}, total={total}')
testNode(3,3,3)

class DataCollector():
  """Accumulates training samples (board, move distribution, outcome).

  Every collected node contributes six samples: the board as is, rotated
  by 90, 180 and 270 degrees, and flipped along each spatial axis.

  Attributes:
    boards: encoded boards, one per sample.
    labels: outcome per sample; 1 until ``assign`` overwrites it.
    dists: move distribution per sample.
    gpu: True when boards are encoded channels-first.
  """
  def __init__(self, gpu = False):
    self.boards = []
    self.labels = []
    self.dists = []
    self.gpu = gpu

  def collect(self, node: "TreeNode"):
    """Append the six augmented samples for ``node``.

    The move distribution ``node.mcts`` is indexed by row * w + column, so
    it is reshaped to (board.h, board.w) and given the same rotation or
    flip as the board it is paired with.  Previously the untransformed
    distribution was stored for all six boards, which mislabelled every
    augmented sample.  A ``node.mcts`` of None is stored unchanged.
    """
    # Spatial axes of the encoded board for the active layout.
    x1, x2 = (1, 2) if self.gpu else (0, 1)
    grid = node.board.getGrid(self.gpu)
    self.boards.extend([
        grid,
        np.rot90(grid, k=1, axes=(x1,x2)),
        np.rot90(grid, k=2, axes=(x1,x2)),
        np.rot90(grid, k=3, axes=(x1,x2)),
        np.flip(grid, axis=x1),
        np.flip(grid, axis=x2),
    ])

    mcts = node.mcts
    if mcts is None:
      self.dists.extend([None] * 6)
    else:
      plane = np.asarray(mcts).reshape(node.board.h, node.board.w)
      self.dists.extend([
          mcts,
          np.rot90(plane, k=1).reshape(-1),
          np.rot90(plane, k=2).reshape(-1),
          np.rot90(plane, k=3).reshape(-1),
          np.flip(plane, axis=0).reshape(-1),
          np.flip(plane, axis=1).reshape(-1),
      ])

    # Placeholder outcomes; the real values are written by assign().
    self.labels.extend([1] * 6)

  def flush(self):
    """Drop every stored sample."""
    self.boards.clear()
    self.labels.clear()
    self.dists.clear()

  def size(self):
    """Return the number of stored samples."""
    return len(self.boards)

  def assign(self, count, result):
    """Label the last ``count`` collected nodes with the game outcome.

    Walks backwards from the newest sample: the six samples of the last
    collected node get ``result``, the six before them ``-result``, and so
    on with the sign alternating per node.
    """
    attitude = 1.0
    idx = len(self.labels) - 1
    for i in range(count):
      for j in range(6):
        self.labels[idx] = attitude * result
        idx -= 1
      attitude *= -1

def test_collector():
  """Print-based smoke test for DataCollector.

  Collects the same channels-first node three times, labels the three
  collections with outcome -1 and prints the stored boards, labels and
  distributions.
  """
  my_stones = [(0,1),(0,3),(0,4),(2,2)]
  their_stones = [(3,5),(4,5),(5,5),(5,4),(5,3)]
  t_board = Board(6,6,4,my_stones, their_stones, (5,3))
  test_node = TreeNode(None, t_board, None, 0.5, 0.5, np.zeros(10), 0.5, 2)

  collector = DataCollector(gpu = True)
  rounds = 3
  for _ in range(rounds):
    collector.collect(test_node)
  collector.assign(rounds, -1)

  for stored in (collector.boards, collector.labels, collector.dists):
    print(stored)
test_collector()

class Game():
  """Plays one game from ``root`` and feeds every visited node to ``collector``."""
  def __init__(self, root, collector):
    self.root: TreeNode = root
    self.collector: DataCollector = collector

  def play(self, show=False, show_depth = False):
    """Play until the current root has a conclusion and return that conclusion.

    Each turn the root picks a child via ``go``, the root is handed to the
    collector, and the chosen child becomes the new root.  At the end the
    collector is asked to label the collected nodes with the negated
    conclusion of the final position.

    Args:
      show: forwarded to ``go`` and, when true, the chosen board is drawn.
      show_depth: forwarded to ``go``.
    """
    moves_played = 0
    colour = 0
    while self.root.conclusion is None:
      current = self.root
      chosen = current.go(show, show_depth)
      if show:
        chosen.board.draw_grid(colour)
      self.collector.collect(current)
      moves_played += 1
      # Cut the chosen child loose: its old parent drops all children and
      # the child forgets the parent before becoming the new root.
      old_parent = chosen.parent
      old_parent.children.clear()
      old_parent.children = None
      chosen.parent = None
      self.root = chosen
      colour = 1 - colour
    outcome = self.root.conclusion
    self.collector.assign(moves_played, outcome * -1)
    return outcome

def createFreshNodeFromBoard(board:Board, model:Model, gpu, c, s):
    """Return a parentless TreeNode for ``board``, evaluated once by ``model``.

    The board is encoded with ``board.getGrid(gpu)`` and sent through the
    model as a batch of one; the node gets prior 0, the predicted value and
    policy, exploration weight ``c`` and simulation count ``s``.
    """
    single_batch = np.expand_dims(board.getGrid(gpu), 0)
    policies, values = model.predict_on_batch(single_batch)
    return TreeNode(None, board, model, 0, values[0][0], policies[0], c, s)

def row(start, end, h, w, line, c, s, gpu=False):
  """Alternate training and self-play for iterations ``start`` .. ``end - 1``.

  Each iteration first trains on whatever the collector holds (when
  ``i % CADENCE == 0``), then plays one self-play game from an empty board
  whose samples are used by the next training round.

  Args:
    start, end: iteration range; also used in the checkpoint file names.
    h, w: board height and width.
    line: line length passed to Board.
    c: exploration weight for the search.
    s: simulations per move.
    gpu: True for the channels-first encoding.
  """
  # The network is sized from this function's own board dimensions (it used
  # to read the module-level W and H regardless of ``w`` and ``h``).
  net = PolicyValueNet(w, h, gpu, None)
  model = net.model
  # summary() prints by itself; wrapping it in print() only added "None".
  model.summary()
  collector = DataCollector(gpu)
  lr = LEARNING_RATE

  for i in range(start, end):
    if i % CADENCE == 0 and collector.size() > 0:
      old_probs, old_ = model.predict_on_batch(np.array(collector.boards))
      kl = None
      for e in range(EPOCH):
        print(f'lr={lr}')
        loss_policy, loss_value, entrophy, kl = net.train_step(collector.boards, old_probs, collector.dists, collector.labels, lr)
        print(f'i={i}, loss_policy={loss_policy}, loss_value = {loss_value}, entrophy={entrophy}, kl={kl}')
        # Stop early once the policy has moved too far from the old one.
        if kl > KL_TAR * 4:
          break
      collector.flush()

      # Adapt the learning rate to keep the KL divergence near KL_TAR.
      # ``kl`` stays None only when EPOCH is 0 and no step was run.
      if kl is not None:
        if kl > KL_TAR * 2 and lr > LEARNING_RATE / 10:
          lr /= 1.5
        elif kl < KL_TAR / 2 and lr < LEARNING_RATE * 10:
          lr *= 1.5

    if i % 10 == 0:
      model.save(f'/content/gdrive/My Drive/885_cpu/qiguai_{i}.h5')

    # Create root node
    empty_board = Board(h, w, line, [], [], None)
    root = createFreshNodeFromBoard(empty_board, model, gpu, c, s)

    # Play the game (board drawing stays off; depth is printed every 20th game)
    g = Game(root, collector)
    pub = False
    show_depth = i % 20 == 0
    g.play(pub, show_depth)

    # Memory
    if i % 10 == 0:
      objgraph.show_most_common_types()
      gc.collect()

[(3, 5), (4, 5), (5, 5), (5, 4), (5, 3)]
[(0, 1), (0, 3), (0, 4), (2, 2), (1, 1)]
[[[0. 0. 0. 0. 0. 0.]
  [0. 0. 0. 0. 0. 0.]
  [0. 0. 0. 0. 0. 0.]
  [0. 0. 0. 0. 0. 1.]
  [0. 0. 0. 0. 0. 1.]
  [0. 0. 0. 1. 1. 1.]]

 [[0. 1. 0. 1. 1. 0.]
  [0. 1. 0. 0. 0. 0.]
  [0. 0. 1. 0. 0. 0.]
  [0. 0. 0. 0. 0. 0.]
  [0. 0. 0. 0. 0. 0.]
  [0. 0. 0. 0. 0. 0.]]

 [[0. 0. 0. 0. 0. 0.]
  [0. 1. 0. 0. 0. 0.]
  [0. 0. 0. 0. 0. 0.]
  [0. 0. 0. 0. 0. 0.]
  [0. 0. 0. 0. 0. 0.]
  [0. 0. 0. 0. 0. 0.]]]
[(0, 1), (0, 3), (0, 4), (2, 2), (1, 1)]
[(3, 5), (4, 5), (5, 5), (5, 4), (5, 3), (2, 5)]
[[[0. 1. 0. 1. 1. 0.]
  [0. 1. 0. 0. 0. 0.]
  [0. 0. 1. 0. 0. 0.]
  [0. 0. 0. 0. 0. 0.]
  [0. 0. 0. 0. 0. 0.]
  [0. 0. 0. 0. 0. 0.]]

 [[0. 0. 0. 0. 0. 0.]
  [0. 0. 0. 0. 0. 0.]
  [0. 0. 0. 0. 0. 1.]
  [0. 0. 0. 0. 0. 1.]
  [0. 0. 0. 0. 0. 1.]
  [0. 0. 0. 1. 1. 1.]]

 [[0. 0. 0. 0. 0. 0.]
  [0. 0. 0. 0. 0. 0.]
  [0. 0. 0. 0. 0. 1.]
  [0. 0. 0. 0. 0. 0.]
  [0. 0. 0. 0. 0. 0.]
  [0. 0. 0. 0. 0. 0.]]]
-1.0
depth=10, total=596

In [None]:
row(2741,5001,H,W,LINE,C_GREED,SIM,GPU)

Model: "model"
__________________________________________________________________________________________________
 Layer (type)                Output Shape                 Param #   Connected to                  
 input_1 (InputLayer)        [(None, 8, 8, 3)]            0         []                            
                                                                                                  
 conv2d (Conv2D)             (None, 8, 8, 32)             896       ['input_1[0][0]']             
                                                                                                  
 batch_normalization (Batch  (None, 8, 8, 32)             128       ['conv2d[0][0]']              
 Normalization)                                                                                   
                                                                                                  
 conv2d_1 (Conv2D)           (None, 8, 8, 64)             18496     ['batch_normalization[0][0

  saving_api.save_model(


[1;30;43mStreaming output truncated to the last 5000 lines.[0m
lr=0.001333333333333333
(186, 8, 8, 3)
i=2928, loss_policy=3.8639116287231445, loss_value = 3.1764187812805176, entrophy=2.9383466243743896, kl=0.00280610378831625
lr=0.001333333333333333
(186, 8, 8, 3)
i=2928, loss_policy=3.81945538520813, loss_value = 3.150336503982544, entrophy=2.9286158084869385, kl=0.007490680553019047
lr=0.001333333333333333
(186, 8, 8, 3)
i=2928, loss_policy=3.761042833328247, loss_value = 3.1168649196624756, entrophy=2.917407274246216, kl=0.015108526684343815
lr=0.001333333333333333
(186, 8, 8, 3)
i=2928, loss_policy=3.6923699378967285, loss_value = 3.078364133834839, entrophy=2.904648542404175, kl=0.026008903980255127
lr=0.001333333333333333
(78, 8, 8, 3)
i=2929, loss_policy=3.807796001434326, loss_value = 2.8129382133483887, entrophy=2.531651735305786, kl=0.0016196358483284712
lr=0.001333333333333333
(78, 8, 8, 3)
i=2929, loss_policy=3.775941848754883, loss_value = 2.7939071655273438, entrophy=2

In [None]:
def pk(id1, id2, models, first, gpu, h, w, line, c, s):
  """Play one game between ``models[0]`` and ``models[1]``.

  ``models[0]`` moves first when ``first`` is true, otherwise ``models[1]``.
  After every move a fresh search root is built for the other model.

  Returns:
    ``[id1, id2, first, score0, score1]`` where the scores are 1 / -1 for
    the winner / loser, or 0 / 0 when nobody won.
  """
  side = 0 if first else 1
  start = Board(h, w, line, [], [], None)
  node: TreeNode = createFreshNodeFromBoard(start, models[side], gpu, c, s)
  while node.conclusion is None:
    played = node.go()
    side = 1 - side
    node = createFreshNodeFromBoard(played.board, models[side], gpu, c, s)

  # The conclusion is seen from the side that would move next, i.e. ``side``.
  outcome = node.conclusion
  if outcome == 1:
    winner = side
  elif outcome == -1:
    winner = 1 - side
  else:
    winner = None

  score0, score1 = {0: (1, -1), 1: (-1, 1)}.get(winner, (0, 0))
  return [id1, id2, first, score0, score1]

In [None]:
def pk_pool(max_threads, max_tasks_at_once, round, gpu, h, w, line, c, s):
  """Run checkpoint-vs-checkpoint matches on a thread pool and print results.

  Checkpoints 1000, 900, ..., 100 each play checkpoint 10.  Every pairing
  plays ``round`` rounds, and each round submits two ``pk`` games, one with
  each side moving first.

  Args:
    max_threads: worker threads in the pool.
    max_tasks_at_once: once this many games are pending, wait for at least
      one to finish before submitting more.
    round: rounds per pairing (the name shadows the builtin; kept because
      it is part of the signature).
    gpu, h, w, line, c, s: forwarded to ``pk``.
  """
  with concurrent.futures.ThreadPoolExecutor(max_workers=max_threads) as executor:
    tasks = set()  # To keep track of tasks

    for i in range(1000, 99, -100):
      modelI = load_model(f'/content/gdrive/My Drive/664_three_0.3_res/qiguai_{i}.h5')
      for j in range(10, 11, 1):
        modelJ = load_model(f'/content/gdrive/My Drive/664_three_0.3_res/qiguai_{j}.h5')
        models = [modelI, modelJ]
        for k in range(round):
          if len(tasks) >= max_tasks_at_once:
            done, tasks = concurrent.futures.wait(tasks, return_when=concurrent.futures.FIRST_COMPLETED)
            for f in done:
              print(f.result())

          for first in (True, False):
            tasks.add(executor.submit(pk, i, j, models, first, gpu, h, w, line, c, s))

    # Wait for the remaining tasks to complete.  wait() returns a
    # (done, not_done) pair; the old code iterated the pair itself and
    # called result() on an undefined name.
    done, _ = concurrent.futures.wait(tasks)
    for f in done:
      print(f.result())

#pk_pool(10, 7, 100, GPU, H, W, LINE, 5, 800)

In [None]:
NOISE_RATE = 0.0
def show(path, gpu, c, s, human = False):
  """Load the model at ``path`` and play a game from an empty H x W board, drawing each position.

  The board is drawn before every move.  With ``human`` false the model
  plays every move.  With ``human`` true the model plays the first move and
  every other move after it, and the remaining moves are read from stdin
  as two whitespace-separated integers.
  """
  model = load_model(path)
  opening = Board(H, W, LINE, [], [], None)
  node: TreeNode = createFreshNodeFromBoard(opening, model, gpu, c, s)
  turn = 0
  while node.conclusion is None:
    node.board.draw_grid(turn)
    humans_turn = human and turn != 0
    if humans_turn:
      a, b = map(int, input().split())
      node = node.force(a, b, gpu)
    else:
      node = node.go(True, True)
    turn = 1 - turn

#show('/content/gdrive/My Drive/664_three_0.3_res/qiguai_1000.h5', False, 0, 1600, True)

