<a href="https://colab.research.google.com/github/cafeblue999/sgf_4/blob/master/sgf_pro4.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [0]:


# -*- coding: utf-8 -*-

from sys import stderr, stdout, exit
from datetime import datetime, date, timedelta
import numpy as np
import logging
import google.colab
import googleapiclient.discovery
import googleapiclient.http
import pickle
import tensorflow as tf
import glob
import time
from collections import Counter
import random
import os

from google.colab import files
from google.colab import drive


# Mount Google Drive at /content/drive; force_remount=True asks for a fresh
# mount even when one is already active.
drive.mount('/content/drive', force_remount=True)

# Print NumPy arrays in full instead of eliding the middle with "...".
np.set_printoptions(threshold=np.inf)

# Name the log file after the start time of this run (host-local clock).
basename = datetime.now().strftime("%Y%m%d-%H%M%S")
log_file = 'log_' + basename + '.txt'

# Send `logging` records (INFO and above) to that file as tab-separated
# "timestamp<TAB>level<TAB>message" lines.
logging.basicConfig(format='%(asctime)s\t%(levelname)s\t%(message)s', datefmt='%Y/%m/%d %H:%M:%S', filename=log_file, level=logging.INFO)

def stdout_log(str):
    """Write a message to standard output prefixed with the current JST time.

    The timestamp is computed from UTC and shifted by nine hours, so it is
    correct Japan Standard Time whatever the host's local time zone is (the
    previous version shifted the host-local clock and was only right on a
    UTC machine).

    Args:
        str: Message text to print. The name shadows the builtin, but it is
            kept so that existing keyword callers keep working.
    """
    # Local import: `timezone` is not part of the file-level import list.
    from datetime import timezone

    jst_now = datetime.now(timezone.utc) + timedelta(minutes=540)  # UTC+9
    stamp = jst_now.strftime("%Y/%m/%d-%H:%M:%S")
    stdout.write(stamp + " " + str + "\n")

    
BSIZE = 19  # board size
EBSIZE = BSIZE + 2  # extended board size (one extra row/column on each side)
BVCNT = BSIZE ** 2  # vertex count
EBVCNT = EBSIZE ** 2  # extended vertex count
PASS = EBVCNT  # pass
VNULL = EBVCNT + 1  # invalid position
KOMI = 6.5
# Index offsets to the four orthogonal neighbours on the flat extended board.
dir4 = [1, EBSIZE, -1, -EBSIZE]
# Index offsets to the four diagonal neighbours on the flat extended board.
diag4 = [1 + EBSIZE, EBSIZE - 1, -EBSIZE - 1, 1 - EBSIZE]
KEEP_PREV_CNT = 7  # number of previous board positions kept as history
# 17 feature planes: (current board + KEEP_PREV_CNT previous boards) x 2
# colors, plus 1 plane holding the side to move.
FEATURE_CNT = KEEP_PREV_CNT * 2 + 3
x_labels = "ABCDEFGHJKLMNOPQRST"  # column letters; "I" is skipped


def ev2xy(ev):
    """Split an extended-board index into its ``(x, y)`` coordinates."""
    y, x = divmod(ev, EBSIZE)
    return x, y


def xy2ev(x, y):
    """Combine ``(x, y)`` coordinates into a flat extended-board index."""
    row_start = y * EBSIZE
    return row_start + x


def rv2ev(rv):
    """Convert a raw (BSIZE x BSIZE) index to an extended-board index.

    The raw index ``BVCNT`` stands for a pass and maps to ``PASS``.
    """
    if rv == BVCNT:
        return PASS
    row, col = divmod(rv, BSIZE)
    # Shift by one in both directions to step over the exterior border.
    return col + 1 + (row + 1) * EBSIZE


def ev2rv(ev):
    """Convert an extended-board index to a raw (BSIZE x BSIZE) index.

    ``PASS`` maps to the raw index ``BVCNT``.
    """
    if ev == PASS:
        return BVCNT
    row, col = divmod(ev, EBSIZE)
    # Shift by one in both directions to drop the exterior border.
    return col - 1 + (row - 1) * BSIZE


def ev2str(ev):
    """Format an extended-board index as a coordinate such as ``"D4"``.

    Any index at or above ``PASS`` is rendered as ``"PASS"``.
    """
    if ev < PASS:
        col, row = ev2xy(ev)
        return x_labels[col - 1] + str(row)
    return "PASS"


def str2ev(v_str):
    """Parse a coordinate string such as ``"d4"`` into an extended index.

    Matching is case-insensitive; ``"pass"`` and ``"resign"`` both map to
    ``PASS``.
    """
    token = v_str.upper()
    if token in ("PASS", "RESIGN"):
        return PASS
    col = x_labels.find(token[0]) + 1
    row = int(token[1:])
    return xy2ev(col, row)

def turn2str(turn):
    """Return ``'W'`` for turn 0 and ``'B'`` for any other value."""
    return 'W' if turn == 0 else 'B'


# Extended-board index of every on-board point, listed in raw-index order.
rv_list = [rv2ev(i) for i in range(BVCNT)]


class StoneGroup(object):
    """Liberty bookkeeping for one stone group (or one empty point).

    Attributes:
        lib_cnt: number of liberties (``VNULL`` while the point is empty).
        size: number of stones (``VNULL`` while the point is empty).
        v_atr: position of the sole liberty whenever ``lib_cnt == 1``.
        libs: set of liberty positions.
    """

    def __init__(self):
        self.lib_cnt = VNULL  # liberty count
        self.size = VNULL  # stone size
        self.v_atr = VNULL  # liberty position if in Atari
        self.libs = set()  # set of liberty positions

    def clear(self, stone=True):
        """Reset as a freshly placed single stone, or as an empty point."""
        self.lib_cnt = 0 if stone else VNULL
        self.size = 1 if stone else VNULL
        self.v_atr = VNULL
        self.libs.clear()

    def add(self, v):
        """Add a liberty at ``v`` (no-op if it is already recorded)."""
        if v not in self.libs:
            self.libs.add(v)
            self.lib_cnt += 1
            self.v_atr = v

    def sub(self, v):
        """Remove the liberty at ``v`` (no-op if it is not recorded)."""
        if v in self.libs:
            self.libs.remove(v)
            self.lib_cnt -= 1
            if self.lib_cnt == 1:
                # Keep v_atr pointing at the one remaining liberty; without
                # this it could still name the liberty that was just removed.
                self.v_atr = next(iter(self.libs))

    def merge(self, other):
        """Absorb the liberties and stones of another stone group."""
        self.libs |= other.libs
        self.lib_cnt = len(self.libs)
        self.size += other.size
        if self.lib_cnt == 1:
            self.v_atr = next(iter(self.libs))


class Board(object):
    """Go board with incrementally maintained stone groups.

    Points live on a flat "extended" board of ``EBVCNT`` entries that
    includes a one-point exterior border. Color codes are 0: white,
    1: black, 2: empty, 3: exterior.
    """

    def __init__(self):
        # 1-d array ([EBVCNT]) of stones or empty or exterior
        # 0: white 1: black
        # 2: empty 3: exterior
        self.color = np.full(EBVCNT, 3)
        self.sg = [StoneGroup() for _ in range(EBVCNT)]  # stone groups
        self.clear()

    def clear(self):
        """Reset to an empty board with black to move."""
        self.color[rv_list] = 2  # empty
        self.id = np.arange(EBVCNT)  # id of stone group
        self.next = np.arange(EBVCNT)  # next position in the same group
        for group in self.sg:
            group.clear(stone=False)
        self.prev_color = [np.copy(self.color) for _ in range(KEEP_PREV_CNT)]

        self.ko = VNULL  # illegal position due to Ko
        self.turn = 1  # black
        self.move_cnt = 0  # move count
        self.prev_move = VNULL  # previous move
        self.remove_cnt = 0  # removed stones count
        self.history = []

    def copy(self, b_cpy):
        """Copy this board's full state into ``b_cpy``.

        The target's liberty sets and move history are replaced rather than
        merged, so a board object can be reused as a copy target many times.
        """
        b_cpy.color = np.copy(self.color)
        b_cpy.id = np.copy(self.id)
        b_cpy.next = np.copy(self.next)
        for src, dst in zip(self.sg, b_cpy.sg):
            dst.lib_cnt = src.lib_cnt
            dst.size = src.size
            dst.v_atr = src.v_atr
            dst.libs = set(src.libs)
        for i in range(KEEP_PREV_CNT):
            b_cpy.prev_color[i] = np.copy(self.prev_color[i])

        b_cpy.ko = self.ko
        b_cpy.turn = self.turn
        b_cpy.move_cnt = self.move_cnt
        b_cpy.prev_move = self.prev_move
        b_cpy.remove_cnt = self.remove_cnt
        b_cpy.history = list(self.history)

    def remove(self, v):
        """Remove the whole stone group that contains the stone at ``v``."""
        v_tmp = v
        while True:
            self.remove_cnt += 1
            self.color[v_tmp] = 2  # empty
            self.id[v_tmp] = v_tmp  # reset id
            for d in dir4:
                nv = v_tmp + d
                # add liberty to neighbor groups
                self.sg[self.id[nv]].add(v_tmp)
            v_next = self.next[v_tmp]
            self.next[v_tmp] = v_tmp
            v_tmp = v_next
            if v_tmp == v:
                break  # finish when all stones are removed

    def merge(self, v1, v2):
        """Merge the stone groups at ``v1`` and ``v2`` into one group."""
        id_base = self.id[v1]
        id_add = self.id[v2]
        # Relabel the smaller group so fewer points have to be touched.
        if self.sg[id_base].size < self.sg[id_add].size:
            id_base, id_add = id_add, id_base  # swap
        self.sg[id_base].merge(self.sg[id_add])

        v_tmp = id_add
        while True:
            self.id[v_tmp] = id_base  # change id to id_base
            v_tmp = self.next[v_tmp]
            if v_tmp == id_add:
                break
        # swap next id for circulation
        self.next[v1], self.next[v2] = self.next[v2], self.next[v1]

    def place_stone(self, v):
        """Put a stone of the side to move at ``v`` and resolve captures.

        ``remove_cnt`` is reset and then counts the stones captured by this
        placement.
        """
        self.color[v] = self.turn
        self.id[v] = v
        self.sg[self.id[v]].clear(stone=True)
        for d in dir4:
            nv = v + d
            if self.color[nv] == 2:
                self.sg[self.id[v]].add(nv)  # add liberty
            else:
                self.sg[self.id[nv]].sub(v)  # remove liberty
        # merge stone groups
        for d in dir4:
            nv = v + d
            if self.color[nv] == self.turn and self.id[nv] != self.id[v]:
                self.merge(v, nv)
        # remove opponent's stones
        self.remove_cnt = 0
        opp = int(self.turn == 0)
        for d in dir4:
            nv = v + d
            if self.color[nv] == opp and \
                    self.sg[self.id[nv]].lib_cnt == 0:
                self.remove(nv)

    def legal(self, v):
        """Return True if the side to move may play at ``v``."""
        if v == PASS:
            return True
        elif v == self.ko or self.color[v] != 2:
            return False

        stone_cnt = [0, 0]
        atr_cnt = [0, 0]
        for d in dir4:
            nv = v + d
            c = self.color[nv]
            if c == 2:
                return True  # an empty neighbour guarantees a liberty
            elif c <= 1:
                stone_cnt[c] += 1
                if self.sg[self.id[nv]].lib_cnt == 1:
                    atr_cnt[c] += 1

        # No empty neighbour: legal only if the move captures something, or
        # joins a friendly group that has more than one liberty.
        return (atr_cnt[int(self.turn == 0)] != 0 or
                atr_cnt[self.turn] < stone_cnt[self.turn])

    def eyeshape(self, v, pl):
        """Return True if ``v`` looks like an eye of player ``pl``."""
        if v == PASS:
            return False
        opp = int(pl == 0)
        for d in dir4:
            c = self.color[v + d]
            if c == 2 or c == opp:
                return False

        diag_cnt = [0, 0, 0, 0]
        for d in diag4:
            nv = v + d
            diag_cnt[self.color[nv]] += 1

        # Opponent stones on the diagonals, plus one if on the board edge.
        wedge_cnt = diag_cnt[opp] + int(diag_cnt[3] > 0)
        if wedge_cnt == 2:
            for d in diag4:
                nv = v + d
                if self.color[nv] == opp and \
                        self.sg[self.id[nv]].lib_cnt == 1 and \
                        self.sg[self.id[nv]].v_atr != self.ko:
                    return True

        return wedge_cnt < 2

    def play(self, v, not_fill_eye=True):
        """Play ``v`` for the side to move.

        Args:
            v: extended-board index, or ``PASS``.
            not_fill_eye: when True, refuse moves that fill an own eye.

        Returns:
            0 on success, 1 if the move is illegal, 2 if it was refused
            because it would fill an own eye.
        """
        if not self.legal(v):
            logging.info("board play : not legal = {}".format(ev2str(v)))
            return 1
        if not_fill_eye and self.eyeshape(v, self.turn):
            return 2

        # Shift the position history by one step. Stored arrays are never
        # modified in place, so moving the references is enough.
        self.prev_color.pop()
        self.prev_color.insert(0, np.copy(self.color))

        self.ko = VNULL
        if v != PASS:
            self.place_stone(v)
            group_id = self.id[v]
            # A single stone that captured exactly one stone and is left
            # with one liberty creates a ko at that liberty.
            if self.remove_cnt == 1 and \
                    self.sg[group_id].lib_cnt == 1 and \
                    self.sg[group_id].size == 1:
                self.ko = self.sg[group_id].v_atr

        self.prev_move = v
        self.history.append(v)
        self.turn = int(self.turn == 0)
        self.move_cnt += 1

        return 0

    def random_play(self):
        """Play a random legal, non-eye-filling move, or pass if none exists.

        Returns:
            The move that was played.
        """
        empty_list = np.where(self.color == 2)[0]
        np.random.shuffle(empty_list)

        for v in empty_list:
            if self.play(v, True) == 0:
                return v

        self.play(PASS)
        return PASS

    def score(self):
        """Return black's score minus white's score minus ``KOMI``.

        Stones count one point each; an empty point counts for a color when
        it touches only that color.
        """
        stone_cnt = [0, 0]
        for rv in range(BVCNT):
            v = rv2ev(rv)
            c = self.color[v]
            if c <= 1:
                stone_cnt[c] += 1
            else:
                nbr_cnt = [0, 0, 0, 0]
                for d in dir4:
                    nbr_cnt[self.color[v + d]] += 1
                if nbr_cnt[0] > 0 and nbr_cnt[1] == 0:
                    stone_cnt[0] += 1
                elif nbr_cnt[1] > 0 and nbr_cnt[0] == 0:
                    stone_cnt[1] += 1
        return stone_cnt[1] - stone_cnt[0] - KOMI

    def rollout(self, show_board=False):
        """Play random moves until two consecutive passes or a move limit.

        Args:
            show_board: when True, print the board to stderr after every
                non-pass move.
        """
        while self.move_cnt < EBVCNT * 2:
            prev_move = self.prev_move
            move = self.random_play()
            if show_board and move != PASS:
                # Use this board, not a module-level variable named `b`.
                stderr.write("\nmove count=%d\n" % self.move_cnt)
                self.showboard()
            if prev_move == PASS and move == PASS:
                break

    def _board_lines(self, black_mark):
        """Return the text rows of the board diagram, top row first.

        White stones are drawn as "O", black stones as ``black_mark``, and
        the previous move is wrapped in square brackets.
        """
        header = "  " + "".join(" " + x_labels[x] + " " for x in range(BSIZE))
        lines = [header]
        for y in range(BSIZE, 0, -1):  # BSIZE, ..., 1
            y_str = str(y) if y >= 10 else " " + str(y)
            cells = []
            for x in range(1, BSIZE + 1):
                v = xy2ev(x, y)
                color = self.color[v]
                if color <= 1:
                    stone_str = "O" if color == 0 else black_mark
                    if v == self.prev_move:
                        cells.append("[" + stone_str + "]")
                    else:
                        cells.append(" " + stone_str + " ")
                else:
                    cells.append(" . ")
            lines.append(y_str + "".join(cells) + y_str)
        lines.append(header)
        return lines

    def showboard(self):
        """Print the board diagram to stderr (black stones shown as "X")."""
        for line in self._board_lines("X"):
            stderr.write(line + "\n")
        stderr.write("\n")

    def showboard_file(self):
        """Write the board diagram to the log (black stones shown as "x")."""
        for line in self._board_lines("x"):
            logging.info(line)
        logging.info("\n")

    def feature(self):
        """Return the input planes for the side to move.

        Returns:
            Array of shape ``(BVCNT, FEATURE_CNT)``: own/opponent stones for
            the current and the ``KEEP_PREV_CNT`` previous positions, and a
            final plane filled with the side to move.
        """
        # np.float64 is what the removed `np.float` alias used to resolve to.
        feature_ = np.zeros((EBVCNT, FEATURE_CNT), dtype=np.float64)
        my = self.turn
        opp = int(self.turn == 0)

        feature_[:, 0] = (self.color == my)
        feature_[:, 1] = (self.color == opp)
        for i in range(KEEP_PREV_CNT):
            feature_[:, (i + 1) * 2] = (self.prev_color[i] == my)
            feature_[:, (i + 1) * 2 + 1] = (self.prev_color[i] == opp)
        feature_[:, FEATURE_CNT - 1] = my

        return feature_[rv_list, :]

    def hash(self):
        """Return a hash of the current position, previous position and turn."""
        # tobytes() is the supported name for the deprecated tostring().
        return (hash(self.color.tobytes()) ^
                hash(self.prev_color[0].tobytes()) ^ self.turn)

    def info(self):
        """Return ``(hash, move_cnt, candidates)`` for the current position.

        ``candidates`` holds the raw indices of all legal, non-eye-filling
        moves, followed by the raw index for a pass.
        """
        empty_list = np.where(self.color == 2)[0]
        cand_list = []
        for v in empty_list:
            if self.legal(v) and not self.eyeshape(v, self.turn):
                cand_list.append(ev2rv(v))
        cand_list.append(ev2rv(PASS))
        return (self.hash(), self.move_cnt, cand_list)

      # -*- coding: utf-8 -*-



FILTER_CNT = 256  # channel count used inside and at the output of each residual block

w_wdt = 0.007  # stddev of the normal initializer used for weight variables
b_wdt = 0.015  # stddev of the normal initializer used for bias variables

class DualNetwork(object):
    """Builds a residual network with a policy head and a value head.

    The graph is built with the TensorFlow 1.x graph API. Every variable
    created outside a reusing scope is registered in the "vars_train"
    collection, which is also what gets saved and restored.
    """

    def get_variable(self, shape_, width_=0.007, name_="weight"):
        """Create (or fetch) a variable initialized from N(0, width_).

        Args:
            shape_: variable shape.
            width_: standard deviation of the normal initializer.
            name_: variable name inside the current variable scope.

        Returns:
            The variable.
        """
        var = tf.get_variable(name_, shape=shape_,
                              initializer=tf.random_normal_initializer(
                                  mean=0, stddev=width_))

        # Register the variable only when the scope is not in reuse mode, so
        # each variable is added to the collection once.
        # NOTE(review): `_reuse` is a private attribute of the variable scope.
        if not tf.get_variable_scope()._reuse:
            tf.add_to_collection("vars_train", var)

        return var

    def conv2d(self, x, w):
        """Apply a stride-1, SAME-padded 2-D convolution of ``x`` with ``w``."""
        return tf.nn.conv2d(x, w, strides=[1, 1, 1, 1],
                            padding='SAME', name="conv2d")

    def res_block(self, x, input_size, middle_size, output_size,
                  dr_block=1.0, scope_name="res"):
        """Build one residual block made of two 3x3 convolutions.

        Args:
            x: input tensor; its last axis holds ``input_size`` channels.
            input_size: channel count of ``x``.
            middle_size: channel count after the first convolution.
            output_size: channel count after the second convolution.
            dr_block: second argument passed to ``tf.nn.dropout``
                (presumably the keep probability of the TF 1.x signature --
                TODO confirm against the installed TensorFlow version).
            scope_name: prefix of the two variable scopes of this block.

        Returns:
            ReLU of the second convolution output plus the shortcut.
        """

        with tf.variable_scope(scope_name + "_0"):
            w0 = self.get_variable([3, 3, input_size, middle_size],
                                   w_wdt, name_="weight")
            b0 = self.get_variable([middle_size], b_wdt, name_="bias")
            conv0 = tf.nn.relu(self.conv2d(x, w0) + b0)
        with tf.variable_scope(scope_name + "_1"):
            w1 = self.get_variable([3, 3, middle_size, output_size],
                                   w_wdt, name_="weight")
            b1 = self.get_variable([output_size], b_wdt, name_="bias")
            conv1 = tf.nn.dropout(self.conv2d(conv0, w1) + b1, dr_block)

        # Shortcut: pass x through unchanged, zero-pad its channel axis, or
        # keep only its first output_size channels so the shapes match.
        if input_size == output_size:
            x_add = x
        elif input_size < output_size:
            x_add = tf.pad(x, [[0, 0], [0, 0], [0, 0],
                               [0, output_size - input_size]])
        else:
            x_add = tf.slice(x, [0, 0, 0, 0],
                             [-1, BSIZE, BSIZE, output_size])

        return tf.nn.relu(tf.add(conv1, x_add))

    def model(self, x, temp=1.0, dr=1.0):
        """Build the network and return its policy and value outputs.

        Args:
            x: input features; reshaped to [-1, BSIZE, BSIZE, FEATURE_CNT].
            temp: softmax temperature that divides the policy logits.
            dr: dropout setting; the value handed to each block moves from
                1 towards ``dr`` as the block index grows.

        Returns:
            ``(policy, value)``: softmax over BVCNT + 1 moves, and a tanh
            scalar per position.

        BLOCK_CNT is not defined in this part of the file; it is expected to
        exist at module level by the time this method runs.
        """
        stdout_log("FILTER_CNT = {}".format(FILTER_CNT))
        stdout_log("BLOCK_CNT = {}".format(BLOCK_CNT))
        hi = []
        prev_h = tf.reshape(x, [-1, BSIZE, BSIZE, FEATURE_CNT])

        # residual blocks with N layers
        for i in range(BLOCK_CNT):
            input_size = FEATURE_CNT if i == 0 else FILTER_CNT
            dr_block = 1 - (1 - dr) / BLOCK_CNT * i

            hi.append(self.res_block(prev_h, input_size, FILTER_CNT, FILTER_CNT,
                                     dr_block=dr_block, scope_name="res%d" % i))
            prev_h = hi[i]

        # policy connection
        with tf.variable_scope('pfc'):
            # 1st layer
            # [-1, BSIZE, BSIZE, FILTER_CNT] => [-1, BSIZE**2 * 2]
            w_pfc0 = self.get_variable([1, 1, FILTER_CNT, 2],
                                       w_wdt, name_="weight0")
            b_pfc0 = self.get_variable([BSIZE, BSIZE, 2], b_wdt, name_="bias0")
            conv_pfc0 = tf.reshape(self.conv2d(hi[BLOCK_CNT - 1], w_pfc0)
                                   + b_pfc0, [-1, BVCNT * 2])

            # 2nd layer
            # [-1, BSIZE**2 * 2] => [-1, BSIZE**2 + 1]
            w_pfc1 = self.get_variable([BVCNT * 2, BVCNT + 1],
                                       w_wdt, name_="weight1")
            b_pfc1 = self.get_variable([BVCNT + 1], b_wdt, name_="bias1")
            conv_pfc1 = tf.matmul(conv_pfc0, w_pfc1) + b_pfc1

            # divided by softmax temp and apply softmax
            policy = tf.nn.softmax(tf.div(conv_pfc1, temp), name="policy")

        # value connection
        with tf.variable_scope('vfc'):
            # 1st layer
            # [-1, BSIZE, BSIZE, FILTER_CNT] => [-1, BSIZE**2]
            w_vfc0 = self.get_variable([1, 1, FILTER_CNT, 1],
                                       w_wdt, name_="weight0")
            b_vfc0 = self.get_variable([BSIZE, BSIZE, 1], b_wdt, name_="bias0")
            conv_vfc0 = tf.reshape(self.conv2d(hi[BLOCK_CNT - 1], w_vfc0)
                                   + b_vfc0, [-1, BVCNT])

            # 2nd layer
            # [-1, BSIZE**2] => [-1, 256]
            w_vfc1 = self.get_variable([BVCNT, 256], w_wdt, name_="weight1")
            b_vfc1 = self.get_variable([256], b_wdt, name_="bias1")
            conv_vfc1 = tf.matmul(conv_vfc0, w_vfc1) + b_vfc1
            relu_vfc1 = tf.nn.relu(conv_vfc1)

            # 3rd layer
            # [-1, 256] => [-1, 1]
            w_vfc2 = self.get_variable([256, 1], w_wdt, name_="weight2")
            b_vfc2 = self.get_variable([1], b_wdt, name_="bias2")
            conv_vfc2 = tf.matmul(relu_vfc1, w_vfc2) + b_vfc2

            # apply tanh
            value = tf.nn.tanh(tf.reshape(conv_vfc2, [-1]), name="value")

        return policy, value

    def create_sess(self, ckpt_path=""):
        """Create a session and initialize or restore the variables.

        Args:
            ckpt_path: checkpoint to restore the "vars_train" variables
                from; an empty string means start from fresh initial values.

        Returns:
            The new ``tf.Session``.
        """
        with tf.get_default_graph().as_default():

            sess_ = tf.Session(config=tf.ConfigProto(
                allow_soft_placement=True, log_device_placement=False))
            vars_train = tf.get_collection("vars_train")
            # Global variables that are not in "vars_train" are not in the
            # checkpoint and must be initialized separately.
            v_to_init = list(set(tf.global_variables()) - set(vars_train))

            saver = tf.train.Saver(vars_train)

            # Any non-empty path is handed to the saver as-is; its existence
            # is not checked here.
            if ckpt_path != "":
                stdout_log('-- Model resotore : {}'.format(ckpt_path))
                saver.restore(sess_, ckpt_path)
                sess_.run(tf.variables_initializer(v_to_init))
            else:
                stdout_log('-- Model is Blank.\n')
                sess_.run(tf.global_variables_initializer())

        return sess_

    def save_vars(self, sess_, ckpt_path=""):
        """Save the "vars_train" variables of ``sess_`` to ``ckpt_path``.

        Returns:
            The path reported by the saver.
        """
        with tf.get_default_graph().as_default():

            vars_train = tf.get_collection("vars_train")
            saver = tf.train.Saver(vars_train)
            save_path = saver.save(sess_, ckpt_path)

        return save_path


class sgf_data(object):
    """Game record read from an SGF file.

    Attributes:
        size: board size (SZ).
        komi: komi (KM).
        handicap: handicap stone count (HA).
        result: 1 if black won, -1 if white won, 0 if unknown.
        history: moves as extended-board indices, in playing order.
        move_cnt: number of moves read.
    """

    def __init__(self):
        self.size = BSIZE
        self.komi = KOMI
        self.handicap = 0
        self.result = 0
        self.history = []
        self.move_cnt = 0

    def sgf2ev(self, v_sgf):
        """Convert an SGF coordinate such as ``"pd"`` to an extended index.

        Anything that is not exactly two characters long is treated as a
        pass and maps to ``(size + 2) ** 2``.
        """
        if len(v_sgf) != 2:
            return (self.size + 2) ** 2
        labels = "abcdefghijklmnopqrs"
        x = labels.find(v_sgf[0]) + 1
        y = labels.find(v_sgf[1]) + 1
        # SGF rows count from the top; the board's rows count from the bottom.
        return x + (self.size + 1 - y) * (self.size + 2)

    def import_file(self, file_path):
        """Read ``file_path`` and fill in this record.

        Each line is scanned for ``KEY[value]`` properties; only SZ, KM, HA,
        RE, B and W are used. A file that cannot be read or decoded is
        reported and treated as empty. Malformed KM / HA values are reported
        and ignored (KM falls back to 6.5).

        Raises:
            OSError: if the file cannot be opened.
        """
        lines = []
        # The context manager closes the file even when reading fails.
        with open(file_path, encoding="utf-8") as f:
            try:
                lines = f.readlines()
            except (UnicodeDecodeError, OSError):
                print("reedlines exception = {}".format(file_path))

        for line in lines:
            rest = line.rstrip("\n")
            while len(rest) > 3:
                open_br = rest.find("[")
                close_br = rest.find("]")
                if open_br < 0 or close_br < 0:
                    break
                if close_br == 0:
                    # Stray closing bracket at the start: skip over it.
                    rest = rest[1:]
                    continue

                key = rest[0:open_br].lstrip(";")
                val = rest[open_br + 1:close_br]

                if key == "SZ":
                    self.size = int(val)
                elif key == "KM":
                    try:
                        self.komi = float(val)
                    except ValueError:
                        print("KM error = {}".format(file_path))
                        self.komi = 6.5
                elif key == "HA":
                    try:
                        self.handicap = int(val)
                    except ValueError:
                        print("HA error = {}".format(file_path))
                elif key == "RE":
                    if val.find("B") >= 0:
                        self.result = 1
                    elif val.find("W") >= 0:
                        self.result = -1
                    else:
                        self.result = 0
                elif key == "B" or key == "W":
                    self.history.append(self.sgf2ev(val))
                    self.move_cnt += 1

                rest = rest[close_br + 1:]

        # No recorded result and the game did not end with two passes:
        # credit the win to the side that made the last move.
        if self.result == 0 and len(self.history) >= 2:
            pass_ = (self.size + 2) ** 2
            if self.history[-1] != pass_ or self.history[-2] != pass_:
                self.result = 1 if len(self.history) % 2 == 1 else -1


def import_sgf(dir_path):
    """Load every SGF file matched by the glob pattern ``dir_path``.

    Progress is printed once per 100 files.

    Args:
        dir_path: glob pattern, used as given (no "/*.sgf" is appended).

    Returns:
        List of ``sgf_data`` records, one per matched file, in glob order.
    """
    print('dir_path:', dir_path)
    paths = glob.glob(dir_path)
    print("file_list = {}".format(len(paths)))

    records = []
    for count, path in enumerate(paths, start=1):
        if count % 100 == 0:
            print("import_sgf file_list count = {}".format(count))
        record = sgf_data()
        record.import_file(path)
        records.append(record)

    return records


def sgf2feed(sgf_list):
    """Turn game records into per-move training arrays.

    Only games on a BSIZE board, without handicap and with a known result
    are used. Each move of such a game yields one row.

    Args:
        sgf_list: list of ``sgf_data`` records.

    Returns:
        ``(feature, move, result)``:
        feature -- uint8, shape (rows, BVCNT, FEATURE_CNT), the board
            features before the move;
        move -- uint8, shape (rows, BVCNT + 1), one-hot played move;
        result -- int8, shape (rows,), game result seen from the side
            to move.
    """

    def is_usable(game):
        return (game.size == BSIZE and game.handicap == 0
                and game.result != 0)

    # First pass: count rows so the arrays can be allocated once.
    total_cnt = sum(game.move_cnt for game in sgf_list if is_usable(game))

    feature = np.zeros((total_cnt, BVCNT, FEATURE_CNT), dtype=np.uint8)
    move = np.zeros((total_cnt, BVCNT + 1), dtype=np.uint8)
    result = np.zeros((total_cnt), dtype=np.int8)

    board = Board()
    row = 0
    stdout_log("sgf2feed sgf_list = {}".format(len(sgf_list)))
    # Second pass: replay every usable game and record one row per move.
    for seen, game in enumerate(sgf_list, start=1):
        if seen % 100 == 0:
            print("sgf2feed sgf_list count = {}".format(seen))
        if not is_usable(game):
            continue
        board.clear()
        for v in game.history:
            feature[row] = board.feature()
            move[row, ev2rv(v)] = 1
            # board.turn is 1 for black and 0 for white, so the factor is
            # +1 or -1 and flips the result to the mover's point of view.
            result[row] = game.result * (2 * board.turn - 1)

            board.play(v, False)
            row += 1

    return feature, move, result


# Index permutations for the 8 board symmetries (identity, then alternating
# vertical flips and transposes). Entry k maps raw indices 0..BVCNT-1; the
# last element (BVCNT, the pass index) always stays in place.
rnd_array = [np.arange(BVCNT + 1)]
for i in range(1, 8):
    # Start from a COPY of the previous permutation. Appending the previous
    # array object itself would make all eight entries share one buffer, and
    # every "symmetry" would end up identical.
    rnd_array.append(np.copy(rnd_array[i - 1]))
    rot_array = rnd_array[i - 1][:BVCNT].reshape(BSIZE, BSIZE)
    if i % 2 == 0:
        rot_array = rot_array.transpose(1, 0)
    else:
        rot_array = rot_array[::-1, :]
    rnd_array[i][:BVCNT] = rot_array.reshape(BVCNT)


class Feed(object):
    """Serves shuffled, symmetry-augmented mini-batches of training data."""

    def __init__(self, f_, m_, r_):
        """Store the arrays and draw the first random sample order.

        Args:
            f_: feature array; its first axis indexes samples.
            m_: move array, one row per sample.
            r_: result array, one entry per sample.
        """
        self._feature = f_
        self._move = m_
        self._result = r_
        self.size = self._feature.shape[0]
        self._idx = 0
        self._perm = np.arange(self.size)
        np.random.shuffle(self._perm)

        logging.info("self._move shape = {}".format(m_.shape))
        logging.info("self._result shape = {}".format(r_.shape))
        logging.info("self.feature shape = {}".format(self._feature.shape))
        logging.info("self.size = {}".format(self._feature.shape[0]))
        logging.info("self._idx = {}".format(self._idx))
        logging.info("self._perm shape = {}".format(self._perm.shape))

    def next_batch(self, batch_size=128):
        """Return the next mini-batch as float32 arrays.

        One of the 8 entries of ``rnd_array`` is drawn at random and applied
        to both the features and the moves of the batch. The last batch of
        a pass over the data may hold fewer than ``batch_size`` samples.

        Args:
            batch_size: number of samples requested.

        Returns:
            ``(f_batch, m_batch, r_batch)``.
        """
        # `>=`, not `>`: when the previous batch ended exactly at the end of
        # the data there is nothing left, and slicing would give an empty
        # batch.
        if self._idx >= self.size:
            np.random.shuffle(self._perm)
            self._idx = 0
        start = self._idx
        self._idx += batch_size
        end = self._idx

        rnd_cnt = np.random.choice(np.arange(8))
        picked = self._perm[start:end]  # sample indices of this mini-batch
        sym = rnd_array[rnd_cnt]

        f_batch = self._feature[picked][:, sym[:BVCNT]].astype(np.float32)
        m_batch = self._move[picked][:, sym].astype(np.float32)
        r_batch = self._result[picked].astype(np.float32)

        return f_batch, m_batch, r_batch


def average_gradients(tower_grads):
    """Average the gradients of each variable over all towers.

    Args:
        tower_grads: one list of ``(gradient, variable)`` pairs per tower,
            with the variables in the same order in every list.

    Returns:
        A list of ``(averaged_gradient, variable)`` pairs; the variable is
        taken from the first tower.
    """
    averaged = []
    for per_variable in zip(*tower_grads):
        # Give every gradient a leading tower axis, then average over it.
        stacked = tf.concat(
            [tf.expand_dims(g, 0) for g, _ in per_variable], 0)
        mean_grad = tf.reduce_mean(stacked, 0)
        averaged.append((mean_grad, per_variable[0][1]))

    return averaged


def learn(lr_=1e-4, dr_=0.7, sgf_dir="sgf/", use_gpu=True, gpu_cnt=1, model_name=""):
    """Train the dual network on pickled feeds, resuming from saved state.

    Builds one training tower per device, averages the tower gradients and
    applies them with Adam.  Training data is read from
    ``feed_train_<no>.pickle`` files in a per-epoch shuffled order; progress
    (epoch, step, global step, file position) is periodically written next to
    the checkpoint so an interrupted session can continue where it stopped.
    The function returns early once the wall-clock limit
    ``STEP_TIME - SAVE_TIME`` (module-level constants) is exceeded.

    Files read, all under ``sgf_dir``: ``<model_name>_trained.txt``,
    ``<model_name>_epoch_count.txt``, ``feed_n_17979696.pickle``,
    ``feed_train_<no>.pickle``, ``feed_test.pickle`` and, when resuming inside
    an epoch, ``<model_name>_feed_files.txt`` / ``<model_name>_feed_n_shuffled.txt``.
    The pickles are loaded with ``pickle.load`` and must therefore come from a
    trusted source.

    Args:
        lr_: initial learning rate fed to the Adam optimizer.
        dr_: value passed as ``dr`` to ``DualNetwork.model`` for the training
            towers (the accuracy graph uses 1.0).
        sgf_dir: directory prefix for every file above.  It is concatenated
            directly with file names, so it must end with a path separator.
        use_gpu: place the towers on ``/gpu:N`` when true, else ``/cpu:N``.
        gpu_cnt: number of towers/devices.
        model_name: checkpoint file name, also used as the state-file prefix.

    Returns:
        None.
    """
    device_name = "gpu" if use_gpu else "cpu"
    with tf.get_default_graph().as_default(), tf.device("/cpu:0"):

        # placeholders
        f_list = []
        r_list = []
        m_list = []
        for gpu_idx in range(gpu_cnt):
            f_list.append(tf.placeholder(
                "float", shape=[None, BVCNT, FEATURE_CNT],
                name="feature_%d" % gpu_idx))
            r_list.append(tf.placeholder(
                "float", shape=[None], name="result_%d" % gpu_idx))
            m_list.append(tf.placeholder(
                "float", shape=[None, BVCNT + 1], name="move_%d" % gpu_idx))

        lr = tf.placeholder(tf.float32, shape=[], name="learning_rate")

        opt = tf.train.AdamOptimizer(lr)
        dn = DualNetwork()

        # compute and apply gradients
        tower_grads = []
        with tf.variable_scope(tf.get_variable_scope()):
            for gpu_idx in range(gpu_cnt):
                with tf.device("/%s:%d" % (device_name, gpu_idx)):

                    policy_, value_ = dn.model(
                        f_list[gpu_idx], temp=1.0, dr=dr_)
                    # Clip so the log below never sees 0.
                    policy_ = tf.clip_by_value(policy_, 1e-6, 1)

                    loss_p = -tf.reduce_mean(tf.log(
                        tf.reduce_sum(tf.multiply(m_list[gpu_idx], policy_), 1)))
                    loss_v = tf.reduce_mean(
                        tf.square(tf.subtract(value_, r_list[gpu_idx])))
                    if gpu_idx == 0:
                        vars_train = tf.get_collection("vars_train")
                    loss_l2 = tf.add_n([tf.nn.l2_loss(v) for v in vars_train])
                    loss = loss_p + 0.05 * loss_v + 1e-4 * loss_l2

                    tower_grads.append(opt.compute_gradients(loss))
                    # Later towers share the variables created by the first.
                    tf.get_variable_scope().reuse_variables()

        train_op = opt.apply_gradients(average_gradients(tower_grads))

        # calculate accuracy
        with tf.variable_scope(tf.get_variable_scope(), reuse=True):
            with tf.device("/%s:0" % device_name):
                f_acc = tf.placeholder(
                    "float", shape=[None, BVCNT, FEATURE_CNT], name="feature_acc")
                m_acc = tf.placeholder(
                    "float", shape=[None, BVCNT + 1], name="move_acc")
                r_acc = tf.placeholder(
                    "float", shape=[None], name="result_acc")

                p_, v_ = dn.model(f_acc, temp=1.0, dr=1.0)
                prediction = tf.equal(tf.reduce_max(p_, 1),
                                      tf.reduce_max(tf.multiply(p_, m_acc), 1))
                accuracy_p = tf.reduce_mean(tf.cast(prediction, "float"))
                accuracy_v = tf.reduce_mean(tf.square(tf.subtract(v_, r_acc)))
                accuracy = (accuracy_p, accuracy_v)

        last_model = sgf_dir + model_name
        stdout_log("last_model = {}".format(last_model))

        # Restore the weights only when a checkpoint data file already exists.
        if os.path.isfile(last_model + ".data-00000-of-00001"):
            sess = dn.create_sess(last_model)
        else:
            sess = dn.create_sess()

    # load sgf and convert to feed
    stdout_log("----------- start training-------------")

    limit_time = time.time() + STEP_TIME - SAVE_TIME
    endt = time.ctime(limit_time + JST)
    cnvt = time.strptime(endt)

    stdout_log("STEP_TIME = {}".format(STEP_TIME))
    stdout_log("SAVE_TIME = {}".format(SAVE_TIME))
    stdout_log("END_TIME = {}".format(time.strftime("%Y/%m/%d %H:%M:%S", cnvt)))

    # Common prefix of the per-model state files.
    state_prefix = sgf_dir + model_name + '_'

    # Restore the state saved by the previous run.
    with open(state_prefix + 'trained.txt') as f:
        l_strip = [s.strip() for s in f.readlines()]
    start_epoch_idx = int(l_strip[0])
    start_step_idx = int(l_strip[1])
    global_step_idx = int(l_strip[2])
    no = int(l_strip[3])  # position inside the (shuffled) feed_files list

    stdout_log("start_epoch_idx = {}".format(start_epoch_idx))
    stdout_log("start_step_idx = {}".format(start_step_idx))
    stdout_log("global_step_idx = {}".format(global_step_idx))
    stdout_log("feed_file_no = {}".format(no))

    with open(state_prefix + 'epoch_count.txt') as f:
        epoch_count = int(f.read())
    stdout_log("epoch_count = {}".format(epoch_count))

    stdout_log("Converting ...")
    start_time = time.time()

    # Cumulative sample counts, one entry per feed_train_<i>.pickle.
    with open(sgf_dir + 'feed_n_17979696.pickle', mode='rb') as f:
        feed_n = pickle.load(f)
    stdout_log("loaded feed_n count = {}".format(len(feed_n)))
    stdout_log("loaded feed_n = {}".format(feed_n))

    # Highest training-file number to use (files 0..283).
    n = 283
    stdout_log("feed_files No. = {}".format(n))

    # Total number of samples across the files in use.
    feed_cnt = feed_n[n]

    # File numbers to load; shuffled in place at the start of each new epoch.
    feed_files = list(range(n + 1))

    # Per-file sample counts recovered from the cumulative counts.
    feed_n_1 = [feed_n[i] - feed_n[i - 1] if i else feed_n[0]
                for i in range(len(feed_n))]

    stdout_log("loaded feed_n_1 count = {}".format(len(feed_n_1)))
    stdout_log("loaded feed_n_1 = {}".format(feed_n_1))

    # learning settings
    batch_cnt = 512
    total_epochs = 10
    epoch_steps = feed_cnt // (batch_cnt * gpu_cnt)
    total_steps = total_epochs * epoch_steps
    learning_rate = lr_

    # Reporting intervals; clamped to 1 so a small data set (total_steps
    # below 1000 / 200) cannot cause a modulo-by-zero in the loop.
    progress_interval = max(1, total_steps // 1000)
    eval_interval = max(1, total_steps // 200)

    stdout_log("feed_cnt = %d" % (feed_cnt))
    stdout_log("batch_cnt = %d" % (batch_cnt))
    stdout_log("total_epochs = %d" % (total_epochs))
    stdout_log("epoch_steps = %d" % (epoch_steps))
    stdout_log("total_steps = %d" % (total_steps))
    stdout_log("learning rate = %.1g" % (learning_rate))
    stdout_log("model_name = %s" % (model_name))

    # training
    for epoch_idx in range(start_epoch_idx, total_epochs):
        # Halve the learning rate at epochs 8, 16, ...
        # NOTE(review): halvings from epochs before start_epoch_idx are not
        # replayed, so a run resumed after epoch 8 starts again from lr_.
        if epoch_idx > 0 and (epoch_idx - 8) % 8 == 0:
            learning_rate *= 0.5
            stdout_log("learning rate=%.1g" % (learning_rate))

        if epoch_idx > epoch_count - 1:
            # Entering a new epoch: draw a fresh file order and persist it.
            stdout_log("shuffling...")
            epoch_count = epoch_idx + 1

            # Random order in which the training files are read.
            random.shuffle(feed_files)
            stdout_log("feed_files_shuffled = {}".format(feed_files))

            # Per-file sample counts in shuffled order, and their running sum.
            shuffled_counts = [feed_n_1[file_no] for file_no in feed_files]
            feed_n_shuffled = []
            running_total = 0
            for cnt in shuffled_counts:
                running_total += cnt
                feed_n_shuffled.append(running_total)

            stdout_log("feed_n_1 = {}".format(shuffled_counts))
            stdout_log("feed_n_shuffled = {}".format(feed_n_shuffled))

            with open(state_prefix + 'epoch_count.txt', mode='w') as f:
                f.write(str(epoch_count))
            with open(state_prefix + 'feed_files.txt', mode='w') as f:
                f.write('\n'.join(str(num) for num in feed_files))
            with open(state_prefix + 'feed_n_shuffled.txt', mode='w') as f:
                f.write('\n'.join(str(num) for num in feed_n_shuffled))
        else:
            # Resuming inside an epoch: reuse the order saved when it began.
            with open(state_prefix + 'feed_files.txt', mode='r') as f:
                feed_files = [int(s.strip()) for s in f.readlines()]
            stdout_log("feed_files reload = {}".format(feed_files))
            with open(state_prefix + 'feed_n_shuffled.txt', mode='r') as f:
                feed_n_shuffled = [int(s.strip()) for s in f.readlines()]
            stdout_log("feed_n_shuffled reload = {}".format(feed_n_shuffled))

        file_number = feed_files[no]

        # Opened with a context manager so the handle is closed after loading.
        with open(sgf_dir + 'feed_train_' + str(file_number) + '.pickle', 'rb') as f:
            feed_train = pickle.load(f)

        stdout_log('training : feed_train_' + str(file_number) + '.pickle')
        stdout_log("training : steps = {}".format(feed_train[0].size // batch_cnt + 1))

        for step_idx in range(start_step_idx, epoch_steps):
            feed_dict_ = {lr: learning_rate}
            for gpu_idx in range(gpu_cnt):

                # Move on to the next file once the samples consumed in this
                # epoch reach the cumulative count of the current file.
                if feed_n_shuffled[no] <= step_idx * batch_cnt:
                    stdout_log('training : step_idx = {}'.format(step_idx))
                    no += 1
                    if no > n:
                        no = 0
                    file_number = feed_files[no]
                    with open(sgf_dir + 'feed_train_' + str(file_number) + '.pickle', 'rb') as f:
                        feed_train = pickle.load(f)
                    stdout_log('training : feed_train_' + str(file_number) + '.pickle')
                    stdout_log("training : steps = {}".format(feed_train[0].size // batch_cnt + 1))

                batch = feed_train[0].next_batch(batch_cnt)
                feed_dict_[f_list[gpu_idx]] = np.array(batch[0])
                feed_dict_[m_list[gpu_idx]] = np.array(batch[1])
                feed_dict_[r_list[gpu_idx]] = np.array(batch[2])

            sess.run(train_op, feed_dict=feed_dict_)

            global_step_idx += 1

            if global_step_idx % 100 == 0:
                stdout_log("global_steps... = {}".format(global_step_idx))

            # Periodic progress report, checkpoint and resume-state dump.
            if global_step_idx % progress_interval == 0:
                progress_now = float(global_step_idx) / total_steps * 100
                str_log = "progress: %03.2f[%%] " % (progress_now)

                elapsed_time = time.time() - start_time
                str_log += "%03.1f" % (elapsed_time) + "[sec]"
                stdout_log("%s" % (str_log))
                start_time = time.time()

                dn.save_vars(sess, sgf_dir + model_name)
                stdout_log("save model.ckpt")

                with open(state_prefix + 'trained.txt', mode='w') as f:
                    trained = [str(epoch_idx), str(step_idx + 1), str(global_step_idx), str(no)]
                    f.write('\n'.join(trained))
                stdout_log("epoch_idx = {}, step_idx = {}, gloal_step_idx = {}, feed_file_no = {}".format(epoch_idx, step_idx + 1, global_step_idx, no))

            # Accuracy evaluation: periodically, and once when time runs out.
            if time.time() > limit_time or global_step_idx % eval_interval == 0:

                # Loaded anew for every evaluation, so the test feed always
                # starts from the state stored in the pickle.
                with open(sgf_dir + 'feed_test.pickle', mode='rb') as f:
                    feed_test = pickle.load(f)
                feed = [feed_train, feed_test]
                stdout_log("loaded feed_test.")

                str_log = ""
                # At least one step, so the averages below never divide by 0
                # when the test feed is smaller than one batch.
                acc_steps = max(1, feed[1][0].size // batch_cnt)
                np.random.shuffle(feed[0][0]._perm)
                for i in range(2):
                    acc_str = "train" if i == 0 else "test "
                    acc_sum = [0.0, 0.0]

                    for _ in range(acc_steps):
                        acc_batch = feed[i][0].next_batch(batch_cnt)
                        accur = sess.run(accuracy, feed_dict={f_acc: acc_batch[0], m_acc: acc_batch[1], r_acc: acc_batch[2]})
                        acc_sum[0] += accur[0]
                        acc_sum[1] += accur[1]

                    str_log += "%s: policy=%3.5f[%%]  value=%.5f " % (acc_str, acc_sum[0] / acc_steps * 100, acc_sum[1] / acc_steps / 2)

                stdout_log("%s" % (str_log))
                stdout_log("save model.ckpt")
                dn.save_vars(sess, sgf_dir + model_name)

                # Keep the latest accuracy line next to the checkpoint,
                # time-stamped with a +540 minute offset.
                with open(sgf_dir + model_name + '.txt', mode='w') as f:
                    now_dt = datetime.now()
                    dt_jst = now_dt + timedelta(minutes=540)
                    str_datetime = datetime.strftime(dt_jst, "%Y/%m/%d-%H:%M:%S")
                    f.write(str_datetime + ' ' + model_name + ' ' + str_log)

                if time.time() > limit_time:
                    stdout_log("★★★ Time Limit....")
                    return

        # One epoch finished: restart from the first file and the first step.
        no = 0
        start_step_idx = 0

    dn.save_vars(sess, sgf_dir + model_name)

        
import sys
# Make modules stored on the mounted Google Drive folder importable.
sys.path.append('/content/drive/My Drive/')

# NOTE(review): Feed is not referenced by name below; presumably imported so
# pickle can resolve the class when the feed_*.pickle files are loaded -- confirm.
from learn import Feed

# Time constants (seconds) read by learn().
STEP_TIME = 3600 * 12  # added to the start time to form the session time limit
SAVE_TIME = 60 * 26  # subtracted from that limit
JST = 3600 * 9  # offset added to the limit before END_TIME is formatted

# Used below to build the checkpoint file name.
BLOCK_CNT = 8

if __name__ == "__main__":

    # Checkpoint name encodes the block count, e.g. "model_256_8.ckpt".
    model = "model_256_" + str(BLOCK_CNT) + ".ckpt"
    learn(3e-4, 0.5, sgf_dir=r"/content/drive/My Drive/", use_gpu=True, gpu_cnt=1, model_name=model)


Mounted at /content/drive
2019/04/22-07:36:55 FILTER_CNT = 256
2019/04/22-07:36:55 BLOCK_CNT = 8
Instructions for updating:
Colocations handled automatically by placer.
Instructions for updating:
Please use `rate` instead of `keep_prob`. Rate should be set to `rate = 1 - keep_prob`.
Instructions for updating:
Deprecated in favor of operator or tf.math.divide.
Instructions for updating:
Use tf.cast instead.
2019/04/22-07:36:56 FILTER_CNT = 256
2019/04/22-07:36:56 BLOCK_CNT = 8
2019/04/22-07:36:57 last_model = /content/drive/My Drive/model_256_8.ckpt
2019/04/22-07:36:58 -- Model resotore : /content/drive/My Drive/model_256_8.ckpt
Instructions for updating:
Use standard file APIs to check for files with this prefix.
INFO:tensorflow:Restoring parameters from /content/drive/My Drive/model_256_8.ckpt
2019/04/22-07:37:01 ----------- start training-------------
2019/04/22-07:37:01 STEP_TIME = 43200
2019/04/22-07:37:01 SAVE_TIME = 1560
2019/04/22-07:37:01 END_TIME = 2019/04/22 19:11:01
2019/04/