<a href="https://colab.research.google.com/github/cafeblue999/sgf_4/blob/master/myZero.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [0]:
# -*- coding: utf-8 -*-

from sys import stderr, stdout, exit
from datetime import datetime, date, timedelta
import numpy as np
import logging
import pickle
import tensorflow as tf
import glob
import time
from collections import Counter
import random
import os

from keras.layers import *
from keras.models import Model

from google.colab import files
from google.colab import drive


# Mount Google Drive inside the Colab VM; the training entry point at the
# bottom of the file reads and writes under '/content/drive/My Drive/'.
drive.mount('/content/drive', force_remount=True)

# Never abbreviate arrays with "..." when they are printed or formatted.
np.set_printoptions(threshold=np.inf)

# One log file per run, named after the start time of the run.
basename = datetime.now().strftime("%Y%m%d-%H%M%S")
log_file = 'log_' + basename + '.txt'

# Route the `logging` module to that file (tab-separated: time, level, message).
logging.basicConfig(format='%(asctime)s\t%(levelname)s\t%(message)s', datefmt='%Y/%m/%d %H:%M:%S', filename=log_file, level=logging.INFO)

def stdout_log(str):
    """Write one timestamped line to stdout.

    The timestamp is the local clock shifted forward by nine hours
    (540 minutes) and formatted as ``YYYY/MM/DD-HH:MM:SS``.

    Args:
        str: message text; must be a string (it is joined to the timestamp).
    """
    shifted = datetime.now() + timedelta(hours=9)
    stamp = shifted.strftime("%Y/%m/%d-%H:%M:%S")
    stdout.write(" ".join((stamp, str)) + "\n")

    
# Board geometry. Positions are indices into a flattened "extended" board
# that is two points wider and taller than the playable board.
BSIZE = 19  # board size
EBSIZE = BSIZE + 2  # extended board size
BVCNT = BSIZE ** 2  # vertex count
EBVCNT = EBSIZE ** 2  # extended vertex count
PASS = EBVCNT  # pass
VNULL = EBVCNT + 1  # invalid position
KOMI = 6.5  # subtracted from black's margin in Board.score()
dir4 = [1, EBSIZE, -1, -EBSIZE]  # index offsets of the 4 orthogonal neighbours
diag4 = [1 + EBSIZE, EBSIZE - 1, -EBSIZE - 1, 1 - EBSIZE]  # index offsets of the 4 diagonal neighbours
KEEP_PREV_CNT = 7  # number of previous board states kept by Board
FEATURE_CNT = KEEP_PREV_CNT * 2 + 3  # 17 feature planes per point
x_labels = "ABCDEFGHJKLMNOPQRST"  # column letters (the letter "I" is skipped)


def ev2xy(ev):
    """Split an extended-board index into its ``(x, y)`` coordinates."""
    row, col = divmod(ev, EBSIZE)
    return col, row

def xy2ev(x, y):
    """Combine ``(x, y)`` coordinates into an extended-board index."""
    return x + EBSIZE * y

def rv2ev(rv):
    """Map a real-board index (0..BVCNT, BVCNT meaning pass) to an extended-board index."""
    if rv == BVCNT:
        return PASS
    row, col = divmod(rv, BSIZE)
    # Shift by one in both directions to step over the border ring.
    return (col + 1) + (row + 1) * EBSIZE

def ev2rv(ev):
    """Map an extended-board index (or PASS) to a real-board index (BVCNT for pass)."""
    if ev == PASS:
        return BVCNT
    row, col = divmod(ev, EBSIZE)
    # Undo the one-point border offset in both directions.
    return (col - 1) + (row - 1) * BSIZE

def ev2str(ev):
    """Format an extended-board index as a coordinate such as ``"D4"``.

    Any index at or beyond PASS is rendered as ``"PASS"``.
    """
    if ev < PASS:
        col, row = ev2xy(ev)
        return "{}{}".format(x_labels[col - 1], row)
    return "PASS"

def str2ev(v_str):
    """Parse a coordinate string (case-insensitive) into an extended-board index.

    ``"pass"`` and ``"resign"`` both map to PASS; anything else is read as a
    column letter followed by a row number.
    """
    label = v_str.upper()
    if label in ("PASS", "RESIGN"):
        return PASS
    col = x_labels.find(label[0]) + 1
    row = int(label[1:])
    return xy2ev(col, row)

def turn2str(turn):
    """Return ``'W'`` for turn 0 and ``'B'`` for every other value."""
    return 'W' if turn == 0 else 'B'

# Extended-board index of every playable point, ordered by real-board index.
rv_list = [rv2ev(i) for i in range(BVCNT)]


class StoneGroup(object):
    """Per-group bookkeeping: stone count and the set of liberty positions."""

    def __init__(self):
        # All counters start at the VNULL sentinel until clear() is called.
        self.lib_cnt = VNULL  # number of liberties
        self.size = VNULL  # number of stones
        self.v_atr = VNULL  # last liberty recorded; the Atari point when lib_cnt == 1
        self.libs = set()  # liberty positions

    def clear(self, stone=True):
        """Reset to a freshly placed single stone, or to "no group" when *stone* is False."""
        if stone:
            self.lib_cnt, self.size = 0, 1
        else:
            self.lib_cnt = self.size = VNULL
        self.v_atr = VNULL
        self.libs.clear()

    def add(self, v):
        """Record *v* as a liberty; a position already recorded is ignored."""
        if v in self.libs:
            return
        self.libs.add(v)
        self.lib_cnt += 1
        self.v_atr = v

    def sub(self, v):
        """Drop liberty *v*; a position that is not a liberty is ignored."""
        if v not in self.libs:
            return
        self.libs.discard(v)
        self.lib_cnt -= 1

    def merge(self, other):
        """Absorb the liberties and stones of *other* into this group."""
        self.libs.update(other.libs)
        self.lib_cnt = len(self.libs)
        self.size += other.size
        if self.lib_cnt == 1:
            # Exactly one liberty left: remember it as the Atari point.
            (self.v_atr,) = self.libs


class Board(object):
    """Go board with incremental stone-group tracking and feature export.

    Positions are indices into a flattened EBSIZE x EBSIZE "extended" board.
    Points outside the playable area keep the colour "exterior", so neighbour
    lookups through ``dir4`` / ``diag4`` need no bounds checks.

    Colour codes stored in ``self.color``:
        0: white, 1: black, 2: empty, 3: exterior
    """

    def __init__(self):
        # 1-d array ([EBVCNT]) of stones or empty or exterior
        self.color = np.full(EBVCNT, 3)
        self.sg = [StoneGroup() for _ in range(EBVCNT)]  # stone groups
        self.clear()

    def clear(self):
        """Reset to an empty board with black to move."""
        self.color[rv_list] = 2  # empty
        self.id = np.arange(EBVCNT)  # id of stone group
        self.next = np.arange(EBVCNT)  # next position in the same group
        for group in self.sg:
            group.clear(stone=False)
        self.prev_color = [np.copy(self.color) for _ in range(KEEP_PREV_CNT)]

        self.ko = VNULL  # illegal position due to Ko
        self.turn = 1  # black
        self.move_cnt = 0  # move count
        self.prev_move = VNULL  # previous move
        self.remove_cnt = 0  # removed stones count
        self.history = []

    def copy(self, b_cpy):
        """Copy the full state of this board into the existing board *b_cpy*.

        The target ends up as an exact snapshot: liberty sets and the move
        history are replaced rather than merged, so a target board that has
        already been played on can be reused safely.
        """
        b_cpy.color = np.copy(self.color)
        b_cpy.id = np.copy(self.id)
        b_cpy.next = np.copy(self.next)
        for i in range(EBVCNT):
            src, dst = self.sg[i], b_cpy.sg[i]
            dst.lib_cnt = src.lib_cnt
            dst.size = src.size
            dst.v_atr = src.v_atr
            # A union (|=) would keep stale liberties from the target's
            # previous state, leaving libs out of sync with lib_cnt.
            dst.libs = set(src.libs)
        for i in range(KEEP_PREV_CNT):
            b_cpy.prev_color[i] = np.copy(self.prev_color[i])

        b_cpy.ko = self.ko
        b_cpy.turn = self.turn
        b_cpy.move_cnt = self.move_cnt
        b_cpy.prev_move = self.prev_move
        b_cpy.remove_cnt = self.remove_cnt

        # Replace instead of append so repeated copies do not grow the list.
        b_cpy.history = list(self.history)

    def remove(self, v):
        """Remove the whole stone group that contains the stone at *v*.

        Each removed point is counted in ``remove_cnt`` and is handed back as
        a liberty to the groups registered on its four neighbours.
        """
        v_tmp = v
        while True:
            self.remove_cnt += 1
            self.color[v_tmp] = 2  # empty
            self.id[v_tmp] = v_tmp  # reset id
            for d in dir4:
                nv = v_tmp + d
                # add liberty to neighbor groups
                self.sg[self.id[nv]].add(v_tmp)
            v_next = self.next[v_tmp]
            self.next[v_tmp] = v_tmp
            v_tmp = v_next
            if v_tmp == v:
                break  # finish when all stones are removed

    def merge(self, v1, v2):
        """Merge the stone groups at *v1* and *v2*; the larger group keeps its id."""
        id_base = self.id[v1]
        id_add = self.id[v2]
        if self.sg[id_base].size < self.sg[id_add].size:
            id_base, id_add = id_add, id_base  # swap
        self.sg[id_base].merge(self.sg[id_add])

        v_tmp = id_add
        while True:
            self.id[v_tmp] = id_base  # change id to id_base
            v_tmp = self.next[v_tmp]
            if v_tmp == id_add:
                break
        # swap next id for circulation (joins the two circular lists)
        self.next[v1], self.next[v2] = self.next[v2], self.next[v1]

    def place_stone(self, v):
        """Put a stone of the side to move at *v*, then merge and capture.

        ``remove_cnt`` is reset and afterwards holds the number of opponent
        stones captured by this move.
        """
        self.color[v] = self.turn
        self.id[v] = v
        self.sg[self.id[v]].clear(stone=True)
        for d in dir4:
            nv = v + d
            if self.color[nv] == 2:
                self.sg[self.id[v]].add(nv)  # add liberty
            else:
                self.sg[self.id[nv]].sub(v)  # remove liberty
        # merge stone groups
        for d in dir4:
            nv = v + d
            if self.color[nv] == self.turn and self.id[nv] != self.id[v]:
                self.merge(v, nv)
        # remove opponent's stones
        self.remove_cnt = 0
        for d in dir4:
            nv = v + d
            if self.color[nv] == int(self.turn == 0) and \
                    self.sg[self.id[nv]].lib_cnt == 0:
                self.remove(nv)

    def legal(self, v):
        """Return True when the side to move may play at *v*.

        PASS is always legal. The Ko point and occupied points are illegal.
        A point with an empty neighbour is legal. Otherwise the move is legal
        when it captures a neighbouring opponent group that is in Atari, or
        when at least one neighbouring friendly stone belongs to a group with
        more than one liberty.
        """
        if v == PASS:
            return True
        elif v == self.ko or self.color[v] != 2:
            return False

        stone_cnt = [0, 0]
        atr_cnt = [0, 0]
        for d in dir4:
            nv = v + d
            c = self.color[nv]
            if c == 2:
                return True
            elif c <= 1:
                stone_cnt[c] += 1
                if self.sg[self.id[nv]].lib_cnt == 1:
                    atr_cnt[c] += 1

        return (atr_cnt[int(self.turn == 0)] != 0 or
                atr_cnt[self.turn] < stone_cnt[self.turn])

    def eyeshape(self, v, pl):
        """Return True when *v* looks like an eye of player *pl*.

        Every orthogonal neighbour must be a *pl* stone or exterior. The
        point is then rejected when two or more diagonals are "wedged"
        (opponent stones, plus one if any diagonal is exterior), unless
        exactly two are wedged and a diagonal opponent stone is in Atari with
        its last liberty not being the Ko point.
        """
        if v == PASS:
            return False
        for d in dir4:
            c = self.color[v + d]
            if c == 2 or c == int(pl == 0):
                return False

        diag_cnt = [0, 0, 0, 0]
        for d in diag4:
            nv = v + d
            diag_cnt[self.color[nv]] += 1

        wedge_cnt = diag_cnt[int(pl == 0)] + int(diag_cnt[3] > 0)
        if wedge_cnt == 2:
            for d in diag4:
                nv = v + d
                if self.color[nv] == int(pl == 0) and \
                        self.sg[self.id[nv]].lib_cnt == 1 and \
                        self.sg[self.id[nv]].v_atr != self.ko:
                    return True

        return wedge_cnt < 2

    def play(self, v, not_fill_eye=True):
        """Play *v* for the side to move.

        Args:
            v: extended-board index, or PASS.
            not_fill_eye: when True, refuse moves into the mover's own eye.

        Returns:
            0 on success, 1 if the move is illegal (also logged), 2 if it was
            refused because it would fill an eye. The board is unchanged for
            return values 1 and 2.
        """
        if not self.legal(v):
            logging.info("board play : not legal = {}".format(ev2str(v)))
            return 1
        if not_fill_eye and self.eyeshape(v, self.turn):
            return 2

        # Shift the position history by one and store the current position.
        for i in range(KEEP_PREV_CNT - 1)[::-1]:
            self.prev_color[i + 1] = np.copy(self.prev_color[i])
        self.prev_color[0] = np.copy(self.color)

        if v == PASS:
            self.ko = VNULL
        else:
            self.place_stone(v)
            group_id = self.id[v]
            self.ko = VNULL
            # A single stone that captured exactly one stone and is itself
            # left with one liberty creates a Ko at that liberty.
            if self.remove_cnt == 1 and \
                    self.sg[group_id].lib_cnt == 1 and \
                    self.sg[group_id].size == 1:
                self.ko = self.sg[group_id].v_atr

        self.prev_move = v
        self.history.append(v)
        self.turn = int(self.turn == 0)
        self.move_cnt += 1

        return 0

    def random_play(self):
        """Play a random accepted move (never filling an own eye); pass if none exists.

        Returns:
            The move that was played (an extended-board index or PASS).
        """
        empty_list = np.where(self.color == 2)[0]
        np.random.shuffle(empty_list)

        for v in empty_list:
            if self.play(v, True) == 0:
                return v

        self.play(PASS)
        return PASS

    def score(self):
        """Return black's margin: black points minus white points minus KOMI.

        A point counts for a colour when it holds a stone of that colour, or
        when it is empty and its orthogonal neighbours include that colour
        but not the other one.
        """
        stone_cnt = [0, 0]
        for rv in range(BVCNT):
            v = rv2ev(rv)
            c = self.color[v]
            if c <= 1:
                stone_cnt[c] += 1
            else:
                nbr_cnt = [0, 0, 0, 0]
                for d in dir4:
                    nbr_cnt[self.color[v + d]] += 1
                if nbr_cnt[0] > 0 and nbr_cnt[1] == 0:
                    stone_cnt[0] += 1
                elif nbr_cnt[1] > 0 and nbr_cnt[0] == 0:
                    stone_cnt[1] += 1
        return stone_cnt[1] - stone_cnt[0] - KOMI

    def rollout(self, show_board=False):
        """Play random moves until two consecutive passes or the move limit.

        Args:
            show_board: when True, print this board to stderr after every
                non-pass move.
        """
        while self.move_cnt < EBVCNT * 2:
            prev_move = self.prev_move
            move = self.random_play()
            if show_board and move != PASS:
                # Report on this board (the original referenced a global
                # `b` that is not defined in this scope).
                stderr.write("\nmove count=%d\n" % self.move_cnt)
                self.showboard()
            if prev_move == PASS and move == PASS:
                break

    def _board_lines(self, black_mark):
        """Render the board as text lines, using *black_mark* for black stones.

        White stones are drawn as "O", empty points as ".", and the previous
        move is wrapped in square brackets. The first and last lines are the
        column labels.
        """
        header = "  " + "".join(" " + x_labels[x] + " " for x in range(BSIZE))
        lines = [header]
        for y in range(BSIZE, 0, -1):  # BSIZE, ..., 2, 1
            y_str = str(y).rjust(2)
            cells = []
            for x in range(1, BSIZE + 1):
                v = xy2ev(x, y)
                color = self.color[v]
                if color <= 1:
                    stone_str = "O" if color == 0 else black_mark
                    if v == self.prev_move:
                        cells.append("[" + stone_str + "]")
                    else:
                        cells.append(" " + stone_str + " ")
                else:
                    cells.append(" . ")
            lines.append(y_str + "".join(cells) + y_str)
        lines.append(header)
        return lines

    def showboard(self):
        """Print the board to stderr (black = "X"), followed by a blank line."""
        for line in self._board_lines("X"):
            stderr.write(line + "\n")
        stderr.write("\n")

    def showboard_file(self):
        """Write the board to the log (black = "x"), followed by a newline record."""
        for line in self._board_lines("x"):
            logging.info(line)
        logging.info("\n")

    def feature(self):
        """Return the feature planes, one row per playable point.

        Columns 0/1 mark stones of the side to move / the opponent in the
        current position, the next KEEP_PREV_CNT pairs do the same for the
        stored previous positions, and the last column is the side to move.

        Returns:
            float64 array with one row per entry of ``rv_list`` and
            FEATURE_CNT columns.
        """
        # np.float was only an alias of the builtin float (float64) and has
        # been removed from NumPy, so name the dtype explicitly.
        feature_ = np.zeros((EBVCNT, FEATURE_CNT), dtype=np.float64)
        my = self.turn
        opp = int(self.turn == 0)

        feature_[:, 0] = (self.color == my)
        feature_[:, 1] = (self.color == opp)
        for i in range(KEEP_PREV_CNT):
            feature_[:, (i + 1) * 2] = (self.prev_color[i] == my)
            feature_[:, (i + 1) * 2 + 1] = (self.prev_color[i] == opp)
        feature_[:, FEATURE_CNT - 1] = my

        return feature_[rv_list, :]

    def hash(self):
        """Return an integer hash of the current position, the previous one and the turn.

        Built on Python's ``hash`` of bytes, so values are only comparable
        within one interpreter process unless PYTHONHASHSEED is fixed.
        """
        # tobytes() replaces the removed ndarray.tostring() alias.
        return (hash(self.color.tobytes()) ^
                hash(self.prev_color[0].tobytes()) ^ self.turn)

    def info(self):
        """Return ``(hash, move_cnt, candidates)`` for the current position.

        Candidates are real-board indices of every empty point that is legal
        and not an own eye, followed by the pass index.
        """
        empty_list = np.where(self.color == 2)[0]
        cand_list = []
        for v in empty_list:
            if self.legal(v) and not self.eyeshape(v, self.turn):
                cand_list.append(ev2rv(v))
        cand_list.append(ev2rv(PASS))
        return (self.hash(), self.move_cnt, cand_list)

      # -*- coding: utf-8 -*-



# Index permutations for the 8 board symmetries, used for data augmentation.
# Entry 0 is the identity; each further entry applies one more step to the
# previous one, alternating a vertical flip (odd i) and a transpose (even i).
# Each table has BVCNT + 1 entries; the last one (the pass index) is never moved.
rnd_array = [np.arange(BVCNT + 1)]
for i in range(1, 8):
    # Work on a copy: appending the previous array object itself would make
    # all eight entries alias one buffer and end up identical.
    rnd_array.append(np.copy(rnd_array[i - 1]))
    rot_array = rnd_array[i][:BVCNT].reshape(BSIZE, BSIZE)
    if i % 2 == 0:
        rot_array = rot_array.transpose(1, 0)
    else:
        rot_array = rot_array[::-1, :]
    rnd_array[i][:BVCNT] = rot_array.reshape(BVCNT)


class Feed(object):
    """Shuffled mini-batch source over parallel feature / move / result arrays."""

    def __init__(self, f_, m_, r_):
        """Store the three arrays and draw an initial random sample order.

        Args:
            f_: feature array; its first axis indexes samples.
            m_: move array, indexed by the same sample axis.
            r_: result array, indexed by the same sample axis.
        """
        self._feature = f_
        self._move = m_
        self._result = r_
        self.size = self._feature.shape[0]
        self._idx = 0
        self._perm = np.arange(self.size)
        np.random.shuffle(self._perm)

        logging.info("self._move shape = {}".format(m_.shape))
        logging.info("self._result shape = {}".format(r_.shape))
        logging.info("self.feature shape = {}".format(self._feature.shape))
        logging.info("self.size = {}".format(self._feature.shape[0]))
        logging.info("self._idx = {}".format(self._idx))
        logging.info("self._perm shape = {}".format(self._perm.shape))

    def next_batch(self, batch_size=128):
        """Return the next mini-batch under one randomly chosen symmetry table.

        When the cursor has reached the end of the data the sample order is
        reshuffled and the cursor restarts at 0. The last batch of a pass may
        be shorter than *batch_size*.

        Returns:
            ``(features, moves, results)`` as float32 arrays. Features and
            moves are re-indexed along axis 1 with the same table from
            ``rnd_array``, so they stay aligned.
        """
        # ">=" rather than ">": with "_idx == size" the slice below would be
        # empty and the caller would receive a zero-length batch.
        if self._idx >= self.size:
            np.random.shuffle(self._perm)
            self._idx = 0
        start = self._idx
        self._idx += batch_size
        rows = self._perm[start:self._idx]  # sample indices of this mini-batch

        rnd_cnt = np.random.choice(np.arange(8))
        symmetry = rnd_array[rnd_cnt]

        f_batch = self._feature[rows][:, symmetry[:BVCNT]].astype(np.float32)
        m_batch = self._move[rows][:, symmetry].astype(np.float32)
        r_batch = self._result[rows].astype(np.float32)

        return f_batch, m_batch, r_batch

########################### Model #############################################    
def dual_residual_network(input_shape, blocks=20):
    """Build the two-headed residual network as a Keras ``Model``.

    Args:
        input_shape: full batch shape handed to ``Input(batch_shape=...)``.
        blocks: number of residual blocks in the tower.

    Returns:
        A Model with outputs ``[policy, value]``.
    """
    for message in ('- Model : dual_resnet input_shape = {}'.format(input_shape),
                    '- Model : blocks = {}'.format(blocks)):
        stdout_log(message)

    board_input = Input(batch_shape=input_shape)
    stem = conv_bn_relu_block(name="init")(board_input)
    trunk = residual_tower(blocks=blocks)(stem)
    # Both heads read the same trunk output; policy is built first.
    heads = [policy_head()(trunk), value_head()(trunk)]
    return Model(inputs=board_input, outputs=heads)

def conv_bn_relu_block(name, activation=True, filters=256, kernel_size=(3,3),
                       strides=(1,1), padding="same", init="he_normal"):
    """Return a function applying Conv2D -> BatchNorm [-> Dropout -> ReLU].

    Args:
        name: prefix used for the layer names of this block.
        activation: when False the block stops after batch normalisation
            (no dropout, no ReLU).
        filters, kernel_size, strides, padding: passed to ``Conv2D``.
        init: kernel initializer passed to ``Conv2D``.

    Returns:
        A callable mapping an input tensor to the block's output tensor.
    """
    def f(inputs):
        conv = Conv2D(filters=filters,
                      kernel_size=kernel_size,
                      strides=strides,
                      padding=padding,
                      kernel_initializer=init,
                      data_format='channels_last',
                      kernel_regularizer=regularizers.l2(0.0001),
                      name="{}_conv_block".format(name))(inputs)
        # The convolution is channels_last, so the feature axis is the last
        # one. axis=1 would normalise per board row instead of per channel.
        batch_norm = BatchNormalization(axis=-1, name="{}_batch_norm".format(name))(conv)
        if not activation:
            return batch_norm
        # Dropout is only built on the path that actually uses it.
        dr = Dropout(0.5)(batch_norm)
        return Activation("relu", name="{}_relu".format(name))(dr)
    return f


def residual_block(block_num, **args):
    """Return a function applying one residual block.

    The block is two conv blocks (the second without activation) whose
    output is added to the block input and passed through a ReLU.

    Args:
        block_num: index used in the layer names of this block.
        **args: extra keyword arguments forwarded to both conv blocks.
    """
    def f(inputs):
        first = conv_bn_relu_block(name="residual_1_{}".format(block_num),
                                   activation=True, **args)
        second = conv_bn_relu_block(name="residual_2_{}".format(block_num),
                                    activation=False, **args)
        branch = second(first(inputs))
        summed = add([inputs, branch], name="add_{}".format(block_num))
        return Activation("relu", name="{}_relu".format(block_num))(summed)
    return f


def residual_tower(blocks, **args):
    """Return a function applying *blocks* residual blocks in sequence.

    Args:
        blocks: number of residual blocks.
        **args: extra keyword arguments forwarded to every ``residual_block``
            (they were previously accepted but silently ignored).
    """
    def f(inputs):
        x = inputs
        for i in range(blocks):
            x = residual_block(block_num=i, **args)(x)
        return x
    return f

def convolutional_tower(blocks, **args):
    """Return a function applying *blocks* plain conv blocks in sequence.

    Args:
        blocks: number of conv blocks; block ``i`` uses ``i`` as its layer
            name prefix.
        **args: extra keyword arguments forwarded to every
            ``conv_bn_relu_block`` (they were previously accepted but
            silently ignored).
    """
    def f(inputs):
        x = inputs
        for i in range(blocks):
            x = conv_bn_relu_block(name=i, **args)(x)
        return x
    return f


def policy_head():
    """Return a function mapping trunk features to move probabilities.

    Layers: 3x3 convolution with 2 filters -> BatchNorm -> ReLU -> Flatten ->
    softmax Dense with 19 * 19 + 1 outputs.
    """
    def f(inputs):
        conv = Conv2D(filters=2,
                      kernel_size=(3, 3),
                      strides=(1, 1),
                      padding="same",
                      # Explicit, to match the channels_last trunk blocks.
                      data_format='channels_last',
                      kernel_regularizer=regularizers.l2(0.0001),
                      name="policy_head_conv_block")(inputs)
        # Channels are on the last axis; axis=1 would normalise per board row.
        batch_norm = BatchNormalization(axis=-1, name="policy_head_batch_norm")(conv)
        activation = Activation("relu", name="policy_head_relu")(batch_norm)
        p = Flatten()(activation)
        return Dense(units=19*19 + 1, name="policy_out", activation="softmax")(p)
    return f


def value_head():
    """Return a function mapping trunk features to a scalar in [-1, 1].

    Layers: 1x1 convolution with 1 filter -> BatchNorm -> ReLU -> Flatten ->
    Dense(256, relu) -> Dense(1, tanh).
    """
    def f(inputs):
        conv = Conv2D(filters=1,
                      kernel_size=(1, 1),
                      strides=(1, 1),
                      padding="same",
                      # Explicit, to match the channels_last trunk blocks.
                      data_format='channels_last',
                      kernel_regularizer=regularizers.l2(0.0001),
                      name="value_head_conv_block")(inputs)
        # Channels are on the last axis; axis=1 would normalise per board row.
        batch_norm = BatchNormalization(axis=-1, name="value_head_batch_norm")(conv)
        activation = Activation("relu", name="value_head_relu")(batch_norm)
        v = Flatten()(activation)
        dense = Dense(units=256, kernel_regularizer=regularizers.l2(0.0001),
                      name="value_head_dense", activation="relu")(v)
        return Dense(units=1, kernel_regularizer=regularizers.l2(0.0001),
                     name="value_out", activation="tanh")(dense)
    return f


from keras.optimizers import SGD
from keras.models import load_model

def learn(lr_=1e-4, dr_=0.7, sgf_dir="sgf/", use_gpu=True, gpu_cnt=1, model_name=""):
    """Train, or resume training of, the dual-head network on pickled feeds.

    Progress is checkpointed to small text files next to the model, so a run
    that stops at the time limit can be continued by calling this again with
    the same arguments. Reads the module-level settings BLOCK_CNT, STEP_TIME,
    SAVE_TIME and JST.

    Files under ``sgf_dir`` (which is used as a plain string prefix):
        <model_name>                      saved model (loaded if it exists)
        <model_name>_trained.txt          epoch, step, global step, feed-file
                                          position, max_policy (one per line)
        <model_name>_epoch_count.txt      number of epochs already shuffled
        <model_name>_feed_files.txt       shuffled feed-file order
        <model_name>_feed_n_shuffled.txt  cumulative sample counts in that order
        <model_name>.txt                  latest test summary
        feed_n_17979696.pickle            cumulative sample count per feed file
        feed_train_<k>.pickle             training feeds
        feed_test.pickle                  test feed

    Args:
        lr_: base learning rate; scaled down linearly per epoch.
        dr_: unused, kept for call compatibility.
        sgf_dir: directory prefix for all files above.
        use_gpu: unused, kept for call compatibility.
        gpu_cnt: divides the number of steps per epoch. For values above 1
            the extra batches drawn per step are not trained on.
        model_name: file name of the model inside ``sgf_dir``.

    Returns:
        None. Returns early once the time limit is exceeded.

    NOTE: all feed files are loaded with ``pickle``; only use files from a
    trusted source.
    """
    # Local import: only needed to push the learning rate into the optimizer.
    from keras import backend as K

    learning_rate = lr_
    model_path = sgf_dir + model_name

    shape = (None, BSIZE, BSIZE, FEATURE_CNT)

    model = dual_residual_network(shape, blocks=BLOCK_CNT)

    if os.path.isfile(model_path):
        model = load_model(model_path)
        stdout_log('- Resore model : {}'.format(model_path))
    else:
        stdout_log('- New model ')
        model.compile(SGD(lr=lr_), loss=['categorical_crossentropy', 'mse'], metrics=['accuracy'])

    stdout_log("model.metrics_names = {}".format(model.metrics_names))

    # ---------------------------- Training ----------------------------
    stdout_log("----------- start training-------------")

    # Stop SAVE_TIME seconds before the STEP_TIME budget runs out.
    limit_time = time.time() + STEP_TIME - SAVE_TIME
    endt = time.ctime(limit_time + JST)
    cnvt = time.strptime(endt)

    stdout_log("STEP_TIME = {}".format(STEP_TIME))
    stdout_log("SAVE_TIME = {}".format(SAVE_TIME))
    stdout_log("END_TIME = {}".format(time.strftime("%Y/%m/%d %H:%M:%S", cnvt)))

    trained_txt = model_path + '_' + 'trained.txt'
    epoch_count_txt = model_path + '_' + 'epoch_count.txt'
    feed_files_txt = model_path + '_' + 'feed_files.txt'
    feed_n_shuffled_txt = model_path + '_' + 'feed_n_shuffled.txt'

    # First run for this model: create the checkpoint files with zeros.
    if not os.path.isfile(trained_txt):
        with open(trained_txt, mode='w') as f:
            trained = [str(0), str(0), str(0), str(0), str(0.0)]
            f.write('\n'.join(trained))
        with open(epoch_count_txt, mode='w') as f:
            f.write(str(0))

    # Restore the state of the previous run.
    with open(trained_txt) as f:
        l_strip = [s.strip() for s in f.readlines()]
        start_epoch_idx = int(l_strip[0])
        start_step_idx = int(l_strip[1])
        global_step_idx = int(l_strip[2])
        no = int(l_strip[3])
        max_policy = float(l_strip[4])

        stdout_log("start_epoch_idx = {}".format(start_epoch_idx))
        stdout_log("start_step_idx = {}".format(start_step_idx))
        stdout_log("global_step_idx = {}".format(global_step_idx))
        stdout_log("feed_file_no = {}".format(no))
        stdout_log("max_policy = {}".format(max_policy))

    with open(epoch_count_txt) as f:
        epoch_count = int(f.read())
        stdout_log("epoch_count = {}".format(epoch_count))

    stdout_log("Converting ...")
    start_time = time.time()

    with open(sgf_dir + 'feed_n_17979696.pickle', mode='rb') as f:
        feed_n = pickle.load(f)
        stdout_log("loaded feed_n count = {}".format(len(feed_n)))
        stdout_log("loaded feed_n = {}".format(feed_n))

    # Index of the last feed file to use (0..283).
    n = 283
    stdout_log("feed_files No. = {}".format(n))

    # Total number of samples over feed files 0..n.
    feed_cnt = feed_n[n]

    # Feed-file numbers, in the order they will be read.
    feed_files = list(range(n + 1))

    # Per-file sample counts, recovered from the cumulative counts.
    feed_n_1 = [feed_n[i] if i == 0 else feed_n[i] - feed_n[i - 1]
                for i in range(len(feed_n))]

    stdout_log("loaded feed_n_1 count = {}".format(len(feed_n_1)))
    stdout_log("loaded feed_n_1 = {}".format(feed_n_1))

    # learning settings
    batch_cnt = 300
    total_epochs = 10
    epoch_steps = feed_cnt // (batch_cnt * gpu_cnt)
    total_steps = total_epochs * epoch_steps
    # Checkpoint every 0.1% and test every 1% of the run. Clamp to 1 so a
    # short run (fewer than 1000 / 100 steps) cannot cause a modulo by zero.
    save_interval = max(1, total_steps // 1000)
    test_interval = max(1, total_steps // 100)

    stdout_log("feed_cnt = %d" % (feed_cnt))
    stdout_log("batch_cnt = %d" % (batch_cnt))
    stdout_log("total_epochs = %d" % (total_epochs))
    stdout_log("epoch_steps = %d" % (epoch_steps))
    stdout_log("total_steps = %d" % (total_steps))
    stdout_log("learning rate = %.1g" % (learning_rate))
    stdout_log("model_name = %s" % (model_name))

    # training
    for epoch_idx in range(start_epoch_idx, total_epochs):
        if epoch_idx < 10:
            learning_rate = lr_ * (1.0 - epoch_idx * 0.1)
        else:
            learning_rate = lr_ * 0.05
        stdout_log("epoch learning rate=%.1g" % (learning_rate))

        # Apply the schedule; previously the value was logged but the
        # optimizer kept the rate it was compiled or saved with.
        lr_var = getattr(getattr(model, "optimizer", None), "lr", None)
        if lr_var is not None:
            K.set_value(lr_var, learning_rate)

        # Cumulative sample counts in shuffled read order.
        feed_n_shuffled = []

        if epoch_idx > epoch_count - 1:
            # Entering an epoch that has not been shuffled yet.
            stdout_log("shuffling...")
            epoch_count = epoch_idx + 1

            # Random read order of the feed files.
            random.shuffle(feed_files)
            stdout_log("feed_files_shuffled = {}".format(feed_files))

            running = 0
            for file_no in feed_files:
                running += feed_n_1[file_no]
                feed_n_shuffled.append(running)

            a = [feed_n_1[file_no] for file_no in feed_files]

            stdout_log("feed_n_1 = {}".format(a))
            stdout_log("feed_n_shuffled = {}".format(feed_n_shuffled))

            with open(epoch_count_txt, mode='w') as f:
                f.write(str(epoch_count))
            with open(feed_files_txt, mode='w') as f:
                f.write('\n'.join(str(num) for num in feed_files))
            with open(feed_n_shuffled_txt, mode='w') as f:
                f.write('\n'.join(str(num) for num in feed_n_shuffled))
        else:
            # Resuming inside an epoch: reuse the order saved for it.
            with open(feed_files_txt, mode='r') as f:
                feed_files = [int(s.strip()) for s in f.readlines()]
                stdout_log("feed_files reload = {}".format(feed_files))
            with open(feed_n_shuffled_txt, mode='r') as f:
                feed_n_shuffled = [int(s.strip()) for s in f.readlines()]
                stdout_log("feed_n_shuffled reload = {}".format(feed_n_shuffled))

        file_number = feed_files[no]

        # "with" closes the handle; the original left this file open.
        with open(sgf_dir + 'feed_train_' + str(file_number) + '.pickle', 'rb') as f:
            feed_train = pickle.load(f)

        stdout_log('training : feed_train_' + str(file_number) + '.pickle')
        stdout_log("training : steps = {}".format(feed_train[0].size // batch_cnt + 1))

        for step_idx in range(start_step_idx, epoch_steps):
            for gpu_idx in range(gpu_cnt):

                # Move to the next feed file once the step counter passes the
                # cumulative sample count of the current one.
                if feed_n_shuffled[no] < step_idx * batch_cnt:
                    stdout_log('training : step_idx = {}'.format(step_idx))
                    no += 1
                    if no > n:
                        no = 0
                    file_number = feed_files[no]
                    with open(sgf_dir + 'feed_train_' + str(file_number) + '.pickle', 'rb') as f:
                        feed_train = pickle.load(f)
                        stdout_log('training : feed_train_' + str(file_number) + '.pickle')
                        stdout_log("training : steps = {}".format(feed_train[0].size // batch_cnt + 1))

                batch = feed_train[0].next_batch(batch_cnt)

                f1 = np.array(batch[0])
                f2 = f1.reshape([-1, BSIZE, BSIZE, FEATURE_CNT])
                m = np.array(batch[1])
                r = np.array(batch[2])

            model.fit(f2, [m, r], batch_size=batch_cnt, epochs=1, verbose=0)

            global_step_idx += 1

            if global_step_idx % 100 == 0:
                stdout_log("global_steps... = {}".format(global_step_idx))

            if global_step_idx % save_interval == 0:
                progress_now = float(global_step_idx) / total_steps * 100
                str_log = "progress: %03.2f[%%] " % (progress_now)

                elapsed_time = time.time() - start_time
                str_log += "%03.1f" % (elapsed_time) + "[sec]"
                stdout_log("%s" % (str_log))
                start_time = time.time()

                model.save(model_path)
                stdout_log("save model.")

                with open(trained_txt, mode='w') as f:
                    trained = [str(epoch_idx), str(step_idx + 1), str(global_step_idx), str(no), str(max_policy)]
                    f.write('\n'.join(trained))
                    stdout_log("epoch_idx = {}, step_idx = {}, gloal_step_idx = {}, feed_file_no = {}".format(epoch_idx, step_idx + 1, global_step_idx, no))

            # ---------------------------- Test ----------------------------
            # Evaluate on the test feed at the time limit and every 1% of the run.
            if time.time() > limit_time or (global_step_idx % test_interval == 0):

                with open(sgf_dir + 'feed_test.pickle', mode='rb') as f:
                    feed_test = pickle.load(f)
                    stdout_log("loaded feed_test.")

                test_steps = feed_test[0].size // batch_cnt
                # Sums of: p_loss, v_loss, p_acc, v_acc, total loss.
                test_sum = [0.0, 0.0, 0.0, 0.0, 0.0]

                for _ in range(test_steps):
                    test_mini_batch = feed_test[0].next_batch(batch_cnt)

                    tf1 = np.array(test_mini_batch[0])
                    tf2 = tf1.reshape([-1, BSIZE, BSIZE, FEATURE_CNT])
                    tm = np.array(test_mini_batch[1])
                    tr = np.array(test_mini_batch[2])

                    loss, p_loss, v_loss, p_acc, v_acc = model.evaluate(tf2, [tm, tr], verbose=0)

                    test_sum[0] += p_loss
                    test_sum[1] += v_loss
                    test_sum[2] += p_acc
                    test_sum[3] += v_acc
                    test_sum[4] += loss

                # A test feed smaller than one batch gives zero steps; report
                # zeros instead of dividing by zero.
                denom = max(1, test_steps)
                str_log = "loss=%3.5f p_loss=%3.5f v_loss=%3.5f p_acc=%3.5f[%%] v_acc=%3.5f[%%]" % (test_sum[4]/denom, test_sum[0]/denom, test_sum[1]/denom, test_sum[2]/denom*100, test_sum[3]/denom*100)
                stdout_log('test : {}'.format(str_log))

                stdout_log("save model.")

                model.save(model_path)

                with open(model_path + '.txt', mode='w') as f:
                    now_dt = datetime.now()
                    dt_jst = now_dt + timedelta(minutes=540)
                    str_datetime = datetime.strftime(dt_jst, "%Y/%m/%d-%H:%M:%S")
                    f.write(str_datetime + ' ' + model_name + ' ' + str_log)

                if time.time() > limit_time:
                    stdout_log("★★★ Time Limit....")
                    return

        # One epoch finished: restart from the first feed file and step 0.
        no = 0
        start_step_idx = 0

    model.save(model_path)

        
import sys
# Extend the module search path with the mounted Drive folder. This has to run
# before the import below so that a `learn` module stored there can be found
# (presumably that is where learn.py lives -- confirm).
sys.path.append('/content/drive/My Drive/')

# Only the name `Feed` is bound by this import; the name `learn` itself is not
# rebound, so the `learn(...)` call further down does not refer to this module.
from learn import Feed


# Time budgets for one training session, all expressed in seconds.
STEP_TIME = 12 * 60 * 60  # 43200 s (12 hours)
SAVE_TIME = 26 * 60  # 1560 s (26 minutes)
# Nine hours in seconds; presumably the UTC -> JST offset -- confirm against
# how learn() uses it.
JST = 9 * 60 * 60

# Block count embedded in the checkpoint file name below. Presumably also the
# number of blocks of the network built inside learn() -- confirm.
BLOCK_CNT = 12

if __name__ == "__main__":

    # Checkpoint file name, e.g. "zero_256_12.h5" for BLOCK_CNT == 12.
    model = "".join(("zero_256_", str(BLOCK_CNT), ".h5"))

    # The first two positional arguments are passed through unchanged; their
    # meaning is defined by learn(), which is outside this block.
    learn(
        1e-2,
        0.5,
        sgf_dir=r"/content/drive/My Drive/",
        use_gpu=True,
        gpu_cnt=1,
        model_name=model,
    )
    # Alternative invocation kept for reference (smaller first argument):
    #learn(3e-4, 0.5, sgf_dir=r"/content/drive/My Drive/", use_gpu=True, gpu_cnt=1, model_name=model)


Using TensorFlow backend.
W0614 00:56:08.908958 139659460233088 deprecation_wrapper.py:119] From /usr/local/lib/python3.6/dist-packages/keras/backend/tensorflow_backend.py:68: The name tf.get_default_graph is deprecated. Please use tf.compat.v1.get_default_graph instead.

W0614 00:56:08.923959 139659460233088 deprecation_wrapper.py:119] From /usr/local/lib/python3.6/dist-packages/keras/backend/tensorflow_backend.py:508: The name tf.placeholder is deprecated. Please use tf.compat.v1.placeholder instead.

W0614 00:56:08.928651 139659460233088 deprecation_wrapper.py:119] From /usr/local/lib/python3.6/dist-packages/keras/backend/tensorflow_backend.py:3884: The name tf.truncated_normal is deprecated. Please use tf.random.truncated_normal instead.

W0614 00:56:08.958652 139659460233088 deprecation_wrapper.py:119] From /usr/local/lib/python3.6/dist-packages/keras/backend/tensorflow_backend.py:168: The name tf.get_default_session is deprecated. Please use tf.compat.v1.get_default_session inste

Mounted at /content/drive
2019/06/14-09:56:08 - Model : dual_resnet input_shape = (None, 19, 19, 17)
2019/06/14-09:56:08 - Model : blocks = 12


W0614 00:56:09.755563 139659460233088 deprecation_wrapper.py:119] From /usr/local/lib/python3.6/dist-packages/keras/backend/tensorflow_backend.py:1801: The name tf.nn.fused_batch_norm is deprecated. Please use tf.compat.v1.nn.fused_batch_norm instead.

W0614 00:56:09.833599 139659460233088 deprecation.py:506] From /usr/local/lib/python3.6/dist-packages/keras/backend/tensorflow_backend.py:3144: calling dropout (from tensorflow.python.ops.nn_ops) with keep_prob is deprecated and will be removed in a future version.
Instructions for updating:
Please use `rate` instead of `keep_prob`. Rate should be set to `rate = 1 - keep_prob`.
W0614 00:56:20.919645 139659460233088 deprecation_wrapper.py:119] From /usr/local/lib/python3.6/dist-packages/keras/optimizers.py:757: The name tf.train.Optimizer is deprecated. Please use tf.compat.v1.train.Optimizer instead.

W0614 00:56:21.486003 139659460233088 deprecation.py:323] From /usr/local/lib/python3.6/dist-packages/tensorflow/python/ops/math_grad.py:1

2019/06/14-09:56:27 - Resore model : /content/drive/My Drive/zero_256_12.h5
2019/06/14-09:56:27 model.metrics_names = ['loss', 'policy_out_loss', 'value_out_loss', 'policy_out_acc', 'value_out_acc']
2019/06/14-09:56:27 ----------- start training-------------
2019/06/14-09:56:27 STEP_TIME = 43200
2019/06/14-09:56:27 SAVE_TIME = 1560
2019/06/14-09:56:27 END_TIME = 2019/06/14 21:30:27
2019/06/14-09:56:27 start_epoch_idx = 0
2019/06/14-09:56:27 start_step_idx = 46722
2019/06/14-09:56:27 global_step_idx = 46722
2019/06/14-09:56:27 feed_file_no = 221
2019/06/14-09:56:27 max_policy = 0.0
2019/06/14-09:56:28 epoch_count = 1
2019/06/14-09:56:28 Converting ...
2019/06/14-09:56:28 loaded feed_n count = 284
2019/06/14-09:56:28 loaded feed_n = [68940, 140162, 208293, 279013, 348248, 417764, 486114, 555979, 626402, 696984, 765989, 835016, 904985, 973293, 1043663, 1112155, 1181182, 1250203, 1321028, 1391217, 1461878, 1529255, 1598316, 1669832, 1738130, 1806060, 1875742, 1949450, 2019880, 2094046, 216

In [0]:
pip install keras==2.1.6

Collecting keras==2.1.6
[?25l  Downloading https://files.pythonhosted.org/packages/54/e8/eaff7a09349ae9bd40d3ebaf028b49f5e2392c771f294910f75bb608b241/Keras-2.1.6-py2.py3-none-any.whl (339kB)
[K     |████████████████████████████████| 348kB 47.1MB/s 
Installing collected packages: keras
  Found existing installation: Keras 2.2.4
    Uninstalling Keras-2.2.4:
      Successfully uninstalled Keras-2.2.4
Successfully installed keras-2.1.6
