In [1]:
# -*- coding: utf-8 -*-
# %matplotlib inline
%load_ext autoreload
%autoreload 2

import numpy as np
np.set_printoptions(suppress=True)

from shutil import copyfile
import random
from importlib import reload


from keras.utils import plot_model

from game import Game, GameState
from agent import Agent
from memory import Memory
from model import Residual_CNN
from funcs import playMatches, playMatchesBetweenVersions

import loggers as lg

from settings import run_folder, run_archive_folder
import initialise
import pickle
import config

import time
import os
import fileflag
import evalcount

# Game environment instance; the evaluation cell reads its grid_shape,
# action_size, state_size and name when building networks and agents.
env = Game()

Using TensorFlow backend.


In [2]:
no = 3  # Index of this notebook among the several evaluating notebooks

######## CREATE THE NETWORKS ########

# Construct two fresh networks with the same constructor arguments; their
# weights are overwritten below with the best / current model weights.
nn_input_shape = (2,) + env.grid_shape
current_NN = Residual_CNN(config.REG_CONST, config.LEARNING_RATE, nn_input_shape, env.action_size, config.HIDDEN_CNN_LAYERS)
best_NN = Residual_CNN(config.REG_CONST, config.LEARNING_RATE, nn_input_shape, env.action_size, config.HIDDEN_CNN_LAYERS)

# Load the best player model.
if initialise.INITIAL_MODEL_VERSION is not None:
    best_player_version = initialise.INITIAL_MODEL_VERSION
    shared_best_version = evalcount.best_player_version()
    if best_player_version <= shared_best_version:
        # The version recorded by evalcount is at least as new: use it.
        best_player_version = shared_best_version
    else:
        # The configured initial version is newer: record it in evalcount.
        evalcount.init_best_player_version(best_player_version)

    # Report the version that is actually loaded, which may differ from
    # initialise.INITIAL_MODEL_VERSION after the comparison above.
    print('LOADING MODEL VERSION ' + str(best_player_version) + '...')
    m_tmp = best_NN.read(env.name, initialise.INITIAL_RUN_NUMBER, best_player_version)
    best_NN.model.set_weights(m_tmp.get_weights())

else:
    # No initial version configured: the best player starts with the same
    # weights as the freshly constructed current network.
    best_player_version = 0
    best_NN.model.set_weights(current_NN.model.get_weights())
    print('NEW MODEL EVALUATING ...')


# Load the current player model; poll until the training model file exists.
while not os.path.exists('./run/models/training_model.h5'):
    print('Waiting for training_model.h5')
    time.sleep(10)

# File flag 1 is raised while the training model is read (presumably so the
# writer of that file can avoid touching it - TODO confirm in fileflag).
# try/finally guarantees the flag is lowered even if the read fails.
fileflag.fileFlag_on(1)
try:
    m_tmp_training = current_NN.read_tmp(env.name)
finally:
    fileflag.fileFlag_off(1)
current_NN.model.set_weights(m_tmp_training.get_weights())

######## CREATE THE PLAYERS ########
current_player = Agent('current_player', env.state_size, env.action_size, config.EVAL_MCTS_SIMS, config.CPUCT, current_NN)
best_player = Agent('best_player', env.state_size, env.action_size, config.EVAL_MCTS_SIMS, config.CPUCT, best_NN)
iteration = 0

print('Number of MCTS SIM : ' + str(config.EVAL_MCTS_SIMS))


# Tournament results. They are pre-initialised because the "evaluation ended"
# branch below reads them: without this, starting the notebook while
# evalcount.eval_end() is already non-zero raised NameError on `scores`.
scores = {best_player.name: 0, current_player.name: 0, 'drawn': 0}
sp_scores = None

# Endless evaluation loop: play one match per iteration while the evaluation
# is open; once it has ended, either conclude it (promote or keep the best
# player) or wait for another module to conclude it, then reload the models.
while True:
    lg.logger_test.info('Evaluating module ' + str(no) + ' alive')
    if fileflag.get_fileFlag(3) == '1':
        # NOTE(review): this waits a single 30 s period and then continues
        # even if flag 3 is still set - confirm that this is intended.
        print('Waiting for training...')
        time.sleep(30)

    ######## TOURNAMENT ########
    print('TOURNAMENT...')
    lg.logger_test.info('TOURNAMENT...')
    if evalcount.eval_end() == 0:
        # Evaluation still open: play one match and fold its result into the
        # scores kept by evalcount.
        scores, _, points, sp_scores = playMatches(best_player, current_player, 1, lg.logger_tourney, turns_until_tau0 = 0, memory = None)
        scores = evalcount.add_score(scores, best_player, current_player)
        lg.logger_test.info(str(scores))
        continue

    if scores[best_player.name] + scores[current_player.name] + scores["drawn"] >= config.EVAL_EPISODES:
        # The module that reaches this branch concludes the evaluation.
        print('\nSCORES')
        print(scores)
        print('\nSTARTING PLAYER / NON-STARTING PLAYER SCORES')
        print(sp_scores)
        #print(points)
        print('\n\n')
        #evalcount.eval_reset()

        if scores['current_player'] > scores['best_player'] * config.SCORING_THRESHOLD:
            # Current player won: promote it to best player and save it.
            best_player_version = evalcount.best_player_version()
            best_player_version += 1
            evalcount.init_best_player_version(best_player_version)
            best_NN.model.set_weights(current_NN.model.get_weights())
            best_player = Agent('best_player', env.state_size, env.action_size, config.EVAL_MCTS_SIMS, config.CPUCT, best_NN)
            best_NN.write(env.name, best_player_version)

            # File flag 2 is raised while model_temp.h5 is written; the
            # finally clause lowers it even if saving fails.
            fileflag.fileFlag_on(2)
            try:
                best_NN.model.save(run_folder + 'models/model_temp.h5')
            finally:
                fileflag.fileFlag_off(2)
            lg.logger_test.info('Got new model!')

            # Reset eval_count so that the other modules finish as well.
            evalcount.eval_reset()

        else:
            # Current player lost: just reset and finish this evaluation.
            evalcount.eval_reset()
            print('Not yet improved enough')
            lg.logger_test.info('Not yet improved enough')

    else:
        # Wait until the last module concludes the evaluation; execution
        # proceeds only after that has happened.
        evalcount.evel_wait()
        # Optional: the concluding module reads the version itself before
        # the current model is updated.
        best_player_version = evalcount.best_player_version()
        if os.path.exists('./run/models/model_temp.h5'):
            # Flag 2 is raised while the temp model is read; always lowered.
            fileflag.fileFlag_on(2)
            try:
                m_tmp = best_NN.read_tmp_selfplay(env.name)
            finally:
                fileflag.fileFlag_off(2)
            best_NN.model.set_weights(m_tmp.get_weights())
            best_player = Agent('best_player', env.state_size, env.action_size, config.EVAL_MCTS_SIMS, config.CPUCT, best_NN)
            print('change best_model to model_temp')
        else:
            print('use empty model as best_player continuously')

    # Replace the current player with the further-trained model. Flag 1 is
    # raised while it is read and lowered even if the read fails.
    fileflag.fileFlag_on(1)
    try:
        m_tmp_training = current_NN.read_tmp(env.name)
    finally:
        fileflag.fileFlag_off(1)
    current_NN.model.set_weights(m_tmp_training.get_weights())
    current_player = Agent('current_player', env.state_size, env.action_size, config.EVAL_MCTS_SIMS, config.CPUCT, current_NN)
    print('change current model to training_model')



    

Instructions for updating:
Use tf.where in 2.0, which has the same broadcast rule as np.where
Instructions for updating:

Future major versions of TensorFlow will allow gradients to flow
into the labels input on backprop by default.

See `tf.nn.softmax_cross_entropy_with_logits_v2`.

LOADING MODEL VERSION 10...


OSError: Unable to open file (unable to open file: name = './run/models/version0000.h5', errno = 2, error message = 'No such file or directory', flags = 0, o_flags = 0)