In [7]:
# -*- coding: utf-8 -*-
# %matplotlib inline
%load_ext autoreload
%autoreload 2

import numpy as np
# Print floats in fixed-point notation rather than scientific notation.
np.set_printoptions(suppress=True)

from shutil import copyfile
import random
from importlib import reload


from keras.utils import plot_model

from game import Game, GameState
from agent import Agent
from memory import Memory
from model import Residual_CNN
from funcs import playMatches, playMatchesBetweenVersions

import loggers as lg

from settings import run_folder, run_archive_folder
import initialise
import pickle
import config

import time
import os
import fileflag
import evalcount

# Game environment shared by the cells below, which read its `grid_shape`,
# `action_size`, `state_size` and `name` attributes.
env = Game()

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [8]:
######## CREATE THE PLAYERS ########
# Evaluation worker: plays the current (training) network against the best network
# one game per loop iteration, and promotes the current network to "best" once
# config.EVAL_EPISODES games are tallied and it beats the best network by
# config.SCORING_THRESHOLD.


def _with_file_flag(flag_id, action):
    """Run `action()` while file flag `flag_id` is raised and return its result.

    The flag is lowered in a `finally` clause, so an exception or a
    KeyboardInterrupt raised by `action` no longer leaves the flag raised.
    `fileFlag_on` stays outside the `try`: if raising the flag itself fails or
    is interrupted, the flag is not lowered on behalf of whoever holds it.
    """
    fileflag.fileFlag_on(flag_id)
    try:
        return action()
    finally:
        fileflag.fileFlag_off(flag_id)


def _make_eval_agent(name, network):
    """Build an Agent called `name` that plays with `network` using the evaluation MCTS settings."""
    return Agent(name, env.state_size, env.action_size, config.EVAL_MCTS_SIMS, config.CPUCT, network)


# Create two untrained networks with the same architecture.
current_NN = Residual_CNN(config.REG_CONST, config.LEARNING_RATE, (2,) + env.grid_shape, env.action_size, config.HIDDEN_CNN_LAYERS)
best_NN = Residual_CNN(config.REG_CONST, config.LEARNING_RATE, (2,) + env.grid_shape, env.action_size, config.HIDDEN_CNN_LAYERS)

# Load the best player model.
if initialise.INITIAL_MODEL_VERSION is not None:
    best_player_version = initialise.INITIAL_MODEL_VERSION
    print('LOADING MODEL VERSION ' + str(initialise.INITIAL_MODEL_VERSION) + '...')
    m_tmp = best_NN.read(env.name, initialise.INITIAL_RUN_NUMBER, best_player_version)
    best_NN.model.set_weights(m_tmp.get_weights())
else:
    # No stored version requested: the best player starts from the same
    # untrained weights as the current network.
    best_player_version = 0
    best_NN.model.set_weights(current_NN.model.get_weights())
    print('NEW MODEL EVALUATING ...')


# Load the current player model: poll until the training model file exists.
# NOTE(review): this path is hard-coded, while the save further down builds its
# path from `run_folder` — confirm both refer to the same directory.
while not os.path.exists('./run/models/training_model.h5'):
    print('Waiting for training_model.h5')
    time.sleep(10)
m_tmp_training = _with_file_flag(1, lambda: current_NN.read_tmp(env.name))
current_NN.model.set_weights(m_tmp_training.get_weights())

######## CREATE THE PLAYERS ########
current_player = _make_eval_agent('current_player', current_NN)
best_player = _make_eval_agent('best_player', best_NN)
iteration = 0

print('Number of MCTS SIM : ' + str(config.EVAL_MCTS_SIMS))


while True:

    ######## TOURNAMENT ########
    print('TOURNAMENT...')
    if evalcount.eval_end() == 0:
        # Play a single game and fold its result into the tally kept by evalcount.
        scores, _, points, sp_scores = playMatches(best_player, current_player, 1, lg.logger_tourney, turns_until_tau0 = 0, memory = None)
        scores = evalcount.add_score(scores, best_player, current_player)
        lg.logger_test.info(str(scores))
        continue

    # NOTE(review): `scores` and `sp_scores` are only bound once this process has
    # played at least one game above; reaching this point on the very first
    # iteration raises NameError. Confirm whether that start-up case can occur.
    games_tallied = scores[best_player.name] + scores[current_player.name] + scores["drawn"]
    if games_tallied >= config.EVAL_EPISODES:
        print('\nSCORES')
        print(scores)
        print('\nSTARTING PLAYER / NON-STARTING PLAYER SCORES')
        print(sp_scores)
        #print(points)
        print('\n\n')
        evalcount.eval_reset()

        if scores['current_player'] > scores['best_player'] * config.SCORING_THRESHOLD:
            # Promote the current network: copy its weights into the best network,
            # rebuild the best player and persist the new version.
            best_player_version = best_player_version + 1
            best_NN.model.set_weights(current_NN.model.get_weights())
            best_player = _make_eval_agent('best_player', best_NN)
            best_NN.write(env.name, best_player_version)
            _with_file_flag(2, lambda: best_NN.model.save(run_folder + 'models/model_temp.h5'))
            lg.logger_test.info('Got new model!')
        else:
            print('Not yet improved enough')
            lg.logger_test.info('Not yet improved enough')

    else:
        # The tally held by this process is below EVAL_EPISODES: wait, then pick
        # up a saved best model if one exists on disk.
        evalcount.evel_wait()
        if os.path.exists('./run/models/model_temp.h5'):
            m_tmp = _with_file_flag(2, lambda: best_NN.read_tmp_selfplay(env.name))
            best_NN.model.set_weights(m_tmp.get_weights())
            best_player = _make_eval_agent('best_player', best_NN)
            print('change best_model to model_temp')
        else:
            print('use empty model as best_player continuously')

    # Replace the current player with the more-trained model.
    m_tmp_training = _with_file_flag(1, lambda: current_NN.read_tmp(env.name))
    current_NN.model.set_weights(m_tmp_training.get_weights())
    current_player = _make_eval_agent('current_player', current_NN)
    print('change current model to training_model')



    

NEW MODEL EVALUATING ...
File flag 1 ON
File flag 1 OFF
Number of MCTS SIM : 2
TOURNAMENT...
1{'best_player': 0, 'drawn': 0, 'current_player': 1}
TOURNAMENT...
1{'best_player': 1, 'drawn': 0, 'current_player': 2}
TOURNAMENT...
Wating for other evaluation...
File flag 1 ON
File flag 1 OFF
TOURNAMENT...
1{'best_player': 2, 'drawn': 0, 'current_player': 1}
TOURNAMENT...
Wating for other evaluation...
File flag 1 ON
File flag 1 OFF
TOURNAMENT...
1{'best_player': 2, 'drawn': 0, 'current_player': 2}
TOURNAMENT...
Wating for other evaluation...
The file is busy
The file is busy
File flag 1 ON


KeyboardInterrupt: 