In [18]:
# -*- coding: utf-8 -*-
# %matplotlib inline
%load_ext autoreload
%autoreload 2

import numpy as np
np.set_printoptions(suppress=True)

from shutil import copyfile
import random
from importlib import reload


from keras.utils import plot_model

from game import Game, GameState
from agent import Agent
from memory import Memory
from model import Residual_CNN
from funcs import playMatches, playMatchesBetweenVersions

import loggers as lg

from settings import run_folder, run_archive_folder
import initialise
import pickle
import config

import time
import os
import fileflag

# Shared game environment; the self-play cell below reads its grid_shape,
# action_size, state_size and name attributes.
env = Game()

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [21]:
######## HELPERS ########

class _FileFlag:
    """Context manager that keeps fileflag flag ``flag_id`` on for one block.

    The flag is switched off in ``__exit__``, so it is released even when the
    guarded file access raises or the loop is interrupted (KeyboardInterrupt).

    NOTE(review): the flags presumably coordinate access to files shared with
    another process (0 is used around the temp memory pickle, 2 around the
    temp model) -- confirm against the fileflag module.
    """

    def __init__(self, flag_id):
        self.flag_id = flag_id

    def __enter__(self):
        fileflag.fileFlag_on(self.flag_id)
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        fileflag.fileFlag_off(self.flag_id)
        return False  # never swallow the exception, only release the flag


def _load_pickle(path):
    """Unpickle and return the object stored at ``path``, closing the file.

    SECURITY: pickle can execute arbitrary code while loading; only point
    this at memory files produced by this project.
    """
    with open(path, "rb") as handle:
        return pickle.load(handle)


def _dump_pickle(obj, path):
    """Pickle ``obj`` to ``path``; the file is closed (flushed) on return."""
    with open(path, "wb") as handle:
        pickle.dump(obj, handle)


def _refresh_best_weights(best_nn, fallback_nn, temp_version, verb):
    """Copy the weights self play should use into ``best_nn``.

    Source priority:
      1. ./run/models/model_temp.h5, if that file exists;
      2. the model version named by initialise.INITIAL_MODEL_VERSION;
      3. the weights currently held by ``fallback_nn``.

    Args:
        best_nn: network whose weights are overwritten in place.
        fallback_nn: network whose weights are copied in case 3.
        temp_version: version label to return when the temp model is used.
        verb: leading word of the status message printed for cases 1 and 3
            ('USING' at start-up, 'LOADING' inside the loop).

    Returns:
        The version label of the weights that were loaded (``temp_version``,
        initialise.INITIAL_MODEL_VERSION, or 0 for the fallback).
    """
    if os.path.exists('./run/models/model_temp.h5'):    # use model_temp
        with _FileFlag(2):
            m_tmp = best_nn.read_tmp_selfplay(env.name)
        best_nn.model.set_weights(m_tmp.get_weights())
        print(verb + ' TEMP MODEL ...')
        return temp_version

    if initialise.INITIAL_MODEL_VERSION is not None:    # use the specified model version
        version = initialise.INITIAL_MODEL_VERSION
        print('LOADING MODEL VERSION ' + str(initialise.INITIAL_MODEL_VERSION) + '...')
        m_tmp = best_nn.read(env.name, initialise.INITIAL_RUN_NUMBER, version)
        best_nn.model.set_weights(m_tmp.get_weights())
        return version

    # use the empty model
    best_nn.model.set_weights(fallback_nn.model.get_weights())
    print(verb + ' EMPTY MODEL ...')
    return 0


######## CREATE THE PLAYERS ########

# Create the empty neural networks (same constructor arguments for both).
current_NN = Residual_CNN(config.REG_CONST, config.LEARNING_RATE, (2,) + env.grid_shape, env.action_size, config.HIDDEN_CNN_LAYERS)  # empty NN
best_NN = Residual_CNN(config.REG_CONST, config.LEARNING_RATE, (2,) + env.grid_shape, env.action_size, config.HIDDEN_CNN_LAYERS)     # NN used for self play

# Load the best player model.
best_player_version = _refresh_best_weights(best_NN, current_NN, temp_version='temp', verb='USING')


######## CREATE THE PLAYER ########
best_player = Agent('best_player', env.state_size, env.action_size, config.MCTS_SIMS, config.CPUCT, best_NN)

iteration = 0

# True only until the first iteration has merged its samples; selects where the
# previously accumulated memory is loaded from.
first_play = True


# Runs until interrupted: self play -> merge samples into the stored memory ->
# dump the memory -> reload the model weights for the next round.
while True:

    # Fresh container for this iteration's samples. Created before
    # reload(config), so it is sized with the config values of the previous
    # iteration (kept from the original ordering).
    memory = Memory(config.MEMORY_SIZE)

    iteration += 1
    # Re-import loggers and config so edits to those modules apply without a kernel restart.
    reload(lg)
    reload(config)

    print('ITERATION NUMBER ' + str(iteration))

    lg.logger_main.info('BEST PLAYER VERSION: %s', str(best_player_version))
    print('BEST PLAYER VERSION ' + str(best_player_version))

    ######## SELF PLAY ########
    print('SELF PLAYING ' + str(config.EPISODES) + ' EPISODES...')
    lg.logger_test.info('Start self playing...')
    # The best player plays against itself; only the returned memory is kept.
    _, memory, _, _ = playMatches(
        best_player,
        best_player,
        config.EPISODES,
        lg.logger_main,
        turns_until_tau0=config.TURNS_UNTIL_TAU0,
        memory=memory,
    )
    print('\n')

    memory.clear_stmemory()

    ######## LOAD OLD MEMORIES & DUMP ########
    if first_play:
        if initialise.INITIAL_MEMORY_VERSION is not None:
            # Start from the memory snapshot named in initialise.
            memory_old = _load_pickle("./run/memory/memory" + '_' + str(initialise.INITIAL_MEMORY_VERSION) + ".p")
            print('USING MEMORY : ' + str(initialise.INITIAL_MEMORY_VERSION))
            print('INITIAL MEMORY SIZE: ' + str(len(memory_old.ltmemory)))
        else:
            memory_old = Memory(config.MEMORY_SIZE)
            print('NEW TEMP MEMORY')
    else:
        # Later iterations continue from the temp memory written by the previous iteration.
        with _FileFlag(0):
            memory_old = _load_pickle("./run/memory/memory_temp" + ".p")
        print('USING TEMP MEMORY')
        print('TEMP MEMORY SIZE: ' + str(len(memory_old.ltmemory)))

    first_play = False

    # Merge this iteration's samples into the accumulated long-term memory.
    for sample in memory.ltmemory:
        memory_old.ltmemory.append(sample)

    # Reports the number of samples produced this iteration, not the merged total.
    print('NEW MEMORY SIZE: ' + str(len(memory.ltmemory)))

    # The temp file is fully written and closed before flag 0 is released.
    with _FileFlag(0):
        _dump_pickle(memory_old, "./run/memory/memory_temp.p")
    # Timestamped snapshot of the same merged memory (local time, mmdd-HHMMSS).
    _dump_pickle(memory_old, "./run/memory/memory_" + time.strftime('%m%d-%H%M%S') + ".p")

    lg.logger_test.info('Self Play memory dumped')

    # Swap in the model for the next iteration (a temp model file may have
    # appeared since the last check).
    best_player_version = _refresh_best_weights(best_NN, current_NN, temp_version=0, verb='LOADING')

    ######## CHANGE THE PLAYER ########
    best_player = Agent('best_player', env.state_size, env.action_size, config.MCTS_SIMS, config.CPUCT, best_NN)

LOADING MODEL VERSION 0002...
ITERATION NUMBER 1
BEST PLAYER VERSION 0002
SELF PLAYING 1 EPISODES...
1

USING MEMORY : 0531-103848
INITIAL MEMORY SIZE: 596
NEW MEMORY SIZE: 208
LOADING MODEL VERSION 0002...
ITERATION NUMBER 2
BEST PLAYER VERSION 0002
SELF PLAYING 1 EPISODES...
1

USING TEMP MEMORY
TEMP MEMORY SIZE: 804
NEW MEMORY SIZE: 154
USING TEMP MODEL ...
ITERATION NUMBER 3
BEST PLAYER VERSION 0
SELF PLAYING 1 EPISODES...
1

USING TEMP MEMORY
TEMP MEMORY SIZE: 958
NEW MEMORY SIZE: 100
USING TEMP MODEL ...
ITERATION NUMBER 4
BEST PLAYER VERSION 0
SELF PLAYING 1 EPISODES...
1

USING TEMP MEMORY
TEMP MEMORY SIZE: 1058
NEW MEMORY SIZE: 264
USING TEMP MODEL ...
ITERATION NUMBER 5
BEST PLAYER VERSION 0
SELF PLAYING 1 EPISODES...
1

USING TEMP MEMORY
TEMP MEMORY SIZE: 1322
NEW MEMORY SIZE: 212


KeyboardInterrupt: 