# Setup & Imports

In [1]:
import time
import sys
import os

# Make modules in the parent of the notebook's directory importable.
# NOTE: the path is derived from os.getcwd(), so this assumes the kernel's
# working directory is the folder containing main.ipynb — TODO confirm for
# other launchers.
_parent_dir = os.path.abspath(os.path.join(os.getcwd(), ".."))
# Guard so that re-running this cell does not pile up duplicate entries.
if _parent_dir not in sys.path:
    sys.path.append(_parent_dir)

In [2]:
# INFO 
from gameInfo import *
from fileInfo import *
from Omok.Jimin.main.config import *
from Omok.Jimin.utils.setDevice import *
from timer import *

# CODES
from state.ver2 import *
from network.resnet import *
from trainer.ver1 import *
from eval.ver1 import *
from selfplay.ver1 import *
from tester.ver2 import *
from utils.saveLoad import *

In [3]:
# Bind `State` to whatever select_state returns for the configured STATE_DIM.
# NOTE(review): presumably this is the state class matching that dimension,
# with both names arriving through the star imports above — confirm.
State = select_state(STATE_DIM)

## Check

In [None]:
# Sanity check: show the value of `device` before any training starts.
# NOTE(review): `device` is presumably provided by the setDevice star import — confirm.
print(f"Using device: {device}")

## Train

In [None]:
def _report_elapsed(label, since):
    """Print how long the phase named *label* took and return the current time.

    Args:
        label: Phase name interpolated into the progress message.
        since: ``time.time()`` timestamp taken when the phase started.

    Returns:
        The timestamp read for this report, to be used as the start of the
        next phase.
    """
    # Read the clock once so no time falls between consecutive phases.
    now = time.time()
    h, m, s = convert_seconds(now - since)
    print(f">>> {label}에 소요된 시간은 {h}시간 {m}분 {s}초 입니다. \n")
    return now


# Wall-clock start of the whole run (setup included), used for the final total.
start = time.time()

# Win rates collected at each evaluation, kept for visualization.
win_rates = []

# Build the model and move it to the selected device.
model = Network(N_RESIDUAL_BLOCK, N_KERNEL, STATE_DIM, N_ACTIONS)
model = model.to(device)

# Self-play, training and evaluation all receive the same model object.
self_play = SelfPlay(model, TRAIN_TEMPERATURE, TEMP_DISCOUNT, N_SELFPLAY, N_PLAYOUT)
train = TrainNetwork(model, BATCH_SIZE, LEARNING_RATE, LEARN_DECAY, LEARN_EPOCH)
eval_network = EvalNetwork(model, EVAL_GAME_COUNT, EVAL_TEMPERATURE, N_PLAYOUT)

# Create the output path.
make_directory(F_PATH)

# Start the per-phase clock only now, so the first self-play duration does not
# include model construction, device transfer, or directory creation.
previous_time = time.time()

# Training iterations.
for i in range(N_ITER):
    print(f'\n - [ {i+1} ] --------------------------------')

    # Self-play, run in chunks.
    self_play(i)
    previous_time = _report_elapsed("selfplay", previous_time)

    # Train on the history gathered by self-play.
    train(self_play.history)
    previous_time = _report_elapsed("학습", previous_time)

    # Overwrite the "latest" checkpoint after every iteration.
    save_model(train.model, f_name='latest_model_weight')

    # Evaluate every 5th iteration and record the resulting win rate.
    if (i+1) % 5 == 0:
        eval_network(train.model)
        win_rates.append(eval_network.win_rate)
        previous_time = _report_elapsed("Eval", previous_time)

    # Every 10th iteration, export a game visualization and show the current
    # self-play temperature.
    if (i+1) % 10 == 0:
        eval_network.visualize_game(download=True, idx=i+1)
        print(f"current temp is {self_play.temp}")

    # When the evaluator has flagged an update, hand the trained model to
    # self-play and clear the flag so the hand-over happens only once.
    # NOTE(review): `updated` is presumably set inside eval_network — confirm.
    if eval_network.updated:
        self_play.update_model(train.model)
        eval_network.updated = False

h, m, s = convert_seconds(time.time() - start)
print(f"총 학습에 걸린 시간은 {h}시간 {m}분 {s}초 입니다. ")

In [None]:
# download hyper params 
save_as_txt('hyperParams', hyper_params)

In [8]:
# Rebuild the trainer around the same model with a literal learning rate of
# 0.001 in place of LEARNING_RATE (used for the resumed run further below).
# NOTE(review): rebinding `train` means later reads such as `train.losses`
# refer to this new object, not the trainer from the first run — confirm that
# losing the earlier loss history is intended.
train = TrainNetwork(model, BATCH_SIZE, 0.001, LEARN_DECAY, LEARN_EPOCH)

In [None]:
# Plot the win rates recorded at each evaluation and the losses held by the
# current `train` object; both calls receive F_PATH and download=True.
# NOTE(review): presumably the figures are saved under F_PATH — confirm.
visualize_win_rate(win_rates, path=F_PATH, download=True)
visualize_loss(train.losses, path=F_PATH, download=True)

In [None]:
def _report_elapsed(label, since):
    """Print how long the phase named *label* took and return the current time.

    Args:
        label: Phase name interpolated into the progress message.
        since: ``time.time()`` timestamp taken when the phase started.

    Returns:
        The timestamp read for this report, to be used as the start of the
        next phase.
    """
    # Read the clock once so no time falls between consecutive phases.
    now = time.time()
    h, m, s = convert_seconds(now - since)
    print(f">>> {label}에 소요된 시간은 {h}시간 {m}분 {s}초 입니다. \n")
    return now


start = time.time()
previous_time = start

# Resumed training: iteration indices are hard-coded to continue from 100 up
# to (not including) 150, reusing the existing self_play / train /
# eval_network objects and appending to the existing win_rates list.
for i in range(100, 150):
    print(f'\n - [ {i+1} ] --------------------------------')

    # Self-play, run in chunks.
    self_play(i)
    previous_time = _report_elapsed("selfplay", previous_time)

    # Train on the history gathered by self-play.
    train(self_play.history)
    previous_time = _report_elapsed("학습", previous_time)

    # Overwrite the "latest" checkpoint after every iteration.
    save_model(train.model, f_name='latest_model_weight')

    # Evaluate every 5th iteration and record the resulting win rate.
    if (i+1) % 5 == 0:
        eval_network(train.model)
        win_rates.append(eval_network.win_rate)
        previous_time = _report_elapsed("Eval", previous_time)

    # Every 10th iteration, export a game visualization and show the current
    # self-play temperature.
    if (i+1) % 10 == 0:
        eval_network.visualize_game(download=True, idx=i+1)
        print(f"current temp is {self_play.temp}")

    # When the evaluator has flagged an update, hand the trained model to
    # self-play and clear the flag so the hand-over happens only once.
    # NOTE(review): `updated` is presumably set inside eval_network — confirm.
    if eval_network.updated:
        self_play.update_model(train.model)
        eval_network.updated = False

h, m, s = convert_seconds(time.time() - start)
print(f"총 학습에 걸린 시간은 {h}시간 {m}분 {s}초 입니다. ")


 - [ 101 ] --------------------------------
self play :  2 / 20 | 2002 / 2000
self play :  4 / 20 | 2004 / 2000
self play :  6 / 20 | 2006 / 2000
self play :  8 / 20 | 2008 / 2000
self play :  10 / 20 | 2010 / 2000
self play :  12 / 20 | 2012 / 2000
self play :  14 / 20 | 2014 / 2000
self play :  16 / 20 | 2016 / 2000
self play :  18 / 20 | 2018 / 2000
self play :  20 / 20 | 2020 / 2000
>>> selfplay에 소요된 시간은 0시간 8분 37초 입니다. 


> Train Started.
step : 100 / 1000 | (mean) p_loss : 1.262 v_loss : 0.721 | lr : [0.001]
step : 200 / 1000 | (mean) p_loss : 1.230 v_loss : 0.704 | lr : [0.001]
step : 300 / 1000 | (mean) p_loss : 1.212 v_loss : 0.698 | lr : [0.001]
step : 400 / 1000 | (mean) p_loss : 1.201 v_loss : 0.694 | lr : [0.001]
step : 500 / 1000 | (mean) p_loss : 1.193 v_loss : 0.692 | lr : [0.001]
step : 600 / 1000 | (mean) p_loss : 1.187 v_loss : 0.690 | lr : [0.001]
step : 700 / 1000 | (mean) p_loss : 1.181 v_loss : 0.688 | lr : [0.001]
step : 800 / 1000 | (mean) p_loss : 1.179 v_los