# Setup & Imports

In [1]:
import time
import sys
import os

# Make the parent of the current working directory importable, presumably so
# that the project modules imported in the next cell resolve.
# NOTE: this is based on os.getcwd(), not on the notebook file's location, so
# it assumes the kernel was started in the notebook's own directory.
parent_dir = os.path.abspath(os.path.join(os.getcwd(), ".."))
# Re-running this cell must not keep appending the same entry to sys.path.
if parent_dir not in sys.path:
    sys.path.append(parent_dir)

In [2]:
# INFO 
from gameInfo import *
from fileInfo import *
from hyperParams import *
from setDevice import *
from timer import *

# CODES
from state.ver2 import *
from network.resnet import *
from trainer.ver1 import *
from eval.ver1 import *
from selfplay.ver1 import *
from tester.ver2 import *
from utils.saveLoad import *

In [3]:
# Pick the State implementation for the configured STATE_DIM.
# select_state and STATE_DIM come from the star imports above
# (presumably state.ver2 and gameInfo/hyperParams -- TODO confirm).
State = select_state(STATE_DIM)

## Check

In [None]:
# Sanity check: show which compute device the model will be moved to.
# `device` comes from the star imports above (presumably setDevice -- TODO confirm).
print(f"Using device: {device}")

## Train

In [None]:
# Training driver: repeats self-play -> train -> (periodic) evaluation for
# N_ITER iterations and prints how long each phase took.
#
# Names defined here that later cells read: `win_rates`, `train`.

# `start` is the reference point for the total run time; `previous_time`
# marks the most recent phase boundary inside an iteration.
start = time.time()
previous_time = start

# Evaluation win rates, collected for plotting after training.
win_rates = []

# Evaluate every `eval_every` iterations and render a game every
# `visualize_every` iterations. The second is a multiple of the first, so a
# render always happens in an iteration that has just been evaluated.
eval_every = 5
visualize_every = 10

# Build the network and move it to the selected device.
model = Network(N_RESIDUAL_BLOCK, N_KERNEL, STATE_DIM, N_ACTIONS)
model = model.to(device)

self_play = SelfPlay(model, TRAIN_TEMPERATURE, TEMP_DISCOUNT, N_SELFPLAY, N_PLAYOUT)
train = TrainNetwork(model, BATCH_SIZE, LEARNING_RATE, LEARN_DECAY, LEARN_EPOCH)
eval_network = EvalNetwork(model, EVAL_GAME_COUNT, EVAL_TEMPERATURE, N_PLAYOUT)

# Create the output directory.
make_directory(F_PATH)

# Training loop.
for i in range(N_ITER):
    print(f'\nTrain {i+1} --------------------------------')

    # Restart both clocks at the top of the iteration: `iter_start` is used
    # for the whole-iteration summary, `previous_time` for per-phase timing.
    # (Previously the summary was measured from the last phase boundary, so
    # it under-reported, and the first self-play timing included setup.)
    iter_start = time.time()
    previous_time = iter_start

    # Self-play phase; the trainer below consumes self_play.history.
    self_play(i)

    # Read the clock once so the reported span and the new boundary agree.
    now = time.time()
    h, m, s = convert_seconds(now - previous_time)
    previous_time = now
    print(f">>> selfplay에 소요된 시간은 {h}시간 {m}분 {s}초 입니다. \n")

    # Training phase.
    train(self_play.history)

    now = time.time()
    h, m, s = convert_seconds(now - previous_time)
    previous_time = now
    print(f">>> 학습에 소요된 시간은 {h}시간 {m}분 {s}초 입니다. \n")

    # Persist the freshly trained weights every iteration.
    save_model(train.model, f_name='latest_model_weight')

    if (i+1) % eval_every == 0:
        eval_network(train.model)
        win_rates.append(eval_network.win_rate)

        # Note: this span also covers the save_model call above.
        now = time.time()
        h, m, s = convert_seconds(now - previous_time)
        previous_time = now
        print(f">>> Eval에 소요된 시간은 {h}시간 {m}분 {s}초 입니다. \n")

    if (i+1) % visualize_every == 0:
        eval_network.visualize_game(download=True, idx=i+1)
        print(f"current temp is {self_play.temp}")

    # When evaluation flagged an update, hand the trained model to self-play
    # and clear the flag so it is acted on only once.
    if eval_network.updated:
        self_play.update_model(train.model)
        eval_network.updated = False

    # Whole-iteration time, measured from the top of this iteration.
    h, m, s = convert_seconds(time.time() - iter_start)
    print(f"--> {i+1}번째 iter를 완료하는데 걸린 시간은, {h}시간 {m}분 {s}초 입니다. \n")

# Total wall-clock time for the full run, including setup.
h, m, s = convert_seconds(time.time() - start)
print(f"총 학습에 걸린 시간은 {h}시간 {m}분 {s}초 입니다. ")

'/Users/ijimin/Documents/GitHub/AiGO/Omok/Jimin/model/Omok_1' already exists.

Train 1 --------------------------------
self play :  2 / 20 | 2 / 2000
self play :  4 / 20 | 4 / 2000
self play :  6 / 20 | 6 / 2000
self play :  8 / 20 | 8 / 2000
self play :  10 / 20 | 10 / 2000
self play :  12 / 20 | 12 / 2000
self play :  14 / 20 | 14 / 2000
self play :  16 / 20 | 16 / 2000
self play :  18 / 20 | 18 / 2000
self play :  20 / 20 | 20 / 2000
>>> selfplay에 소요된 시간은 0시간 10분 12초 입니다. 


> Train Started.
> step : 100 / 1000 | (mean) p_loss : 4.392 v_loss : 1.368 | lr : [0.002]
> step : 200 / 1000 | (mean) p_loss : 4.391 v_loss : 1.204 | lr : [0.002]
> step : 300 / 1000 | (mean) p_loss : 4.391 v_loss : 1.140 | lr : [0.002]
> step : 400 / 1000 | (mean) p_loss : 4.391 v_loss : 1.105 | lr : [0.002]
> step : 500 / 1000 | (mean) p_loss : 4.390 v_loss : 1.081 | lr : [0.002]
> step : 600 / 1000 | (mean) p_loss : 4.390 v_loss : 1.067 | lr : [0.002]
> step : 700 / 1000 | (mean) p_loss : 4.390 v_loss : 1.

In [None]:
# Save the hyperparameters used for this run as a text file.
# `hyper_params` and `save_as_txt` come from the star imports at the top
# (presumably hyperParams and utils.saveLoad -- TODO confirm); the output
# location is decided inside save_as_txt.
save_as_txt('hyperParams', hyper_params)

In [None]:
# Plot the results of the training cell above. Both calls depend on names that
# cell defines (`win_rates`, `train`), so it must have been run first.
# download=True presumably writes the figures under F_PATH -- TODO confirm.
visualize_win_rate(win_rates, path=F_PATH, download=True)
visualize_loss(train.losses, path=F_PATH, download=True)