In [1]:
# NOTE(review): presumably selects a legacy Gym API code path — not used in this cell; confirm.
GYM_OLD = False

# GYM environment settings
# NOTE(review): TEST is not read anywhere in the visible cells — confirm where it is consumed.
TEST = False

REPLAY = False  # When truthy, the plotting code labels the run mode 'TEST' instead of 'TRAIN'
RECORD = False
GYM = 'LunarLander-v2'
WIND = True     # Mapped to the 'WIND' / 'NOWIND' label used in result file names
SEED = 0
TARGET_RETURN = 200
STRETCH_GOAL = 250

# Hyper parameters
FC1_SIZE = 128         # Size of hidden layer 1
FC2_SIZE = 128         # Size of hidden layer 2
BUFFER_SIZE = 50000    # Replay buffer size
BATCH_SIZE = 64        # Minibatch size
DROPOUT = 0.0          # Dropout
GAMMA = 0.99           # Discount factor
TAU = 0.01             # Soft-update rate of the learning model (q → q_target)
LEARNING_RATE = 0.0005 # Learning rate
EPS_START = 0.5        # Initial epsilon
EPS_END = 0.01         # Final epsilon
EPS_DECAY = 0.995      # Epsilon decay rate

UPDATE_EVERY = 4       # Learning interval
UPDATE_RATIO = 1.0     # Ratio at which updates from the replay buffer begin (relative to BATCH_SIZE)
TOTAL_EPISODES = 2000  # Maximum number of episodes
MAX_TIMESTAMP = 1000   # Maximum timestamp per episode

# Algorithm
USE_LINEAR_EPS_DECAY = False
USE_DOUBLE_DQN = True
USE_DUELING_DQN = True
# Model import/export
import os

# Base directory for all generated artifacts. abspath("") resolves to the
# working directory at the moment this cell runs.
SCRIPT_PATH = os.path.abspath("")
MODEL_PATH = os.path.join(SCRIPT_PATH, 'model')
SCORE_PATH = os.path.join(SCRIPT_PATH, 'score')
MP4_PATH = os.path.join(SCRIPT_PATH, 'mp4')

# Create the output directories up front. exist_ok=True keeps re-running the
# cell harmless and avoids the check-then-create race of testing
# os.path.exists() first. A regular file occupying one of these paths now
# raises FileExistsError instead of being silently accepted.
os.makedirs(MODEL_PATH, exist_ok=True)
os.makedirs(SCORE_PATH, exist_ok=True)
os.makedirs(MP4_PATH, exist_ok=True)

class dotdict(dict):
    """Dictionary whose items can also be read, written and removed as attributes."""

    def __getattr__(self, name):
        # Only reached when normal attribute lookup fails; a missing key
        # yields None rather than raising AttributeError.
        return dict.get(self, name)

    def __setattr__(self, name, value):
        # Attribute assignment stores an ordinary dictionary item.
        dict.__setitem__(self, name, value)

    def __delattr__(self, name):
        # Removing an absent key raises KeyError, as with `del d[name]`.
        dict.__delitem__(self, name)


In [2]:
# Derive the DQN variant label from the (double, dueling) flag pair.
dqn_type = {
    (True, True): 'DoubleDueling',
    (True, False): 'Double',
    (False, True): 'Dueling',
    (False, False): 'DQN',
}[(bool(USE_DOUBLE_DQN), bool(USE_DUELING_DQN))]

# Derive the wind label from the WIND flag.
wind = 'WIND' if WIND else 'NOWIND'

# Gather every setting into one dictionary so sweeps can copy and override it.
CONFIGURE = dict(
    dqn_type=dqn_type,
    wind_type=wind,
    gym_old=GYM_OLD,
    replay=REPLAY,
    record=RECORD,
    gym=GYM,
    wind=WIND,
    seed=SEED,
    target_return=TARGET_RETURN,
    stretch_goal=STRETCH_GOAL,
    fc1_size=FC1_SIZE,
    fc2_size=FC2_SIZE,
    buffer_size=BUFFER_SIZE,
    batch_size=BATCH_SIZE,
    dropout=DROPOUT,
    gamma=GAMMA,
    tau=TAU,
    learning_rate=LEARNING_RATE,
    eps_start=EPS_START,
    eps_end=EPS_END,
    eps_decay=EPS_DECAY,
    update_every=UPDATE_EVERY,
    update_ratio=UPDATE_RATIO,
    total_episodes=TOTAL_EPISODES,
    max_timestamp=MAX_TIMESTAMP,
    use_linear_eps_decay=USE_LINEAR_EPS_DECAY,
    use_double_dqn=USE_DOUBLE_DQN,
    use_dueling_dqn=USE_DUELING_DQN,
    script_path=SCRIPT_PATH,
    model_path=MODEL_PATH,
    score_path=SCORE_PATH,
    mp4_path=MP4_PATH,
)

# Attribute-style working copy of the settings.
LOOP_CONFIGURE = dotdict(CONFIGURE)


In [3]:
import pandas as pd
import matplotlib.pyplot as plt

def plot_models(CONFIGURE, type, loop):
    """Plot the 'Average Score' curves of one hyper-parameter sweep and save the chart.

    For each value in ``loop`` the matching field of ``CONFIGURE`` is
    overwritten, the score CSV whose file name encodes the resulting
    configuration is read from ``CONFIGURE.score_path``, and its
    'Average Score' column becomes one line of a shared comparison chart.
    The chart is written as a PNG into ``CONFIGURE.score_path``.

    Args:
        CONFIGURE: Attribute-accessible configuration mapping. It is mutated
            in place: the swept field(s) plus ``mode``, ``model_csvname`` and
            ``model_resultname`` are set on it.
        type: Name of the swept hyper-parameter ('FC1_SIZE', 'BUFFER_SIZE',
            'BATCH_SIZE', 'DROPOUT', 'GAMMA', 'TAU', 'LEARNING_RATE',
            'EPS_START', 'EPS_DECAY' or 'DQN_TYPE'). Any other name leaves
            the configuration fields untouched. The parameter shadows the
            ``type`` builtin; the name is kept so existing callers still work.
        loop: Iterable of values to apply to the swept hyper-parameter.

    Raises:
        FileNotFoundError: Propagated from ``pandas.read_csv`` when the score
            CSV for one of the swept configurations does not exist.
    """
    # Configuration field(s) driven by each sweep name. FC1_SIZE sets both
    # hidden layers to the same width.
    swept_fields = {
        'FC1_SIZE': ('fc1_size', 'fc2_size'),
        'BUFFER_SIZE': ('buffer_size',),
        'BATCH_SIZE': ('batch_size',),
        'DROPOUT': ('dropout',),
        'GAMMA': ('gamma',),
        'TAU': ('tau',),
        'LEARNING_RATE': ('learning_rate',),
        'EPS_START': ('eps_start',),
        'EPS_DECAY': ('eps_decay',),
        'DQN_TYPE': ('dqn_type',),
    }.get(type, ())

    curves = []
    for item in loop:
        for field in swept_fields:
            setattr(CONFIGURE, field, item)

        # Update the bookkeeping entries that locate the input and output files.
        CONFIGURE.mode = 'TEST' if CONFIGURE.replay else 'TRAIN'
        CONFIGURE.model_csvname = os.path.join(CONFIGURE.score_path, "{}_{}_{}_{}_{}_{}_{}_{}_{}_{}_{}_{}_{}.csv".format(
            CONFIGURE.gym, CONFIGURE.mode, CONFIGURE.wind_type, CONFIGURE.fc1_size,
            CONFIGURE.buffer_size, CONFIGURE.batch_size, CONFIGURE.dropout,
            CONFIGURE.gamma, CONFIGURE.tau, CONFIGURE.learning_rate,
            CONFIGURE.eps_start, CONFIGURE.eps_decay, CONFIGURE.dqn_type))
        CONFIGURE.model_resultname = os.path.join(CONFIGURE.score_path, "{}_{}_{}_{}.png".format(
            CONFIGURE.gym, CONFIGURE.mode, CONFIGURE.wind_type, type))

        # Keep only the average-score column(s) and label them with the swept value.
        scores = pd.read_csv(CONFIGURE.model_csvname)
        scores = scores.loc[:, scores.columns.str.contains('Average Score')]
        curves.append(scores.rename(columns={'Average Score': '{} {}'.format(type, item)}))

    # One concat over all curves instead of growing an initially empty frame.
    df = pd.concat(curves, axis=1) if curves else pd.DataFrame()

    # Reference lines follow the configured goals; fall back to the historical
    # 200 / 250 levels when the configuration does not provide them.
    goal_lines = (
        ('target_return', 200, 'navy'),
        ('stretch_goal', 250, 'red'),
    )

    fig = plt.figure(dpi=150)
    try:
        ax = fig.add_subplot(1, 1, 1)
        ax.set_ylim([-300, 300])
        episodes = df.index.values + 1  # 1-based x axis
        for label, series in df.items():
            # Pass the data and label explicitly so a column name can never be
            # mistaken for a matplotlib format string.
            ax.plot(episodes, series, label=label, linewidth=1.5)
        for field, fallback, color in goal_lines:
            level = getattr(CONFIGURE, field, None)
            ax.axhline(y=fallback if level is None else level, color=color,
                       alpha=0.5, linestyle='dashed', linewidth=1)
        ax.legend(loc='lower right', fontsize=9, ncol=2)
        # The layout can only be tightened once the axes and artists exist.
        fig.tight_layout()
        fig.savefig(CONFIGURE.model_resultname)
    finally:
        # Always release the figure, even if plotting or saving fails.
        plt.close(fig)


In [4]:
# Values to sweep for each setting. 'WIND' is the outer dimension; every other
# entry is swept on its own while the remaining settings keep their defaults.
LOOP = dict(
    WIND=['WIND', 'NOWIND'],
    FC1_SIZE=[4, 32, 64, 128, 256, 512, 1024],
    BUFFER_SIZE=[1000, 10000, 50000, 100000],
    BATCH_SIZE=[4, 32, 64, 128],
    DROPOUT=[0.0, 0.2, 0.4, 0.6, 0.8],
    GAMMA=[0.8, 0.9, 0.99],
    TAU=[0.1, 0.01, 0.001],
    LEARNING_RATE=[0.5, 0.05, 0.005, 0.0005],
    EPS_START=[0.9, 0.5, 0.1],
    EPS_DECAY=[0.9, 0.99, 0.995],
    DQN_TYPE=['DQN', 'Double', 'Dueling', 'DoubleDueling'],
)

# Produce the full set of charts once per wind setting.
for w in LOOP['WIND']:
    # Switch the shared defaults to the current wind setting.
    CONFIGURE['wind_type'] = w
    CONFIGURE['wind'] = CONFIGURE['wind_type'] == 'WIND'

    # Sweep each remaining setting individually ('WIND' is handled by the outer loop).
    for key, value in LOOP.items():
        if key != 'WIND':
            # Start every sweep from a fresh copy of the defaults.
            LOOP_CONFIGURE = dotdict(CONFIGURE)
            plot_models(LOOP_CONFIGURE, key, value)
