In [1]:
GYM_OLD = False

# GYM 환경설정
TEST = False

REPLAY = True
RECORD = False
GYM = 'LunarLander-v2'
WIND = True
SEED = 10
TARGET_RETURN = 200
STRETCH_GOAL = 250

# Hyper parameters
FC1_SIZE = 512         # Hidden layer 1 크기
FC2_SIZE = 512         # Hidden layer 2 크기
BUFFER_SIZE = 50000    # ReplayBuffer 크기
BATCH_SIZE = 64        # Minibatch 크기
DROPOUT = 0.0          # Dropout
GAMMA = 0.99           # 할인율 (discount factor)
TAU = 0.01             # 학습 모델 Soft update(q → q_target) 비율
LEARNING_RATE = 0.0005 # 학습율
EPS_START = 0.5        # Epsilon 시작 비율
EPS_END = 0.01         # Epsilon 최종 비율
EPS_DECAY = 0.995      # Epsilon 감소율

UPDATE_EVERY = 4       # 학습 주기
UPDATE_RATIO = 1.0     # Replay Buffer에서 업데이트를 시작할 비율 (BATCH_SIZE 기준)
TOTAL_EPISODES = 2000  # 최대 Episode 개수
MAX_TIMESTAMP = 1000   # Episode당 최대 Timestamp

# 알고리즘
USE_LINEAR_EPS_DECAY = False
USE_DOUBLE_DQN = True
USE_DUELING_DQN = True

# Model import/export
import os
SCRIPT_PATH = os.path.abspath("")
print(SCRIPT_PATH)
MODEL_PATH = os.path.join(SCRIPT_PATH, 'model')
SCORE_PATH = os.path.join(SCRIPT_PATH, 'score')
MP4_PATH = os.path.join(SCRIPT_PATH, 'mp4')
os.makedirs(MODEL_PATH) if not os.path.exists(MODEL_PATH) else None
os.makedirs(SCORE_PATH) if not os.path.exists(SCORE_PATH) else None
os.makedirs(MP4_PATH) if not os.path.exists(MP4_PATH) else None

class dotdict(dict):
    """dot.notation access to dictionary attributes"""
    __getattr__ = dict.get
    __setattr__ = dict.__setitem__
    __delattr__ = dict.__delitem__


d:\RL\swap_model


In [2]:
# DQN_TYPE 처리
if USE_DOUBLE_DQN and USE_DUELING_DQN:
    dqn_type = 'DoubleDueling'
elif USE_DOUBLE_DQN:
    dqn_type = 'Double'
elif USE_DUELING_DQN:
    dqn_type = 'Dueling'
else:
    dqn_type = 'DQN'

# WIND 처리
if WIND:
    wind = 'WIND'
else:
    wind = 'NOWIND'


# 설정값 Dictionary로 변환 (Loop 처리 용이하도록)
CONFIGURE = {
    'dqn_type':dqn_type,
    'wind_type':wind,
    'gym_old':GYM_OLD,
    'replay':REPLAY,
    'record':RECORD,
    'gym':GYM,
    'wind':WIND,
    'seed':SEED,
    'target_return':TARGET_RETURN,
    'stretch_goal':STRETCH_GOAL,
    'fc1_size':FC1_SIZE,
    'fc2_size':FC2_SIZE,
    'buffer_size':BUFFER_SIZE,
    'batch_size':BATCH_SIZE,
    'dropout':DROPOUT,
    'gamma':GAMMA,
    'tau':TAU,
    'learning_rate':LEARNING_RATE,
    'eps_start':EPS_START,
    'eps_end':EPS_END,
    'eps_decay':EPS_DECAY,
    'update_every':UPDATE_EVERY,
    'update_ratio':UPDATE_RATIO,
    'total_episodes':TOTAL_EPISODES,
    'max_timestamp':MAX_TIMESTAMP,
    'use_linear_eps_decay':USE_LINEAR_EPS_DECAY,
    'use_double_dqn':USE_DOUBLE_DQN,
    'use_dueling_dqn':USE_DUELING_DQN,
    'script_path':SCRIPT_PATH,
    'model_path':MODEL_PATH,
    'score_path':SCORE_PATH,
    'mp4_path':MP4_PATH
}
LOOP_CONFIGURE = dotdict(CONFIGURE)


In [3]:
import pandas as pd
import math
import matplotlib.pyplot as plt

def plot_models(CONFIGURE, type, loop):
    df = pd.DataFrame()
    seeds = ['10', '20', '30', '40', '50', '60', '70', '80', '90', '100']
    for seed in seeds:
        df_seed = pd.DataFrame()
        for item in loop:
            if type == 'FC1_SIZE':
                CONFIGURE.fc1_size = item
                CONFIGURE.fc2_size = item
            elif type == 'BUFFER_SIZE':
                CONFIGURE.buffer_size = item
            elif type == 'BATCH_SIZE':
                CONFIGURE.batch_size = item
            elif type == 'DROPOUT':
                CONFIGURE.dropout = item
            elif type == 'GAMMA':
                CONFIGURE.gamma = item
            elif type == 'TAU':
                CONFIGURE.tau = item
            elif type == 'LEARNING_RATE':
                CONFIGURE.learning_rate = item
            elif type == 'EPS_START':
                CONFIGURE.eps_start = item
            elif type == 'EPS_DECAY':
                CONFIGURE.eps_decay = item
            elif type == 'DQN_TYPE':
                CONFIGURE.dqn_type = item

            # 저장정보 변경
            CONFIGURE.mode = 'TEST' if CONFIGURE.replay else 'TRAIN'
            CONFIGURE.model_csvname = os.path.join(CONFIGURE.score_path, "{}_{}_{}_{}_{}_{}_{}_{}_{}_{}_{}_{}_{}.csv".format(
                CONFIGURE.gym, CONFIGURE.mode, CONFIGURE.wind_type, CONFIGURE.fc1_size, CONFIGURE.buffer_size, CONFIGURE.batch_size, CONFIGURE.dropout, CONFIGURE.gamma, CONFIGURE.tau, CONFIGURE.learning_rate, CONFIGURE.eps_start, CONFIGURE.eps_decay, CONFIGURE.dqn_type))
            CONFIGURE.model_resultname = os.path.join(CONFIGURE.score_path, "{}_{}_{}_{}.png".format(
                CONFIGURE.gym, CONFIGURE.mode, CONFIGURE.wind_type, type))

            csvname = CONFIGURE.model_csvname.replace("\\score", "\\score_seed" + seed)
            new_df = pd.read_csv(csvname)
            new_df.drop(new_df.columns[~new_df.columns.str.match('Score')], axis=1, inplace=True)
            new_df.rename(columns = {'Score': '{} {}_RAW'.format(type, item)}, inplace=True)
            df_seed = pd.concat([df_seed, new_df], axis=1)
        df = pd.concat([df, df_seed], axis=0)

    df.reset_index(drop=True, inplace=True)
    for item in loop:
        df['{} {}'.format(type, item)] = df['{} {}_RAW'.format(type, item)].rolling(100, min_periods=1).mean()
        print('{} {} 신뢰구간\t{:.3f} ±{:.3f}'.format(type, item, df['{} {}'.format(type, item)].mean(), 1.96 * df['{} {}'.format(type, item)].std() / math.sqrt(1000)))
    df.drop(df.columns[df.columns.str.contains('RAW')], axis=1, inplace=True)

    fig = plt.figure(dpi=150)
    plt.tight_layout()
    ax = fig.add_subplot(1, 1, 1)
    plt.ylim([-300, 300])
    for column in df.columns:
        plt.plot(df.index.values+1, column, data=df, linewidth=1.5)
    for i in range(len(seeds)+1):
        plt.axvline(x=i*100, color='deepskyblue', alpha=0.5, linewidth=1)
    plt.axhline(y=200, color='navy', alpha=0.5, linestyle='dashed', linewidth=1)
    plt.axhline(y=250, color='red', alpha=0.5, linestyle='dashed', linewidth=1)
    #plt.legend(loc='lower right', fontsize=9, ncol=2)
    plt.savefig(CONFIGURE.model_resultname)
    #plt.show()
    plt.close()



In [4]:
LOOP = {
    'WIND':['WIND', 'NOWIND']
}

# WIND 여부에 따라서는 모두 수행함
for w in LOOP['WIND']:
    # WIND 값 변경
    CONFIGURE['wind_type'] = w
    if CONFIGURE['wind_type'] == 'WIND':
        CONFIGURE['wind'] = True
    else:
        CONFIGURE['wind'] = False

    # 그 외 항목에 대해서는 각 항목별로만 변경하면서 수행
    for key, value in LOOP.items():

        # 설정값을 기본값으로 초기화
        LOOP_CONFIGURE = dotdict(CONFIGURE)
        # 현재 Key/Value에 대해 수행
        plot_models(LOOP_CONFIGURE, key, value)


WIND WIND 신뢰구간	137.696 ±1.827
WIND NOWIND 신뢰구간	137.696 ±1.827
WIND WIND 신뢰구간	264.174 ±0.142
WIND NOWIND 신뢰구간	264.174 ±0.142
