In [1]:
import matplotlib.pyplot as plt
import numpy as np

In [2]:
def smoothing(list_, n_smooth=500):
    """Smooth a sequence with a trailing moving average.

    The first ``n_smooth`` outputs all share one value: the mean of
    ``list_[:n_smooth]`` (a flat warm-up segment). Every later output ``i`` is
    the mean of ``list_[i - n_smooth:i + 1]``, i.e. a trailing window that
    holds ``n_smooth + 1`` samples (one more than the warm-up window).

    Args:
        list_: Sequence of numbers (list or array) to smooth.
        n_smooth: Window parameter described above.

    Returns:
        A list with ``len(list_)`` entries, each the result of ``np.mean``.
    """
    # Convert once; calling np.mean on list slices would re-convert the
    # Python objects to an array for every single window.
    values = np.asarray(list_)
    n_values = len(values)
    if n_values == 0:
        return []

    # Number of leading positions that satisfy ``i < n_smooth``.
    n_head = min(max(n_smooth, 0), n_values)

    smoothed_list_ = []
    if n_head > 0:
        # The warm-up mean does not depend on i, so compute it a single time.
        head_mean = np.mean(values[:n_smooth])
        smoothed_list_.extend([head_mean] * n_head)

    for i in range(n_head, n_values):
        smoothed_list_.append(np.mean(values[i - n_smooth:i + 1]))
    return smoothed_list_


def plot_graph(x_values, y_values, color_, title_, xlabel_=None, ylabel_=None, label_=None):
    """Draw one line on the current pyplot axes and decorate those axes.

    Args:
        x_values: X coordinates passed to ``plt.plot``.
        y_values: Y coordinates passed to ``plt.plot``.
        color_: Line color (forwarded as ``c=``).
        title_: Axes title.
        xlabel_: Optional x-axis label; the axis label is left untouched when None.
        ylabel_: Optional y-axis label; the axis label is left untouched when None.
        label_: Optional legend entry. When given, a legend is (re)drawn with
            ``loc=(1.02, 0.0)``.
    """
    if label_ is not None:
        plt.plot(x_values, y_values, c=color_, label=label_)
        plt.legend(loc=(1.02, 0.0))
    else:
        plt.plot(x_values, y_values, c=color_)

    # Apply only the axis labels the caller actually supplied.
    for set_axis_label, text in ((plt.xlabel, xlabel_), (plt.ylabel, ylabel_)):
        if text is not None:
            set_axis_label(text)

    plt.title(title_)
    plt.grid(True)


In [3]:
def _expand_per_step(values, counts):
    """Repeat ``values[i]`` ``counts[i]`` times and concatenate the runs into one list."""
    expanded = []
    for idx, count in enumerate(counts):
        # Non-positive counts contribute nothing (same as an empty range()).
        if count > 0:
            expanded.extend([values[idx]] * count)
    return expanded


def step_by_step(result_list, n_smooth=1000):
    """Re-index logged metrics so that there is one sample per step, then smooth.

    Reads the following entries of ``result_list``:
        [0] train reward, [1] train length, [2] train pf, [3] train loss,
        [5] test pf, [6] test step.
    (Entry [4] is not used here. "pf" presumably stands for the packing ratio
    shown in the notebook's plots -- confirm against the code that writes the
    result files.)

    Entries 0, 2 and 3 are index-aligned with entry 1: value ``i`` is repeated
    ``int(train_len[i])`` times. Test pf value ``i`` is repeated for the number
    of steps between ``test_step[i - 1]`` (0 for the first entry) and
    ``test_step[i]``. Each expanded series is passed through ``smoothing``.

    Args:
        result_list: Indexable container holding the entries listed above.
        n_smooth: Window parameter forwarded to ``smoothing`` for every series.

    Returns:
        Tuple ``(train_reward, train_pf, train_loss, test_pf)`` of smoothed,
        per-step lists.
    """
    train_reward, train_len, train_pf, train_loss = result_list[0], result_list[1], result_list[2], result_list[3]

    train_counts = [int(step) for step in train_len]
    new_train_reward = smoothing(_expand_per_step(train_reward, train_counts), n_smooth)
    new_train_pf = smoothing(_expand_per_step(train_pf, train_counts), n_smooth)
    new_train_loss = smoothing(_expand_per_step(train_loss, train_counts), n_smooth)

    test_pf, test_step = result_list[5], result_list[6]

    # test_step holds cumulative step counters; convert them to per-entry gaps.
    test_counts = []
    prev_step = 0
    for step in test_step:
        test_counts.append(int(step - prev_step))
        prev_step = step

    new_test_pf = smoothing(_expand_per_step(test_pf, test_counts), n_smooth)

    return new_train_reward, new_train_pf, new_train_loss, new_test_pf
        

def _mean_and_std(series_list):
    """Stack equal-length series and return ``[mean, std]`` taken across the first axis."""
    stacked = np.array(series_list)
    return [np.mean(stacked, axis=0), np.std(stacked, axis=0)]


def convert_data_multiseed(data_dir, data_name, n_seeds, max_step=np.inf):
    """Load one result file per seed and aggregate the curves across seeds.

    For every seed ``s`` in ``range(n_seeds)`` the file
    ``data_dir + data_name + "_seed{s}.npy"`` is loaded and converted to
    per-step, smoothed series with ``step_by_step``. The series are then cut
    to a common length so they can be stacked:

    * train series: the shortest train length over all seeds, capped by
      ``max_step``;
    * test series: the shortest test length over all seeds, capped by the
      train length above.

    Args:
        data_dir: Prefix (directory, including any trailing separator) that is
            concatenated directly with ``data_name``.
        data_name: Result-file stem without the ``_seed{s}.npy`` suffix.
        n_seeds: Number of seeds to load (seed ids ``0 .. n_seeds - 1``).
        max_step: Upper bound on the train series length; may be a float.

    Returns:
        Tuple of four ``[mean, std]`` pairs, in order: train reward, train pf,
        train loss, test pf. Mean and std are taken across seeds.

    Raises:
        ValueError: If ``n_seeds`` is smaller than 1.
    """
    if n_seeds < 1:
        raise ValueError("n_seeds must be at least 1, got {}".format(n_seeds))

    train_reward_list, train_pf_list, train_loss_list = [], [], []
    test_pf_list = []

    for seed in range(n_seeds):
        # Use the arguments, not notebook globals, so the function does not
        # depend on hidden kernel state.
        result_file = data_dir + data_name + "_seed{}.npy".format(seed)
        # NOTE: allow_pickle=True unpickles arbitrary objects; only load
        # result files that come from a trusted source.
        result_ = np.load(result_file, allow_pickle=True)

        train_reward, train_pf, train_loss, test_pf = step_by_step(result_)

        train_reward_list.append(train_reward)
        train_pf_list.append(train_pf)
        train_loss_list.append(train_loss)
        test_pf_list.append(test_pf)

    # Common train length: shortest seed, capped by max_step. The cap is cast
    # to int because a float such as 1.3e5 is not a valid slice bound.
    min_train = min(len(series) for series in train_reward_list)
    if max_step < min_train:
        min_train = int(max_step)

    # Common test length: shortest seed, never longer than the train length.
    min_test = min(min(len(series) for series in test_pf_list), min_train)

    train_reward_list = [series[:min_train] for series in train_reward_list]
    train_pf_list = [series[:min_train] for series in train_pf_list]
    train_loss_list = [series[:min_train] for series in train_loss_list]
    test_pf_list = [series[:min_test] for series in test_pf_list]

    return (
        _mean_and_std(train_reward_list),
        _mean_and_std(train_pf_list),
        _mean_and_std(train_loss_list),
        _mean_and_std(test_pf_list),
    )
    

In [None]:
# Where the per-seed result files live, and which runs to compare.
# Each model_list entry is [result-file stem, number of seeds].
result_dir = "./results/board/"
model_list = [
    ["DQN_0000", 3],
    ["DTAC_0000", 2],
    ["DPPO_0000", 3],
    ["DQN_1111", 4],
    ["DQN_1206_0102", 1],
]
color_list = ["salmon", "gold", "darkseagreen", "cornflowerblue", "mediumorchid"]

# NOTE(review): label_list is not read by the plotting loop below, and it has
# four entries while model_list has five -- confirm before wiring it to a legend.
# Alternative label set kept for reference:
#     "DQN + B-Mask + F-Mask + Act-Proj + Coord-Conv",
#     "DQN + B-Mask + F-Mask + Act-Proj",
#     "DQN + B-Mask + F-Mask",
#     "DQN + B-Mask",
#     "DQN"
label_list = [
    "DQN",
    "Discrete-TAC",
    "Discrete-PPO",
    "DQN + Demo"
 ]

# Cap on the train series length for multi-seed runs (passed as max_step).
max_len = 1.3e5

plt.figure(figsize=(7,16))


for model_idx, model_ in enumerate(model_list):
    # These two names are module-level on purpose: keep them as they are.
    model_name, num_seeds = model_

    if num_seeds == 1:
        # Single seed: plot the smoothed raw log entries; nothing to average.
        result_file = result_dir + model_name + "_seed0.npy"
        result_ = np.load(result_file, allow_pickle=True)

        train_reward, train_len, train_pf, train_loss = result_[0], result_[1], result_[2], result_[3]
        test_len, test_pf, test_step = result_[4], result_[5], result_[6]

        n_smooth = 100
        mean_train_reward = smoothing(train_reward, n_smooth)
        mean_train_pf = smoothing(train_pf, n_smooth)
        mean_train_loss = smoothing(train_loss, n_smooth)

        # Train x positions are the running total of the logged lengths.
        train_x = np.cumsum(train_len)

        mean_test_pf = smoothing(test_pf, 10)
        test_x = np.array(test_step)

        std_train_reward = std_train_pf = std_train_loss = std_test_pf = None

    elif num_seeds > 1:
        # Several seeds: per-step curves averaged across seeds.
        train_reward, train_pf, train_loss, test_pf = convert_data_multiseed(
            result_dir, model_name, num_seeds, max_step=max_len
        )

        mean_train_reward, std_train_reward = train_reward
        mean_train_pf, std_train_pf = train_pf
        mean_train_loss, std_train_loss = train_loss
        mean_test_pf, std_test_pf = test_pf

        train_x = np.arange(len(mean_train_reward))
        test_x = np.arange(len(mean_test_pf))

    # One stacked panel per metric; every model adds one line to each panel.
    panels = (
        (train_x, mean_train_reward, "Train: Reward"),
        (train_x, mean_train_pf, "Train: Packing-Ratio"),
        (train_x, mean_train_loss, "Train: Loss"),
        (test_x, mean_test_pf, "Test: Packing-Ratio"),
    )
    for panel_idx, (panel_x, panel_y, panel_title) in enumerate(panels, start=1):
        plt.subplot(4, 1, panel_idx)
        plt.plot(panel_x, panel_y, c=color_list[model_idx])
        plt.title(panel_title)
        plt.grid(True)

plt.show()
    