In [2]:
import pandas as pd
import seaborn as sns
import numpy as np
import matplotlib.pyplot as plt
import glob
import json
from pathlib import Path

In [3]:
def load_json(dir):
    """Read a JSON file and return its decoded content, or ``None`` on failure.

    Parameters
    ----------
    dir : str or os.PathLike
        Path of the JSON file to read (a file path, despite the parameter name).

    Returns
    -------
    object or None
        The decoded JSON value, or ``None`` when the file cannot be opened or
        read, or when its content is not valid JSON.
    """
    try:
        with open(dir) as f:
            return json.load(f)

    # Swallow only "file unreadable" and "content undecodable" failures.
    # ValueError covers json.JSONDecodeError and UnicodeDecodeError.  A bare
    # ``except:`` would also hide KeyboardInterrupt and genuine coding errors.
    except (OSError, ValueError):
        return None

In [4]:
# Root directory of the result summaries, relative to this notebook.
# NOTE(review): the backslash separator is Windows-specific.
RESULT_DIR = "..\\result_summary"

In [5]:
def parse_file_nm(file_nm):
    """Extract the experiment settings encoded in a result-file path.

    After an optional ``debug`` component is dropped, the path is expected to
    have one of these layouts (``_`` marks ignored components):

    * am:   ``_/_/<method>/_/<prob_type>/<num_probs>/<model_info>/<baseline_info>/_``
    * mcts: ``_/_/<method>/<diff_cut>/_/<prob_type>/<num_probs>/<model_info>/<baseline_info>/<mcts_info>/_``

    Parameters
    ----------
    file_nm : str
        Path of a result file; both ``\\`` and ``/`` are accepted as separators.

    Returns
    -------
    tuple
        ``(method, prob_type, num_probs, model_info, baseline_info, mcts_info, diff_cut)``
        where ``num_probs`` is an int, ``mcts_info`` is ``None`` for "am" runs,
        and ``diff_cut`` is ``1`` for "am" runs or a float for "mcts" runs.

    Raises
    ------
    ValueError
        If the path is too short, the method is neither an "am" nor an "mcts"
        variant, or the number of components does not match the layout.
    """
    # Normalise separators so the parser also works on forward-slash paths.
    components = file_nm.replace("/", "\\").split("\\")

    if "debug" in components:
        components.remove("debug")

    if len(components) < 3:
        raise ValueError(f"path too short to contain a method component: {file_nm}")

    method = components[2]
    mcts_info = None
    diff_cut = 1

    if 'am' in method:
        _, _, method, _, prob_type, num_probs, model_info, baseline_info, _ = components

    elif 'mcts' in method:
        _, _, method, diff_cut, _, prob_type, num_probs, model_info, baseline_info, mcts_info, _ = components
        # "diff-0.75" -> 0.75
        diff_cut = float(diff_cut.split("-")[-1])

    else:
        # Previously this fell through to an UnboundLocalError further down.
        raise ValueError(f"unrecognised method {method!r} in path: {file_nm}")

    # "N_20-B_64" -> "N_20" -> 20
    num_probs = num_probs.split("-")[0]
    num_probs = int(num_probs.split("_")[1])
    # Keep only the last dash-separated field, e.g. "1562-1-mean" -> "mean".
    baseline_info = baseline_info.split("-")[-1]

    return method, prob_type, num_probs, model_info, baseline_info, mcts_info, diff_cut

In [5]:
# Sanity check: report every per-problem result file whose entry count is unexpected.
for file in glob.glob(f"{RESULT_DIR}/**/*.json", recursive=True):
    result = load_json(file)

    # load_json returns None for unreadable/invalid files; report the file
    # instead of crashing on `.items()`.
    if result is None:
        print(f"could not load: {file}")
        continue

    # A file counts as an averaged summary when any nested key contains 'result'.
    avg_result = any('result' in k2 for v in result.values() for k2 in v.keys())

    # NOTE(review): 102 is presumably the expected number of entries in a
    # complete per-problem result file — confirm.
    if not avg_result and len(result) != 102:
        print(len(result))
        print(file)
        
# need to do diff-0.75 and CVRP with N=50 for 4 and 6

In [6]:
# Collect every "all_result_avg.json" below RESULT_DIR into one nested dict:
#   am runs:   all_result[method][prob_type][num_probs][model_info][baseline_info] -> data
#   mcts runs: all_result[method][prob_type][num_probs][model_info][baseline_info][mcts_info][diff_cut] -> data
# The first file seen for a given key path wins; later duplicates are ignored.

all_result = {}

for file in glob.glob(f"{RESULT_DIR}/**/all_result_avg.json", recursive=True):
    data = load_json(file)
    method, prob_type, num_probs, model_info, baseline_info, mcts_info, diff_cut = parse_file_nm(file)

    if data is None:
        continue

    per_size = (
        all_result
        .setdefault(method, {})
        .setdefault(prob_type, {})
        .setdefault(num_probs, {})
    )
    per_model = per_size.setdefault(model_info, {})

    if mcts_info is None:
        # "am" layout: the baseline level holds the data directly.
        per_model.setdefault(baseline_info, data)
    else:
        # "mcts" layout: two extra levels (MCTS setting, difference cut).
        per_mcts = per_model.setdefault(baseline_info, {}).setdefault(mcts_info, {})
        per_mcts.setdefault(diff_cut, data)

In [7]:
def get_parital_result(method, prob_type, num_prob):
    """Flatten ``all_result[method][prob_type][num_prob]`` into a DataFrame.

    One row is produced per stored epoch entry. For "am" methods the nesting is
    model -> baseline -> epoch; for "mcts" methods it is
    model -> baseline -> mcts setting -> diff_cut -> epoch.

    Parameters
    ----------
    method : str
        Key into ``all_result``; the substring "am" or "mcts" selects the layout.
    prob_type : str
        Problem-type key into ``all_result[method]``.
    num_prob : int
        Problem-size key into ``all_result[method][prob_type]``.

    Returns
    -------
    pandas.DataFrame
        Columns: baseline, epoch, score, score_std, runtime, runtime_std,
        diff_cut, activation, enc_layers, train_score, cpuct, ns.
        ``cpuct`` and ``ns`` are 0 for "am" methods.
    """
    column_names = ('model_info', 'baseline', 'mcts_info', 'epoch', 'score',
                    'score_std', 'runtime', 'runtime_std', 'diff_cut')
    partial_result = {name: [] for name in column_names}

    def _collect_epochs(model_info, baseline_info, mcts_label, cut, epoch_results):
        # Append one row per epoch entry; values follow the order of column_names.
        for epoch_key, epoch_entry in epoch_results.items():
            averages = epoch_entry['result_avg']
            score = averages['score']
            score_std = epoch_entry['result_std']['score']
            runtime = averages['runtime']
            runtime_std = epoch_entry['result_std']['runtime']

            row = (model_info, baseline_info, mcts_label, epoch_key, score,
                   score_std, runtime, runtime_std, cut)
            for name, value in zip(column_names, row):
                partial_result[name].append(value)

    target_result = all_result[method][prob_type][num_prob]

    if "am" in method:
        for model_info, per_baseline in target_result.items():
            for baseline_info, epoch_results in per_baseline.items():
                _collect_epochs(model_info, baseline_info, "am", 1, epoch_results)

    elif 'mcts' in method:
        for model_info, per_baseline in target_result.items():
            for baseline_info, per_mcts in per_baseline.items():
                for mcts_info, per_cut in per_mcts.items():
                    for diff_cut, epoch_results in per_cut.items():
                        _collect_epochs(model_info, baseline_info, mcts_info, diff_cut, epoch_results)

    df = pd.DataFrame.from_dict(partial_result)

    # model_info is dash-separated: field 5 is the activation, field 2 the encoder-layer count.
    df['activation'] = df['model_info'].apply(lambda info: info.split("-")[5])
    df['enc_layers'] = df['model_info'].apply(lambda info: info.split('-')[2])
    df = df.drop(columns=['model_info'])

    def _epoch_field(row, position):
        # The epoch key is "<name>=<epoch>-<name>=<train score>"; return the value at `position`.
        return row['epoch'].split("-")[position].split("=")[1]

    # train_score must be extracted before 'epoch' is overwritten with its integer value.
    df['train_score'] = df.apply(lambda row: _epoch_field(row, 1), axis=1).astype(float)
    df['epoch'] = df.apply(lambda row: _epoch_field(row, 0), axis=1).astype(int)

    if 'mcts' in method:
        # mcts_info is dash-separated with "<name>_<value>" fields: field 2 -> cpuct, field 0 -> ns.
        df['cpuct'] = df['mcts_info'].apply(lambda info: info.split("-")[2].split('_')[1]).astype(float)
        df['ns'] = df['mcts_info'].apply(lambda info: info.split("-")[0].split('_')[1]).astype(int)

    elif 'am' in method:
        df['cpuct'] = 0
        df['ns'] = 0

    return df.drop(columns=['mcts_info'])

In [8]:
def plot_bar_result(base_df, baseline, activation, prob_type, num_prob, plot_dev=False, hue='cpuct'):
    """Plot, save and show a grouped bar chart of score per epoch.

    Rows of ``base_df`` are restricted to the given ``baseline`` and
    ``activation``; rows with ``cpuct`` equal to 0.8 are dropped. The figure is
    written to ``../result_image/bars/<prob_type>_<num_prob>_<baseline>_<activation>.png``.

    Parameters
    ----------
    base_df : pandas.DataFrame
        Must contain the columns baseline, activation, epoch, score, score_std,
        cpuct and ns.
    baseline, activation : str
        Values to filter ``base_df`` on.
    prob_type : str
        'tsp' or 'cvrp'; selects the y-axis limits.
    num_prob : int
        Problem size; must be a key of the y-range table (20, 50 or 100).
    plot_dev : bool, default False
        When True, draw ``score_std`` error bars on top of the bars.
    hue : str, default 'cpuct'
        Column used for the bar grouping colour.

    Raises
    ------
    ValueError
        If ``prob_type`` is neither 'tsp' nor 'cvrp'.
    KeyError
        If ``num_prob`` has no configured y-range.
    """
    if prob_type == 'tsp':
        y_ranges = {20: (3.75, 3.95), 50: (5.7, 5.875), 100: (7.95, 8.25)}

    elif prob_type == 'cvrp':
        # The N=100 entry used to be a copy of the TSP range (7.95, 8.25); the
        # CVRP-100 scores in this notebook's result tables lie around 16.3-16.7.
        y_ranges = {20: (6.1, 7.5), 50: (9, 12), 100: (16.0, 17.0)}

    else:
        # Previously an unknown prob_type surfaced later as a NameError on y_ranges.
        raise ValueError(f"unsupported prob_type: {prob_type!r}")

    # Look the limits up before any drawing so a bad num_prob fails early.
    y_limits = y_ranges[num_prob]

    _df = base_df[(base_df['baseline'] == baseline) & (base_df['activation'] == activation)]

    # Drop rows whose cpuct is 0.8. The comparison is numeric: comparing a float
    # column with the string '0.8' never matches, so the old filter was a no-op.
    # Non-numeric cpuct values become NaN and are kept.
    _df = _df[pd.to_numeric(_df['cpuct'], errors='coerce') != 0.8]

    # Order rows by epoch, then cpuct, then ns.
    _df = _df.sort_values(by=['epoch', 'cpuct', 'ns']).reset_index(drop=True)
    _df['score_std'] = _df['score_std'].astype(float)

    # Missing cpuct/ns values are labelled 'am'; object dtype allows mixing in the string.
    _df['cpuct'] = _df['cpuct'].astype(object)
    _df['cpuct'] = _df['cpuct'].fillna('am')

    _df['ns'] = _df['ns'].astype(object)
    _df['ns'] = _df['ns'].fillna('am')

    plt.figure(figsize=(12, 7))

    ax = sns.barplot(data=_df, x='epoch', y='score', hue=hue)

    if plot_dev:
        bar_coords = [(rect.get_x(), rect.get_y(), rect.get_width(), rect.get_height()) for rect in ax.patches]

        # NOTE(review): this pairs the i-th drawn patch with the i-th row of _df.
        # That only holds if seaborn draws exactly one bar per row in row order —
        # verify for multi-level hue, where bars may be grouped by hue first.
        for i, (x, y, w, h) in enumerate(bar_coords):
            x_pos = x + w / 2
            y_top = y + h + 0.01

            plt.errorbar(x=x_pos, y=y_top, yerr=_df['score_std'][i], fmt='none', color='black', capsize=4)

    plt.title(f"{prob_type}_{num_prob}_{baseline}_{activation}")
    plt.ylim(*y_limits)
    plt.legend()

    path = Path("../result_image/bars")
    # exist_ok avoids the check-then-create race of a separate exists() test.
    path.mkdir(parents=True, exist_ok=True)

    plt.savefig(f"{path}/{prob_type}_{num_prob}_{baseline}_{activation}.png")
    plt.show()

In [9]:
# tsp_20 = pd.concat([get_parital_result('am', 'tsp', 20), get_parital_result('mcts_v2', 'tsp', 20)])

# for _baseline in ['mean', 'val']:
#     for _activation in ['relu', 'swiglu']:
#         plot_bar_result(tsp_20, _baseline, _activation, 'tsp', 20, hue='cpuct')

In [10]:
tsp_100 = pd.concat([get_parital_result('am', 'tsp', 100), get_parital_result('mcts', 'tsp', 100)])
# Make sure the output directory exists, and write without the index: after the
# concat the index holds repeated per-frame row numbers, and the export loop for
# the other CSVs also uses index=False.
Path("../result_csv").mkdir(parents=True, exist_ok=True)
tsp_100.to_csv("../result_csv/tsp_100.csv", index=False)
# Show the 15 rows with the lowest score.
tsp_100.sort_values(by=['score']).head(15)

Unnamed: 0,baseline,epoch,score,score_std,runtime,runtime_std,diff_cut,activation,enc_layers,train_score,cpuct,ns
499,val,299,7.952167,0.072568,10.248525,25.766765,0.75,swiglu,6,7.86777,1.1,1000
414,mean,299,7.952278,0.073115,1.087797,0.199681,0.75,swiglu,6,7.90623,1.1,100
527,val,299,7.952654,0.072702,0.581626,0.353638,0.5,swiglu,6,7.86777,1.1,500
497,val,299,7.952794,0.072653,0.929683,1.953379,0.5,swiglu,6,7.86777,1.1,1000
464,mean,299,7.952995,0.073776,4.318947,5.818327,0.75,swiglu,6,7.90623,1.1,500
514,val,299,7.953113,0.072455,0.438101,0.08744,0.5,swiglu,6,7.86777,1.1,250
531,val,249,7.953231,0.071119,5.765433,5.832914,0.75,swiglu,6,7.84949,1.1,500
450,mean,299,7.953557,0.073392,3.438926,2.169177,0.75,swiglu,6,7.90623,1.1,250
483,val,299,7.953631,0.074261,1.265813,0.224949,0.75,swiglu,6,7.86777,1.1,100
433,mean,299,7.95392,0.072868,8.395795,24.720814,0.75,swiglu,6,7.90623,1.1,1000


In [11]:
# Export one CSV per (problem type, problem size) holding the am and mcts rows together.
csv_dir = Path("../result_csv")
# to_csv does not create missing directories, so ensure the target exists first.
csv_dir.mkdir(parents=True, exist_ok=True)

for env_type in ['tsp', 'cvrp']:
    for N in [20, 50, 100]:
        df = pd.concat([get_parital_result('am', env_type, N), get_parital_result('mcts', env_type, N)])
        df.to_csv(csv_dir / f"{env_type}_{N}.csv", index=False)

In [12]:
# `df` is whatever the CSV export loop assigned last, i.e. the cvrp / N=100
# frame when the cells are run in order.
df

Unnamed: 0,baseline,epoch,score,score_std,runtime,runtime_std,diff_cut,activation,enc_layers,train_score,cpuct,ns
0,mean,49,16.717783,3.231875,0.471543,0.000779,1.00,relu,4,16.39185,0.0,0
1,mean,99,16.606976,3.071906,0.473988,0.001013,1.00,relu,4,16.41725,0.0,0
2,mean,149,16.574835,2.968623,0.479116,0.001992,1.00,relu,4,16.01232,0.0,0
3,mean,199,16.535594,3.113413,0.480392,0.001704,1.00,relu,4,16.35231,0.0,0
4,mean,249,16.490532,3.008314,0.476007,0.000952,1.00,relu,4,15.99893,0.0,0
...,...,...,...,...,...,...,...,...,...,...,...,...
346,val,199,16.474388,3.018032,11.897707,14.392978,0.75,swiglu,6,16.17082,1.1,500
347,val,149,16.473369,3.139008,12.321263,14.749940,0.75,swiglu,6,16.41855,1.1,500
348,val,299,16.347532,3.049330,10.432255,12.280734,0.75,swiglu,6,16.26320,1.1,500
349,val,249,16.437976,3.054953,12.052590,14.915801,0.75,swiglu,6,16.53491,1.1,500


In [85]:
def get_pivot_table(env_type, N, epoch=299, enc_layers=6, diff_cut=0.75, index=['epoch', 'ns', 'activation', 'baseline'], refine_to_str=True):
    """Summarise ``../result_csv/<env_type>_<N>.csv`` as a pivot table.

    Rows are kept when they match ``epoch`` and ``enc_layers`` and their
    ``diff_cut`` equals either the requested value or 1. The kept rows are
    grouped by ``index`` with pandas' default pivot aggregation (the mean),
    which is applied to the ``*_std`` columns as well.

    Parameters
    ----------
    env_type : str
        Problem type used in the CSV file name.
    N : int
        Problem size used in the CSV file name.
    epoch, enc_layers, diff_cut
        Filter values for the columns of the same name.
    index : list of str
        Columns to group by.
    refine_to_str : bool, default True
        When True, ``score`` and ``runtime`` become "<mean> ± <std>" strings and
        the ``*_std`` columns are dropped.

    Returns
    -------
    pandas.DataFrame
        Columns score, score_std, runtime, runtime_std (numeric), or score and
        runtime (strings) when ``refine_to_str`` is True.
    """
    metric_columns = ['score', 'score_std', 'runtime', 'runtime_std']

    records = pd.read_csv(f"../result_csv/{env_type}_{N}.csv")
    matches_run = (records['epoch'] == epoch) & (records['enc_layers'] == enc_layers)
    matches_cut = (records['diff_cut'] == diff_cut) | (records['diff_cut'] == 1)

    table = records[matches_run & matches_cut].pivot_table(index=index, values=metric_columns)
    # pivot_table orders value columns alphabetically; restore mean/std pairing.
    table = table.reindex(columns=metric_columns)

    if not refine_to_str:
        return table

    def _score_text(row):
        return f"{round(row['score'], 5):.4f} ± {round(row['score_std'], 2):.2f}"

    def _runtime_text(row):
        return f"{round(row['runtime'], 4):.3f} ± {round(row['runtime_std'], 2):.2f}"

    table['score'] = table.apply(_score_text, axis=1)
    table = table.drop(['score_std'], axis=1)

    table['runtime'] = table.apply(_runtime_text, axis=1)
    table = table.drop(['runtime_std'], axis=1)

    return table

In [87]:
# Numeric TSP pivot tables for N = 20, 50, 100, placed side by side with N as
# the top column level.
tsp_results = [get_pivot_table('tsp', N, refine_to_str=False) for N in [20, 50, 100]]
all_results_df = pd.concat(tsp_results, keys=[20, 50, 100],  axis=1)
all_results_df


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,20,20,20,20,50,50,50,50,100,100,100,100
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,score,score_std,runtime,runtime_std,score,score_std,runtime,runtime_std,score,score_std,runtime,runtime_std
epoch,ns,activation,baseline,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2
299,0,relu,mean,3.849166,0.08734,0.106683,0.000164,5.736106,0.059724,0.231187,0.000463,7.985253,0.078364,0.449912,0.000449
299,0,relu,val,3.848943,0.08474,0.111188,0.000175,5.738576,0.063584,0.229614,0.000554,8.048133,0.07965,0.450653,0.000271
299,0,swiglu,mean,3.846409,0.084434,0.109879,0.000128,5.726738,0.069453,0.237449,0.000701,7.956173,0.073098,0.45437,0.000451
299,0,swiglu,val,3.848185,0.085138,0.112462,0.000178,5.740503,0.066726,0.233933,0.000357,7.955108,0.072696,0.456002,0.000474
299,100,relu,mean,3.848645,0.087409,0.098566,0.002793,5.737484,0.059985,0.372957,0.038249,7.984709,0.078276,1.211698,0.225768
299,100,relu,val,3.84835,0.084755,0.089544,0.001562,5.731046,0.065711,0.363083,0.0395,8.046557,0.079659,1.28427,0.20434
299,100,swiglu,mean,3.846195,0.084505,0.102594,0.004207,5.72485,0.068898,0.324971,0.032071,7.952278,0.073115,1.087797,0.199681
299,100,swiglu,val,3.847354,0.085008,0.10876,0.005548,5.737676,0.066269,0.35633,0.03514,7.953631,0.074261,1.265813,0.224949
299,250,relu,mean,3.848549,0.087396,0.14293,0.011627,5.735231,0.059618,1.027504,0.453966,7.984099,0.07855,3.958377,2.679324
299,250,relu,val,3.84835,0.084755,0.117019,0.008553,5.733597,0.065631,0.885038,0.448591,8.056533,0.08286,4.035738,2.068755


In [86]:
# Same TSP tables with score/runtime rendered as "<mean> ± <std>" strings
# (refine_to_str defaults to True).
tsp_results = [get_pivot_table('tsp', N) for N in [20, 50, 100]]
all_results_df = pd.concat(tsp_results, keys=[20, 50, 100],  axis=1)
all_results_df


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,20,20,50,50,100,100
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,score,runtime,score,runtime,score,runtime
epoch,ns,activation,baseline,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2
299,0,relu,mean,3.8492 ± 0.09,0.107 ± 0.00,5.7361 ± 0.06,0.231 ± 0.00,7.9852 ± 0.08,0.450 ± 0.00
299,0,relu,val,3.8489 ± 0.08,0.111 ± 0.00,5.7386 ± 0.06,0.230 ± 0.00,8.0481 ± 0.08,0.451 ± 0.00
299,0,swiglu,mean,3.8464 ± 0.08,0.110 ± 0.00,5.7267 ± 0.07,0.237 ± 0.00,7.9562 ± 0.07,0.454 ± 0.00
299,0,swiglu,val,3.8482 ± 0.09,0.113 ± 0.00,5.7405 ± 0.07,0.234 ± 0.00,7.9551 ± 0.07,0.456 ± 0.00
299,100,relu,mean,3.8486 ± 0.09,0.099 ± 0.00,5.7375 ± 0.06,0.373 ± 0.04,7.9847 ± 0.08,1.212 ± 0.23
299,100,relu,val,3.8483 ± 0.08,0.089 ± 0.00,5.7310 ± 0.07,0.363 ± 0.04,8.0466 ± 0.08,1.284 ± 0.20
299,100,swiglu,mean,3.8462 ± 0.08,0.103 ± 0.00,5.7248 ± 0.07,0.325 ± 0.03,7.9523 ± 0.07,1.088 ± 0.20
299,100,swiglu,val,3.8474 ± 0.09,0.109 ± 0.01,5.7377 ± 0.07,0.356 ± 0.04,7.9536 ± 0.07,1.266 ± 0.22
299,250,relu,mean,3.8485 ± 0.09,0.143 ± 0.01,5.7352 ± 0.06,1.028 ± 0.45,7.9841 ± 0.08,3.958 ± 2.68
299,250,relu,val,3.8483 ± 0.08,0.117 ± 0.01,5.7336 ± 0.07,0.885 ± 0.45,8.0565 ± 0.08,4.036 ± 2.07


In [88]:
# Formatted CVRP pivot tables for N = 20, 50, 100, side by side.
cvrp_results = [get_pivot_table('cvrp', N) for N in [20, 50, 100]]
all_results_df = pd.concat(cvrp_results, keys=[20, 50, 100],  axis=1)
all_results_df


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,20,20,50,50,100,100
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,score,runtime,score,runtime,score,runtime
epoch,ns,activation,baseline,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2
299,0,relu,mean,6.4097 ± 0.75,0.131 ± 0.00,10.8050 ± 1.65,0.279 ± 0.00,16.4418 ± 2.99,0.523 ± 0.00
299,0,relu,val,6.4553 ± 0.80,0.132 ± 0.00,10.8634 ± 1.65,0.276 ± 0.00,16.4463 ± 3.00,0.525 ± 0.00
299,0,swiglu,mean,6.4231 ± 0.73,0.133 ± 0.00,10.7940 ± 1.70,0.278 ± 0.00,16.4043 ± 3.09,0.527 ± 0.00
299,0,swiglu,val,6.4201 ± 0.72,0.132 ± 0.00,10.8402 ± 1.48,0.277 ± 0.00,16.3575 ± 3.04,0.525 ± 0.00
299,100,relu,mean,6.4010 ± 0.76,0.358 ± 0.07,10.8225 ± 1.59,0.965 ± 0.15,16.4628 ± 3.00,2.550 ± 0.43
299,100,relu,val,6.4452 ± 0.83,0.439 ± 0.09,10.8718 ± 1.61,0.934 ± 0.16,16.4750 ± 3.08,2.697 ± 0.47
299,100,swiglu,mean,6.4228 ± 0.75,0.320 ± 0.05,10.7747 ± 1.71,0.921 ± 0.20,16.4190 ± 3.10,2.380 ± 0.55
299,100,swiglu,val,6.4020 ± 0.74,0.398 ± 0.06,10.8420 ± 1.53,1.060 ± 0.19,16.3954 ± 2.97,2.253 ± 0.44
299,250,relu,mean,6.4060 ± 0.75,0.817 ± 0.40,10.8228 ± 1.61,2.046 ± 0.96,16.4760 ± 3.10,14.936 ± 14.14
299,250,relu,val,6.4315 ± 0.81,0.962 ± 0.47,10.8858 ± 1.62,2.090 ± 1.02,16.4594 ± 3.13,15.114 ± 12.43


In [89]:
# Formatted results grouped by activation only: problem sizes as column groups,
# problem types (tsp / cvrp) stacked as the outer row level.
tsp_results = [get_pivot_table('tsp', N, index=['activation']) for N in [20, 50, 100]]
tsp_results = pd.concat(tsp_results, keys=[20, 50, 100],  axis=1)

cvrp_results = [get_pivot_table('cvrp', N, index=['activation']) for N in [20, 50, 100]]
cvrp_results = pd.concat(cvrp_results, keys=[20, 50, 100],  axis=1)

all_results_df = pd.concat([tsp_results, cvrp_results], keys=['tsp', 'cvrp'], axis=0)
all_results_df

Unnamed: 0_level_0,Unnamed: 1_level_0,20,20,50,50,100,100
Unnamed: 0_level_1,Unnamed: 1_level_1,score,runtime,score,runtime,score,runtime
Unnamed: 0_level_2,activation,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2
tsp,relu,3.8487 ± 0.09,0.124 ± 0.02,5.7340 ± 0.06,0.907 ± 0.83,8.0190 ± 0.08,4.225 ± 6.96
tsp,swiglu,3.8470 ± 0.08,0.152 ± 0.05,5.7318 ± 0.07,0.818 ± 0.66,7.9551 ± 0.07,3.881 ± 6.80
cvrp,relu,6.4255 ± 0.79,0.828 ± 0.68,10.8442 ± 1.63,2.672 ± 3.46,16.4556 ± 3.02,11.354 ± 16.35
cvrp,swiglu,6.4149 ± 0.74,0.721 ± 0.42,10.8147 ± 1.61,2.765 ± 4.12,16.3895 ± 3.06,9.527 ± 16.50


In [None]:
# Numeric (unformatted) results grouped by activation only.
tsp_results = [get_pivot_table('tsp', N, index=['activation'], refine_to_str=False) for N in [20, 50, 100]]
tsp_results = pd.concat(tsp_results, keys=[20, 50, 100],  axis=1)

cvrp_results = [get_pivot_table('cvrp', N, index=['activation'], refine_to_str=False) for N in [20, 50, 100]]
cvrp_results = pd.concat(cvrp_results, keys=[20, 50, 100],  axis=1)

all_results_df = pd.concat([tsp_results, cvrp_results], keys=['tsp', 'cvrp'], axis=0)
all_results_df

In [232]:
# Results grouped by baseline only.
# NOTE(review): the saved output of this cell shows separate *_std columns,
# which this call (refine_to_str left at True) does not produce — the output
# looks stale; re-run to refresh.
tsp_results = [get_pivot_table('tsp', N, index=['baseline']) for N in [20, 50, 100]]
tsp_results = pd.concat(tsp_results, keys=[20, 50, 100],  axis=1)

cvrp_results = [get_pivot_table('cvrp', N, index=['baseline']) for N in [20, 50, 100]]
cvrp_results = pd.concat(cvrp_results, keys=[20, 50, 100],  axis=1)

all_results_df = pd.concat([tsp_results, cvrp_results], keys=['tsp', 'cvrp'], axis=0)
all_results_df

Unnamed: 0_level_0,Unnamed: 1_level_0,20,20,20,20,50,50,50,50,100,100,100,100
Unnamed: 0_level_1,Unnamed: 1_level_1,score,score_std,runtime,runtime_std,score,score_std,runtime,runtime_std,score,score_std,runtime,runtime_std
Unnamed: 0_level_2,baseline,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2
tsp,mean,3.847532,0.085914,0.143579,0.034008,5.73029,0.064384,0.849227,0.719525,7.968395,0.076,3.837999,7.168259
tsp,val,3.84814,0.084904,0.132662,0.030068,5.735478,0.065808,0.876634,0.774389,8.005644,0.077479,4.267926,6.591142
cvrp,mean,6.414754,0.746359,0.704385,0.498716,10.801845,1.663723,2.603224,3.584254,16.43567,3.053555,10.46618,18.769996
cvrp,val,6.425673,0.77544,0.84384,0.600933,10.857036,1.570981,2.833637,3.987187,16.409435,3.034862,10.415026,14.081803


In [233]:
# Results grouped by activation and baseline.
# NOTE(review): the saved output of this cell shows separate *_std columns,
# which this call (refine_to_str left at True) does not produce — the output
# looks stale; re-run to refresh.
tsp_results = [get_pivot_table('tsp', N, index=['activation', 'baseline']) for N in [20, 50, 100]]
tsp_results = pd.concat(tsp_results, keys=[20, 50, 100],  axis=1)

cvrp_results = [get_pivot_table('cvrp', N, index=['activation', 'baseline']) for N in [20, 50, 100]]
cvrp_results = pd.concat(cvrp_results, keys=[20, 50, 100],  axis=1)

all_results_df = pd.concat([tsp_results, cvrp_results], keys=['tsp', 'cvrp'], axis=0)
all_results_df

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,20,20,20,20,50,50,50,50,100,100,100,100
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,score,score_std,runtime,runtime_std,score,score_std,runtime,runtime_std,score,score_std,runtime,runtime_std
Unnamed: 0_level_2,activation,baseline,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2
tsp,relu,mean,3.848909,0.087351,0.135863,0.024499,5.735429,0.059666,0.954732,0.826665,7.983006,0.07875,4.136831,7.754827
tsp,relu,val,3.848468,0.084752,0.111687,0.01351,5.732479,0.065083,0.860328,0.836371,8.054898,0.083024,4.312788,6.167008
tsp,swiglu,mean,3.846156,0.084476,0.151295,0.043516,5.725151,0.069102,0.743721,0.612386,7.953784,0.07325,3.539167,6.58169
tsp,swiglu,val,3.847812,0.085056,0.153637,0.046626,5.738476,0.066533,0.892941,0.712407,7.95639,0.071933,4.223063,7.015277
cvrp,relu,mean,6.406097,0.755201,0.753307,0.647176,10.815637,1.617335,2.615675,3.151902,16.455791,3.008325,11.087819,18.449787
cvrp,relu,val,6.444959,0.815983,0.901911,0.708723,10.872692,1.638581,2.728584,3.759984,16.455378,3.040988,11.620718,14.246272
cvrp,swiglu,mean,6.423411,0.737517,0.655462,0.350257,10.788053,1.71011,2.590773,4.016606,16.41555,3.098785,9.844541,19.090205
cvrp,swiglu,val,6.406387,0.734897,0.785769,0.493144,10.84138,1.503381,2.938689,4.214389,16.363492,3.028735,9.209334,13.917334


In [38]:
# Formatted results grouped by ns and activation.
# NOTE(review): the saved output shows scores with 3 decimals, whereas
# get_pivot_table currently formats them with 4 — the output predates the
# current formatting; re-run to refresh.
tsp_results = [get_pivot_table('tsp', N, index=['ns', 'activation']) for N in [20, 50, 100]]
tsp_results = pd.concat(tsp_results, keys=[20, 50, 100],  axis=1)

cvrp_results = [get_pivot_table('cvrp', N, index=['ns', 'activation']) for N in [20, 50, 100]]
cvrp_results = pd.concat(cvrp_results, keys=[20, 50, 100],  axis=1)

all_results_df = pd.concat([tsp_results, cvrp_results], keys=['tsp', 'cvrp'], axis=0)
all_results_df

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,20,20,50,50,100,100
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,score,runtime,score,runtime,score,runtime
Unnamed: 0_level_2,ns,activation,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2
tsp,0,relu,3.849 ± 0.09,0.109 ± 0.00,5.737 ± 0.06,0.230 ± 0.00,8.017 ± 0.08,0.450 ± 0.00
tsp,0,swiglu,3.847 ± 0.08,0.111 ± 0.00,5.734 ± 0.07,0.236 ± 0.00,7.956 ± 0.07,0.455 ± 0.00
tsp,100,relu,3.849 ± 0.09,0.094 ± 0.00,5.734 ± 0.06,0.368 ± 0.04,8.016 ± 0.08,1.248 ± 0.22
tsp,100,swiglu,3.847 ± 0.08,0.106 ± 0.00,5.731 ± 0.07,0.341 ± 0.03,7.953 ± 0.07,1.177 ± 0.21
tsp,250,relu,3.848 ± 0.09,0.130 ± 0.01,5.734 ± 0.06,0.956 ± 0.45,8.020 ± 0.08,3.997 ± 2.37
tsp,250,swiglu,3.847 ± 0.08,0.149 ± 0.02,5.731 ± 0.07,0.853 ± 0.35,7.956 ± 0.07,3.670 ± 2.57
tsp,500,relu,3.849 ± 0.09,0.127 ± 0.02,5.732 ± 0.06,1.119 ± 0.82,8.024 ± 0.08,5.240 ± 6.31
tsp,500,swiglu,3.847 ± 0.08,0.165 ± 0.05,5.732 ± 0.07,1.001 ± 0.70,7.958 ± 0.07,4.782 ± 5.96
tsp,1000,relu,3.849 ± 0.09,0.159 ± 0.06,5.732 ± 0.06,1.864 ± 2.85,8.018 ± 0.08,10.188 ± 25.91
tsp,1000,swiglu,3.847 ± 0.08,0.231 ± 0.16,5.731 ± 0.07,1.662 ± 2.23,7.953 ± 0.07,9.322 ± 25.24


In [58]:
# Numeric TSP tables grouped by ns/activation. reset_index turns the index
# levels into ordinary columns so that `ns` can be used as data later on.
tsp_results = [get_pivot_table('tsp', N, index=['ns', 'activation'], refine_to_str=False) for N in [20, 50, 100]]
tsp_results = pd.concat(tsp_results, keys=[20, 50, 100],  axis=1)
tsp_results = tsp_results.reset_index()
# First column after reset_index is the former `ns` level; under the two-level
# column header its label is the tuple ('ns', '').
ns_col_name = tsp_results.columns[0]
ns_col_name

('ns', '')

In [84]:
# For every problem type and size, print the correlation between ns and the
# mean score per ns group (DataFrame.corr with its default method).
for env_type in ['tsp', 'cvrp']:
    for N in [20, 50, 100]:
        corr_coef = get_pivot_table(env_type, N, index=['ns'], refine_to_str=False).reset_index().corr().loc['ns', 'score']
        print(f"{env_type}_{N}: {corr_coef}")

tsp_20: -0.11810629992614363
tsp_50: -0.7213754639076688
tsp_100: 0.14677544871966403
cvrp_20: 0.13159901236379598
cvrp_50: 0.02364275470398175
cvrp_100: -0.4732575291038821


In [70]:
def corr(ns, score):
    """Return the correlation coefficient between ``ns`` and ``score``.

    The value is the off-diagonal entry of ``np.corrcoef(ns, score)``.
    """
    correlation_matrix = np.corrcoef(ns, score)
    return correlation_matrix[0, 1]

# The ns column does not depend on N, so extract it once. Selecting the column
# by its full label replaces the nested-tuple .loc lookup, which emitted a
# warning on every iteration.
ns = tsp_results[ns_col_name].to_numpy()

for N in [20, 50, 100]:
    score = tsp_results.loc[:, (N, 'score')]
    print(f"N={N}: ", corr(ns, score))


N=20:  -0.02571148582075159
N=50:  -0.513745201132636
N=100:  0.010411342828905632


  ns = tsp_results.loc[:, (ns_col_name, '')].values.flatten()
  ns = tsp_results.loc[:, (ns_col_name, '')].values.flatten()
  ns = tsp_results.loc[:, (ns_col_name, '')].values.flatten()


In [1]:
import os
from PIL import Image
from IPython.display import Image as Img
from IPython.display import display

def generate_gif(path, name):
    """Combine all images in a directory into ``<name>.gif`` and return it for display.

    Parameters
    ----------
    path : str
        Directory whose entries are all opened as image frames.
    name : str
        Output file name without the ``.gif`` extension; written relative to
        the current working directory.

    Returns
    -------
    IPython.display.Image
        Display object pointing at the written GIF.

    Raises
    ------
    ValueError
        If ``path`` contains no entries.
    """
    # os.listdir returns entries in arbitrary order; sort so the frame order is
    # deterministic. The sort is lexicographic — TODO confirm that frame names
    # are zero-padded or otherwise sort correctly as strings.
    img_list = [path + '/' + x for x in sorted(os.listdir(path))]

    if not img_list:
        raise ValueError(f"no images found in {path}")

    images = []
    try:
        for frame_path in img_list:
            images.append(Image.open(frame_path))

        # loop: number of repetitions
        # duration: frame switching time in milliseconds (500 = 0.5 s)
        images[0].save(f'{name}.gif', save_all=True, append_images=images[1:], loop=0xff, duration=250)
    finally:
        # Release the file handles held by the opened frames.
        for opened in images:
            opened.close()

    return Img(url=f'{name}.gif')

In [2]:
# Build the GIF from the MCTS debug plots.
# NOTE(review): the source directory says 78 while the GIF name says 75 —
# confirm which one is intended.
mcst_images_path = '../debug/plot/tsp/50/78/mcts'

generate_gif(mcst_images_path, 'tsp-50-75-mcts')

In [None]:
# Build the GIF from the AM debug plots.
# NOTE(review): the source directory says 78 while the GIF name says 75 —
# confirm which one is intended.
mcst_images_path = '../debug/plot/tsp/50/78/am'

generate_gif(mcst_images_path, 'tsp-50-75-am')

In [None]:
def load_model_str_format(act, base, env_type, N):
    """Return the result directories of one model configuration.

    Parameters
    ----------
    act : str
        Activation name inserted into the model folder name.
    base : str
        Baseline name appended to the run folder name.
    env_type : str
        Problem-type folder.
    N : int
        Problem size used in the ``N_<N>-B_64`` folder.

    Returns
    -------
    tuple of str
        ``(mcts_dir, am_dir)``; the mcts directory lies below ``mcts/diff-0.75``.
    """
    return (
        f"../result_summary/mcts/diff-0.75/pretrained_result/{env_type}/N_{N}-B_64/shared_mha-128-6-32-4-{act}-10-0.0001/1562-1-{base}",
        f"../result_summary/am/pretrained_result/{env_type}/N_{N}-B_64/shared_mha-128-6-32-4-{act}-10-0.0001/1562-1-{base}",
    )

# Per-problem scores at epoch 299 for every method:
# all_prob_result["<env_type>_<N>"] is a table with one row per problem and one
# ('score', <method>) column per method.
all_prob_result = {}


def _append_problem_scores(result_loaded, method, sink):
    """Append every per-problem score of one result file to ``sink`` under ``method``."""
    for prob_num, val in result_loaded.items():
        # 'average' and 'std' are aggregate entries, not individual problems.
        if prob_num == 'average' or prob_num == 'std':
            continue

        sink['prob_num'].append(prob_num)
        sink['method'].append(method)
        sink['score'].append(val['score'])


for env_type in ['tsp', 'cvrp']:
    for N in [20, 50, 100]:
        indiv_result_dict = {'method': [], 'score': [], 'prob_num': []}
        for baseline in ['mean', 'val']:
            for act in ['relu', 'swiglu']:
                str_mcts, str_am = load_model_str_format(act, baseline, env_type, N)

                mcts_path = Path(str_mcts)

                for file in mcts_path.glob("**/*.json"):
                    if 'epoch=299' not in file.name:
                        continue

                    # The first path component below the run folder is the MCTS
                    # setting ("ns_<n>-..."). Taking it relative to mcts_path is
                    # separator-independent, unlike splitting str(file) on "\\".
                    ns_num = int(file.relative_to(mcts_path).parts[0].split('-')[0].split('_')[1])

                    # NOTE(review): ns=250 runs are excluded on purpose here —
                    # the reason is not visible in this notebook; confirm.
                    if ns_num == 250:
                        continue

                    result_loaded = load_json(str(file))
                    # load_json returns None for unreadable/invalid files.
                    if result_loaded is None:
                        continue

                    _append_problem_scores(result_loaded, f"mcts-{ns_num}-{act}-{baseline}", indiv_result_dict)

                for file in Path(str_am).glob("**/*.json"):
                    if 'epoch=299' not in file.name:
                        continue

                    result_loaded = load_json(str(file))
                    if result_loaded is None:
                        continue

                    _append_problem_scores(result_loaded, f"am-{act}-{baseline}", indiv_result_dict)

        all_prob_result[f"{env_type}_{N}"] = pd.DataFrame.from_dict(indiv_result_dict).pivot_table(index=['prob_num'], columns=['method'], values=['score'])

all_prob_result

In [None]:
# define a paired t-test function for two pandas series
from scipy.stats import ttest_rel
def paired_ttest(s1, s2):
    """Run a paired t-test on two equally long samples.

    Thin wrapper around ``scipy.stats.ttest_rel``; its result object is returned
    unchanged.
    """
    outcome = ttest_rel(s1, s2)
    return outcome


In [None]:
from itertools import combinations

# For every problem, keep the single am-vs-mcts method pair with the lowest
# paired t-test p-value (rounded to 4 decimals).
p_value_low_results = {}

for problem, df in all_prob_result.items():
    problem_p_value = {}

    for col1, col2 in combinations(df.columns, 2):
        # Column labels are ('score', '<method>') tuples; the method string
        # starts with its family ("am" or "mcts"). Only cross-family pairs are tested.
        if col1[1].split("-")[0] == col2[1].split("-")[0]:
            continue

        pvalue = paired_ttest(df[col1], df[col2]).pvalue

        # A NaN p-value cannot be ranked and would make the minimum
        # order-dependent, so such pairs are skipped.
        if np.isnan(pvalue):
            continue

        problem_p_value[col1[1], col2[1]] = pvalue

    # min() returns the first of equal minima, matching a stable sort, and an
    # empty dict is kept as-is instead of raising IndexError.
    if problem_p_value:
        best_pair, best_p = min(problem_p_value.items(), key=lambda item: item[1])
        problem_p_value = {best_pair: round(best_p, 4)}

    p_value_low_results[problem] = problem_p_value

p_value_low_results
    