In [1]:
import json
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from pathlib import Path

plt.rcParams['figure.figsize'] = (12, 6)
sns.set_style("whitegrid")


In [2]:
# JSON 파일 읽기
results = []
results_path = Path("archive/experiment_results")

for json_file in results_path.glob("*.json"):
    with open(json_file, 'r', encoding='utf-8') as f:
        data = json.load(f)
        data['filename'] = json_file.name
        results.append(data)

print(f"총 {len(results)}개 파일 로드 완료")


총 288개 파일 로드 완료


In [12]:

# DataFrame으로 변환
rows = []
for r in results:
    row = {
        'model_name': r['hyperparameters'].get('model_name'),
        'rnn_type': r['hyperparameters'].get('rnn_type'),
        'use_static': r['hyperparameters'].get('use_static_features'),
        'lr': r['hyperparameters'].get('learning_rate'),
        'hidden_dim': r['hyperparameters'].get('hidden_dim'),
        'num_layers': r['hyperparameters'].get('num_layers'),
        'max_seq_len': r['hyperparameters'].get('max_seq_len'),
        'test_r2': r['results'].get('test_r2'),
        'test_mae': r['results'].get('test_mae'),
        'test_rmse': r['results'].get('test_rmse'),
    }
    rows.append(row)

df = pd.DataFrame(rows)
df.head()

am_df = df[df.model_name.str.contains('am_')]
pm_df = df[df.model_name.str.contains('pm_')]

# 'am_', 'seed42_', 'epoch100_' 제거
am_df.model_name.replace(['am_', 'seed42_', 'ep100_'], '', regex=True, inplace=True)
# 'pm_', 'seed42_', 'epoch100_' 제거
pm_df.model_name.replace(['pm_', 'seed42_', 'ep100_'], '', regex=True, inplace=True)

The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  am_df.model_name.replace(['am_', 'seed42_', 'ep100_'], '', regex=True, inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  am_df.model_name.replace(['am_', 'seed42_', 'ep100_'], '', regex=True, inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using

In [15]:
# 상위 10개 모델
am_top10 = am_df.nsmallest(10, 'test_rmse')[['model_name', 'rnn_type', 'use_static', 'lr', 'hidden_dim', 'num_layers', 'max_seq_len', 'test_r2', 'test_mae', 'test_rmse']]
pm_top10 = pm_df.nsmallest(10, 'test_rmse')[['model_name', 'rnn_type', 'use_static', 'lr', 'hidden_dim', 'num_layers', 'max_seq_len', 'test_r2', 'test_mae', 'test_rmse']]
display(am_top10)
display(pm_top10)

Unnamed: 0,model_name,rnn_type,use_static,lr,hidden_dim,num_layers,max_seq_len,test_r2,test_mae,test_rmse
119,lstm_bs32_lr0.001_hd64_tw1_nl2_msl23,lstm,False,0.001,64,2,23,0.964834,0.160132,0.211536
227,lstm_bs32_lr0.005_hd128_tw1_nl2_msl10,lstm,False,0.005,128,2,10,0.964442,0.159263,0.212711
225,lstm_static_bs32_lr0.0005_hd128_tw1_nl2_msl10,lstm,True,0.0005,128,2,10,0.96432,0.160872,0.213075
193,lstm_static_bs32_lr0.005_hd64_tw1_nl2_msl23,lstm,True,0.005,64,2,23,0.964238,0.162141,0.213321
187,lstm_bs32_lr0.0005_hd128_tw1_nl3_msl10,lstm,False,0.0005,128,3,10,0.964205,0.161677,0.213418
259,lstm_static_bs32_lr0.001_hd128_tw1_nl2_msl10,lstm,True,0.001,128,2,10,0.963967,0.162907,0.214127
156,lstm_static_bs32_lr0.001_hd64_tw1_nl2_msl23,lstm,True,0.001,64,2,23,0.963962,0.160538,0.214143
145,lstm_bs32_lr0.005_hd64_tw1_nl2_msl23,lstm,False,0.005,64,2,23,0.963748,0.163023,0.214776
211,lstm_static_bs32_lr0.005_hd64_tw1_nl3_msl23,lstm,True,0.005,64,3,23,0.963705,0.161795,0.214905
127,gru_static_bs32_lr0.0005_hd64_tw1_nl2_msl23,gru,True,0.0005,64,2,23,0.963655,0.162119,0.215054


Unnamed: 0,model_name,rnn_type,use_static,lr,hidden_dim,num_layers,max_seq_len,test_r2,test_mae,test_rmse
149,gru_static_bs32_lr0.001_hd64_tw1_nl2_msl23,gru,True,0.001,64,2,23,0.938288,0.206427,0.296678
99,lstm_static_bs32_lr0.0005_hd64_tw1_nl2_msl10,lstm,True,0.0005,64,2,10,0.938011,0.205689,0.297342
212,gru_bs32_lr0.0005_hd128_tw1_nl3_msl10,gru,False,0.0005,128,3,10,0.937814,0.207362,0.297816
240,lstm_static_bs32_lr0.001_hd128_tw1_nl2_msl23,lstm,True,0.001,128,2,23,0.937779,0.207063,0.297898
198,gru_bs32_lr0.0005_hd128_tw1_nl2_msl23,gru,False,0.0005,128,2,23,0.937759,0.207608,0.297947
117,gru_bs32_lr0.0005_hd64_tw1_nl3_msl23,gru,False,0.0005,64,3,23,0.937753,0.207558,0.297962
106,lstm_static_bs32_lr0.0005_hd64_tw1_nl2_msl23,lstm,True,0.0005,64,2,23,0.937743,0.207306,0.297985
209,gru_static_bs32_lr0.0005_hd128_tw1_nl2_msl23,gru,True,0.0005,128,2,23,0.937654,0.207016,0.298198
140,lstm_static_bs32_lr0.001_hd64_tw1_nl2_msl23,lstm,True,0.001,64,2,23,0.937496,0.207999,0.298576
186,lstm_bs32_lr0.0005_hd128_tw1_nl2_msl10,lstm,False,0.0005,128,2,10,0.937427,0.207092,0.29874


Unnamed: 0,model_name,rnn_type,use_static,lr,hidden_dim,num_layers,max_seq_len,test_r2,test_mae,test_rmse
0,am_lstm_ep100_bs32_lr0.0005_hd32_tw1_seed42_nl...,lstm,False,0.0005,32,2,10,0.960151,0.169954,0.225179
2,am_lstm_static_ep100_bs32_lr0.0005_hd32_tw1_se...,lstm,True,0.0005,32,2,10,0.962388,0.164796,0.218769
4,am_lstm_ep100_bs32_lr0.0005_hd32_tw1_seed42_nl...,lstm,False,0.0005,32,2,23,0.961462,0.167349,0.221446
6,am_lstm_static_ep100_bs32_lr0.0005_hd32_tw1_se...,lstm,True,0.0005,32,2,23,0.962106,0.165371,0.219588
8,am_gru_ep100_bs32_lr0.0005_hd32_tw1_seed42_nl2...,gru,False,0.0005,32,2,10,0.961591,0.167277,0.221075
...,...,...,...,...,...,...,...,...,...,...
283,am_gru_static_ep100_bs32_lr0.005_hd128_tw1_see...,gru,True,0.0050,128,2,23,0.960893,0.169128,0.223074
284,am_lstm_static_ep100_bs32_lr0.005_hd128_tw1_se...,lstm,True,0.0050,128,3,10,0.962980,0.165383,0.217041
285,am_lstm_static_ep100_bs32_lr0.005_hd128_tw1_se...,lstm,True,0.0050,128,3,23,0.963380,0.165669,0.215865
286,am_gru_static_ep100_bs32_lr0.005_hd128_tw1_see...,gru,True,0.0050,128,3,10,0.952798,0.186980,0.245075


In [45]:
# AM과 PM 분리
am_df = df[df['model_name'].str.startswith('am_')].copy()
pm_df = df[df['model_name'].str.startswith('pm_')].copy()

print(f"AM 모델: {len(am_df)}개")
print(f"PM 모델: {len(pm_df)}개")

# 같은 설정을 찾기 위한 키 생성 (model_name에서 am_/pm_ 제거)
am_df['config_key'] = am_df['model_name'].str.replace('^am_', '', regex=True)
pm_df['config_key'] = pm_df['model_name'].str.replace('^pm_', '', regex=True)

# 합산하기 위해 merge
merged = am_df.merge(
    pm_df, 
    on='config_key', 
    suffixes=('_am', '_pm'),
    how='inner'
)

# 성능 지표 합산
merged['total_r2'] = merged['test_r2_am'] + merged['test_r2_pm']
merged['total_mae'] = merged['test_mae_am'] + merged['test_mae_pm']
merged['total_rmse'] = merged['test_rmse_am'] + merged['test_rmse_pm']

# 결과 표시 (total_r2는 높을수록 좋으므로 내림차순, total_mae/rmse는 낮을수록 좋으므로 오름차순)
result_cols = ['config_key', 'total_r2', 'total_mae', 'total_rmse']

print("\n=== 합산 성능 기준 상위 10개 (total_rmse 낮은 순) ===")
top_by_rmse = merged.nsmallest(10, 'total_rmse')[result_cols].round(3)
# top_by_rmse["config_key"] = top_by_rmse["config_key"].str.replace("ep100_", "").str.replace("seed42_", "")
display(top_by_rmse)

AM 모델: 144개
PM 모델: 144개

=== 합산 성능 기준 상위 10개 (total_rmse 낮은 순) ===


Unnamed: 0,config_key,total_r2,total_mae,total_rmse
113,lstm_ep100_bs32_lr0.005_hd128_tw1_seed42_nl2_m...,1.902,0.369,0.512
78,lstm_static_ep100_bs32_lr0.001_hd64_tw1_seed42...,1.901,0.369,0.513
92,lstm_ep100_bs32_lr0.0005_hd128_tw1_seed42_nl3_...,1.901,0.37,0.514
90,gru_ep100_bs32_lr0.0005_hd128_tw1_seed42_nl2_m...,1.901,0.369,0.514
120,gru_static_ep100_bs32_lr0.0005_hd128_tw1_seed4...,1.901,0.369,0.514
83,gru_static_ep100_bs32_lr0.001_hd64_tw1_seed42_...,1.901,0.37,0.514
95,lstm_static_ep100_bs32_lr0.005_hd64_tw1_seed42...,1.901,0.371,0.514
131,gru_static_ep100_bs32_lr0.001_hd128_tw1_seed42...,1.901,0.372,0.514
59,lstm_ep100_bs32_lr0.001_hd64_tw1_seed42_nl2_msl23,1.901,0.37,0.514
112,lstm_static_ep100_bs32_lr0.0005_hd128_tw1_seed...,1.901,0.371,0.514


In [46]:
top_by_rmse.config_key.iloc[1]

'lstm_static_ep100_bs32_lr0.001_hd64_tw1_seed42_nl2_msl23'

In [44]:
# 최고 성능 조합 (각 지표별)
print("=" * 80)
print("최고 합산 성능 조합")
print("=" * 80)

best_rmse = merged.loc[merged['total_rmse'].idxmin()]
print("\n최저 Total RMSE:")
print(f"  Config: {best_rmse['config_key']}")
print(f"  AM 모델: {best_rmse['model_name_am']}")
print(f"  PM 모델: {best_rmse['model_name_pm']}")

print(f"  AM R²: {best_rmse['test_r2_am']:.3f}")
print(f"  AM MAE: {best_rmse['test_mae_am']:.3f}")
print(f"  AM RMSE: {best_rmse['test_rmse_am']:.3f}")
print(f"  PM R²: {best_rmse['test_r2_pm']:.3f}")
print(f"  PM MAE: {best_rmse['test_mae_pm']:.3f}")
print(f"  PM RMSE: {best_rmse['test_rmse_pm']:.3f}")
      


최고 합산 성능 조합

최저 Total RMSE:
  Config: lstm_ep100_bs32_lr0.005_hd128_tw1_seed42_nl2_msl10
  AM 모델: am_lstm_ep100_bs32_lr0.005_hd128_tw1_seed42_nl2_msl10
  PM 모델: pm_lstm_ep100_bs32_lr0.005_hd128_tw1_seed42_nl2_msl10
  AM R²: 0.964
  AM MAE: 0.159
  AM RMSE: 0.213
  PM R²: 0.937
  PM MAE: 0.209
  PM RMSE: 0.299
