In [9]:
import os
import glob
import re
import pandas as pd
from pandas.api.types import CategoricalDtype
import numpy as np
from datetime import datetime

In [2]:
def get_log(folder_path):
    log_files = glob.glob(os.path.join(folder_path, '*.log'))
    if not log_files:
        return None
    latest_file = max(log_files, key=os.path.getmtime)
    return latest_file

def read_log(log_path):
    with open(log_path) as f:
        content=f.readlines()[::-1]
        for line in content:
            if " - Average:" in line:
                return line[31:-1]

def seconds_between(t1: str, t2: str) -> int:
    fmt = "%Y-%m-%d %H:%M:%S"
    dt1 = datetime.strptime(t1, fmt)
    dt2 = datetime.strptime(t2, fmt)
    diff = (dt2 - dt1).total_seconds()
    return int(abs(diff))

def get_time(log_path):
    with open(log_path) as f:
        content=f.readlines()
        t1=''
        t2=''
        for line in content:
            if " - Data shape:" in line:
                t1=line[:19]
                continue
            if " - Average:" in line:
                t2=line[:19]
                return seconds_between(t1,t2)

def get_avg_time(log_path):
    with open(log_path) as f:
        content=f.readlines()
        epoch = 1
        t1=''
        t2=''
        for line in content:
            if " - Epoch: " in line:
                epoch=int(line.split(',')[0].split()[-1])
            if " - Data shape:" in line:
                t1=line[:19]
                continue
            if " - Average:" in line:
                t2=line[:19]
                return "%.2f"%(seconds_between(t1,t2)/epoch)
            

def get_parameter(log_path):
    with open(log_path) as f:
        content=f.readlines()
        for line in content:
            if "The number of parameters:" in line:
                return line.split()[-1]
    return 0

In [10]:
names=["HL","LSTM","Transformer","Mamba","UMamba","STGCN","GWNET","ASTGCN","AGCRN","STTN","DGCRN","DCRNN"]
metrics=["MAE","RMSE","MAPE"]
datasets=["panhandle"]
cat_type = CategoricalDtype(categories=names, ordered=True)

In [4]:
names=["ARIMA","SARIMA"]
metrics=["MSE","MAE"]
datasets=["sz_taxi_od","sz_bike_od","sz_subway_od",] #
cat_type = CategoricalDtype(categories=names, ordered=True)

In [5]:
names=["HA_OD","HL_OD","ARIMA","SARIMA","LSTM_OD","GMEL","GWNET_OD","STGCN_OD","HMDLF","MPGCN_OD","STZINB","STTN","AGCRN_OD","ASTGCN_OD","STGODE_OD","ODMixer"]
metrics=["MSE","MAE"]
datasets=["sz_taxi_od","sz_bike_od","sz_subway_od",] #
cat_type = CategoricalDtype(categories=names, ordered=True)

In [8]:
names=["HA_OD","HL_OD","ARIMA","SARIMA","LSTM_OD","GMEL","GWNET_OD","STGCN_OD","HMDLF","MPGCN_OD","STZINB","STTN","AGCRN_OD","ASTGCN_OD","STGODE_OD","ODMixer"]
metrics=["MSE","MAE"]
datasets=["nyc_taxi_od","nyc_bike_od","nyc_subway_od",] #
cat_type = CategoricalDtype(categories=names, ordered=True)

In [7]:
names=["GWNET_OD","STZINB","AGCRN_OD"]
metrics=["MSE","MAE"]
datasets=["sz_subway_bike_od","sz_subway_taxi_od"] #
cat_type = CategoricalDtype(categories=names, ordered=True)

In [None]:
names=["GWNET_OD","STZINB","AGCRN_OD"]
metrics=["MSE","MAE"]
datasets=["nyc_subway_bike_od","sz_subway_taxi_od"] #
cat_type = CategoricalDtype(categories=names, ordered=True)

In [None]:
names=["HL","STGCN","GWNET","ASTGCN","AGCRN","STGODE","STTN","DCRNN","DSTAGNN","LSTM","TrustEnergy"]
metrics=["MAE","RMSE","MAPE","MPIW","WINK","COV"]
datasets=["panhandle"]
cat_type = CategoricalDtype(categories=names, ordered=True)

In [13]:
path="/home/dy23a.fsu/st/result"

In [6]:
path="/home/dy23a.fsu/st/result/sz"

In [9]:
path="/home/dy23a.fsu/st/result/nyc"

In [4]:
path="/home/dy23a.fsu/st/result/ph"

In [14]:
rows = []
for name in names:
    for dataset in datasets:
        path_=f"{path}/{name}/{dataset}"
        if log:=get_log(path_):
            res=read_log(log)
            time_=get_time(log)
            param_=get_parameter(log)
            if res is None:
                continue
            row = {'Dataset': dataset, 'Model': name}
            m = dict(re.findall(r'(\w+): ([\-\d\.]+)', res))
            not_keys=[i for i in m.keys() if i not in metrics]
            for i in not_keys:
                del m[i]
            
            row.update(m)
            row.update({'time':time_, 'param':param_})
            rows.append(row)


df = pd.DataFrame(rows)
df['Model'] = df['Model'].astype(cat_type)
df_sorted = df.sort_values(by=['Dataset','Model',])
print(df_sorted)


      Dataset        Model    MAE     MAPE    RMSE   time    param
0   panhandle           HL  0.593   63.774   4.237    301       24
1   panhandle         LSTM  0.869   81.551   5.801    565    93443
2   panhandle  Transformer  1.114   72.866  11.166    303    70444
3   panhandle        Mamba  1.258   83.880  11.394    124     2692
4   panhandle       UMamba  0.616   60.747   4.511  11120   341148
5   panhandle        STGCN  1.182   77.720  11.340    296   277684
6   panhandle        GWNET  1.455   99.957  11.839    208   315356
7   panhandle       ASTGCN  1.719  107.357  11.866    148  3528490
8   panhandle        AGCRN  2.290  134.737  11.969    132   766500
9   panhandle         STTN  1.231   84.489  10.737    681   126764
10  panhandle        DGCRN  0.566   59.276   3.644   1480   307156


In [12]:
df_sorted["Dataset"] = df_sorted["Dataset"].str.replace("sz_", "").str.replace("_od", "")  # 只保留 bike/taxi/subway
# df_sorted["Dataset"] = df_sorted["Dataset"].str.replace("nyc_", "").str.replace("_od", "")
df_sorted[["MSE", "MAE"]] = df_sorted[["MSE", "MAE"]].apply(pd.to_numeric)
df_pivot = df_sorted.pivot_table(
    index="Model",
    columns="Dataset",
    values=["MSE", "MAE"],
    observed=False
)
df_pivot.columns = [f"{ds}_{metric}" for metric, ds in df_pivot.columns]
df_pivot = df_pivot.reset_index()
col_order = ["Model","taxi_MSE", "taxi_MAE", "bike_MSE", "bike_MAE", "subway_MSE", "subway_MAE"]
df_pivot = df_pivot[col_order]
print(df_pivot)

KeyError: "['MSE'] not in index"

In [9]:
df_sorted["Dataset"] = df_sorted["Dataset"].str.replace("sz_", "").str.replace("_od", "")  # 只保留 bike/taxi/subway
df_sorted[["time", "param"]] = df_sorted[["time", "param"]].apply(pd.to_numeric)
df_pivot = df_sorted.pivot_table(
    index="Model",
    columns="Dataset",
    values=["time", "param"],
    observed=False
)
df_pivot.columns = [f"{ds}_{metric}" for metric, ds in df_pivot.columns]
df_pivot = df_pivot.reset_index()
col_order = ["Model","taxi_param", "taxi_time", "bike_time", "subway_time"]
# col_order = ["Model","taxi_time", "taxi_param", "bike_time", "bike_param", "subway_time", "subway_param"]
df_pivot = df_pivot[col_order]
print(df_pivot)

        Model  taxi_param  taxi_time  bike_time  subway_time
0       HA_OD         0.0        6.0        6.0          5.0
1       HL_OD         7.0     4445.0     4327.0       2764.0
2       ARIMA         0.0    17533.0    20763.0      23037.0
3      SARIMA         0.0     9563.0    10819.0      11731.0
4     LSTM_OD     21473.0     5294.0     1512.0       1959.0
5        GMEL    330241.0     7129.0     3272.0       1677.0
6    GWNET_OD    230599.0     1368.0      514.0       1413.0
7    STGCN_OD    564843.0     1266.0      620.0        550.0
8       HMDLF   2118772.0     1514.0     1676.0       1203.0
9    MPGCN_OD      1154.0   121081.0   114939.0     120955.0
10     STZINB    213724.0     1940.0     1239.0       3540.0
11   AGCRN_OD    829345.0     1621.0     1892.0       2847.0
12  ASTGCN_OD   1598610.0     1040.0      742.0        890.0
13    ODMixer  23243795.0      926.0      249.0        831.0
