In [1]:
import os
import glob
import re
import pandas as pd
from pandas.api.types import CategoricalDtype
import numpy as np
from datetime import datetime

In [2]:
def get_log(folder_path):
    """Return the most recently modified ``*.log`` file inside ``folder_path``.

    Args:
        folder_path: Directory to search (non-recursive).

    Returns:
        Path of the ``.log`` file with the largest modification time, or
        ``None`` when the glob matches nothing.
    """
    pattern = os.path.join(folder_path, '*.log')
    candidates = glob.glob(pattern)
    if candidates:
        # Largest mtime == newest file.
        return max(candidates, key=os.path.getmtime)
    return None

def read_log(log_path):
    """Return the payload of the last `` - Average:`` line in a log file.

    The file is scanned from its end, so when several lines contain
    `` - Average:`` the one closest to the end of the file wins. The first 31
    characters of that line are skipped (presumably the timestamp and the
    "Average" prefix written by the logger -- confirm against the log format)
    and the remainder is returned without its line terminator.

    Args:
        log_path: Path of the log file to read.

    Returns:
        The text after the 31-character prefix, or ``None`` when no line
        contains `` - Average:``.
    """
    with open(log_path) as f:
        lines = f.readlines()
    for line in reversed(lines):
        if " - Average:" in line:
            # Strip only a newline: a fixed [31:-1] slice cut the last digit of
            # the final value when the file did not end with a newline.
            return line[31:].rstrip("\n")
    return None

def seconds_between(t1: str, t2: str) -> int:
    """Absolute gap between two timestamps, in whole seconds.

    Args:
        t1: Timestamp formatted as ``%Y-%m-%d %H:%M:%S``.
        t2: Timestamp in the same format.

    Returns:
        ``|t2 - t1|`` in seconds, truncated to an ``int``.

    Raises:
        ValueError: If either string does not match the expected format.
    """
    first, second = (
        datetime.strptime(stamp, "%Y-%m-%d %H:%M:%S") for stamp in (t1, t2)
    )
    elapsed = second - first
    return int(abs(elapsed.total_seconds()))

def get_time(log_path):
    """Seconds elapsed between the `` - Data shape:`` and `` - Average:`` lines.

    Lines are scanned top to bottom. The first 19 characters of a matching
    line are taken as its timestamp and handed to ``seconds_between``. The
    start is the latest `` - Data shape:`` line seen before the first
    `` - Average:`` line.

    Args:
        log_path: Path of the log file to read.

    Returns:
        The elapsed time in whole seconds, or ``None`` when the log has no
        `` - Average:`` line or no `` - Data shape:`` line precedes it.
    """
    start = None
    with open(log_path) as f:
        for line in f:
            if " - Data shape:" in line:
                start = line[:19]
                continue
            if " - Average:" in line:
                if start is None:
                    # Without a start stamp there is no interval to measure;
                    # parsing an empty string used to raise ValueError here.
                    return None
                return seconds_between(start, line[:19])
    return None

def get_avg_time(log_path):
    """Average seconds per epoch between `` - Data shape:`` and `` - Average:``.

    Lines are scanned top to bottom. The first 19 characters of a matching
    line are taken as its timestamp. The epoch count is the integer parsed
    from the most recent `` - Epoch: `` line seen before the first
    `` - Average:`` line (last whitespace-separated token before the first
    comma); it defaults to 1 when no such line was seen.

    Args:
        log_path: Path of the log file to read.

    Returns:
        The elapsed seconds divided by the epoch count, formatted with
        ``"%.2f"`` (a string), or ``None`` when the log has no
        `` - Average:`` line, no `` - Data shape:`` line precedes it, or the
        parsed epoch count is 0.
    """
    epoch = 1
    start = None
    with open(log_path) as f:
        for line in f:
            if " - Epoch: " in line:
                epoch = int(line.split(',')[0].split()[-1])
            if " - Data shape:" in line:
                start = line[:19]
                continue
            if " - Average:" in line:
                if start is None or epoch == 0:
                    # No start stamp (used to raise ValueError on '') or a zero
                    # epoch count (used to raise ZeroDivisionError): no average.
                    return None
                return "%.2f" % (seconds_between(start, line[:19]) / epoch)
    return None
            

def get_parameter(log_path):
    """Read the parameter count reported in a log file.

    Args:
        log_path: Path of the log file to read.

    Returns:
        The last whitespace-separated token (a string) of the first line that
        contains ``"The number of parameters:"``, or the integer ``0`` when no
        line contains it.
    """
    marker = "The number of parameters:"
    with open(log_path) as f:
        hit = next((entry for entry in f.readlines() if marker in entry), None)
    if hit is None:
        return 0
    return hit.split()[-1]

In [42]:
# Configuration for the "panhandle" dataset. Whichever configuration cell is
# run last defines names/metrics/datasets/cat_type for the scan cell.

# Model folders to scan; the list order is also the category order used when
# sorting the result table by Model.
names = [
    "HL", "STGCN", "GWNET", "ASTGCN", "AGCRN", "STGODE",
    "STTN", "DCRNN", "DSTAGNN", "LSTM", "TrustEnergy",
]
# Metric names kept from each log's result line; other keys are dropped.
metrics = ["MAE", "RMSE", "MAPE", "MPIW", "WINK", "COV"]
# Dataset sub-folders looked up under every model folder.
datasets = ["panhandle"]
# Ordered categorical so sorting by Model follows `names` instead of the alphabet.
cat_type = CategoricalDtype(names, ordered=True)

In [4]:
# Alternative configuration: only the ARIMA/SARIMA folders on the three "sz_*"
# datasets. Whichever configuration cell is run last takes effect.

# Model folders to scan (also the category order for sorting by Model).
names = ["ARIMA", "SARIMA"]
# Metric names kept from each log's result line.
metrics = ["MSE", "MAE"]
# Dataset sub-folders looked up under every model folder.
datasets = [
    "sz_taxi_od",
    "sz_bike_od",
    "sz_subway_od",
]
cat_type = CategoricalDtype(names, ordered=True)

In [3]:
# Alternative configuration: the full model list on the three "sz_*" datasets.
# Whichever configuration cell is run last takes effect.

# Model folders to scan (also the category order for sorting by Model).
names = [
    "HA_OD", "HL_OD", "ARIMA", "SARIMA",
    "LSTM_OD", "GMEL", "GWNET_OD", "STGCN_OD",
    "HMDLF", "MPGCN_OD", "STZINB", "STTN",
    "AGCRN_OD", "ASTGCN_OD", "STGODE_OD", "ODMixer",
]
# Metric names kept from each log's result line.
metrics = ["MSE", "MAE"]
# Dataset sub-folders looked up under every model folder.
datasets = [
    "sz_taxi_od",
    "sz_bike_od",
    "sz_subway_od",
]
cat_type = CategoricalDtype(names, ordered=True)

In [8]:
# Alternative configuration: the full model list on the three "nyc_*" datasets.
# Whichever configuration cell is run last takes effect.

# Model folders to scan (also the category order for sorting by Model).
names = [
    "HA_OD", "HL_OD", "ARIMA", "SARIMA",
    "LSTM_OD", "GMEL", "GWNET_OD", "STGCN_OD",
    "HMDLF", "MPGCN_OD", "STZINB", "STTN",
    "AGCRN_OD", "ASTGCN_OD", "STGODE_OD", "ODMixer",
]
# Metric names kept from each log's result line.
metrics = ["MSE", "MAE"]
# Dataset sub-folders looked up under every model folder.
datasets = [
    "nyc_taxi_od",
    "nyc_bike_od",
    "nyc_subway_od",
]
cat_type = CategoricalDtype(names, ordered=True)

In [7]:
# Alternative configuration: three models on the two "sz_subway_*" datasets.
# Whichever configuration cell is run last takes effect.

# Model folders to scan (also the category order for sorting by Model).
names = ["GWNET_OD", "STZINB", "AGCRN_OD"]
# Metric names kept from each log's result line.
metrics = ["MSE", "MAE"]
# Dataset sub-folders looked up under every model folder.
datasets = [
    "sz_subway_bike_od",
    "sz_subway_taxi_od",
]
cat_type = CategoricalDtype(names, ordered=True)

In [8]:
# Root directory of the result logs; the scan cell looks for logs in
# {path}/{name}/{dataset}. One of several alternative `path` cells -- the one
# executed last wins.
path="/home/dy23a.fsu/st/result/cross"

In [4]:
# Root directory of the result logs; the scan cell looks for logs in
# {path}/{name}/{dataset}. One of several alternative `path` cells -- the one
# executed last wins.
path="/home/dy23a.fsu/st/result/sz"

In [6]:
# Root directory of the result logs; the scan cell looks for logs in
# {path}/{name}/{dataset}. One of several alternative `path` cells -- the one
# executed last wins. The trailing slash yields a doubled "/" in the joined path.
path="/blue/gtyson.fsu/dy23a.fsu/result/result/"

In [9]:
# Root directory of the result logs; the scan cell looks for logs in
# {path}/{name}/{dataset}. One of several alternative `path` cells -- the one
# executed last wins.
path="/home/dy23a.fsu/st/result/nyc"

In [10]:
# Build one table row per (model, dataset) pair from the newest log found in
# {path}/{name}/{dataset}. Pairs without a log, or whose log has no
# " - Average:" result line, are skipped.
rows = []
for name in names:
    for dataset in datasets:
        log = get_log(f"{path}/{name}/{dataset}")
        if not log:
            continue
        res = read_log(log)
        if res is None:
            # Check for a result line first so logs that are skipped anyway are
            # not parsed for timing and parameter count.
            continue
        # "<key>: <number>" pairs from the result line; values stay strings here
        # and are converted with pd.to_numeric in the pivot cells.
        parsed = dict(re.findall(r'(\w+): ([\-\d\.]+)', res))
        row = {'Dataset': dataset, 'Model': name}
        row.update({key: value for key, value in parsed.items() if key in metrics})
        row.update({'time': get_avg_time(log), 'param': get_parameter(log)})
        rows.append(row)

if not rows:
    # An empty frame has no 'Model' column; fail with an actionable message
    # instead of the KeyError the astype call below would raise.
    raise RuntimeError(
        f"No log with an ' - Average:' line found under {path!r} "
        f"for models {names} and datasets {datasets}"
    )

df = pd.DataFrame(rows)
# Ordered categorical makes the sort follow the order of `names`.
df['Model'] = df['Model'].astype(cat_type)
df_sorted = df.sort_values(by=['Dataset', 'Model'])
print(df_sorted)


          Dataset      Model     MSE    MAE     time   param
1     nyc_bike_od      HA_OD   0.068  0.048     1.00       0
4     nyc_bike_od      HL_OD   0.023  0.027     0.57       7
7     nyc_bike_od      ARIMA   0.111  0.140  2123.00       0
10    nyc_bike_od     SARIMA   0.238    NaN  1482.00       0
13    nyc_bike_od    LSTM_OD   0.155  0.234     0.62   21473
16    nyc_bike_od       GMEL   0.146  0.221     1.65  330241
19    nyc_bike_od   GWNET_OD   0.057  0.131     0.96  153855
22    nyc_bike_od   STGCN_OD   0.105  0.185     0.90  157379
25    nyc_bike_od      HMDLF   0.235  0.293     3.52  290060
28    nyc_bike_od   MPGCN_OD   0.074  0.162    14.85    1154
31    nyc_bike_od     STZINB   0.022  0.027     0.73   50484
34    nyc_bike_od   AGCRN_OD   0.122  0.204     1.23  203945
37    nyc_bike_od  ASTGCN_OD   0.124  0.210     0.93  112490
40    nyc_bike_od    ODMixer   0.023  0.042     0.79  447011
2   nyc_subway_od      HA_OD   0.000  0.000     3.00       0
5   nyc_subway_od      H

In [11]:
# Reduce dataset labels to bike/taxi/subway by dropping the "nyc_" prefix and
# the "_od" suffix. Variant for the "sz_*" datasets:
# df_sorted["Dataset"] = df_sorted["Dataset"].str.replace("sz_", "").str.replace("_od", "")
metric_cols = ["MSE", "MAE"]
df_sorted["Dataset"] = (
    df_sorted["Dataset"]
    .str.replace("nyc_", "")
    .str.replace("_od", "")
)
# Metric values were parsed from the logs as strings; make them numeric.
df_sorted[metric_cols] = df_sorted[metric_cols].apply(pd.to_numeric)

# One row per model, one column per (metric, dataset) pair.
df_pivot = df_sorted.pivot_table(
    index="Model", columns="Dataset", values=metric_cols, observed=False
)
# Flatten the (metric, dataset) column pairs into "<dataset>_<metric>" labels.
df_pivot.columns = [f"{label}_{measure}" for measure, label in df_pivot.columns]
col_order = ["Model"] + [
    f"{label}_{measure}"
    for label in ("taxi", "bike", "subway")
    for measure in ("MSE", "MAE")
]
df_pivot = df_pivot.reset_index()[col_order]
print(df_pivot)

        Model  taxi_MSE  taxi_MAE  bike_MSE  bike_MAE  subway_MSE  subway_MAE
0       HA_OD     0.917     0.340     0.068     0.048       0.000       0.000
1       HL_OD     0.379     0.224     0.023     0.027       0.002       0.026
2       ARIMA     0.925     0.543     0.111     0.140       1.778       0.308
3      SARIMA     1.894       NaN     0.238       NaN       5.420       0.715
4     LSTM_OD     0.963     0.655     0.155     0.234       0.728       0.490
5        GMEL     1.012     0.643     0.146     0.221       2.819       0.818
6    GWNET_OD     0.581     0.446     0.057     0.131       0.010       0.054
7    STGCN_OD     0.762     0.546     0.105     0.185       0.080       0.156
8       HMDLF     1.270     0.752     0.235     0.293      12.350       1.831
9    MPGCN_OD     0.663     0.506     0.074     0.162       1.813       0.554
10     STZINB     0.339     0.195     0.022     0.027       0.006       0.061
11   AGCRN_OD     0.861     0.600     0.122     0.204       0.11

In [7]:
# Reduce dataset labels to bike/taxi/subway by dropping the "sz_" prefix and
# the "_od" suffix.
cost_cols = ["time", "param"]
df_sorted["Dataset"] = (
    df_sorted["Dataset"]
    .str.replace("sz_", "")
    .str.replace("_od", "")
)
# time/param were collected from the logs as strings (or 0/None); make them numeric.
df_sorted[cost_cols] = df_sorted[cost_cols].apply(pd.to_numeric)

# One row per model, one column per (quantity, dataset) pair.
df_pivot = df_sorted.pivot_table(
    index="Model", columns="Dataset", values=cost_cols, observed=False
)
# Flatten the (quantity, dataset) column pairs into "<dataset>_<quantity>" labels.
df_pivot.columns = [f"{label}_{quantity}" for quantity, label in df_pivot.columns]
# Only the taxi parameter count is shown. To list time and param for every dataset use:
# col_order = ["Model","taxi_time", "taxi_param", "bike_time", "bike_param", "subway_time", "subway_param"]
col_order = ["Model", "taxi_param"] + [f"{label}_time" for label in ("taxi", "bike", "subway")]
df_pivot = df_pivot.reset_index()[col_order]
print(df_pivot)

        Model  taxi_param  taxi_time  bike_time  subway_time
0       HA_OD         0.0       6.00       6.00         6.00
1       HL_OD         7.0       4.45       4.33         4.20
2       ARIMA         0.0   17533.00   20763.00     22721.00
3      SARIMA         0.0    9563.00   10819.00     11672.00
4     LSTM_OD     21473.0       5.35       5.52         5.29
5        GMEL    330241.0       7.13       6.87         6.81
6    GWNET_OD    230599.0       4.77       4.85         4.41
7    STGCN_OD    564843.0       4.85       5.17         4.89
8       HMDLF   2118772.0      13.05      13.30        13.40
9    MPGCN_OD      1154.0     121.08     121.50       120.81
10     STZINB    213724.0       5.84       6.10         5.88
11   AGCRN_OD    829345.0       4.81       4.57         4.53
12  ASTGCN_OD   1598610.0       4.79       4.61         4.54
13    ODMixer  23243795.0       6.30       6.38         6.18
