In [2]:
import pandas as pd
import numpy as np
import plotly.express as px
import json
import os

In [3]:
result_root = "../output/DGraph"

In [4]:
def load_result_file(path):
    if not os.path.isfile(path):
        print(f"{path} result file not existed")
        return []

    with open(path, "r") as f:
        results = json.loads(f.read())
    return results


def plot_train_eval_loss_metric(result_path, eval_freq=2):
    train_prefix = "epoch_train_{metric}_list.txt.{part}"
    eval_prefix = "epoch_eval_{metric}_list.txt.{part}"
    resultdf = []

    for p in [100, 200, 300, 400, 500]:
        partdf = pd.DataFrame(columns=['part', 'epoch'])
        for metric in ["ap", "auc", "mae", "rmse"]:
            train_df = pd.DataFrame()
            eval_df = pd.DataFrame()

            train_result_file = (
                f"{result_path}/{train_prefix.format(metric=metric, part=p)}"
            )

            eval_result_file = (
                f"{result_path}/{eval_prefix.format(metric=metric, part=p)}"
            )

            train_result = load_result_file(train_result_file)
            eval_result = load_result_file(eval_result_file)
            print(f"train_result/eval_result: {len(train_result)}/{len(eval_result)}")

            train_df[f"train_{metric}"] = train_result
            # train_df['mode'] = 'train'
            train_df["part"] = p
            train_df["epoch"] = train_df.index

            eval_df[f'eval_{metric}'] = eval_result
            # eval_df['mode'] = 'eval'
            eval_df["part"] = p
            eval_df["epoch"] = eval_df.index * eval_freq

            print(f"train_df: {train_df.shape}")
            print(f"eval_df: {eval_df.shape}")

            _resultdf = train_df.merge(eval_df, on=["part", "epoch"], how='outer')
            # _resultdf = pd.concat([train_df, eval_df])
            print(f"_resultdf: {_resultdf.shape}")

            partdf = partdf.merge(
                _resultdf,
                on=["part", "epoch"], how='outer'
            )
        resultdf.append(partdf)
            
    return pd.concat(resultdf)


results = plot_train_eval_loss_metric(f"{result_root}/origin")

../output/DGraph/origin/epoch_train_ap_list.txt.100 result file not existed
train_result/eval_result: 0/25
train_df: (0, 3)
eval_df: (25, 3)
_resultdf: (25, 4)
../output/DGraph/origin/epoch_train_auc_list.txt.100 result file not existed
train_result/eval_result: 0/25
train_df: (0, 3)
eval_df: (25, 3)
_resultdf: (25, 4)
train_result/eval_result: 50/25
train_df: (50, 3)
eval_df: (25, 3)
_resultdf: (50, 4)
train_result/eval_result: 50/25
train_df: (50, 3)
eval_df: (25, 3)
_resultdf: (50, 4)
../output/DGraph/origin/epoch_train_ap_list.txt.200 result file not existed
train_result/eval_result: 0/50
train_df: (0, 3)
eval_df: (50, 3)
_resultdf: (50, 4)
../output/DGraph/origin/epoch_train_auc_list.txt.200 result file not existed
train_result/eval_result: 0/50
train_df: (0, 3)
eval_df: (50, 3)
_resultdf: (50, 4)
train_result/eval_result: 100/50
train_df: (100, 3)
eval_df: (50, 3)
_resultdf: (100, 4)
train_result/eval_result: 100/50
train_df: (100, 3)
eval_df: (50, 3)
_resultdf: (100, 4)
../outpu

In [5]:
results.dtypes

train_ap      float64
eval_ap       float64
part            int64
epoch           int64
train_auc     float64
eval_auc      float64
train_mae     float64
eval_mae      float64
train_rmse    float64
eval_rmse     float64
dtype: object

In [6]:
results

Unnamed: 0,train_ap,eval_ap,part,epoch,train_auc,eval_auc,train_mae,eval_mae,train_rmse,eval_rmse
0,,0.007226,100,0,,0.463658,0.330760,0.166762,0.673127,0.386307
1,,0.008107,100,2,,0.494657,0.026191,0.053107,0.189962,0.170181
2,,0.007889,100,4,,0.497034,0.017325,0.028485,0.139890,0.106843
3,,0.007555,100,6,,0.499596,0.015092,0.015788,0.120744,0.077051
4,,0.007245,100,8,,0.497665,0.014332,0.009306,0.115613,0.064223
...,...,...,...,...,...,...,...,...,...,...
245,,,500,241,,,0.006301,,0.079232,
246,,,500,243,,,0.006301,,0.079232,
247,,,500,245,,,0.006301,,0.079232,
248,,,500,247,,,0.006301,,0.079232,


In [10]:
px.scatter(
    results.sort_values(["epoch", "part"]),
    x="epoch",
    y=[
        "train_mae",
        "train_rmse",
        "eval_mae",
        "eval_rmse",
        "eval_auc",
        "eval_ap",
    ],
    facet_col="part",
    render_mode="SVG",
)