In [22]:
import re
import pandas as pd
import plotly.express as px

from glob import glob

In [23]:
# Compiled patterns used when parsing the benchmark logs.
#   pt_time: a "Done epoch N: Time S" line, capturing a single-digit epoch
#            number and a decimal time value (digits, a dot, digits).
#   pt_node / pt_gds / pt_up: the integer that directly follows the literal
#            "node", "gds" or "up" tag.
pt_time, pt_node, pt_gds, pt_up = map(
    re.compile,
    (
        r"Done epoch (\d): Time (\d+\.\d+)",
        r"node(\d+)",
        r"gds(\d+)",
        r"up(\d+)",
    ),
)

In [24]:
# Build one summary row per training log.
# NOTE: glob is called without recursive=True, so "**" behaves like "*" and
# this matches torch.log files exactly one directory below ./logs.
logs = sorted(glob("./logs/**/torch.log"))
rows = []
resolutions = ["398 x 323", "495 x 370", "990 x 740", "1980 x 1480", "3960 x 2960"]

# Lookup tables translating the integer tags found in the log path into the
# labels stored in each row.
RESOLUTION_BY_UP = {
    0: "398 x 323",
    1: "495 x 370",
    2: "990 x 740",
    4: "1980 x 1480",
    8: "3960 x 2960",
}
READER_BY_GDS = {0: "cpu", 1: "gpu"}
GPUS_BY_NODE = {
    4: "A100 * 4",
    5: "H100 * 8",
    7: "L40 * 4",
    8: "A40 * 4",
    9: "L4 * 4",
}


def extract_int(pattern, text, what):
    """Return the first capture group of `pattern` in `text` as an int.

    Args:
        pattern: compiled regex with at least one capturing group of digits.
        text: string to search (here, the log path).
        what: short tag name used in the error message.

    Raises:
        ValueError: if `pattern` does not match anywhere in `text`.
    """
    match = pattern.search(text)
    if match is None:
        raise ValueError(f"no '{what}' tag found in {text!r}")
    return int(match.group(1))


def lookup_label(table, key, what, source):
    """Return `table[key]`, raising a descriptive error for unknown keys.

    Failing loudly here prevents a row from silently inheriting the label
    computed for the previous log when a tag value is not recognised.

    Raises:
        ValueError: if `key` is not present in `table`.
    """
    try:
        return table[key]
    except KeyError:
        raise ValueError(
            f"unsupported {what} value {key!r} in {source!r}; "
            f"expected one of {sorted(table)}"
        ) from None


for log in logs:
    # Epochs 1-5 default to 0; an epoch with no matching line in the log
    # therefore stays 0 in the resulting row.
    time_per_epoch = {1: 0, 2: 0, 3: 0, 4: 0, 5: 0}
    with open(log, "r") as f:
        for line in f:
            sh_time = pt_time.search(line)
            if sh_time:
                time_per_epoch[int(sh_time.group(1))] = float(sh_time.group(2))

    # The run configuration is encoded as integer tags in the log path.
    up = extract_int(pt_up, log, "up")
    resolution = lookup_label(RESOLUTION_BY_UP, up, "up", log)

    gds = extract_int(pt_gds, log, "gds")
    reader = lookup_label(READER_BY_GDS, gds, "gds", log)

    node_num = extract_int(pt_node, log, "node")
    gpus = lookup_label(GPUS_BY_NODE, node_num, "node", log)

    rows.append(
        {
            "gpus": gpus,
            # Fourth "-"-separated field of the path; relies on the log
            # naming scheme (TODO confirm against the directory layout).
            "storage": log.split("-")[3],
            "reader": reader,
            "resolution": resolution,
            **{f"epoch_{e}": round(time_per_epoch[e], 2) for e in range(1, 6)},
        }
    )

In [25]:
# Assemble the per-log rows into a table, order it, add per-row statistics
# over the five epoch timings, and export it to CSV.
# The epoch columns are selected by name rather than by position, so adding or
# reordering keys in the row dicts cannot silently change which columns feed
# the statistics.
epoch_cols = [f"epoch_{i}" for i in range(1, 6)]

df = pd.DataFrame(rows)
# Categorical with explicit categories so sorting follows the order of
# `resolutions` instead of plain string order.
df["resolution"] = pd.Categorical(df["resolution"], categories=resolutions)
df = df.sort_values(by=["storage", "reader", "resolution"]).reset_index(drop=True)
df["mean"] = df[epoch_cols].mean(axis=1).round(3)
# pandas' default for std is the sample standard deviation (ddof=1).
df["standard_deviation"] = df[epoch_cols].std(axis=1).round(3)
df.to_csv("data_gds.csv", index=False)
df

Unnamed: 0,gpus,storage,reader,resolution,epoch_1,epoch_2,epoch_3,epoch_4,epoch_5,mean,standard_deviation
0,H100 * 8,gpfs,cpu,398 x 323,3.2,0.63,0.65,0.64,0.63,1.15,1.146
1,H100 * 8,gpfs,cpu,495 x 370,10.68,8.47,8.41,8.61,8.58,8.95,0.97
2,H100 * 8,gpfs,cpu,990 x 740,13.15,8.68,9.05,9.24,9.45,9.914,1.831
3,H100 * 8,gpfs,cpu,1980 x 1480,51.28,10.61,9.17,9.15,10.11,18.064,18.579
4,H100 * 8,gpfs,cpu,3960 x 2960,222.06,20.44,16.76,13.72,14.94,57.584,91.98
5,H100 * 8,gpfs,gpu,398 x 323,3.78,0.65,0.63,0.63,0.64,1.266,1.405
6,H100 * 8,gpfs,gpu,495 x 370,11.43,8.21,8.57,8.88,8.94,9.206,1.277
7,H100 * 8,gpfs,gpu,990 x 740,11.34,8.94,9.37,9.32,9.49,9.692,0.944
8,H100 * 8,gpfs,gpu,1980 x 1480,12.0,9.48,9.62,9.47,9.36,9.986,1.13
9,H100 * 8,gpfs,gpu,3960 x 2960,12.01,10.76,11.45,11.93,19.34,13.098,3.525
