In [9]:
import re
import pandas as pd
import plotly.express as px

from glob import glob

In [10]:
# --- Pattern applied to each line of a torch.log file ----------------------
# Captures a single-digit epoch number and its decimal time from lines of the
# form "Done epoch <d>: Time <float>".
pt_time = re.compile(r"Done epoch (\d): Time (\d+\.\d+)")

# --- Patterns applied to the log *path* -------------------------------------
# Each one pulls a benchmark attribute out of the path string.
pt_node = re.compile(r"node(\d+)")  # node number, group 1
pt_gds = re.compile(r"gds(\d)")  # one-digit GDS flag, group 1
pt_storage = re.compile(r"ontap|gpfs")  # storage tag
pt_reader = re.compile(r"cpu|gpu")  # reader tag
# No word boundaries: the leftmost occurrence of any alternative wins, so
# "fhd"/"qhd"/"uhd" are found before the "hd" they contain.
pt_resolution = re.compile(r"sd|hd|fhd|qhd|uhd")

In [11]:
# Benchmark logs to summarise. NOTE: without ``recursive=True`` the ``**``
# segment behaves like ``*``, so only logs exactly one directory below
# ./logs are picked up.
logs = sorted(glob("./logs/**/torch.log"))
rows = []

# (width, height) per resolution label.
resolutions = {
    "SD": (854, 480),
    "HD": (1280, 720),
    "FHD": (1920, 1080),
    "QHD": (2560, 1440),
    "UHD": (3840, 2160),
}

# GPU label recorded for each node number found in the log path ("node<N>").
gpus_by_node = {
    4: "A100 * 4",
    5: "H100 * 8",
    7: "L40 * 4",
    8: "A40 * 4",
    9: "L4 * 4",
}


def gpu_label(node_num):
    """Return the GPU label for ``node_num``.

    Raises:
        ValueError: if the node number has no entry in ``gpus_by_node``.
            Failing loudly avoids silently reusing the label of the
            previously processed log for an unmapped node.
    """
    try:
        return gpus_by_node[node_num]
    except KeyError:
        raise ValueError(
            f"no GPU configuration known for node {node_num}"
        ) from None


for log in logs:
    # Epochs for which the log reports no time keep the 0 default.
    time_per_epoch = {epoch: 0 for epoch in range(1, 6)}
    with open(log, "r") as f:
        for line in f:
            sh_time = pt_time.search(line)
            if sh_time:
                time_per_epoch[int(sh_time.group(1))] = float(sh_time.group(2))

    gpus = gpu_label(int(pt_node.search(log).group(1)))

    # GDS is ON only when the path carries a non-zero "gds<digit>" tag;
    # a missing tag or "gds0" both mean OFF.
    sh_gds = pt_gds.search(log)
    gds = "ON" if sh_gds and int(sh_gds.group(1)) else "OFF"

    rows.append(
        {
            "gpus": gpus,
            "storage": pt_storage.search(log).group(0).upper(),
            "resolution": pt_resolution.search(log).group(0).upper(),
            "reader": pt_reader.search(log).group(0).upper(),
            "gds": gds,
            # epoch_1 .. epoch_5, in order, rounded to two decimals.
            **{
                f"epoch_{epoch}": round(time_per_epoch[epoch], 2)
                for epoch in range(1, 6)
            },
        }
    )

In [12]:
# Per-epoch timing columns, addressed by name so the statistics below do not
# depend on the position of these columns in the frame.
epoch_cols = [f"epoch_{epoch}" for epoch in range(1, 6)]

df = (
    pd.DataFrame(rows)
    # Categorical with the key order of `resolutions`, so sorting follows
    # that order instead of alphabetical order.
    .assign(
        resolution=lambda frame: pd.Categorical(
            frame["resolution"], categories=list(resolutions)
        )
    )
    # Stable sort: rows sharing a resolution keep the order they had in
    # `rows`; the default quicksort leaves tie order unspecified.
    .sort_values(by="resolution", kind="mergesort")
    .reset_index(drop=True)
)

# Row-wise mean and standard deviation over the five epoch times.
df["mean"] = df[epoch_cols].mean(axis=1).round(3)
df["standard_deviation"] = df[epoch_cols].std(axis=1).round(3)

df.to_csv("data_gds.csv", index=False)
df

Unnamed: 0,gpus,storage,resolution,reader,gds,epoch_1,epoch_2,epoch_3,epoch_4,epoch_5,mean,standard_deviation
0,H100 * 8,GPFS,SD,CPU,OFF,120.84,84.35,78.9,80.12,79.86,88.814,18.025
1,H100 * 8,GPFS,SD,GPU,OFF,191.5,116.54,128.31,219.65,221.84,175.568,50.139
2,H100 * 8,GPFS,SD,GPU,ON,255.81,255.92,265.56,263.23,271.93,262.49,6.835
3,H100 * 8,GPFS,HD,CPU,OFF,200.68,181.92,182.5,192.37,187.52,188.998,7.784
4,H100 * 8,GPFS,HD,GPU,OFF,73.31,224.65,265.08,256.33,149.26,193.726,81.308
5,H100 * 8,GPFS,HD,GPU,ON,296.44,330.85,326.47,327.14,336.82,323.544,15.698
6,H100 * 8,GPFS,FHD,CPU,OFF,493.87,470.31,483.66,476.74,484.47,481.81,8.859
7,H100 * 8,GPFS,FHD,GPU,OFF,436.21,473.07,516.29,945.8,947.15,663.704,259.685
8,H100 * 8,GPFS,FHD,GPU,ON,531.62,546.71,537.0,533.16,552.11,540.12,8.911
9,H100 * 8,GPFS,QHD,CPU,OFF,725.09,725.26,728.91,726.1,721.36,725.344,2.703
