In [None]:
import re
import tarfile

import pandas as pd
import plotly.express as px

In [None]:
# All snapshot files live in a single TGZ archive; load them into one dataframe.
# Read from SOURCE_TGZ: a tgz archive with the CSV files generated by running snapshot.py
# Save in SAVE_CSV

# Snapshot files are named after their capture time, e.g. "20230615_15:00.csv"
snapshot_filename_re = r"(?P<timestamp>\d{4}\d{2}\d{2}_\d{2}:\d{2})\.csv"

SOURCE_TGZ = "slurm_snapshot.tgz"
SAVE_CSV = "202306151500-20231020154500.csv"

snapshots: list[pd.DataFrame] = []

with tarfile.open(SOURCE_TGZ, mode="r:gz") as tgz:
    # Pair each snapshot member with its parsed timestamp. Directory entries and
    # files that are not snapshot CSVs are skipped instead of crashing the loop.
    timed_members: list[tuple[pd.Timestamp, tarfile.TarInfo]] = []
    snap: tarfile.TarInfo
    for snap in tgz.getmembers():
        if not snap.isfile():
            continue
        # Match on the base name so archives that store the CSVs under a
        # directory prefix ("snapshots/...", "./...") are handled too.
        name_match = re.fullmatch(snapshot_filename_re, snap.name.rsplit("/", 1)[-1])
        if name_match is None:
            continue
        timestamp = pd.to_datetime(name_match.group("timestamp"), format="%Y%m%d_%H:%M")
        timed_members.append((timestamp, snap))

    # Archive order is not guaranteed to be chronological; sort so that the
    # combined dataframe (and any plot over "time") is monotonic in time.
    timed_members.sort(key=lambda pair: pair[0])

    for timestamp, snap in timed_members:
        with tgz.extractfile(snap) as csv:
            df = pd.read_csv(csv)

        df["time"] = timestamp

        # Append a synthetic "TOTAL" host row holding the column-wise sum of the
        # numeric columns. Building the row from a dict (instead of
        # Series.to_frame().T) keeps numeric and datetime dtypes intact after concat.
        totals = df.sum(numeric_only=True).to_dict()
        totals["HOSTNAMES"] = "TOTAL"
        totals["time"] = timestamp
        df = pd.concat([df, pd.DataFrame([totals])], ignore_index=True)

        snapshots.append(df)

if not snapshots:
    raise ValueError(f"No snapshot CSV files found in {SOURCE_TGZ}")

data = pd.concat(snapshots, ignore_index=True)
data.to_csv(SAVE_CSV, index=False)

In [None]:
# Save viz into HTML file

SAVE_HTML = "slurm_viz.html"

# Build the "Mon D" labels for the title by hand: the "%-d" strftime flag
# (unpadded day) is a glibc extension and raises ValueError on Windows.
first_snapshot = data["time"].min()
last_snapshot = data["time"].max()
first_label = f"{first_snapshot.strftime('%b')} {first_snapshot.day}"
last_label = f"{last_snapshot.strftime('%b')} {last_snapshot.day}"

# One stacked facet per HOSTNAMES value, each showing AvailableGPU over time
fig = px.area(
    data_frame=data,
    x="time",
    y="AvailableGPU",
    facet_col="HOSTNAMES",
    facet_col_wrap=1,
    color="HOSTNAMES",
    facet_row_spacing=0.025,
    height=1400,
    title=f"GPU Availability - {first_label} to {last_label}",
)

# Give every facet its own unlabeled y axis without grid lines or tick labels;
# the default range is 0-4
fig.update_yaxes(matches=None, title=None, showgrid=False)
fig.for_each_yaxis(lambda yaxis: yaxis.update(showticklabels=False, range=(0, 4)))

# Per-row overrides of the default range.
# NOTE(review): presumably rows 2 and 3 are the hosts with 8 GPUs and row 1 is
# the TOTAL facet — confirm against the HOSTNAMES order in the data.
next(fig.select_yaxes(row=3)).update(range=(0, 8))
next(fig.select_yaxes(row=2)).update(range=(0, 8))
next(fig.select_yaxes(row=1)).update(range=(0, 80), title="TOTAL")

# Facet annotations read "HOSTNAMES=<host>"; keep only the host name, in bold
fig.for_each_annotation(lambda a: a.update(text=f"<b>{a.text.split('=')[-1]}</b>"))


fig.write_html(SAVE_HTML)
fig.show()