# GPUs per Job on PLI

In [1]:
import os
import re
import subprocess
from calendar import monthrange
import pandas as pd
import numpy as np

In [2]:
os.environ["SLURM_TIME_FORMAT"] = "%s"

In [3]:
def get_data_from_sacct(clusters: str,
                        start_date: str,
                        end_date: str,
                        partitions: str,
                        fields: str) -> pd.DataFrame:
    """Return a dataframe of the sacct output."""
    cmd = f"sacct -M {clusters} -r {partitions} -X -P -n -S {start_date} -E {end_date} -a -o {fields}"
    print(cmd)
    output = subprocess.run(cmd,
                            stdout=subprocess.PIPE,
                            shell=True,
                            timeout=100,
                            text=True,
                            check=True)
    rows = [row.split("|") for row in output.stdout.split()]
    df = pd.DataFrame(rows)
    df.columns = fields.split(",")
    return df

In [4]:
def clean_dataframe(df):
    col = "elapsedraw"
    df = df[pd.notna(df[col])]
    df = df[df[col].str.isnumeric()]
    df[col] = df[col].astype("int64")
    df = df[df[col] > 0]
    df = df[pd.notna(df["alloctres"]) & df["alloctres"].str.contains("gres/gpu=")]
    return df

In [5]:
s = "2024-06-01T00:00:00"
e = "now"
partitions = "cli,pli-c,pli,pli-p,pli-cp,pli-lc"
fields = "user,elapsedraw,alloctres"
df = get_data_from_sacct("della", s, e, partitions, fields)    

sacct -M della -r cli,pli-c,pli,pli-p,pli-cp,pli-lc -X -P -n -S 2024-06-01T00:00:00 -E now -a -o user,elapsedraw,alloctres


In [6]:
df = clean_dataframe(df)

In [7]:
def gpus_per_job(tres: str) -> int:
    """Return the number of allocated GPUs."""
    gpus = re.findall(r"gres/gpu=\d+", tres)
    return int(gpus[0].replace("gres/gpu=", "")) if gpus else 0

In [8]:
df["gpus_per_job"] = df["alloctres"].apply(gpus_per_job)
df["gpu-hours"] = df["gpus_per_job"] * df["elapsedraw"] / 3600

## Overall Usage

In [9]:
days = 6 * 30.5 + 3
hours_per_day = 24
gpus = 296
overall_usage = df["gpu-hours"].sum() / (days * hours_per_day * gpus)
overall_usage

0.734520415913225

## GPUs per Job vs. GPU-Hours

In [10]:
gp = df.groupby("gpus_per_job").agg({"gpu-hours":"sum", "user":lambda series: series.unique().size})
gp = gp.reset_index(drop=False).rename(columns={"user":"users"})
gp["proportion(%)"] = gp["gpu-hours"].apply(lambda x: round(100 * x / gp["gpu-hours"].sum()))
gp["gpu-hours"] = gp["gpu-hours"].apply(round)
print(gp[["gpus_per_job", "gpu-hours", "proportion(%)", "users"]].to_string(index=False, justify="center"))

 gpus_per_job  gpu-hours  proportion(%)  users
       1        182159         19         79  
       2         26083          3         55  
       3          2605          0         11  
       4        166342         17         66  
       5           877          0          3  
       6          2267          0          7  
       7          1459          0          4  
       8        214549         22         51  
      12           141          0          3  
      16         26864          3         15  
      18             1          0          1  
      32        311295         32          8  
      64         35661          4          2  
     128           251          0          2  
