In [39]:
import polars as pl
import pyslurm
from IPython.display import display
import json
from datetime import datetime
import math

In [None]:
# Write the node data returned by pyslurm to node.json as indented JSON.
# The data is fetched and serialised *before* the file is opened: open(..., "w")
# truncates immediately, so fetching inside the `with` block would leave an
# empty node.json behind whenever pyslurm or the serialisation raised.
node_snapshot = json.dumps(pyslurm.node().get(), indent=4)
with open("node.json", "w") as f:
    f.write(node_snapshot)

In [None]:
# Process node data
#
# Builds `node_df`: one row per Slurm partition with the member node names and
# states, summed CPU / GRES / memory figures and human-readable usage strings.


def _gres_count(entry):
    """Return the trailing integer count of a GRES entry such as "gpu:8".

    Non-string values (e.g. None for nodes without GRES) are returned unchanged.
    Slurm may append a parenthesised suffix to the count, e.g.
    "gpu:a100:4(S:0-1)"; it is stripped first because it contains ":" itself
    and would otherwise be mistaken for the count field.
    """
    if not isinstance(entry, str):
        return entry
    return int(entry.split("(", 1)[0].split(":")[-1])


def _percent_str(ratio, alias):
    """Build an expression rendering `ratio` (0..1) as a "12.34%" string column."""
    return pl.concat_str([(ratio * 100).round(2).cast(pl.Utf8), pl.lit("%")]).alias(alias)


# The JSON round trip turns pyslurm's result into plain JSON-compatible objects.
json_string = json.dumps(pyslurm.node().get())
json_object = json.loads(json_string)
node_df = pl.DataFrame(list(json_object.values()))

# Per-node clean-up. Each column is listed once (the original list named
# "core_spec_cnt" twice).
node_df = node_df.drop([
    "core_spec_cnt", "cpu_spec_list", "extra", "features", "features_active",
    "mcs_label", "mem_spec_limit", "owner", "tmp_disk", "reason_time", "reason",
    "reason_uid", "power_mgmt", "energy",
])
# NOTE(review): the timestamp columns are not carried through the partition
# aggregation below, so this formatting only matters for a per-node view.
node_df = node_df.with_columns(
    pl.col("last_busy", "slurmd_start_time").map_elements(
        lambda ts: datetime.fromtimestamp(ts).strftime("%Y-%m-%d %H:%M:%S"),
        return_dtype=pl.Utf8,
    )
)
# Only the first GRES entry of each node is considered; nodes without GRES count as 0.
node_df = node_df.with_columns(
    pl.col("gres", "gres_used")
    .list.first()
    .map_elements(_gres_count, return_dtype=pl.Int64)
    .fill_null(0)
)
# Divide the memory figures by 1024 and round down, per node.
# NOTE(review): presumably MB -> GB, given the " GB" suffix added below - confirm.
node_df = node_df.with_columns((pl.col("real_memory", "free_mem") / 1024).floor())
# Exclude nodes z010 .. z016.
node_df = node_df.filter(~pl.col("name").is_in([f"z0{i}" for i in range(10, 17)]))

# A node can belong to several partitions: give it one row per partition, then
# collapse to one row per partition (names/states as lists, numeric columns summed).
node_df = node_df.explode("partitions")
node_df = node_df.group_by("partitions").agg(
    pl.col("name"),
    pl.col("state"),
    pl.col("alloc_cpus").sum(),
    pl.col("cpus").sum(),
    pl.col("gres_used").sum(),
    pl.col("gres").sum(),
    pl.col("free_mem").sum(),
    pl.col("real_memory").sum(),
)

# Usage percentages, computed while real_memory is still numeric.
node_df = node_df.with_columns(
    _percent_str(pl.col("gres_used") / pl.col("gres"), "gres_usage"),
    _percent_str(pl.col("alloc_cpus") / pl.col("cpus"), "cpus_usage"),
    _percent_str((pl.col("real_memory") - pl.col("free_mem")) / pl.col("real_memory"), "mem_usage"),
)
# real_memory becomes a display string from here on.
node_df = node_df.with_columns(
    pl.concat_str([pl.col("real_memory").cast(pl.Utf8), pl.lit(" GB")]).alias("real_memory")
)
# Partitions without any GRES give 0/0 = NaN; show a blank instead of "NaN%".
node_df = node_df.with_columns(pl.col("gres_usage").replace("NaN%", ""))

# Present partitions in a fixed order. Partitions missing from custom_order get
# a null sort key; nulls_last=True puts them after the known ones instead of
# ahead of them.
custom_order = ["30mins", "4hours", "12hours", "5days", "gpu"]
order_dict = {val: i for i, val in enumerate(custom_order)}
node_df = (
    node_df
    .with_columns(
        pl.col("partitions").map_elements(order_dict.get, return_dtype=pl.Int64).alias("__sort_key")
    )
    .sort("__sort_key", nulls_last=True)
    .drop("__sort_key")
)
display(node_df.head(10))
print(node_df.shape)

partitions,name,state,alloc_cpus,cpus,gres_used,gres,free_mem,real_memory,gres_usage,cpus_usage,mem_usage
str,list[str],list[str],i64,i64,i64,i64,f64,str,str,str,str
"""30mins""","[""z1014"", ""z1016"", … ""z1050""]","[""IDLE"", ""IDLE"", … ""MIXED""]",936,2408,0,0,4476.0,"""11812.0 GB""","""""","""38.87%""","""62.11%"""
"""4hours""","[""z1014"", ""z1016"", … ""z1040""]","[""IDLE"", ""IDLE"", … ""IDLE""]",0,1040,0,0,4064.0,"""5026.0 GB""","""""","""0.0%""","""19.14%"""
"""12hours""","[""z1021"", ""z1022"", … ""z1053""]","[""MIXED"", ""MIXED"", … ""MIXED""]",1080,1560,0,0,527.0,"""7539.0 GB""","""""","""69.23%""","""93.01%"""
"""5days""","[""z1014"", ""z1016"", … ""z1053""]","[""IDLE"", ""IDLE"", … ""MIXED""]",576,1296,0,0,2973.0,"""6030.0 GB""","""""","""44.44%""","""50.7%"""
"""gpu""","[""z018"", ""z019""]","[""IDLE"", ""IDLE""]",0,64,0,8,416.0,"""752.0 GB""","""0.0%""","""0.0%""","""44.68%"""


(5, 12)


In [None]:
# Write the job data returned by pyslurm to jobs.json as indented JSON.
# The data is fetched and serialised *before* the file is opened: open(..., "w")
# truncates immediately, so fetching inside the `with` block would leave an
# empty jobs.json behind whenever pyslurm or the serialisation raised.
jobs_snapshot = json.dumps(pyslurm.job().get(), indent=4)
with open("jobs.json", "w") as f:
    f.write(jobs_snapshot)

In [49]:
def _format_time(seconds):
    days = int(seconds / (24 * 60 * 60))
    hours = int((seconds % (24 * 60 * 60)) / (60 * 60))
    mins = int((seconds % (60 * 60)) / 60)
    secs = int(seconds % 60)

    parts = []
    if days > 0:
        parts.append(f"{days} day{'s' if days != 1 else ''}")
    if hours > 0:
        parts.append(f"{hours} hour{'s' if hours != 1 else ''}")
    if mins > 0:
        parts.append(f"{mins} min{'s' if mins != 1 else ''}")
    if secs > 0 or (days == 0 and hours == 0 and mins == 0):
        parts.append(f"{secs} sec{'s' if secs != 1 else ''}")

    return " ".join(parts)

# Fetch the job table; the JSON round trip yields plain JSON-compatible objects.
jobs_json_string = json.dumps(pyslurm.job().get())
jobs_json = json.loads(jobs_json_string)

# Store each job's key inside its own record so it survives as an "id" column.
for job_id, job_record in jobs_json.items():
    job_record["id"] = job_id
jobs = list(jobs_json.values())

jobs_df = (
    pl.DataFrame(jobs, infer_schema_length=None)
    .drop(["cpus_allocated", "cpus_alloc_layout"])
    # Epoch seconds -> datetime for the four timestamp columns.
    .with_columns(
        pl.col("eligible_time", "end_time", "start_time", "submit_time").map_elements(
            lambda ts: datetime.fromtimestamp(ts),
            return_dtype=pl.Datetime,
        )
    )
    # Human-readable run time next to the numeric one.
    .with_columns(
        pl.col("run_time").map_elements(_format_time, return_dtype=pl.Utf8).alias("run_time_str")
    )
    .with_columns(pl.col("user_id").cast(pl.Utf8))
    .select("id", "name", "partition", "nodes", "num_nodes", "job_state", "run_time_str", "user_id")
)
display(jobs_df.head(10))

id,name,partition,nodes,num_nodes,job_state,run_time_str,user_id
str,str,str,str,i64,str,str,str
"""492166""","""e3faf13b-2922-4712-856d-15f38a…","""12hours""","""z1049""",1,"""COMPLETED""","""3 hours 46 secs""","""14004"""
"""492410""","""61850e9f-e2b3-4da2-a790-5dbe8d…","""gpu""","""z018""",1,"""RUNNING""","""2 hours 52 mins 58 secs""","""14004"""
"""492411""","""61850e9f-e2b3-4da2-a790-5dbe8d…","""gpu""","""z019""",1,"""RUNNING""","""2 hours 52 mins 58 secs""","""14004"""
"""492412""","""61850e9f-e2b3-4da2-a790-5dbe8d…","""gpu""","""z018""",1,"""RUNNING""","""2 hours 52 mins 58 secs""","""14004"""
"""492413""","""61850e9f-e2b3-4da2-a790-5dbe8d…","""gpu""","""z019""",1,"""RUNNING""","""2 hours 52 mins 58 secs""","""14004"""
"""492414""","""61850e9f-e2b3-4da2-a790-5dbe8d…","""gpu""","""z018""",1,"""RUNNING""","""2 hours 52 mins 58 secs""","""14004"""
"""492415""","""61850e9f-e2b3-4da2-a790-5dbe8d…","""gpu""","""z019""",1,"""RUNNING""","""2 hours 52 mins 58 secs""","""14004"""
"""492416""","""61850e9f-e2b3-4da2-a790-5dbe8d…","""gpu""","""z018""",1,"""RUNNING""","""2 hours 52 mins 58 secs""","""14004"""
"""492417""","""61850e9f-e2b3-4da2-a790-5dbe8d…","""gpu""","""z019""",1,"""RUNNING""","""2 hours 52 mins 58 secs""","""14004"""
"""493983""","""Ld_job""","""5days""","""z1014""",1,"""RUNNING""","""1 hour 37 mins 6 secs""","""1514701014"""


In [41]:
# Write the statistics returned by pyslurm to statistics.json as indented JSON.
# The data is fetched and serialised *before* the file is opened: open(..., "w")
# truncates immediately, so fetching inside the `with` block would leave an
# empty statistics.json behind whenever pyslurm or the serialisation raised.
statistics_snapshot = json.dumps(pyslurm.statistics().get(), indent=4)
with open("statistics.json", "w") as f:
    f.write(statistics_snapshot)

In [None]:
# Scheduler statistics, split into three frames:
#   statistics_df_1 - the top-level values
#   statistics_df_2 - one row per entry of "rpc_type_stats" (key kept as "type")
#   statistics_df_3 - one row per entry of "rpc_user_stats" (key kept as "user")
#
# Uses its own `stats_json` name instead of overwriting the jobs cell's `jobs_json`.

# Slurm's sdiag documents RPC times in microseconds.
# NOTE(review): assumes pyslurm passes total_time through unchanged - confirm.
# The previous divisor, 10e6, equals 1e7 and under-reported the seconds tenfold.
MICROSECONDS_PER_SECOND = 1_000_000

# The JSON round trip yields plain JSON-compatible objects.
stats_json = json.loads(json.dumps(pyslurm.statistics().get()))
rpc_type_stats = stats_json.pop("rpc_type_stats")
rpc_user_stats = stats_json.pop("rpc_user_stats")
statistics_df_1 = pl.DataFrame(stats_json, infer_schema_length=None)

# Fold each mapping key into its record so it becomes a regular column.
statistics_2 = [{**stats, "type": rpc_type} for rpc_type, stats in rpc_type_stats.items()]
statistics_3 = [{**stats, "user": user} for user, stats in rpc_user_stats.items()]
statistics_df_2 = pl.DataFrame(statistics_2)
statistics_df_3 = pl.DataFrame(statistics_3)
statistics_df_3 = statistics_df_3.with_columns(
    pl.col("total_time")
    .map_elements(lambda x: f"{x / MICROSECONDS_PER_SECOND} seconds", return_dtype=pl.Utf8)
    .alias("total_time_str")
)
display(statistics_df_1.head(10))
display(statistics_df_2.head(10))
display(statistics_df_3)

In [None]:
# Check how a frame with no data reports emptiness: prints "Falsy" when
# is_empty() is true, "Truthy" otherwise.
empty = pl.DataFrame()
print("Falsy" if empty.is_empty() else "Truthy")

In [None]:
# Column names of the two-column selection, in selection order.
name_state_selection = node_df.select("name", "state")
list_of_column_names = name_state_selection.columns
print(list_of_column_names)

In [None]:
# Print the full contents of the "name" and "state" columns as Python lists.
for column_name in ("name", "state"):
    print(node_df[column_name].to_list())

In [9]:
from prettytable import PrettyTable

# Display headers for the columns shown in the text table.
pretty_column_names = {
    "partitions": "Partitions",
    "cpus": "CPUs",
    "cpus_usage": "CPU usage",
    "real_memory": "RAM",
    "mem_usage": "RAM usage",
    "gres": "GPUs",
    "gres_usage": "GPU usage",
}

node_df_selection = node_df.select(
    "partitions", "cpus", "cpus_usage", "real_memory", "mem_usage", "gres", "gres_usage"
)

# One table row per frame row, with headers looked up from the mapping above.
table = PrettyTable()
header_labels = []
for col in node_df_selection.columns:
    header_labels.append(pretty_column_names.get(col))
table.field_names = header_labels
for values in node_df_selection.iter_rows():
    table.add_row([*values])

print(table)
print(type(table.get_string()))

+------------+------+-----------+------------+-----------+------+-----------+
| Partitions | CPUs | CPU usage |    RAM     | RAM usage | GPUs | GPU usage |
+------------+------+-----------+------------+-----------+------+-----------+
|   30mins   | 2408 |   38.87%  | 11812.0 GB |   62.11%  |  0   |           |
|   4hours   | 1040 |    0.0%   | 5026.0 GB  |   19.14%  |  0   |           |
|  12hours   | 1560 |   69.23%  | 7539.0 GB  |   93.01%  |  0   |           |
|   5days    | 1296 |   44.44%  | 6030.0 GB  |   50.7%   |  0   |           |
|    gpu     |  64  |    0.0%   |  752.0 GB  |   44.68%  |  8   |    0.0%   |
+------------+------+-----------+------------+-----------+------+-----------+
<class 'str'>


In [13]:
# Map each partition name to the list of node names it contains.
# The dict is called `partition_nodes` rather than `map`: a notebook-global
# `map` shadows the builtin map() for every cell executed afterwards.
# Each row of the two-column selection is a (partition, names) pair, which is
# exactly what dict() consumes.
partition_nodes = dict(node_df.select("partitions", "name").iter_rows())

print(partition_nodes)

{'30mins': ['z1014', 'z1016', 'z1017', 'z1018', 'z1019', 'z1020', 'z1021', 'z1022', 'z1023', 'z1024', 'z1025', 'z1026', 'z1027', 'z1028', 'z1029', 'z1032', 'z1033', 'z1034', 'z1035', 'z1036', 'z1038', 'z1039', 'z1040', 'z1041', 'z1042', 'z1043', 'z1044', 'z1045', 'z1047', 'z1048', 'z1049', 'z1050'], '4hours': ['z1014', 'z1016', 'z1017', 'z1018', 'z1019', 'z1020', 'z1032', 'z1033', 'z1034', 'z1035', 'z1036', 'z1038', 'z1039', 'z1040'], '12hours': ['z1021', 'z1022', 'z1023', 'z1024', 'z1025', 'z1026', 'z1027', 'z1028', 'z1029', 'z1041', 'z1042', 'z1043', 'z1044', 'z1045', 'z1047', 'z1048', 'z1049', 'z1050', 'z1051', 'z1052', 'z1053'], '5days': ['z1014', 'z1016', 'z1017', 'z1018', 'z1019', 'z1020', 'z1041', 'z1042', 'z1043', 'z1044', 'z1045', 'z1047', 'z1048', 'z1049', 'z1050', 'z1051', 'z1052', 'z1053'], 'gpu': ['z018', 'z019']}


In [None]:
# print() with no arguments writes just a newline.
print()

# Bare expression: constructs a pyslurm.Partitions object without assigning it.
# NOTE(review): presumably meant to show the cluster's partitions as the cell's
# output - confirm what Partitions() returns when called with no arguments.
pyslurm.Partitions()


In [None]:
import subprocess
from dataclasses import dataclass

@dataclass
class User:
    """Account record holding a login name, a user id and the GECOS field.

    Instances in this notebook are created positionally as
    ``User(username, uid, gecos)`` from colon-separated passwd fields.
    """
    # All fields default to None although they are annotated as str.
    username: str = None  # 1st passwd field
    uid: str = None  # 3rd passwd field, kept as text (not converted to int)
    gecos: str = None  # 5th passwd field

# Collect a User record for every member of the listed groups, using `getent`.
user_groups = ["xusers", "zusers", "tusers", "rusers", "musers"]

# Each `getent group <name>` line is colon-separated with the comma-separated
# member list in the last field. A group that does not exist produces no output
# and a group without members has an empty last field; both are skipped here
# instead of yielding an empty member name.
members = []
for user_group in user_groups:
    group_result = subprocess.run(["getent", "group", user_group], capture_output=True, text=True)
    for group_line in group_result.stdout.splitlines():
        member_field = group_line.split(":")[-1]
        members.extend(name for name in member_field.split(",") if name)

# De-duplicate (a user may be in several groups) and sort so the resulting
# order is the same on every run.
user_info = []
for member in sorted(set(members)):
    passwd_result = subprocess.run(["getent", "passwd", member], capture_output=True, text=True)
    for user_line in passwd_result.stdout.splitlines():
        fields = user_line.split(":")
        # A member without a passwd entry gives no output, and a malformed line
        # has too few fields; skip those rather than fail while unpacking.
        if len(fields) < 5:
            continue
        # Index explicitly instead of unpacking into `_`, which would overwrite
        # IPython's last-output variable.
        user_info.append(User(fields[0], fields[2], fields[4]))


[User(username='liuj', uid='11029', gecos='Jiangyuan Liu'), User(username='zhuangj', uid='10229', gecos='Jiali Zhuang'), User(username='jacksoni', uid='11050', gecos='Isabella Jackson'), User(username='campbellm', uid='1514701014', gecos='Max Campbell'), User(username='vanderva', uid='10589', gecos='Arjan van der Velde'), User(username='perezc1', uid='11060', gecos='Claudia Perez'), User(username='purcarom', uid='10463', gecos='Michael Purcaro'), User(username='zhongrenhu', uid='11017', gecos='Zhongren Hu'), User(username='bhattaa', uid='1514601466', gecos='Agamoni Bhattacharyya'), User(username='pierceb', uid='13000', gecos='Brian Pierce'), User(username='vrevent', uid='10486', gecos='Thom Vreven'), User(username='elnesro', uid='11049', gecos='Omar El Nesr'), User(username='haasn', uid='14010', gecos='Natalie Haas'), User(username='hattonc', uid='1514601457', gecos='Courtney Hatton'), User(username='andrewsg', uid='14004', gecos='Greg Andrews'), User(username='hustonm', uid='151470102