In [2]:
import polars as pl
import pyslurm
from IPython.display import display
import json
from datetime import datetime

In [None]:
with open("node.json", "w") as f:
    json_string = json.dumps(pyslurm.node().get())
    json_object = json.loads(json_string)
    json.dump(json_object, f, indent=4)

In [None]:
# Process node data

json_string = json.dumps(pyslurm.node().get())
json_object = json.loads(json_string)
p = [v for v in json_object.values()]
node_df = pl.DataFrame(p)
node_df = node_df.drop(["core_spec_cnt", "cpu_spec_list", "core_spec_cnt", "extra", "features", "features_active", "mcs_label", "mem_spec_limit", "owner", "tmp_disk", "reason_time", "reason", "reason_uid", "power_mgmt", "energy"])
node_df = node_df.with_columns(pl.col("last_busy", "slurmd_start_time").map_elements(lambda x: datetime.fromtimestamp(x), return_dtype=pl.Datetime))
node_df = node_df.with_columns(pl.col("last_busy", "slurmd_start_time").map_elements(lambda x: x.strftime("%Y-%m-%d %H:%M:%S"), return_dtype=pl.Utf8))
node_df = node_df.with_columns(pl.col("gres", "gres_used").list.first())
node_df = node_df.with_columns(pl.col("gres", "gres_used").map_elements(lambda x: int(x.split(":")[-1]) if isinstance(x, str) else x, return_dtype=pl.Int64))
node_df = node_df.with_columns(pl.col("gres", "gres_used").fill_null(0))
node_df = node_df.with_columns((pl.col("gres") - pl.col("gres_used")).alias("gres_available"))
node_df = node_df.with_columns((pl.col("cpus") - pl.col("alloc_cpus")).alias("cpus_available"))
node_df = node_df.with_columns([
    (pl.col("real_memory") / 1024).floor(),
    (pl.col("free_mem") / 1024).floor()
])
node_df = node_df.filter(~pl.col("name").is_in([f"z0{i}" for i in range(10, 17)]))
node_df = node_df.explode("partitions")
node_df = node_df.group_by("partitions").agg(pl.col("name"), pl.col("state"), pl.col("cpus_available").sum(), pl.col("cpus").sum(), pl.col("gres_available").sum(), pl.col("gres").sum(), pl.col("free_mem").sum(), pl.col("real_memory").sum())
node_df = node_df.with_columns(
    pl.concat_str([
        pl.col("gres_available").cast(pl.Utf8),
        pl.lit(" of "),
        pl.col("gres").cast(pl.Utf8)
    ]).alias("gres_status")
)
node_df = node_df.with_columns(
    pl.concat_str([
        pl.col("cpus_available").cast(pl.Utf8),
        pl.lit(" of "),
        pl.col("cpus").cast(pl.Utf8)
    ]).alias("cpus_status")
)
node_df = node_df.with_columns(
    pl.concat_str([
        pl.col("free_mem").cast(pl.Utf8),
        pl.lit(" GB of "),
        pl.col("real_memory").cast(pl.Utf8),
        pl.lit("GB")
    ]).alias("mem_status")
)
node_df = node_df.with_columns(pl.col("gres_status").replace("0 of 0", ""))
custom_order = ["30mins", "4hours", "12hours", "5days", "gpu"]
order_dict = {val: i for i, val in enumerate(custom_order)}
node_df = (node_df
    .with_columns(
        pl.col("partitions").map_elements(lambda x: order_dict.get(x), return_dtype=pl.Int64).alias("__sort_key")
    )
    .sort("__sort_key")
    .drop("__sort_key")
)
# node_df = node_df.sort("partitions")
display(node_df.head(10))
print(node_df.shape)

  .with_columns(


partitions,name,state,cpus_available,cpus,gres_available,gres,free_mem,real_memory,gres_status,cpus_status,mem_status
str,list[str],list[str],i64,i64,i64,i64,f64,f64,str,str,str
"""30mins""","[""z1014"", ""z1016"", … ""z1050""]","[""IDLE"", ""IDLE"", … ""MIXED""]",1472,2408,0,0,4670.0,11812.0,"""""","""1472 of 2408""","""4670.0 GB of 11812.0GB"""
"""4hours""","[""z1014"", ""z1016"", … ""z1040""]","[""IDLE"", ""IDLE"", … ""IDLE""]",1040,1040,0,0,4028.0,5026.0,"""""","""1040 of 1040""","""4028.0 GB of 5026.0GB"""
"""12hours""","[""z1021"", ""z1022"", … ""z1053""]","[""MIXED"", ""MIXED"", … ""MIXED""]",480,1560,0,0,869.0,7539.0,"""""","""480 of 1560""","""869.0 GB of 7539.0GB"""
"""5days""","[""z1014"", ""z1016"", … ""z1053""]","[""IDLE"", ""IDLE"", … ""MIXED""]",720,1296,0,0,3270.0,6030.0,"""""","""720 of 1296""","""3270.0 GB of 6030.0GB"""
"""gpu""","[""z018"", ""z019""]","[""IDLE"", ""IDLE""]",64,64,8,8,417.0,752.0,"""8 of 8""","""64 of 64""","""417.0 GB of 752.0GB"""


(5, 12)


In [None]:
with open("jobs.json", "w") as f:
    json_string = json.dumps(pyslurm.job().get())
    json_object = json.loads(json_string)
    json.dump(json_object, f, indent=4)

In [None]:
jobs_json_string = json.dumps(pyslurm.job().get())
jobs_json = json.loads(jobs_json_string)
jobs = [dict for dict in jobs_json.values()]
jobs_df = pl.DataFrame(jobs, infer_schema_length=None)
jobs_df = jobs_df.drop(["cpus_allocated", "cpus_alloc_layout"])
jobs_df = jobs_df.with_columns(pl.col("eligible_time", "end_time", "start_time", "submit_time").map_elements(lambda x: datetime.fromtimestamp(x), return_dtype=pl.Datetime))
display(jobs_df.head(10))

In [None]:
with open("statistics.json", "w") as f:
    json_string = json.dumps(pyslurm.statistics().get())
    json_object = json.loads(json_string)
    json.dump(json_object, f, indent=4)

In [None]:
jobs_json_string = json.dumps(pyslurm.statistics().get())
jobs_json = json.loads(jobs_json_string)
rpc_type_stats = jobs_json.pop("rpc_type_stats")
rpc_user_stats = jobs_json.pop("rpc_user_stats")
statistics_df_1 = pl.DataFrame(jobs_json, infer_schema_length=None)
statistics_2 = []
for k, v in rpc_type_stats.items():
    v.update({"type": k})
    statistics_2.append(v)
statistics_3 = []
for k, v in rpc_user_stats.items():
    v.update({"user": k})
    statistics_3.append(v)
statistics_df_2 = pl.DataFrame(statistics_2)
statistics_df_3 = pl.DataFrame(statistics_3)
statistics_df_3 = statistics_df_3.with_columns(pl.col("total_time").map_elements(lambda x: f"{((x / 10e6))} seconds", return_dtype=pl.Utf8).alias("total_time_str"))
display(statistics_df_1.head(10))
display(statistics_df_2.head(10))
display(statistics_df_3)

In [None]:
empty = pl.DataFrame()
if empty.is_empty():
    print("Falsy")
else:
    print("Truthy")

In [None]:
list_of_column_names = node_df.select("name", "state").columns
print(list_of_column_names)

In [None]:
for column in node_df.select("name", "state").iter_columns():
    print(column.to_list())

In [14]:
from prettytable import PrettyTable

pretty_column_names = {"partitions": "Partitions", "cpus_status": "CPUs", "mem_status": "Memory", "gres_status": "GPUs"}
    
node_df_selection = node_df.select("partitions", "cpus_status", "mem_status", "gres_status")

table = PrettyTable()
table.field_names = [pretty_column_names.get(col) for col in node_df_selection.columns]
for row in node_df_selection.iter_rows():
    table.add_row(list(row))

print(table)
print(type(table.get_string()))

+------------+--------------+------------------------+--------+
| Partitions |     CPUs     |         Memory         |  GPUs  |
+------------+--------------+------------------------+--------+
|   30mins   | 1472 of 2408 | 4670.0 GB of 11812.0GB |        |
|   4hours   | 1040 of 1040 | 4028.0 GB of 5026.0GB  |        |
|  12hours   | 480 of 1560  |  869.0 GB of 7539.0GB  |        |
|   5days    | 720 of 1296  | 3270.0 GB of 6030.0GB  |        |
|    gpu     |   64 of 64   |  417.0 GB of 752.0GB   | 8 of 8 |
+------------+--------------+------------------------+--------+
<class 'str'>
