Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 1 addition & 6 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -26,19 +26,14 @@ installation, submission, monitoring, and various commands provided by GridTK.
Before diving into GridTK, ensure you have the following prerequisites:

1. A working Slurm setup.
2. [Pixi](https://pixi.sh/) (recommended) or [pipx](https://pipx.pypa.io/stable/) installed.
2. [pipx](https://pipx.pypa.io/stable/) installed.
3. GridTK installed (instructions provided below).

## Installation

To install GridTK, open your terminal and run the following command:

```bash
# Install gridtk using pixi
$ curl -fsSL https://pixi.sh/install.sh | bash # installs pixi
$ pixi global install gridtk

# Install gridtk using pipx
$ pipx install gridtk
```
It is **not recommended** to install GridTK using `pip install gridtk` in the
Expand Down
51 changes: 45 additions & 6 deletions src/gridtk/manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@

from collections.abc import Iterable
from pathlib import Path
from typing import Any

import sqlalchemy

Expand All @@ -35,13 +36,51 @@
from .tools import job_ids_from_dep_str, parse_array_indexes


def parse_scontrol_output(output: str) -> dict[str, Any]:
    """Parse scontrol output and return a dict similar to `sacct --json`.

    The text is split on whitespace; every ``key=value`` token is stored
    under ``key`` (split on the first ``=`` only, later duplicates win) and
    tokens without ``=`` are ignored. Three extra entries are then added so
    the result can be read like a job record from ``sacct --json``:

    * ``state``: ``{"current": [<JobState>], "reason": <Reason>}``
    * ``derived_exit_code``: ``{"return_code": {"number": <code>}}`` where
      ``<code>`` is the part of ``ExitCode`` before the first ``:``, kept as
      a string
    * ``nodes``: ``NodeList``, with ``"(null)"`` replaced by
      ``"None assigned"``

    Args:
        output: Text printed by ``scontrol show job <id>``.

    Returns:
        The parsed fields plus the three derived entries, or an empty dict
        when any of ``JobState``, ``Reason``, ``ExitCode`` or ``NodeList``
        is missing from ``output`` (for example empty output or an error
        message), instead of raising ``KeyError``.
    """
    fields: dict[str, Any] = {}
    for token in output.split():
        key, separator, value = token.partition("=")
        if separator:
            fields[key] = value

    # Without these fields there is no usable status; an empty dict is the
    # same "no status" signal that job_status_from_scontrol already returns.
    required = ("JobState", "Reason", "ExitCode", "NodeList")
    if any(name not in fields for name in required):
        return {}

    # make results similar to sacct --json
    fields["state"] = {
        "current": [fields["JobState"]],
        "reason": fields["Reason"],
    }
    fields["derived_exit_code"] = {
        "return_code": {"number": fields["ExitCode"].split(":")[0]}
    }
    node_list = fields["NodeList"]
    fields["nodes"] = "None assigned" if node_list == "(null)" else node_list
    return fields


def job_status_from_scontrol(job_id: int) -> dict:
    """Query ``scontrol show job`` for one job and return its parsed status.

    Args:
        job_id: Identifier of the job to look up.

    Returns:
        The dict produced by ``parse_scontrol_output`` for the command's
        output, or an empty dict when scontrol exits with a non-zero status.
    """
    # Plain-text output is requested and parsed by hand because --json is
    # not supported by older versions of scontrol.
    command = ["scontrol", "show", "job", str(job_id)]
    try:
        raw_text = subprocess.check_output(command, text=True)
    except subprocess.CalledProcessError:
        return {}
    return parse_scontrol_output(raw_text)


def update_job_statuses(grid_ids: Iterable[int]) -> dict[int, dict]:
    """Retrieve the status of the jobs in the database.

    All jobs are first queried in a single ``sacct -j <ids> --json`` call.
    If that command exits with a non-zero status, each job is instead
    looked up individually through ``job_status_from_scontrol``.

    Args:
        grid_ids: Job ids to query. Any iterable is accepted, including
            one-shot iterators such as generators.

    Returns:
        A mapping from job id to its status record. On the sacct path the
        keys are the ``job_id`` values reported by sacct; on the scontrol
        path jobs for which no status could be obtained are omitted.
    """
    # Materialize once: the ids are needed both for the sacct command line
    # and for the scontrol fallback, and a one-shot iterator would already
    # be exhausted by the time the fallback loop runs.
    job_ids = list(grid_ids)
    status: dict[int, dict] = {}
    try:
        output = subprocess.check_output(
            ["sacct", "-j", ",".join(str(job_id) for job_id in job_ids), "--json"],
            text=True,
        )
    except subprocess.CalledProcessError:
        for job_id in job_ids:
            job_status = job_status_from_scontrol(job_id)
            if job_status:
                status[job_id] = job_status
        return status
    for job in json.loads(output)["jobs"]:
        status[job["job_id"]] = job
    return status
Expand Down Expand Up @@ -200,6 +239,6 @@ def __del__(self):
and len(self.list_jobs(update_jobs=False)) == 0
):
Path(self.database).unlink()
if self.logs_dir.exists() and len(os.listdir(self.logs_dir)) == 0:
shutil.rmtree(self.logs_dir)
if self.logs_dir.exists() and len(os.listdir(self.logs_dir)) == 0:
shutil.rmtree(self.logs_dir)
self.engine.dispose()