From 2fd039ae2882ee6b5c51b3ff6e201f1fd80eeebf Mon Sep 17 00:00:00 2001 From: Sebastian Wolf Date: Thu, 7 Mar 2024 16:23:31 +0100 Subject: [PATCH 1/2] Add priority to hq command --- hpc/LoadBalancer.cpp | 2 ++ hpc/LoadBalancer.hpp | 4 ++++ 2 files changed, 6 insertions(+) diff --git a/hpc/LoadBalancer.cpp b/hpc/LoadBalancer.cpp index c8703f6..776fcba 100644 --- a/hpc/LoadBalancer.cpp +++ b/hpc/LoadBalancer.cpp @@ -41,6 +41,8 @@ const std::vector get_model_names() { return umbridge::SupportedModels(hq_job.server_url); } +std::atomic HyperQueueJob::job_count = 0; + int main(int argc, char *argv[]) { create_directory_if_not_existing("urls"); diff --git a/hpc/LoadBalancer.hpp b/hpc/LoadBalancer.hpp index 595e7f4..a1a9dfa 100644 --- a/hpc/LoadBalancer.hpp +++ b/hpc/LoadBalancer.hpp @@ -71,6 +71,7 @@ int hq_submit_delay_ms = 0; class HyperQueueJob { public: + static std::atomic job_count; HyperQueueJob(std::string model_name, bool start_client=true, bool force_default_submission_script=false) { @@ -113,6 +114,7 @@ class HyperQueueJob const std::filesystem::path submission_script_model_specific("job_" + model_name + ".sh"); std::string hq_command = "hq submit --output-mode=quiet "; + hq_command += "--priority=" + std::to_string(job_count) + " "; if (std::filesystem::exists(submission_script_dir / submission_script_model_specific) && !force_default_submission_script) { hq_command += (submission_script_dir / submission_script_model_specific).string(); @@ -128,6 +130,8 @@ class HyperQueueJob // Submit the HQ job and retrieve the HQ job ID. std::string job_id = getCommandOutput(hq_command); + // TODO MUTEX + job_count--; // Delete the line break. if (!job_id.empty()) From 510a4ed91c23736ee9b4ec9feb2595fbb1eceeb3 Mon Sep 17 00:00:00 2001 From: Sebastian Wolf Date: Tue, 12 Mar 2024 16:45:24 +0100 Subject: [PATCH 2/2] Address review comments --- hpc/LoadBalancer.cpp | 2 +- hpc/LoadBalancer.hpp | 3 +-- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/hpc/LoadBalancer.cpp b/hpc/LoadBalancer.cpp index 776fcba..c23bdff 100644 --- a/hpc/LoadBalancer.cpp +++ b/hpc/LoadBalancer.cpp @@ -41,7 +41,7 @@ const std::vector get_model_names() { return umbridge::SupportedModels(hq_job.server_url); } -std::atomic HyperQueueJob::job_count = 0; +std::atomic HyperQueueJob::job_count = 0; int main(int argc, char *argv[]) { diff --git a/hpc/LoadBalancer.hpp b/hpc/LoadBalancer.hpp index a1a9dfa..20d877d 100644 --- a/hpc/LoadBalancer.hpp +++ b/hpc/LoadBalancer.hpp @@ -71,7 +71,7 @@ int hq_submit_delay_ms = 0; class HyperQueueJob { public: - static std::atomic job_count; + static std::atomic job_count; HyperQueueJob(std::string model_name, bool start_client=true, bool force_default_submission_script=false) { @@ -130,7 +130,6 @@ class HyperQueueJob // Submit the HQ job and retrieve the HQ job ID. std::string job_id = getCommandOutput(hq_command); - // TODO MUTEX job_count--; // Delete the line break.