From 8d6cfd3fc9583ad3838bdfe37bb21fe5022f04dc Mon Sep 17 00:00:00 2001
From: Lev Gromov
Date: Wed, 17 Apr 2024 15:09:19 +0200
Subject: [PATCH] Add model and job-script information output and warning for
 typos

---
Reviewer note: text between angle brackets was lost when this patch was
extracted. It has been restored in the added lines only where the code makes
the intent unambiguous (std::vector<std::string>, 'job_<model_name>.sh').
The #include targets and the template arguments on context lines could not be
recovered, and the indentation / blank lines of context lines are
reconstructed; restore them from hpc/LoadBalancer.cpp before applying.
The added function now also skips the unused-script scan (with a warning)
when ./hq_scripts is missing instead of throwing from directory_iterator.

 hpc/LoadBalancer.cpp | 96 ++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 96 insertions(+)

diff --git a/hpc/LoadBalancer.cpp b/hpc/LoadBalancer.cpp
index c23bdff..d6469b0 100644
--- a/hpc/LoadBalancer.cpp
+++ b/hpc/LoadBalancer.cpp
@@ -4,6 +4,7 @@
 #include
 #include
 #include
+#include
 #include
 #include
 
@@ -41,6 +42,97 @@ const std::vector get_model_names() {
     return umbridge::SupportedModels(hq_job.server_url);
 }
 
+// Prints which job script will be used for each available model and warns
+// about model-specific job scripts that match none of the models, which
+// usually points to a typo in a script filename or in a model name.
+//
+// model_names: names of the available models; the order is irrelevant because
+//              a sorted copy is used for both the listing and the lookup.
+//
+// All output goes to std::cout. If the job-script directory is missing, a
+// warning is printed and the unused-script check is skipped instead of letting
+// std::filesystem::directory_iterator throw.
+void print_model_and_job_script_information(const std::vector<std::string>& model_names) {
+    // Constants
+    const std::filesystem::path SUBMISSION_SCRIPT_DIR("./hq_scripts");
+    const std::filesystem::path SUBMISSION_SCRIPT_GENERIC("job.sh");
+
+    const std::string SECTION_START_DELIMITER = "==============================MODEL INFO==============================";
+    const std::string SECTION_END_DELIMITER = "======================================================================";
+
+    // Sort the model names in alphabetical order for cleaner output.
+    // The sorted order is also what std::binary_search below relies on.
+    std::vector<std::string> model_names_sorted = model_names;
+    std::sort(model_names_sorted.begin(), model_names_sorted.end());
+
+    std::cout << SECTION_START_DELIMITER << "\n";
+    // Print list of available models and corresponding job-scripts.
+    std::cout << "Available models and corresponding job-scripts:\n";
+    for (const std::string& model_name : model_names_sorted) {
+        // Determine which job script will be used by checking if a model specific job script exists.
+        std::string used_job_script;
+        const std::filesystem::path submission_script_model_specific("job_" + model_name + ".sh");
+        if (std::filesystem::exists(SUBMISSION_SCRIPT_DIR / submission_script_model_specific)) {
+            used_job_script = submission_script_model_specific.string();
+        } else {
+            used_job_script = SUBMISSION_SCRIPT_GENERIC.string();
+        }
+        std::cout << "* Model '" << model_name << "' --> '" << used_job_script << "'\n";
+    }
+    std::cout << std::endl;
+
+    // Iterating a directory that does not exist throws, so stop here with a
+    // warning when the job-script directory is unavailable.
+    if (!std::filesystem::is_directory(SUBMISSION_SCRIPT_DIR)) {
+        std::cout << "WARNING: The job-script directory '" << SUBMISSION_SCRIPT_DIR.string()
+                  << "' was not found or is not a directory. Skipping the check for unused model-specific job-scripts.\n";
+        std::cout << SECTION_END_DELIMITER << std::endl;
+        return;
+    }
+
+    // Check if there are job scripts that are unused and print a warning.
+    std::vector<std::string> unused_job_scripts;
+
+    // Build a regex to parse job-script filenames and extract the model name.
+    // Format should be: job_<model_name>.sh
+    const std::string format_prefix = "^job_"; // Ensures that filename starts with 'job_'.
+    const std::string format_suffix = "\\.sh$"; // Ensures that filename ends with '.sh'.
+    const std::string format_model_name = "(.*)"; // Arbitrary sequence of characters as a marked subexpression.
+    const std::regex format_regex(format_prefix + format_model_name + format_suffix);
+
+    for (const auto& file : std::filesystem::directory_iterator(SUBMISSION_SCRIPT_DIR)) {
+        const std::string filename = file.path().filename().string();
+        // Check if filename matches format of a model specific job script, i.e. 'job_<model_name>.sh'.
+        std::smatch match_result;
+        if (std::regex_search(filename, match_result, format_regex)) {
+            // Extract first matched subexpression, i.e. the model name.
+            const std::string model_name = match_result[1].str();
+            // Check if a corresponding model exists. If not, mark job script as unused.
+            if (!std::binary_search(model_names_sorted.begin(), model_names_sorted.end(), model_name)) {
+                unused_job_scripts.push_back(filename);
+            }
+        }
+    }
+
+    // Print the warning message.
+    if (!unused_job_scripts.empty()) {
+        // Sort unused job scripts alphabetically for cleaner output.
+        std::sort(unused_job_scripts.begin(), unused_job_scripts.end());
+
+        std::cout << "WARNING: The following model-specific job-scripts are not used by any of the available models:\n";
+        for (const std::string& job_script : unused_job_scripts) {
+            std::cout << "* '" << job_script << "'\n";
+        }
+        std::cout << std::endl;
+
+        std::cout << "If this behavior is unintentional, then please verify that:\n"
+                  << "1. The filename of your model-specific job-script follows the format: 'job_<model_name>.sh' (e.g. 'job_mymodel.sh')\n"
+                  << "2. The spelling of your model name matches in the model definition and in the filename of your model-specific job-script.\n";
+    }
+
+    std::cout << SECTION_END_DELIMITER << std::endl;
+}
+
 std::atomic HyperQueueJob::job_count = 0;
 
 int main(int argc, char *argv[])
@@ -74,6 +166,10 @@ int main(int argc, char *argv[])
 
     // Initialize load balancer for each available model on the model server.
     const std::vector model_names = get_model_names();
+    // Inform the user about the available models and the job scripts that will be used.
+    // Output a warning for unused model-specific job-scripts to prevent typos.
+    print_model_and_job_script_information(model_names);
+
     std::vector LB_vector;
     for (auto model_name : model_names)
     {