diff --git a/HeterogeneousCore/SonicTriton/interface/TritonService.h b/HeterogeneousCore/SonicTriton/interface/TritonService.h index 255d1855f86c7..ea441088d2d4a 100644 --- a/HeterogeneousCore/SonicTriton/interface/TritonService.h +++ b/HeterogeneousCore/SonicTriton/interface/TritonService.h @@ -2,6 +2,7 @@ #define HeterogeneousCore_SonicTriton_TritonService #include "FWCore/ParameterSet/interface/ParameterSet.h" +#include "FWCore/Utilities/interface/GlobalIdentifier.h" #include #include @@ -38,7 +39,13 @@ class TritonService { instanceName(pset.getUntrackedParameter("instanceName")), tempDir(pset.getUntrackedParameter("tempDir")), imageName(pset.getUntrackedParameter("imageName")), - sandboxName(pset.getUntrackedParameter("sandboxName")) {} + sandboxName(pset.getUntrackedParameter("sandboxName")) { + //randomize instance name + if (instanceName.empty()) { + instanceName = + pset.getUntrackedParameter("instanceBaseName") + "_" + edm::createGlobalIdentifier(); + } + } bool enable; bool debug; diff --git a/HeterogeneousCore/SonicTriton/src/TritonService.cc b/HeterogeneousCore/SonicTriton/src/TritonService.cc index 82ee6fa2d9658..f6b8f1914dc3a 100644 --- a/HeterogeneousCore/SonicTriton/src/TritonService.cc +++ b/HeterogeneousCore/SonicTriton/src/TritonService.cc @@ -8,7 +8,6 @@ #include "FWCore/ServiceRegistry/interface/ActivityRegistry.h" #include "FWCore/ServiceRegistry/interface/ProcessContext.h" #include "FWCore/Utilities/interface/Exception.h" -#include "FWCore/Utilities/interface/GlobalIdentifier.h" #include "grpc_client.h" #include "grpc_service.pb.h" @@ -217,11 +216,6 @@ void TritonService::preBeginJob(edm::PathsAndConsumesOfModulesBase const&, edm:: if (verbose_) edm::LogInfo("TritonService") << msg; - //randomize instance name - if (fallbackOpts_.instanceName.empty()) { - fallbackOpts_.instanceName = "triton_server_instance_" + edm::createGlobalIdentifier(); - } - //assemble server start command std::string command("cmsTriton -P -1 -p " + pid_); if (fallbackOpts_.debug) @@ -308,6 +302,7 @@ void TritonService::fillDescriptions(edm::ConfigurationDescriptions& description fallbackDesc.addUntracked("useGPU", false); fallbackDesc.addUntracked("retries", -1); fallbackDesc.addUntracked("wait", -1); + fallbackDesc.addUntracked("instanceBaseName", "triton_server_instance"); fallbackDesc.addUntracked("instanceName", ""); fallbackDesc.addUntracked("tempDir", ""); fallbackDesc.addUntracked("imageName", ""); diff --git a/HeterogeneousCore/SonicTriton/test/tritonTest_cfg.py b/HeterogeneousCore/SonicTriton/test/tritonTest_cfg.py index d9ba2be799cc5..fafb6346eaaaf 100644 --- a/HeterogeneousCore/SonicTriton/test/tritonTest_cfg.py +++ b/HeterogeneousCore/SonicTriton/test/tritonTest_cfg.py @@ -29,6 +29,7 @@ options.register("mode","Async", VarParsing.multiplicity.singleton, VarParsing.varType.string, "mode for client (choices: {})".format(', '.join(allowed_modes))) options.register("verbose", False, VarParsing.multiplicity.singleton, VarParsing.varType.bool, "enable verbose output") options.register("brief", False, VarParsing.multiplicity.singleton, VarParsing.varType.bool, "briefer output for graph modules") +options.register("fallbackName", "", VarParsing.multiplicity.singleton, VarParsing.varType.string, "name for fallback server") options.register("unittest", False, VarParsing.multiplicity.singleton, VarParsing.varType.bool, "unit test mode: reduce input sizes") options.register("testother", False, VarParsing.multiplicity.singleton, VarParsing.varType.bool, "also test gRPC communication if shared memory enabled, or vice versa") options.register("shm", True, VarParsing.multiplicity.singleton, VarParsing.varType.bool, "enable shared memory") @@ -83,6 +84,8 @@ process.TritonService.verbose = options.verbose process.TritonService.fallback.verbose = options.verbose process.TritonService.fallback.useDocker = options.docker +if len(options.fallbackName)>0: + process.TritonService.fallback.instanceBaseName = options.fallbackName if options.device != "auto": process.TritonService.fallback.useGPU = options.device=="gpu" if len(options.address)>0: diff --git a/HeterogeneousCore/SonicTriton/test/unittest.sh b/HeterogeneousCore/SonicTriton/test/unittest.sh index 38989cc598e42..410a34991e4f1 100755 --- a/HeterogeneousCore/SonicTriton/test/unittest.sh +++ b/HeterogeneousCore/SonicTriton/test/unittest.sh @@ -58,14 +58,16 @@ if [ -n "$SINGULARITY_CONTAINER" ]; then fi fi +fallbackName=triton_server_instance_${DEVICE} tmpFile=$(mktemp -p ${LOCALTOP} SonicTritonTestXXXXXXXX.log) -cmsRun ${LOCALTOP}/src/HeterogeneousCore/SonicTriton/test/tritonTest_cfg.py modules=TritonGraphProducer,TritonGraphFilter,TritonGraphAnalyzer maxEvents=2 unittest=1 verbose=1 device=${DEVICE} testother=1 >& $tmpFile +cmsRun ${LOCALTOP}/src/HeterogeneousCore/SonicTriton/test/tritonTest_cfg.py modules=TritonGraphProducer,TritonGraphFilter,TritonGraphAnalyzer maxEvents=2 unittest=1 verbose=1 device=${DEVICE} testother=1 fallbackName=${fallbackName} >& $tmpFile CMSEXIT=$? cat $tmpFile +sleep 15 STOP_COUNTER=0 -while ! LOGFILE="$(ls -rt ${LOCALTOP}/log_triton_server_instance*.log 2>/dev/null | tail -n 1)" && [ "$STOP_COUNTER" -lt 5 ]; do +while ! LOGFILE="$(ls -rt ${LOCALTOP}/log_${fallbackName}_*.log 2>/dev/null | tail -n 1)" && [ "$STOP_COUNTER" -lt 5 ]; do STOP_COUNTER=$((STOP_COUNTER+1)) sleep 5 done @@ -73,6 +75,7 @@ done if [ -n "$LOGFILE" ]; then echo -e '\n=====\nContents of '$LOGFILE':\n=====\n' cat "$LOGFILE" + rm $LOGFILE fi if grep -q "Socket closed" $tmpFile; then