Skip to content

Commit

Permalink
more control over fallback server name for tests
Browse files Browse the repository at this point in the history
  • Loading branch information
kpedro88 committed Sep 21, 2021
1 parent b05f148 commit b96778b
Show file tree
Hide file tree
Showing 4 changed files with 17 additions and 9 deletions.
9 changes: 8 additions & 1 deletion HeterogeneousCore/SonicTriton/interface/TritonService.h
Expand Up @@ -2,6 +2,7 @@
#define HeterogeneousCore_SonicTriton_TritonService

#include "FWCore/ParameterSet/interface/ParameterSet.h"
#include "FWCore/Utilities/interface/GlobalIdentifier.h"

#include <vector>
#include <unordered_set>
Expand Down Expand Up @@ -38,7 +39,13 @@ class TritonService {
instanceName(pset.getUntrackedParameter<std::string>("instanceName")),
tempDir(pset.getUntrackedParameter<std::string>("tempDir")),
imageName(pset.getUntrackedParameter<std::string>("imageName")),
sandboxName(pset.getUntrackedParameter<std::string>("sandboxName")) {}
sandboxName(pset.getUntrackedParameter<std::string>("sandboxName")) {
//randomize instance name
if (instanceName.empty()) {
instanceName =
pset.getUntrackedParameter<std::string>("instanceBaseName") + "_" + edm::createGlobalIdentifier();
}
}

bool enable;
bool debug;
Expand Down
7 changes: 1 addition & 6 deletions HeterogeneousCore/SonicTriton/src/TritonService.cc
Expand Up @@ -8,7 +8,6 @@
#include "FWCore/ServiceRegistry/interface/ActivityRegistry.h"
#include "FWCore/ServiceRegistry/interface/ProcessContext.h"
#include "FWCore/Utilities/interface/Exception.h"
#include "FWCore/Utilities/interface/GlobalIdentifier.h"

#include "grpc_client.h"
#include "grpc_service.pb.h"
Expand Down Expand Up @@ -217,11 +216,6 @@ void TritonService::preBeginJob(edm::PathsAndConsumesOfModulesBase const&, edm::
if (verbose_)
edm::LogInfo("TritonService") << msg;

//randomize instance name
if (fallbackOpts_.instanceName.empty()) {
fallbackOpts_.instanceName = "triton_server_instance_" + edm::createGlobalIdentifier();
}

//assemble server start command
std::string command("cmsTriton -P -1 -p " + pid_);
if (fallbackOpts_.debug)
Expand Down Expand Up @@ -308,6 +302,7 @@ void TritonService::fillDescriptions(edm::ConfigurationDescriptions& description
fallbackDesc.addUntracked<bool>("useGPU", false);
fallbackDesc.addUntracked<int>("retries", -1);
fallbackDesc.addUntracked<int>("wait", -1);
fallbackDesc.addUntracked<std::string>("instanceBaseName", "triton_server_instance");
fallbackDesc.addUntracked<std::string>("instanceName", "");
fallbackDesc.addUntracked<std::string>("tempDir", "");
fallbackDesc.addUntracked<std::string>("imageName", "");
Expand Down
3 changes: 3 additions & 0 deletions HeterogeneousCore/SonicTriton/test/tritonTest_cfg.py
Expand Up @@ -29,6 +29,7 @@
options.register("mode","Async", VarParsing.multiplicity.singleton, VarParsing.varType.string, "mode for client (choices: {})".format(', '.join(allowed_modes)))
options.register("verbose", False, VarParsing.multiplicity.singleton, VarParsing.varType.bool, "enable verbose output")
options.register("brief", False, VarParsing.multiplicity.singleton, VarParsing.varType.bool, "briefer output for graph modules")
options.register("fallbackName", "", VarParsing.multiplicity.singleton, VarParsing.varType.string, "name for fallback server")
options.register("unittest", False, VarParsing.multiplicity.singleton, VarParsing.varType.bool, "unit test mode: reduce input sizes")
options.register("testother", False, VarParsing.multiplicity.singleton, VarParsing.varType.bool, "also test gRPC communication if shared memory enabled, or vice versa")
options.register("shm", True, VarParsing.multiplicity.singleton, VarParsing.varType.bool, "enable shared memory")
Expand Down Expand Up @@ -83,6 +84,8 @@
process.TritonService.verbose = options.verbose
process.TritonService.fallback.verbose = options.verbose
process.TritonService.fallback.useDocker = options.docker
# Only override the fallback server's base instance name when a non-empty
# name was supplied on the command line; otherwise the configured default stays.
if options.fallbackName:
    process.TritonService.fallback.instanceBaseName = options.fallbackName
if options.device != "auto":
process.TritonService.fallback.useGPU = options.device=="gpu"
if len(options.address)>0:
Expand Down
7 changes: 5 additions & 2 deletions HeterogeneousCore/SonicTriton/test/unittest.sh
Expand Up @@ -58,21 +58,24 @@ if [ -n "$SINGULARITY_CONTAINER" ]; then
fi
fi

fallbackName=triton_server_instance_${DEVICE}
tmpFile=$(mktemp -p ${LOCALTOP} SonicTritonTestXXXXXXXX.log)
cmsRun ${LOCALTOP}/src/HeterogeneousCore/SonicTriton/test/tritonTest_cfg.py modules=TritonGraphProducer,TritonGraphFilter,TritonGraphAnalyzer maxEvents=2 unittest=1 verbose=1 device=${DEVICE} testother=1 >& $tmpFile
cmsRun ${LOCALTOP}/src/HeterogeneousCore/SonicTriton/test/tritonTest_cfg.py modules=TritonGraphProducer,TritonGraphFilter,TritonGraphAnalyzer maxEvents=2 unittest=1 verbose=1 device=${DEVICE} testother=1 fallbackName=${fallbackName} >& $tmpFile
CMSEXIT=$?

cat $tmpFile
sleep 15

STOP_COUNTER=0
while ! LOGFILE="$(ls -rt ${LOCALTOP}/log_triton_server_instance*.log 2>/dev/null | tail -n 1)" && [ "$STOP_COUNTER" -lt 5 ]; do
# Look for the most recently modified fallback-server log, retrying up to
# 5 times with a 5 s pause while none is found.
# The exit status of LOGFILE="$(ls ... | tail -n 1)" is that of the pipeline's
# last command (tail), which succeeds even when ls matches nothing, so the
# loop condition checks whether the captured name is empty instead of
# negating the assignment's status.
while LOGFILE="$(ls -rt "${LOCALTOP}"/log_${fallbackName}_*.log 2>/dev/null | tail -n 1)"; [ -z "$LOGFILE" ] && [ "$STOP_COUNTER" -lt 5 ]; do
    STOP_COUNTER=$((STOP_COUNTER+1))
    sleep 5
done

# If a server log was found, print it under a banner and then delete it.
# printf with a quoted argument is used so that spaces, glob characters or
# backslashes in the path are printed literally rather than split or interpreted.
if [ -n "$LOGFILE" ]; then
    printf '\n=====\nContents of %s:\n=====\n\n' "$LOGFILE"
    cat "$LOGFILE"
    rm "$LOGFILE"
fi

if grep -q "Socket closed" $tmpFile; then
Expand Down

0 comments on commit b96778b

Please sign in to comment.