Skip to content

Commit

Permalink
Merge pull request #69 from art-daq/mrigatti/hwdev
Browse files Browse the repository at this point in the history
Fixed transition Configured -> Halted in the ots State Machine
  • Loading branch information
eflumerf committed Dec 21, 2022
2 parents 3a7f450 + 7a7fd3c commit dd40548
Show file tree
Hide file tree
Showing 15 changed files with 196 additions and 55 deletions.
11 changes: 11 additions & 0 deletions otsdaq/CoreSupervisors/CorePropertySupervisorBase.cc
Original file line number Diff line number Diff line change
Expand Up @@ -107,6 +107,17 @@ CorePropertySupervisorBase::CorePropertySupervisorBase(xdaq::Application* applic
__SUP_COUTV__(CorePropertySupervisorBase::supervisorApplicationUID_);
__SUP_COUTV__(CorePropertySupervisorBase::supervisorConfigurationPath_);

//try to verify binding port for context was established
//All this code failed to do the trick
// {
// application->ptr_;

// PeerTransportHTTP(this)
// const xdaq::NetGroup* netGroupPtr = application->getApplicationContext()->getNetGroup();
// auto netVector = netGroupPtr->getNetworks();
// __SUP_COUTV__(netVector.size());
// }

CorePropertySupervisorBase::indicateOtsAlive(this);

theConfigurationManager_->setOwnerContext(CorePropertySupervisorBase::supervisorContextUID_);
Expand Down
10 changes: 7 additions & 3 deletions otsdaq/CoreSupervisors/FESupervisor.cc
Original file line number Diff line number Diff line change
Expand Up @@ -958,18 +958,23 @@ void FESupervisor::transitionConfiguring(toolbox::Event::Reference /*event*/)
void FESupervisor::transitionHalting(toolbox::Event::Reference event)
{
__SUP_COUT__ << "transitionHalting" << __E__;
TLOG_DEBUG(7) << "transitionHalting";
TLOG_DEBUG(7) << "transitionHalting";

//shutdown workloops first, then shutdown metric manager
CoreSupervisorBase::transitionHalting(event);

try
{
if(metricMan && metricMan->Initialized())
{
TLOG_DEBUG(7) << "Metric manager(" << metricMan << ") shutting down..." << __E__;
metricMan->shutdown(); // will set initialized_ to false with mutex, which should prevent races
metricMan.reset(nullptr);
TLOG_DEBUG(7) << "Metric manager shutdown." << __E__;
}
else
__SUP_COUT__ << "Metric manager(" << metricMan << ") already shutdown." << __E__;

metricMan.reset(nullptr);
}
catch(...)
{
Expand All @@ -984,6 +989,5 @@ TLOG_DEBUG(7) << "transitionHalting";
);
}

CoreSupervisorBase::transitionHalting(event);
__SUP_COUT__ << "transitionHalting done." << __E__;
} // end transitionHalting()
2 changes: 2 additions & 0 deletions otsdaq/FECore/FEVInterface.cc
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,8 @@ FEVInterface::~FEVInterface(void)
//==============================================================================
void FEVInterface::configureSlowControls(void)
{
__COUT__ << "configureSlowControls" << __E__;

// Start artdaq metric manager here, if possible
if(metricMan && !metricMan->Running() && metricMan->Initialized())
{
Expand Down
20 changes: 12 additions & 8 deletions otsdaq/FECore/FEVInterfacesManager.cc
Original file line number Diff line number Diff line change
Expand Up @@ -195,14 +195,18 @@ void FEVInterfacesManager::configure(void)
fe->configure();
postStateMachineExecution(i);

// configure slow controls and start slow controls workloop
// slow controls workloop stays alive through start/stop.. and dies on halt
fe->configureSlowControls();
fe->startSlowControlsWorkLoop();

__CFG_COUT__ << "Done " << transitionName << " interface " << name << __E__;
__CFG_COUT__ << "Done " << transitionName << " interface " << name << __E__;
__CFG_COUT__ << "Done " << transitionName << " interface " << name << __E__;
//when done with fe configure, configure slow controls
if(!fe->VStateMachine::getSubIterationWork() && !fe->VStateMachine::getIterationWork())
{
// configure slow controls and start slow controls workloop
// slow controls workloop stays alive through start/stop.. and dies on halt
fe->configureSlowControls();
fe->startSlowControlsWorkLoop();

__CFG_COUT__ << "Done " << transitionName << " interface " << name << __E__;
__CFG_COUT__ << "Done " << transitionName << " interface " << name << __E__;
__CFG_COUT__ << "Done " << transitionName << " interface " << name << __E__;
}
}
postStateMachineExecutionLoop();

Expand Down
13 changes: 8 additions & 5 deletions otsdaq/FiniteStateMachine/RunControlStateMachine.cc
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,9 @@ const std::string RunControlStateMachine::HALTED_STATE_NAME = "Halted";
const std::string RunControlStateMachine::PAUSED_STATE_NAME = "Paused";
const std::string RunControlStateMachine::RUNNING_STATE_NAME = "Running";

const std::string RunControlStateMachine::SHUTDOWN_TRANSITION_NAME = "Shutdown";
const std::string RunControlStateMachine::STARTUP_TRANSITION_NAME = "Startup";

//==============================================================================
RunControlStateMachine::RunControlStateMachine(const std::string& name)
: theStateMachine_(name), asyncFailureReceived_(false), asyncPauseExceptionReceived_(false), asyncStopExceptionReceived_(false)
Expand Down Expand Up @@ -51,14 +54,14 @@ RunControlStateMachine::RunControlStateMachine(const std::string& name)
//clang-format off
// this line was added to get out of Failed state
RunControlStateMachine::addStateTransition('F', 'H', "Halt", "Halting", this, &RunControlStateMachine::transitionHalting);
RunControlStateMachine::addStateTransition('F', 'X', "Shutdown", "Shutting Down", this, &RunControlStateMachine::transitionShuttingDown);
RunControlStateMachine::addStateTransition('F', 'X', RunControlStateMachine::SHUTDOWN_TRANSITION_NAME, "Shutting Down", this, &RunControlStateMachine::transitionShuttingDown);
RunControlStateMachine::addStateTransition('F', 'F', "Error", "Erroring", this, &RunControlStateMachine::transitionShuttingDown);
RunControlStateMachine::addStateTransition('F', 'F', "Fail", "Failing", this, &RunControlStateMachine::transitionShuttingDown);

RunControlStateMachine::addStateTransition(
'H', 'C', "Configure", "Configuring", "ConfigurationAlias", this, &RunControlStateMachine::transitionConfiguring);
RunControlStateMachine::addStateTransition('H', 'X', "Shutdown", "Shutting Down", this, &RunControlStateMachine::transitionShuttingDown);
RunControlStateMachine::addStateTransition('X', 'I', "Startup", "Starting Up", this, &RunControlStateMachine::transitionStartingUp);
RunControlStateMachine::addStateTransition('H', 'X', RunControlStateMachine::SHUTDOWN_TRANSITION_NAME, "Shutting Down", this, &RunControlStateMachine::transitionShuttingDown);
RunControlStateMachine::addStateTransition('X', 'I', RunControlStateMachine::STARTUP_TRANSITION_NAME, "Starting Up", this, &RunControlStateMachine::transitionStartingUp);

// Every state can transition to halted
RunControlStateMachine::addStateTransition('I', 'H', "Initialize", "Initializing", this, &RunControlStateMachine::transitionInitializing);
Expand All @@ -84,8 +87,8 @@ RunControlStateMachine::RunControlStateMachine(const std::string& name)
xoap::bind(this, &RunControlStateMachine::runControlMessageHandler, "Resume", XDAQ_NS_URI);
xoap::bind(this, &RunControlStateMachine::runControlMessageHandler, "Halt", XDAQ_NS_URI);
xoap::bind(this, &RunControlStateMachine::runControlMessageHandler, "Abort", XDAQ_NS_URI);
xoap::bind(this, &RunControlStateMachine::runControlMessageHandler, "Shutdown", XDAQ_NS_URI);
xoap::bind(this, &RunControlStateMachine::runControlMessageHandler, "Startup", XDAQ_NS_URI);
xoap::bind(this, &RunControlStateMachine::runControlMessageHandler, RunControlStateMachine::SHUTDOWN_TRANSITION_NAME, XDAQ_NS_URI);
xoap::bind(this, &RunControlStateMachine::runControlMessageHandler, RunControlStateMachine::STARTUP_TRANSITION_NAME, XDAQ_NS_URI);
xoap::bind(this, &RunControlStateMachine::runControlMessageHandler, "Fail", XDAQ_NS_URI);
xoap::bind(this, &RunControlStateMachine::runControlMessageHandler, "Error", XDAQ_NS_URI);

Expand Down
3 changes: 3 additions & 0 deletions otsdaq/FiniteStateMachine/RunControlStateMachine.h
Original file line number Diff line number Diff line change
Expand Up @@ -117,6 +117,9 @@ class RunControlStateMachine : public virtual toolbox::lang::Class
static const std::string PAUSED_STATE_NAME;
static const std::string RUNNING_STATE_NAME;

static const std::string SHUTDOWN_TRANSITION_NAME;
static const std::string STARTUP_TRANSITION_NAME;

unsigned int getIterationIndex(void) { return iterationIndex_; }
unsigned int getSubIterationIndex(void) { return subIterationIndex_; }
void indicateIterationWork(void) { iterationWorkFlag_ = true; }
Expand Down
43 changes: 36 additions & 7 deletions otsdaq/GatewaySupervisor/GatewaySupervisor.cc
Original file line number Diff line number Diff line change
Expand Up @@ -198,6 +198,7 @@ void GatewaySupervisor::AppStatusWorkLoop(GatewaySupervisor* theSupervisor)
// sleep

// __COUT__ << "Just debugging App status checking" << __E__;
bool oneStatusReqHasFailed = false;
for(const auto& it : theSupervisor->allSupervisorInfo_.getAllSupervisorInfo())
{
auto appInfo = it.second;
Expand Down Expand Up @@ -291,19 +292,33 @@ void GatewaySupervisor::AppStatusWorkLoop(GatewaySupervisor* theSupervisor)
}
catch(const xdaq::exception::Exception& e)
{
//__COUT__ << "Failed to send getStatus SOAP Message: " << e.what() << __E__;
__COUT__ << "Getting Status "
<< " Supervisor instance = '" << appInfo.getName()
<< "' [LID=" << appInfo.getId() << "] in Context '"
<< appInfo.getContextName() << "' [URL=" <<
appInfo.getURL()
<< "].\n\n";
__COUTV__(SOAPUtilities::translate(tempMessage));
__COUT_WARN__ << "Failed to send getStatus SOAP Message: " << e.what() << __E__;
status = SupervisorInfo::APP_STATUS_UNKNOWN;
progress = "0";
detail = "SOAP Message Error";
sleep(5); // sleep to not overwhelm server with errors
oneStatusReqHasFailed = true;
}
catch(...)
{
//__COUT_WARN__ << "Failed to send getStatus SOAP Message due to unknown error." << __E__;
__COUT__ << "Getting Status "
<< " Supervisor instance = '" << appInfo.getName()
<< "' [LID=" << appInfo.getId() << "] in Context '"
<< appInfo.getContextName() << "' [URL=" <<
appInfo.getURL()
<< "].\n\n";
__COUTV__(SOAPUtilities::translate(tempMessage));
__COUT_WARN__ << "Failed to send getStatus SOAP Message due to unknown error." << __E__;
status = SupervisorInfo::APP_STATUS_UNKNOWN;
progress = "0";
detail = "Unknown SOAP Message Error";
sleep(5); // sleep to not overwhelm server with errors
oneStatusReqHasFailed = true;
}
} // end with non-gateway status request handling

Expand All @@ -319,6 +334,8 @@ void GatewaySupervisor::AppStatusWorkLoop(GatewaySupervisor* theSupervisor)
theSupervisor->allSupervisorInfo_.setSupervisorStatus(appInfo, status, progressInteger, detail);

} // end of app loop
if(oneStatusReqHasFailed)
sleep(5); // sleep to not overwhelm server with errors
} // end of infinite status checking loop
} // end AppStatusWorkLoop

Expand Down Expand Up @@ -1734,7 +1751,8 @@ catch(...)
void GatewaySupervisor::transitionShuttingDown(toolbox::Event::Reference /*e*/)
try
{
__COUT__ << "Fsm current state: " << theStateMachine_.getCurrentStateName() << __E__;
__COUT__ << "Fsm current state: " << theStateMachine_.getCurrentStateName() <<
" message: " << theStateMachine_.getCurrentStateName() << __E__;

RunControlStateMachine::theProgressBar_.step();
makeSystemLogEntry("System shutting down.");
Expand All @@ -1751,6 +1769,9 @@ try
sleep(1);
RunControlStateMachine::theProgressBar_.step();
}

broadcastMessage(theStateMachine_.getCurrentMessage());

} // end transitionShuttingDown()
catch(const xdaq::exception::Exception& e) // due to xoap send failure
{
Expand Down Expand Up @@ -1803,6 +1824,8 @@ try
RunControlStateMachine::theProgressBar_.step();
}

broadcastMessage(theStateMachine_.getCurrentMessage());

} // end transitionStartingUp()
catch(const xdaq::exception::Exception& e) // due to xoap send failure
{
Expand Down Expand Up @@ -2257,6 +2280,8 @@ bool GatewaySupervisor::handleBroadcastMessageTarget(const SupervisorInfo& appI
RunControlStateMachine::theProgressBar_.step();

std::string givenAppStatus = theStateMachine_.getCurrentTransitionName(command);
__COUTV__(givenAppStatus.capacity());

unsigned int givenAppProgress = appInfo.getProgress();
std::string givenAppDetail = appInfo.getDetail();
if(givenAppProgress >= 100)
Expand Down Expand Up @@ -2317,6 +2342,7 @@ bool GatewaySupervisor::handleBroadcastMessageTarget(const SupervisorInfo& appI
// for transition attempt, set status for app, in case the request occupies the target app
reply = send(appInfo.getDescriptor(), message);
// then release mutex here using scope change, to allow the app to start giving its own updates
__COUTV__(givenAppStatus.capacity());
}
catch(const xdaq::exception::Exception& e) // due to xoap send failure
{
Expand Down Expand Up @@ -2553,7 +2579,10 @@ void GatewaySupervisor::broadcastMessage(xoap::MessageReference message)

try
{
orderedSupervisors = allSupervisorInfo_.getOrderedSupervisorDescriptors(command);
orderedSupervisors = allSupervisorInfo_.getOrderedSupervisorDescriptors(command,
//for the special Shutdown and Startup broadcasts, restrict targets to supervisors in the Gateway's own context
command == RunControlStateMachine::SHUTDOWN_TRANSITION_NAME ||
command == RunControlStateMachine::STARTUP_TRANSITION_NAME);
}
catch(const std::runtime_error& e)
{
Expand Down Expand Up @@ -3919,7 +3948,7 @@ void GatewaySupervisor::launchStartOTSCommand(const std::string& command, Config
if(context.address_[i] == '/')
j = i + 1;
hostnames.push_back(context.address_.substr(j));
__COUT__ << "StartOTS.sh hostname = " << hostnames.back() << __E__;
__COUT__ << "StartOTS.sh command '" << command << "' launching on hostname = " << hostnames.back() << __E__;
}
}
catch(...)
Expand Down
2 changes: 1 addition & 1 deletion otsdaq/Macros/StringMacros.cc
Original file line number Diff line number Diff line change
Expand Up @@ -1271,7 +1271,7 @@ char* StringMacros::otsGetEnvironmentVarable(const char* name, const std::string
{
__SS__ << "Environment variable '" << name << "' not defined at " << location << "[" << line << "]" << __E__;
ss << "\n\n" << StringMacros::stackTrace() << __E__;
__SS_THROW__;
__SS_ONLY_THROW__;
}
return environmentVariablePtr;
} // end otsGetEnvironmentVarable()
Expand Down
15 changes: 14 additions & 1 deletion otsdaq/SOAPUtilities/SOAPMessenger.cc
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,20 @@ std::string SOAPMessenger::send(XDAQ_CONST_CALL xdaq::ApplicationDescriptor* d,

{
xoap::MessageReference message = SOAPUtilities::makeSOAPMessageReference(command);
return send(d, message);
std::string msgStr;
try
{
msgStr = send(d, message);
}
catch(...)
{
__COUT__ << "send failed?!" << __E__;
__COUT__ << SOAPUtilities::translate(message) << __E__;
throw;
}


return msgStr;
}

//==============================================================================
Expand Down
9 changes: 7 additions & 2 deletions otsdaq/SupervisorInfo/AllSupervisorInfo.cc
Original file line number Diff line number Diff line change
Expand Up @@ -352,9 +352,9 @@ const SupervisorInfo& AllSupervisorInfo::getArtdaqSupervisorInfo(void) const
} // end getArtdaqSupervisorInfo()

//==============================================================================
std::vector<std::vector<const SupervisorInfo*>> AllSupervisorInfo::getOrderedSupervisorDescriptors(const std::string& stateMachineCommand) const
std::vector<std::vector<const SupervisorInfo*>> AllSupervisorInfo::getOrderedSupervisorDescriptors(const std::string& stateMachineCommand, bool onlyGatewayContextSupervisors) const
{
__COUT__ << "getOrderedSupervisorDescriptors" << __E__;
__COUT__ << "getOrderedSupervisorDescriptors for command " << stateMachineCommand << __E__;

std::map<uint64_t /*priority*/, std::vector<unsigned int /*appId*/>> orderedByPriority;

Expand Down Expand Up @@ -417,6 +417,11 @@ std::vector<std::vector<const SupervisorInfo*>> AllSupervisorInfo::getOrderedSup
// priority? " << (unsigned int)priorityAppVector.first <<
//__E__;


if(onlyGatewayContextSupervisors &&
it->second.getContextName() != theSupervisorInfo_->getContextName())
continue; //for shutdown and startup only broadcast to apps that are local to the Gateway supervisor

if(it->second.isGatewaySupervisor())
continue; // skip gateway supervisor
if(it->second.isTypeLogbookSupervisor())
Expand Down
2 changes: 1 addition & 1 deletion otsdaq/SupervisorInfo/AllSupervisorInfo.h
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,7 @@ class AllSupervisorInfo : public SupervisorDescriptorInfoBase
XDAQ_CONST_CALL xdaq::ApplicationDescriptor* getWizardDescriptor (void) const;
const SupervisorInfo& getArtdaqSupervisorInfo (void) const;

std::vector<std::vector<const SupervisorInfo*>> getOrderedSupervisorDescriptors (const std::string& stateMachineCommand) const;
std::vector<std::vector<const SupervisorInfo*>> getOrderedSupervisorDescriptors (const std::string& stateMachineCommand, bool onlyGatewayContextSupervisors = false) const;
std::recursive_mutex& getSupervisorInfoMutex (unsigned int lid) { return allSupervisorInfoMutex_[lid]; }
private:
SupervisorInfo* theSupervisorInfo_;
Expand Down
2 changes: 1 addition & 1 deletion otsdaq/TablePlugins/MessageFacilityTable_table.cc
Original file line number Diff line number Diff line change
Expand Up @@ -117,7 +117,7 @@ void MessageFacilityTable::init(ConfigurationManager* configManager)
child.second.getNode(COL_QT_PORT).getValue(fwdPort);
}

__COUT__ << "Foud FWD/WEB/QT " << (COL_ENABLE_FWD ? "true" : "false") << "/" << (COL_USE_WEB ? "true" : "false") << "/"
__COUT__ << "Found FWD/WEB/QT " << (COL_ENABLE_FWD ? "true" : "false") << "/" << (COL_USE_WEB ? "true" : "false") << "/"
<< (COL_USE_QT ? "true" : "false") << " and IP:Port:FwdPort " << fwdIP << ":" << fwdPort << ":" << destFwdPort << " in MesageFacility table."
<< __E__;
break; // take first enable row only!
Expand Down
12 changes: 6 additions & 6 deletions tools/common.sh
Original file line number Diff line number Diff line change
Expand Up @@ -89,10 +89,10 @@ defineColors ()
defineColors

SCRIPT_NAME=$1
out() { echo -e "${RstClr}${IRed}${STARTTIME}${RstClr}-${Green}$(date +'%h%y.%T') ${IBlue}${HOSTNAME}${RstClr} ${SCRIPT_NAME} [${Cyan}${BASH_LINENO[0]}${RstClr}]${IBlack}\t${RstClr}$@${RstClr}"; }
info() { echo -e "${RstClr}${IRed}${STARTTIME}${RstClr}-${Green}$(date +'%h%y.%T') ${IBlue}${HOSTNAME}${RstClr} ${SCRIPT_NAME} [${Cyan}${BASH_LINENO[0]}${RstClr}]${IBlack}\t${RstClr}${IBlue}$@${RstClr}"; }
success() { echo -e "${RstClr}${IRed}${STARTTIME}${RstClr}-${Green}$(date +'%h%y.%T') ${IBlue}${HOSTNAME}${RstClr} ${SCRIPT_NAME} [${Cyan}${BASH_LINENO[0]}${RstClr}]${IBlack}\t${RstClr}${IGreen}$@${RstClr}"; }
error() { echo -e "${RstClr}${IRed}${STARTTIME}${RstClr}-${Green}$(date +'%h%y.%T') ${IBlue}${HOSTNAME}${RstClr} ${SCRIPT_NAME} [${Cyan}${BASH_LINENO[0]}${RstClr}]${IBlack}\t${RstClr}${IRed}$@${RstClr}"; } >&2
warning() { echo -e "${RstClr}${IRed}${STARTTIME}${RstClr}-${Green}$(date +'%h%y.%T') ${IBlue}${HOSTNAME}${RstClr} ${SCRIPT_NAME} [${Cyan}${BASH_LINENO[0]}${RstClr}]${IBlack}\t${RstClr}${IYellow}$@${RstClr}"; } >&2
die() { echo -e "${RstClr}${IRed}${STARTTIME}${RstClr}-${Green}$(date +'%h%y.%T') ${IBlue}${HOSTNAME}${RstClr} ${SCRIPT_NAME} [${Cyan}${BASH_LINENO[0]}${RstClr}]${IBlack}\t${RstClr}${IRed}$@${RstClr}"; exit 1; }
out() { echo -e "${RstClr}${IRed}${STARTTIME}${RstClr}-${Green}$(date +'%d%h%y.%T') ${IBlue}${HOSTNAME}${RstClr} ${SCRIPT_NAME} [${Cyan}${BASH_LINENO[0]}${RstClr}]${IBlack}\t${RstClr}$@${RstClr}"; }
info() { echo -e "${RstClr}${IRed}${STARTTIME}${RstClr}-${Green}$(date +'%d%h%y.%T') ${IBlue}${HOSTNAME}${RstClr} ${SCRIPT_NAME} [${Cyan}${BASH_LINENO[0]}${RstClr}]${IBlack}\t${RstClr}${IBlue}$@${RstClr}"; }
success() { echo -e "${RstClr}${IRed}${STARTTIME}${RstClr}-${Green}$(date +'%d%h%y.%T') ${IBlue}${HOSTNAME}${RstClr} ${SCRIPT_NAME} [${Cyan}${BASH_LINENO[0]}${RstClr}]${IBlack}\t${RstClr}${IGreen}$@${RstClr}"; }
error() { echo -e "${RstClr}${IRed}${STARTTIME}${RstClr}-${Green}$(date +'%d%h%y.%T') ${IBlue}${HOSTNAME}${RstClr} ${SCRIPT_NAME} [${Cyan}${BASH_LINENO[0]}${RstClr}]${IBlack}\t${RstClr}${IRed}$@${RstClr}"; } >&2
warning() { echo -e "${RstClr}${IRed}${STARTTIME}${RstClr}-${Green}$(date +'%d%h%y.%T') ${IBlue}${HOSTNAME}${RstClr} ${SCRIPT_NAME} [${Cyan}${BASH_LINENO[0]}${RstClr}]${IBlack}\t${RstClr}${IYellow}$@${RstClr}"; } >&2
die() { echo -e "${RstClr}${IRed}${STARTTIME}${RstClr}-${Green}$(date +'%d%h%y.%T') ${IBlue}${HOSTNAME}${RstClr} ${SCRIPT_NAME} [${Cyan}${BASH_LINENO[0]}${RstClr}]${IBlack}\t${RstClr}${IRed}$@${RstClr}"; exit 1; }

0 comments on commit dd40548

Please sign in to comment.