From e9e0cfa511ad3b285faa51444f0a1803d8d0cea3 Mon Sep 17 00:00:00 2001 From: Chris Jones Date: Thu, 5 Jan 2017 10:31:07 -0600 Subject: [PATCH] Fix randomly failing framework unit test The test run_global_stream_one.sh includes a test to check that when using modules which share a resource that only one module runs at a time. Previously, the measurement mechanism also included TriggerResults and the source in the timing which could create false positives and fail the test. Now the test excludes those two from the measurement so the test should never see more than one module running at a time. --- .../Framework/test/run_global_stream_one.sh | 7 +- .../test/test_no_concurrent_module_cfg.py | 4 +- .../Services/plugins/ConcurrentModuleTimer.cc | 113 ++++++++++++++---- 3 files changed, 95 insertions(+), 29 deletions(-) diff --git a/FWCore/Framework/test/run_global_stream_one.sh b/FWCore/Framework/test/run_global_stream_one.sh index 12350a82e7764..ac178e941d5b8 100755 --- a/FWCore/Framework/test/run_global_stream_one.sh +++ b/FWCore/Framework/test/run_global_stream_one.sh @@ -12,10 +12,7 @@ F3=${LOCAL_TEST_DIR}/test_one_modules_cfg.py #the last few lines of the output are the printout from the # ConcurrentModuleTimer service detailing how much time was -# spent in 2,3 or 4 modules running simultaneously. Given the -# only module that can run concurrently is the internal -# TriggerResults producer, we will ignore times less then 0.01s -# except for 2 at a time where we allow less than 0.02. +# spent in 2,3 or 4 modules running simultaneously. touch empty_file -(cmsRun ${LOCAL_TEST_DIR}/test_no_concurrent_module_cfg.py 2>&1) | tail -n 3 | grep -v ' 0.00' | grep -v ' 0 ' | grep -v 'e-' | grep -v '2 0.01' | diff - empty_file || die "Failure using test_no_concurrent_module_cfg.py" $? +(cmsRun ${LOCAL_TEST_DIR}/test_no_concurrent_module_cfg.py 2>&1) | tail -n 3 | grep -v ' 0 ' | grep -v 'e-' | diff - empty_file || die "Failure using test_no_concurrent_module_cfg.py" $? diff --git a/FWCore/Framework/test/test_no_concurrent_module_cfg.py b/FWCore/Framework/test/test_no_concurrent_module_cfg.py index 8f3e735893fc3..b4ae9d886b43d 100644 --- a/FWCore/Framework/test/test_no_concurrent_module_cfg.py +++ b/FWCore/Framework/test/test_no_concurrent_module_cfg.py @@ -28,4 +28,6 @@ process.p = cms.Path(process.c1+process.c2) -process.add_(cms.Service("ConcurrentModuleTimer")) \ No newline at end of file +process.add_(cms.Service("ConcurrentModuleTimer", + modulesToExclude = cms.untracked.vstring("TriggerResults"), + excludeSource = cms.untracked.bool(True))) diff --git a/FWCore/Services/plugins/ConcurrentModuleTimer.cc b/FWCore/Services/plugins/ConcurrentModuleTimer.cc index 7c7df84ddef93..8584e1c8f21d2 100644 --- a/FWCore/Services/plugins/ConcurrentModuleTimer.cc +++ b/FWCore/Services/plugins/ConcurrentModuleTimer.cc @@ -29,17 +29,21 @@ namespace edm { public: ConcurrentModuleTimer(edm::ParameterSet const& iConfig, edm::ActivityRegistry& iAR); ~ConcurrentModuleTimer(); - //static void fillDescriptions(edm::ConfigurationDescriptions & descriptions); + static void fillDescriptions(edm::ConfigurationDescriptions & descriptions); private: void start(); void stop(); + bool trackModule(ModuleCallingContext const& iContext) const; std::unique_ptr[]> m_timeSums; + std::vector m_modulesToExclude; + std::vector m_excludedModuleIds; std::chrono::high_resolution_clock::time_point m_time; unsigned int m_nTimeSums; unsigned int m_nModules; std::atomic m_spinLock; bool m_startedTiming; + bool m_excludeSource; }; } } @@ -61,29 +65,69 @@ using namespace edm::service; // constructors and destructor // ConcurrentModuleTimer::ConcurrentModuleTimer(edm::ParameterSet const& iConfig, edm::ActivityRegistry& iReg): +m_modulesToExclude(iConfig.getUntrackedParameter>("modulesToExclude")), m_time(), m_nModules(0), m_spinLock{false}, -m_startedTiming(false) +m_startedTiming(false), +m_excludeSource(iConfig.getUntrackedParameter("excludeSource")) { - iReg.watchPreModuleEvent([this](StreamContext const&, ModuleCallingContext const&){ - start(); - }); - iReg.watchPostModuleEvent([this](StreamContext const&, ModuleCallingContext const&){ - stop(); - }); - - iReg.watchPreModuleEventDelayedGet([this](StreamContext const&, ModuleCallingContext const& iContext){ - if(iContext.state() == ModuleCallingContext::State::kRunning) { - stop(); + if(not m_modulesToExclude.empty()) { + iReg.watchPreModuleConstruction( [this](ModuleDescription const& iMod) { + for(auto const& name: m_modulesToExclude) { + if( iMod.moduleLabel() == name) { + m_excludedModuleIds.push_back(iMod.id()); + break; + } } - }); - iReg.watchPostModuleEventDelayedGet([this](StreamContext const&, ModuleCallingContext const& iContext){ - if(iContext.state() == ModuleCallingContext::State::kRunning) { - start(); + }); + iReg.watchPreModuleEvent([this](StreamContext const&, ModuleCallingContext const& iContext){ + if(trackModule(iContext)) { + start(); } - }); - + }); + iReg.watchPostModuleEvent([this](StreamContext const&, ModuleCallingContext const& iContext){ + if(trackModule(iContext)) { + stop(); + } + }); + + iReg.watchPreModuleEventDelayedGet([this](StreamContext const&, ModuleCallingContext const& iContext){ + if(trackModule(iContext)) { + if(iContext.state() == ModuleCallingContext::State::kRunning) { + stop(); + } + } + }); + iReg.watchPostModuleEventDelayedGet([this](StreamContext const&, ModuleCallingContext const& iContext){ + if(trackModule(iContext)) { + if(iContext.state() == ModuleCallingContext::State::kRunning) { + start(); + } + } + }); + + } else { + //apply to all modules so can use faster version + iReg.watchPreModuleEvent([this](StreamContext const&, ModuleCallingContext const&){ + start(); + }); + iReg.watchPostModuleEvent([this](StreamContext const&, ModuleCallingContext const&){ + stop(); + }); + + iReg.watchPreModuleEventDelayedGet([this](StreamContext const&, ModuleCallingContext const& iContext){ + if(iContext.state() == ModuleCallingContext::State::kRunning) { + stop(); + } + }); + iReg.watchPostModuleEventDelayedGet([this](StreamContext const&, ModuleCallingContext const& iContext){ + if(iContext.state() == ModuleCallingContext::State::kRunning) { + start(); + } + }); + } + iReg.watchPreallocate([this](edm::service::SystemBounds const& iBounds){ m_nTimeSums =iBounds.maxNumberOfThreads()+1; m_timeSums.reset(new std::atomic[m_nTimeSums]); @@ -97,11 +141,15 @@ m_startedTiming(false) m_time = std::chrono::high_resolution_clock::now(); m_startedTiming=true; } - start(); - }); - iReg.watchPostSourceEvent([this](StreamID){ - stop(); + if(not m_excludeSource) { + start(); + } }); + if(not m_excludeSource) { + iReg.watchPostSourceEvent([this](StreamID){ + stop(); + }); + } } ConcurrentModuleTimer::~ConcurrentModuleTimer() { @@ -179,13 +227,32 @@ ConcurrentModuleTimer::stop() } } - // // const member functions // +bool +ConcurrentModuleTimer::trackModule(ModuleCallingContext const& iContext) const +{ + auto modId = iContext.moduleDescription()->id(); + for(auto const id: m_excludedModuleIds) { + if(modId == id) { + return false; + } + } + return true; +} // // static member functions // +void +ConcurrentModuleTimer::fillDescriptions(edm::ConfigurationDescriptions & descriptions) +{ + edm::ParameterSetDescription desc; + desc.addUntracked >("modulesToExclude", std::vector{})->setComment("Module labels to exclude from the timing measurements"); + desc.addUntracked("excludeSource",false)->setComment("Exclude the time the source is running"); + descriptions.add("ConcurrentModuleTimer", desc); +} + DEFINE_FWK_SERVICE(ConcurrentModuleTimer);