From bc8416384fc5f783032af2c79872dc7e5f8b2c0d Mon Sep 17 00:00:00 2001 From: Norraphat Date: Tue, 13 Nov 2018 05:47:24 +0100 Subject: [PATCH] Backport StoredMergeableRunProductMetadata --- .../StoredMergeableRunProductMetadata.h | 152 ++++++++++++++++++ .../src/StoredMergeableRunProductMetadata.cc | 81 ++++++++++ DataFormats/Provenance/src/classes.h | 1 + DataFormats/Provenance/src/classes_def.xml | 11 ++ 4 files changed, 245 insertions(+) create mode 100644 DataFormats/Provenance/interface/StoredMergeableRunProductMetadata.h create mode 100644 DataFormats/Provenance/src/StoredMergeableRunProductMetadata.cc diff --git a/DataFormats/Provenance/interface/StoredMergeableRunProductMetadata.h b/DataFormats/Provenance/interface/StoredMergeableRunProductMetadata.h new file mode 100644 index 0000000000000..a794f4d068103 --- /dev/null +++ b/DataFormats/Provenance/interface/StoredMergeableRunProductMetadata.h @@ -0,0 +1,152 @@ +#ifndef DataFormats_Provenance_StoredMergeableRunProductMetadata_h +#define DataFormats_Provenance_StoredMergeableRunProductMetadata_h + +/** \class edm::StoredMergeableRunProductMetadata + +This class holds information used to decide how to merge together +run products when multiple run entries with the same run number +and ProcessHistoryID are read from input files contiguously. This +class is persistent and stores the information that needs to be +remembered from one process to the next. Most of the work related +to this decision is performed by the class MergeableRunProductMetadata. +The main purpose of this class is to hold the information that +needs to be persistently stored. PoolSource and PoolOutputModule +interface with this class to read and write it. + +Note that the information is not stored for each product. +The information is stored for each run entry in Run TTree +in the input file and also for each process in which at least +one mergeable run product was selected to be written to the +output file. 
It is not necessary to save information +for each product individually, it will be the same for every +product created in the same process and in the same run entry. + +The main piece of information stored is the list of luminosity +block numbers processed when the product was created. Often, +this list can be obtained from the IndexIntoFile and we do not +need to duplicate this information here and so as an optimization +we don't. There are also cases where we can detect that the merging +has created invalid run products where part of the content +has probably been double counted. We save a value to record +this problem. + +To improve performance, the data structure has been flattened +into 4 vectors instead of containing a vector containing vectors +containing vectors. + +When the user of this class fails to find a run entry with a +particular process, the assumption should be made that the lumi +numbers are in IndexIntoFile and valid. + +Another optimization is that if in all cases the lumi numbers +can be obtained from IndexIntoFile and are valid, then all +the vectors are cleared and a boolean value is set to indicate +this. + +\author W. David Dagenhart, created 23 May, 2018 + +*/ + +#include "DataFormats/Provenance/interface/RunLumiEventNumber.h" + +#include <string> +#include <vector> + +namespace edm { + + class StoredMergeableRunProductMetadata { + public: + + // This constructor exists for ROOT I/O + StoredMergeableRunProductMetadata(); + + // This constructor is used when creating a new object + // each time an output file is opened. 
+ StoredMergeableRunProductMetadata(std::vector<std::string> const& processesWithMergeableRunProducts); + + std::vector<std::string> const& processesWithMergeableRunProducts() const { + return processesWithMergeableRunProducts_; + } + + class SingleRunEntry { + public: + + SingleRunEntry(); + SingleRunEntry(unsigned long long iBeginProcess, unsigned long long iEndProcess); + + unsigned long long beginProcess() const { return beginProcess_; } + unsigned long long endProcess() const { return endProcess_; } + + private: + + // indexes into singleRunEntryAndProcesses_ for a single run entry + unsigned long long beginProcess_; + unsigned long long endProcess_; + }; + + class SingleRunEntryAndProcess { + public: + + SingleRunEntryAndProcess(); + SingleRunEntryAndProcess(unsigned long long iBeginLumi, + unsigned long long iEndLumi, + unsigned int iProcess, + bool iValid, + bool iUseIndexIntoFile); + + + unsigned long long beginLumi() const { return beginLumi_; } + unsigned long long endLumi() const { return endLumi_; } + + unsigned int process() const { return process_; } + + bool valid() const { return valid_; } + bool useIndexIntoFile() const { return useIndexIntoFile_; } + + private: + + // indexes into lumis_ for products created in one process and + // written into a single run entry. + unsigned long long beginLumi_; + unsigned long long endLumi_; + + // index into processesWithMergeableRunProducts_ + unsigned int process_; + + // If false this indicates the way files were split and merged + // has created run products that are invalid and probably + // double count some of their content. + bool valid_; + + // If true the lumi numbers can be obtained from IndexIntoFile + // and are not stored in the vector named lumis_ + bool useIndexIntoFile_; + }; + + // These four functions are called by MergeableRunProductMetadata which + // fills the vectors. 
+ std::vector<SingleRunEntry>& singleRunEntries() { return singleRunEntries_; } + std::vector<SingleRunEntryAndProcess>& singleRunEntryAndProcesses() { return singleRunEntryAndProcesses_; } + std::vector<LuminosityBlockNumber_t>& lumis() { return lumis_; } + bool& allValidAndUseIndexIntoFile() { return allValidAndUseIndexIntoFile_; } + + // Called by RootOutputFile immediately before writing the object + // when an output file is closed. + void optimizeBeforeWrite(); + + bool getLumiContent(unsigned long long runEntry, + std::string const& process, + bool& valid, + std::vector<LuminosityBlockNumber_t>::const_iterator & lumisBegin, + std::vector<LuminosityBlockNumber_t>::const_iterator & lumisEnd) const; + + private: + + std::vector<std::string> processesWithMergeableRunProducts_; + std::vector<SingleRunEntry> singleRunEntries_; // index is the run entry + std::vector<SingleRunEntryAndProcess> singleRunEntryAndProcesses_; + std::vector<LuminosityBlockNumber_t> lumis_; + bool allValidAndUseIndexIntoFile_; + }; +} +#endif diff --git a/DataFormats/Provenance/src/StoredMergeableRunProductMetadata.cc b/DataFormats/Provenance/src/StoredMergeableRunProductMetadata.cc new file mode 100644 index 0000000000000..167cca47a197c --- /dev/null +++ b/DataFormats/Provenance/src/StoredMergeableRunProductMetadata.cc @@ -0,0 +1,81 @@ +#include "DataFormats/Provenance/interface/StoredMergeableRunProductMetadata.h" + +namespace edm { + + StoredMergeableRunProductMetadata::StoredMergeableRunProductMetadata() : + allValidAndUseIndexIntoFile_(true) { } + + StoredMergeableRunProductMetadata:: + StoredMergeableRunProductMetadata(std::vector<std::string> const& processesWithMergeableRunProducts): + processesWithMergeableRunProducts_(processesWithMergeableRunProducts), + allValidAndUseIndexIntoFile_(true) { } + + StoredMergeableRunProductMetadata::SingleRunEntry::SingleRunEntry() : + beginProcess_(0), + endProcess_(0) { } + + StoredMergeableRunProductMetadata::SingleRunEntry::SingleRunEntry(unsigned long long iBeginProcess, + unsigned long long iEndProcess) : + beginProcess_(iBeginProcess), + endProcess_(iEndProcess) { } + + 
StoredMergeableRunProductMetadata::SingleRunEntryAndProcess::SingleRunEntryAndProcess() : + beginLumi_(0), + endLumi_(0), + process_(0), + valid_(false), + useIndexIntoFile_(false) { } + + StoredMergeableRunProductMetadata::SingleRunEntryAndProcess:: + SingleRunEntryAndProcess(unsigned long long iBeginLumi, + unsigned long long iEndLumi, + unsigned int iProcess, + bool iValid, + bool iUseIndexIntoFile) : + beginLumi_(iBeginLumi), + endLumi_(iEndLumi), + process_(iProcess), + valid_(iValid), + useIndexIntoFile_(iUseIndexIntoFile) { } + + void StoredMergeableRunProductMetadata::optimizeBeforeWrite() { + if (allValidAndUseIndexIntoFile_) { + processesWithMergeableRunProducts_.clear(); + singleRunEntries_.clear(); + singleRunEntryAndProcesses_.clear(); + lumis_.clear(); + } + } + + bool StoredMergeableRunProductMetadata:: + getLumiContent(unsigned long long runEntry, + std::string const& process, + bool& valid, + std::vector<LuminosityBlockNumber_t>::const_iterator & lumisBegin, + std::vector<LuminosityBlockNumber_t>::const_iterator & lumisEnd) const { + + valid = true; + if (allValidAndUseIndexIntoFile_) { + return false; + } + + SingleRunEntry const& singleRunEntry = singleRunEntries_.at(runEntry); + for (unsigned long long j = singleRunEntry.beginProcess(); j < singleRunEntry.endProcess(); ++j) { + SingleRunEntryAndProcess const& singleRunEntryAndProcess = singleRunEntryAndProcesses_.at(j); + // This string comparison could be optimized away by storing an index mapping in + // MergeableRunProductMetadata that gets recalculated each time a new input + // file is opened + if (processesWithMergeableRunProducts_.at(singleRunEntryAndProcess.process()) == process) { + valid = singleRunEntryAndProcess.valid(); + if (singleRunEntryAndProcess.useIndexIntoFile()) { + return false; + } else { + lumisBegin = lumis_.begin() + singleRunEntryAndProcess.beginLumi(); + lumisEnd = lumis_.begin() + singleRunEntryAndProcess.endLumi(); + return true; + } + } + } + return false; + } +} diff --git a/DataFormats/Provenance/src/classes.h 
b/DataFormats/Provenance/src/classes.h index 10308a62e8258..e87580521fddc 100644 --- a/DataFormats/Provenance/src/classes.h +++ b/DataFormats/Provenance/src/classes.h @@ -25,6 +25,7 @@ #include "DataFormats/Provenance/interface/ProcessHistoryID.h" #include "DataFormats/Provenance/interface/ProductID.h" #include "DataFormats/Provenance/interface/ProductProvenance.h" +#include "DataFormats/Provenance/interface/StoredMergeableRunProductMetadata.h" #include "DataFormats/Provenance/interface/StoredProductProvenance.h" #include "DataFormats/Provenance/interface/ProductRegistry.h" #include "DataFormats/Provenance/interface/RunAuxiliary.h" diff --git a/DataFormats/Provenance/src/classes_def.xml b/DataFormats/Provenance/src/classes_def.xml index d074add40efef..72973dff79823 100644 --- a/DataFormats/Provenance/src/classes_def.xml +++ b/DataFormats/Provenance/src/classes_def.xml @@ -131,6 +131,17 @@ + + + + + + + + + + +