Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Refactoring gen weight storage in EDM + Nano integration #32167

Closed
wants to merge 25 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
25 commits
Select commit Hold shift + click to select a range
90a9105
First implementation of WeightGroupInfo/product classes
kdlong Jan 15, 2022
3562b96
Working on parsing integration with scale/pdf weights
kdlong Aug 15, 2019
cc3ee9b
New WeightGroups, improved parsing with helper class
kdlong Jan 15, 2022
6a3b0b5
updates to allow saving weight sums from all weight categories
sroychow Aug 21, 2020
1f632fa
Add more error handling
Oct 14, 2020
56c56a2
adding altset index table
sroychow Nov 3, 2020
b6e854c
Use cms::Exception, configure debugging, code formatting
kdlong Jan 16, 2022
b7e851f
attempt to delete extra kets after the last </weightgroup>
SanghyunKo Nov 20, 2020
b6963fa
Step forward to newer cmssw version
kdlong Jan 16, 2022
81e18ba
Allow gen products to run at GEN step or Nano
kdlong Jan 16, 2022
0aae867
Support ignoregroups in nano, fixes for unassociated weights
kdlong Jan 3, 2022
ab4cae0
Convert OwnVector to unique_ptr
kdlong Jan 3, 2022
a880aa2
Simplify nano producer and weight parsing
kdlong Jan 16, 2022
27811a9
Make producers edm::Global
kdlong Jan 16, 2022
1751101
Code format, don't fail for missing LHEEventProduct
kdlong Jan 16, 2022
cecf5c6
Update nano and nanogen configs
kdlong Apr 19, 2022
0fef54c
Fix scale weights in case of < 9 entries
kdlong Apr 13, 2022
7d6888f
Code format
kdlong Apr 19, 2022
eab7023
Hopefully fixing test errors
kdlong Apr 27, 2022
22f79fe
Attempt to fit cosmics workflow error
kdlong Jun 21, 2022
562bac1
mask genweight addition in procmodifier
sroychow Jul 8, 2022
11c68cf
macro to permit catch statement with LHAPDF
kdlong Jul 11, 2022
5dee41c
Fix event content for gen weights in AOD
kdlong Jul 11, 2022
61d39f0
Fix nanogen config
kdlong Dec 14, 2022
c29cb3a
Remove extraneous code in MEParamWeightGroup
kdlong Dec 16, 2022
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
import FWCore.ParameterSet.Config as cms

# Process modifier for the gen-weight workflow; Generator_cff uses it to
# replace the pgen sequence with pgenWithWeight.
genWeightAddition = cms.Modifier()
10 changes: 10 additions & 0 deletions Configuration/StandardSequences/python/Generator_cff.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@
#
from PhysicsTools.HepMCCandAlgos.genParticles_cfi import *
from GeneratorInterface.Core.generatorSmeared_cfi import *
from GeneratorInterface.Core.genWeights_cfi import genWeights
from GeneratorInterface.Core.lheWeights_cfi import lheWeights
from RecoJets.Configuration.RecoGenJets_cff import *
from RecoMET.Configuration.RecoGenMET_cff import *
from RecoJets.Configuration.GenJetParticles_cff import *
Expand Down Expand Up @@ -53,6 +55,9 @@
VertexSmearing = cms.Sequence(cms.SequencePlaceholder("VtxSmeared"))
GenSmeared = cms.Sequence(generatorSmeared)
GeneInfo = cms.Sequence(GeneInfoTask)
# Sequence combining the genWeights and lheWeights modules imported above.
genWeightsSeq = cms.Sequence(genWeights*lheWeights)
genWeights.allowUnassociatedWeights = True # This should be off, but needed until Pythia bug is fixed
lheWeights.failIfInvalidXML = False # Also would ideally be true, but is needed at least for the tau embedding unit test
genJetMET = cms.Sequence(genJetMETTask)

from SimPPS.Configuration.GenPPS_cff import *
Expand All @@ -74,3 +79,8 @@
hltResults = cms.InputTag('TriggerResults'),
triggerConditions = cms.vstring()
)

# Generator sequence that includes genWeightsSeq between GeneInfo and genJetMET;
# it is swapped in for pgen by the genWeightAddition modifier below.
pgenWithWeight = cms.Sequence(cms.SequencePlaceholder("randomEngineStateProducer")+VertexSmearing+GenSmeared+GeneInfo+genWeightsSeq+genJetMET, PPSTransportTask)

from Configuration.ProcessModifiers.genWeightAddition_cff import genWeightAddition
# Only when the genWeightAddition modifier is active is pgen replaced by pgenWithWeight.
genWeightAddition.toReplaceWith(pgen, pgenWithWeight)
4 changes: 4 additions & 0 deletions DataFormats/NanoAOD/src/classes_def.xml
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
<?xml version="1.0" encoding="utf-8"?>
<lcgdict>
<class name="nanoaod::FlatTable::Column" ClassVersion="3">
<version ClassVersion="3" checksum="3066258528"/>
Expand All @@ -11,6 +12,9 @@
<class name="nanoaod::FlatTable::RowView" transient="true" />
<class name="edm::Wrapper<nanoaod::FlatTable>" />

<class name="std::vector<nanoaod::FlatTable>" />
<class name="edm::Wrapper<std::vector<nanoaod::FlatTable>>" />

<class name="nanoaod::MergeableCounterTable::FloatColumn" ClassVersion="3">
<version ClassVersion="3" checksum="828208870"/>
</class>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,10 @@
'keep GenEventInfoProduct_generator_*_*',
'keep edmHepMCProduct_generatorSmeared_*_*',
'keep edmHepMCProduct_LHCTransport_*_*',
'keep GenWeightProduct_lheWeights_*_*',
'keep GenWeightProduct_genWeights_*_*',
'keep GenWeightInfoProduct_lheWeights_*_*',
'keep GenWeightInfoProduct_genWeights_*_*',
'keep GenFilterInfo_*_*_*',
'keep *_genParticles_*_*'
)
Expand All @@ -36,6 +40,10 @@
'keep GenEventInfoProduct_generator_*_*',
'keep edmHepMCProduct_generatorSmeared_*_*',
'keep edmHepMCProduct_LHCTransport_*_*',
'keep GenWeightProduct_lheWeights_*_*',
'keep GenWeightProduct_genWeights_*_*',
'keep GenWeightInfoProduct_lheWeights_*_*',
'keep GenWeightInfoProduct_genWeights_*_*',
'keep GenFilterInfo_*_*_*',
'keep *_genParticles_*_*'
)
Expand All @@ -50,6 +58,10 @@
'keep GenLumiInfoHeader_generator_*_*',
'keep GenLumiInfoProduct_generator_*_*',
'keep GenEventInfoProduct_generator_*_*',
'keep GenWeightProduct_lheWeights_*_*',
'keep GenWeightProduct_genWeights_*_*',
'keep GenWeightInfoProduct_lheWeights_*_*',
'keep GenWeightInfoProduct_genWeights_*_*',
'keep GenFilterInfo_*_*_*',
'keep *_genParticles_*_*'
)
Expand Down
1 change: 1 addition & 0 deletions GeneratorInterface/Core/BuildFile.xml
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
<use name="lhapdf"/>
<use name="f77compiler"/>
<use name="root"/>
<use name="tinyxml2"/>
<export>
<lib name="1"/>
</export>
27 changes: 27 additions & 0 deletions GeneratorInterface/Core/interface/GenWeightHelper.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
#ifndef GeneratorInterface_Core_GenWeightHelper_h
#define GeneratorInterface_Core_GenWeightHelper_h

#include <tinyxml2.h>

#include <fstream>
#include <map>
#include <regex>
#include <string>
#include <vector>

#include "GeneratorInterface/Core/interface/WeightHelper.h"
#include "SimDataFormats/GeneratorProducts/interface/GenLumiInfoProduct.h"
#include "SimDataFormats/GeneratorProducts/interface/PartonShowerWeightGroupInfo.h"
#include "SimDataFormats/GeneratorProducts/interface/PdfWeightGroupInfo.h"
#include "SimDataFormats/GeneratorProducts/interface/ScaleWeightGroupInfo.h"

namespace gen {
// WeightHelper specialisation that derives weight groups from a list of
// weight names.
class GenWeightHelper : public WeightHelper {
public:
GenWeightHelper();
// Returns the weight groups built from weightNames (the vector is taken by
// value). addUnassociatedGroup presumably requests an extra catch-all group
// for weights that match no other group -- confirm against the implementation.
std::vector<std::unique_ptr<gen::WeightGroupInfo>> parseWeightGroupsFromNames(std::vector<std::string> weightNames,
bool addUnassociatedGroup) const;
};
} // namespace gen

#endif
53 changes: 53 additions & 0 deletions GeneratorInterface/Core/interface/LHEWeightHelper.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
#ifndef GeneratorInterface_Core_LHEWeightHelper_h
#define GeneratorInterface_Core_LHEWeightHelper_h

#include <tinyxml2.h>

#include <fstream>
#include <map>
#include <regex>
#include <string>
#include <vector>

#include "GeneratorInterface/Core/interface/WeightHelper.h"
#include "SimDataFormats/GeneratorProducts/interface/LHERunInfoProduct.h"
#include "SimDataFormats/GeneratorProducts/interface/MEParamWeightGroupInfo.h"
#include "SimDataFormats/GeneratorProducts/interface/PdfWeightGroupInfo.h"
#include "SimDataFormats/GeneratorProducts/interface/ScaleWeightGroupInfo.h"
#include "SimDataFormats/GeneratorProducts/interface/UnknownWeightGroupInfo.h"

namespace gen {
/// WeightHelper specialisation that works on the lines of an LHE header and
/// uses tinyxml2 to read them.
class LHEWeightHelper : public WeightHelper {
public:
  LHEWeightHelper() : WeightHelper() {}

  /// Categories of header problems reported by findErrorType().
  enum class ErrorType { Empty, SwapHeader, HTMLStyle, NoWeightGroup, TrailingStr, Unknown, NoError };

  /// Human-readable description for every ErrorType value. Each enumerator has
  /// an entry so that a lookup can never fail for a valid ErrorType.
  const std::unordered_map<ErrorType, std::string> errorTypeAsString_ = {
      {ErrorType::Empty, "Empty header"},
      {ErrorType::SwapHeader, "Header info out of order"},
      {ErrorType::HTMLStyle, "Header is invalid HTML"},
      {ErrorType::NoWeightGroup, "Header has no weight groups"},
      {ErrorType::TrailingStr, "Header has extraneous info"},
      {ErrorType::Unknown, "Unrecognized error"},
      {ErrorType::NoError, "No error here!"}};

  /// Parses the header lines (taken by value) into weight groups.
  /// addUnassociated presumably requests an extra catch-all group for weights
  /// without usable header metadata -- confirm against the implementation.
  std::vector<std::unique_ptr<gen::WeightGroupInfo>> parseWeights(std::vector<std::string> headerLines,
                                                                  bool addUnassociated) const;
  bool isConsistent(const std::string& fullHeader) const;
  void swapHeaders(std::vector<std::string>& headerLines) const;

  /// Sets / reads the failIfInvalidXML_ flag (false by default).
  void setFailIfInvalidXML(bool value) { failIfInvalidXML_ = value; }
  bool failIfInvalidXML() const { return failIfInvalidXML_; }

private:
  // Closing tags of the weight-group and weight XML elements.
  std::string weightgroupKet_ = "</weightgroup>";
  std::string weightTag_ = "</weight>";
  bool failIfInvalidXML_ = false;

  std::string parseGroupName(tinyxml2::XMLElement* el) const;
  ParsedWeight parseWeight(tinyxml2::XMLElement* inner, std::string groupName, int groupIndex, int& weightIndex) const;
  bool validateAndFixHeader(std::vector<std::string>& headerLines, tinyxml2::XMLDocument& xmlDoc) const;
  tinyxml2::XMLError tryReplaceHtmlStyle(tinyxml2::XMLDocument& xmlDoc, std::string& fullHeader) const;
  tinyxml2::XMLError tryRemoveTrailings(tinyxml2::XMLDocument& xmlDoc, std::string& fullHeader) const;
  ErrorType findErrorType(int xmlError, const std::string& headerLines) const;
};
} // namespace gen

#endif
152 changes: 152 additions & 0 deletions GeneratorInterface/Core/interface/WeightHelper.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,152 @@
#ifndef GeneratorInterface_LHEInterface_WeightHelper_h
#define GeneratorInterface_LHEInterface_WeightHelper_h

#include <bits/stdc++.h>

#include <boost/algorithm/string.hpp>
#include <fstream>
#include <memory>

#include "LHAPDF/LHAPDF.h"
#include "SimDataFormats/GeneratorProducts/interface/GenWeightInfoProduct.h"
#include "SimDataFormats/GeneratorProducts/interface/GenWeightProduct.h"
#include "SimDataFormats/GeneratorProducts/interface/MEParamWeightGroupInfo.h"
#include "SimDataFormats/GeneratorProducts/interface/PartonShowerWeightGroupInfo.h"
#include "SimDataFormats/GeneratorProducts/interface/PdfWeightGroupInfo.h"
#include "SimDataFormats/GeneratorProducts/interface/ScaleWeightGroupInfo.h"
#include "SimDataFormats/GeneratorProducts/interface/UnknownWeightGroupInfo.h"
#include "SimDataFormats/GeneratorProducts/interface/WeightGroupInfo.h"
#include "SimDataFormats/GeneratorProducts/interface/WeightsInfo.h"

namespace gen {
/// Plain record describing one weight as read from a header.
/// The integer fields carry default member initializers so that a
/// default-initialized ParsedWeight never holds indeterminate values; the
/// struct is still an aggregate, so brace initialization keeps working.
struct ParsedWeight {
  std::string id;
  int index = 0;
  std::string groupname;
  std::string content;
  std::unordered_map<std::string, std::string> attributes;
  int wgtGroup_idx = 0;
};

/// Base class with the shared machinery for turning parsed weights into
/// weight groups and for filling a GenWeightProduct.
class WeightHelper {
public:
  WeightHelper();

  /// Fills a GenWeightProduct from the event weights, using weightsInfo to
  /// locate the group of each weight. Defined below the class.
  template <typename T>
  std::unique_ptr<GenWeightProduct> weightProduct(const GenWeightInfoProduct& weightsInfo,
                                                  std::vector<T> weights,
                                                  float w0) const;

  /// Forwards the flag to PartonShowerWeightGroupInfo::setGuessPSWeightIdx.
  void setGuessPSWeightIdx(bool guessPSWeightIdx) {
    PartonShowerWeightGroupInfo::setGuessPSWeightIdx(guessPSWeightIdx);
  }

  /// Appends an UnknownWeightGroupInfo named "unassociated" to weightGroups.
  void addUnassociatedGroup(std::vector<std::unique_ptr<gen::WeightGroupInfo>>& weightGroups) const {
    auto catchAll = std::make_unique<gen::UnknownWeightGroupInfo>("unassociated");
    catchAll->setDescription("Weights with missing or invalid header meta data");
    weightGroups.push_back(std::move(catchAll));
  }

  int addWeightToProduct(GenWeightProduct& product, double weight, std::string name, int weightNum, int groupIndex);

  /// Switches the debug printout on or off.
  void setDebug(bool value) { debug_ = value; }

protected:
  bool debug_ = false;
  const unsigned int FIRST_PSWEIGHT_ENTRY = 2;
  const unsigned int DEFAULT_PSWEIGHT_LENGTH = 46;
  std::map<std::string, std::string> currWeightAttributeMap_;
  std::map<std::string, std::string> currGroupAttributeMap_;

  // Predicates on a parsed weight, one per weight-group type.
  bool isScaleWeightGroup(const ParsedWeight& weight) const;
  bool isMEParamWeightGroup(const ParsedWeight& weight) const;
  bool isPdfWeightGroup(const ParsedWeight& weight) const;
  bool isPartonShowerWeightGroup(const ParsedWeight& weight) const;
  bool isOrphanPdfWeightGroup(ParsedWeight& weight) const;

  // Per-group-type update hooks taking a parsed weight.
  void updateScaleInfo(gen::ScaleWeightGroupInfo& scaleGroup, const ParsedWeight& weight) const;
  void updateMEParamInfo(const ParsedWeight& weight, int index) const;
  void updatePdfInfo(gen::PdfWeightGroupInfo& pdfGroup, const ParsedWeight& weight) const;
  void updatePartonShowerInfo(gen::PartonShowerWeightGroupInfo& psGroup, const ParsedWeight& weight) const;
  void cleanupOrphanCentralWeight(WeightGroupInfoContainer& weightGroups) const;
  bool splitPdfWeight(ParsedWeight& weight, WeightGroupInfoContainer& weightGroups) const;

  // Attribute lookup helpers.
  int lhapdfId(const ParsedWeight& weight, gen::PdfWeightGroupInfo& pdfGroup) const;
  std::string searchAttributes(const std::string& label, const ParsedWeight& weight) const;
  std::string searchAttributesByTag(const std::string& label, const ParsedWeight& weight) const;
  std::string searchAttributesByRegex(const std::string& label, const ParsedWeight& weight) const;

  // Maps each lookup label to the alternative spellings under which the same
  // quantity may appear.
  const std::unordered_map<std::string, std::vector<std::string>> attributeNames_ = {
      {"muf", {"muF", "MUF", "muf", "facscfact"}},
      {"mur", {"muR", "MUR", "mur", "renscfact"}},
      {"pdf", {"PDF", "PDF set", "lhapdf", "pdf", "pdf set", "pdfset"}},
      {"dyn", {"DYN_SCALE"}},
      {"dyn_name", {"dyn_scale_choice"}},
      {"up", {"_up", "Hi"}},
      {"down", {"_dn", "Lo"}},
      {"me_variation", {"mass", "sthw2", "width"}},
  };

  void printWeights(const WeightGroupInfoContainer& weightGroups) const;
  std::unique_ptr<WeightGroupInfo> buildGroup(ParsedWeight& weight) const;
  WeightGroupInfoContainer buildGroups(std::vector<ParsedWeight>& parsedWeights, bool addUnassociatedGroup) const;
  std::string searchString(const std::string& label, const std::string& name) const;
};

/// Builds the GenWeightProduct for one event.
///
/// @tparam T        gen::WeightsInfo (id and value are read from the element)
///                  or a type convertible to double (the running index is
///                  used as the id).
/// @param weightsInfo  description of the weight groups for this sample
/// @param weights      the event weights, in the order they were stored
/// @param w0           value passed to the GenWeightProduct constructor
/// @return the filled product; it holds no weights when weightsInfo has no
///         groups and there are at most two weights
/// @throws cms::Exception if there are more than two weights but no groups,
///         or if a weight belongs to no group and weightsInfo has no
///         unassociated group (the original exception is rethrown)
template <typename T>
std::unique_ptr<GenWeightProduct> WeightHelper::weightProduct(const GenWeightInfoProduct& weightsInfo,
                                                              std::vector<T> weights,
                                                              float w0) const {
  auto weightProduct = std::make_unique<GenWeightProduct>(w0);
  weightProduct->setNumWeightSets(weightsInfo.numberOfGroups());
  gen::WeightGroupData groupData = {0, nullptr};
  // size=1 happens if there are no PS weights, so the weights vector contains
  // only the central GEN weight. Size = 2 happens when Pythia produces a
  // separate weight for the hadronization. In general this can also be handled
  // by the "unassociated" group, but handling it here avoids requiring that
  // setting to always be true for workflows without the GenLumiInfoProduct
  // (which can reasonably not exist for special GEN workflows).
  if (!weightsInfo.numberOfGroups()) {
    if (weights.size() <= 2)
      return weightProduct;
    throw cms::Exception("WeightHelper")
        << "Found more than 2 weights in the event, but found no weight groups in the header.";
  }

  // This gets remade every event to avoid having state-dependence in the
  // helper class; caching could be considered instead.
  int unassociatedIdx = weightsInfo.unassociatedIdx();
  std::unique_ptr<gen::UnknownWeightGroupInfo> unassociatedGroup;
  if (unassociatedIdx != -1)
    unassociatedGroup = std::make_unique<gen::UnknownWeightGroupInfo>("unassociated");

  int i = 0;
  for (const auto& weight : weights) {
    double wgtval;
    std::string wgtid;
    // Both branches are resolved at compile time so wgtval/wgtid are always
    // assigned, whatever T is.
    if constexpr (std::is_same<T, gen::WeightsInfo>::value) {
      wgtid = weight.id;
      wgtval = weight.wgt;
    } else {
      static_assert(std::is_convertible<T, double>::value,
                    "weightProduct requires gen::WeightsInfo or a type convertible to double");
      wgtid = std::to_string(i);
      wgtval = static_cast<double>(weight);
    }
    try {
      groupData = weightsInfo.containingWeightGroupInfo(i, groupData.index);
    } catch (const cms::Exception& e) {
      // Rethrow the original exception object instead of a sliced copy.
      if (unassociatedIdx == -1)
        throw;
      if (debug_) {
        std::cout << "WARNING: " << e.what() << std::endl;
      }
      // Access the unassociated group separately so it can be modified
      unassociatedGroup->addContainedId(i, wgtid, wgtid);
      groupData = {static_cast<size_t>(unassociatedIdx), unassociatedGroup.get()};
    }
    int entry = groupData.group->weightVectorEntry(wgtid, i);

    // TODO: is this too slow?
    if (debug_)
      std::cout << "Adding weight num " << i << " EntryNum " << entry << " to group " << groupData.index << std::endl;
    weightProduct->addWeight(wgtval, groupData.index, entry);
    i++;
  }
  return weightProduct;
}
} // namespace gen

#endif
1 change: 1 addition & 0 deletions GeneratorInterface/Core/plugins/BuildFile.xml
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
<use name="FWCore/MessageLogger"/>
<use name="FWCore/Utilities"/>
<use name="SimDataFormats/GeneratorProducts"/>
<use name="GeneratorInterface/Core"/>
<use name="FWCore/SharedMemory"/>
<use name="clhep"/>
<library name="GeneratorInterfaceCore_plugins" file="*.cc">
Expand Down
Loading