Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Preparing RecoEgamma_PhotonIdentification for Fall17 V2 MVA ID #24131

Merged
merged 12 commits into from Aug 11, 2018
2 changes: 1 addition & 1 deletion DQMOffline/Trigger/python/HLTEGTnPMonitor_cfi.py
Expand Up @@ -988,7 +988,7 @@
setupVIDSelection(egmGsfElectronIDsForDQM,item)


from RecoEgamma.PhotonIdentification.PhotonIDValueMapProducer_cfi import photonIDValueMapProducer
from RecoEgamma.PhotonIdentification.photonIDValueMapProducer_cff import photonIDValueMapProducer
from RecoEgamma.PhotonIdentification.egmPhotonIDs_cfi import egmPhotonIDs
egmPhotonIDsForDQM = egmPhotonIDs.clone()
egmPhotonIDsForDQM.physicsObjectsIDs = cms.VPSet()
Expand Down
2 changes: 1 addition & 1 deletion PhysicsTools/NanoAOD/python/photons_cff.py
Expand Up @@ -10,7 +10,7 @@

from PhysicsTools.SelectorUtils.tools.vid_id_tools import setupVIDSelection
from RecoEgamma.PhotonIdentification.egmPhotonIDs_cfi import *
from RecoEgamma.PhotonIdentification.PhotonIDValueMapProducer_cfi import *
from RecoEgamma.PhotonIdentification.photonIDValueMapProducer_cff import *
from RecoEgamma.PhotonIdentification.PhotonMVAValueMapProducer_cfi import *
from RecoEgamma.PhotonIdentification.PhotonRegressionValueMapProducer_cfi import *
from RecoEgamma.EgammaIsolationAlgos.egmPhotonIsolationMiniAOD_cff import *
Expand Down
43 changes: 36 additions & 7 deletions RecoEgamma/EgammaTools/interface/AnyMVAEstimatorRun2Base.h
Expand Up @@ -12,28 +12,40 @@ class AnyMVAEstimatorRun2Base {

public:
// Constructor, destructor
AnyMVAEstimatorRun2Base(const edm::ParameterSet& conf) : conf_(conf) {}
// Builds the state shared by all estimators from the configuration:
// reads the "mvaName", "mvaTag" and "nCategories" parameters and the
// untracked "debug" flag (false when not given); the method name is fixed.
AnyMVAEstimatorRun2Base(const edm::ParameterSet& conf) :
  conf_(conf),
  name_(conf.getParameter<std::string>("mvaName")),
  tag_(conf.getParameter<std::string>("mvaTag")),
  nCategories_(conf.getParameter<int>("nCategories")),
  methodName_("BDTG method"),
  debug_(conf.getUntrackedParameter<bool>("debug", false))
{}
virtual ~AnyMVAEstimatorRun2Base(){};

// Functions that must be provided in derived classes
// These functions should work on electrons or photons
// of the reco or pat type

virtual float mvaValue( const edm::Ptr<reco::Candidate>& particle, const edm::EventBase&) const = 0;
virtual float mvaValue( const edm::Ptr<reco::Candidate>& particle, const edm::EventBase&, int &iCategory) const = 0;
// Convenience overload for callers that do not need the category: forwards
// to the three-argument mvaValue() and discards the category it reports.
float mvaValue( const edm::Ptr<reco::Candidate>& candPtr, const edm::EventBase& iEvent) const {
  // Start from a defined value so that an implementation which never assigns
  // the out-parameter cannot leave it indeterminate.
  int iCategory = -1;
  return mvaValue(candPtr, iEvent, iCategory);
}

// A specific implementation of MVA is expected to have one or more categories
// defined with respect to eta, pt, etc.
// This function determines the category for a given particle.
virtual int findCategory( const edm::Ptr<reco::Candidate>& particle) const = 0;
virtual int getNCategories() const = 0;
virtual int findCategory( const edm::Ptr<reco::Candidate>& candPtr) const = 0;
int getNCategories() const { return nCategories_; }
// The name is a unique name associated with a particular MVA implementation,
// it is stored as a const data member of this class, set from the "mvaName" parameter.
virtual const std::string& getName() const = 0;
const std::string& getName() const { return name_; }
// An extra variable string set during construction that can be used
// to distinguish different instances of the estimator configured with
// different weight files. The tag can be used to construct names of ValueMaps, etc.
virtual const std::string& getTag() const = 0;
const std::string& getTag() const { return tag_; }

bool isDebug() const { return debug_; }
//
// Extra event content - if needed.
//
Expand All @@ -42,16 +54,33 @@ class AnyMVAEstimatorRun2Base {
// for certainty).

// This method needs to be used only once after this MVA estimator is constructed
virtual void setConsumes(edm::ConsumesCollector &&cc) const {};
virtual void setConsumes(edm::ConsumesCollector &&cc) {};

private:

//
// Data members
//

// Configuration
const edm::ParameterSet conf_;

// MVA name. This is a unique name for this MVA implementation.
// It will be used as part of ValueMap names.
// For simplicity, keep it set to the class name.
const std::string name_;

// MVA tag. This is an additional string variable to distinguish
// instances of the estimator of this class configured with different
// weight files.
const std::string tag_;

// The number of categories and number of variables per category
const int nCategories_;

const std::string methodName_;

const bool debug_;
};

// define the factory for this base class
Expand Down
6 changes: 3 additions & 3 deletions RecoEgamma/EgammaTools/interface/MVAValueMapProducer.h
Expand Up @@ -123,7 +123,6 @@ void MVAValueMapProducer<ParticleType>::produce(edm::Event& iEvent, const edm::E
<< " failed to find a standard AOD or miniAOD particle collection " << std::endl;
}


// Loop over MVA estimators
for( unsigned iEstimator = 0; iEstimator < mvaEstimators_.size(); iEstimator++ ){

Expand All @@ -134,10 +133,11 @@ void MVAValueMapProducer<ParticleType>::produce(edm::Event& iEvent, const edm::E
// Loop over particles
for (size_t i = 0; i < src->size(); ++i){
auto iCand = src->ptrAt(i);
const float response = mvaEstimators_[iEstimator]->mvaValue( iCand, iEvent );
int cat = -1; // Passed by reference to the mvaValue function to store the category
const float response = mvaEstimators_[iEstimator]->mvaValue( iCand, iEvent, cat );
mvaRawValues.push_back( response ); // The MVA score
mvaValues.push_back( 2.0/(1.0+exp(-2.0*response))-1 ); // MVA output between -1 and 1
mvaCategories.push_back( mvaEstimators_[iEstimator]->findCategory( iCand ) );
mvaCategories.push_back( cat );
} // end loop over particles

writeValueMap(iEvent, src, mvaValues , mvaValueMapNames_ [iEstimator] );
Expand Down
91 changes: 67 additions & 24 deletions RecoEgamma/EgammaTools/interface/MVAVariableManager.h
@@ -1,21 +1,25 @@
#ifndef RecoEgamma_EgammaTools_MVAVariableManager_H
#define RecoEgamma_EgammaTools_MVAVariableManager_H

#include "CommonTools/Utils/interface/StringObjectFunction.h"
#include "CommonTools/Utils/interface/StringObjectFunction.h"
#include "CommonTools/Utils/interface/StringCutObjectSelector.h"
#include "FWCore/ParameterSet/interface/FileInPath.h"
#include "DataFormats/Candidate/interface/Candidate.h"
#include "DataFormats/Common/interface/ValueMap.h"
#include "FWCore/Framework/interface/ConsumesCollector.h"
#include "FWCore/Framework/interface/Event.h"
#include "DataFormats/Candidate/interface/Candidate.h"

#include <fstream>

template <class ParticleType>
class MVAVariableManager {

public:
MVAVariableManager() {
nVars_ = 0;
MVAVariableManager()
: nVars_ (0)
, nHelperVars_ (0)
, nGlobalVars_ (0)
{
};

MVAVariableManager(const std::string &variableDefinitionFileName) {
Expand All @@ -25,6 +29,8 @@ class MVAVariableManager {
int init(const std::string &variableDefinitionFileName)
{
nVars_ = 0;
nHelperVars_ = 0;
nGlobalVars_ = 0;

variableInfos_.clear();
functions_.clear();
Expand Down Expand Up @@ -70,23 +76,15 @@ class MVAVariableManager {
return nVars_;
}

const std::vector<edm::InputTag>& getHelperInputTags() const {
return helperInputTags_;
}

const std::vector<edm::InputTag>& getGlobalInputTags() const {
return globalInputTags_;
}

float getValue(int index, const edm::Ptr<ParticleType>& ptclPtr, const edm::EventBase& iEvent) const
{
// For edm::EventBase with getByLabel
float getValue(int index, const edm::Ptr<ParticleType>& ptclPtr, const edm::EventBase& iEvent) const {
float value;
MVAVariableInfo varInfo = variableInfos_[index];
if (varInfo.fromVariableHelper) {
if (varInfo.fromVariableHelper >= 0) {
edm::Handle<edm::ValueMap<float>> vMap;
iEvent.getByLabel(edm::InputTag(formulas_[index]), vMap);
value = (*vMap)[ptclPtr];
} else if (varInfo.isGlobalVariable) {
} else if (varInfo.isGlobalVariable >= 0) {
edm::Handle<double> valueHandle;
iEvent.getByLabel(edm::InputTag(formulas_[index]), valueHandle);
value = *valueHandle;
Expand All @@ -102,30 +100,65 @@ class MVAVariableManager {
return value;
}

// For edm::Event where getByToken is possible.
// Evaluates variable number `index` for the given particle. The value comes
// from a helper ValueMap<float> token, from a global double token, or from
// the string-formula function, and is then limited to the clip bounds
// stored for that variable.
float getValue(int index, const edm::Ptr<ParticleType>& ptclPtr, const edm::Event& iEvent) const {
  const MVAVariableInfo& info = variableInfos_[index];

  float result;
  if (info.fromVariableHelper >= 0) {
    // Per-particle quantity: a non-negative index selects the helper token.
    edm::Handle<edm::ValueMap<float>> helperMap;
    iEvent.getByToken(helperTokens_[info.fromVariableHelper], helperMap);
    result = (*helperMap)[ptclPtr];
  } else if (info.isGlobalVariable >= 0) {
    // Event-wide quantity: a non-negative index selects the global token.
    edm::Handle<double> globalHandle;
    iEvent.getByToken(globalTokens_[info.isGlobalVariable], globalHandle);
    result = *globalHandle;
  } else {
    // Neither index is set: evaluate the formula on the particle itself.
    result = functions_[index](*ptclPtr);
  }

  // Apply the lower bound first, then the upper bound.
  if (info.hasLowerClip) {
    if (result < info.lowerClipValue) {
      result = info.lowerClipValue;
    }
  }
  if (info.hasUpperClip) {
    if (result > info.upperClipValue) {
      result = info.upperClipValue;
    }
  }
  return result;
}

// Declares every event product needed by the variables to the consumes
// collector and stores the resulting tokens. Tokens are appended in the
// same order as the corresponding input tags.
void setConsumes(edm::ConsumesCollector&& cc) {
  // ValueMap<float> products coming from the variable helper
  for (const auto& helperTag : helperInputTags_) {
    helperTokens_.push_back(cc.consumes<edm::ValueMap<float>>(helperTag));
  }
  // Global products stored as a plain double
  for (const auto& globalTag : globalInputTags_) {
    globalTokens_.push_back(cc.consumes<double>(globalTag));
  }
}

private:

struct MVAVariableInfo {
bool hasLowerClip;
bool hasUpperClip;
float lowerClipValue;
float upperClipValue;
bool fromVariableHelper;
bool isGlobalVariable;
int fromVariableHelper;
int isGlobalVariable;
};

void addVariable(const std::string &name, const std::string &formula,
const std::string &lowerClip, const std::string &upperClip)
{
bool hasLowerClip = lowerClip.find("None") == std::string::npos;
bool hasUpperClip = upperClip.find("None") == std::string::npos;
bool fromVariableHelper = formula.find("MVAVariableHelper") != std::string::npos ||
formula.find("IDValueMapProducer") != std::string::npos ||
formula.find("egmPhotonIsolation") != std::string::npos;
int fromVariableHelper = formula.find("MVAVariableHelper") != std::string::npos ||
formula.find("IDValueMapProducer") != std::string::npos ||
formula.find("egmPhotonIsolation") != std::string::npos;
float lowerClipValue = hasLowerClip ? (float)::atof(lowerClip.c_str()) : 0.;
float upperClipValue = hasUpperClip ? (float)::atof(upperClip.c_str()) : 0.;

// *Rho* is the only global variable ever used, so it's hardcoded...
bool isGlobalVariable = formula.find("Rho") != std::string::npos;
int isGlobalVariable = formula.find("Rho") != std::string::npos;

if ( !(fromVariableHelper || isGlobalVariable) ) {
functions_.push_back(StringObjectFunction<ParticleType>(formula));
Expand All @@ -143,6 +176,11 @@ class MVAVariableManager {
if (isGlobalVariable) {
globalInputTags_.push_back(edm::InputTag(formula));
}

// Switch from bool to int, corresponding to the token index
fromVariableHelper = fromVariableHelper ? nHelperVars_++ : -1;
isGlobalVariable = isGlobalVariable ? nGlobalVars_++ : - 1;

MVAVariableInfo varInfo = {
.hasLowerClip = hasLowerClip,
.hasUpperClip = hasUpperClip,
Expand All @@ -157,8 +195,10 @@ class MVAVariableManager {
nVars_++;
};


int nVars_;
int nHelperVars_;
int nGlobalVars_;

std::vector<MVAVariableInfo> variableInfos_;
std::vector<StringObjectFunction<ParticleType>> functions_;
std::vector<std::string> formulas_;
Expand All @@ -167,8 +207,11 @@ class MVAVariableManager {

// To store the MVAVariableHelper input tags needed for the variables in this container
std::vector<edm::InputTag> helperInputTags_;

std::vector<edm::InputTag> globalInputTags_;

// Tokens
std::vector<edm::EDGetToken> helperTokens_;
std::vector<edm::EDGetToken> globalTokens_;
};

#endif
Expand Up @@ -12,7 +12,7 @@ class ElectronMVAEstimatorRun2 : public AnyMVAEstimatorRun2Base{

// Constructor and destructor
ElectronMVAEstimatorRun2(const edm::ParameterSet& conf);
~ElectronMVAEstimatorRun2() override;
~ElectronMVAEstimatorRun2() override {};
// For use with FWLite/Python
ElectronMVAEstimatorRun2(const std::string &mvaTag,
const std::string &mvaName,
Expand All @@ -21,50 +21,31 @@ class ElectronMVAEstimatorRun2 : public AnyMVAEstimatorRun2Base{
void init(const std::vector<std::string> &weightFileNames);

// Calculation of the MVA value
float mvaValue( const edm::Ptr<reco::Candidate>& candPtr, const edm::EventBase& iEvent) const override;

// Utility functions
int getNCategories() const override { return nCategories_; }
const std::string& getName() const final { return name_; }
const std::string& getTag() const final { return tag_; }

int findCategory( const edm::Ptr<reco::Candidate>& candPtr) const override;
float mvaValue( const edm::Ptr<reco::Candidate>& candPtr, const edm::EventBase& iEvent, int &iCategory) const override;

// Call this function once after the constructor to declare
// the needed event content pieces to the framework
void setConsumes(edm::ConsumesCollector&&) const final;
void setConsumes(edm::ConsumesCollector&&) final;

private:
int findCategory( const edm::Ptr<reco::Candidate>& candPtr) const override;

// MVA name. This is a unique name for this MVA implementation.
// It will be used as part of ValueMap names.
// For simplicity, keep it set to the class name.
const std::string name_;
private:

// MVA tag. This is an additional string variable to distinguish
// instances of the estimator of this class configured with different
// weight files.
const std::string tag_;
int findCategory( const edm::Ptr<reco::GsfElectron>& gsfPtr) const;

// The number of categories and number of variables per category
int nCategories_;
std::vector<StringCutObjectSelector<reco::GsfElectron>> categoryFunctions_;
std::vector<int> nVariables_;

// Data members
std::vector< std::unique_ptr<const GBRForest> > gbrForests_;

const std::string methodName_;


// There might be different variables for each category, so the variables
// names vector is itself a vector of length nCategories
std::vector<std::vector<int>> variables_;

MVAVariableManager<reco::GsfElectron> mvaVarMngr_;

bool debug_;

};

#endif