Skip to content

Commit

Permalink
Merge pull request #21215 from mverzett/DeepCSVProbToDiscriminators_C…
Browse files Browse the repository at this point in the history
…MSSW_9_4_X

DeepCSV/DeepCMVA discriminators
  • Loading branch information
cmsbuild committed Nov 19, 2017
2 parents fc3916a + 6213e94 commit b5ee7be
Show file tree
Hide file tree
Showing 9 changed files with 378 additions and 10 deletions.
Expand Up @@ -54,12 +54,6 @@
cms.InputTag('pfDeepCSVJetTags:probc'),
cms.InputTag('pfDeepCSVJetTags:probudsg'),
cms.InputTag('pfDeepCSVJetTags:probbb'),
# DeepCMVA
# cms.InputTag('pfDeepCMVAJetTags:probb'),
# cms.InputTag('pfDeepCMVAJetTags:probc'),
# cms.InputTag('pfDeepCMVAJetTags:probudsg'),
# cms.InputTag('pfDeepCMVAJetTags:probbb'),
# cms.InputTag('pfDeepCMVAJetTags:probcc'),
),
# clone tag infos ATTENTION: these take lots of space!
# usually the discriminators from the default algos
Expand Down
12 changes: 12 additions & 0 deletions PhysicsTools/PatAlgos/python/recoLayer0/bTagging_cff.py
Expand Up @@ -171,3 +171,15 @@
# ChargeTagging
, 'pfChargeBJetTags' : [["pfImpactParameterTagInfos", "pfInclusiveSecondaryVertexFinderTagInfos", "softPFMuonsTagInfos", "softPFElectronsTagInfos"]]
}


# Meta-taggers are simple arithmetic on top of the outputs of other taggers.
# Each key is a meta-discriminator given as 'producerLabel:instanceLabel'; the
# value lists the parent discriminators it is computed from. They are stored
# here so that, when a meta-tagger is requested to be re-run, its parent
# taggers are re-run as well.
supportedMetaDiscr = {
'pfDeepCSVDiscriminatorsJetTags:BvsAll' : ['pfDeepCSVJetTags:probudsg', 'pfDeepCSVJetTags:probb', 'pfDeepCSVJetTags:probc', 'pfDeepCSVJetTags:probbb'],
'pfDeepCSVDiscriminatorsJetTags:CvsB' : ['pfDeepCSVJetTags:probudsg', 'pfDeepCSVJetTags:probb', 'pfDeepCSVJetTags:probc', 'pfDeepCSVJetTags:probbb'],
'pfDeepCSVDiscriminatorsJetTags:CvsL' : ['pfDeepCSVJetTags:probudsg', 'pfDeepCSVJetTags:probb', 'pfDeepCSVJetTags:probc', 'pfDeepCSVJetTags:probbb'],
'pfDeepCMVADiscriminatorsJetTags:BvsAll' : ['pfDeepCMVAJetTags:probudsg', 'pfDeepCMVAJetTags:probb', 'pfDeepCMVAJetTags:probc', 'pfDeepCMVAJetTags:probbb'],
'pfDeepCMVADiscriminatorsJetTags:CvsB' : ['pfDeepCMVAJetTags:probudsg', 'pfDeepCMVAJetTags:probb', 'pfDeepCMVAJetTags:probc', 'pfDeepCMVAJetTags:probbb'],
'pfDeepCMVADiscriminatorsJetTags:CvsL' : ['pfDeepCMVAJetTags:probudsg', 'pfDeepCMVAJetTags:probb', 'pfDeepCMVAJetTags:probc', 'pfDeepCMVAJetTags:probbb'],
}
9 changes: 9 additions & 0 deletions PhysicsTools/PatAlgos/python/slimming/miniAOD_tools.py
Expand Up @@ -236,6 +236,15 @@ def miniAOD_customizeCommon(process):

process.patJets.userData.userFloats.src += [ cms.InputTag('QGTagger:qgLikelihood'), ]

## DeepCSV meta discriminators (simple arithmetic on output probabilities)
process.load('RecoBTag.Combined.deepFlavour_cff')
task.add(process.pfDeepCSVDiscriminatorsJetTags)
process.patJets.discriminatorSources.extend([
cms.InputTag('pfDeepCSVDiscriminatorsJetTags:BvsAll' ),
cms.InputTag('pfDeepCSVDiscriminatorsJetTags:CvsB' ),
cms.InputTag('pfDeepCSVDiscriminatorsJetTags:CvsL' ),
])

## CaloJets
process.caloJetMap = cms.EDProducer("RecoJetDeltaRValueMapProducer",
src = process.patJets.jetSource,
Expand Down
42 changes: 39 additions & 3 deletions PhysicsTools/PatAlgos/python/tools/jetTools.py
Expand Up @@ -5,6 +5,7 @@
from PhysicsTools.PatAlgos.tools.helpers import *
from PhysicsTools.PatAlgos.recoLayer0.bTagging_cff import *
import sys
from FWCore.ParameterSet.MassReplace import MassSearchReplaceAnyInputTagVisitor

## dictionary with supported jet clustering algorithms
supportedJetAlgos = {
Expand Down Expand Up @@ -236,6 +237,15 @@ def setupBTagging(process, jetSource, pfCandidates, explicitJTA, pvSource, svSou

task = getPatAlgosToolsTask(process)

## expand the btagDiscriminators to remove the meta taggers and substitute the equivalent sources
discriminators = set(btagDiscriminators)
present_meta = discriminators.intersection(set(supportedMetaDiscr.keys()))
discriminators -= present_meta
for meta_tagger in present_meta:
for src in supportedMetaDiscr[meta_tagger]:
discriminators.add(src)
btagDiscriminators = list(discriminators)

## expand tagInfos to what is explicitly required by user + implicit
## requirements that come in from one or the other discriminator
requiredTagInfos = list(btagInfos)
Expand Down Expand Up @@ -576,6 +586,32 @@ def setupBTagging(process, jetSource, pfCandidates, explicitJTA, pvSource, svSou
acceptedBtagDiscriminators.append(discriminator_name)
else:
print ' --> %s ignored, since not available via RecoBTag.Configuration.RecoBTag_cff!'%(btagDiscr)
#update meta-taggers, if any
for meta_tagger in present_meta:
btagDiscr = meta_tagger.split(':')[0] #split input tag to get the producer label
#print discriminator_name, '-->', btagDiscr
if hasattr(btag,btagDiscr):
newDiscr = btagPrefix+btagDiscr+labelName+postfix #new discriminator name
if hasattr(process, newDiscr):
pass
else:
addToProcessAndTask(
newDiscr,
getattr(btag, btagDiscr).clone(),
process,
task
)
for dependency in supportedMetaDiscr[meta_tagger]:
if ':' in dependency:
new_dep = btagPrefix+dependency.split(':')[0]+labelName+postfix+':'+dependency.split(':')[1]
else:
new_dep = btagPrefix+dependency+labelName+postfix
replace = MassSearchReplaceAnyInputTagVisitor(dependency, new_dep)
replace.doIt(getattr(process, newDiscr), newDiscr)
acceptedBtagDiscriminators.append(meta_tagger)
else:
print ' --> %s ignored, since not available via RecoBTag.Configuration.RecoBTag_cff!'%(btagDiscr)

## replace corresponding tags for pat jet production
patJets.tagInfoSources = cms.VInputTag( *[ cms.InputTag(btagPrefix+x+labelName+postfix) for x in acceptedTagInfos ] )
patJets.discriminatorSources = cms.VInputTag(*[
Expand Down Expand Up @@ -708,7 +744,7 @@ def __init__(self):
and \'type-2\' are not case sensitive.", tuple, acceptNoneValue=True)
self.addParameter(self._defaultParameters,'btagDiscriminators',['None'], "If you are interested in btagging, in most cases just the labels of the btag discriminators that \
you are interested in is all relevant information that you need for a high level analysis. Add here all btag discriminators, that you are interested in as a list of strings. \
If this list is empty no btag discriminator information will be added to your new patJet collection.", allowedValues=supportedBtagDiscr.keys(),Type=list)
If this list is empty no btag discriminator information will be added to your new patJet collection.", allowedValues=(supportedBtagDiscr.keys()+supportedMetaDiscr.keys()),Type=list)
self.addParameter(self._defaultParameters,'btagInfos',['None'], "The btagInfos objects contain all relevant information from which all discriminators of a certain \
type have been calculated. You might be interested in keeping this information for low level tests or to re-calculate some discriminators from hand. Note that this information \
on the one hand can be very space consuming and that it is not necessary to access the pre-calculated btag discriminator information that has been derived from it. Only in very \
Expand Down Expand Up @@ -1114,7 +1150,7 @@ def __init__(self):
applied. If you are not interested in MET(Type1) corrections to this new patJet collection pass None as third argument of the python tuple.", tuple, acceptNoneValue=True)
self.addParameter(self._defaultParameters,'btagDiscriminators',['None'], "If you are interested in btagging in general the btag discriminators is all relevant \
information that you need for a high level analysis. Add here all btag discriminators, that you are interested in as a list of strings. If this list is empty no btag \
discriminator information will be added to your new patJet collection.", allowedValues=supportedBtagDiscr.keys(),Type=list)
discriminator information will be added to your new patJet collection.", allowedValues=(supportedBtagDiscr.keys()+supportedMetaDiscr.keys()),Type=list)
self.addParameter(self._defaultParameters,'btagInfos',['None'], "The btagInfos objects conatin all relevant information from which all discriminators of a certain \
type have been calculated. Note that this information on the one hand can be very space consuming and on the other hand is not necessary to access the btag discriminator \
information that has been derived from it. Only in very special cases the btagInfos might really be needed in your analysis. Add here all btagInfos, that you are interested \
Expand Down Expand Up @@ -1326,7 +1362,7 @@ def __init__(self):
and \'type-2\' are not case sensitive.", tuple, acceptNoneValue=True)
self.addParameter(self._defaultParameters,'btagDiscriminators',['None'], "If you are interested in btagging, in most cases just the labels of the btag discriminators that \
you are interested in is all relevant information that you need for a high level analysis. Add here all btag discriminators, that you are interested in as a list of strings. \
If this list is empty no btag discriminator information will be added to your new patJet collection.", allowedValues=supportedBtagDiscr.keys(),Type=list)
If this list is empty no btag discriminator information will be added to your new patJet collection.", allowedValues=(supportedBtagDiscr.keys()+supportedMetaDiscr.keys()),Type=list)
self.addParameter(self._defaultParameters,'btagInfos',['None'], "The btagInfos objects contain all relevant information from which all discriminators of a certain \
type have been calculated. You might be interested in keeping this information for low level tests or to re-calculate some discriminators from hand. Note that this information \
on the one hand can be very space consuming and that it is not necessary to access the pre-calculated btag discriminator information that has been derived from it. Only in very \
Expand Down
Expand Up @@ -23,7 +23,8 @@
process,
jetSource = cms.InputTag('slimmedJets'),
jetCorrections = ('AK4PFchs', cms.vstring(['L1FastJet', 'L2Relative', 'L3Absolute']), 'None'),
btagDiscriminators = ['pfCombinedSecondaryVertexV2BJetTags'] ## to add discriminators
btagDiscriminators = ['pfCombinedSecondaryVertexV2BJetTags', 'pfDeepCSVDiscriminatorsJetTags:BvsAll', 'pfDeepCSVDiscriminatorsJetTags:CvsB', 'pfDeepCSVDiscriminatorsJetTags:CvsL'], ## to add discriminators,
btagPrefix = 'TEST',
)
process.updatedPatJets.userData.userFloats.src += ['oldJetMass']

Expand Down
242 changes: 242 additions & 0 deletions RecoBTag/Combined/plugins/BTagProbabilityToDiscriminator.cc
@@ -0,0 +1,242 @@
// -*- C++ -*-
//
// Package: RecoBTag/Combined
// Class: BTagProbabilityToDiscriminator
//
/**
*
* Description: EDProducer that performs simple arithmetic on the
* multi-classifier probabilities to compute simple discriminators
*
* Implementation:
* A collection of output discriminators is defined in a VPSet, each entry
* containing the output name, the input probabilities (numerator) and the
* normalization (denominator; empty vInputTag if none). The output is
* computed as sum(numerator)/sum(denominator), with the denominator taken
* as 1 when no normalization is given.
*/
//
// Original Author: Mauro Verzetti (CERN)
//
//

// system include files
#include <memory>

// user include files
#include "FWCore/Framework/interface/Frameworkfwd.h"
#include "FWCore/Framework/interface/stream/EDProducer.h"

#include "FWCore/Framework/interface/Event.h"
#include "FWCore/Framework/interface/MakerMacros.h"

#include "FWCore/ParameterSet/interface/ParameterSet.h"
#include "FWCore/Utilities/interface/StreamID.h"

#include "DataFormats/BTauReco/interface/JetTag.h"
#include "DataFormats/Common/interface/RefToBase.h"
#include "FWCore/Framework/interface/makeRefToBaseProdFrom.h"

// from lwtnn
#include <fstream>
#include <iostream>
#include <map>
#include <set>
#include <string>
#include <vector>

#include <boost/algorithm/string.hpp>
#include <unordered_map>
using namespace std;
using namespace reco;
//
// class declaration
//

/// Stream producer that combines per-jet tagger probabilities (JetTagCollections)
/// into new discriminators computed as sum(numerator) / sum(denominator).
class BTagProbabilityToDiscriminator : public edm::stream::EDProducer<> {
public:
  explicit BTagProbabilityToDiscriminator(const edm::ParameterSet &);
  ~BTagProbabilityToDiscriminator() override = default;

  /// Declares the allowed configuration parameters and their defaults.
  static void fillDescriptions(edm::ConfigurationDescriptions &descriptions);

private:
  using vInputTag = std::vector<edm::InputTag>;
  using vstring = std::vector<std::string>;
  using vPSet = std::vector<edm::ParameterSet>;

  // Definition of one output discriminator. The inputs are stored as encoded
  // input tags, which are the keys used to look the collections up in jet_tags_.
  struct Discriminator {
    std::string name;     // instance label under which the output is put in the event
    vstring numerator;    // encoded tags of the probabilities summed in the numerator
    vstring denominator;  // encoded tags summed in the denominator (may be empty)
  };

  void produce(edm::Event &, const edm::EventSetup &) override;
  void beginStream(edm::StreamID) override {}
  void endStream() override {}

  // ----------member data ---------------------------
  // Output discriminators, in configuration order.
  std::vector<Discriminator> discrims_;
  // Tokens of all distinct input collections, keyed by encoded input tag, so
  // that a probability shared by several discriminators is consumed only once.
  std::unordered_map<std::string, edm::EDGetTokenT<JetTagCollection>> jet_tags_;
};

/// Reads the "discriminators" VPSet, declares one output JetTagCollection per
/// entry and registers the consumption of every distinct input probability.
///
/// Throws cms::Exception("RuntimeError") if the configuration does not
/// reference any input probability at all.
BTagProbabilityToDiscriminator::BTagProbabilityToDiscriminator(
    const edm::ParameterSet &iConfig) {
  // Registers every tag of `inputs` that has not been seen yet and returns the
  // encoded labels in the same order. Shared by numerator and denominator.
  const auto registerInputs = [this](const vInputTag &inputs) {
    vstring labels;
    labels.reserve(inputs.size());
    for (const auto &intag : inputs) {
      std::string label = intag.encode();  // encode once, reuse as map key
      if (jet_tags_.find(label) == jet_tags_.end()) {  // new probability
        jet_tags_[label] = this->consumes<JetTagCollection>(intag);
      }
      labels.push_back(std::move(label));
    }
    return labels;
  };

  // Iterate by const reference: ParameterSets are expensive to copy.
  for (const auto &discriminator : iConfig.getParameter<vPSet>("discriminators")) {
    Discriminator current;
    current.name = discriminator.getParameter<std::string>("name");
    produces<JetTagCollection>(current.name);

    current.numerator =
        registerInputs(discriminator.getParameter<vInputTag>("numerator"));
    current.denominator =
        registerInputs(discriminator.getParameter<vInputTag>("denominator"));

    discrims_.push_back(std::move(current));
  }

  // Without inputs there is nothing to clone the output collections from.
  if (jet_tags_.empty()) {
    throw cms::Exception("RuntimeError")
        << "The module BTagProbabilityToDiscriminator is run without any input "
           "probability to work on!"
        << std::endl;
  }
}

/// Computes every configured discriminator for each jet and puts one
/// JetTagCollection per discriminator into the event.
///
/// For each jet the value is sum(numerator) / sum(denominator), where an empty
/// denominator list counts as 1. The value is set to -10 when the denominator
/// is 0 or the numerator is negative.
///
/// Throws cms::Exception("RuntimeError") if the input collections do not all
/// have the same size.
void BTagProbabilityToDiscriminator::produce(edm::Event &iEvent,
                                             const edm::EventSetup &iSetup) {
  // Fetch every distinct input collection once per event.
  std::unordered_map<std::string, edm::Handle<JetTagCollection>> tags;
  tags.reserve(jet_tags_.size());
  size_t size = 0;
  bool first = true;
  for (const auto &entry : jet_tags_) {
    edm::Handle<JetTagCollection> handle;
    iEvent.getByToken(entry.second, handle);
    if (first) {
      size = handle->size();
      first = false;
    } else if (handle->size() != size) {
      throw cms::Exception("RuntimeError")
          << "The length of one of the input jet tag collections does not "
             "match "
          << "with the others, this is probably due to the probabilities "
             "belonging to different jet collections, which is forbidden!"
          << std::endl;
    }
    tags.emplace(entry.first, handle);
  }

  // Resolve encoded labels to collections once per discriminator, so that no
  // string hashing happens inside the per-jet loop. at() is used instead of
  // operator[] so a missing label can never silently insert a null handle.
  const auto resolve = [&tags](const vstring &labels) {
    std::vector<const JetTagCollection *> collections;
    collections.reserve(labels.size());
    for (const auto &label : labels)
      collections.push_back(tags.at(label).product());
    return collections;
  };

  // Every output is a "map" RefToBase<Jet> --> float. It is cloned from one of
  // the inputs to inherit the jet keys; the values are overwritten below.
  const JetTagCollection &reference = *(tags.begin()->second);

  for (const auto &discr : discrims_) {
    const auto numerators = resolve(discr.numerator);
    const auto denominators = resolve(discr.denominator);

    auto output = std::make_unique<JetTagCollection>(reference);

    // loop over jets
    for (size_t idx = 0; idx < output->size(); ++idx) {
      float numerator = 0;
      for (const auto *collection : numerators)
        numerator += (*collection)[idx].second;

      // no normalization requested: divide by 1
      float denominator = denominators.empty() ? 1 : 0;
      for (const auto *collection : denominators)
        denominator += (*collection)[idx].second;

      // protect against 0 denominator and undefined jet values (numerator
      // probability < 0)
      const float new_value = (denominator != 0 && numerator >= 0)
                                  ? numerator / denominator
                                  : -10.;
      (*output)[output->key(idx)] = new_value;  // key is used only for writing
    }

    // put the output in the event
    iEvent.put(std::move(output), discr.name);
  }
}

// ------------ declares the allowed parameters of the module and their
// defaults: a "discriminators" VPSet with the entries BvsAll, CvsB and CvsL
// built from the pfDeepCSVJetTags probabilities ------------
void BTagProbabilityToDiscriminator::fillDescriptions(
    edm::ConfigurationDescriptions &descriptions) {
  using InputTags = std::vector<edm::InputTag>;

  // Builds one default entry of the "discriminators" VPSet.
  const auto makeEntry = [](const std::string &name,
                            const InputTags &numerator,
                            const InputTags &denominator) {
    edm::ParameterSet entry;
    entry.addParameter<InputTags>("denominator", denominator);
    entry.addParameter<InputTags>("numerator", numerator);
    entry.addParameter<std::string>("name", name);
    return entry;
  };

  const edm::InputTag probb("pfDeepCSVJetTags", "probb");
  const edm::InputTag probbb("pfDeepCSVJetTags", "probbb");
  const edm::InputTag probc("pfDeepCSVJetTags", "probc");
  const edm::InputTag probudsg("pfDeepCSVJetTags", "probudsg");

  // Description of a single VPSet entry (its own defaults mirror BvsAll).
  edm::ParameterSetDescription entryDescription;
  entryDescription.add<InputTags>("denominator", InputTags{});
  entryDescription.add<InputTags>("numerator", InputTags{probb, probbb});
  entryDescription.add<std::string>("name", "BvsAll");

  // Default content of the VPSet.
  std::vector<edm::ParameterSet> defaultEntries;
  defaultEntries.reserve(3);
  defaultEntries.push_back(
      makeEntry("BvsAll", InputTags{probb, probbb}, InputTags{}));
  defaultEntries.push_back(
      makeEntry("CvsB", InputTags{probc}, InputTags{probc, probb, probbb}));
  defaultEntries.push_back(
      makeEntry("CvsL", InputTags{probc}, InputTags{probudsg, probc}));

  edm::ParameterSetDescription desc;
  desc.addVPSet("discriminators", entryDescription, defaultEntries);
  descriptions.addDefault(desc);
}

// define BTagProbabilityToDiscriminator as a framework module plug-in
DEFINE_FWK_MODULE(BTagProbabilityToDiscriminator);
2 changes: 2 additions & 0 deletions RecoBTag/Combined/python/deepFlavour_cff.py
Expand Up @@ -2,7 +2,9 @@
from RecoBTag.Combined.pfDeepCSVTagInfos_cfi import pfDeepCSVTagInfos
from RecoBTag.Combined.pfDeepCMVATagInfos_cfi import pfDeepCMVATagInfos
from RecoBTag.Combined.pfDeepCSVJetTags_cfi import pfDeepCSVJetTags
from RecoBTag.Combined.pfDeepCSVDiscriminatorsJetTags_cfi import pfDeepCSVDiscriminatorsJetTags
from RecoBTag.Combined.pfDeepCMVAJetTags_cfi import pfDeepCMVAJetTags
from RecoBTag.Combined.pfDeepCMVADiscriminatorsJetTags_cfi import pfDeepCMVADiscriminatorsJetTags

##
## Negative and positive taggers for light SF estimation
Expand Down

0 comments on commit b5ee7be

Please sign in to comment.