Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Btag cond format #7796

Merged
merged 19 commits into from
Mar 16, 2015
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 9 additions & 7 deletions CondFormats/BTauObjects/src/BTagCalibration.cc
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ void BTagCalibration::addEntry(const BTagEntry &entry)
const std::vector<BTagEntry>& BTagCalibration::getEntries(
const BTagEntry::Parameters &par) const
{
auto tok = token(par);
std::string tok = token(par);
if (!data_.count(tok)) {
throw cms::Exception("BTagCalibration")
<< "(OperatingPoint, measurementType, sysType) not available: "
Expand Down Expand Up @@ -60,12 +60,14 @@ void BTagCalibration::readCSV(std::istream &s)
}
}

void BTagCalibration::makeCSV(std::ostream &s) const
{
s << BTagEntry::makeCSVHeader();
for (auto i = data_.cbegin(); i != data_.cend(); ++i) {
auto vec = i->second;
for (auto j = vec.cbegin(); j != vec.cend(); ++j) {
void BTagCalibration::makeCSV(ostream &s) const
{
s << tagger_ << ";" << BTagEntry::makeCSVHeader();
for (std::map<std::string, std::vector<BTagEntry> >::const_iterator i
= data_.cbegin(); i != data_.cend(); ++i) {
const std::vector<BTagEntry> &vec = i->second;
for (std::vector<BTagEntry>::const_iterator j
= vec.cbegin(); j != vec.cend(); ++j) {
s << j->makeCSVLine();
}
}
Expand Down
4 changes: 2 additions & 2 deletions CondFormats/BTauObjects/src/BTagCalibrationReader.cc
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ double BTagCalibrationReader::eval(BTagEntry::JetFlavor jf,

// search linearly through eta, pt and discr ranges and eval
// future: find some clever data structure based on intervals
const auto &entries = tmpData_.at(jf);
const std::vector<TmpEntry> &entries = tmpData_.at(jf);
for (unsigned i=0; i<entries.size(); ++i) {
const BTagCalibrationReader::TmpEntry &e = entries.at(i);
if (
Expand All @@ -45,7 +45,7 @@ double BTagCalibrationReader::eval(BTagEntry::JetFlavor jf,
void BTagCalibrationReader::setupTmpData(const BTagCalibration* c)
{
useAbsEta = std::vector<bool>(4, true);
const auto &entries = c->getEntries(params);
const std::vector<BTagEntry> &entries = c->getEntries(params);
for (unsigned i=0; i<entries.size(); ++i) {
const BTagEntry &be = entries[i];
BTagCalibrationReader::TmpEntry te;
Expand Down
124 changes: 104 additions & 20 deletions CondFormats/BTauObjects/src/BTagEntry.cc
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,12 @@ BTagEntry::Parameters::Parameters(
ptMax(pt_max),
discrMin(discr_min),
discrMax(discr_max)
{}
{
std::transform(measurementType.begin(), measurementType.end(),
measurementType.begin(), ::tolower);
std::transform(sysType.begin(), sysType.end(),
sysType.begin(), ::tolower);
}

BTagEntry::BTagEntry(const std::string &csvLine)
{
Expand Down Expand Up @@ -65,21 +70,23 @@ BTagEntry::BTagEntry(const std::string &csvLine)
}

// make parameters
if (stoi(vec[0]) > 3) {
unsigned op = stoi(vec[0]);
if (op > 3) {
throw cms::Exception("BTagCalibration")
<< "Invalid csv line; OperatingPoint > 3: "
<< csvLine;
}
if (stoi(vec[3]) > 2) {
unsigned jf = stoi(vec[3]);
if (jf > 2) {
throw cms::Exception("BTagCalibration")
<< "Invalid csv line; JetFlavor > 2: "
<< csvLine;
}
params = BTagEntry::Parameters(
BTagEntry::OperatingPoint(stoi(vec[0])),
BTagEntry::OperatingPoint(op),
vec[1],
vec[2],
BTagEntry::JetFlavor(stoi(vec[3])),
BTagEntry::JetFlavor(jf),
stof(vec[4]),
stof(vec[5]),
stof(vec[6]),
Expand All @@ -92,21 +99,95 @@ BTagEntry::BTagEntry(const std::string &csvLine)
BTagEntry::BTagEntry(const std::string &func, BTagEntry::Parameters p):
formula(func),
params(p)
{}
{
TF1 f1("", formula.c_str()); // compile formula to check validity
if (f1.IsZombie()) {
throw cms::Exception("BTagCalibration")
<< "Invalid func string; formula does not compile: "
<< func;
}
}

BTagEntry::BTagEntry(const TF1* func, BTagEntry::Parameters p):
formula(std::string(func->GetExpFormula("p").Data())),
params(p)
{}
{
if (func->IsZombie()) {
throw cms::Exception("BTagCalibration")
<< "Invalid TF1 function; function is zombie: "
<< func->GetName();
}
}

// Creates chained step functions like this:
// "<prevous_bin> : x<bin_high_bound ? bin_value : <next_bin>"
// e.g. "x<0 ? 1 : x<1 ? 2 : x<2 ? 3 : 4"
std::string th1ToFormulaLin(const TH1* hist) {
int nbins = hist->GetNbinsX();
TAxis * axis = hist->GetXaxis();
std::stringstream buff;
buff << "x<" << axis->GetBinLowEdge(1) << " ? 0. : "; // default value
for (int i=1; i<nbins+1; ++i) {
char tmp_buff[50];
sprintf(tmp_buff,
"x<%g ? %g : ", // %g is the smaller one of %e or %f
axis->GetBinUpEdge(i),
hist->GetBinContent(i));
buff << tmp_buff;
}
buff << 0.; // default value
return buff.str();
}

// Creates step functions making a binary search tree:
// "x<mid_bin_bound ? (<left side tree>) : (<right side tree>)"
// e.g. "x<2 ? (x<1 ? (x<0 ? 0:0.1) : (1)) : (x<4 ? (x<3 ? 2:3) : (0))"
std::string th1ToFormulaBinTree(const TH1* hist, int start=0, int end=-1) {
if (end == -1) { // initialize
start = 0.;
end = hist->GetNbinsX()+1;
TH1* h2 = (TH1*) hist->Clone();
h2->SetBinContent(start, 0); // kill underflow
h2->SetBinContent(end, 0); // kill overflow
std::string res = th1ToFormulaBinTree(h2, start, end);
delete h2;
return res;
}
if (start == end) { // leave is reached
char tmp_buff[20];
sprintf(tmp_buff, "%g", hist->GetBinContent(start));
return std::string(tmp_buff);
}
if (start == end - 1) { // no parenthesis for neighbors
char tmp_buff[70];
sprintf(tmp_buff,
"x<%g ? %g:%g",
hist->GetXaxis()->GetBinUpEdge(start),
hist->GetBinContent(start),
hist->GetBinContent(end));
return std::string(tmp_buff);
}

// top-down recursion
std::stringstream buff;
int mid = (end-start)/2 + start;
char tmp_buff[25];
sprintf(tmp_buff,
"x<%g ? (",
hist->GetXaxis()->GetBinUpEdge(mid));
buff << tmp_buff
<< th1ToFormulaBinTree(hist, start, mid)
<< ") : ("
<< th1ToFormulaBinTree(hist, mid+1, end)
<< ")";
return buff.str();
}

BTagEntry::BTagEntry(const TH1* hist, BTagEntry::Parameters p):
params(p)
{
int nbins = hist->GetNbinsX();
auto axis = hist->GetXaxis();
TAxis * axis = hist->GetXaxis();

// overwrite bounds with histo values
if (params.operatingPoint == BTagEntry::OP_RESHAPING) {
Expand All @@ -117,18 +198,21 @@ BTagEntry::BTagEntry(const TH1* hist, BTagEntry::Parameters p):
params.ptMax = axis->GetBinUpEdge(nbins);
}

std::stringstream buff;
buff << "x<" << axis->GetBinLowEdge(1) << " ? 0. : "; // default value
for (int i=1; i<nbins+1; ++i) {
char tmp_buff[100];
sprintf(tmp_buff,
"x<%g ? %g : ", // %g is the smaller one of %e or %f
axis->GetBinUpEdge(i),
hist->GetBinContent(i));
buff << tmp_buff;
// balanced full binary tree height = ceil(log(2*n_leaves)/log(2))
// breaks even around 10, but lower values are more probable in the pt spectrum
if (nbins < 15) {
formula = th1ToFormulaLin(hist);
} else {
formula = th1ToFormulaBinTree(hist);
}

// compile formula to check validity
TF1 f1("", formula.c_str());
if (f1.IsZombie()) {
throw cms::Exception("BTagCalibration")
<< "Invalid histogram; formula does not compile (>150 bins?): "
<< hist->GetName();
}
buff << 0.; // default value
formula = buff.str();
}

std::string BTagEntry::makeCSVHeader()
Expand Down Expand Up @@ -172,4 +256,4 @@ std::string BTagEntry::trimStr(std::string str) {
return "";
else
return str.substr(s, e-s+1);
}
}
7 changes: 4 additions & 3 deletions CondFormats/BTauObjects/test/testBTagCalibration.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -37,13 +37,14 @@ int main()
assert (e3.size() == 1);

// check csv output (ordering arbitrary)
string tggr = "testTagger";
string csv2_1("0, comb, up, 0, 1, 2, 3, 4, 5, 6, \"2*x\" \n");
string csv2_2("0, comb, down, 0, 1, 2, 3, 4, 5, 6, \"2*x\" \n");
stringstream csv2Stream1;
stringstream csv2Stream2;
csv2Stream1 << BTagEntry::makeCSVHeader() << csv2_1 << csv2_2;
csv2Stream2 << BTagEntry::makeCSVHeader() << csv2_2 << csv2_1;
BTagCalibration b2("csvivf");
csv2Stream1 << tggr << ";" << BTagEntry::makeCSVHeader() << csv2_1 << csv2_2;
csv2Stream2 << tggr << ";" << BTagEntry::makeCSVHeader() << csv2_2 << csv2_1;
BTagCalibration b2(tggr);
b2.readCSV(csv2Stream1);

stringstream csv3Stream;
Expand Down
25 changes: 17 additions & 8 deletions CondFormats/BTauObjects/test/testBTagEntry.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,10 @@ int main()
{
using namespace std;

auto par1 = BTagEntry::Parameters(BTagEntry::OP_TIGHT, "CoMb", "cEnTrAl_");
assert (par1.measurementType == std::string("comb"));
assert (par1.sysType == string("central_"));

// default constructor
auto b1 = BTagEntry();

Expand All @@ -23,14 +27,19 @@ int main()
assert (b2.formula == string("2*x"));

// histo constructor
auto h1 = TH1F("name", "title", 2, 0., 2.);
h1.Fill(0.5, 1);
h1.Fill(1.5, 2);
auto b3 = BTagEntry(
&h1,
BTagEntry::Parameters(BTagEntry::OP_TIGHT, "comb", "up", BTagEntry::FLAV_C)
);
assert (b3.formula == string("x<0 ? 0. : x<1 ? 1 : x<2 ? 2 : 0"));
auto h1 = TH1F("h1", "", 3, 0., 1.); // lin.
auto h2 = TH1F("h2", "", 100, 0., 1.); // bin. tree
auto sin = TF1("sin", "sin(x)");
for (float f=0.01f; f<1.f; f+=.01f) {
h1.Fill(f, sin.Eval(f)/30.);
h2.Fill(f, sin.Eval(f));
}
auto f3_1 = TF1("", BTagEntry(&h1, par1).formula.c_str());
auto f3_2 = TF1("", BTagEntry(&h2, par1).formula.c_str());
for (float f=0.01f; f<1.f; f+=.01f) {
assert (fabs(h1.GetBinContent(h1.FindBin(f)) - f3_1.Eval(f)) < 1e-5);
assert (fabs(h2.GetBinContent(h2.FindBin(f)) - f3_2.Eval(f)) < 1e-5);
}

// csv constructor
string csv = "0, comb, up, 0, 1, 2, 3, 4, 5, 6, \"2*x\" \n";
Expand Down
57 changes: 57 additions & 0 deletions RecoBTag/PerformanceDB/plugins/BTagCalibrationDbCreator.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
#include <memory>
#include <string>
#include <iostream>
#include <sstream>
#include "FWCore/Framework/interface/Frameworkfwd.h"
#include "FWCore/Framework/interface/EDAnalyzer.h"
#include "FWCore/Framework/interface/Event.h"
#include "FWCore/Framework/interface/MakerMacros.h"
#include "FWCore/ParameterSet/interface/ParameterSet.h"
#include "FWCore/ServiceRegistry/interface/Service.h"
#include "CondCore/DBOutputService/interface/PoolDBOutputService.h"
#include "CondFormats/BTauObjects/interface/BTagCalibration.h"

// EDAnalyzer that stores a CSV file path and a tagger name taken from its
// configuration. analyze() and endJob() are intentionally empty; beginJob()
// is the only hook with an out-of-line implementation.
class BTagCalibrationDbCreator : public edm::EDAnalyzer
{
 public:
  BTagCalibrationDbCreator(const edm::ParameterSet&);
  ~BTagCalibrationDbCreator() {}

  void beginJob() override;
  void analyze(const edm::Event&, const edm::EventSetup&) override {}
  void endJob() override {}

 private:
  std::string csvFile_;  // value of the "csvFile" parameter
  std::string tagger_;   // value of the "tagger" parameter
};

BTagCalibrationDbCreator::BTagCalibrationDbCreator(const edm::ParameterSet& p):
csvFile_(p.getUntrackedParameter<std::string>("csvFile")),
tagger_ (p.getUntrackedParameter<std::string>("tagger" ))
{}

void BTagCalibrationDbCreator::beginJob()
{
auto calib = new BTagCalibration(tagger_, csvFile_);
edm::Service<cond::service::PoolDBOutputService> s;
if (s.isAvailable()) {
if (s->isNewTagRequest(tagger_)) {
s->createNewIOV<BTagCalibration>(
calib,
s->beginOfTime(),
s->endOfTime(),
tagger_
);
} else {
s->appendSinceTime<BTagCalibration>(
calib,
111,
tagger_
);
}
} else {
std::cout << "ERROR: DB service not available" << std::endl;
}
}

DEFINE_FWK_MODULE(BTagCalibrationDbCreator);
1 change: 1 addition & 0 deletions RecoBTag/PerformanceDB/plugins/BuildFile.xml
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
<export></export>
<library name="RecoBTagPerformanceDBplugins" file="*.cc">
<use name="CondFormats/PhysicsToolsObjects"/>
<use name="CondFormats/BTauObjects"/>
<use name="RecoBTag/Records"/>
<use name="RecoBTag/PerformanceDB"/>
<use name="CondFormats/DataRecord"/>
Expand Down
43 changes: 43 additions & 0 deletions RecoBTag/PerformanceDB/python/bTagCalibrationDbCreation.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
# Job configuration that runs the BTagCalibrationDbCreator analyzer on one
# CSV file and writes the result into an SQLite file next to it.
#
# The CSV path is read from sys.argv[2]; presumably the job is started as
# "cmsRun <this file> <file.csv>" -- confirm against the launcher in use.
# The tagger name is the part of the CSV basename before its first dot and
# is used as record, tag and label of the output service.
import os
import sys
import FWCore.ParameterSet.Config as cms

if len(sys.argv) < 3:
    raise RuntimeError('\nERROR: Need csv-filename as first argument.\n')
csv_file = sys.argv[2]
# Swap only the final extension. A plain str.replace('.csv', '.db') would
# also rewrite ".csv" inside directory names, and would leave the DB path
# equal to the input path when the file does not end in ".csv".
db_file = os.path.splitext(csv_file)[0] + '.db'
tagger = os.path.basename(csv_file).split('.')[0]
# Single-argument print calls give the same output under Python 2 and 3.
print("Using file: %s" % csv_file)
print("DBout into: %s" % db_file)
print("taggername: %s" % tagger)

process = cms.Process("BTagCalibCreator")
process.load("CondCore.DBCommon.CondDBCommon_cfi")
process.CondDBCommon.connect = 'sqlite_file:' + db_file

process.maxEvents = cms.untracked.PSet(
    input=cms.untracked.int32(1),
)

process.source = cms.Source("EmptySource")
process.PoolDBOutputService = cms.Service("PoolDBOutputService",
    process.CondDBCommon,
    toPut = cms.VPSet(
        cms.PSet(
            record = cms.string(tagger),
            tag = cms.string(tagger),
            label = cms.string(tagger),
        ),
    )
)

process.dbCreator = cms.EDAnalyzer("BTagCalibrationDbCreator",
    csvFile=cms.untracked.string(csv_file),
    tagger=cms.untracked.string(tagger),
)

process.p = cms.Path(
    process.dbCreator
)


Loading