diff --git a/BuildFile.xml b/BuildFile.xml index 65359c41263d9..587ccbbd97cf9 100644 --- a/BuildFile.xml +++ b/BuildFile.xml @@ -4,6 +4,7 @@ + diff --git a/bin/combine.cpp b/bin/combine.cpp index 587374a2c65fd..6fc1fb0efe0ff 100644 --- a/bin/combine.cpp +++ b/bin/combine.cpp @@ -237,8 +237,13 @@ int main(int argc, char **argv) { if (vm.count("X-fpeMask")) gSystem->SetFPEMask(vm["X-fpeMask"].as()); + // CMSDAS Defaults (you can turn off with --X-rtd =0 + runtimedef::set("OPTIMIZE_BOUNDS", 1); + runtimedef::set("ADDNLL_RECURSIVE", 1); + runtimedef::set("ADDNLL_GAUSSNLL", 1); + runtimedef::set("ADDNLL_HISTNLL", 1); + runtimedef::set("TMCSO_AdaptivePseudoAsimov", 1); - // if you have libraries, it's time to load them now for (vector::const_iterator rtdp = runtimeDefines.begin(), endrtdp = runtimeDefines.end(); rtdp != endrtdp; ++rtdp) { std::string::size_type idx = rtdp->find('='); if (idx == std::string::npos) { diff --git a/interface/CachingNLL.h b/interface/CachingNLL.h index b8ba291c6f480..60e6c71870da0 100644 --- a/interface/CachingNLL.h +++ b/interface/CachingNLL.h @@ -13,6 +13,11 @@ #include #include #include +#include +#include "../interface/SimpleGaussianConstraint.h" +#include + +class RooMultiPdf; // Part zero: ArgSet checker namespace cacheutils { @@ -55,20 +60,38 @@ class CachingPdf { public: CachingPdf(RooAbsReal *pdf, const RooArgSet *obs) ; CachingPdf(const CachingPdf &other) ; - ~CachingPdf() ; + virtual ~CachingPdf() ; const std::vector & eval(const RooAbsData &data) ; const RooAbsReal *pdf() const { return pdf_; } void setDataDirty() { lastData_ = 0; } - private: + protected: const RooArgSet *obs_; RooAbsReal *pdfOriginal_; RooArgSet pdfPieces_; RooAbsReal *pdf_; const RooAbsData *lastData_; ValuesCache cache_; - void realFill_(const RooAbsData &data, std::vector &values) ; + std::vector nonZeroW_; + unsigned int nonZeroWEntries_; + virtual void newData_(const RooAbsData &data) ; + virtual void realFill_(const RooAbsData &data, std::vector &values) ; }; +template +class OptimizedCachingPdfT : public CachingPdf { + public: + OptimizedCachingPdfT(RooAbsReal *pdf, const RooArgSet *obs) : + CachingPdf(pdf,obs), vpdf_(0) {} + OptimizedCachingPdfT(const OptimizedCachingPdfT &other) : + CachingPdf(other), vpdf_(0) {} + virtual ~OptimizedCachingPdfT() { delete vpdf_; } + protected: + virtual void realFill_(const RooAbsData &data, std::vector &values) ; + virtual void newData_(const RooAbsData &data) ; + VPdfT *vpdf_; +}; + + class CachingAddNLL : public RooAbsReal { public: CachingAddNLL(const char *name, const char *title, RooAbsPdf *pdf, RooAbsData *data) ; @@ -88,16 +111,20 @@ class CachingAddNLL : public RooAbsReal { RooSetProxy & params() { return params_; } private: void setup_(); + void addPdfs_(RooAddPdf *addpdf, bool recursive, const RooArgList & basecoeffs) ; RooAbsPdf *pdf_; RooSetProxy params_; const RooAbsData *data_; std::vector weights_; double sumWeights_; mutable std::vector coeffs_; - mutable std::vector pdfs_; + mutable boost::ptr_vector pdfs_; + mutable boost::ptr_vector prods_; mutable std::vector integrals_; + mutable std::vector > multiPdfs_; mutable std::vector partialSum_; - mutable bool isRooRealSum_; + mutable std::vector workingArea_; + mutable bool isRooRealSum_, fastExit_; double zeroPoint_; }; @@ -117,18 +144,9 @@ class CachingSimNLL : public RooAbsReal { static void setNoDeepLogEvalError(bool noDeep) { noDeepLEE_ = noDeep; } void setZeroPoint() ; void clearZeroPoint() ; + static void forceUnoptimizedConstraints() { optimizeContraints_ = false; } friend class CachingAddNLL; private: - class SimpleGaussianConstraint : public RooGaussian { - public: - SimpleGaussianConstraint(const RooGaussian &g) : RooGaussian(g, "") {} - double getLogValFast() const { - Double_t arg = x - mean; - Double_t sig = sigma ; - return -0.5*arg*arg/(sig*sig); - } - }; - void setup_(); RooSimultaneous *pdfOriginal_; const RooAbsData *dataOriginal_; @@ -138,11 +156,13 @@ class CachingSimNLL : public RooAbsReal { std::auto_ptr factorizedPdf_; std::vector constrainPdfs_; std::vector constrainPdfsFast_; + std::vector constrainPdfsFastOwned_; std::vector pdfs_; std::auto_ptr dataSets_; std::vector datasets_; static bool noDeepLEE_; static bool hasError_; + static bool optimizeContraints_; std::vector constrainZeroPoints_; std::vector constrainZeroPointsFast_; }; diff --git a/interface/Combine.h b/interface/Combine.h index 001d67d6da316..3807aa3f74e17 100644 --- a/interface/Combine.h +++ b/interface/Combine.h @@ -71,6 +71,7 @@ class Combine { bool makeTempDir_; bool rebuildSimPdf_; bool optSimPdf_; + bool noMCbonly_; static TTree *tree_; }; diff --git a/interface/FastTemplate.h b/interface/FastTemplate.h index de2fe6358aa37..c49aab1ab9984 100644 --- a/interface/FastTemplate.h +++ b/interface/FastTemplate.h @@ -4,31 +4,36 @@ #include #include #include +#include class FastTemplate { public: typedef double T; - FastTemplate() : size_(0), values_(0) {} - FastTemplate(unsigned int size) : size_(size), values_(new T[size_]) {} - FastTemplate(const FastTemplate &other) : size_(other.size_), values_(size_ ? new T[size_] : 0) { if (size_) CopyValues(other); } - FastTemplate(const TH1 &other) : size_(other.GetNbinsX()), values_(new T[size_]) { CopyValues(other); } - FastTemplate(const TH2 &other) : size_(other.GetNbinsX()*other.GetNbinsY()), values_(new T[size_]) { CopyValues(other); } + typedef std::vector AT; + FastTemplate() : size_(0), values_() {} + FastTemplate(unsigned int size) : size_(size), values_(size_) {} + FastTemplate(const FastTemplate &other) : size_(other.size()), values_(other.values_) {} + FastTemplate(const TH1 &other) : size_(other.GetNbinsX()), values_(size_) { CopyValues(other); } + FastTemplate(const TH2 &other) : size_(other.GetNbinsX()*other.GetNbinsY()), values_(size_) { CopyValues(other); } FastTemplate & operator=(const FastTemplate &other) { - if (size_ != other.size_) { - delete [] values_; size_ = other.size_; values_ = new T[size_]; + if (&other != this) { + size_ = other.size_; + values_ = other.values_; } - CopyValues(other); return *this; + return *this; } FastTemplate & operator=(const TH1 &other) { - if (size_ != unsigned(other.GetNbinsX())) { - delete [] values_; size_ = other.GetNbinsX(); values_ = new T[size_]; + if (size() != unsigned(other.GetNbinsX())) { + size_ = other.GetNbinsX(); + values_.resize(size_); } CopyValues(other); return *this; } - ~FastTemplate() { delete [] values_; } + ~FastTemplate() { } void Resize(unsigned int newsize) { - if (newsize != size_) { - delete [] values_; size_ = newsize; values_ = new T[size_]; + if (newsize != size()) { + size_ = newsize; + values_.resize(size_); } } T Integral() const ; @@ -39,6 +44,10 @@ class FastTemplate { void CopyValues(const TH2 &other) ; T & operator[](unsigned int i) { return values_[i]; } const T & operator[](unsigned int i) const { return values_[i]; } + /// return the full size of the template + const unsigned int fullsize() const { return values_.size(); } + /// return the active size of the template (can be less than the full size if the SetActiveSize + /// has been used to inform the code that only the first N bins are not empty) const unsigned int size() const { return size_; } /// *this = log(*this) @@ -54,33 +63,39 @@ class FastTemplate { /// Does this += x * (diff + (sum)*y) void Meld(const FastTemplate & diff, const FastTemplate & sum, T x, T y) ; /// protect from underflows (*this = max(*this, minimum)); - void CropUnderflows(T minimum=1e-9); + void CropUnderflows(T minimum=1e-9, bool activebinsonly=true); + + /// Tell the code that only the first N bins of the template are non-empty, + /// and so that only those have to be considered when doing operations + void SetActiveSize(unsigned int size) { size_ = size; } void Dump() const ; protected: - unsigned int size_; - T *values_; + unsigned int size_; + AT values_; }; class FastHisto : public FastTemplate { public: - FastHisto() : FastTemplate(), binEdges_(0), binWidths_(0) {} + FastHisto() : FastTemplate(), binEdges_(), binWidths_() {} FastHisto(const TH1 &hist) ; FastHisto(const FastHisto &other) ; FastHisto & operator=(const FastHisto &other) { - if (size_ != other.size_) { - FastHisto fh(other); - swap(fh); + if (size() != other.size()) { + size_ = other.size_; + values_ = other.values_; + binWidths_ = other.binWidths_; + binEdges_ = other.binEdges_; } else CopyValues(other); return *this; } FastHisto & operator=(const TH1 &other) { - if (size_ != unsigned(other.GetNbinsX())) { + if (size() != unsigned(other.GetNbinsX())) { FastHisto fh(other); swap(fh); } else CopyValues(other); - CopyValues(other); return *this; + return *this; } - ~FastHisto() { delete [] binEdges_; delete [] binWidths_; } + ~FastHisto() { } void swap(FastHisto &other) { std::swap(size_, other.size_); std::swap(values_, other.values_); @@ -88,6 +103,8 @@ class FastHisto : public FastTemplate { std::swap(binEdges_, other.binEdges_); } T GetAt(const T &x) const ; + int FindBin(const T &x) const ; + const T & GetBinContent(int bin) const { return values_[bin]; } T IntegralWidth() const ; void Normalize() { T sum = IntegralWidth(); @@ -96,27 +113,32 @@ class FastHisto : public FastTemplate { void Dump() const ; private: - T *binEdges_; - T *binWidths_; + AT binEdges_; + AT binWidths_; }; class FastHisto2D : public FastTemplate { public: - FastHisto2D() : FastTemplate(), binX_(0), binY_(0), binEdgesX_(0), binEdgesY_(0), binWidths_(0) {} + FastHisto2D() : FastTemplate(), binX_(0), binY_(0), binEdgesX_(), binEdgesY_(), binWidths_() {} FastHisto2D(const TH2 &hist, bool normXonly=false) ; FastHisto2D(const FastHisto2D &other) ; FastHisto2D & operator=(const FastHisto2D &other) { - if (binX_ != other.binY_ || binY_ != other.binY_) { - FastHisto2D fh(other); - swap(fh); + if (binX() != other.binX() || binY() != other.binY()) { + size_ = other.size_; + values_ = other.values_; + binWidths_ = other.binWidths_; + binEdgesX_ = other.binEdgesX_; + binEdgesY_ = other.binEdgesY_; + binX_ = other.binX_; + binY_ = other.binY_; } else CopyValues(other); return *this; } - ~FastHisto2D() { delete [] binEdgesX_; delete [] binEdgesY_; delete [] binWidths_; } + ~FastHisto2D() { } void swap(FastHisto2D &other) { + std::swap(size_, other.size_); std::swap(binX_, other.binX_); std::swap(binY_, other.binY_); - std::swap(size_, other.size_); std::swap(values_, other.values_); std::swap(binWidths_, other.binWidths_); std::swap(binEdgesX_, other.binEdgesX_); @@ -124,6 +146,8 @@ class FastHisto2D : public FastTemplate { } T GetAt(const T &x, const T &y) const ; T IntegralWidth() const ; + unsigned int binX() const { return binX_; } + unsigned int binY() const { return binY_; } void Normalize() { T sum = IntegralWidth(); if (sum > 0) Scale(1.0f/sum); @@ -134,9 +158,9 @@ class FastHisto2D : public FastTemplate { void Dump() const ; private: unsigned int binX_, binY_; - T *binEdgesX_; - T *binEdgesY_; - T *binWidths_; + AT binEdgesX_; + AT binEdgesY_; + AT binWidths_; }; diff --git a/interface/FitterAlgoBase.h b/interface/FitterAlgoBase.h index 1e2842c261f0d..7a673ac160e4b 100644 --- a/interface/FitterAlgoBase.h +++ b/interface/FitterAlgoBase.h @@ -56,6 +56,9 @@ class FitterAlgoBase : public LimitAlgo { RooFitResult *doFit(RooAbsPdf &pdf, RooAbsData &data, const RooArgList &rs, const RooCmdArg &constrain, bool doHesse=true, int ndim=1,bool reuseNLL=false, bool saveFitResult=true) ; double findCrossing(CascadeMinimizer &minim, RooAbsReal &nll, RooRealVar &r, double level, double rStart, double rBound) ; double findCrossingNew(CascadeMinimizer &minim, RooAbsReal &nll, RooRealVar &r, double level, double rStart, double rBound) ; + + void optimizeBounds(const RooWorkspace *w, const RooStats::ModelConfig *mc) ; + void restoreBounds(const RooWorkspace *w, const RooStats::ModelConfig *mc) ; }; diff --git a/interface/RooMinimizerOpt.h b/interface/RooMinimizerOpt.h index 9cc2789b0dd63..441f15c550479 100644 --- a/interface/RooMinimizerOpt.h +++ b/interface/RooMinimizerOpt.h @@ -13,15 +13,42 @@ class RooMinimizerOpt : public RooMinimizer { public: RooMinimizerOpt(RooAbsReal& function) ; Double_t edm(); + Int_t minimize(const char* type, const char* alg=0) ; + Int_t improve() ; + Int_t migrad() ; + Int_t hesse() ; + Int_t minos() ; + Int_t minos(const RooArgSet& minosParamList) ; + }; class RooMinimizerFcnOpt : public RooMinimizerFcn { public: RooMinimizerFcnOpt(RooAbsReal *funct, RooMinimizer *context, bool verbose = false); + RooMinimizerFcnOpt(const RooMinimizerFcnOpt &other) ; virtual ROOT::Math::IBaseFunctionMultiDim* Clone() const; + Bool_t Synchronize(std::vector& parameters, Bool_t optConst, Bool_t verbose); + void initStdVects() const ; protected: virtual double DoEval(const double * x) const; mutable std::vector _vars; + mutable std::vector _vals; + mutable std::vector _hasOptimzedBounds; + struct OptBound { + double softMin, softMax, hardMin, hardMax; + double transform(double x) const { + if (x < softMin) { + double dx = softMin-x; // > 0 + return hardMin + ( (softMin-hardMin) + dx ) * std::exp ( -2*dx/(softMin-hardMin) ); + } else if (x > softMax) { + double dx = x-softMax; + return hardMax - ( (hardMax-softMax) + dx ) * std::exp ( -2*dx/(hardMax-softMax) ); + } else { + return x; + } + } + }; + mutable std::vector _optimzedBounds; }; #endif diff --git a/interface/SimpleGaussianConstraint.h b/interface/SimpleGaussianConstraint.h new file mode 100644 index 0000000000000..a751232af481d --- /dev/null +++ b/interface/SimpleGaussianConstraint.h @@ -0,0 +1,37 @@ +#ifndef SimpleGaussianConstraint_h +#define SimpleGaussianConstraint_h + +#include + +class SimpleGaussianConstraint : public RooGaussian { + public: + SimpleGaussianConstraint() {} ; + SimpleGaussianConstraint(const char *name, const char *title, + RooAbsReal& _x, RooAbsReal& _mean, RooAbsReal& _sigma): + RooGaussian(name,title,_x,_mean,_sigma) { init(); } + SimpleGaussianConstraint(const SimpleGaussianConstraint& other, const char* name=0) : + RooGaussian(other, name) { init(); } + SimpleGaussianConstraint(const RooGaussian &g) : RooGaussian(g, "") { init(); } + + virtual TObject* clone(const char* newname) const { return new SimpleGaussianConstraint(*this,newname); } + inline virtual ~SimpleGaussianConstraint() { } + + double getLogValFast() const { + if (_valueDirty) { + Double_t arg = x - mean; + //Double_t sig = sigma ; + //return -0.5*arg*arg/(sig*sig); + _value = scale_*arg*arg; + _valueDirty = false; + } + return _value; + } + + private: + double scale_; + void init() ; + + ClassDef(SimpleGaussianConstraint,1) // Gaussian PDF with fast log +}; + +#endif diff --git a/interface/VectorizedGaussian.h b/interface/VectorizedGaussian.h new file mode 100644 index 0000000000000..b364c36ad9bbd --- /dev/null +++ b/interface/VectorizedGaussian.h @@ -0,0 +1,27 @@ +#ifndef VectorizedGaussian_h +#define VectorizedGaussian_h + +#include +#include +#include + +class VectorizedGaussian { + class Worker : public RooGaussian { + public: + Worker(const RooGaussian &g) : RooGaussian(g, "") {} + const RooAbsReal & xvar() const { return x.arg(); } + const RooAbsReal & meanvar() const { return mean.arg(); } + const RooAbsReal & sigvar() const { return sigma.arg(); } + }; + public: + VectorizedGaussian(const RooGaussian &gaus, const RooAbsData &data) ; + void fill(std::vector &out) const ; + private: + const RooRealVar * x_; + const RooAbsReal * mean_; + const RooAbsReal * sigma_; + std::vector xvals_; + mutable std::vector work_, work2_; +}; + +#endif diff --git a/interface/VerticalInterpHistPdf.h b/interface/VerticalInterpHistPdf.h index cbabccbe9a4f5..1a6001915d6fb 100644 --- a/interface/VerticalInterpHistPdf.h +++ b/interface/VerticalInterpHistPdf.h @@ -12,10 +12,10 @@ #include "../interface/FastTemplate.h" #include -#define FastVerticalInterpHistPdf_CopyConstructor -#define FastVerticalInterpHistPdf_Serializable - class FastVerticalInterpHistPdf; +class FastVerticalInterpHistPdf2Base; +class FastVerticalInterpHistPdf2; +class FastVerticalInterpHistPdf2D2; class VerticalInterpHistPdf : public RooAbsPdf { public: @@ -85,6 +85,8 @@ class FastVerticalInterpHistPdfBase : public RooAbsPdf { /// Must be public, for serialization struct Morph { FastTemplate sum; FastTemplate diff; }; + + friend class FastVerticalInterpHistPdf2Base; protected: RooRealProxy _x; RooListProxy _funcList ; // List of component FUNCs @@ -122,6 +124,7 @@ class FastVerticalInterpHistPdfBase : public RooAbsPdf { }; +struct FastVerticalInterpHistPdfV; class FastVerticalInterpHistPdf : public FastVerticalInterpHistPdfBase { public: @@ -133,11 +136,7 @@ class FastVerticalInterpHistPdf : public FastVerticalInterpHistPdfBase { FastVerticalInterpHistPdf(const FastVerticalInterpHistPdf& other, const char* name=0) : FastVerticalInterpHistPdfBase(other, name), _x("x",this,other._x), -#ifdef FastVerticalInterpHistPdf_CopyConstructor _cache(other._cache), _cacheNominal(other._cacheNominal), _cacheNominalLog(other._cacheNominalLog) {} -#else - _cache(), _cacheNominal(), _cacheNominalLog() {} -#endif virtual TObject* clone(const char* newname) const { return new FastVerticalInterpHistPdf(*this,newname) ; } virtual ~FastVerticalInterpHistPdf() {} @@ -145,6 +144,8 @@ class FastVerticalInterpHistPdf : public FastVerticalInterpHistPdfBase { Bool_t hasCache() const { return _cache.size() > 0; } Bool_t isCacheReady() const { return _cache.size() > 0 && _init; } + friend class FastVerticalInterpHistPdfV; + friend class FastVerticalInterpHistPdf2; protected: RooRealProxy _x; @@ -164,6 +165,21 @@ class FastVerticalInterpHistPdf : public FastVerticalInterpHistPdfBase { ClassDef(FastVerticalInterpHistPdf,1) // }; +class FastVerticalInterpHistPdfV { + public: + FastVerticalInterpHistPdfV(const FastVerticalInterpHistPdf &, const RooAbsData &data) ; + void fill(std::vector &out) const ; + private: + const FastVerticalInterpHistPdf & hpdf_; + int begin_, end_, nbins_; + struct Block { + int index, begin, end; + Block(int i, int begin_, int end_) : index(i), begin(begin_), end(end_) {} + }; + std::vector blocks_; + std::vector bins_; +}; + class FastVerticalInterpHistPdf2D : public FastVerticalInterpHistPdfBase { public: @@ -179,21 +195,20 @@ class FastVerticalInterpHistPdf2D : public FastVerticalInterpHistPdfBase { FastVerticalInterpHistPdf2D(const FastVerticalInterpHistPdf2D& other, const char* name=0) : FastVerticalInterpHistPdfBase(other, name), _x("x",this,other._x), _y("y",this,other._y), _conditional(other._conditional), -#ifdef FastVerticalInterpHistPdf_CopyConstructor _cache(other._cache), _cacheNominal(other._cacheNominal), _cacheNominalLog(other._cacheNominalLog) {} -#else - _cache(), _cacheNominal(), _cacheNominalLog() {} -#endif virtual TObject* clone(const char* newname) const { return new FastVerticalInterpHistPdf2D(*this,newname) ; } virtual ~FastVerticalInterpHistPdf2D() {} - Double_t evaluate() const ; - - Bool_t selfNormalized() const { return kTRUE; } + const RooRealVar & x() const { return dynamic_cast(_x.arg()); } + const RooRealVar & y() const { return dynamic_cast(_y.arg()); } Bool_t conditional() const { return _conditional; } + Double_t evaluate() const ; + Bool_t hasCache() const { return _cache.size() > 0; } Bool_t isCacheReady() const { return _cache.size() > 0 && _init; } + + friend class FastVerticalInterpHistPdf2D2; protected: RooRealProxy _x, _y; bool _conditional; @@ -215,6 +230,158 @@ class FastVerticalInterpHistPdf2D : public FastVerticalInterpHistPdfBase { }; +class FastVerticalInterpHistPdf2Base : public RooAbsPdf { +public: + + FastVerticalInterpHistPdf2Base() ; + FastVerticalInterpHistPdf2Base(const char *name, const char *title, const RooArgSet &obs, const TList & funcList, const RooArgList& coefList, Double_t smoothRegion=1., Int_t smoothAlgo=1) ; + FastVerticalInterpHistPdf2Base(const FastVerticalInterpHistPdf2Base& other, const char* name=0) ; + explicit FastVerticalInterpHistPdf2Base(const FastVerticalInterpHistPdfBase& other, const char* name=0) ; + virtual TObject* clone(const char* newname) const = 0; + virtual ~FastVerticalInterpHistPdf2Base() ; + + Bool_t selfNormalized() const { return kTRUE; } + + Double_t evaluate() const = 0; + + const RooArgList& coefList() const { return _coefList ; } + + virtual void setActiveBins(unsigned int bins) {} + + /// Must be public, for serialization + typedef FastVerticalInterpHistPdfBase::Morph Morph; +protected: + RooListProxy _coefList ; // List of coefficients + Double_t _smoothRegion; + Int_t _smoothAlgo; + + // set to true after _morphParams and _sentry are initialized + mutable bool _initBase; //! not to be serialized + + // to check if parameters change + mutable SimpleCacheSentry _sentry; //! not to be serialized + + // For additive morphing, histograms of (fUp-f0)+(fDown-f0) and (fUp-f0)-(fDown-f0) + // For multiplicative morphing, log(fUp/f0)+log(fDown/f0), log(fUp/f0)-log(fDown/f0) + // NOTE: it's the responsibility of the daughter to make sure these are initialized!!! + std::vector _morphs; + + // Coefficients of the list in _coefList, already dynamic_cast'ed and in a vector + mutable std::vector _morphParams; //! not to be serialized + + // Prepare morphing data for a triplet of templates + void initMorph(Morph &out, const FastTemplate &nominal, FastTemplate &lo, FastTemplate &hi) const; + + // Do the vertical morphing from nominal value and morphs into cache. + // Do not normalize yet, as that depends on the dimension of the template + void syncTotal(FastTemplate &cache, const FastTemplate &cacheNominal, const FastTemplate &cacheNominalLog) const ; + + // return a smooth function that is equal to +/-1 for |x| >= smoothRegion_ and it's null in zero + inline double smoothStepFunc(double x) const { + if (fabs(x) >= _smoothRegion) return x > 0 ? +1 : -1; + double xnorm = x/_smoothRegion, xnorm2 = xnorm*xnorm; + return 0.125 * xnorm * (xnorm2 * (3.*xnorm2 - 10.) + 15); + } + + // initialize the morphParams and the sentry. to be called by the daughter class, sets also _initBase to true + void initBase() const ; + +private: + ClassDef(FastVerticalInterpHistPdf2Base,1) // +}; + + +struct FastVerticalInterpHistPdf2V; +class FastVerticalInterpHistPdf2 : public FastVerticalInterpHistPdf2Base { +public: + + FastVerticalInterpHistPdf2() : FastVerticalInterpHistPdf2Base() {} + FastVerticalInterpHistPdf2(const char *name, const char *title, const RooRealVar &x, const TList & funcList, const RooArgList& coefList, Double_t smoothRegion=1., Int_t smoothAlgo=1) ; + + FastVerticalInterpHistPdf2(const FastVerticalInterpHistPdf2& other, const char* name=0) : + FastVerticalInterpHistPdf2Base(other, name), + _x("x",this,other._x), + _cache(other._cache), _cacheNominal(other._cacheNominal), _cacheNominalLog(other._cacheNominalLog) {} + explicit FastVerticalInterpHistPdf2(const FastVerticalInterpHistPdf& other, const char* name=0) ; + virtual TObject* clone(const char* newname) const { return new FastVerticalInterpHistPdf2(*this,newname) ; } + virtual ~FastVerticalInterpHistPdf2() {} + + virtual void setActiveBins(unsigned int bins) ; + Double_t evaluate() const ; + + friend class FastVerticalInterpHistPdf2V; +protected: + RooRealProxy _x; + + /// Cache of the result + mutable FastHisto _cache; //! not to be serialized + + /// Cache of nominal pdf (additive morphing) and its bin-by-bin logarithm (multiplicative) + FastHisto _cacheNominal; + FastHisto _cacheNominalLog; + + void syncTotal() const ; + void initNominal(TObject *nominal) ; + void initComponent(int which, TObject *hi, TObject *lo) ; + +private: + ClassDef(FastVerticalInterpHistPdf2,1) // +}; +class FastVerticalInterpHistPdf2V { + public: + FastVerticalInterpHistPdf2V(const FastVerticalInterpHistPdf2 &, const RooAbsData &data) ; + void fill(std::vector &out) const ; + private: + const FastVerticalInterpHistPdf2 & hpdf_; + int begin_, end_, nbins_; + struct Block { + int index, begin, end; + Block(int i, int begin_, int end_) : index(i), begin(begin_), end(end_) {} + }; + std::vector blocks_; + std::vector bins_; +}; + + + +class FastVerticalInterpHistPdf2D2 : public FastVerticalInterpHistPdf2Base { +public: + + FastVerticalInterpHistPdf2D2() : FastVerticalInterpHistPdf2Base() {} + FastVerticalInterpHistPdf2D2(const char *name, const char *title, const RooRealVar &x, const RooRealVar &y, bool conditional, const TList & funcList, const RooArgList& coefList, Double_t smoothRegion=1., Int_t smoothAlgo=1) ; + + FastVerticalInterpHistPdf2D2(const FastVerticalInterpHistPdf2D2& other, const char* name=0) : + FastVerticalInterpHistPdf2Base(other, name), + _x("x",this,other._x), _y("y",this,other._y), _conditional(other._conditional), + _cache(other._cache), _cacheNominal(other._cacheNominal), _cacheNominalLog(other._cacheNominalLog) {} + explicit FastVerticalInterpHistPdf2D2(const FastVerticalInterpHistPdf2D& other, const char* name=0) ; + virtual TObject* clone(const char* newname) const { return new FastVerticalInterpHistPdf2D2(*this,newname) ; } + virtual ~FastVerticalInterpHistPdf2D2() {} + + const RooRealVar & x() const { return dynamic_cast(_x.arg()); } + const RooRealVar & y() const { return dynamic_cast(_y.arg()); } + Bool_t conditional() const { return _conditional; } + + Double_t evaluate() const ; +protected: + RooRealProxy _x, _y; + bool _conditional; + + /// Cache of the result + mutable FastHisto2D _cache; //! not to be serialized + + /// Cache of nominal pdf (additive morphing) and its bin-by-bin logarithm (multiplicative) + FastHisto2D _cacheNominal; + FastHisto2D _cacheNominalLog; + + void syncTotal() const ; + void initNominal(TObject *nominal) ; + void initComponent(int which, TObject *hi, TObject *lo) ; + +private: + ClassDef(FastVerticalInterpHistPdf2D2,1) // +}; + diff --git a/python/DatacardParser.py b/python/DatacardParser.py index 13a23b6b15ee1..697d1f4269d46 100644 --- a/python/DatacardParser.py +++ b/python/DatacardParser.py @@ -16,6 +16,10 @@ def addDatacardParserOptions(parser): parser.add_option("-L", "--LoadLibrary", dest="libs", type="string" , action="append", help="Load these libraries") parser.add_option("--poisson", dest="poisson", default=0, type="int", help="If set to a positive number, binned datasets wih more than this number of entries will be generated using poissonians") parser.add_option("--default-morphing", dest="defMorph", type="string", default="shape2N", help="Default template morphing algorithm (to be used when the datacard has just 'shape')") + parser.add_option("--no-b-only","--for-fits", dest="noBOnly", default=False, action="store_true", help="Do not save the background-only pdf (saves time)") + parser.add_option("--no-optimize-pdfs", dest="noOptimizePdf", default=False, action="store_true", help="Do not save the RooSimultaneous as RooSimultaneousOpt and Gaussian constraints as SimpleGaussianConstraint") + #parser.add_option("--use-HistPdf", dest="useHistPdf", type="string", default="always", help="Use RooHistPdf for TH1s: 'always' (default), 'never', 'when-constant' (i.e. not when doing template morphing)") + parser.add_option("--use-HistPdf", dest="useHistPdf", type="string", default="never", help="Use RooHistPdf for TH1s: 'always', 'never' (default), 'when-constant' (i.e. not when doing template morphing)") parser.add_option("--X-exclude-nuisance", dest="nuisancesToExclude", type="string", action="append", default=[], help="Exclude nuisances that match these regular expressions.") parser.add_option("--X-rescale-nuisance", dest="nuisancesToRescale", type="string", action="append", nargs=2, default=[], help="Rescale by this factor the nuisances that match these regular expressions (the rescaling is applied to the sigma of the gaussian constraint term).") parser.add_option("--X-force-no-simpdf", dest="forceNonSimPdf", default=False, action="store_true", help="FOR DEBUG ONLY: Do not produce a RooSimultaneous if there is just one channel (note: can affect performance)") @@ -23,6 +27,11 @@ def addDatacardParserOptions(parser): parser.add_option("--X-no-jmax", dest="noJMax", default=False, action="store_true", help="FOR DEBUG ONLY: Turn off the consistency check between jmax and number of processes.") parser.add_option("--X-allow-no-signal", dest="allowNoSignal", default=False, action="store_true", help="Allow parsing datacards that contain only backgrounds") parser.add_option("--X-allow-no-background", dest="allowNoBackground", default=False, action="store_true", help="Allow parsing datacards that contain only signal") + #parser.add_option("--X-optimize-templates", dest="optimizeExistingTemplates", default=False, action="store_true", help="Optimize templates on the fly (relevant for HZZ)") + #parser.add_option("--X-optimize-bound-nusances", dest="optimizeBoundNuisances", default=False, action="store_true", help="Flag nuisances to have a different implementation of bounds") + parser.add_option("--X-no-optimize-templates", dest="optimizeExistingTemplates", default=True, action="store_false", help="Don't optimize templates on the fly (relevant for HZZ)") + parser.add_option("--X-no-optimize-bound-nusances", dest="optimizeBoundNuisances", default=True, action="store_false", help="Don't flag nuisances to have a different implementation of bounds") + parser.add_option("--X-no-optimize-bins", dest="optimizeTemplateBins", default=True, action="store_false", help="Don't optimize template bins") from HiggsAnalysis.CombinedLimit.Datacard import Datacard diff --git a/python/ModelTools.py b/python/ModelTools.py index 5e1ed1da663cc..65a7b7c215b3e 100644 --- a/python/ModelTools.py +++ b/python/ModelTools.py @@ -97,10 +97,14 @@ def doNuisances(self): if re.match(pn, n): sig = float(pf); sigscale = sig * (4 if pdf == "shape" else 7) r = "-%g,%g" % (sigscale,sigscale) - self.doObj("%s_Pdf" % n, "Gaussian", "%s[%s], %s_In[0,%s], %g" % (n,r,n,r,sig)); + if self.options.noOptimizePdf: + self.doObj("%s_Pdf" % n, "Gaussian", "%s[%s], %s_In[0,%s], %g" % (n,r,n,r,sig)); + else: + self.doObj("%s_Pdf" % n, "SimpleGaussianConstraint", "%s[%s], %s_In[0,%s], %g" % (n,r,n,r,sig)); globalobs.append("%s_In" % n) if self.options.bin: self.out.var("%s_In" % n).setConstant(True) + if self.options.optimizeBoundNuisances: self.out.var(n).setAttribute("optimizeBounds") elif pdf == "gmM": val = 0; for c in errline.values(): #list channels @@ -202,6 +206,7 @@ def doNuisances(self): self.doObj("%s_Pdf" % n, "Gaussian", "%s, %s_In[%s,%g,%g], %s" % (n, n, args[0], self.out.var(n).getMin(), self.out.var(n).getMax(), args[1])) self.out.var("%s_In" % n).setConstant(True) globalobs.append("%s_In" % n) + #if self.options.optimizeBoundNuisances: self.out.var(n).setAttribute("optimizeBounds") else: raise RuntimeError, "Unsupported pdf %s" % pdf if nofloat: self.out.var("%s" % n).setAttribute("globalConstrained",True) @@ -311,6 +316,7 @@ def doModelConfigs(self): if self.out.set("globalObservables"): mc.SetGlobalObservables(self.out.set("globalObservables")) if self.options.verbose > 1: mc.Print("V") self.out._import(mc, mc.GetName()) + if self.options.noBOnly: break discparams = ROOT.RooArgSet("discreteParams") for cpar in self.discrete_param_set: roocpar = self.out.cat(cpar) diff --git a/python/PhysicsModel.py b/python/PhysicsModel.py index df4e41d75e763..51cfe1357e27c 100644 --- a/python/PhysicsModel.py +++ b/python/PhysicsModel.py @@ -122,7 +122,7 @@ def getHiggsProdDecMode(bin,process,options): raise RuntimeError, "Validation Error: signal process %s not among the allowed ones." % processSource # foundDecay = None - for D in [ "hww", "hzz", "hgg", "htt", "hbb", 'hzg', 'hmm' ]: + for D in [ "hww", "hzz", "hgg", "htt", "hbb", 'hzg', 'hmm', 'hinv' ]: if D in decaySource: if foundDecay: raise RuntimeError, "Validation Error: decay string %s contains multiple known decay names" % decaySource foundDecay = D @@ -133,6 +133,11 @@ def getHiggsProdDecMode(bin,process,options): if D in decaySource: if foundEnergy: raise RuntimeError, "Validation Error: decay string %s contains multiple known energies" % decaySource foundEnergy = D + if not foundEnergy: + for D in [ '7TeV', '8TeV', '14TeV' ]: + if D in options.fileName+":"+bin: + if foundEnergy: raise RuntimeError, "Validation Error: decay string %s contains multiple known energies" % decaySource + foundEnergy = D if not foundEnergy: foundEnergy = '7TeV' ## To ensure backward compatibility print "Warning: decay string %s does not contain any known energy, assuming %s" % (decaySource, foundEnergy) diff --git a/python/ShapeTools.py b/python/ShapeTools.py index 6799f5fdd8900..af3a0b4c80303 100644 --- a/python/ShapeTools.py +++ b/python/ShapeTools.py @@ -38,12 +38,19 @@ def doObservables(self): self.doCombinedDataset() def doIndividualModels(self): for b in self.DC.bins: + #print " + Getting model for bin %s" % (b) pdfs = ROOT.RooArgList(); bgpdfs = ROOT.RooArgList() coeffs = ROOT.RooArgList(); bgcoeffs = ROOT.RooArgList() for p in self.DC.exp[b].keys(): # so that we get only self.DC.processes contributing to this bin if self.DC.exp[b][p] == 0: continue if self.physics.getYieldScale(b,p) == 0: continue # exclude really the pdf + #print " +--- Getting pdf for %s in bin %s" % (p,b) (pdf,coeff) = (self.getPdf(b,p), self.out.function("n_exp_bin%s_proc_%s" % (b,p))) + if self.options.optimizeExistingTemplates: + pdf1 = self.optimizeExistingTemplates(pdf) + if (pdf1 != pdf): + self.out.dont_delete.append(pdf1) + pdf = pdf1 extranorm = self.getExtraNorm(b,p) if extranorm: prodset = ROOT.RooArgList(self.out.function("n_exp_bin%s_proc_%s" % (b,p))) @@ -62,41 +69,47 @@ def doIndividualModels(self): bgpdfs.add(pdf); bgcoeffs.add(coeff) if self.options.verbose: print "Creating RooAddPdf %s with %s elements" % ("pdf_bin"+b, coeffs.getSize()) sum_s = ROOT.RooAddPdf("pdf_bin%s" % b, "", pdfs, coeffs) - sum_b = ROOT.RooAddPdf("pdf_bin%s_bonly" % b, "", bgpdfs, bgcoeffs) + if not self.options.noBOnly: sum_b = ROOT.RooAddPdf("pdf_bin%s_bonly" % b, "", bgpdfs, bgcoeffs) if b in self.pdfModes: sum_s.setAttribute('forceGen'+self.pdfModes[b].title()) - sum_b.setAttribute('forceGen'+self.pdfModes[b].title()) + if not self.options.noBOnly: sum_b.setAttribute('forceGen'+self.pdfModes[b].title()) if len(self.DC.systs): ## rename the pdfs - sum_s.SetName("pdf_bin%s_nuis" % b); sum_b.SetName("pdf_bin%s_bonly_nuis" % b) + sum_s.SetName("pdf_bin%s_nuis" % b); + if not self.options.noBOnly: sum_b.SetName("pdf_bin%s_bonly_nuis" % b) # now we multiply by all the nuisances, but avoiding nested products # so we first make a list of all nuisances plus the RooAddPdf sumPlusNuis_s = ROOT.RooArgList(self.out.nuisPdfs); sumPlusNuis_s.add(sum_s) - sumPlusNuis_b = ROOT.RooArgList(self.out.nuisPdfs); sumPlusNuis_b.add(sum_b) # then make RooProdPdf and import it pdf_s = ROOT.RooProdPdf("pdf_bin%s" % b, "", sumPlusNuis_s) - pdf_b = ROOT.RooProdPdf("pdf_bin%s_bonly" % b, "", sumPlusNuis_b) + if not self.options.noBOnly: + sumPlusNuis_b = ROOT.RooArgList(self.out.nuisPdfs); sumPlusNuis_b.add(sum_b) + pdf_b = ROOT.RooProdPdf("pdf_bin%s_bonly" % b, "", sumPlusNuis_b) if b in self.pdfModes: pdf_s.setAttribute('forceGen'+self.pdfModes[b].title()) - pdf_b.setAttribute('forceGen'+self.pdfModes[b].title()) + if not self.options.noBOnly: pdf_b.setAttribute('forceGen'+self.pdfModes[b].title()) self.out._import(pdf_s, ROOT.RooFit.RenameConflictNodes(b)) - self.out._import(pdf_b, ROOT.RooFit.RecycleConflictNodes(), ROOT.RooFit.Silence()) + if not self.options.noBOnly: + self.out._import(pdf_b, ROOT.RooFit.RecycleConflictNodes(), ROOT.RooFit.Silence()) else: self.out._import(sum_s, ROOT.RooFit.RenameConflictNodes(b)) - self.out._import(sum_b, ROOT.RooFit.RecycleConflictNodes(), ROOT.RooFit.Silence()) + if not self.options.noBOnly: + self.out._import(sum_b, ROOT.RooFit.RecycleConflictNodes(), ROOT.RooFit.Silence()) def doCombination(self): ## Contrary to Number-counting models, here each channel PDF already contains the nuisances ## So we just have to build the combined pdf if len(self.DC.bins) > 1 or not self.options.forceNonSimPdf: for (postfixIn,postfixOut) in [ ("","_s"), ("_bonly","_b") ]: - simPdf = ROOT.RooSimultaneous("model"+postfixOut, "model"+postfixOut, self.out.binCat) + simPdf = ROOT.RooSimultaneous("model"+postfixOut, "model"+postfixOut, self.out.binCat) if self.options.noOptimizePdf else ROOT.RooSimultaneousOpt("model"+postfixOut, "model"+postfixOut, self.out.binCat) for b in self.DC.bins: pdfi = self.out.pdf("pdf_bin%s%s" % (b,postfixIn)) simPdf.addPdf(pdfi, b) self.out._import(simPdf) + if self.options.noBOnly: break else: self.out._import(self.out.pdf("pdf_bin%s" % self.DC.bins[0]).clone("model_s"), ROOT.RooFit.Silence()) - self.out._import(self.out.pdf("pdf_bin%s_bonly" % self.DC.bins[0]).clone("model_b"), ROOT.RooFit.Silence()) + if not self.options.noBOnly: + self.out._import(self.out.pdf("pdf_bin%s_bonly" % self.DC.bins[0]).clone("model_b"), ROOT.RooFit.Silence()) if self.options.fixpars: pars = self.out.pdf("model_s").getParameters(self.out.obs) iter = pars.createIterator() @@ -325,7 +338,7 @@ def getPdf(self,channel,process,_cache={}): postFix="Sig" if (process in self.DC.isSignal and self.DC.isSignal[process]) else "Bkg" if _cache.has_key((channel,process)): return _cache[(channel,process)] shapeNominal = self.getShape(channel,process) - nominalPdf = self.shape2Pdf(shapeNominal,channel,process) + nominalPdf = self.shape2Pdf(shapeNominal,channel,process) if (self.options.useHistPdf == "always" or shapeNominal == None) else shapeNominal if shapeNominal == None: return nominalPdf # no point morphing a fake shape morphs = []; shapeAlgo = None for (syst,nofloat,pdf,args,errline) in self.DC.systs: @@ -344,14 +357,25 @@ def getPdf(self,channel,process,_cache={}): shapeDown = self.getShape(channel,process,syst+"Down") if shapeUp.ClassName() != shapeNominal.ClassName(): raise RuntimeError, "Mismatched shape types for channel %s, process %s, syst %s" % (channel,process,syst) if shapeDown.ClassName() != shapeNominal.ClassName(): raise RuntimeError, "Mismatched shape types for channel %s, process %s, syst %s" % (channel,process,syst) - morphs.append((syst,errline[channel][process],self.shape2Pdf(shapeUp,channel,process),self.shape2Pdf(shapeDown,channel,process))) - if len(morphs) == 0: return nominalPdf + if self.options.useHistPdf == "always": + morphs.append((syst,errline[channel][process],self.shape2Pdf(shapeUp,channel,process),self.shape2Pdf(shapeDown,channel,process))) + else: + morphs.append((syst,errline[channel][process],shapeUp,shapeDown)) + if len(morphs) == 0: + if self.options.useHistPdf == "always": + return nominalPdf + else: + return self.shape2Pdf(shapeNominal,channel,process) if shapeAlgo == "shapeN": stderr.write("Warning: the shapeN implementation in RooStats and L&S are different\n") - pdfs = ROOT.RooArgList(nominalPdf) + pdfs = ROOT.RooArgList(nominalPdf) if self.options.useHistPdf == "always" else ROOT.TList() + if self.options.useHistPdf != "always": pdfs.Add(nominalPdf) coeffs = ROOT.RooArgList() minscale = 1 for (syst,scale,pdfUp,pdfDown) in morphs: - pdfs.add(pdfUp); pdfs.add(pdfDown); + if self.options.useHistPdf == "always": + pdfs.add(pdfUp); pdfs.add(pdfDown); + else: + pdfs.Add(pdfUp); pdfs.Add(pdfDown); if scale == 1: coeffs.add(self.out.var(syst)) else: # must scale it :-/ @@ -364,6 +388,39 @@ def getPdf(self,channel,process,_cache={}): if shapeAlgo == "shape": shapeAlgo = self.options.defMorph if "shapeL" in shapeAlgo: qrange = 0; elif "shapeN" in shapeAlgo: qalgo = -1; + if self.options.useHistPdf != "always": + if nominalPdf.InheritsFrom("TH1"): + rebins = ROOT.TList() + maxbins = 0 + for i in xrange(pdfs.GetSize()): + rebinned = self.rebinH1(pdfs.At(i)) + rebins.Add(rebinned) + maxbins = max(maxbins, rebinned._original_bins) + rhp = ROOT.FastVerticalInterpHistPdf2("shape%s_%s_%s_morph" % (postFix,channel,process), "", self.out.binVar, rebins, coeffs, qrange, qalgo) + if self.options.optimizeTemplateBins and maxbins < self.out.maxbins: + #print "Optimizing binning: %d -> %d for %s " % (self.out.maxbins, maxbins, rhp.GetName()) + rhp.setActiveBins(maxbins) + _cache[(channel,process)] = rhp + return rhp + elif nominalPdf.InheritsFrom("RooHistPdf") or nominalPdf.InheritsFrom("RooDataHist"): + nominalPdf = self.shape2Pdf(shapeNominal,channel,process) + pdfs.Clear(); ## now it contains "RooDataHist", it should contain "RooHistPdf" + pdfs.Add(nominalPdf) + for (syst,scale,shapeUp,shapeDown) in morphs: + pdfs.Add(self.shape2Pdf(shapeUp,channel,process)) + pdfs.Add(self.shape2Pdf(shapeDown,channel,process)) + histpdf = nominalPdf if nominalPdf.InheritsFrom("RooDataHist") else nominalPdf.dataHist() + xvar = histpdf.get().first() + rhp = ROOT.FastVerticalInterpHistPdf2("shape%s_%s_%s_morph" % (postFix,channel,process), "", xvar, pdfs, coeffs, qrange, qalgo) + _cache[(channel,process)] = rhp + return rhp + else: + pdflist = ROOT.RooArgList() + nominalPdf = self.shape2Pdf(shapeNominal,channel,process) + for (syst,scale,shapeUp,shapeDown) in morphs: + pdflist.add(self.shape2Pdf(shapeUp,channel,process)) + pdflist.add(self.shape2Pdf(shapeDown,channel,process)) + pdfs = pdflist if "2a" in shapeAlgo: # old shape2 if not nominalPdf.InheritsFrom("RooHistPdf"): raise RuntimeError, "Algorithms 'shape2', 'shapeL2', shapeN2' only work with histogram templates" if nominalPdf.dataHist().get().getSize() != 1: raise RuntimeError, "Algorithms 'shape2', 'shapeL2', shapeN2' only work in one dimension" @@ -420,6 +477,12 @@ def getExtraNorm(self,channel,process): self.doObj( "systeff_%s_%s_%s" % (channel,process,syst), "AsymPow", "%f,%f,%s" % (kappasScaled[0], kappasScaled[1], syst) ) terms.append( "systeff_%s_%s_%s" % (channel,process,syst) ) return terms if terms else None; + def rebinH1(self,shape): + rebinh1 = ROOT.TH1F(shape.GetName()+"_rebin", "", self.out.maxbins, 0.0, float(self.out.maxbins)) + for i in range(1,min(shape.GetNbinsX(),self.out.maxbins)+1): + rebinh1.SetBinContent(i, shape.GetBinContent(i)) + rebinh1._original_bins = shape.GetNbinsX() + return rebinh1; def shape2Data(self,shape,channel,process,_cache={}): postFix="Sig" if (process in self.DC.isSignal and self.DC.isSignal[process]) else "Bkg" if shape == None: @@ -442,11 +505,9 @@ def shape2Data(self,shape,channel,process,_cache={}): return _cache[name] if not _cache.has_key(shape.GetName()): if shape.ClassName().startswith("TH1"): - rebinh1 = ROOT.TH1F(shape.GetName()+"_rebin", "", self.out.maxbins, 0.0, float(self.out.maxbins)) - for i in range(1,min(shape.GetNbinsX(),self.out.maxbins)+1): - rebinh1.SetBinContent(i, shape.GetBinContent(i)) + rebinh1 = self.rebinH1(shape) rdh = ROOT.RooDataHist(shape.GetName(), shape.GetName(), ROOT.RooArgList(self.out.binVar), rebinh1) - self.out._import(rdh) + #self.out._import(rdh) _cache[shape.GetName()] = rdh elif shape.ClassName() in ["RooDataHist", "RooDataSet"]: return shape @@ -461,18 +522,23 @@ def shape2Pdf(self,shape,channel,process,_cache={}): return _cache[name] if not _cache.has_key(shape.GetName()+"Pdf"): if shape.ClassName().startswith("TH1"): - rdh = self.shape2Data(shape,channel,process) - rhp = self.doObj("%sPdf" % shape.GetName(), "HistPdf", "{%s}, %s" % (self.out.binVar.GetName(), shape.GetName())) - _cache[shape.GetName()+"Pdf"] = rhp + if self.options.useHistPdf == "never": + shape = self.rebinH1(shape) + list = ROOT.TList(); list.Add(shape); + rhp = ROOT.FastVerticalInterpHistPdf2("%sPdf" % shape.GetName(), "", self.out.binVar, list, ROOT.RooArgList()) + _cache[shape.GetName()+"Pdf"] = rhp + else: + rdh = self.shape2Data(shape,channel,process) + rhp = ROOT.RooHistPdf("%sPdf" % shape.GetName(), "", ROOT.RooArgSet(self.out.binVar), rdh) + rhp.rdh = rdh # so it doesn't get deleted + _cache[shape.GetName()+"Pdf"] = rhp elif shape.InheritsFrom("RooAbsPdf"): _cache[shape.GetName()+"Pdf"] = shape elif shape.InheritsFrom("RooDataHist"): rhp = ROOT.RooHistPdf("%sPdf" % shape.GetName(), "", shape.get(), shape) - self.out._import(rhp) _cache[shape.GetName()+"Pdf"] = rhp elif shape.InheritsFrom("RooDataSet"): rkp = ROOT.RooKeysPdf("%sPdf" % shape.GetName(), "", self.out.var(shape.var), shape,3,1.5); - self.out._import(rkp) _cache[shape.GetName()+"Pdf"] = rkp else: raise RuntimeError, "shape2Pdf not implemented for %s" % shape.ClassName() @@ -485,4 +551,20 @@ def argSetToString(self,argset): if not arg: break names.append(arg.GetName()) return ",".join(names) + def optimizeExistingTemplates(self,pdf): + if pdf.ClassName() == "FastVerticalInterpHistPdf2D": + return ROOT.FastVerticalInterpHistPdf2D2(pdf, "%s_opt" % pdf.GetName()) + elif pdf.ClassName() == "RooProdPdf" and pdf.pdfList().getSize() == 2: + f1 = pdf.pdfList().at(0) + f2 = pdf.pdfList().at(1) + #pdf.Print("") + if f2.ClassName() == "FastVerticalInterpHistPdf2D" and f2.conditional(): + f1 = self.optimizeExistingTemplates(f1) + f2 = ROOT.FastVerticalInterpHistPdf2D2(f2, "%s_opt" % f2.GetName()) + ret = ROOT.RooProdPdf("%s_opt" % pdf.GetName(), "", ROOT.RooArgSet(f1), ROOT.RooFit.Conditional(ROOT.RooArgSet(f2),ROOT.RooArgSet(f2.y()))) + ret.optf2 = f2 + ret.optf1 = f1 + #print "Optimize %s in \t" % (pdf.GetName()),; ret.Print("") + return ret + return pdf diff --git a/src/AsimovUtils.cc b/src/AsimovUtils.cc index 3dcb0c57e81f3..5f7e1866b5c2a 100644 --- a/src/AsimovUtils.cc +++ b/src/AsimovUtils.cc @@ -107,7 +107,7 @@ RooAbsData *asimovutils::asimovDatasetWithFit(RooStats::ModelConfig *mc, RooAbsD throw std::runtime_error(Form("AsimovUtils: can't find nuisance for constraint term %s", cterm->GetName())); } std::string pdfName(cterm->ClassName()); - if (pdfName == "RooGaussian" || pdfName == "RooBifurGauss" || pdfName == "RooPoisson" || pdfName == "RooGenericPdf") { + if (pdfName == "RooGaussian" || pdfName == "SimpleGaussianConstraint" || pdfName == "RooBifurGauss" || pdfName == "RooPoisson" || pdfName == "RooGenericPdf") { // this is easy rrv.setVal(match->getVal()); } else if (pdfName == "RooGamma") { diff --git a/src/BayesianToyMC.cc b/src/BayesianToyMC.cc index 5657646e5adda..f03695551fe01 100644 --- a/src/BayesianToyMC.cc +++ b/src/BayesianToyMC.cc @@ -13,6 +13,7 @@ #include #include "../interface/Combine.h" +#include "../interface/CachingNLL.h" #include "../interface/utils.h" using namespace RooStats; @@ -75,6 +76,9 @@ bool BayesianToyMC::run(RooWorkspace *w, RooStats::ModelConfig *mc_s, RooStats:: if (integrationType_ == "toymc") { if (nuisancePdf.get() == 0) nuisancePdf.reset(utils::makeNuisancePdf(*mc_s)); bcalc.ForceNuisancePdf(*nuisancePdf); + // turn off optimization of constraint terms in the NLL, otherwise + // it does not divide properly by the nuisance pdf + cacheutils::CachingSimNLL::forceUnoptimizedConstraints(); } // get the interval std::auto_ptr bcInterval(bcalc.GetInterval()); diff --git a/src/CachingNLL.cc b/src/CachingNLL.cc index 4f046d6685ee7..bd4ddaf55663c 100644 --- a/src/CachingNLL.cc +++ b/src/CachingNLL.cc @@ -6,6 +6,31 @@ #include #include "../interface/ProfilingTools.h" #include <../interface/RooMultiPdf.h> +#include <../interface/VerticalInterpHistPdf.h> +#include <../interface/VectorizedGaussian.h> +#include "vectorized.h" + +namespace cacheutils { + typedef OptimizedCachingPdfT CachingHistPdf; + typedef OptimizedCachingPdfT CachingHistPdf2; + typedef OptimizedCachingPdfT CachingGaussPdf; + + class ReminderSum : public RooAbsReal { + public: + ReminderSum() {} + ReminderSum(const char *name, const char *title, const RooArgList& sumSet) ; + ReminderSum(const ReminderSum &other, const char* name = 0) : + RooAbsReal(other, name), + list_("deps",this,other.list_), + terms_(other.terms_) {} + ~ReminderSum() {} + virtual TObject* clone(const char* newname) const { return new ReminderSum(*this,newname); } + private: + RooListProxy list_; + std::vector terms_; + Double_t evaluate() const; + }; +} //---- Uncomment this to get a '.' printed every some evals //#define TRACE_NLL_EVALS @@ -21,6 +46,7 @@ //std::map cacheutils::CachingAddNLL::offsets_; bool cacheutils::CachingSimNLL::noDeepLEE_ = false; bool cacheutils::CachingSimNLL::hasError_ = false; +bool cacheutils::CachingSimNLL::optimizeContraints_ = true; //#define DEBUG_TRACE_POINTS #ifdef DEBUG_TRACE_POINTS @@ -60,6 +86,7 @@ namespace { #define TRACE_NLL(x) #endif + cacheutils::ArgSetChecker::ArgSetChecker(const RooAbsCollection &set) { std::auto_ptr iter(set.createIterator()); @@ -202,13 +229,7 @@ cacheutils::CachingPdf::eval(const RooAbsData &data) PerfCounter::add("CachingPdf::eval called"); #endif bool newdata = (lastData_ != &data); - if (newdata) { - lastData_ = &data; - pdf_->optimizeCacheMode(*data.get()); - pdf_->attachDataSet(data); - const_cast(lastData_)->setDirtyProp(false); - cache_.clear(); - } + if (newdata) newData_(data); std::pair *, bool> hit = cache_.get(); if (!hit.second) { realFill_(data, *hit.first); @@ -216,6 +237,28 @@ cacheutils::CachingPdf::eval(const RooAbsData &data) return *hit.first; } +void +cacheutils::CachingPdf::newData_(const RooAbsData &data) +{ + lastData_ = &data; + pdf_->optimizeCacheMode(*data.get()); + pdf_->attachDataSet(data); + const_cast(lastData_)->setDirtyProp(false); + cache_.clear(); + nonZeroW_.resize(data.numEntries()); + nonZeroWEntries_ = 0; + for (unsigned int i = 0, n = nonZeroW_.size(); i < n; ++i) { + data.get(i); + if (data.weight() > 0) { + nonZeroWEntries_++; + nonZeroW_[i] = (data.weight() > 0); + } else { + nonZeroW_[i] = 0; + } + } +} + + void cacheutils::CachingPdf::realFill_(const RooAbsData &data, std::vector &vals) { @@ -223,19 +266,55 @@ cacheutils::CachingPdf::realFill_(const RooAbsData &data, std::vector PerfCounter::add("CachingPdf::realFill_ called"); #endif int n = data.numEntries(); - vals.resize(n); // should be a no-op if size is already >= n. - //std::auto_ptr params(pdf_->getObservables(*obs)); // for non-smart handling of pointers + vals.resize(nonZeroWEntries_); // should be a no-op if size is already >= n. std::vector::iterator itv = vals.begin(); - for (int i = 0; i < n; ++i, ++itv) { + for (int i = 0; i < n; ++i) { + if (!nonZeroW_[i]) continue; data.get(i); - //*params = *data.get(i); // for non-smart handling of pointers - *itv = pdf_->getVal(obs_); + *itv = pdf_->getVal(obs_); ++itv; //std::cout << " at i = " << i << " pdf = " << *itv << std::endl; TRACE_NLL("PDF value for " << pdf_->GetName() << " is " << *itv << " at this point.") TRACE_POINT2(*obs_,1) } } + +template +void +cacheutils::OptimizedCachingPdfT::newData_(const RooAbsData &data) +{ + lastData_ = &data; + pdf_->optimizeCacheMode(*data.get()); + pdf_->attachDataSet(data); + const_cast(lastData_)->setDirtyProp(false); + cache_.clear(); + delete vpdf_; + vpdf_ = new VPdfT(static_cast(*pdf_), data); +} + +template +void +cacheutils::OptimizedCachingPdfT::realFill_(const RooAbsData &data, std::vector &vals) +{ + vpdf_->fill(vals); +} + + +cacheutils::ReminderSum::ReminderSum(const char *name, const char *title, const RooArgList& sumSet) : + list_("deps","",this) +{ + RooLinkedListIter iter(sumSet.iterator()); + for (RooAbsReal *rar = (RooAbsReal *) iter.Next(); rar != 0; rar = (RooAbsReal *) iter.Next()) { + list_.add(*rar); + terms_.push_back(rar); + } +} +Double_t cacheutils::ReminderSum::evaluate() const { + Double_t ret = 1.0; + for (auto x : terms_) ret -= x->getVal(); + return ret; +} + cacheutils::CachingAddNLL::CachingAddNLL(const char *name, const char *title, RooAbsPdf *pdf, RooAbsData *data) : RooAbsReal(name, title), pdf_(pdf), @@ -269,26 +348,65 @@ cacheutils::CachingAddNLL::clone(const char *name) const return new cacheutils::CachingAddNLL(*this, name); } +void cacheutils::CachingAddNLL::addPdfs_(RooAddPdf *addpdf, bool recursive, const RooArgList & basecoeffs) +{ + bool histNll = runtimedef::get("ADDNLL_HISTNLL"); + bool gaussNll = runtimedef::get("ADDNLL_GAUSSNLL"); + int npdf = addpdf->pdfList().getSize(); + //std::cout << "Unpacking RooAddPdf " << addpdf->GetName() << " with " << npdf << " components:" << std::endl; + RooAbsReal *lastcoeff = 0; + if (npdf == addpdf->coefList().getSize()) { + lastcoeff = dynamic_cast(addpdf->coefList().at(npdf-1)); + } else { + prods_.push_back(new ReminderSum("","", addpdf->coefList())); + lastcoeff = & prods_.back(); + } + for (int i = 0; i < npdf; ++i) { + RooAbsReal * coeff = (i < npdf-1 ? dynamic_cast(addpdf->coefList().at(i)) : lastcoeff); + RooAbsPdf * pdfi = dynamic_cast(addpdf->pdfList().at(i)); + if (recursive && typeid(*pdfi) == typeid(RooAddPdf)) { + RooAddPdf *apdfi = static_cast(pdfi); + RooArgList list(*coeff); + if (basecoeffs.getSize()) list.add(basecoeffs); + //std::cout << " Invoking recursive unpack on " << i << ": RooAddPdf " << apdfi->GetName() << std::endl; + addPdfs_(apdfi, recursive, list); + continue; + } + if (basecoeffs.getSize() == 0) { + coeffs_.push_back(coeff); + } else { + RooArgList list(*coeff); + list.add(basecoeffs); + prods_.push_back(new RooProduct("","",list)); + coeffs_.push_back(&prods_.back()); + } + const RooArgSet *obs = data_->get(); + //std::cout << " Adding " << i << ": " << pdfi->ClassName() << " " << pdfi->GetName() << std::endl; + if (histNll && typeid(*pdfi) == typeid(FastVerticalInterpHistPdf)) { + pdfs_.push_back(new CachingHistPdf(pdfi, obs)); + } else if (histNll && typeid(*pdfi) == typeid(FastVerticalInterpHistPdf2)) { + pdfs_.push_back(new CachingHistPdf2(pdfi, obs)); + } else if (gaussNll && typeid(*pdfi) == typeid(RooGaussian)) { + pdfs_.push_back(new CachingGaussPdf(pdfi, obs)); + } else { + pdfs_.push_back(new CachingPdf(pdfi, obs)); + } + } +} + void cacheutils::CachingAddNLL::setup_() { - const RooArgSet *obs = data_->get(); + fastExit_ = !runtimedef::get("NO_ADDNLL_FASTEXIT"); for (int i = 0, n = integrals_.size(); i < n; ++i) delete integrals_[i]; - integrals_.clear(); + integrals_.clear(); pdfs_.clear(); coeffs_.clear(); prods_.clear(); RooAddPdf *addpdf = 0; RooRealSumPdf *sumpdf = 0; if ((addpdf = dynamic_cast(pdf_)) != 0) { isRooRealSum_ = false; - int npdf = addpdf->coefList().getSize(); - coeffs_.reserve(npdf); - pdfs_.reserve(npdf); - for (int i = 0; i < npdf; ++i) { - RooAbsReal * coeff = dynamic_cast(addpdf->coefList().at(i)); - RooAbsPdf * pdfi = dynamic_cast(addpdf->pdfList().at(i)); - coeffs_.push_back(coeff); - pdfs_.push_back(CachingPdf(pdfi, obs)); - } + addPdfs_(addpdf, runtimedef::get("ADDNLL_RECURSIVE"), RooArgList()); } else if ((sumpdf = dynamic_cast(pdf_)) != 0) { + const RooArgSet *obs = data_->get(); isRooRealSum_ = true; int npdf = sumpdf->coefList().getSize(); coeffs_.reserve(npdf); @@ -297,30 +415,8 @@ cacheutils::CachingAddNLL::setup_() for (int i = 0; i < npdf; ++i) { RooAbsReal * coeff = dynamic_cast(sumpdf->coefList().at(i)); RooAbsReal * funci = dynamic_cast(sumpdf->funcList().at(i)); - /// Temporarily switch this off, it doesn't work. Don't know why, however. - if (0 && typeid(*funci) == typeid(RooProduct)) { - RooArgList obsDep, obsInd; - obsInd.add(*coeff); - utils::factorizeFunc(*obs, *funci, obsDep, obsInd); - std::cout << "Entry " << i << ": coef name " << (coeff ? coeff->GetName() : "null") << - " type " << (coeff ? coeff->ClassName() : "n/a") << std::endl; - std::cout << " " << "; func name " << (funci ? funci->GetName() : "null") << - " type " << (funci ? funci->ClassName() : "n/a") << std::endl; - std::cout << "Terms depending on observables: " << std::endl; obsDep.Print("V"); - std::cout << "Terms not depending on observables: " << std::endl; obsInd.Print("V"); - if (obsInd.getSize() > 1) { - coeff = new RooProduct(TString::Format("%s_x_%s_obsIndep", coeff->GetName(), funci->GetName()), "", RooArgSet(obsInd)); - addOwnedComponents(RooArgSet(*coeff)); - } - if (obsDep.getSize() > 1) { - funci = new RooProduct(TString::Format("%s_obsDep", funci->GetName()), "", RooArgSet(obsInd)); - addOwnedComponents(RooArgSet(*funci)); - } else if (obsDep.getSize() == 1) { - funci = (RooAbsReal *) obsDep.first(); - } else throw std::logic_error("No part of pdf depends on observables?"); - } coeffs_.push_back(coeff); - pdfs_.push_back(CachingPdf(funci, obs)); + pdfs_.push_back(new CachingPdf(funci, obs)); integrals_.push_back(funci->createIntegral(*obs)); } } else { @@ -338,6 +434,17 @@ cacheutils::CachingAddNLL::setup_() //if (rrv != 0 && !rrv->isConstant()) params_.add(*rrv); if (rrv != 0) params_.add(*rrv); } + + // For multi pdf's need to reset the cache if index changed before evaluations + multiPdfs_.clear(); + for (auto itp = pdfs_.begin(), edp = pdfs_.end(); itp != edp; ++itp) { + bool isMultiPdf = itp->pdf()->IsA()->InheritsFrom(RooMultiPdf::Class()); + if (isMultiPdf) { + const RooMultiPdf *mpdf = dynamic_cast((*itp).pdf()); + multiPdfs_.push_back(std::make_pair(mpdf, &*itp)); + } + } + } Double_t @@ -348,19 +455,17 @@ cacheutils::CachingAddNLL::evaluate() const #endif // For multi pdf's need to reset the cache if index changed before evaluations - for (std::vector::iterator itp = pdfs_.begin(), edp = pdfs_.end(); itp != edp; ++itp) { - bool isMultiPdf = itp->pdf()->IsA()->InheritsFrom(RooMultiPdf::Class()); - if (isMultiPdf) { - const RooMultiPdf *mpdf = dynamic_cast((*itp).pdf()); - bool hasChangedPdf = mpdf->checkIndexDirty(); - if (hasChangedPdf) itp->setDataDirty(); - } + if (!multiPdfs_.empty()) { + for (std::vector >::iterator itp = multiPdfs_.begin(), edp = multiPdfs_.end(); itp != edp; ++itp) { + bool hasChangedPdf = itp->first->checkIndexDirty(); + if (hasChangedPdf) itp->second->setDataDirty(); + } } std::fill( partialSum_.begin(), partialSum_.end(), 0.0 ); std::vector::iterator itc = coeffs_.begin(), edc = coeffs_.end(); - std::vector::iterator itp = pdfs_.begin();//, edp = pdfs_.end(); + boost::ptr_vector::iterator itp = pdfs_.begin();//, edp = pdfs_.end(); std::vector::const_iterator itw, bgw = weights_.begin();//, edw = weights_.end(); std::vector::iterator its, bgs = partialSum_.begin(), eds = partialSum_.end(); double sumCoeff = 0; @@ -377,24 +482,34 @@ cacheutils::CachingAddNLL::evaluate() const // get vals const std::vector &pdfvals = itp->eval(*data_); // update running sum - std::vector::const_iterator itv = pdfvals.begin(); - for (its = bgs; its != eds; ++its, ++itv) { - *its += coeff * (*itv); // sum (n_i * pdf_i) - } + // std::vector::const_iterator itv = pdfvals.begin(); + // for (its = bgs; its != eds; ++its, ++itv) { + // *its += coeff * (*itv); // sum (n_i * pdf_i) + // } + // vectorize to make it faster + vectorized::mul_add(pdfvals.size(), coeff, &pdfvals[0], &partialSum_[0]); } // then get the final nll double ret = 0; - bool fastExit = runtimedef::get("ADDNLL_FASTEXIT"); - for ( its = bgs, itw = bgw ; its != eds ; ++its, ++itw ) { - if (*itw == 0) continue; + for (its = bgs; its != eds ; ++its) { if (!isnormal(*its) || *its <= 0) { std::cerr << "WARNING: underflow to " << *its << " in " << GetName() << std::endl; if (!CachingSimNLL::noDeepLEE_) logEvalError("Number of events is negative or error"); else CachingSimNLL::hasError_ = true; - if (fastExit) { ret += -9e9; break; } + if (fastExit_) { return 9e9; } + else *its = 1; } - double thispiece = (*itw) * (*its <= 0 ? -9e9 : log( ((*its) / sumCoeff) )); - #ifdef ADDNLL_KAHAN_SUM - static bool do_kahan = runtimedef::get("ADDNLL_KAHAN_SUM"); + } + #ifndef ADDNLL_KAHAN_SUM + // Do the reduction + // for ( its = bgs, itw = bgw ; its != eds ; ++its, ++itw ) { + // ret += (*itw) * log( ((*its) / sumCoeff) ); + // } + ret += vectorized::nll_reduce(partialSum_.size(), &partialSum_[0], &weights_[0], sumCoeff, &workingArea_[0]); + #else + double compensation = 0; + static bool do_kahan = runtimedef::get("ADDNLL_KAHAN_SUM"); + for ( its = bgs, itw = bgw ; its != eds ; ++its, ++itw ) { + double thispiece = (*itw) * log( ((*its) / sumCoeff) ); if (do_kahan) { double kahan_y = thispiece - compensation; double kahan_t = ret + kahan_y; @@ -404,10 +519,9 @@ cacheutils::CachingAddNLL::evaluate() const } else { ret += thispiece; } - #else ret += thispiece; - #endif } + #endif // then flip sign ret = -ret; // std::cout << "AddNLL for " << pdf_->GetName() << ": " << ret << std::endl; @@ -422,16 +536,14 @@ cacheutils::CachingAddNLL::evaluate() const ret += zeroPoint_; // multipdfs want to add a correction factor to the NLL - double correctionFactor = 0; - for (std::vector::iterator itp = pdfs_.begin(), edp = pdfs_.end(); itp != edp; ++itp) { - bool isMultiPdf = itp->pdf()->IsA()->InheritsFrom(RooMultiPdf::Class()); - if (isMultiPdf){ - const RooMultiPdf *mpdf = dynamic_cast((*itp).pdf()); - correctionFactor += mpdf->getCorrection(); - } + if (!multiPdfs_.empty()) { + double correctionFactor = 0; + for (std::vector >::iterator itp = multiPdfs_.begin(), edp = multiPdfs_.end(); itp != edp; ++itp) { + correctionFactor += itp->first->getCorrection(); + } + // Add correction + ret+=correctionFactor; } - // Add correction - ret+=correctionFactor; TRACE_NLL("AddNLL for " << pdf_->GetName() << ": " << ret) return ret; @@ -445,31 +557,32 @@ cacheutils::CachingAddNLL::setData(const RooAbsData &data) data_ = &data; setValueDirty(); sumWeights_ = 0.0; - weights_.resize(data.numEntries()); - partialSum_.resize(data.numEntries()); - std::vector::iterator itw = weights_.begin(); + weights_.clear(); weights_.reserve(data.numEntries()); #ifdef ADDNLL_KAHAN_SUM double compensation = 0; #endif - for (int i = 0, n = data.numEntries(); i < n; ++i, ++itw) { + for (int i = 0, n = data.numEntries(); i < n; ++i) { data.get(i); - *itw = data.weight(); + double w = data.weight(); + if (w) weights_.push_back(w); #ifdef ADDNLL_KAHAN_SUM static bool do_kahan = runtimedef::get("ADDNLL_KAHAN_SUM"); if (do_kahan) { - double kahan_y = *itw - compensation; + double kahan_y = w - compensation; double kahan_t = sumWeights_ + kahan_y; double kahan_d = (kahan_t - sumWeights_); compensation = kahan_d - kahan_y; sumWeights_ = kahan_t; } else { - sumWeights_ += *itw; + sumWeights_ += w; } #else - sumWeights_ += *itw; + sumWeights_ += w; #endif } - for (std::vector::iterator itp = pdfs_.begin(), edp = pdfs_.end(); itp != edp; ++itp) { + partialSum_.resize(weights_.size()); + workingArea_.resize(weights_.size()); + for (auto itp = pdfs_.begin(), edp = pdfs_.end(); itp != edp; ++itp) { itp->setDataDirty(); } } @@ -513,8 +626,9 @@ cacheutils::CachingSimNLL::clone(const char *name) const cacheutils::CachingSimNLL::~CachingSimNLL() { - for (std::vector::iterator it = constrainPdfsFast_.begin(), ed = constrainPdfsFast_.end(); it != ed; ++it) { - delete *it; + std::vector::const_iterator ito = constrainPdfsFastOwned_.begin(); + for (std::vector::iterator it = constrainPdfsFast_.begin(), ed = constrainPdfsFast_.end(); it != ed; ++it, ++ito) { + if (*ito) { delete *it; } } } @@ -538,12 +652,16 @@ cacheutils::CachingSimNLL::setup_() RooSimultaneous *simpdf = factorizedPdf_.get(); constrainPdfs_.clear(); if (constraints.getSize()) { - bool FastConstraints = runtimedef::get("SIMNLL_FASTGAUSS"); - //constrainPdfs_.push_back(new RooProdPdf("constraints","constraints", constraints)); + int FastConstraints = optimizeContraints_ && runtimedef::get("SIMNLL_FASTGAUSS"); for (int i = 0, n = constraints.getSize(); i < n; ++i) { RooAbsPdf *pdfi = dynamic_cast(constraints.at(i)); - if (FastConstraints && typeid(*pdfi) == typeid(RooGaussian)) { + if (optimizeContraints_ && typeid(*pdfi) == typeid(SimpleGaussianConstraint)) { + constrainPdfsFast_.push_back(static_cast(pdfi)); + constrainPdfsFastOwned_.push_back(false); + constrainZeroPointsFast_.push_back(0); + } else if (FastConstraints && typeid(*pdfi) == typeid(RooGaussian)) { constrainPdfsFast_.push_back(new SimpleGaussianConstraint(dynamic_cast(*pdfi))); + constrainPdfsFastOwned_.push_back(true); constrainZeroPointsFast_.push_back(0); } else { constrainPdfs_.push_back(pdfi); @@ -605,7 +723,7 @@ cacheutils::CachingSimNLL::evaluate() const ret += nllval; } } - if (!constrainPdfs_.empty()) { + if (!constrainPdfs_.empty() || !constrainPdfsFast_.empty()) { /// ============= GENERIC CONSTRAINTS ========= std::vector::const_iterator itz = constrainZeroPoints_.begin(); for (std::vector::const_iterator it = constrainPdfs_.begin(), ed = constrainPdfs_.end(); it != ed; ++it, ++itz) { @@ -620,6 +738,7 @@ cacheutils::CachingSimNLL::evaluate() const itz = constrainZeroPointsFast_.begin(); for (std::vector::const_iterator it = constrainPdfsFast_.begin(), ed = constrainPdfsFast_.end(); it != ed; ++it, ++itz) { double logpdfval = (*it)->getLogValFast(); + //std::cout << "pdf " << (*it)->GetName() << " = " << logpdfval << std::endl; ret -= (logpdfval + *itz); } } @@ -655,14 +774,27 @@ cacheutils::CachingSimNLL::setData(const RooAbsData &data) void cacheutils::CachingSimNLL::splitWithWeights(const RooAbsData &data, const RooAbsCategory& splitCat, Bool_t createEmptyDataSets) { RooCategory *cat = dynamic_cast(data.get()->find(splitCat.GetName())); if (cat == 0) throw std::logic_error("Error: no category"); + std::auto_ptr catClone((RooAbsCategoryLValue*) splitCat.Clone()); int nb = cat->numBins((const char *)0), ne = data.numEntries(); RooArgSet obs(*data.get()); obs.remove(*cat, true, true); RooRealVar weight("_weight_","",1); RooArgSet obsplus(obs); obsplus.add(weight); if (nb != int(datasets_.size())) throw std::logic_error("Number of categories changed"); // this can happen due to bugs in RooDataSet for (int ib = 0; ib < nb; ++ib) { - if (datasets_[ib] == 0) datasets_[ib] = new RooDataSet("", "", obsplus, "_weight_"); - else datasets_[ib]->reset(); + if (datasets_[ib] == 0) { + catClone->setBin(ib); + RooAbsPdf *pdf = pdfOriginal_->getPdf(catClone->getLabel()); + if (pdf) { + std::auto_ptr myobs(pdf->getObservables(obs)); + myobs->add(weight); + //std::cout << "Observables for bin " << ib << ":"; myobs->Print(""); + datasets_[ib] = new RooDataSet("", "", *myobs, "_weight_"); + } else { + datasets_[ib] = new RooDataSet("", "", obsplus, "_weight_"); + } + } else { + datasets_[ib]->reset(); + } } //utils::printRDH((RooAbsData*)&data); for (int i = 0; i < ne; ++i) { diff --git a/src/Combine.cc b/src/Combine.cc index 2c575cdbe8a3f..1f00920262297 100644 --- a/src/Combine.cc +++ b/src/Combine.cc @@ -37,6 +37,7 @@ #include #include #include +#include #include #include @@ -54,6 +55,7 @@ #include "../interface/ToyMCSamplerOpt.h" #include "../interface/AsimovUtils.h" #include "../interface/CascadeMinimizer.h" +#include "../interface/ProfilingTools.h" using namespace RooStats; using namespace RooFit; @@ -114,6 +116,7 @@ Combine::Combine() : miscOptions_.add_options() ("newGenerator", po::value(&newGen_)->default_value(true), "Use new generator code for toys, fixes all issues with binned and mixed generation (equivalent of --newToyMC but affects the top-level toys from option '-t' instead of the ones within the HybridNew)") ("optimizeSimPdf", po::value(&optSimPdf_)->default_value(true), "Turn on special optimizations of RooSimultaneous. On by default, you can turn it off if it doesn't work for your workspace.") + ("noMCbonly", po::value(&noMCbonly_)->default_value(false), "Don't create a background-only modelConfig") ("rebuildSimPdf", po::value(&rebuildSimPdf_)->default_value(false), "Rebuild simultaneous pdf from scratch to make sure constraints are correct (not needed in CMS workspaces)") ("compile", "Compile expressions instead of interpreting them") ("tempDir", po::value(&makeTempDir_)->default_value(false), "Run the program from a temporary directory (automatically on for text datacards or if 'compile' is activated)") @@ -147,6 +150,10 @@ void Combine::applyOptions(const boost::program_options::variables_map &vm) { mass_ = vm["mass"].as(); saveToys_ = vm.count("saveToys"); validateModel_ = vm.count("validateModel"); + if (vm["method"].as() == "MultiDimFit" || ( vm["method"].as() == "MaxLikelihoodFit" && vm.count("justFit"))) { + //CMSDAS new default, + if (vm["noMCbonly"].defaulted()) noMCbonly_ = 1; + } } bool Combine::mklimit(RooWorkspace *w, RooStats::ModelConfig *mc_s, RooStats::ModelConfig *mc_b, RooAbsData &data, double &limit, double &limitErr) { @@ -222,6 +229,7 @@ void Combine::run(TString hlfFile, const std::string &dataset, double &limit, do if (!withSystematics) options += " --stat "; if (compiledExpr_) options += " --compiled "; if (verbose > 1) options += TString::Format(" --verbose %d", verbose-1); + if (algo->name() == "MaxLikelihoodFit" || algo->name() == "MultiDimFit") options += " --for-fits"; //-- Text mode: old default //int status = gSystem->Exec("text2workspace.py "+options+" '"+txtFile+"' -o "+tmpFile+".hlf"); //isTextDatacard = true; fileToLoad = tmpFile+".hlf"; @@ -290,7 +298,7 @@ void Combine::run(TString hlfFile, const std::string &dataset, double &limit, do w->import(*optpdf); mc->SetPdf(*optpdf); } - if (mc_bonly == 0) { + if (mc_bonly == 0 && !noMCbonly_) { std::cerr << "Missing background ModelConfig '" << modelConfigNameB_ << "' in workspace '" << workspaceName_ << "' in file " << fileToLoad << std::endl; std::cerr << "Will make one from the signal ModelConfig '" << modelConfigName_ << "' setting signal strenth '" << POI->first()->GetName() << "' to zero" << std::endl; w->factory("_zero_[0]"); @@ -395,7 +403,7 @@ void Combine::run(TString hlfFile, const std::string &dataset, double &limit, do RooArgSet newnuis(*nuisances); newnuis.remove(toFreeze, /*silent=*/true, /*byname=*/true); mc->SetNuisanceParameters(newnuis); - mc_bonly->SetNuisanceParameters(newnuis); + if (mc_bonly) mc_bonly->SetNuisanceParameters(newnuis); nuisances = mc->GetNuisanceParameters(); } } @@ -434,8 +442,8 @@ void Combine::run(TString hlfFile, const std::string &dataset, double &limit, do // make sure these things are set consistently with what we expect if (mc->GetNuisanceParameters() && withSystematics) utils::setAllConstant(*mc->GetNuisanceParameters(), false); if (mc->GetGlobalObservables()) utils::setAllConstant(*mc->GetGlobalObservables(), true); - if (mc_bonly->GetNuisanceParameters() && withSystematics) utils::setAllConstant(*mc_bonly->GetNuisanceParameters(), false); - if (mc_bonly->GetGlobalObservables()) utils::setAllConstant(*mc_bonly->GetGlobalObservables(), true); + if (mc_bonly && mc_bonly->GetNuisanceParameters() && withSystematics) utils::setAllConstant(*mc_bonly->GetNuisanceParameters(), false); + if (mc_bonly && mc_bonly->GetGlobalObservables()) utils::setAllConstant(*mc_bonly->GetGlobalObservables(), true); w->saveSnapshot("clean", w->allVars()); @@ -447,15 +455,15 @@ void Combine::run(TString hlfFile, const std::string &dataset, double &limit, do tree_ = tree; - bool isExtended = mc_bonly->GetPdf()->canBeExtended(); + bool isExtended = mc->GetPdf()->canBeExtended(); RooRealVar *MH = w->var("MH"); RooAbsData *dobs = w->data(dataset.c_str()); // Generate with signal model if r or other physics model parameters are defined - RooAbsPdf *genPdf = (expectSignal_ > 0 || setPhysicsModelParameterExpression_ != "") ? mc->GetPdf() : mc_bonly->GetPdf(); - toymcoptutils::SimPdfGenInfo newToyMC(*genPdf, *observables, !unbinned_); RooRealVar *weightVar_ = 0; - if (guessGenMode_ && genPdf->InheritsFrom("RooSimultaneous") && (dobs != 0)) { + RooAbsPdf *genPdf = (expectSignal_ > 0 || setPhysicsModelParameterExpression_ != "" || !mc_bonly) ? mc->GetPdf() : (mc_bonly ? mc_bonly->GetPdf() : 0); + RooRealVar *weightVar_ = 0; // will be needed for toy generation in some cases + if (guessGenMode_ && genPdf && genPdf->InheritsFrom("RooSimultaneous") && (dobs != 0)) { utils::guessChannelMode(dynamic_cast(*mc->GetPdf()), *dobs, verbose); - utils::guessChannelMode(dynamic_cast(*mc_bonly->GetPdf()), *dobs, 0); + if (mc_bonly) utils::guessChannelMode(dynamic_cast(*mc_bonly->GetPdf()), *dobs, 0); } if (expectSignal_ > 0) { if (POI->find("r")) { @@ -468,7 +476,7 @@ void Combine::run(TString hlfFile, const std::string &dataset, double &limit, do if (nToys <= 0) { // observed or asimov iToy = nToys; - if (iToy == -1) { + if (iToy == -1) { if (readToysFromHere != 0){ dobs = dynamic_cast(readToysFromHere->Get("toys/toy_asimov")); if (dobs == 0) { @@ -478,6 +486,7 @@ void Combine::run(TString hlfFile, const std::string &dataset, double &limit, do } } else{ + if (genPdf == 0) throw std::invalid_argument("You can't generate background-only toys if you have no background-only pdf in the workspace and you have set --noMCbonly"); if (newGen_) { if (toysFrequentist_) { w->saveSnapshot("reallyClean", w->allVars()); @@ -491,6 +500,7 @@ void Combine::run(TString hlfFile, const std::string &dataset, double &limit, do w->saveSnapshot("clean", w->allVars()); } } else { + toymcoptutils::SimPdfGenInfo newToyMC(*genPdf, *observables, !unbinned_); dobs = newToyMC.generateAsimov(weightVar_); // as simple as that } } else if (isExtended) { @@ -519,6 +529,8 @@ void Combine::run(TString hlfFile, const std::string &dataset, double &limit, do std::vector limitHistory; std::auto_ptr nuisancePdf; if (nToys > 0) { + if (genPdf == 0) throw std::invalid_argument("You can't generate background-only toys if you have no background-only pdf in the workspace and you have set --noMCbonly"); + toymcoptutils::SimPdfGenInfo newToyMC(*genPdf, *observables, !unbinned_); double expLimit = 0; unsigned int nLimits = 0; w->loadSnapshot("clean"); diff --git a/src/CombinedLimit_LinkDef.h b/src/CombinedLimit_LinkDef.h index 98dc7ddf363be..678ba266f1d7a 100644 --- a/src/CombinedLimit_LinkDef.h +++ b/src/CombinedLimit_LinkDef.h @@ -20,6 +20,7 @@ #include "../interface/rVrFLikelihood.h" #include "../interface/RooMultiPdf.h" #include "../interface/RooBernsteinFast.h" +#include "../interface/SimpleGaussianConstraint.h" #ifdef __CINT__ #pragma link off all globals; @@ -29,13 +30,21 @@ #pragma link C++ class DebugProposal+; #pragma link C++ class VerticalInterpPdf+; #pragma link C++ class VerticalInterpHistPdf+; +#pragma link C++ class FastTemplate+; +#pragma link C++ class FastHisto+; +#pragma link C++ class FastHisto2D+; #pragma link C++ class FastVerticalInterpHistPdfBase+; +#pragma link C++ class FastVerticalInterpHistPdfBase::Morph+; #pragma link C++ class FastVerticalInterpHistPdf+; #pragma link C++ class FastVerticalInterpHistPdf2D+; +#pragma link C++ class FastVerticalInterpHistPdf2Base+; +#pragma link C++ class FastVerticalInterpHistPdf2+; +#pragma link C++ class FastVerticalInterpHistPdf2D2+; #pragma link C++ class AsymPow+; #pragma link C++ class CombDataSetFactory+; #pragma link C++ class TH1Keys+; #pragma link C++ class RooSimultaneousOpt+; +#pragma link C++ class SimpleGaussianConstraint+; #pragma link C++ class SimpleCacheSentry+; #pragma link C++ function th1fmorph; #pragma link C++ class RooqqZZPdf+; diff --git a/src/FastTemplate.cc b/src/FastTemplate.cc index d567cc1553ed3..64afd1a326716 100644 --- a/src/FastTemplate.cc +++ b/src/FastTemplate.cc @@ -20,7 +20,7 @@ void FastTemplate::Clear() { } void FastTemplate::CopyValues(const FastTemplate &other) { - memcpy(values_, other.values_, size_*sizeof(T)); + std::copy(other.values_.begin(), other.values_.begin()+size_, values_.begin()); } void FastTemplate::CopyValues(const TH1 &other) { @@ -37,38 +37,42 @@ void FastTemplate::CopyValues(const TH2 &other) { } void FastTemplate::Dump() const { - printf("--- dumping template with %d bins (@%p) ---\n", size_+1, (void*)values_); - for (unsigned int i = 0; i < size_; ++i) printf(" bin %3d: yval = %9.5f\n", i, values_[i]); + printf("--- dumping template with %d bins (%d active bins) (@%p) ---\n", int(values_.size()), size_, (void*)(&values_[0])); + for (unsigned int i = 0; i < values_.size(); ++i) printf(" bin %3d: yval = %9.5f\n", i, values_[i]); printf("\n"); } FastHisto::FastHisto(const TH1 &hist) : FastTemplate(hist), - binEdges_(new T[size_+1]), - binWidths_(new T[size_]) + binEdges_(size()+1), + binWidths_(size()) { - for (unsigned int i = 0; i < size_; ++i) { + for (unsigned int i = 0, n = size(); i < n; ++i) { binEdges_[i] = hist.GetBinLowEdge(i+1); binWidths_[i] = hist.GetBinWidth(i+1); } - binEdges_[size_] = hist.GetBinLowEdge(size_+1); + binEdges_.back() = hist.GetBinLowEdge(size()+1); } FastHisto::FastHisto(const FastHisto &other) : FastTemplate(other), - binEdges_(size_ ? new T[size_+1] : 0), - binWidths_(size_ ? new T[size_] : 0) + binEdges_(other.binEdges_), + binWidths_(other.binWidths_) { - if (size_) { - memcpy(binEdges_, other.binEdges_, (size_+1)*sizeof(T)); - memcpy(binWidths_, other.binWidths_, size_*sizeof(T)); - } } +int FastHisto::FindBin(const T &x) const { + auto match = std::lower_bound(binEdges_.begin(), binEdges_.end(), x); + if (match == binEdges_.begin()) return -1; + if (match == binEdges_.end()) return values_.size(); + return match - binEdges_.begin() - 1; +} + + FastHisto::T FastHisto::GetAt(const T &x) const { - T *match = std::lower_bound(binEdges_, binEdges_+size_+1, x); - if (match == binEdges_ || match == binEdges_+size_+1) return T(0.0); - return values_[match - binEdges_ - 1]; + auto match = std::lower_bound(binEdges_.begin(), binEdges_.end(), x); + if (match == binEdges_.begin() || match == binEdges_.end()) return T(0.0); + return values_[match - binEdges_.begin() - 1]; } FastHisto::T FastHisto::IntegralWidth() const { @@ -78,8 +82,8 @@ FastHisto::T FastHisto::IntegralWidth() const { } void FastHisto::Dump() const { - printf("--- dumping histo template with %d bins in range %.2f - %.2f (@%p)---\n", size_+1, binEdges_[0], binEdges_[size_], (void*)values_); - for (unsigned int i = 0; i < size_; ++i) { + printf("--- dumping histo template with %d bins (%d active) in range %.2f - %.2f (@%p)---\n", int(values_.size()), size_, binEdges_[0], binEdges_[size_], (void*)&values_[0]); + for (unsigned int i = 0; i < values_.size(); ++i) { printf(" bin %3d, x = %6.2f: yval = %9.5f, width = %6.3f\n", i, 0.5*(binEdges_[i]+binEdges_[i+1]), values_[i], binWidths_[i]); } @@ -88,20 +92,21 @@ void FastHisto::Dump() const { FastHisto2D::FastHisto2D(const TH2 &hist, bool normXonly) : FastTemplate(hist), - binX_(hist.GetNbinsX()), binY_(hist.GetNbinsY()), - binEdgesX_(new T[binX_+1]), - binEdgesY_(new T[binY_+1]), - binWidths_(new T[size_]) + binX_(hist.GetNbinsX()), + binY_(hist.GetNbinsY()), + binEdgesX_(hist.GetNbinsX()+1), + binEdgesY_(hist.GetNbinsY()+1), + binWidths_(size_) { TAxis *ax = hist.GetXaxis(), *ay = hist.GetYaxis(); for (unsigned int ix = 0; ix < binX_; ++ix) { binEdgesX_[ix] = ax->GetBinLowEdge(ix+1); } - binEdgesX_[binX_] = ax->GetBinLowEdge(binX_+1); + binEdgesX_.back() = ax->GetBinLowEdge(binX_+1); for (unsigned int iy = 0; iy < binY_; ++iy) { binEdgesY_[iy] = ay->GetBinLowEdge(iy+1); } - binEdgesY_[binY_] = ay->GetBinLowEdge(binY_+1); + binEdgesY_.back() = ay->GetBinLowEdge(binY_+1); for (unsigned int ix = 1, i = 0; ix <= binX_; ++ix) { for (unsigned int iy = 1; iy <= binY_; ++iy, ++i) { binWidths_[i] = (normXonly ? 1 : ax->GetBinWidth(ix))*ay->GetBinWidth(iy); @@ -111,25 +116,21 @@ FastHisto2D::FastHisto2D(const TH2 &hist, bool normXonly) : FastHisto2D::FastHisto2D(const FastHisto2D &other) : FastTemplate(other), - binX_(other.binX_), binY_(other.binY_), - binEdgesX_(binX_ ? new T[binX_+1] : 0), - binEdgesY_(binX_ ? new T[binY_+1] : 0), - binWidths_(binX_ ? new T[size_] : 0) + binX_(other.binX_), + binY_(other.binY_), + binEdgesX_(other.binEdgesX_), + binEdgesY_(other.binEdgesY_), + binWidths_(other.binWidths_) { - if (binX_) { - memcpy(binEdgesX_, other.binEdgesX_, (binX_+1)*sizeof(T)); - memcpy(binEdgesY_, other.binEdgesY_, (binY_+1)*sizeof(T)); - memcpy(binWidths_, other.binWidths_, size_*sizeof(T)); - } } FastHisto2D::T FastHisto2D::GetAt(const T &x, const T &y) const { - T *matchx = std::lower_bound(binEdgesX_, binEdgesX_+binX_+1, x); - int ix = (matchx - binEdgesX_ - 1); - if (ix < 0 || unsigned(ix) >= binX_) return T(0.0); - T *matchy = std::lower_bound(binEdgesY_, binEdgesY_+binY_+1, y); - int iy = (matchy - binEdgesY_ - 1); - if (iy < 0 || unsigned(iy) >= binY_) return T(0.0); + auto matchx = std::lower_bound(binEdgesX_.begin(), binEdgesX_.end(), x); + if (matchx == binEdgesX_.begin() || matchx == binEdgesX_.end()) return T(0.0); + int ix = (matchx - binEdgesX_.begin() - 1); + auto matchy = std::lower_bound(binEdgesY_.begin(), binEdgesY_.end(), y); + if (matchy == binEdgesY_.begin() || matchy == binEdgesY_.end()) return T(0.0); + int iy = (matchy - binEdgesY_.begin() - 1); return values_[ix * binY_ + iy]; } @@ -152,7 +153,7 @@ void FastHisto2D::NormalizeXSlices() { } void FastHisto2D::Dump() const { - printf("--- dumping histo template with %d x %d bins (@%p)---\n", binX_, binY_, (void*)values_); + printf("--- dumping histo template with %d x %d bins (@%p)---\n", binX_, binY_, (void*)(&values_[0])); for (unsigned int i = 0; i < size_; ++i) { printf(" bin %3d, x = %6.2f, y = %6.2f: yval = %9.5f, width = %6.3f\n", i, 0.5*(binEdgesX_[i/binY_]+binEdgesX_[i/binY_+1]), @@ -191,10 +192,10 @@ namespace { } void FastTemplate::Subtract(const FastTemplate & ref) { - subtract(values_, size_, &ref[0]); + subtract(&values_[0], size_, &ref[0]); } void FastTemplate::LogRatio(const FastTemplate & ref) { - logratio(values_, size_, &ref[0]); + logratio(&values_[0], size_, &ref[0]); } void FastTemplate::SumDiff(const FastTemplate & h1, const FastTemplate & h2, FastTemplate & sum, FastTemplate & diff) { @@ -202,11 +203,12 @@ void FastTemplate::SumDiff(const FastTemplate & h1, const FastTemplate & h2, } void FastTemplate::Meld(const FastTemplate & diff, const FastTemplate & sum, T x, T y) { - meld(values_, size_, &diff[0], &sum[0], x, y); + meld(&values_[0], size_, &diff[0], &sum[0], x, y); } void FastTemplate::Log() { for (unsigned int i = 0; i < size_; ++i) { + //if (values_[i] <= 0) printf("WARNING: log(%g) at bin %d of %d bins (%d active bins)\n", values_[i], i, int(values_.size()), size_); values_[i] = values_[i] > 0 ? std::log(values_[i]) : T(-999); } } @@ -217,8 +219,8 @@ void FastTemplate::Exp() { } } -void FastTemplate::CropUnderflows(T minimum) { - for (unsigned int i = 0; i < size_; ++i) { +void FastTemplate::CropUnderflows(T minimum, bool activebinsonly) { + for (unsigned int i = 0, n = activebinsonly ? size_ : values_.size(); i < n; ++i) { if (values_[i] < minimum) values_[i] = minimum; } } diff --git a/src/FitterAlgoBase.cc b/src/FitterAlgoBase.cc index 196f5761b63bf..f462b4c77b9f6 100644 --- a/src/FitterAlgoBase.cc +++ b/src/FitterAlgoBase.cc @@ -10,6 +10,7 @@ #include "RooSimultaneous.h" #include "RooAddPdf.h" #include "RooProdPdf.h" +#include "RooGaussian.h" #include "RooConstVar.h" #include "RooPlot.h" #include "../interface/RooMinimizerOpt.h" @@ -37,7 +38,7 @@ using namespace RooStats; std::string FitterAlgoBase::minimizerAlgo_ = "Minuit2"; std::string FitterAlgoBase::minimizerAlgoForMinos_ = "Minuit2,simplex"; -float FitterAlgoBase::minimizerTolerance_ = 1e-2; +float FitterAlgoBase::minimizerTolerance_ = 1e-1; float FitterAlgoBase::minimizerToleranceForMinos_ = 1e-4; int FitterAlgoBase::minimizerStrategy_ = 1; int FitterAlgoBase::minimizerStrategyForMinos_ = 0; @@ -141,6 +142,7 @@ bool FitterAlgoBase::run(RooWorkspace *w, RooStats::ModelConfig *mc_s, RooStats: } } + optimizeBounds(w,mc_s); bool ret = runSpecific(w, mc_s, mc_b, *theData, limit, limitErr, hint); if (protectUnbinnedChannels_) { // destroy things in the proper order @@ -148,6 +150,7 @@ bool FitterAlgoBase::run(RooWorkspace *w, RooStats::ModelConfig *mc_s, RooStats: generatedData.reset(); generator.reset(); } + restoreBounds(w,mc_s); return ret; } @@ -484,3 +487,43 @@ double FitterAlgoBase::findCrossingNew(CascadeMinimizer &minim, RooAbsReal &nll, fprintf(sentry.trueStdOut(), "Error: search did not converge, will return approximate answer %+.6f\n",rVal); return rVal; } + +void FitterAlgoBase::optimizeBounds(const RooWorkspace *w, const RooStats::ModelConfig *mc) { + if (runtimedef::get("UNBOUND_GAUSSIANS") && mc->GetNuisanceParameters() != 0) { + RooLinkedListIter iter = mc->GetNuisanceParameters()->iterator(); + for (RooAbsArg *a = (RooAbsArg *) iter.Next(); a != 0; a = (RooAbsArg *) iter.Next()) { + RooRealVar *rrv = dynamic_cast(a); + if (rrv != 0) { + RooAbsPdf *pdf = w->pdf((std::string(a->GetName())+"_Pdf").c_str()); + if (pdf != 0 && dynamic_cast(pdf) != 0) { + rrv->removeMin(); + rrv->removeMax(); + } + } + } + } + if (runtimedef::get("OPTIMIZE_BOUNDS") && mc->GetNuisanceParameters() != 0) { + RooLinkedListIter iter = mc->GetNuisanceParameters()->iterator(); + for (RooAbsArg *a = (RooAbsArg *) iter.Next(); a != 0; a = (RooAbsArg *) iter.Next()) { + RooRealVar *rrv = dynamic_cast(a); + //std::cout << (rrv ? "Var" : "Arg") << ": " << a->GetName() << ": " << a->getAttribute("optimizeBounds") << std::endl; + if (rrv != 0 && rrv->getAttribute("optimizeBounds")) { + //std::cout << "Unboud " << rrv->GetName() << std::endl; + rrv->setRange("optimizeBoundRange", rrv->getMin(), rrv->getMax()); + rrv->removeMin(); + rrv->removeMax(); + } + } + } +} +void FitterAlgoBase::restoreBounds(const RooWorkspace *w, const RooStats::ModelConfig *mc) { + if (runtimedef::get("OPTIMIZE_BOUNDS") && mc->GetNuisanceParameters() != 0) { + RooLinkedListIter iter = mc->GetNuisanceParameters()->iterator(); + for (RooAbsArg *a = (RooAbsArg *) iter.Next(); a != 0; a = (RooAbsArg *) iter.Next()) { + RooRealVar *rrv = dynamic_cast(a); + if (rrv != 0 && rrv->getAttribute("optimizeBounds")) { + rrv->setRange(rrv->getMin("optimizeBoundRange"), rrv->getMax("optimizeBoundRange")); + } + } + } +} diff --git a/src/MultiDimFit.cc b/src/MultiDimFit.cc index fc1ce77c37f8b..9f0e4cab2d8da 100644 --- a/src/MultiDimFit.cc +++ b/src/MultiDimFit.cc @@ -293,6 +293,7 @@ void MultiDimFit::doGrid(RooAbsReal &nll) Combine::commitPoint(true, /*quantile=*/prob); } if (gridType_ == G3x3) { + bool forceProfile = !fastScan_ && std::min(fabs(deltaNLL_ - 1.15), fabs(deltaNLL_ - 2.995)) < 0.5; utils::CheapValueSnapshot center(*params); double x0 = x, y0 = y; for (int i2 = -1; i2 <= +1; ++i2) { @@ -309,7 +310,7 @@ void MultiDimFit::doGrid(RooAbsReal &nll) continue; } deltaNLL_ = nll.getVal() - nll0; - if (!fastScan_ && std::min(fabs(deltaNLL_ - 1.15), fabs(deltaNLL_ - 2.995)) < 0.3) { + if (forceProfile || (!fastScan_ && std::min(fabs(deltaNLL_ - 1.15), fabs(deltaNLL_ - 2.995)) < 0.5)) { minim.minimize(verbose-1); deltaNLL_ = nll.getVal() - nll0; } diff --git a/src/RooMinimizerOpt.cc b/src/RooMinimizerOpt.cc index 0d1043f65366a..e5769513edf2d 100644 --- a/src/RooMinimizerOpt.cc +++ b/src/RooMinimizerOpt.cc @@ -26,26 +26,266 @@ RooMinimizerOpt::edm() return _theFitter->Result().Edm(); } +Int_t RooMinimizerOpt::minimize(const char* type, const char* alg) +{ + if (typeid(*_fcn) == typeid(RooMinimizerFcnOpt)) { + static_cast(_fcn)->Synchronize(_theFitter->Config().ParamsSettings(), + _optConst,_verbose) ; + } else { + _fcn->Synchronize(_theFitter->Config().ParamsSettings(), + _optConst,_verbose) ; + } + + _theFitter->Config().SetMinimizer(type,alg); + + profileStart() ; + RooAbsReal::setEvalErrorLoggingMode(RooAbsReal::CollectErrors) ; + RooAbsReal::clearEvalErrorLog() ; + + bool ret = _theFitter->FitFCN(*_fcn); + _status = ((ret) ? _theFitter->Result().Status() : -1); + + RooAbsReal::setEvalErrorLoggingMode(RooAbsReal::PrintErrors) ; + profileStop() ; + _fcn->BackProp(_theFitter->Result()); + + saveStatus("MINIMIZE",_status) ; + + return _status ; +} +//_____________________________________________________________________________ +Int_t RooMinimizerOpt::improve() +{ + // Execute IMPROVE. Changes in parameter values + // and calculated errors are automatically + // propagated back the RooRealVars representing + // the floating parameters in the MINUIT operation + + if (typeid(*_fcn) == typeid(RooMinimizerFcnOpt)) { + static_cast(_fcn)->Synchronize(_theFitter->Config().ParamsSettings(), + _optConst,_verbose) ; + } else { + _fcn->Synchronize(_theFitter->Config().ParamsSettings(), + _optConst,_verbose) ; + } + profileStart() ; + RooAbsReal::setEvalErrorLoggingMode(RooAbsReal::CollectErrors) ; + RooAbsReal::clearEvalErrorLog() ; + + _theFitter->Config().SetMinimizer(_minimizerType.c_str(),"migradimproved"); + bool ret = _theFitter->FitFCN(*_fcn); + _status = ((ret) ? _theFitter->Result().Status() : -1); + + RooAbsReal::setEvalErrorLoggingMode(RooAbsReal::PrintErrors) ; + profileStop() ; + _fcn->BackProp(_theFitter->Result()); + + saveStatus("IMPROVE",_status) ; + + return _status ; +} + +Int_t RooMinimizerOpt::migrad() +{ + // Execute MIGRAD. Changes in parameter values + // and calculated errors are automatically + // propagated back the RooRealVars representing + // the floating parameters in the MINUIT operation + + if (typeid(*_fcn) == typeid(RooMinimizerFcnOpt)) { + static_cast(_fcn)->Synchronize(_theFitter->Config().ParamsSettings(), + _optConst,_verbose) ; + } else { + _fcn->Synchronize(_theFitter->Config().ParamsSettings(), + _optConst,_verbose) ; + } + + profileStart() ; + RooAbsReal::setEvalErrorLoggingMode(RooAbsReal::CollectErrors) ; + RooAbsReal::clearEvalErrorLog() ; + + _theFitter->Config().SetMinimizer(_minimizerType.c_str(),"migrad"); + bool ret = _theFitter->FitFCN(*_fcn); + _status = ((ret) ? _theFitter->Result().Status() : -1); + + RooAbsReal::setEvalErrorLoggingMode(RooAbsReal::PrintErrors) ; + profileStop() ; + _fcn->BackProp(_theFitter->Result()); + + saveStatus("MIGRAD",_status) ; + + return _status ; +} + + + +//_____________________________________________________________________________ +Int_t RooMinimizerOpt::hesse() +{ + // Execute HESSE. Changes in parameter values + // and calculated errors are automatically + // propagated back the RooRealVars representing + // the floating parameters in the MINUIT operation + + if (_theFitter->GetMinimizer()==0) { + coutW(Minimization) << "RooMinimizerOpt::hesse: Error, run Migrad before Hesse!" + << endl ; + _status = -1; + } + else { + + if (typeid(*_fcn) == typeid(RooMinimizerFcnOpt)) { + static_cast(_fcn)->Synchronize(_theFitter->Config().ParamsSettings(), + _optConst,_verbose) ; + } else { + _fcn->Synchronize(_theFitter->Config().ParamsSettings(), + _optConst,_verbose) ; + } + + profileStart() ; + RooAbsReal::setEvalErrorLoggingMode(RooAbsReal::CollectErrors) ; + RooAbsReal::clearEvalErrorLog() ; + + _theFitter->Config().SetMinimizer(_minimizerType.c_str()); + bool ret = _theFitter->CalculateHessErrors(); + _status = ((ret) ? _theFitter->Result().Status() : -1); + + RooAbsReal::setEvalErrorLoggingMode(RooAbsReal::PrintErrors) ; + profileStop() ; + _fcn->BackProp(_theFitter->Result()); + + saveStatus("HESSE",_status) ; + + } + + return _status ; + +} + +//_____________________________________________________________________________ +Int_t RooMinimizerOpt::minos() +{ + // Execute MINOS. Changes in parameter values + // and calculated errors are automatically + // propagated back the RooRealVars representing + // the floating parameters in the MINUIT operation + + if (_theFitter->GetMinimizer()==0) { + coutW(Minimization) << "RooMinimizerOpt::minos: Error, run Migrad before Minos!" + << endl ; + _status = -1; + } + else { + + if (typeid(*_fcn) == typeid(RooMinimizerFcnOpt)) { + static_cast(_fcn)->Synchronize(_theFitter->Config().ParamsSettings(), + _optConst,_verbose) ; + } else { + _fcn->Synchronize(_theFitter->Config().ParamsSettings(), + _optConst,_verbose) ; + } + profileStart() ; + RooAbsReal::setEvalErrorLoggingMode(RooAbsReal::CollectErrors) ; + RooAbsReal::clearEvalErrorLog() ; + + _theFitter->Config().SetMinimizer(_minimizerType.c_str()); + bool ret = _theFitter->CalculateMinosErrors(); + _status = ((ret) ? _theFitter->Result().Status() : -1); + + RooAbsReal::setEvalErrorLoggingMode(RooAbsReal::PrintErrors) ; + profileStop() ; + _fcn->BackProp(_theFitter->Result()); + + saveStatus("MINOS",_status) ; + + } + + return _status ; + +} + + +//_____________________________________________________________________________ +Int_t RooMinimizerOpt::minos(const RooArgSet& minosParamList) +{ + // Execute MINOS for given list of parameters. Changes in parameter values + // and calculated errors are automatically + // propagated back the RooRealVars representing + // the floating parameters in the MINUIT operation + + if (_theFitter->GetMinimizer()==0) { + coutW(Minimization) << "RooMinimizerOpt::minos: Error, run Migrad before Minos!" + << endl ; + _status = -1; + } + else if (minosParamList.getSize()>0) { + + if (typeid(*_fcn) == typeid(RooMinimizerFcnOpt)) { + static_cast(_fcn)->Synchronize(_theFitter->Config().ParamsSettings(), + _optConst,_verbose) ; + } else { + _fcn->Synchronize(_theFitter->Config().ParamsSettings(), + _optConst,_verbose) ; + } + profileStart() ; + RooAbsReal::setEvalErrorLoggingMode(RooAbsReal::CollectErrors) ; + RooAbsReal::clearEvalErrorLog() ; + + // get list of parameters for Minos + TIterator* aIter = minosParamList.createIterator() ; + RooAbsArg* arg ; + std::vector paramInd; + while((arg=(RooAbsArg*)aIter->Next())) { + RooAbsArg* par = _fcn->GetFloatParamList()->find(arg->GetName()); + if (par && !par->isConstant()) { + Int_t index = _fcn->GetFloatParamList()->index(par); + paramInd.push_back(index); + } + } + delete aIter ; + + if (paramInd.size()) { + // set the parameter indeces + _theFitter->Config().SetMinosErrors(paramInd); + + _theFitter->Config().SetMinimizer(_minimizerType.c_str()); + bool ret = _theFitter->CalculateMinosErrors(); + _status = ((ret) ? _theFitter->Result().Status() : -1); + + } + + RooAbsReal::setEvalErrorLoggingMode(RooAbsReal::PrintErrors) ; + profileStop() ; + _fcn->BackProp(_theFitter->Result()); + + saveStatus("MINOS",_status) ; + + } + + return _status ; +} + + + RooMinimizerFcnOpt::RooMinimizerFcnOpt(RooAbsReal *funct, RooMinimizer *context, bool verbose) : RooMinimizerFcn(funct, context, verbose) { - _vars.resize(_floatParamList->getSize()); - std::vector::iterator itv = _vars.begin(); - RooLinkedListIter iter = _floatParamList->iterator(); - for (TObject *a = iter.Next(); a != 0; a = iter.Next(), ++itv) { - RooRealVar *rrv = dynamic_cast(a); - if (rrv == 0) throw std::logic_error(Form("Float param not a RooRealVar but a %s", a->ClassName())); - *itv = rrv; - } +} + +RooMinimizerFcnOpt::RooMinimizerFcnOpt(const RooMinimizerFcnOpt &other) : + RooMinimizerFcn(other._funct, other._context, other._verbose), + _vars(other._vars), _vals(other._vals), _hasOptimzedBounds(other._hasOptimzedBounds), _optimzedBounds(other._optimzedBounds) +{ + } ROOT::Math::IBaseFunctionMultiDim* RooMinimizerFcnOpt::Clone() const { - return new RooMinimizerFcnOpt(_funct,_context,_verbose); + return new RooMinimizerFcnOpt(*this); } double @@ -53,11 +293,22 @@ RooMinimizerFcnOpt::DoEval(const double * x) const { // Set the parameter values for this iteration for (int index = 0; index < _nDim; index++) { - if (_logfile) (*_logfile) << x[index] << " " ; - RooRealVar* par = _vars[index]; - if (par->getVal()!=x[index]) { + if (_vals[index]!=x[index]) { + RooRealVar* par = _vars[index]; if (_verbose) cout << par->GetName() << "=" << x[index] << ", " ; - par->setVal(x[index]); + if (_hasOptimzedBounds[index]) { + // boundary transformation done internally + par->setVal( _optimzedBounds[index].transform(x[index]) ); + _vals[index] = x[index]; + } else { + par->setVal(x[index]); + _vals[index] = par->getVal(); // might not work otherwise if x is out of the boundary + } + } + } + if (_logfile) { + for (int index = 0; index < _nDim; index++) { + (*_logfile) << x[index] << " " ; } } @@ -109,6 +360,282 @@ RooMinimizerFcnOpt::DoEval(const double * x) const << fvalue << setprecision(4) << " " ; cout.flush() ; } - return fvalue; } + +Bool_t RooMinimizerFcnOpt::Synchronize(std::vector& parameters, + Bool_t optConst, Bool_t verbose) +{ + + // Internal function to synchronize TMinimizer with current + // information in RooAbsReal function parameters + + Bool_t constValChange(kFALSE) ; + Bool_t constStatChange(kFALSE) ; + + Int_t index(0) ; + + // Handle eventual migrations from constParamList -> floatParamList + for(index= 0; index < _constParamList->getSize() ; index++) { + + RooRealVar *par= dynamic_cast(_constParamList->at(index)) ; + if (!par) continue ; + + RooRealVar *oldpar= dynamic_cast(_initConstParamList->at(index)) ; + if (!oldpar) continue ; + + // Test if constness changed + if (!par->isConstant()) { + + // Remove from constList, add to floatList + _constParamList->remove(*par) ; + _floatParamList->add(*par) ; + _initFloatParamList->addClone(*oldpar) ; + _initConstParamList->remove(*oldpar) ; + constStatChange=kTRUE ; + _nDim++ ; + + if (verbose) { + oocoutI(_context,Minimization) << "RooMinimizerFcn::synchronize: parameter " + << par->GetName() << " is now floating." << endl ; + } + } + + // Test if value changed + if (par->getVal()!= oldpar->getVal()) { + constValChange=kTRUE ; + if (verbose) { + oocoutI(_context,Minimization) << "RooMinimizerFcn::synchronize: value of constant parameter " + << par->GetName() + << " changed from " << oldpar->getVal() << " to " + << par->getVal() << endl ; + } + } + + } + + // Update reference list + *_initConstParamList = *_constParamList ; + + // Synchronize MINUIT with function state + // Handle floatParamList + for(index= 0; index < _floatParamList->getSize(); index++) { + RooRealVar *par= dynamic_cast(_floatParamList->at(index)) ; + + if (!par) continue ; + + Double_t pstep(0) ; + Double_t pmin(0) ; + Double_t pmax(0) ; + + if(!par->isConstant()) { + + // Verify that floating parameter is indeed of type RooRealVar + if (!par->IsA()->InheritsFrom(RooRealVar::Class())) { + oocoutW(_context,Minimization) << "RooMinimizerFcn::fit: Error, non-constant parameter " + << par->GetName() + << " is not of type RooRealVar, skipping" << endl ; + _floatParamList->remove(*par); + index--; + _nDim--; + continue ; + } + + // Set the limits, if not infinite + if (par->hasMin() ) + pmin = par->getMin(); + if (par->hasMax() ) + pmax = par->getMax(); + + // Calculate step size + pstep = par->getError(); + if(pstep <= 0) { + // Floating parameter without error estitimate + if (par->hasMin() && par->hasMax()) { + pstep= 0.1*(pmax-pmin); + + // Trim default choice of error if within 2 sigma of limit + if (pmax - par->getVal() < 2*pstep) { + pstep = (pmax - par->getVal())/2 ; + } else if (par->getVal() - pmin < 2*pstep) { + pstep = (par->getVal() - pmin )/2 ; + } + + // If trimming results in zero error, restore default + if (pstep==0) { + pstep= 0.1*(pmax-pmin); + } + + } else { + pstep=1 ; + } + if(verbose) { + oocoutW(_context,Minimization) << "RooMinimizerFcn::synchronize: WARNING: no initial error estimate available for " + << par->GetName() << ": using " << pstep << endl; + } + } + } else { + pmin = par->getVal() ; + pmax = par->getVal() ; + } + + // new parameter + if (index>=Int_t(parameters.size())) { + + if (par->hasMin() && par->hasMax()) { + parameters.push_back(ROOT::Fit::ParameterSettings(par->GetName(), + par->getVal(), + pstep, + pmin,pmax)); + } + else { + parameters.push_back(ROOT::Fit::ParameterSettings(par->GetName(), + par->getVal(), + pstep)); + if (par->hasMin() ) + parameters.back().SetLowerLimit(pmin); + else if (par->hasMax() ) + parameters.back().SetUpperLimit(pmax); + } + + continue; + + } + + Bool_t oldFixed = parameters[index].IsFixed(); + Double_t oldVar = parameters[index].Value(); + Double_t oldVerr = parameters[index].StepSize(); + Double_t oldVlo = parameters[index].LowerLimit(); + Double_t oldVhi = parameters[index].UpperLimit(); + + if (par->isConstant() && !oldFixed) { + + // Parameter changes floating -> constant : update only value if necessary + if (oldVar!=par->getVal()) { + parameters[index].SetValue(par->getVal()); + if (verbose) { + oocoutI(_context,Minimization) << "RooMinimizerFcn::synchronize: value of parameter " + << par->GetName() << " changed from " << oldVar + << " to " << par->getVal() << endl ; + } + } + parameters[index].Fix(); + constStatChange=kTRUE ; + if (verbose) { + oocoutI(_context,Minimization) << "RooMinimizerFcn::synchronize: parameter " + << par->GetName() << " is now fixed." << endl ; + } + + } else if (par->isConstant() && oldFixed) { + + // Parameter changes constant -> constant : update only value if necessary + if (oldVar!=par->getVal()) { + parameters[index].SetValue(par->getVal()); + constValChange=kTRUE ; + + if (verbose) { + oocoutI(_context,Minimization) << "RooMinimizerFcn::synchronize: value of fixed parameter " + << par->GetName() << " changed from " << oldVar + << " to " << par->getVal() << endl ; + } + } + + } else { + // Parameter changes constant -> floating + if (!par->isConstant() && oldFixed) { + parameters[index].Release(); + constStatChange=kTRUE ; + + if (verbose) { + oocoutI(_context,Minimization) << "RooMinimizerFcn::synchronize: parameter " + << par->GetName() << " is now floating." << endl ; + } + } + + // Parameter changes constant -> floating : update all if necessary + if (oldVar!=par->getVal() || oldVlo!=pmin || oldVhi != pmax || oldVerr!=pstep) { + parameters[index].SetValue(par->getVal()); + parameters[index].SetStepSize(pstep); + if (par->hasMin() && par->hasMax() ) { + parameters[index].SetLimits(pmin,pmax); + } else if (par->hasMin() ) { + parameters[index].SetLowerLimit(pmin); + } else if (par->hasMax() ) { + parameters[index].SetUpperLimit(pmax); + } + } + + // Inform user about changes in verbose mode + if (verbose) { + // if ierr<0, par was moved from the const list and a message was already printed + + if (oldVar!=par->getVal()) { + oocoutI(_context,Minimization) << "RooMinimizerFcn::synchronize: value of parameter " + << par->GetName() << " changed from " << oldVar << " to " + << par->getVal() << endl ; + } + if (oldVlo!=pmin || oldVhi!=pmax) { + oocoutI(_context,Minimization) << "RooMinimizerFcn::synchronize: limits of parameter " + << par->GetName() << " changed from [" << oldVlo << "," << oldVhi + << "] to [" << pmin << "," << pmax << "]" << endl ; + } + + // If oldVerr=0, then parameter was previously fixed + if (oldVerr!=pstep && oldVerr!=0) { + oocoutI(_context,Minimization) << "RooMinimizerFcn::synchronize: error/step size of parameter " + << par->GetName() << " changed from " << oldVerr << " to " << pstep << endl ; + } + } + } + } + + if (optConst) { + if (constStatChange) { + + RooAbsReal::setEvalErrorLoggingMode(RooAbsReal::CollectErrors) ; + + oocoutI(_context,Minimization) << "RooMinimizerFcn::synchronize: set of constant parameters changed, rerunning const optimizer" << endl ; + _funct->constOptimizeTestStatistic(RooAbsArg::ConfigChange) ; + } else if (constValChange) { + oocoutI(_context,Minimization) << "RooMinimizerFcn::synchronize: constant parameter values changed, rerunning const optimizer" << endl ; + _funct->constOptimizeTestStatistic(RooAbsArg::ValueChange) ; + } + + RooAbsReal::setEvalErrorLoggingMode(RooAbsReal::PrintErrors) ; + + } + + updateFloatVec() ; + + initStdVects(); + + return 0 ; + +} + +void RooMinimizerFcnOpt::initStdVects() const { + _vars.resize(_floatParamList->getSize()); + _vals.resize(_vars.size()); + _hasOptimzedBounds.resize(_vars.size()); + _optimzedBounds.resize(_vars.size()); + std::vector::iterator itv = _vars.begin(); + std::vector::iterator it = _vals.begin(); + RooLinkedListIter iter = _floatParamList->iterator(); + std::fill(_hasOptimzedBounds.begin(), _hasOptimzedBounds.end(), false); + unsigned int i = 0; + for (TObject *a = iter.Next(); a != 0; a = iter.Next(), ++itv, ++it, ++i) { + RooRealVar *rrv = dynamic_cast(a); + if (rrv == 0) throw std::logic_error(Form("Float param not a RooRealVar but a %s", a->ClassName())); + *itv = rrv; + *it = rrv->getVal(); + if (rrv->getAttribute("optimizeBounds")) { + _hasOptimzedBounds[i] = true; + OptBound &b = _optimzedBounds[i]; + b.hardMax = rrv->getMax("optimizeBoundRange"); + b.hardMin = rrv->getMin("optimizeBoundRange"); + double x0 = 0.5*(b.hardMax+b.hardMin); + b.softMax = 0.75*b.hardMax + 0.25*x0; + b.softMin = 0.75*b.hardMin + 0.25*x0; + } + } +} diff --git a/src/SimpleGaussianConstraint.cc b/src/SimpleGaussianConstraint.cc new file mode 100644 index 0000000000000..6198eecf3a10a --- /dev/null +++ b/src/SimpleGaussianConstraint.cc @@ -0,0 +1,14 @@ +#include "../interface/SimpleGaussianConstraint.h" + +#include +#include + +void SimpleGaussianConstraint::init() { + if (!sigma.arg().isConstant()) { + throw std::invalid_argument(std::string("SimpleGaussianConstraint created with non-constant sigma: ") + GetName()); + } + Double_t sig = sigma; + scale_ = -0.5/(sig*sig); +} + +ClassImp(SimpleGaussianConstraint) diff --git a/src/VectorizedGaussian.cc b/src/VectorizedGaussian.cc new file mode 100644 index 0000000000000..7b894534615a3 --- /dev/null +++ b/src/VectorizedGaussian.cc @@ -0,0 +1,44 @@ +#include "../interface/VectorizedGaussian.h" +#include "RooMath.h" +#include "vectorized.h" +#include +#include + +VectorizedGaussian::VectorizedGaussian(const RooGaussian &gaus, const RooAbsData &data) +{ + RooArgSet obs(*data.get()); + if (obs.getSize() != 1) throw std::invalid_argument("Multi-dimensional dataset?"); + RooRealVar *x = dynamic_cast(obs.first()); + + Worker w(gaus); + if (obs.contains(w.xvar())) { + x_ = dynamic_cast(& w.xvar()); + mean_ = & w.meanvar(); + sigma_ = & w.sigvar(); + } else if (obs.contains(w.meanvar())) { + x_ = dynamic_cast(& w.meanvar()); + mean_ = & w.xvar(); + sigma_ = & w.sigvar(); + } else { + throw std::invalid_argument("Dataset is not the mean or x of the gaussian"); + } + + xvals_.reserve(data.numEntries()); + for (unsigned int i = 0, n = data.numEntries(); i < n; ++i) { + obs.assignValueOnly(*data.get(i), true); + if (data.weight()) xvals_.push_back(x->getVal()); + } + work_.resize(xvals_.size()); + work2_.resize(xvals_.size()); +} + +void VectorizedGaussian::fill(std::vector &out) const { + // calculate normalizatio integral + constexpr Double_t root2 = std::sqrt(2.) ; + constexpr Double_t rootPiBy2 = std::sqrt(M_PI/2.0); + Double_t xscale = root2*sigma_->getVal(); + Double_t mean = mean_->getVal(); + Double_t norm = rootPiBy2*sigma_->getVal()*(RooMath::erf((x_->getMax()-mean)/xscale)-RooMath::erf((x_->getMin()-mean)/xscale)); + out.resize(xvals_.size()); + vectorized::gaussians(xvals_.size(), mean, sigma_->getVal(), norm, &xvals_[0], &out[0], &work_[0], &work2_[0]); +} diff --git a/src/VerticalInterpHistPdf.cc b/src/VerticalInterpHistPdf.cc index 5909ad5f96810..300b3bc1fc2fa 100644 --- a/src/VerticalInterpHistPdf.cc +++ b/src/VerticalInterpHistPdf.cc @@ -9,6 +9,7 @@ #include "TIterator.h" #include "RooRealVar.h" #include "RooMsgService.h" +#include "RooAbsData.h" //#define TRACE_CALLS #ifdef TRACE_CALLS @@ -295,19 +296,13 @@ FastVerticalInterpHistPdfBase::FastVerticalInterpHistPdfBase(const FastVerticalI _smoothRegion(other._smoothRegion), _smoothAlgo(other._smoothAlgo), _init(false), -#ifdef FastVerticalInterpHistPdf_CopyConstructor _morphs(other._morphs), _morphParams(other._morphParams) -#else - _morphs(), _morphParams() -#endif { // Copy constructor _funcIter = _funcList.createIterator() ; _coefIter = _coefList.createIterator() ; -#ifdef FastVerticalInterpHistPdf_CopyConstructor _sentry.addVars(_coefList); _sentry.setValueDirty(); -#endif } @@ -529,3 +524,512 @@ void FastVerticalInterpHistPdf2D::setupCaches() const { } +FastVerticalInterpHistPdfV::FastVerticalInterpHistPdfV(const FastVerticalInterpHistPdf &hpdf, const RooAbsData &data) : + hpdf_(hpdf),begin_(0),end_(0) +{ + // check init + if (hpdf._cache.size() == 0) hpdf.setupCaches(); + if (!hpdf._sentry.good() || !hpdf._init) hpdf.syncTotal(); + // find bins + std::vector bins; + RooArgSet obs(hpdf._x.arg()); + const RooRealVar &x = static_cast(*obs.first()); + bool aligned = true; + for (int i = 0, n = data.numEntries(); i < n; ++i) { + obs = *data.get(i); + if (data.weight() == 0) continue; + int idx = hpdf._cache.FindBin(x.getVal()); + if (!bins.empty() && idx != bins.back() + 1) aligned = false; + bins.push_back(idx); + } + if (aligned) { + begin_ = bins.front(); + end_ = bins.back()+1; + //std::cout << "Created FastVerticalInterpHistPdfV from " << hpdf.GetName() << ", aligned, " << (end_-begin_) << " bins." << std::endl; + } else { + nbins_ = bins.size(); + bins_.swap(bins); + blocks_.clear(); + int start = bins_[0], istart = 0; + for (int i = 1, n = bins_.size(); i < n; ++i) { + if (bins_[i] != bins_[i-1]+1) { + blocks_.push_back(Block(istart,start,bins_[i-1]+1)); + start = bins_[i]; + istart = i; + } + } + blocks_.push_back(Block(istart,start,bins_.back()+1)); + if (blocks_.size() < 4*bins_.size()) { + //std::cout << "Created FastVerticalInterpHistPdfV from " << hpdf.GetName() << ", block-aligned, " << bins_.size() << " bins, " << blocks_.size() << " blocks." << std::endl; + bins_.clear(); + } else { + //std::cout << "Created FastVerticalInterpHistPdfV from " << hpdf.GetName() << ", non-aligned, " << bins_.size() << " bins, " << blocks_.size() << " blocks." << std::endl; + blocks_.clear(); + } + } +} + +void FastVerticalInterpHistPdfV::fill(std::vector &out) const +{ + if (!hpdf_._sentry.good()) hpdf_.syncTotal(); + if (begin_ != end_) { + out.resize(end_-begin_); + std::copy(& hpdf_._cache.GetBinContent(begin_), & hpdf_._cache.GetBinContent(end_), out.begin()); + } else if (!blocks_.empty()) { + out.resize(nbins_); + for (auto b : blocks_) std::copy(& hpdf_._cache.GetBinContent(b.begin), & hpdf_._cache.GetBinContent(b.end), out.begin()+b.index); + } else { + out.resize(bins_.size()); + for (int i = 0, n = bins_.size(); i < n; ++i) { + out[i] = hpdf_._cache.GetBinContent(bins_[i]); + } + } +} + +//============================================================================================= +ClassImp(FastVerticalInterpHistPdf2Base) +ClassImp(FastVerticalInterpHistPdf2) +//ClassImp(FastVerticalInterpHistPdf2D) + + +//_____________________________________________________________________________ +FastVerticalInterpHistPdf2Base::FastVerticalInterpHistPdf2Base() : + _initBase(false) +{ + // Default constructor +} + + +//_____________________________________________________________________________ +FastVerticalInterpHistPdf2Base::FastVerticalInterpHistPdf2Base(const char *name, const char *title, const RooArgSet &obs, const TList& inFuncList, const RooArgList& inCoefList, Double_t smoothRegion, Int_t smoothAlgo) : + RooAbsPdf(name,title), + _coefList("coefList","List of coefficients",this), // we should get shapeDirty when coefficients change + _smoothRegion(smoothRegion), + _smoothAlgo(smoothAlgo), + _initBase(false), + _morphs(), _morphParams() +{ + if (inFuncList.GetSize()!=2*inCoefList.getSize()+1) { + coutE(InputArguments) << "VerticalInterpHistPdf::VerticalInterpHistPdf(" << GetName() + << ") number of pdfs and coefficients inconsistent, must have Nfunc=1+2*Ncoef" + << "while Nfunc= " << inFuncList.GetSize() << " and Ncoef= " << inCoefList.getSize() <(func); + RooAbsPdf *pdf = dynamic_cast(func); + if (!pdf && !hist) { + coutE(InputArguments) << "ERROR: VerticalInterpHistPdf::VerticalInterpHistPdf(" << GetName() << ") function " << func->GetName() << " is not of type TH1 or RooAbsPdf" << endl; + assert(0); + } + if (pdf) { + RooArgSet *params = pdf->getParameters(obs); + if (params->getSize() > 0) { + coutE(InputArguments) << "ERROR: VerticalInterpHistPdf::VerticalInterpHistPdf(" << GetName() << ") pdf " << func->GetName() << " (" << func->ClassName()<<") has some parameters." << endl; + obs.Print(""); + params->Print(""); + assert(0); + } + delete params; + } + } + + TIterator* coefIter = inCoefList.createIterator() ; + RooAbsArg* coef; + while((coef = (RooAbsArg*)coefIter->Next())) _coefList.add(*coef) ; + delete coefIter; +} + +//_____________________________________________________________________________ +FastVerticalInterpHistPdf2Base::FastVerticalInterpHistPdf2Base(const FastVerticalInterpHistPdf2Base& other, const char* name) : + RooAbsPdf(other,name), + _coefList("coefList", this, other._coefList), + _smoothRegion(other._smoothRegion), + _smoothAlgo(other._smoothAlgo), + _initBase(other._initBase), + _morphs(other._morphs), _morphParams(other._morphParams) +{ + if (_initBase) { + // Morph params are already set, but we must set the sentry + _sentry.addVars(_coefList); + _sentry.setValueDirty(); + } +} + +//_____________________________________________________________________________ +FastVerticalInterpHistPdf2Base::FastVerticalInterpHistPdf2Base(const FastVerticalInterpHistPdfBase& other, const char* name) : + RooAbsPdf(other,name), + _coefList("coefList", this, other._coefList), + _smoothRegion(other._smoothRegion), + _smoothAlgo(other._smoothAlgo), + _initBase(false), + _morphs(), _morphParams() +{ + // Convert constructor +} + + + + +//_____________________________________________________________________________ +FastVerticalInterpHistPdf2Base::~FastVerticalInterpHistPdf2Base() +{ + // Destructor +} + +void +FastVerticalInterpHistPdf2Base::initBase() const +{ + if (_initBase) return; + + TIterator* coefIter = _coefList.createIterator() ; + RooAbsArg* coef; + while((coef = (RooAbsArg*)coefIter->Next())) { + const RooAbsReal *rrv = dynamic_cast(coef); + if (!rrv) { + coutE(InputArguments) << "ERROR: VerticalInterpHistPdf::VerticalInterpHistPdf(" << GetName() << ") coefficient " << coef->GetName() << " is not of type RooAbsReal" << endl; + assert(0); + } + _morphParams.push_back(rrv); + } + delete coefIter; + + + _sentry.addVars(_coefList); + _sentry.setValueDirty(); + _initBase = true; +} + +FastVerticalInterpHistPdf2::FastVerticalInterpHistPdf2(const char *name, const char *title, const RooRealVar &x, const TList & funcList, const RooArgList& coefList, Double_t smoothRegion, Int_t smoothAlgo) : + FastVerticalInterpHistPdf2Base(name,title,RooArgSet(x),funcList,coefList,smoothRegion,smoothAlgo), + _x("x","Independent variable",this,const_cast(x)), + _cache(), _cacheNominal(), _cacheNominalLog() +{ + initBase(); + initNominal(funcList.At(0)); + _morphs.resize(coefList.getSize()); + for (int i = 0, n = coefList.getSize(); i < n; ++i) { + initComponent(i, funcList.At(2*i+1), funcList.At(2*i+2)); + } +} + +FastVerticalInterpHistPdf2D2::FastVerticalInterpHistPdf2D2(const char *name, const char *title, const RooRealVar &x, const RooRealVar &y, bool conditional, const TList & funcList, const RooArgList& coefList, Double_t smoothRegion, Int_t smoothAlgo) : + FastVerticalInterpHistPdf2Base(name,title,RooArgSet(x,y),funcList,coefList,smoothRegion,smoothAlgo), + _x("x","Independent variable",this,const_cast(x)), + _y("y","Independent variable",this,const_cast(y)), + _conditional(conditional), + _cache(), _cacheNominal(), _cacheNominalLog() +{ + initBase(); + initNominal(funcList.At(0)); + _morphs.resize(coefList.getSize()); + for (int i = 0, n = coefList.getSize(); i < n; ++i) { + initComponent(i, funcList.At(2*i+1), funcList.At(2*i+2)); + } +} + + + +FastVerticalInterpHistPdf2::FastVerticalInterpHistPdf2(const FastVerticalInterpHistPdf& other, const char* name) : + FastVerticalInterpHistPdf2Base(other,name), + _x("x",this,other._x), + _cache(), _cacheNominal(), _cacheNominalLog() +{ + initBase(); + other.getVal(RooArgSet(_x.arg())); + _morphs = other._morphs; + _cache = other._cache; + _cacheNominal = other._cacheNominal; + _cacheNominalLog = other._cacheNominalLog; +} + + +FastVerticalInterpHistPdf2D2::FastVerticalInterpHistPdf2D2(const FastVerticalInterpHistPdf2D& other, const char* name) : + FastVerticalInterpHistPdf2Base(other,name), + _x("x",this,other._x), + _y("y",this,other._y), + _conditional(other._conditional), + _cache(), _cacheNominal(), _cacheNominalLog() +{ + initBase(); + other.getVal(RooArgSet(_x.arg(), _y.arg())); + _morphs = other._morphs; + _cache = other._cache; + _cacheNominal = other._cacheNominal; + _cacheNominalLog = other._cacheNominalLog; +} + + + +//_____________________________________________________________________________ +Double_t FastVerticalInterpHistPdf2::evaluate() const +{ + if (!_initBase) initBase(); + if (_cache.size() == 0) _cache = _cacheNominal; // _cache is not persisted + if (!_sentry.good()) syncTotal(); + return _cache.GetAt(_x); +} +Double_t FastVerticalInterpHistPdf2D2::evaluate() const +{ + if (!_initBase) initBase(); + if (_cache.size() == 0) _cache = _cacheNominal; // _cache is not persisted + if (!_sentry.good()) syncTotal(); + return _cache.GetAt(_x,_y); +} + + +void FastVerticalInterpHistPdf2::setActiveBins(unsigned int bins) { + assert(bins <= _cacheNominal.fullsize()); + if (_cache.size() == 0) _cache = _cacheNominal; // _cache is not persisted + _cache.CropUnderflows(1e-9,false); // set all bins, also the non-active ones + _cacheNominal.CropUnderflows(1e-9,false); // set all bins, also the non-active ones + _cache.SetActiveSize(bins); + _cacheNominal.SetActiveSize(bins); + _cacheNominalLog.SetActiveSize(bins); + for (Morph & m : _morphs) { + m.sum.SetActiveSize(bins); + m.diff.SetActiveSize(bins); + } + //printf("Setting the number of active bins to be %d/%d for %s\n", bins, _cacheNominal.fullsize(), GetName()); +} + +void FastVerticalInterpHistPdf2::initNominal(TObject *templ) { + TH1 *hist = dynamic_cast(templ); + if (hist) { + _cacheNominal = FastHisto(*hist); + } else { + RooAbsPdf *pdf = dynamic_cast(templ); + const RooRealVar &x = dynamic_cast(_x.arg()); + hist = pdf->createHistogram("",x); + hist->SetDirectory(0); + _cacheNominal = FastHisto(*hist); + delete hist; + } + _cacheNominal.Normalize(); + //printf("Normalized nominal templated: \n"); _cacheNominal.Dump(); + if (_smoothAlgo < 0) { + _cacheNominalLog = _cacheNominal; + _cacheNominalLog.Log(); + } + _cache = _cacheNominal; +} + +void FastVerticalInterpHistPdf2D2::initNominal(TObject *templ) { + TH2 *hist = dynamic_cast(templ); + if (hist) { + _cacheNominal = FastHisto2D(*hist, _conditional); + } else { + RooAbsPdf *pdf = dynamic_cast(templ); + const RooRealVar &x = dynamic_cast(_x.arg()); + const RooRealVar &y = dynamic_cast(_y.arg()); + const RooCmdArg &cond = _conditional ? RooFit::ConditionalObservables(RooArgSet(x)) : RooCmdArg::none(); + hist = dynamic_cast(pdf->createHistogram("", x, RooFit::YVar(y), cond)); + hist->SetDirectory(0); + _cacheNominal = FastHisto2D(*hist, _conditional); + delete hist; + } + if (_conditional) _cacheNominal.NormalizeXSlices(); + else _cacheNominal.Normalize(); + if (_smoothAlgo < 0) { + _cacheNominalLog = _cacheNominal; + _cacheNominalLog.Log(); + } + _cache = _cacheNominal; +} + + +void FastVerticalInterpHistPdf2::initComponent(int dim, TObject *thi, TObject *tlo) { + FastHisto hi, lo; + TH1 *histHi = dynamic_cast(thi); + TH1 *histLo = dynamic_cast(tlo); + if (histHi && histLo) { + hi = *histHi; lo = *histLo; + } else { + RooAbsPdf *pdfHi = dynamic_cast(thi); + RooAbsPdf *pdfLo = dynamic_cast(tlo); + const RooRealVar &x = dynamic_cast(_x.arg()); + histHi = pdfHi->createHistogram("",x); histHi->SetDirectory(0); + histLo = pdfLo->createHistogram("",x); histLo->SetDirectory(0); + hi = *histHi; lo = *histLo; + delete histHi; delete histLo; + } + //printf("Un-normalized templates for dimension %d: \n", dim); hi.Dump(); lo.Dump(); + hi.Normalize(); lo.Normalize(); + //printf("Normalized templates for dimension %d: \n", dim); hi.Dump(); lo.Dump(); + initMorph(_morphs[dim], _cacheNominal, lo, hi); +} +void FastVerticalInterpHistPdf2D2::initComponent(int dim, TObject *thi, TObject *tlo) { + FastHisto2D hi, lo; + TH2 *histHi = dynamic_cast(thi); + TH2 *histLo = dynamic_cast(tlo); + if (histHi && histLo) { + hi = FastHisto2D(*histHi,_conditional); + lo = FastHisto2D(*histLo,_conditional); + } else { + RooAbsPdf *pdfHi = dynamic_cast(thi); + RooAbsPdf *pdfLo = dynamic_cast(tlo); + const RooRealVar &x = dynamic_cast(_x.arg()); + const RooRealVar &y = dynamic_cast(_y.arg()); + const RooCmdArg &cond = _conditional ? RooFit::ConditionalObservables(RooArgSet(x)) : RooCmdArg::none(); + histHi = dynamic_cast(pdfHi->createHistogram("", x, RooFit::YVar(y), cond)); histHi->SetDirectory(0); + histLo = dynamic_cast(pdfLo->createHistogram("", x, RooFit::YVar(y), cond)); histLo->SetDirectory(0); + hi = FastHisto2D(*histHi,_conditional); + lo = FastHisto2D(*histLo,_conditional); + delete histHi; delete histLo; + } + //printf("Un-normalized templates for dimension %d: \n", dim); hi.Dump(); lo.Dump(); + if (_conditional) { + hi.NormalizeXSlices(); lo.NormalizeXSlices(); + } else { + hi.Normalize(); lo.Normalize(); + } + //printf("Normalized templates for dimension %d: \n", dim); hi.Dump(); lo.Dump(); + initMorph(_morphs[dim], _cacheNominal, lo, hi); +} + + + +void FastVerticalInterpHistPdf2Base::initMorph(Morph &out, const FastTemplate &nominal, FastTemplate &lo, FastTemplate &hi) const { + out.sum.Resize(hi.size()); + out.diff.Resize(hi.size()); + if (_smoothAlgo < 0) { + hi.LogRatio(nominal); lo.LogRatio(nominal); + //printf("Log-ratios for dimension %d: \n", dim); hi.Dump(); lo.Dump(); + } else { + hi.Subtract(nominal); lo.Subtract(nominal); + //printf("Differences for dimension %d: \n", dim); hi.Dump(); lo.Dump(); + } + FastTemplate::SumDiff(hi, lo, out.sum, out.diff); + //printf("Sum and diff for dimension %d: \n", dim); out.sum.Dump(); out.diff.Dump(); +} + + + + +void FastVerticalInterpHistPdf2Base::syncTotal(FastTemplate &cache, const FastTemplate &cacheNominal, const FastTemplate &cacheNominalLog) const { + /* === how the algorithm works, in theory === + * let dhi = h_hi - h_nominal + * dlo = h_lo - h_nominal + * and x be the morphing parameter + * we define alpha = x * 0.5 * ((dhi-dlo) + (dhi+dlo)*smoothStepFunc(x)); + * which satisfies: + * alpha(0) = 0 + * alpha(+1) = dhi + * alpha(-1) = dlo + * alpha(x >= +1) = |x|*dhi + * alpha(x <= -1) = |x|*dlo + * alpha is continuous and has continuous first and second derivative, as smoothStepFunc has them + * === and in practice === + * we already have computed the histogram for diff=(dhi-dlo) and sum=(dhi+dlo) + * so we just do template += (0.5 * x) * (diff + smoothStepFunc(x) * sum) + * ========================================== */ + + // start from nominal + cache.CopyValues(_smoothAlgo < 0 ? cacheNominalLog : cacheNominal); + //printf("Cache initialized to nominal template: \n"); cacheNominal.Dump(); + + // apply all morphs one by one + for (int i = 0, ndim = _coefList.getSize(); i < ndim; ++i) { + double x = _morphParams[i]->getVal(); + double a = 0.5*x, b = smoothStepFunc(x); + cache.Meld(_morphs[i].diff, _morphs[i].sum, a, b); + //printf("Merged transformation for dimension %d, x = %+5.3f, step = %.3f: \n", i, x, b); cache.Dump(); + } + + // if necessary go back to linear scale + if (_smoothAlgo < 0) { + cache.Exp(); + //printf("Done exponential tranformation\n"); cache.Dump(); + } else { + cache.CropUnderflows(); + } + + // mark as done + _sentry.reset(); +} + +void FastVerticalInterpHistPdf2::syncTotal() const { + FastVerticalInterpHistPdf2Base::syncTotal(_cache, _cacheNominal, _cacheNominalLog); + + // normalize the result + _cache.Normalize(); + //printf("Normalized result\n"); _cache.Dump(); +} + +void FastVerticalInterpHistPdf2D2::syncTotal() const { + FastVerticalInterpHistPdf2Base::syncTotal(_cache, _cacheNominal, _cacheNominalLog); + + // normalize the result + if (_conditional) _cache.NormalizeXSlices(); + else _cache.Normalize(); + //printf("Normalized result\n"); _cache.Dump(); +} + +FastVerticalInterpHistPdf2V::FastVerticalInterpHistPdf2V(const FastVerticalInterpHistPdf2 &hpdf, const RooAbsData &data) : + hpdf_(hpdf),begin_(0),end_(0) +{ + // check init + if (!hpdf._initBase) hpdf.initBase(); + if (hpdf._cache.size() == 0) hpdf._cache = hpdf._cacheNominal; + if (!hpdf._sentry.good()) hpdf.syncTotal(); + // find bins + std::vector bins; + RooArgSet obs(hpdf._x.arg()); + const RooRealVar &x = static_cast(*obs.first()); + bool aligned = true; + for (int i = 0, n = data.numEntries(); i < n; ++i) { + obs = *data.get(i); + if (data.weight() == 0) continue; + int idx = hpdf._cache.FindBin(x.getVal()); + if (!bins.empty() && idx != bins.back() + 1) aligned = false; + bins.push_back(idx); + } + if (bins.empty()) { + // nothing to do. + } else if (aligned) { + begin_ = bins.front(); + end_ = bins.back()+1; + //std::cout << "Created FastVerticalInterpHistPdf2V from " << hpdf.GetName() << ", aligned, " << (end_-begin_) << " bins." << std::endl; + } else { + nbins_ = bins.size(); + bins_.swap(bins); + blocks_.clear(); + int start = bins_[0], istart = 0; + for (int i = 1, n = bins_.size(); i < n; ++i) { + if (bins_[i] != bins_[i-1]+1) { + blocks_.push_back(Block(istart,start,bins_[i-1]+1)); + start = bins_[i]; + istart = i; + } + } + blocks_.push_back(Block(istart,start,bins_.back()+1)); + if (blocks_.size() < 4*bins_.size()) { + //std::cout << "Created FastVerticalInterpHistPdf2V from " << hpdf.GetName() << ", block-aligned, " << bins_.size() << " bins, " << blocks_.size() << " blocks." << std::endl; + bins_.clear(); + } else { + //std::cout << "Created FastVerticalInterpHistPdf2V from " << hpdf.GetName() << ", non-aligned, " << bins_.size() << " bins, " << blocks_.size() << " blocks." << std::endl; + blocks_.clear(); + } + } +} + +void FastVerticalInterpHistPdf2V::fill(std::vector &out) const +{ + if (!hpdf_._sentry.good()) hpdf_.syncTotal(); + if (begin_ != end_) { + out.resize(end_-begin_); + std::copy(& hpdf_._cache.GetBinContent(begin_), & hpdf_._cache.GetBinContent(end_), out.begin()); + } else if (!blocks_.empty()) { + out.resize(nbins_); + for (auto b : blocks_) std::copy(& hpdf_._cache.GetBinContent(b.begin), & hpdf_._cache.GetBinContent(b.end), out.begin()+b.index); + } else { + out.resize(bins_.size()); + for (int i = 0, n = bins_.size(); i < n; ++i) { + out[i] = hpdf_._cache.GetBinContent(bins_[i]); + } + } +} + diff --git a/src/vectorized.cc b/src/vectorized.cc new file mode 100644 index 0000000000000..87c06f4f09923 --- /dev/null +++ b/src/vectorized.cc @@ -0,0 +1,40 @@ +#include "vectorized.h" + +void vectorized::mul_add(const uint32_t size, double coeff, double const * __restrict__ iarray, double* __restrict__ oarray) { + for (uint32_t i = 0; i < size; ++i) { + oarray[i] += coeff * iarray[i]; + } +} +double vectorized::nll_reduce(const uint32_t size, double* __restrict__ pdfvals, double const * __restrict__ weights, double sumcoeff, double * __restrict__ workingArea) { + double invsum = 1.0/sumcoeff; + for (uint32_t i = 0; i < size; ++i) { + pdfvals[i] *= invsum; + } + + vdt::fast_logv(size, pdfvals, workingArea); + + for (uint32_t i = 0; i < size; ++i) { + pdfvals[i] = weights[i] * workingArea[i]; + } + + + double ret = 0; + for (uint32_t i = 0; i < size; ++i) { + ret += pdfvals[i]; + } + + return ret; +} + +void vectorized::gaussians(const uint32_t size, double mean, double sigma, double norm, const double* __restrict__ xvals, double * __restrict__ out, double * __restrict__ workingArea, double * __restrict__ workingArea2) +{ + double xscale = -0.5/(sigma*sigma); + for (uint32_t i = 0; i < size; ++i) { + workingArea[i] = xscale * std::pow(xvals[i] - mean, 2); + } + vdt::fast_expv(size, workingArea, workingArea2); + double inorm = 1.0/norm; + for (uint32_t i = 0; i < size; ++i) { + out[i] = inorm*workingArea2[i]; + } +} diff --git a/src/vectorized.h b/src/vectorized.h new file mode 100644 index 0000000000000..4a76111cc984d --- /dev/null +++ b/src/vectorized.h @@ -0,0 +1,13 @@ +#include "vdt/vdtMath.h" + +namespace vectorized { + // oarray += coeff * iarray + void mul_add(const uint32_t size, double coeff, double const * __restrict__ iarray, double* __restrict__ oarray) ; + + // nll_reduce = sum ( weights * log(pdfvals/sumCoeff) ) + double nll_reduce(const uint32_t size, double* __restrict__ pdfvals, double const * __restrict__ weights, double sumcoeff, double * __restrict__ workingArea) ; + + // gaussians + void gaussians(const uint32_t size, double mean, double sigma, double norm, const double* __restrict__ xvals, double * __restrict__ out, double * __restrict__ workingArea, double * __restrict__ workingArea2) ; +} + diff --git a/test/parallelScan.py b/test/parallelScan.py new file mode 100755 index 0000000000000..deb19b0bed592 --- /dev/null +++ b/test/parallelScan.py @@ -0,0 +1,89 @@ +#!/usr/bin/env python + +## run combine -M MultiDimFit scans in multiple local jobs in parallel +## usage: just replace "combine" with "parallelScan.py" in the command line +## and add a "-j" to select how many threads to use +import os, sys, subprocess; +from math import ceil +from re import match + +if len(sys.argv) < 3: + print "usage: parallelScan.py [ -j processes ]" + exit() + +jobs, points, name, method, mass, hadd = 0, 0, "Test", None, 120, False +## take out the -n and -j; find the --points +args = [] +i = 1; +if sys.argv[i] == "combine": i+= 1 +while i < len(sys.argv): + if sys.argv[i] == "-j": + jobs = int(sys.argv[i+1]) + i += 2 + elif sys.argv[i] == "-n": + name = sys.argv[i+1] + i += 2 + elif sys.argv[i] == "--points": + points = int(sys.argv[i+1]) + args.append(sys.argv[i]); + args.append(sys.argv[i+1]); + i += 2 + elif "--points=" in sys.argv[i]: + points = int(sys.argv[i].replace("--points=","")) + args.append(sys.argv[i]); + i += 1 + elif sys.argv[i] in [ "--mass", "-m" ]: + mass = float(sys.argv[i+1]) + args.append(sys.argv[i]); + args.append(sys.argv[i+1]); + i += 2 + elif sys.argv[i] in [ "-M", "--method" ]: + method = sys.argv[i+1] + args.append(sys.argv[i]); + args.append(sys.argv[i+1]); + i += 2 + elif sys.argv[i] == "--hadd": + hadd = True + i += 1 + else: + args.append(sys.argv[i]); + i += 1 + +if points == 0: raise RuntimeError, "parallelScan requires that there be a --points= or --points option in the command line\n"; +if jobs == 0: + cpuinfo = open("/proc/cpuinfo","r") + cores = sum([(1 if match("^processor\\b.*",l) else 0) for l in cpuinfo]) + if cores == 0: raise RuntimeError, "Cannot determine number of cores from /proc/cpuinfo, so I need a -j option\n"; + if cores > 2 and match("(lxplus|cmslpc).*", os.environ['HOSTNAME']): + jobs = cores/2 + print "Will run with %d jobs (half of the cores, for respect to other users)" % jobs + else: + jobs = cores + print "Will run with %d jobs (one per core)" % jobs +if hadd: + if not method: + print "Cannot understand what method of combine you are using, so cannot do hadd" + exit() + +workers = [] +for j in xrange(jobs): + start = int(ceil(points*j/float(jobs))) + end = int(ceil(points*(j+1)/float(jobs))-1) + myargs = ["combine"] + args[:] + [ "-n", "%s.%d" % (name,j), "--firstPoint", str(start), "--lastPoint", str(end) ] + print "spawning %s" % (" ".join(myargs)) + workers.append( subprocess.Popen(myargs) ) + +for w in workers: + w.wait() + +if hadd: + if not method: + print "Cannot understand what method of combine you are using, so cannot do hadd" + else: + output = "higgsCombine%s.%s.mH%g.root" % (name,method,mass) + input = " ".join(["higgsCombine%s.%d.%s.mH%g.root" % (name,j,method,mass) for j in xrange(jobs)]) + print "All workers done, doing the hadd to make %s out of higgsCombine%s.{0..%d}.%s.mH%g.root" % (output, name,jobs-1,method,mass) + os.system("hadd -f %s %s" % (output,input)) +else: + print "All workers done, now you have to hadd the results yourself." + diff --git a/test/plotting/contours2D.cxx b/test/plotting/contours2D.cxx new file mode 100644 index 0000000000000..f5b509cd23847 --- /dev/null +++ b/test/plotting/contours2D.cxx @@ -0,0 +1,195 @@ +TGraph* bestFit(TTree *t, TString x, TString y, TCut cut) { + int nfind = t->Draw(y+":"+x, cut + "deltaNLL == 0"); + if (nfind == 0) { + TGraph *gr0 = new TGraph(1); + gr0->SetPoint(0,-999,-999); + gr0->SetMarkerStyle(34); gr0->SetMarkerSize(2.0); + return gr0; + } else { + TGraph *gr0 = (TGraph*) gROOT->FindObject("Graph")->Clone(); + gr0->SetMarkerStyle(34); gr0->SetMarkerSize(2.0); + if (gr0->GetN() > 1) gr0->Set(1); + return gr0; + } +} + +TH2 *treeToHist2D(TTree *t, TString x, TString y, TString name, TCut cut, double xmin, double xmax, double ymin, double ymax, int xbins, int ybins) { + t->Draw(Form("2*deltaNLL:%s:%s>>%s_prof(%d,%10g,%10g,%d,%10g,%10g)", y.Data(), x.Data(), name.Data(), xbins, xmin, xmax, ybins, ymin, ymax), cut + "deltaNLL != 0", "PROF"); + TH2 *prof = (TH2*) gROOT->FindObject(name+"_prof"); + TH2D *h2d = new TH2D(name, name, xbins, xmin, xmax, ybins, ymin, ymax); + for (int ix = 1; ix <= xbins; ++ix) { + for (int iy = 1; iy <= ybins; ++iy) { + double z = prof->GetBinContent(ix,iy); + if (z != z) z = (name.Contains("bayes") ? 0 : 999); // protect agains NANs + h2d->SetBinContent(ix, iy, z); + } + } + h2d->GetXaxis()->SetTitle(x); + h2d->GetYaxis()->SetTitle(y); + h2d->SetDirectory(0); + return h2d; +} + +TList* contourFromTH2(TH2 *h2in, double threshold, int minPoints=20) { + std::cout << "Getting contour at threshold " << threshold << " from " << h2in->GetName() << std::endl; + //http://root.cern.ch/root/html/tutorials/hist/ContourList.C.html + Double_t contours[1]; + contours[0] = threshold; + if (h2in->GetNbinsX() * h2in->GetNbinsY() > 10000) minPoints = 50; + if (h2in->GetNbinsX() * h2in->GetNbinsY() <= 100) minPoints = 10; + + TH2D *h2 = frameTH2D((TH2D*)h2in,threshold); + + h2->SetContour(1, contours); + + // Draw contours as filled regions, and Save points + h2->Draw("CONT Z LIST"); + gPad->Update(); // Needed to force the plotting and retrieve the contours in TGraphs + + + // Get Contours + TObjArray *conts = (TObjArray*)gROOT->GetListOfSpecials()->FindObject("contours"); + TList* contLevel = NULL; + + if (conts == NULL || conts->GetSize() == 0){ + printf("*** No Contours Were Extracted!\n"); + return 0; + } + + TList *ret = new TList(); + for(int i = 0; i < conts->GetSize(); i++){ + contLevel = (TList*)conts->At(i); + //printf("Contour %d has %d Graphs\n", i, contLevel->GetSize()); + for (int j = 0, n = contLevel->GetSize(); j < n; ++j) { + TGraph *gr1 = (TGraph*) contLevel->At(j); + //printf("\t Graph %d has %d points\n", j, gr1->GetN()); + if (gr1->GetN() > minPoints) ret->Add(gr1->Clone()); + //break; + } + } + return ret; +} + +TH2D* frameTH2D(TH2D *in, double threshold){ + // NEW LOGIC: + // - pretend that the center of the last bin is on the border if the frame + // - add one tiny frame with huge values + double frameValue = 1000; + if (TString(in->GetName()).Contains("bayes")) frameValue = 0.0; + + Double_t xw = in->GetXaxis()->GetBinWidth(1); + Double_t yw = in->GetYaxis()->GetBinWidth(1); + + Int_t nx = in->GetNbinsX(); + Int_t ny = in->GetNbinsY(); + + Double_t x0 = in->GetXaxis()->GetXmin(); + Double_t x1 = in->GetXaxis()->GetXmax(); + + Double_t y0 = in->GetYaxis()->GetXmin(); + Double_t y1 = in->GetYaxis()->GetXmax(); + Double_t xbins[999], ybins[999]; + double eps = 0.1; + + xbins[0] = x0 - eps*xw - xw; xbins[1] = x0 + eps*xw - xw; + for (int ix = 2; ix <= nx; ++ix) xbins[ix] = x0 + (ix-1)*xw; + xbins[nx+1] = x1 - eps*xw + 0.5*xw; xbins[nx+2] = x1 + eps*xw + xw; + + ybins[0] = y0 - eps*yw - yw; ybins[1] = y0 + eps*yw - yw; + for (int iy = 2; iy <= ny; ++iy) ybins[iy] = y0 + (iy-1)*yw; + ybins[ny+1] = y1 - eps*yw + yw; ybins[ny+2] = y1 + eps*yw + yw; + + TH2D *framed = new TH2D( + Form("%s framed",in->GetName()), + Form("%s framed",in->GetTitle()), + nx + 2, xbins, + ny + 2, ybins + ); + + //Copy over the contents + for(int ix = 1; ix <= nx ; ix++){ + for(int iy = 1; iy <= ny ; iy++){ + framed->SetBinContent(1+ix, 1+iy, in->GetBinContent(ix,iy)); + } + } + //Frame with huge values + nx = framed->GetNbinsX(); + ny = framed->GetNbinsY(); + for(int ix = 1; ix <= nx ; ix++){ + framed->SetBinContent(ix, 1, frameValue); + framed->SetBinContent(ix, ny, frameValue); + } + for(int iy = 2; iy <= ny-1 ; iy++){ + framed->SetBinContent( 1, iy, frameValue); + framed->SetBinContent(nx, iy, frameValue); + } + + return framed; +} +void styleMultiGraph(TList *tmg, int lineColor, int lineWidth, int lineStyle) { + for (int i = 0; i < tmg->GetSize(); ++i) { + TGraph *g = (TGraph*) tmg->At(i); + g->SetLineColor(lineColor); g->SetLineWidth(lineWidth); g->SetLineStyle(lineStyle); + } +} +void styleMultiGraphMarker(TList *tmg, int markerColor, int markerSize, int markerStyle) { + for (int i = 0; i < tmg->GetSize(); ++i) { + TGraph *g = (TGraph*) tmg->At(i); + g->SetMarkerColor(markerColor); g->SetMarkerSize(markerSize); g->SetMarkerStyle(markerStyle); + } +} + + +/** Make a 2D contour plot from the output of MultiDimFit + * + * Inputs: + * - gFile should point to the TFile containing the 'limit' TTree + * - xvar should be the variable to use on the X axis, with xbins bins in the [xmin, xmax] range + * - yvar should be the variable to use on the Y axis, with ybins bins in the [ymin, ymax] range + * - (smx, smy) are the coordinates at which to put a diamond representing the SM expectation + * - if fOut is not null, then the output objects will be saved to fOut: + * - the 2D histogram will be saved as a TH2 with name name+"_h2d" + * - the 68% CL contour will be saved as a TList of TGraphs with name name+"_c68" + * - the 95% CL contour will be saved as a TList of TGraphs with name name+"_c95" + * - the 99.7% CL contour will be saved as a TList of TGraphs with name name+"_c997" + * - the best fit point will be saved as a TGraph with name name+"_best" + * + * Notes: + * - it's up to you to make sure that the binning you use for this plot matches the one used + * when running MultiDimFit (but you can just plot a subset of the points; e.g. if you had + * 100x100 points in [-1,1]x[-1,1] you can make a 50x50 plot for [0,1]x[0,1]) + * - the 99.7 contour is not plotted by default + * - the SM marker is not saved +*/ +void contour2D(TString xvar, int xbins, float xmin, float xmax, TString yvar, int ybins, float ymin, float ymax, float smx=1.0, float smy=1.0, TFile *fOut=0, TString name="contour2D") { + TTree *tree = (TTree*) gFile->Get("limit") ; + TH2 *hist2d = treeToHist2D(tree, xvar, yvar, "h2d", "", xmin, xmax, ymin, ymax, xbins, ybins); + hist2d->SetContour(200); + hist2d->GetZaxis()->SetRangeUser(0,21); + TGraph *fit = bestFit(tree, xvar, yvar, ""); + TList *c68 = contourFromTH2(hist2d, 2.30); + TList *c95 = contourFromTH2(hist2d, 5.99); + TList *c997 = contourFromTH2(hist2d, 11.83); + styleMultiGraph(c68, /*color=*/1, /*width=*/3, /*style=*/1); + styleMultiGraph(c95, /*color=*/1, /*width=*/3, /*style=*/9); + styleMultiGraph(c997, /*color=*/1, /*width=*/3, /*style=*/2); + hist2d->Draw("AXIS"); gStyle->SetOptStat(0); + c68->Draw("L SAME"); + c95->Draw("L SAME"); + fit->Draw("P SAME"); + TMarker m; + m.SetMarkerSize(3.0); m.SetMarkerColor(97); m.SetMarkerStyle(33); + m.DrawMarker(smx,smy); + m.SetMarkerSize(1.8); m.SetMarkerColor(89); m.SetMarkerStyle(33); + m.DrawMarker(smx,smy); + if (fOut != 0) { + hist2d->SetName(name+"_h2d"); fOut->WriteTObject(hist2d,0); + fit->SetName(name+"_best"); fOut->WriteTObject(fit,0); + c68->SetName(name+"_c68"); fOut->WriteTObject(c68,0,"SingleKey"); + c95->SetName(name+"_c95"); fOut->WriteTObject(c95,0,"SingleKey"); + c997->SetName(name+"_c997"); fOut->WriteTObject(c997,0,"SingleKey"); + } +} + +void contours2D() {} + diff --git a/test/plotting/tdrstyle.cc b/test/plotting/tdrstyle.cc new file mode 100644 index 0000000000000..c0873b89b1c36 --- /dev/null +++ b/test/plotting/tdrstyle.cc @@ -0,0 +1,174 @@ +// +// TDR style macro for plots in ROOT +// .L tdrstyle.C +// setTDRStyle() +// +#include +#include + +// tdrGrid: Turns the grid lines on (true) or off (false) + +/* +void tdrGrid(bool gridOn) { + tdrStyle->SetPadGridX(gridOn); + tdrStyle->SetPadGridY(gridOn); +} +*/ + +// fixOverlay: Redraws the axis + +void fixOverlay() { + gPad->RedrawAxis(); +} + +void setTDRStyle(bool force) { +// TStyle *tdrStyle = new TStyle("tdrStyle","Style for P-TDR"); + +// For the canvas: + gStyle->SetCanvasBorderMode(0); + gStyle->SetCanvasColor(kWhite); + gStyle->SetCanvasDefH(600); //Height of canvas + gStyle->SetCanvasDefW(600); //Width of canvas + gStyle->SetCanvasDefX(0); //POsition on screen + gStyle->SetCanvasDefY(0); + +// For the Pad: + gStyle->SetPadBorderMode(0); + // gStyle->SetPadBorderSize(Width_t size = 1); + gStyle->SetPadColor(kWhite); + gStyle->SetPadGridX(false); + gStyle->SetPadGridY(false); + gStyle->SetGridColor(0); + gStyle->SetGridStyle(3); + gStyle->SetGridWidth(1); + +// For the frame: + gStyle->SetFrameBorderMode(0); + gStyle->SetFrameBorderSize(1); + gStyle->SetFrameFillColor(0); + gStyle->SetFrameFillStyle(0); + gStyle->SetFrameLineColor(1); + gStyle->SetFrameLineStyle(1); + gStyle->SetFrameLineWidth(1); + +// For the histo: + if (force) { + // gStyle->SetHistFillColor(1); + // gStyle->SetHistFillStyle(0); + gStyle->SetHistLineColor(1); + gStyle->SetHistLineStyle(0); + gStyle->SetHistLineWidth(1); + // gStyle->SetLegoInnerR(Float_t rad = 0.5); + // gStyle->SetNumberContours(Int_t number = 20); + } + + gStyle->SetEndErrorSize(2); + //gStyle->SetErrorMarker(20); + gStyle->SetErrorX(0.); + + gStyle->SetMarkerStyle(20); + +//For the fit/function: + gStyle->SetOptFit(1); + gStyle->SetFitFormat("5.4g"); + gStyle->SetFuncColor(2); + gStyle->SetFuncStyle(1); + gStyle->SetFuncWidth(1); + +//For the date: + gStyle->SetOptDate(0); + // gStyle->SetDateX(Float_t x = 0.01); + // gStyle->SetDateY(Float_t y = 0.01); + +// For the statistics box: + gStyle->SetOptFile(0); + //gStyle->SetOptStat(0); + gStyle->SetOptStat("mr"); + gStyle->SetStatColor(kWhite); + gStyle->SetStatFont(42); + gStyle->SetStatFontSize(0.04);///---> gStyle->SetStatFontSize(0.025); + gStyle->SetStatTextColor(1); + gStyle->SetStatFormat("6.4g"); + gStyle->SetStatBorderSize(1); + gStyle->SetStatH(0.1); + gStyle->SetStatW(0.2);///---> gStyle->SetStatW(0.15); + + // gStyle->SetStatStyle(Style_t style = 1001); + // gStyle->SetStatX(Float_t x = 0); + // gStyle->SetStatY(Float_t y = 0); + +// Margins: + gStyle->SetPadTopMargin(0.05); + gStyle->SetPadBottomMargin(0.13); + gStyle->SetPadLeftMargin(0.16); + gStyle->SetPadRightMargin(0.04); + +// For the Global title: + + gStyle->SetOptTitle(0); + gStyle->SetTitleFont(42); + gStyle->SetTitleColor(1); + gStyle->SetTitleTextColor(1); + gStyle->SetTitleFillColor(10); + gStyle->SetTitleFontSize(0.05); + // gStyle->SetTitleH(0); // Set the height of the title box + // gStyle->SetTitleW(0); // Set the width of the title box + // gStyle->SetTitleX(0); // Set the position of the title box + // gStyle->SetTitleY(0.985); // Set the position of the title box + // gStyle->SetTitleStyle(Style_t style = 1001); + // gStyle->SetTitleBorderSize(2); + +// For the axis titles: + + gStyle->SetTitleColor(1, "XYZ"); + gStyle->SetTitleFont(42, "XYZ"); + gStyle->SetTitleSize(0.06, "XYZ"); + // gStyle->SetTitleXSize(Float_t size = 0.02); // Another way to set the size? + // gStyle->SetTitleYSize(Float_t size = 0.02); + gStyle->SetTitleXOffset(0.9); + gStyle->SetTitleYOffset(1.25); + // gStyle->SetTitleOffset(1.1, "Y"); // Another way to set the Offset + +// For the axis labels: + + gStyle->SetLabelColor(1, "XYZ"); + gStyle->SetLabelFont(42, "XYZ"); + gStyle->SetLabelOffset(0.007, "XYZ"); + gStyle->SetLabelSize(0.05, "XYZ"); + +// For the axis: + + gStyle->SetAxisColor(1, "XYZ"); + gStyle->SetStripDecimals(kTRUE); + gStyle->SetTickLength(0.03, "XYZ"); + gStyle->SetNdivisions(510, "XYZ"); + gStyle->SetPadTickX(1); // To get tick marks on the opposite side of the frame + gStyle->SetPadTickY(1); + +// Change for log plots: + gStyle->SetOptLogx(0); + gStyle->SetOptLogy(0); + gStyle->SetOptLogz(0); + +// Postscript options: + gStyle->SetPaperSize(20.,20.); + // gStyle->SetLineScalePS(Float_t scale = 3); + // gStyle->SetLineStyleString(Int_t i, const char* text); + // gStyle->SetHeaderPS(const char* header); + // gStyle->SetTitlePS(const char* pstitle); + + // gStyle->SetBarOffset(Float_t baroff = 0.5); + // gStyle->SetBarWidth(Float_t barwidth = 0.5); + // gStyle->SetPaintTextFormat(const char* format = "g"); + // gStyle->SetPalette(Int_t ncolors = 0, Int_t* colors = 0); + // gStyle->SetTimeOffset(Double_t toffset); + // gStyle->SetHistMinimumZero(kTRUE); + +// gStyle->cd(); + gROOT->ForceStyle(); + +} + +void tdrstyle(bool force=true) { + setTDRStyle(force); +}