In [None]:
from math import log, sqrt

import numpy as np
import ROOT

# Silence most of the RooFit messages (a ROOT bug otherwise saturates the Jupyter stream).
msg_service = ROOT.RooMsgService.instance()
for topic in (ROOT.RooFit.NumIntegration,
              ROOT.RooFit.Fitting,
              ROOT.RooFit.Minimization,
              ROOT.RooFit.InputArguments,
              ROOT.RooFit.Eval):
    # drop this topic from message stream 1
    msg_service.getStream(1).removeTopic(topic)
# globally suppress every message below ERROR level
msg_service.setGlobalKillBelow(ROOT.RooFit.ERROR)

# keep the asymptotic calculator quiet as well
ROOT.RooStats.AsymptoticCalculator.SetPrintLevel(-1)

# render canvases with the JavaScript ROOT viewer
ROOT.enableJSVis()

In [None]:
# Open the ROOT file holding the workspace and fetch the objects used by all the cells below.
f = ROOT.TFile.Open('../data/workspace.root')
# TFile.Open hands back a null pointer (falsy in PyROOT) when the file cannot be opened:
# fail here with a readable message instead of later with an obscure null-pointer error.
if not f or f.IsZombie():
    raise IOError("cannot open '../data/workspace.root'")
ws = f.Get("ws")
if not ws:
    raise KeyError("no object named 'ws' found in '../data/workspace.root'")
data = ws.data('data_binned')

# Discovery
We try to exclude the background-only model.

We are using the workspace in the `../data` folder, which may differ from the one you generated yourself.

## Using RooStats calculator

In [None]:
# the main object: as usual it needs the data and the model configs
# (the s+b config is passed first, the b-only config second)
hypoCalc = ROOT.RooStats.AsymptoticCalculator(data,
                                              ws.obj('sb_model_config'),
                                              ws.obj('b_model_config'))

# with the asymptotic calculator it is assumed we are using some sort of test statistic
# based on the profile likelihood ratio. In this case we are using the version where
# if the fitted signal is negative the value of the test statistic is 0 (OneSidedDiscovery).
# This means that we are never excluding the background hypothesis when the fitted signal
# is negative
hypoCalc.SetOneSidedDiscovery(True)
htr = hypoCalc.GetHypoTest()         # the result of the inference (hypo-test-result)
htr.SetPValueIsRightTail(True)       # the signal hypothesis is for high value (right)
htr.SetBackgroundAsAlt(False)
z = htr.Significance()

# single-argument print(...) gives the same output under Python 2 and Python 3
print("significance is %f sigma" % z)

## The same just using asymptotic formulas manually

In [None]:
# Manual cross-check using the asymptotic formula z = sqrt(2 * (NLL_b - NLL_sb)).
ws.loadSnapshot('data_fit')
poi = ws.var('xsection_x_br')

# s + b fit (signal floating)
poi.setConstant(False)
r_sb = ws.pdf('model').fitTo(data, ROOT.RooFit.Save(), ROOT.RooFit.PrintLevel(-1))
poi_hat = poi.getVal()  # best-fit signal, needed for the one-sided rule below

# b-only fit (signal fixed to zero)
poi.setVal(0)
poi.setConstant(True)
try:
    r_b = ws.pdf('model').fitTo(data, ROOT.RooFit.Save(), ROOT.RooFit.PrintLevel(-1))
finally:
    # always release the parameter, even if the fit raises
    poi.setConstant(False)

delta_nll = r_b.minNll() - r_sb.minNll()
# One-sided discovery convention: the significance is 0 when the fitted signal is negative.
# The delta_nll > 0 guard also protects sqrt() from a slightly negative difference
# caused by numerical noise in the two minimizations.
if delta_nll > 0 and poi_hat >= 0:
    z_manual = sqrt(2 * delta_nll)
else:
    z_manual = 0.0
print(z_manual)

In [None]:
# (unfortunately) the workspace is stateful.
# We have done some fits, so its state (the values of the variables) has changed.
# Reload the original state, the one after the first fit on data
ws.loadSnapshot('data_fit')
model_config = ws.obj('model_config')
lumi_var = ws.var('nominal_luminosity')
original_lumi = lumi_var.getVal()

gr = ROOT.TGraph()

try:
    for ilumi, lumi in enumerate([0.1E3, 1E3, 2E3, 5E3, 10E3, 20E3, 30E3, 50E3]):
        ws.loadSnapshot('data_fit')
        lumi_var.setVal(lumi)
        # create an asimov dataset with this new luminosity
        data_asimov_lumi = ws.pdf('model').generateBinned(model_config.GetObservables(),
                                                          ROOT.RooFit.ExpectedData())
        # s + b fit
        result_sb = ws.pdf('model').fitTo(data_asimov_lumi, ROOT.RooFit.Save(), ROOT.RooFit.PrintLevel(-1))

        # b-only fit (signal fixed to zero)
        poi = ws.var('xsection_x_br')
        poi.setVal(0)
        poi.setConstant(True)
        try:
            result_b = ws.pdf('model').fitTo(data_asimov_lumi, ROOT.RooFit.Save(), ROOT.RooFit.PrintLevel(-1))
        finally:
            # always release the parameter, even if the fit raises
            poi.setConstant(False)

        # clamp at 0: numerical noise can make the difference slightly negative,
        # which would make sqrt() raise a ValueError
        z = sqrt(max(0.0, 2 * (result_b.minNll() - result_sb.minNll())))
        print("lumi: %.2e, #events=%d, z=%.2f" % (lumi, data_asimov_lumi.sumEntries(), z))

        # the x value is lumi / 1E3 to match the fb^{-1} axis title below
        gr.SetPoint(ilumi, lumi / 1E3, z)
finally:
    # put the luminosity back even if one of the fits failed
    lumi_var.setVal(original_lumi)

canvas = ROOT.TCanvas()
gr.SetMarkerStyle(20)
gr.GetXaxis().SetTitle('luminosity [fb^{-1}]')
gr.GetYaxis().SetTitle('expected significance')
gr.Draw("APL")
canvas.Draw()

## Using frequentist calculator (toys)

In [None]:
# this will take ~30 seconds

# Fix the seed of the RooFit random generator so that the toys (and therefore the
# result of this cell) do not depend on what was run before in the kernel.
ROOT.RooRandom.randomGenerator().SetSeed(42)

# the main object: as usual it needs the data and the model configs
# (the s+b config is passed first, the b-only config second)
hypoCalc = ROOT.RooStats.FrequentistCalculator(data,
                                               ws.obj('sb_model_config'),
                                               ws.obj('b_model_config'))
hypoCalc.SetToys(1000, 200)  # set how many toys to get the distribution of the test-statistics
                             # under s+b and b-only hypothesis

# frequentist calculator is very flexible, so you have to define externally the test statistic
# (you can also implement your own). Here using the profiled likelihood
profll = ROOT.RooStats.ProfileLikelihoodTestStat(ws.obj('b_model_config').GetPdf())
profll.SetOneSidedDiscovery(True)

# the sampler (the object generating toys)
sampler = hypoCalc.GetTestStatSampler()
sampler.SetTestStatistic(profll)
sampler.SetGenerateBinned(True)      # to speed up generate binned toys

# get the result
htr = hypoCalc.GetHypoTest()         # the result of the inference (hypo-test-result)
htr.SetPValueIsRightTail(True)       # the signal hypothesis is for high value (right)
htr.SetBackgroundAsAlt(False)
z = htr.Significance()

# single-argument print(...) gives the same output under Python 2 and Python 3
print("significance is %f sigma" % z)

# plot the distribution of the test statistics
plot = ROOT.RooStats.HypoTestPlot(htr, 100) # number of bins
canvas = ROOT.TCanvas()
plot.Draw()
canvas.SetLogy()
canvas.Draw()

***Exercise***: Explain why the frequentist calculator returns an infinite significance.

***Exercise***: Explain why there is a peak at 0 in the distribution of the test statistic under the b-only hypothesis.

## Do a scan of significance and p-value as a function of $m_H$
***Exercise***: complete the code

In [None]:
# Exercise scaffold: scan the p-value and the significance as a function of mH.
# The parts marked "FILL HERE" are intentionally left for the reader to complete.

# datasets stored in the workspace
data_binned = ws.data('data_binned')
data_asimov = ws.data('data_asimov')

import numpy as np

# start from the workspace state saved in the 'data_fit' snapshot
ws.loadSnapshot('data_fit')
model_config = ws.obj('model_config')
# NOTE(review): original_mH is saved here but this cell never writes it back to 'mH'
original_mH = ws.obj('mH').getVal()

# graphs to be filled with one point per tested mass
gr_pvalue = ROOT.TGraph()
gr_significance = ROOT.TGraph()

ws.pdf('model').fitTo(data_asimov, ROOT.RooFit.PrintLevel(-1))

# 100 equally spaced mass points between 100 and 140
for imH, mH_value in enumerate(np.linspace(100, 140, 100)):
    # reset the workspace state, then move to the mass point under test
    ws.loadSnapshot('data_fit')
    ws.var('mH').setVal(mH_value)
    
    ws.pdf('model').fitTo(data_asimov, ROOT.RooFit.PrintLevel(-1))
    # FILL HERE
    # hypoCalc = ...
    # hypoCalc.SetPrintLevel(-1)
    # FILL HERE
    #
    # gr_pvalue.SetPoint(imH, mH_value, htr.NullPValue())
    # gr_significance.SetPoint(imH, mH_value, htr.Significance())
    
    #print mH_value, htr.Significance()

The same, but implemented manually

In [None]:
# (unfortunately) the workspace is stateful.
# We have done some fits, so its state (the values of the variables) has changed.
# Reload the original state, the one after the first fit on data
ws.loadSnapshot('data_fit')
model_config = ws.obj('model_config')
mH_var = ws.var('mH')
original_mH = mH_var.getVal()
poi = ws.var('xsection_x_br')
# fetch the dataset here so that this cell does not rely on another cell having defined it
data_binned = ws.data('data_binned')

gr = ROOT.TGraph()

try:
    for imH, mH_value in enumerate(np.linspace(100, 140, 100)):
        ws.loadSnapshot('data_fit')
        mH_var.setVal(mH_value)

        # unconditional fit (denominator)
        result_sb = ws.pdf('model').fitTo(data_binned, ROOT.RooFit.Save(), ROOT.RooFit.PrintLevel(-1))
        poi_hat = poi.getVal()

        # conditional fit with the signal fixed to zero (numerator);
        # restart from the snapshot so that both fits begin from the same state
        ws.loadSnapshot('data_fit')
        mH_var.setVal(mH_value)
        poi.setVal(0)
        poi.setConstant(True)
        try:
            result_b = ws.pdf('model').fitTo(data_binned, ROOT.RooFit.Save(), ROOT.RooFit.PrintLevel(-1))
        finally:
            # always release the parameter, even if the fit raises
            poi.setConstant(False)

        delta_nll = result_b.minNll() - result_sb.minNll()
        # one-sided discovery: no significance for a negative fitted signal
        # (the delta_nll check also keeps sqrt() away from negative numerical noise)
        if delta_nll < 0 or poi_hat < 0:
            z = 0
        else:
            z = sqrt(2 * delta_nll)

        gr.SetPoint(imH, mH_value, z)
finally:
    # put mH back to its original value even if one of the fits failed
    mH_var.setVal(original_mH)

canvas = ROOT.TCanvas()
gr.GetXaxis().SetTitle('m_{H}')
# the fits above use data_binned, not an Asimov dataset: label the axis accordingly
gr.GetYaxis().SetTitle('observed significance')
gr.Draw("APL")
canvas.Draw()

### Running the StandardHypoTestDemo macro
This is a standard macro (included in ROOT) to compute p-values for discovery (and also for exclusions). It computes the observed values and also the expected values (internally it generates an Asimov dataset). Remember that we are testing one particular value of $m_H$.

This macro is very popular and is widely used in statistical analyses.

In [None]:
# Load the macro file through the ROOT interpreter ('.L' command); the next cell
# calls the StandardHypoTestDemo function through the ROOT module.
ROOT.gROOT.ProcessLine('.L ../StandardHypoTestDemo.C')

In [None]:
# Arguments of the StandardHypoTestDemo macro, spelled out one by one
workspace_file = "../data/workspace.root"  # workspace filename
workspace_name = "ws"                      # workspace name
sb_config_name = "sb_model_config"         # signal+background model
b_config_name = ""                         # not needed: set poi to 0
dataset_name = "data_binned"               # data name
calculator_type = 2                        # use asymptotic calculator
test_stat_type = 3                         # use profile likelihood one sided (i.e. = 0 if mu_hat < 0)

ROOT.StandardHypoTestDemo(workspace_file,
                          workspace_name,
                          sb_config_name,
                          b_config_name,
                          dataset_name,
                          calculator_type,
                          test_stat_type)