In [1]:
import ROOT

OBJ: TStyle	ildStyle	ILD Style : 0 at: 0x83b0210


In [2]:
run_toy_diagnostics = False
# run_toy_diagnostics = True

In [None]:
# TODO: make it possible to fit multiple runs simultaneously
run = {
    "lumi": 5000,
    "e_pol": 0.,
    "p_pol": 0.,
}
n_bins = 65

# https://arxiv.org/pdf/1506.07830
ilc_250_h20_lumi = 2000
ilc_250_h20 = [
    {
        "lumi": ilc_250_h20_lumi * 0.675,
        "e_pol": -0.8,
        "p_pol": 0.3,
    },
    {
        "lumi": ilc_250_h20_lumi * 0.225,
        "e_pol": 0.8,
        "p_pol": -0.3,
    },
    {
        "lumi": ilc_250_h20_lumi * 0.05,
        "e_pol": -0.8,
        "p_pol": -0.3,
    },
    {
        "lumi": ilc_250_h20_lumi * 0.05,
        "e_pol": 0.8,
        "p_pol": 0.3,
    },
]

In [4]:
parameters = {
    "g1z": 0.0,
    "ka": 0.0,
    "la": 0.0,
}
obs_names = [
    "O_g1z_pos_1em05",
    "O_ka_pos_1em05",
    "O_la_pos_1em05",
]
input_path = "data/histograms/full/raw_histograms.root"
signal_cat = "4f_sw_sl_signal"
signal_processes = [
    "4f_sw_sl_eLpL_signal",
    "4f_sw_sl_eLpR_signal",
    "4f_sw_sl_eRpL_signal",
    "4f_sw_sl_eRpR_signal",
    ]
background_processes = []

In [5]:
signal_histograms = {}
signal_meta = {}
oo_matrix = {}
template_parameters = {}
with ROOT.TFile(input_path) as input_file:
    # take care of signals first
    signal_dir = input_file[signal_cat]
    for process_name in signal_processes:
        signal_histos = {}
        p_dir = signal_dir[process_name]
        obs = "obs_ND"
        # very consistent ownership model of root requires us to do this
        histo = p_dir[obs]
        # not needed on nD???
        # histo.SetDirectory(ROOT.nullptr)
        signal_histo = histo
        meta_dir = p_dir["meta"]
        obs_meta = {}
        for key in meta_dir.GetListOfKeys():
            key_name = key.GetName()
            obs_meta[key_name] = meta_dir[key_name]
        signal_histograms[process_name] = signal_histo
        signal_meta[process_name] = obs_meta
        # get OO matrix
        mat_dir = input_file["oo_matrix"]
        oo_matrix[process_name] = mat_dir[process_name]
    # get template parameters
    # unfortunately here we have the loops the other way around
    template_dir = input_file["template_parametrisations"]
    for obs in obs_names:
        obs_dir = template_dir[obs]
        p_pars = {}
        for process_name in signal_processes:
            p_dir = obs_dir[process_name]
            pars = {}
            for par in parameters.keys():
            # very consistent ownership model of root requires us to do
                par_hist = p_dir[par]
                par_hist.SetDirectory(ROOT.nullptr)
                pars[par] = par_hist
            p_pars[process_name] = pars
        template_parameters[obs] = p_pars


    # TODO: implement background handling

print(signal_histograms)
print(signal_meta)
print(oo_matrix)
print(template_parameters)

{'4f_sw_sl_eLpL_signal': <cppyy.gbl.THnT<double> object at 0xa11f080>, '4f_sw_sl_eLpR_signal': <cppyy.gbl.THnT<double> object at 0xa27add0>, '4f_sw_sl_eRpL_signal': <cppyy.gbl.THnT<double> object at 0xa2530a0>, '4f_sw_sl_eRpR_signal': <cppyy.gbl.THnT<double> object at 0xa21f460>}
{'4f_sw_sl_eLpL_signal': {'lumi': <cppyy.gbl.TParameter<float> object at 0xa28b5f0>, 'e_pol': <cppyy.gbl.TParameter<float> object at 0xa2580b0>, 'p_pol': <cppyy.gbl.TParameter<float> object at 0xa298290>}, '4f_sw_sl_eLpR_signal': {'lumi': <cppyy.gbl.TParameter<float> object at 0xa26feb0>, 'e_pol': <cppyy.gbl.TParameter<float> object at 0xa2a2c90>, 'p_pol': <cppyy.gbl.TParameter<float> object at 0xa2a2d10>}, '4f_sw_sl_eRpL_signal': {'lumi': <cppyy.gbl.TParameter<float> object at 0xa21bdb0>, 'e_pol': <cppyy.gbl.TParameter<float> object at 0xa21be70>, 'p_pol': <cppyy.gbl.TParameter<float> object at 0x9ea6720>}, '4f_sw_sl_eRpR_signal': {'lumi': <cppyy.gbl.TParameter<float> object at 0xa2e4580>, 'e_pol': <cppyy.gbl

In [6]:
from fit_utils import make_observed_histogram, hist_to_np, make_asimov_np, make_asimov_fast, make_1D_projections, make_observables, make_new_asimov_observables_np
h_obs = make_observed_histogram(signal_histograms, signal_meta, run)
h_obs_np = hist_to_np(h_obs)
h_asimov_np = make_asimov_np(h_obs_np)
h_asimov = make_asimov_fast(h_obs, 1234)

{ @0x7ffcf2c58ed0, @0x7ffcf2c58ed0, @0x7ffcf2c58ed0 } 3


In [7]:
import numpy as np
h_example = make_1D_projections(h_obs)[0]
bin_centers = np.asarray([h_example.GetBinCenter(i+1) for i in range(h_example.GetNbinsX())])

In [8]:
signal_1d_histograms = [make_1D_projections(h) for h in signal_histograms.values()]
# ugly hack to "transpose" that list[list]
signal_1d_histograms = list(map(list, zip(*signal_1d_histograms)))
print(signal_1d_histograms)
signal_lumi = [par["lumi"].GetVal() for par in signal_meta.values()]
print(signal_lumi)
oo_matrices = list(oo_matrix.values())
print(oo_matrices)
template_param = [[[ph for ph in cpl_h.values()] for cpl_h in hel_h.values()] for hel_h in template_parameters.values()]
print(template_param)

[[<cppyy.gbl.TH1D object at 0xc899680>, <cppyy.gbl.TH1D object at 0xc978520>, <cppyy.gbl.TH1D object at 0x6be4c20>, <cppyy.gbl.TH1D object at 0xc9eef60>], [<cppyy.gbl.TH1D object at 0xc845840>, <cppyy.gbl.TH1D object at 0xc97e790>, <cppyy.gbl.TH1D object at 0xcdc9e60>, <cppyy.gbl.TH1D object at 0xc98d230>], [<cppyy.gbl.TH1D object at 0xc7caab0>, <cppyy.gbl.TH1D object at 0xc933ad0>, <cppyy.gbl.TH1D object at 0xcb6cc60>, <cppyy.gbl.TH1D object at 0xcdd8980>]]
[598.3264770507812, 100.83772277832031, 4009.4052734375, 696.610107421875]
[<cppyy.gbl.ROOT.Math.SVector<double,6> object at 0xa29c330>, <cppyy.gbl.ROOT.Math.SVector<double,6> object at 0xa25b160>, <cppyy.gbl.ROOT.Math.SVector<double,6> object at 0xa258ac0>, <cppyy.gbl.ROOT.Math.SVector<double,6> object at 0xa2fc870>]
[[[<cppyy.gbl.TH1D object at 0x94bddb0>, <cppyy.gbl.TH1D object at 0xa35e020>, <cppyy.gbl.TH1D object at 0x92fd0f0>], [<cppyy.gbl.TH1D object at 0x4caece0>, <cppyy.gbl.TH1D object at 0x4bcdc50>, <cppyy.gbl.TH1D object

In [9]:
ROOT.gInterpreter.Declare("#include \"fit.h\"")

True

In [10]:
fun2 = ROOT.fit_fun2[f"3, {n_bins}, 3"](signal_1d_histograms, signal_lumi, oo_matrices, template_param)

start constructor
finished constructor


In [11]:
if run_toy_diagnostics:
    n_obs = 3
    n_toys = 10
    # h_chi2 = ROOT.TH1D("", ";#chi^{2}", 100, -10., 10.)
    h_chi2 = ROOT.TH1D("", ";#chi^{2}", 50, 0., 10.)
    h_prob = ROOT.TH1D("", ";probability", 50, 0., 1.)
    h_diff0 = ROOT.TH1D("", ";obs_asimov - obs [0]", 100, -10000., 10000.)
    h_diff1 = ROOT.TH1D("", ";obs_asimov - obs [1]", 100, -10000., 10000.)
    h_diff2 = ROOT.TH1D("", ";obs_asimov - obs [2]", 100, -10000., 10000.)
    obs_initial = make_observables(make_1D_projections(h_obs))
    C = np.zeros((n_obs, n_obs))
    v = np.zeros(n_obs)
    for seed in range(n_toys):
        # obs = make_new_asimov_observables(h_obs, seed)
        obs = make_new_asimov_observables_np(h_obs_np, bin_centers)
        pars = list(run.values()) + [0., 0., 0.] + [1.]
        chi2 = fun2(obs, pars)
        # print(obs, chi2)
        prob = ROOT.Math.chisquared_cdf(chi2, 3)
        h_chi2.Fill(chi2)
        h_prob.Fill(prob)
        diff = [0.] * n_obs
        for i in range(n_obs):
            diff[i] = obs[i] - obs_initial[i]
            v[i] += diff[i]
        for i in range(n_obs):
            for j in range(n_obs):
                C[i,j] += diff[i] * diff[j]
        h_diff0.Fill(diff[0])
        h_diff1.Fill(diff[1])
        h_diff2.Fill(diff[2])
    C /= n_toys
    v /= n_toys

In [12]:
if run_toy_diagnostics:
    c_chi2 = ROOT.TCanvas()
    f = ROOT.TF1("chi2", "[0] * ROOT::Math::chisquared_pdf(x, [1])", 0., 10.)
    f.SetParameters(10000/5., 3.)
    # f.FixParameter(1, 3.)
    h_chi2.Fit(f)
    h_chi2.Draw()
    # f.Draw("same")
    c_chi2.Draw()
    # c_chi2.SaveAs("plots/fit/diagnostics/chi2.pdf")

    c_prob = ROOT.TCanvas()
    h_prob.SetMinimum(0)
    h_prob.Draw()
    c_prob.Draw()
    # c_prob.SaveAs("plots/fit/diagnostics/prob.pdf")

    c_diff0 = ROOT.TCanvas()
    h_diff0.Fit("gaus")
    h_diff0.Draw()
    c_diff0.Draw()
    # c_diff0.SaveAs("plots/fit/diagnostics/diff0.pdf")

    c_diff1 = ROOT.TCanvas()
    h_diff1.Fit("gaus")
    h_diff1.Draw()
    c_diff1.Draw()
    # c_diff1.SaveAs("plots/fit/diagnostics/diff1.pdf")

    c_diff2 = ROOT.TCanvas()
    h_diff2.Fit("gaus")
    h_diff2.Draw()
    c_diff2.Draw()
    # c_diff2.SaveAs("plots/fit/diagnostics/diff2.pdf")

In [13]:
# build all the RooFit stuff

# use the initial assumed as true values to init
obs_initial = make_observables(make_1D_projections(h_obs))
# define observable parameters
obs_pars = []
for i, o in enumerate(obs_initial):
    name = obs_names[i]
    # lets just choose #pm 10% where ever we're not sure
    min_o = o*0.9
    max_o = o*1.1
    if o < 0:
        # swap around to avoid RooFit printing an error when doing it
        min_o, max_o = max_o, min_o
    ob = ROOT.RooRealVar(name, name, min_o, max_o)
    obs_pars.append(ob)

# define run parameters
lumi_par = ROOT.RooRealVar("lumi", "lumi", run["lumi"], 0.9*run["lumi"], 1.1 * run["lumi"])
lumi_par.setConstant()
e_pol_par = ROOT.RooRealVar("e_pol", "e_pol", 0., -1., 1.)
e_pol_par.setConstant()
p_pol_par = ROOT.RooRealVar("p_pol", "p_pol", 0., -1., 1.)
p_pol_par.setConstant()

# define coupling parameters
coupling_pars = []
for name, value in parameters.items():
    # that should be tight enough...
    cpl = ROOT.RooRealVar(name, name, value, -0.5, 0.5)
    coupling_pars.append(cpl)

# define nuisance parameters
nuisance_pars = []
# signal only so far
nu_par = ROOT.RooRealVar("mu_signal", "mu_signal", 1., 0.9, 1.1)
nu_par.setConstant()
nuisance_pars.append(nu_par)

all_pars = [lumi_par, e_pol_par, p_pol_par] + coupling_pars + nuisance_pars
obs_and_pars = obs_pars + all_pars
obs_and_pars_arglist = ROOT.RooArgList(obs_and_pars)

In [14]:
fun2_functor = ROOT.Math.Functor(fun2, len(obs_and_pars))
chi2_fun = ROOT.RooFit.bindFunction("chi2_fun", fun2_functor, obs_and_pars_arglist)

In [15]:
# should be constant with the number of observables
chi2_ndf = ROOT.RooRealVar("chi2_ndf", "chi2_ndf", len(obs_pars))
model = ROOT.RooChiSquarePdf("chi2_pdf", "chi2_pdf", chi2_fun, chi2_ndf)

In [16]:
def make_ttree_from_obs(obs):
    from array import array
    tree = ROOT.TTree("tree", "tree")
    branch_pointers = []
    for i, o in enumerate(obs_initial):
        name = obs_names[i]
        p = array("d", [0])
        tree.Branch(name, p, f"{name}/D")
        p[0] = o
        branch_pointers.append(p)
    tree.Fill()
    return tree

In [17]:
obs = make_new_asimov_observables_np(h_obs_np, bin_centers)
obs_tree = make_ttree_from_obs(obs)
ds = ROOT.RooDataSet("ds", "ds", obs_pars, Import=obs_tree)

In [18]:
# model.fitTo(ds)
model.getVal()
model.Print("t")

0xf63cbb0 RooChiSquarePdf::chi2_pdf = 5.11417e-12 [Auto,Dirty] 
  0xf196910/V- RooFunctorBinding::chi2_fun = 1.64335e-22 [Auto,Clean] 
    0xdd5d510/V- RooRealVar::O_g1z_pos_1em05 = -328313
    0xdcbd180/V- RooRealVar::O_ka_pos_1em05 = -816884
    0xdd43ef0/V- RooRealVar::O_la_pos_1em05 = -404445
    0xd9c9830/V- RooRealVar::lumi = 5000
    0xdc30750/V- RooRealVar::e_pol = 0
    0xda0fc80/V- RooRealVar::p_pol = 0
    0x6be1a80/V- RooRealVar::g1z = 0
    0xe3ff600/V- RooRealVar::ka = 0
    0xe3f8d00/V- RooRealVar::la = 0
    0xe43e0c0/V- RooRealVar::mu_signal = 1
  0xeea8380/V- RooRealVar::chi2_ndf = 3


In [19]:
# model.fitTo(ds)

In [20]:
nll = model.createNLL(ds, EvalBackend="cpu")
# nll = model.createNLL(ds, EvalBackend="legacy")
# nll = model.createNLL(ds, EvalBackend="codegen")
# nll = model.createNLL(ds, EvalBackend="codegen_no_grad")

[#1] INFO:Fitting -- RooAbsPdf::fitTo(chi2_pdf_over_chi2_pdf_Int[O_g1z_pos_1em05,O_ka_pos_1em05,O_la_pos_1em05]) fixing normalization set for coefficient determination to observables in data
[#1] INFO:Fitting -- using generic CPU library compiled with no vectorizations
[#1] INFO:Fitting -- Creation of NLL object took 8.02878 ms


In [21]:
%time
ROOT.RooMinimizer(nll).migrad()

CPU times: user 8 μs, sys: 0 ns, total: 8 μs
Wall time: 12.9 μs


1

[#1] INFO:Fitting -- RooAddition::defaultErrorLevel(nll_chi2_pdf_over_chi2_pdf_Int[O_g1z_pos_1em05,O_ka_pos_1em05,O_la_pos_1em05]_ds) Summation contains a RooNLLVar, using its error level
Minuit2Minimizer: Minimize with max-calls 1500 convergence for edm < 1 strategy 1
[#1] INFO:NumericIntegration -- RooRealIntegral::init(chi2_pdf_Int[O_g1z_pos_1em05,O_ka_pos_1em05,O_la_pos_1em05]) using numeric integrator RooAdaptiveIntegratorND to calculate Int(O_g1z_pos_1em05,O_ka_pos_1em05,O_la_pos_1em05)
Minuit2Minimizer : Valid minimum - status = 1
FVAL  = 26.5366925318613767
Edm   = 1.26735382324738549e-10
Nfcn  = 71
g1z	  = -0.000504609	 +/-  0.00639371	(limited)
ka	  = -0.000334195	 +/-  0.0170575	(limited)
la	  = 1.73108e-06	 +/-  0.00676977	(limited)


Info in <Minuit2>: MnSeedGenerator Computing seed using NumericalGradient calculator
Info in <Minuit2>: MnSeedGenerator Evaluated function and gradient in 51.6101 s
Info in <Minuit2>: MnSeedGenerator Initial state: FCN =       51.11858849 Edm =   1.456358589e-13 NCalls =     13
Info in <Minuit2>: NegativeG2LineSearch Doing a NegativeG2LineSearch since one of the G2 component is negative
Info in <Minuit2>: NegativeG2LineSearch Done after 1 min 52.6535 s
Info in <Minuit2>: MnSeedGenerator Negative G2 found - new state: 
  Minimum value : 26.536728
  Edm           : 0.0001067019155
  Internal parameters:	[  -0.001006789604 -0.0006608802813                0]	
  Internal gradient  :	[      9.704948918      3.164874366     -6.843654895]	
  Internal covariance matrix:
[[  1.5034328e-06              0              0]
 [              0  1.4259771e-05              0]
 [              0              0  3.0398511e-06]]]
Info in <Minuit2>: MnSeedGenerator Initial state  
  Minimum value : 26.536728
