In [1]:
import os

os.environ["CLING_DEBUG"] = "1"

In [2]:
import ROOT

OBJ: TStyle	ildStyle	ILD Style : 0 at: 0xd3c6a10


In [3]:
# TODO: make it possible to fit multiple runs simultaneously
run = {
    "lumi": 5000,
    "e_pol": 0.,
    "p_pol": 0.,
}

In [4]:
parameters = {
    "g1z": 0.0,
    "ka": 0.0,
    "la": 0.0,
}
obs_names = [
    "O_g1z_pos_1em05",
    "O_ka_pos_1em05",
    "O_la_pos_1em05",
]
input_path = "data/histograms/full/raw_histograms.root"
signal_cat = "4f_sw_sl_signal"
signal_processes = [
    "4f_sw_sl_eLpL_signal",
    "4f_sw_sl_eLpR_signal",
    "4f_sw_sl_eRpL_signal",
    "4f_sw_sl_eRpR_signal",
    ]
background_processes = []

In [5]:
signal_histograms = {}
signal_meta = {}
oo_matrix = {}
template_parameters = {}
with ROOT.TFile(input_path) as input_file:
    # take care of signals first
    signal_dir = input_file[signal_cat]
    for process_name in signal_processes:
        signal_histos = {}
        p_dir = signal_dir[process_name]
        for obs in obs_names:
            # very consistent ownership model of root requires us to do this
            histo = p_dir[obs]
            histo.SetDirectory(ROOT.nullptr)
            signal_histos[obs] = histo
            meta_dir = p_dir["meta"]
            obs_meta = {}
            for key in meta_dir.GetListOfKeys():
                key_name = key.GetName()
                obs_meta[key_name] = meta_dir[key_name]
        signal_histograms[process_name] = signal_histos
        signal_meta[process_name] = obs_meta
        # get OO matrix
        mat_dir = input_file["oo_matrix"]
        oo_matrix[process_name] = mat_dir[process_name]
    # get template parameters
    # unfortunately here we have the loops the other way around
    template_dir = input_file["template_parametrisations"]
    for obs in obs_names:
        obs_dir = template_dir[obs]
        p_pars = {}
        for process_name in signal_processes:
            p_dir = obs_dir[process_name]
            pars = {}
            for par in parameters.keys():
            # very consistent ownership model of root requires us to do
                par_hist = p_dir[par]
                par_hist.SetDirectory(ROOT.nullptr)
                pars[par] = par_hist
            p_pars[process_name] = pars
        template_parameters[obs] = p_pars


    # TODO: implement background handling

print(signal_histograms)
print(signal_meta)
print(oo_matrix)
print(template_parameters)

{'4f_sw_sl_eLpL_signal': {'O_g1z_pos_1em05': <cppyy.gbl.TH1D object at 0x1914a700>, 'O_ka_pos_1em05': <cppyy.gbl.TH1D object at 0x23725ca0>, 'O_la_pos_1em05': <cppyy.gbl.TH1D object at 0x19050e60>}, '4f_sw_sl_eLpR_signal': {'O_g1z_pos_1em05': <cppyy.gbl.TH1D object at 0x2fe419b0>, 'O_ka_pos_1em05': <cppyy.gbl.TH1D object at 0x2efd64c0>, 'O_la_pos_1em05': <cppyy.gbl.TH1D object at 0x5c1f7b0>}, '4f_sw_sl_eRpL_signal': {'O_g1z_pos_1em05': <cppyy.gbl.TH1D object at 0x5b52090>, 'O_ka_pos_1em05': <cppyy.gbl.TH1D object at 0x4f9d0d0>, 'O_la_pos_1em05': <cppyy.gbl.TH1D object at 0x27d6840>}, '4f_sw_sl_eRpR_signal': {'O_g1z_pos_1em05': <cppyy.gbl.TH1D object at 0x2798780>, 'O_ka_pos_1em05': <cppyy.gbl.TH1D object at 0x23872490>, 'O_la_pos_1em05': <cppyy.gbl.TH1D object at 0x25635830>}}
{'4f_sw_sl_eLpL_signal': {'lumi': <cppyy.gbl.TParameter<float> object at 0x25e55410>, 'e_pol': <cppyy.gbl.TParameter<float> object at 0x2f26e1b0>, 'p_pol': <cppyy.gbl.TParameter<float> object at 0x2f2169e0>}, '4f

In [6]:
# try to write down the chi2 definition
# mean or sum?? just reference the part from Markus here
# sum! see (58) in https://inspirehep.net/literature/360476
# I will need one per run
# Jonas said it will be fine to make it just one big function, the automatic differentiation will take care of it then...
ROOT.gInterpreter.Declare("#include \"fit.h\"")
# ROOT.gSystem.Load("fit_h.so")

True

In [7]:
n_obs = len(obs_names)
n_couplings = len(parameters)
example_hist = signal_histograms["4f_sw_sl_eLpL_signal"]["O_g1z_pos_1em05"]
n_bins = example_hist.GetNbinsX()
template_lumi = [par["lumi"].GetVal() for par in signal_meta.values()]
print(template_lumi)

[598.3264770507812, 100.83772277832031, 4009.4052734375, 696.610107421875]


In [8]:
# print(n_bins)
# urgh depends on root version
# template_param = [[[[h[i] for i in range(1, n_bins+1)] for h in par_hists.values()] for par_hists in process_hists.values()] for process_hists in template_parameters.values()]
template_param = [[[[h[i] for i in range(n_bins)] for h in par_hists.values()] for par_hists in process_hists.values()] for process_hists in template_parameters.values()]
print(template_param)

[[[[0.9096369156285996, -0.005166851354954982, 0.17991448016937284, 0.3158426694544779, 0.5601166571283276, 0.06634549606142803, 0.0955816569776522, 0.05763101313522767, 0.11061345950795654, -0.08272012681275376, -0.15301541157737097, -0.5188105911981069, -0.762350902921727, -0.8630857549241517, -1.2431659670149906, -1.3365960778615031, -1.3622460962023233, -0.8645892086409486, -1.5315252642350337, -1.7578516669021593], [-0.48069219548899933, 0.5187360803189466, 1.1762099081834203, 0.9280407305992735, 0.47969196909082956, 0.5323149363475456, 0.7662414199929687, 0.6272336241405214, 0.5280800092502809, 1.002158007591126, 1.0392869878056934, 0.9873739017579424, 0.8383532035281493, 0.8843858702826801, 0.7266492165094023, 0.6477606862911365, 0.6815744589799517, 0.29612007264299756, 0.5291359897311966, 0.10338252698031103], [0.5145522577419203, 1.087134466401358, 0.8596634721994156, 0.3750747106085004, 0.3393597250003695, 0.5103933337945998, 0.6697301739369514, 0.33951812857948704, 0.1089836

In [9]:
# bin_contents_sm = [[[h[i] for i in range(1, n_bins+1)] for h in process_hists.values()] for process_hists in signal_histograms.values()]
bin_contents_sm = [[[h[i] for i in range(n_bins)] for h in process_hists.values()] for process_hists in signal_histograms.values()]
print(bin_contents_sm)
print(bin_contents_sm[0])

[[[35.0, 34.0, 52.0, 69.0, 99.0, 185.0, 327.0, 828.0, 2931.0, 17551.0, 26352.0, 3606.0, 1446.0, 686.0, 412.0, 265.0, 139.0, 114.0, 82.0, 53.0], [81.0, 145.0, 408.0, 1219.0, 3475.0, 9525.0, 16286.0, 10304.0, 5506.0, 4645.0, 2451.0, 663.0, 294.0, 146.0, 73.0, 43.0, 33.0, 26.0, 11.0, 9.0], [187.0, 277.0, 379.0, 536.0, 881.0, 1381.0, 2395.0, 4235.0, 8139.0, 15841.0, 11928.0, 4643.0, 1810.0, 856.0, 504.0, 272.0, 170.0, 109.0, 78.0, 65.0]], [[413.0, 612.0, 818.0, 1209.0, 1994.0, 3863.0, 8616.0, 29612.0, 172491.0, 266914.0, 147720.0, 59622.0, 30806.0, 18021.0, 11149.0, 7415.0, 4914.0, 3493.0, 2507.0, 1849.0], [335.0, 562.0, 1044.0, 1962.0, 4363.0, 9650.0, 18990.0, 27633.0, 52111.0, 365739.0, 201100.0, 48270.0, 20611.0, 10673.0, 6119.0, 3758.0, 2386.0, 1714.0, 1131.0, 757.0], [3311.0, 4669.0, 6498.0, 9082.0, 13131.0, 19893.0, 29502.0, 44136.0, 70109.0, 120376.0, 222364.0, 144805.0, 35580.0, 15696.0, 9131.0, 5678.0, 3789.0, 2853.0, 1983.0, 1650.0]], [[2364.0, 2826.0, 3522.0, 4386.0, 5842.0, 791

In [10]:
bin_midpoints = [example_hist.GetXaxis().GetBinCenter(i) for i in range(1, n_bins+1)]
print(bin_midpoints)

[-3.325, -2.975, -2.625, -2.2750000000000004, -1.925, -1.575, -1.2250000000000003, -0.8750000000000002, -0.5250000000000001, -0.1750000000000001, 0.175, 0.5249999999999997, 0.8749999999999993, 1.2249999999999999, 1.5749999999999995, 1.925, 2.2749999999999995, 2.624999999999999, 2.9749999999999996, 3.3249999999999993]


In [11]:
C = list(oo_matrix.values())
print(C)

[<cppyy.gbl.ROOT.Math.SVector<double,6> object at 0x2d8fb150>, <cppyy.gbl.ROOT.Math.SVector<double,6> object at 0x263030a0>, <cppyy.gbl.ROOT.Math.SVector<double,6> object at 0x2f210b90>, <cppyy.gbl.ROOT.Math.SVector<double,6> object at 0x2f25b090>]


In [12]:

fit_fun = ROOT.fit_fun[str(n_obs)](n_couplings, n_bins, template_lumi, template_param, bin_contents_sm, bin_midpoints, C)

start constructor
finished constructor


In [13]:
#%timeit fit_fun([5., 12., 4.], [50., 0., 0., 0., 0., 0.])

In [14]:
# TODO: create asimov data
# TODO: run fit

In [15]:
# build summed histogram for run config
# ignore pol for today...
lumi = run["lumi"]
h = [[0.] * n_bins for i in range(n_obs)]
# loop over processes
for i, hel in enumerate(bin_contents_sm):
    lumi_scale = lumi / template_lumi[i] * 0.25 # to account for no pol
    for j, obs in enumerate(hel):
        print(lumi_scale, sum(obs), obs)
        for k in range(n_bins):
            h[j][k] += lumi_scale * obs[k]
print(sum(h[0]), h[0])
print(sum(h[1]), h[1])
print(sum(h[2]), h[2])
# ideally all the sums should be the same but there is also overflow...

2.089160429873321 55266.0 [35.0, 34.0, 52.0, 69.0, 99.0, 185.0, 327.0, 828.0, 2931.0, 17551.0, 26352.0, 3606.0, 1446.0, 686.0, 412.0, 265.0, 139.0, 114.0, 82.0, 53.0]
2.089160429873321 55343.0 [81.0, 145.0, 408.0, 1219.0, 3475.0, 9525.0, 16286.0, 10304.0, 5506.0, 4645.0, 2451.0, 663.0, 294.0, 146.0, 73.0, 43.0, 33.0, 26.0, 11.0, 9.0]
2.089160429873321 54686.0 [187.0, 277.0, 379.0, 536.0, 881.0, 1381.0, 2395.0, 4235.0, 8139.0, 15841.0, 11928.0, 4643.0, 1810.0, 856.0, 504.0, 272.0, 170.0, 109.0, 78.0, 65.0]
12.396154589369056 774038.0 [413.0, 612.0, 818.0, 1209.0, 1994.0, 3863.0, 8616.0, 29612.0, 172491.0, 266914.0, 147720.0, 59622.0, 30806.0, 18021.0, 11149.0, 7415.0, 4914.0, 3493.0, 2507.0, 1849.0]
12.396154589369056 778908.0 [335.0, 562.0, 1044.0, 1962.0, 4363.0, 9650.0, 18990.0, 27633.0, 52111.0, 365739.0, 201100.0, 48270.0, 20611.0, 10673.0, 6119.0, 3758.0, 2386.0, 1714.0, 1131.0, 757.0]
12.396154589369056 764236.0 [3311.0, 4669.0, 6498.0, 9082.0, 13131.0, 19893.0, 29502.0, 44136.0,

In [16]:
def make_asimov(h: list[list[float]], seed: int = 321) -> list[list[float]]:
    h_asimov = [[] for i in range(n_obs)]
    rnd = ROOT.TRandomMT64(seed)
    for i, hist in enumerate(h):
        for bin in hist:
            new_bin = rnd.Poisson(bin)
            h_asimov[i].append(new_bin)
    return h_asimov

In [17]:
h_asimov = make_asimov(h)
print(sum(h_asimov[0]), h_asimov[0])
print(sum(h_asimov[1]), h_asimov[1])
print(sum(h_asimov[2]), h_asimov[2])
# thankfully differences due to overflow are much greater than due to ignoring the correlation during asimov creation!!!!
# but still, this way of creating asimov is not quite correct...

9902839 [5866, 8590, 11695, 16488, 26946, 51043, 111505, 378087, 2164050, 3394613, 1951466, 760101, 392104, 229596, 142180, 94661, 62906, 45162, 31984, 23796]
9964370 [4519, 7802, 15020, 29427, 68746, 159118, 304861, 386091, 671325, 4566025, 2519917, 612553, 264681, 139439, 81222, 50512, 32814, 23460, 15939, 10899]
9770884 [42981, 60813, 84007, 117148, 169409, 256376, 380796, 570058, 908069, 1564337, 2812306, 1820560, 452678, 202266, 118834, 74153, 50271, 37695, 26305, 21822]


In [18]:
# calc obs
def calc_obs(histos: list[list[float]])-> list[float]:
    obs = [0.] * n_obs
    for i in range(n_obs):
        tmp = 0.
        for j in range(n_bins):
            tmp += histos[i][j] * bin_midpoints[j]
        obs[i] = tmp
    return obs

obs_asimov = calc_obs(h_asimov)
for i in range(n_obs):
    # scale to same lumi as before
    obs_asimov[i] *= sum(h[i]) / sum(h_asimov[i])
print(obs_asimov)

[-236129.15269545864, -780222.6446156213, -470872.4179306811]


In [19]:
chi2 = fit_fun(obs_asimov, [5000., 0., 0., 0., 0., 0.])
print(chi2)
prob = ROOT.Math.chisquared_pdf(chi2, 3)
print(prob)
# meh when I ran it the first time I got 3.9 with 10% :(
# now I get -0.3468961883761793 all the time
# switched to TRandomMT64
# for seed 1337: 1.7534120087597875
# for seed 42: 3.27894146507134
# for seed 123456789: -0.375755586233825
# did I run out of random numbers again???
# for seed 1234: 0.6979096099490846
# 4321: -0.1601710911465315
# TODO: urgh, wrap this last part in a loop, let it run for 10k different seeds and check how chi2 distributed it is...
# I guess the prob should be flat as with a fit of the correct model?
# 666: 0.4010651186770054
# now scaling events to be the same in asimov...
# 666: 0.4354920905563601
# 4321: -0.15019993208021595
# 321: 0.7803486687116501


0.7803486687116982
0.238563420116218
chi2: 0.780349
diff_v: -2100.37, -38.6898, 324.351


In [20]:
# no clue about the x range here or if it is even used...
# I am maybe abusing the TF1??
f = ROOT.TF1("f", fit_fun, 0, 1, n_couplings+3)
# I will be able to use RooFit if I put my 3 histograms/values next to each other in a 1D hist
# and then split them again in my own function

In [21]:
n_toys = 100000
h_chi2 = ROOT.TH1D("", ";#chi^{2}", 100, -10., 10.)
h_prob = ROOT.TH1D("", ";probability", 50, 0., 1.)
h_diff0 = ROOT.TH1D("", ";obs_asimov - obs [0]", 100, -10000., 10000.)
h_diff1 = ROOT.TH1D("", ";obs_asimov - obs [1]", 100, -10000., 10000.)
h_diff2 = ROOT.TH1D("", ";obs_asimov - obs [2]", 100, -10000., 10000.)
obs_initial = calc_obs(h)
for seed in range(n_toys):
    h_asimov = make_asimov(h, seed)
    obs_asimov = calc_obs(h_asimov)
    diff = [0.] * n_obs
    for i in range(n_obs):
        # scale to same lumi as before
        obs_asimov[i] *= sum(h[i]) / sum(h_asimov[i])
        diff[i] = obs_asimov[i] - obs_initial[i]
        h_diff0.Fill(diff[0])
    chi2 = fit_fun(obs_asimov, [5000., 0., 0., 0., 0., 0.])
    # print(f"python diff: {diff}")
    prob = ROOT.Math.chisquared_cdf(chi2, 3)
    h_chi2.Fill(chi2)
    h_prob.Fill(prob)



chi2: -3.35777
diff_v: -1217.59, 3785.64, -1660.55
chi2: 1.54838
diff_v: 203.066, 442.287, -3967.01
chi2: -0.203094
diff_v: -228.444, 947.388, -423.41
chi2: -0.31403
diff_v: -515.292, -1442.71, -1388.91
chi2: 0.644512
diff_v: 6.98618, -1726.83, 3472.67
chi2: -4.12249
diff_v: -185, -5112.48, 3287.63
chi2: 0.983254
diff_v: 3303.21, -1562.65, -1312.6
chi2: 0.747166
diff_v: 2319.75, 2271.23, -927.636
chi2: 3.47988
diff_v: -3396.45, -555.375, 2503.32
chi2: -3.95268
diff_v: -3739.76, -6109.1, -1282.65
chi2: 0.205148
diff_v: -186.35, -547.751, 1466.98
chi2: 0.0714758
diff_v: -2019.08, -3464.42, 2260.56
chi2: -0.979927
diff_v: -1356.17, 2041.22, 494.307
chi2: -2.25635
diff_v: -1681.94, 2995.48, -825.348
chi2: 1.73066
diff_v: 2293.34, -664.058, -2681.85
chi2: 3.17309
diff_v: 3589.87, -3164.03, -6179.72
chi2: 2.92437
diff_v: 85.6009, 593.485, -5582.43
chi2: 0.976515
diff_v: -2403.38, -776.1, -139.145
chi2: 0.923798
diff_v: -2247.17, -964.314, -3187.52
chi2: 2.75856
diff_v: 2052.88, 120.173, 6128

In [22]:
ROOT.gStyle.SetOptStat(1111)
c_chi2 = ROOT.TCanvas()
h_chi2.Draw()
c_chi2.Draw()

c_prob = ROOT.TCanvas()
h_prob.Draw()
c_prob.Draw()

c_diff0 = ROOT.TCanvas()
h_diff0.Draw()
c_diff0.Draw()
