<a href="https://colab.research.google.com/github/donalrinho/Bc2JpsiMuNu/blob/main/Bc2JpsiMuNu_RapidSim_LHCb_binned_fit.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [37]:
#Install the third-party packages this notebook relies on
!pip install uproot
!pip install tensorflow==2.6.2 #pinned to a specific version for compatibility with zfit
!pip install hist
!pip install mplhep
!pip install git+https://github.com/zfit/zfit #development version of zfit needed to get binned fit tools
!pip install uncertainties

Collecting git+https://github.com/zfit/zfit
  Cloning https://github.com/zfit/zfit to /tmp/pip-req-build-je0gudfe
  Running command git clone -q https://github.com/zfit/zfit /tmp/pip-req-build-je0gudfe
  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
    Preparing wheel metadata ... [?25l[?25hdone


In [38]:
import uproot
import numpy as np
import tensorflow as tf
import zfit
import hist
from hist import Hist
import mplhep
import pandas as pd
import pickle
import json
import random

In [39]:
#Read back the histogram templates that the previous notebook pickled, keyed by template index 0-5
#NOTE: pickle.load can execute arbitrary code, so only point this at files you produced yourself
hist_path = "/content/drive/MyDrive/Bc2JpsiMuNu_Analysis/pickle"
all_h_norm = {}
for i in range(6):
  with open(f"{hist_path}/hist_{i}.pkl", "rb") as pkl_file:
    template = pickle.load(pkl_file)
  all_h_norm[i] = template
all_h_norm

{0: Hist(
   Variable([-1, -0.79216, -0.58822, -0.38891, -0.19395, 0.00055617, 0.19486, 0.39035, 0.5888, 0.79242, 1], name='costheta_Jpsi_reco', label='costheta_Jpsi_reco'),
   Variable([-1, -0.46394, -0.12545, 0.13095, 0.33459, 0.50304, 0.64358, 0.76183, 0.861, 0.94157, 1], name='costheta_W_reco', label='costheta_W_reco'),
   Variable([-3.14159, -2.42962, -1.84089, -1.30021, -0.71122, 0.0043844, 0.71329, 1.29936, 1.83985, 2.42724, 3.14159], name='chi_reco', label='chi_reco'),
   storage=Weight()) # Sum: WeightedSum(value=1, variance=1.42964e-06) (WeightedSum(value=1.0012, variance=1.43138e-06) with flow),
 1: Hist(
   Variable([-1, -0.79216, -0.58822, -0.38891, -0.19395, 0.00055617, 0.19486, 0.39035, 0.5888, 0.79242, 1], name='costheta_Jpsi_reco', label='costheta_Jpsi_reco'),
   Variable([-1, -0.46394, -0.12545, 0.13095, 0.33459, 0.50304, 0.64358, 0.76183, 0.861, 0.94157, 1], name='costheta_W_reco', label='costheta_W_reco'),
   Variable([-3.14159, -2.42962, -1.84089, -1.30021, -0.7112

In [40]:
#Open the ROOT file that holds the MC sample we want to fit and fetch its tree
tree_name = "DecayTree"
drive_dir = "/content/drive/MyDrive/Bc2JpsiMuNu_ROOT_files"
#file_path carries no extension; ".root" is appended where the path is used
file_path = f"{drive_dir}/Bc2JpsiMuNu_RapidSim_LHCb_Vars_Weights"
print(f"Loading ROOT file {file_path}.root")
#The "<file>:<object>" form opens the file and returns the named tree in a single call
events = uproot.open(f"{file_path}.root:{tree_name}")
events

Loading ROOT file /content/drive/MyDrive/Bc2JpsiMuNu_ROOT_files/Bc2JpsiMuNu_RapidSim_LHCb_Vars_Weights.root


<TTree 'DecayTree' (172 branches) at 0x7f9fbcf50fd0>

In [41]:
#Make pandas DataFrame
#No branch selection is passed, so every branch of the tree is read into the DataFrame
#NOTE(review): only the three fit variables are used in the cells visible here - a branch filter would presumably cut memory use; confirm nothing else needs the other branches
df = events.arrays(library="pd")

In [42]:
#Downsample the DataFrame to 100k events, which will act as our fit dataset
#random_state is fixed so the same events are drawn on every run
df_fit = df.sample(n=100000, random_state=42)
len(df_fit)

100000

In [43]:
#Define fit variables: for each fit dimension, the branch name, range, number of bins and axis label
#NOTE: the name `vars` shadows Python's built-in vars(); it is kept because other cells read it
vars = {
  "x_var": {"name": "costheta_Jpsi_reco", "min": -1., "max": 1., "bins": 10, "latex": "$\\cos(\\theta_{J/\\psi})$"},
  "y_var": {"name": "costheta_W_reco", "min": -1., "max": 1., "bins": 10, "latex": "$\\cos(\\theta_{W})$"},
  "z_var": {"name": "chi_reco", "min": -np.pi, "max": np.pi, "bins": 10, "latex": "$\\chi$ [rad]"},
}

In [44]:
#Load the bin edges that were used to build the templates (they were saved to a JSON file)
json_path = "/content/drive/MyDrive/Bc2JpsiMuNu_Analysis/json"
with open(f"{json_path}/binnings.json") as binnings_file:
  binnings_text = binnings_file.read()
binnings = json.loads(binnings_text)
binnings

{'x_var': [-1.0,
  -0.79216,
  -0.58822,
  -0.38891,
  -0.19395,
  0.00055617,
  0.19486,
  0.39035,
  0.5888,
  0.79242,
  1.0],
 'y_var': [-1.0,
  -0.46394,
  -0.12545,
  0.13095,
  0.33459,
  0.50304,
  0.64358,
  0.76183,
  0.861,
  0.94157,
  1.0],
 'z_var': [-3.141592653589793,
  -2.42962,
  -1.84089,
  -1.30021,
  -0.71122,
  0.0043844,
  0.71329,
  1.29936,
  1.83985,
  2.42724,
  3.141592653589793]}

In [45]:
#Book a 3D weighted histogram of the data whose axes reuse the template bin edges, then fill it
axis_keys = ("x_var", "y_var", "z_var")

#Add one variable-width axis per fit dimension, in x, y, z order
builder = Hist.new
for key in axis_keys:
  builder = builder.Variable(binnings[key], name=vars[key]["name"])
data_hist = builder.Weight()

#Fill positionally with the matching DataFrame columns, in the same axis order
data_hist.fill(*(df_fit[vars[key]["name"]] for key in axis_keys))

In [46]:
#Create a zfit dataset of the data
#from_hist builds zfit's BinnedData from the filled hist object; this is the dataset handed to the binned likelihood
binned_data = zfit.data.BinnedData.from_hist(data_hist)
binned_data

In [47]:
#Wrap every template histogram in a zfit HistogramPDF, keeping the same integer keys
hist_pdf = {key: zfit.pdf.HistogramPDF(template) for key, template in all_h_norm.items()}
hist_pdf

{0: <zfit.models.histogram.HistogramPDF at 0x7f9f7f64d490>,
 1: <zfit.models.histogram.HistogramPDF at 0x7f9f7f64d190>,
 2: <zfit.models.histogram.HistogramPDF at 0x7f9f7f64d290>,
 3: <zfit.models.histogram.HistogramPDF at 0x7f9fbd587090>,
 4: <zfit.models.histogram.HistogramPDF at 0x7f9fbd587690>,
 5: <zfit.models.histogram.HistogramPDF at 0x7f9fbd587490>}

In [69]:
#Helicity amplitude parameters
#Random number to use in the param names, so we can run the fit lots of times
#(presumably needed because zfit parameter names must be unique within a session - confirm)
rand = random.randint(0,100000)
H0_amp = zfit.Parameter(f"H0_amp_{rand}", 0.7, 0., 1.)
Hm_amp = zfit.Parameter(f"Hm_amp_{rand}", 0.6, 0., 1.)
#One helicity amplitude is fixed by the fact that their squares must sum to 1
def Hp_amp_func(H0_amp, Hm_amp):
  """Return |H_+| from the normalisation |H_0|^2 + |H_-|^2 + |H_+|^2 = 1.

  The limits on H0_amp and Hm_amp are each [0, 1], so the minimiser is free
  to propose points with H0_amp**2 + Hm_amp**2 > 1. There the radicand is
  negative and tf.sqrt would return NaN, which then propagates into every
  multiplier built from Hp_amp. The radicand is therefore clamped at zero,
  so such points give Hp_amp = 0 instead of NaN.
  """
  Hp_amp_sq = 1. - H0_amp**2 - Hm_amp**2
  return tf.sqrt(tf.maximum(Hp_amp_sq, 0.))
Hp_amp = zfit.ComposedParameter(f"Hp_amp_{rand}", Hp_amp_func, params=[H0_amp, Hm_amp])

#Phases - H0 phase is fixed to zero by convention
H0_phi =  zfit.Parameter(f"H0_phi_{rand}", 0., floating=False)
#NOTE(review): the phases only enter through sin/cos, so limits of +-2*pi cover every phase value twice - confirm this is intended
Hp_phi =  zfit.Parameter(f"Hp_phi_{rand}", 1.5, -2*np.pi, 2*np.pi)
Hm_phi =  zfit.Parameter(f"Hm_phi_{rand}", -1.5,-2*np.pi, 2*np.pi)

In [70]:
#Derived parameters which act as multipliers of the PDFs

#|H_0|^2
def term_0_func(H0_amp):
  """Return the |H_0|^2 multiplier: twice the square of the H_0 amplitude."""
  H0_amp_sq = H0_amp**2
  return 2 * H0_amp_sq
#ComposedParameter built from term_0_func, with H0_amp as its only input parameter
term_0 = zfit.ComposedParameter(f"term_0_{rand}", term_0_func, params=[H0_amp])

#|H_+|^2
def term_1_func(Hp_amp):
  """Return the |H_+|^2 multiplier: half the square of the H_+ amplitude."""
  Hp_amp_sq = Hp_amp**2
  return 0.5 * Hp_amp_sq
#ComposedParameter built from term_1_func; its input Hp_amp is itself derived from H0_amp and Hm_amp
term_1 = zfit.ComposedParameter(f"term_1_{rand}", term_1_func, params=[Hp_amp])

#|H_-|^2
def term_2_func(Hm_amp):
  """Return the |H_-|^2 multiplier: half the square of the H_- amplitude."""
  Hm_amp_sq = Hm_amp**2
  return 0.5 * Hm_amp_sq
#ComposedParameter built from term_2_func, with Hm_amp as its only input parameter
term_2 = zfit.ComposedParameter(f"term_2_{rand}", term_2_func, params=[Hm_amp])

#Interference multipliers Re(A B*) for each pair of helicity amplitudes.
#With A = amp_a*cos(phi_a) + i amp_a*sin(phi_a) and B* = amp_b*cos(phi_b) - i amp_b*sin(phi_b):
#Re(A B*) = amp_a*cos(phi_a) * amp_b*cos(phi_b) + amp_a*sin(phi_a) * amp_b*sin(phi_b)
#         = amp_a * amp_b * cos(phi_a - phi_b)
def _interference_term(amp_a, phi_a, amp_b, phi_b):
  """Return Re(A B*) for A = amp_a*exp(i*phi_a) and B = amp_b*exp(i*phi_b).

  Shared by the three interference multipliers so the formula lives in one
  place. The cosine difference identity is used, so only one trigonometric
  call is needed instead of two cosines and two sines.
  """
  return amp_a * tf.cos(phi_a - phi_b) * amp_b

#Re(Hp H0*)
def term_3_func(Hp_amp, Hp_phi, H0_amp, H0_phi):
  """Return Re(Hp H0*) from the H_+ and H_0 amplitudes and phases."""
  return _interference_term(Hp_amp, Hp_phi, H0_amp, H0_phi)
term_3 = zfit.ComposedParameter(f"term_3_{rand}", term_3_func, params=[Hp_amp, Hp_phi, H0_amp, H0_phi])

#Re(Hm H0*)
def term_4_func(Hm_amp, Hm_phi, H0_amp, H0_phi):
  """Return Re(Hm H0*) from the H_- and H_0 amplitudes and phases."""
  return _interference_term(Hm_amp, Hm_phi, H0_amp, H0_phi)
term_4 = zfit.ComposedParameter(f"term_4_{rand}", term_4_func, params=[Hm_amp, Hm_phi, H0_amp, H0_phi])

#Re(Hp Hm*)
def term_5_func(Hp_amp, Hp_phi, Hm_amp, Hm_phi):
  """Return Re(Hp Hm*) from the H_+ and H_- amplitudes and phases."""
  return _interference_term(Hp_amp, Hp_phi, Hm_amp, Hm_phi)
term_5 = zfit.ComposedParameter(f"term_5_{rand}", term_5_func, params=[Hp_amp, Hp_phi, Hm_amp, Hm_phi])

In [71]:
#Total PDF: the six template PDFs, paired index-by-index with the helicity multipliers (hist_pdf[k] <-> term_k)
#NOTE(review): term_3..term_5 are cosine-weighted products and can be negative - confirm BinnedSumPDF accepts signed fracs that need not sum to 1
template_pdfs = [hist_pdf[k] for k in range(6)]
multipliers = [term_0, term_1, term_2, term_3, term_4, term_5]
tot_pdf = zfit.pdf.BinnedSumPDF(pdfs=template_pdfs, fracs=multipliers)
tot_pdf

<zfit.models.binned_functor.BinnedSumPDF at 0x7f9f7e872390>

In [72]:
#Run the fit
#NOTE(review): the output saved with this cell ends in FailMinimizeNaN ("too many NaNs being produced
#in the loss"), presumably raised from minimize() - in that session none of the prints below produced output

# Stage 1: create a binned likelihood with the given PDF and dataset
nll = zfit.loss.BinnedNLL(tot_pdf, binned_data)

# Stage 2: instantiate a minimiser (in this case a basic minuit)
minimizer = zfit.minimize.Minuit()

#Stage 3: minimise the given negative likelihood
result = minimizer.minimize(nll)

#Get the parameter uncertainties using Hesse
#param_errors is not read again within this cell
param_errors = result.hesse(method="minuit_hesse")

#Summarise the outcome: minimum of the loss, convergence flag and the minimiser's own report
print("Function minimum:", result.fmin)
print("Converged:", result.converged)
print("Full minimizer information:", result.info)

#Fitted parameters as held by the result object
params = result.params
print(params)

The minimization failed due to too many NaNs being produced in the loss.This is most probably caused by negative values returned from the PDF. Changing the initial values/stepsize of the parameters can solve this problem. Also check your model (if custom) for problems. For more information, visit https://github.com/zfit/zfit/wiki/FAQ#fitting-and-minimization


FailMinimizeNaN: ignored