<a href="https://colab.research.google.com/github/donalrinho/Bc2JpsiMuNu/blob/main/Bc2JpsiMuNu_RapidSim_LHCb_binned_fit_new.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
#Install the third-party dependencies with pip (-q keeps the install log quiet)
!pip install -q uproot
!pip install -q tensorflow==2.6.2 #specific version for compatibility with zfit
!pip install -q hist
!pip install -q mplhep
#!pip install -q git+https://github.com/zfit/zfit #development version of zfit needed to get binned fit tools
!pip install -q zfit --pre
!pip install -q uncertainties

[K     |████████████████████████████████| 301 kB 9.2 MB/s 
[K     |████████████████████████████████| 458.3 MB 12 kB/s 
[K     |████████████████████████████████| 5.6 MB 22.8 MB/s 
[K     |████████████████████████████████| 1.3 MB 37.3 MB/s 
[K     |████████████████████████████████| 132 kB 55.5 MB/s 
[K     |████████████████████████████████| 462 kB 60.0 MB/s 
[?25h  Building wheel for clang (setup.py) ... [?25l[?25hdone
  Building wheel for wrapt (setup.py) ... [?25l[?25hdone
[K     |████████████████████████████████| 1.4 MB 12.6 MB/s 
[K     |████████████████████████████████| 5.8 MB 14.1 MB/s 
[K     |████████████████████████████████| 11.2 MB 44.2 MB/s 
[K     |████████████████████████████████| 895 kB 55.9 MB/s 
[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
albumentations 0.1.12 requires imgaug<0.2.7,>=0.2.5, but you have imgaug 0.2.9 which i

In [62]:
import uproot
import numpy as np
import tensorflow as tf
import zfit
import hist
from hist import Hist
import mplhep
import pandas as pd
import pickle
import json
import random
from typing import Optional
from zfit.core.space import supports
import zfit.z.numpy as znp

In [2]:
#Read back the six pickled histogram templates produced by the previous notebook,
#storing them in a dict keyed by template index (0-5)
hist_path = "/content/drive/MyDrive/Bc2JpsiMuNu_Analysis/pickle"
all_h_norm = {}
for idx in range(6):
  pkl_name = f"{hist_path}/hist_{idx}.pkl"
  with open(pkl_name, "rb") as pkl_file:
    all_h_norm[idx] = pickle.load(pkl_file)
all_h_norm

{0: Hist(
   Variable([-1, -0.79216, -0.58822, -0.38891, -0.19395, 0.00055617, 0.19486, 0.39035, 0.5888, 0.79242, 1], name='costheta_Jpsi_reco', label='costheta_Jpsi_reco'),
   Variable([-1, -0.46394, -0.12545, 0.13095, 0.33459, 0.50304, 0.64358, 0.76183, 0.861, 0.94157, 1], name='costheta_W_reco', label='costheta_W_reco'),
   Variable([-3.14159, -2.42962, -1.84089, -1.30021, -0.71122, 0.0043844, 0.71329, 1.29936, 1.83985, 2.42724, 3.14159], name='chi_reco', label='chi_reco'),
   storage=Weight()) # Sum: WeightedSum(value=1, variance=1.42964e-06) (WeightedSum(value=1.0012, variance=1.43138e-06) with flow),
 1: Hist(
   Variable([-1, -0.79216, -0.58822, -0.38891, -0.19395, 0.00055617, 0.19486, 0.39035, 0.5888, 0.79242, 1], name='costheta_Jpsi_reco', label='costheta_Jpsi_reco'),
   Variable([-1, -0.46394, -0.12545, 0.13095, 0.33459, 0.50304, 0.64358, 0.76183, 0.861, 0.94157, 1], name='costheta_W_reco', label='costheta_W_reco'),
   Variable([-3.14159, -2.42962, -1.84089, -1.30021, -0.7112

In [3]:
#Open the ROOT file holding the MC sample we want to fit and pick out its tree
tree_name = "DecayTree"
drive_dir = "/content/drive/MyDrive/Bc2JpsiMuNu_ROOT_files"
file_path = f"{drive_dir}/Bc2JpsiMuNu_RapidSim_LHCb_Vars_Weights"
print(f"Loading ROOT file {file_path}.root")
#uproot accepts "<file>:<object>" to open an object inside the file directly
events = uproot.open(f"{file_path}.root:{tree_name}")
events

Loading ROOT file /content/drive/MyDrive/Bc2JpsiMuNu_ROOT_files/Bc2JpsiMuNu_RapidSim_LHCb_Vars_Weights.root


<TTree 'DecayTree' (172 branches) at 0x7f42ea6ca810>

In [4]:
#Make pandas DataFrame
#(no branch selection is passed to arrays(), and library="pd" requests pandas output)
df = events.arrays(library="pd")

  out[name] = series[name]


In [5]:
#Draw a random subsample of 100k events from the DataFrame to act as our fit dataset
#(the fixed random_state makes the selection reproducible)
df_fit = df.sample(random_state=42, n=100_000)
len(df_fit)

100000

In [6]:
#Fit variables: branch name, range, number of bins and plot label for each of the three angles
#NOTE: the name shadows the built-in vars(); it is kept because later cells index this dict
vars = {
  "x_var": {
    "name": "costheta_Jpsi_reco",
    "min": -1.,
    "max": 1.,
    "bins": 10,
    "latex": "$\\cos(\\theta_{J/\\psi})$",
  },
  "y_var": {
    "name": "costheta_W_reco",
    "min": -1.,
    "max": 1.,
    "bins": 10,
    "latex": "$\\cos(\\theta_{W})$",
  },
  "z_var": {
    "name": "chi_reco",
    "min": -np.pi,
    "max": np.pi,
    "bins": 10,
    "latex": "$\\chi$ [rad]",
  },
}

In [7]:
#Load the bin edges that were used to make the templates (they were saved to a JSON file)
json_path = "/content/drive/MyDrive/Bc2JpsiMuNu_Analysis/json"
with open(f"{json_path}/binnings.json") as binnings_file:
  binnings = json.loads(binnings_file.read())
binnings

{'x_var': [-1.0,
  -0.79216,
  -0.58822,
  -0.38891,
  -0.19395,
  0.00055617,
  0.19486,
  0.39035,
  0.5888,
  0.79242,
  1.0],
 'y_var': [-1.0,
  -0.46394,
  -0.12545,
  0.13095,
  0.33459,
  0.50304,
  0.64358,
  0.76183,
  0.861,
  0.94157,
  1.0],
 'z_var': [-3.141592653589793,
  -2.42962,
  -1.84089,
  -1.30021,
  -0.71122,
  0.0043844,
  0.71329,
  1.29936,
  1.83985,
  2.42724,
  3.141592653589793]}

In [23]:
#Build the zfit binned dataset to fit, using the same binning as our templates (above)

#One variable-width binning per fit variable, taken from the loaded bin edges
binning_x, binning_y, binning_z = (
  zfit.binned.VariableBinning(binnings[key], name=vars[key]["name"])
  for key in ("x_var", "y_var", "z_var")
)

#One binned Space per fit variable ...
obs_x, obs_y, obs_z = (
  zfit.Space(vars[key]["name"], binning=axis_binning)
  for key, axis_binning in zip(("x_var", "y_var", "z_var"), (binning_x, binning_y, binning_z))
)

#... combined into the three-dimensional fit space
obs = obs_x * obs_y * obs_z

#Keep only the three fit columns, then convert to zfit data and bin it in the fit space
df_fit = df_fit[["costheta_Jpsi_reco","costheta_W_reco","chi_reco"]]
unbinned_data = zfit.Data.from_pandas(df_fit, obs=obs)
binned_data = unbinned_data.to_binned(obs)

  result = func(*[np.array(x) for x in inp])


In [12]:
#Make a hist of the dataset we will fit, filling it with our MC
#data_h = (
#    Hist.new
#    .Variable(binnings["x_var"], name=vars["x_var"]["name"])
#    .Variable(binnings["y_var"], name=vars["y_var"]["name"])
#    .Variable(binnings["z_var"], name=vars["z_var"]["name"])
#    .Weight()
#    )
#Fill the histogram with the sampled MC DataFrame of 100k events
#data_h.fill(df_fit[vars["x_var"]["name"]], 
#            df_fit[vars["y_var"]["name"]], 
#            df_fit[vars["z_var"]["name"]])

#binned_data = zfit.data.BinnedData.from_hist(data_h)
#binned_data

In [45]:
#Wrap every template histogram in a zfit HistogramPDF, keeping the same dictionary keys
hist_pdfs = {
  key: zfit.pdf.HistogramPDF(template)
  for key, template in all_h_norm.items()
}
hist_pdfs

{0: <zfit.models.histogram.HistogramPDF at 0x7f42a75bbd50>,
 1: <zfit.models.histogram.HistogramPDF at 0x7f42a74e0910>,
 2: <zfit.models.histogram.HistogramPDF at 0x7f42a74f7990>,
 3: <zfit.models.histogram.HistogramPDF at 0x7f42a7683b90>,
 4: <zfit.models.histogram.HistogramPDF at 0x7f42a7572110>,
 5: <zfit.models.histogram.HistogramPDF at 0x7f42a7572d90>}

In [27]:
#Helicity amplitude fit parameters
#A random integer is appended to every parameter name, so we can run the fit lots of times
rand = random.randint(0,100000)

#Magnitudes of the H0 and H- amplitudes, each floated between 0 and 1
H0_amp = zfit.Parameter(f"H0_amp_{rand}", 0.7, 0., 1.)
Hm_amp = zfit.Parameter(f"Hm_amp_{rand}", 0.6, 0., 1.)

def Hp_amp_func(H0_amp, Hm_amp):
  """Return the H+ magnitude implied by the three squared magnitudes summing to 1."""
  remainder = 1. - H0_amp**2 - Hm_amp**2
  return tf.sqrt(remainder)

#The H+ magnitude is not free: it is derived from the other two
Hp_amp = zfit.ComposedParameter(f"Hp_amp_{rand}", Hp_amp_func, params=[H0_amp, Hm_amp])

#Phases: the H0 phase is held at zero by convention, the other two float in [-2pi, 2pi]
H0_phi = zfit.Parameter(f"H0_phi_{rand}", 0., floating=False)
Hp_phi = zfit.Parameter(f"Hp_phi_{rand}", 1.5, -2*np.pi, 2*np.pi)
Hm_phi = zfit.Parameter(f"Hm_phi_{rand}", -1.5,-2*np.pi, 2*np.pi)

#Everything the PDF needs, looked up by a name without the random suffix
fit_params = dict(
  H0_amp=H0_amp,
  Hm_amp=Hm_amp,
  Hp_amp=Hp_amp,
  H0_phi=H0_phi,
  Hp_phi=Hp_phi,
  Hm_phi=Hm_phi,
)

In [63]:
class CustomPDF(zfit.core.binnedpdf.BaseBinnedPDFV1):
    """Total binned PDF of the angular decay rate.

    The PDF is a weighted sum of six binned template PDFs. The weights are
    built from three complex helicity amplitudes (H0, H+ and H-), each given
    as a magnitude and a phase, and the sum is rescaled so that its bin
    contents add up to one.
    """

    def __init__(
            self,
            templates,
            pdf_params,
            name: str = "CustomPDF",
            obs: Optional["zfit.Space"] = None,
    ) -> None:
        """Create the PDF from its templates and fit parameters.

        Args:
            templates: Mapping from the integer keys 0-5 to template PDFs.
                Each template must provide ``counts(x, norm=...)``.
            pdf_params: Mapping holding the fit parameters under the keys
                "H0_amp", "Hm_amp", "Hp_amp", "H0_phi", "Hm_phi" and "Hp_phi".
            name: Human-readable name or label of the PDF for better
                identification. Has no programmatical functional purpose.
            obs: Binned space the PDF is defined on. When omitted, the
                module-level variable ``obs`` is used, which is the space this
                class always picked up before the argument existed.

        Raises:
            KeyError: If ``pdf_params`` lacks one of the keys listed above, or
                if ``obs`` is omitted and no module-level ``obs`` exists.
        """
        if obs is None:
            # Backward-compatible default: fall back to the notebook-level
            # fit space instead of silently depending on it.
            obs = globals()["obs"]

        # Copy only the entries this PDF uses, in a fixed order.
        param_keys = ("H0_amp", "Hm_amp", "Hp_amp", "H0_phi", "Hm_phi", "Hp_phi")
        params = {key: pdf_params[key] for key in param_keys}

        super().__init__(obs=obs, extended=None, norm=None, params=params, name=name)
        self._templates = templates
        # NOTE(review): this rebinds an attribute name that the zfit base class
        # may also use internally; kept exactly as in the original code.
        self._params = params

    @staticmethod
    def _complex_amplitude(magnitude, phase):
        """Return the complex number magnitude * (cos(phase) + i*sin(phase))."""
        return tf.complex(magnitude * tf.cos(phase), magnitude * tf.sin(phase))

    @supports(norm=False)
    def _rel_counts(self, x, norm):
        """Return the bin contents of the total PDF, scaled to sum to one.

        Args:
            x: Passed unchanged to ``counts`` of every template.
            norm: Passed unchanged to ``counts`` of every template.

        Returns:
            The weighted sum of the six template counts divided by its own sum.
        """
        amps = self._params

        # Complex helicity amplitudes from their magnitudes and phases.
        h_0 = self._complex_amplitude(amps["H0_amp"], amps["H0_phi"])
        h_p = self._complex_amplitude(amps["Hp_amp"], amps["Hp_phi"])
        h_m = self._complex_amplitude(amps["Hm_amp"], amps["Hm_phi"])

        # Only the conjugates of H0 and H- enter the interference terms.
        h_0_conj = tf.math.conj(h_0)
        h_m_conj = tf.math.conj(h_m)

        # Weight multiplying each template, listed in template-key order 0..5.
        weights = (
            amps["H0_amp"]**2 * 2,            # 0: 2 * |H0|^2
            amps["Hp_amp"]**2 * 0.5,          # 1: 0.5 * |H+|^2
            amps["Hm_amp"]**2 * 0.5,          # 2: 0.5 * |H-|^2
            tf.math.real(h_p * h_0_conj),     # 3: Re(H+ H0*)
            -tf.math.real(h_m * h_0_conj),    # 4: -Re(H- H0*)
            tf.math.real(h_p * h_m_conj),     # 5: Re(H+ H-*)
        )

        # Total PDF: sum over each template multiplied by its weight.
        pdf = None
        for key, weight in enumerate(weights):
            term = weight * self._templates[key].counts(x, norm=norm)
            pdf = term if pdf is None else pdf + term

        # Normalise the PDF so that its bin contents sum to 1.
        return pdf / znp.sum(pdf)

In [64]:
#Build the total angular PDF from the template PDFs and the helicity fit parameters
tot_pdf = CustomPDF(templates=hist_pdfs, pdf_params=fit_params)

In [65]:
#Run the fit

#Stage 1: binned negative log-likelihood of the total PDF for the binned dataset
nll = zfit.loss.BinnedNLL(tot_pdf, binned_data)

#Stage 2: the minimiser (a basic Minuit)
minimizer = zfit.minimize.Minuit()

#Stage 3: minimise the negative log-likelihood
result = minimizer.minimize(nll)

#Parameter uncertainties from Hesse
param_errors = result.hesse(method="minuit_hesse")

#Summary of the minimisation
summary = (
  ("Function minimum:", result.fmin),
  ("Converged:", result.converged),
  ("Full minimizer information:", result.info),
)
for label, value in summary:
  print(label, value)

#Fitted parameters
params = result.params
print(params)

Function minimum: -358286.9614458389
Converged: True
Full minimizer information: {'n_eval': 106, 'minuit': <FMin algorithm='Migrad' edm=3.452846875200306e-05 edm_goal=0.001 errordef=0.5 fval=-358286.9614458389 has_accurate_covar=True has_covariance=True has_made_posdef_covar=False has_parameters_at_limit=False has_posdef_covar=True has_reached_call_limit=False has_valid_parameters=True hesse_failed=False is_above_max_edm=False is_valid=True nfcn=129 ngrad=0 reduced_chi2=nan>
(Param(number=0, name='H0_amp_91112', value=0.6842914444206133, error=0.0035865535516281244, merror=None, is_const=False, is_fixed=False, lower_limit=0.0, upper_limit=1.0), Param(number=1, name='Hm_amp_91112', value=0.6426019482492412, error=0.00284249010779164, merror=None, is_const=False, is_fixed=False, lower_limit=0.0, upper_limit=1.0), Param(number=2, name='Hm_phi_91112', value=-1.5180823380226156, error=0.03185201438411367, merror=None, is_const=False, is_fixed=False, lower_limit=-6.2831854820251465, upper_li