# Fit missing-mass squared distributions

In [None]:
# on ifarm, use Python 3.6.8
import array

import numpy as np
import ROOT
from pathvalidate import sanitize_filename, sanitize_filepath
from uncertainties import ufloat
from uncertainties import umath

import fitFunction  # defines fit function: double Gaussian on top of a 2nd-order polynomial
import makePlots    # defines helper functions to generate histograms from data trees

makePlots.setupPlotStyle()

## Define parameters and load input data

In [None]:
# Analysis configuration. Commented-out assignments are alternative settings that can be
# activated by hand.

# label of the event selection; enters the names of the output canvases
# selection         = "paul"
selection         = "justin"
# particle label; enters histogram titles and canvas names and is passed to fitFunction.fitDistribution()
# particle          = "Pi-"
# particle          = "Pi+"
particle          = "Proton"
# channel label; enters histogram titles and canvas names
# channel           = "2pi"
channel           = "4pi"
# dataset label; enters the input file name and the canvas names
# dataset           = "030730"
dataset           = "bggen_2017_01-ver03"
# if selection == "paul":
#   inFileName = f"../pmatt/trackeff_{particle}_{channel}{f'.{dataset}' if dataset != '' else ''}.root"
# elif selection == "justin":
#   inFileName = f"../ReactionEfficiency/pippippimpimpmiss_bggen_2017_01-ver03.root"
# else:
#   raise ValueError(f"Unknown selection '{selection}'")
# file and tree from which the RDataFrame with the input data is constructed
treeFileName      = f"../ReactionEfficiency/pippippimpimpmiss_flatTree.{dataset}.root"
treeName          = "pippippimpimpmiss"
#TODO check uncertainty for weighted histograms

# expression that defines the "TrackFound" column of the input data frame
trackFoundCondition = makePlots.UNUSED_TRACK_FOUND_CONDITION

# optional extra selection that is passed as `filterExpression` to makePlots.getHistND();
# pdfFileNameSuffix is appended to all canvas names so that the output for different filters can be told apart
additionalFilter  = None
pdfFileNameSuffix = ""
# additionalFilter  = '(ThrownTopology.GetString() == "2#pi^{#plus}2#pi^{#minus}p")'
# pdfFileNameSuffix = "__sig"
# additionalFilter  = '(ThrownTopology.GetString() == "2#gamma2#pi^{#plus}2#pi^{#minus}p[#pi^{0}]")'
# pdfFileNameSuffix = "__2#gamma2#pi^{#plus}2#pi^{#minus}p[#pi^{0}]"
# additionalFilter  = '(ThrownTopology.GetString() != "2#pi^{#plus}2#pi^{#minus}p")'
# pdfFileNameSuffix = "__bkg"
# additionalFilter  = "(NmbUnusedShowers == 0)"
# pdfFileNameSuffix = "__noUnusedShowers"

# definition of the missing-mass squared histogram; the entries are passed as keyword arguments
# to makePlots.getHistND()
mm2HistDef        = {
  "variables"      : ("MissingMassSquared_Measured",),
  "axisTitles"     : "(#it{m}^{miss}_{measured})^{2} (GeV/c^{2})^{2}",
  # "binning"        : (125, -0.5, 4.5),
  "binning"        : (100, -0.5, 4.0),
  "weightVariable" : "AccidWeightFactor",
}
# range passed to fitFunction.fitDistribution()
mm2FitRange       = (-0.5, 4.0)  # [(GeV/c^2)^2]

# define equidistant 1D kinematic binning
# "variable" is the column that is binned, "label" and "unit" are used for axis titles and printouts,
# and "nmbBins" equidistant bins are spanned over "range"
kinematicBinning  = {  # beam energy
  "variable" : "BeamEnergy", "label" : "Beam Energy", "unit" : "GeV",
  "nmbBins" : 9, "range" : (3.0, 12.0)
}
# kinematicBinning  = {  # momentum of missing proton
#   "variable" : "MissingProtonP", "label" : "#it{p}^{miss}_{kin. fit}", "unit" : "GeV/c",
#   "nmbBins" : 10, "range" : (0, 3.5)
# }
# kinematicBinning  = {  # polar angle of missing proton
#   "variable" : "MissingProtonTheta", "label" : "#it{#theta}^{miss}_{kin. fit}", "unit" : "deg",
#   "nmbBins" : 13, "range" : (0, 65)
# }
# kinematicBinning  = {  # azimuthal angle of missing proton
#   "variable" : "MissingProtonPhi", "label" : "#it{#phi}^{miss}_{kin. fit}", "unit" : "deg",
#   "nmbBins" : 10, "range" : (-180, +180)
# }
# if True, the efficiencies in kinematic bins are calculated from the yield of the narrow Gaussian only
useNarrowGaussian = False
# marker size used for the graphs of fit parameters and efficiencies
markerSize        = 0.75

# construct the data frame for the input tree and add the "TrackFound" column;
# multi-threading is enabled only while the data frame is constructed and is switched off again right after
ROOT.EnableImplicitMT()   # activate implicit multi-threading for RDataFrame
inputData = ROOT.RDataFrame(treeName, treeFileName).Define("TrackFound", trackFoundCondition)
ROOT.DisableImplicitMT()  # NOTE! enabling multithreading via ROOT.EnableImplicitMT() sends Minuit into an infinite loop (on ROOT 6.24/04 (ifarm) and 6.26/10)

## Tracking efficiency from overall missing-mass squared distributions

### Get overall missing-mass squared distributions from the input tree

In [None]:
# Create one missing-mass squared histogram per case and plot each of them on its own canvas.
# Implicit multi-threading is enabled only for this cell; the try/finally block guarantees that it
# is switched off again even if histogram creation or plotting fails, because leaving it enabled
# sends Minuit into an infinite loop in the fits that follow.
ROOT.EnableImplicitMT()
try:
  MM2Hists = {}
  for case, caseFilter in makePlots.FILTER_CASES.items():
    caseData = inputData.Filter(caseFilter)
    MM2Hists[case] = makePlots.getHistND(caseData, **mm2HistDef, filterExpression = additionalFilter,
      histNameSuffix = case, histTitle = f"{particle} ({channel}) {case}")
  canvs = []  # keep references to the canvases so that they stay alive
  for case, MM2Hist in MM2Hists.items():
    canv = ROOT.TCanvas(sanitize_filename(f"{selection}_{particle}_{channel}_{dataset}_mm2_{case}{pdfFileNameSuffix}"), "")
    MM2Hist.Draw("HIST")
    # draw dashed line at y = 0 over the displayed x range
    line = ROOT.TLine()
    line.SetLineStyle(ROOT.kDashed)
    xAxis = MM2Hist.GetXaxis()
    line.DrawLine(xAxis.GetBinLowEdge(xAxis.GetFirst()), 0, xAxis.GetBinUpEdge(xAxis.GetLast()), 0)
    MM2Hist.SetYTitle(f"Counts / {MM2Hist.GetBinWidth(1)} (GeV/c^{{2}})^{{2}}")
    canv.Draw()
    canv.SaveAs(".pdf")  # file name is derived from the canvas name
    canvs.append(canv)
finally:
  ROOT.DisableImplicitMT()

### Calculate efficiency using integrals of the respective histograms

In [None]:
# integrals = {case : hist.Integral(hist.FindBin(mm2FitRange[0]), hist.FindBin(mm2FitRange[1])) for case, hist in MM2Hists.items()}
# integrate every histogram and estimate the efficiency as Found / (Found + Missing)
integrals = {}
for case, hist in MM2Hists.items():
  integrals[case] = hist.Integral()
nmbFound   = integrals['Found']
nmbMissing = integrals['Missing']
print(f"Overall efficiency = {100 * nmbFound / (nmbFound + nmbMissing)}%")

### Fit missing-mass squared distributions for the 3 cases

In [None]:
# fit the overall distribution of each case, without forcing a common mean for the two Gaussians
fitResults = {}
for case, MM2Hist in MM2Hists.items():
  fitResults[case] = fitFunction.fitDistribution(
    MM2Hist,
    particle,
    mm2FitRange,
    forceCommonGaussianMean = False,
  )

In [None]:
def fitStatusString(fitResult):
  """Return a one-line summary of the fit status.

  The summary contains the minimizer status code, the validity flag of the fit result, and the
  covariance-matrix status, which is translated using the second element of the corresponding
  entry in fitFunction.COV_MATRIX_STATUS_CODE.
  """
  minimizerStatus = fitResult.Status()
  isValid         = fitResult.IsValid()
  covMatrixStatus = fitFunction.COV_MATRIX_STATUS_CODE[fitResult.CovMatrixStatus()][1]
  summaryParts = (
    f"minimizer status = {minimizerStatus}",
    f"fit result is valid = {isValid}",
    f"covariance matrix status = {covMatrixStatus}",
  )
  return ", ".join(summaryParts)

Show fit result

In [None]:
# draw the overall histogram of each case on its own canvas, reposition the stats box, and save the canvas
canvsOverall = {}
for case, MM2Hist in MM2Hists.items():
  canvName = sanitize_filename(f"{selection}_{particle}_{channel}_{dataset}_mm2_fit_{case}{pdfFileNameSuffix}")
  canvOverall = ROOT.TCanvas(canvName, "")
  canvsOverall[case] = canvOverall  # keep reference so that the canvas stays alive
  MM2Hist.Draw()
  canvOverall.Update()  # without the update, the TPaveStats object does not exist yet
  stats = MM2Hist.FindObject("stats")
  # place stats box at (x1, y1) = (0.65, 0.3) and (x2, y2) = (0.98, 0.92) in NDC
  stats.SetX1NDC(0.65)
  stats.SetX2NDC(0.98)
  stats.SetY1NDC(0.3)
  stats.SetY2NDC(0.92)
  canvOverall.Draw()
  canvOverall.SaveAs(".pdf")
# print fit status for each case
for case, fitResult in fitResults.items():
  statusString = fitStatusString(fitResult)
  print(f"{case}: {statusString}")

### Get overall signal yields and calculate average efficiency

In [None]:
def getFitParameter(fitResult, parName):
  """Return value and uncertainty of the fit parameter named `parName` as a ufloat.

  The parameter is looked up by name via fitResult.Index(); value and uncertainty are taken from
  fitResult.Parameter() and fitResult.ParError(), respectively.
  """
  index = fitResult.Index(parName)
  value = fitResult.Parameter(index)
  error = fitResult.ParError(index)
  return ufloat(value, error)

In [None]:
def getSignalYield(fitResult, useNarrowGaussian = False):
  """Return the signal yield of the given fit result as a ufloat.

  By default, the yield is the fit parameter "A". If `useNarrowGaussian` is True, only the part
  of "A" that belongs to the narrower of the two Gaussians is returned. The fraction of
  Gaussian 2 is sin^2 of the fit parameter "#hat{r}"; which Gaussian is the narrow one is decided
  by comparing the nominal values of "#sigma_{1}" and "#sigma_{2}".
  """
  totalYield = getFitParameter(fitResult, "A")
  if not useNarrowGaussian:
    return totalYield
  rHat   = getFitParameter(fitResult, "#hat{r}")
  sigma1 = getFitParameter(fitResult, "#sigma_{1}")
  sigma2 = getFitParameter(fitResult, "#sigma_{2}")
  fractionGaussian2 = umath.sin(rHat)**2
  gaussian1IsNarrow = sigma1.nominal_value < sigma2.nominal_value
  if gaussian1IsNarrow:
    return totalYield * (1 - fractionGaussian2)
  # otherwise Gaussian 2 is the narrow one
  return totalYield * fractionGaussian2
# getSignalYield(fitResults["Total"], True)

In [None]:
def calculateEfficiency(fitResults, useNarrowGaussian = False):  # dictionary with TFitResults for "Total", "Found", and "Missing"
  """Return the efficiency Found / (Found + Missing) calculated from the fitted signal yields.

  `fitResults` maps case names to fit results and must contain the cases "Found" and "Missing".
  `useNarrowGaussian` is forwarded to getSignalYield().
  """
  signalYields = {}
  for case, fitResult in fitResults.items():
    signalYields[case] = getSignalYield(fitResult, useNarrowGaussian)
  nmbFound   = signalYields["Found"]
  nmbMissing = signalYields["Missing"]
  return nmbFound / (nmbFound + nmbMissing)

In [None]:
# signal yields and efficiency using the full double-Gaussian yield
signalYields = {}
for case, fitResult in fitResults.items():
  signalYields[case] = getSignalYield(fitResult)
#TODO one could perform a combined fit of all 3 histograms enforcing Found + Missing == Total
# relative difference in percent between the Total yield and the sum of the Found and Missing yields
mismatch = 100 * (signalYields['Total'] - signalYields['Found'] - signalYields['Missing']) / signalYields['Total']
print(signalYields, f"mismatch = {mismatch}%")
overallEff = calculateEfficiency(fitResults)
print(f"both Gaussians: overall efficiency = {100 * overallEff}%")

# same quantities using only the yield of the narrow Gaussian
signalYieldsNarrow = {}
for case, fitResult in fitResults.items():
  signalYieldsNarrow[case] = getSignalYield(fitResult, useNarrowGaussian = True)
mismatchNarrow = 100 * (signalYieldsNarrow['Total'] - signalYieldsNarrow['Found'] - signalYieldsNarrow['Missing']) / signalYieldsNarrow['Total']
print(signalYieldsNarrow, f"mismatch = {mismatchNarrow}%")
overallEffNarrow = calculateEfficiency(fitResults, useNarrowGaussian = True)
print(f"narrow Gaussian: overall efficiency = {100 * overallEffNarrow}%")

## Tracking efficiencies as a function of the given kinematic variable from missing-mass squared distributions

### Fit missing-mass squared distributions in bins of the given kinematic variable

In [None]:
# generates histograms from tree and returns dictionary of dictionaries {case : {kinematic bin : histogram, ...}, ...}
#TODO complete interface
def getHistsKinematicBins1D(
  kinematicBinning,  # dict with binning definition
  minCounts = 100   # minimum number of counts required in kinematic bin
):
  """Create missing-mass squared histograms for every case in equidistant bins of a kinematic variable.

  `kinematicBinning` must provide the keys "variable", "nmbBins", "range", and "unit". The
  histograms are created from the module-level `inputData` using `mm2HistDef` and
  `additionalFilter`; `particle` and `channel` enter the histogram titles.

  Returns a dictionary {case : {(variable, bin minimum, bin maximum) : histogram, ...}, ...}.
  Histograms whose integral over bins 1 to NbinsX is below `minCounts` are left out and a warning
  is printed. Since this check is performed separately for each case, the sets of kinematic
  bins may differ between cases.
  """
  # read binning definition before touching global ROOT state
  kinBinVariable = kinematicBinning["variable"]
  nmbKinBins     = kinematicBinning["nmbBins"]
  kinBinRange    = kinematicBinning["range"]
  kinBinWidth    = (kinBinRange[1] - kinBinRange[0]) / float(nmbKinBins)
  MM2HistsKinBins = {}
  ROOT.EnableImplicitMT()
  try:
    for case, caseFilter in makePlots.FILTER_CASES.items():
      caseData = inputData.Filter(caseFilter)
      MM2HistsKinBins[case] = {}
      for kinBinIndex in range(nmbKinBins):
        kinBinMin = kinBinRange[0] + kinBinIndex * kinBinWidth
        kinBinMax = kinBinMin + kinBinWidth
        # NOTE: both inequalities are strict, i.e. events that lie exactly on a bin border are not selected
        kinBinFilter = f"(({kinBinMin} < {kinBinVariable}) and ({kinBinVariable} < {kinBinMax}))"
        kinBinData = caseData.Filter(kinBinFilter)
        MM2Hist = makePlots.getHistND(kinBinData, **mm2HistDef, filterExpression = additionalFilter,
          histNameSuffix = f"{kinBinVariable}_{kinBinMin}_{kinBinMax}_{case}",
          histTitle = f"{particle} ({channel}) {case}, {kinBinMin} < {kinBinVariable} < {kinBinMax} {kinematicBinning['unit']}")
        MM2Hist.Draw()
        MM2Hist.SetYTitle(f"Counts / {MM2Hist.GetBinWidth(1)} (GeV/c^{{2}})^{{2}}")
        counts = MM2Hist.Integral(1, MM2Hist.GetNbinsX())
        if counts >= minCounts:
          MM2HistsKinBins[case][(kinBinVariable, kinBinMin, kinBinMax)] = MM2Hist
        else:
          print(f"Warning: {counts} counts in kinematic bin {kinBinMin} < {kinBinVariable} < {kinBinMax} {kinematicBinning['unit']} for {case} case are below the required minimum of {minCounts}. Skipping bin.")
  finally:
    # always switch multi-threading off again, even if an exception was raised;
    # with multi-threading left enabled, subsequent Minuit fits would run into an infinite loop
    ROOT.DisableImplicitMT()
  return MM2HistsKinBins

In [None]:
# create histograms in all kinematic bins using the default minimum number of counts
MM2HistsKinBins = getHistsKinematicBins1D(
  kinematicBinning = kinematicBinning,
)

In [None]:
def fitMissingMassSquaredKinematicBins1D(
  MM2HistsKinBins,  # dictionary of dictionaries {case : {kinematic bin : histogram, ...}, ...}
  **kwargs
):
  """Fit all given histograms and return the fit results in a dictionary with the same structure.

  Each histogram is fitted by calling fitFunction.fitDistribution() with the histogram and the
  given keyword arguments. The unit in the progress printout is taken from the module-level
  `kinematicBinning`. Returns {case : {kinematic bin : fit result, ...}, ...}.
  """
  resultsAllCases = {}
  for case, histsInCase in MM2HistsKinBins.items():
    resultsInCase = {}
    resultsAllCases[case] = resultsInCase
    for kinBin, hist in histsInCase.items():
      kinVariable, kinMin, kinMax = kinBin
      print(f"Fitting case {case} for kinematic bin {kinMin} < {kinVariable} < {kinMax} {kinematicBinning['unit']}")
      resultsInCase[kinBin] = fitFunction.fitDistribution(hist, **kwargs)
  return resultsAllCases

In [None]:
# fit the histograms in all kinematic bins; the keyword arguments are forwarded to fitFunction.fitDistribution()
fitResultsKinBins = fitMissingMassSquaredKinematicBins1D(
  MM2HistsKinBins,
  particle                = particle,
  fitRange                = mm2FitRange,
  forceCommonGaussianMean = True,
)

### Check whether fits converged

In [None]:
# print the fit status for every case and kinematic bin
# NOTE: the loop variable must not be called `fitResults`; this would overwrite the notebook-level
# dictionary with the overall fit results and break re-running the cells that use it
for case, caseFitResults in fitResultsKinBins.items():
  for kinBin, fitResult in caseFitResults.items():
    print(f"case {case}, kinematic bin {kinBin[1]} < {kinBin[0]} < {kinBin[2]} {kinematicBinning['unit']}: {fitStatusString(fitResult)}")

### Show fits

In [None]:
# draw the histogram of every case and kinematic bin on its own canvas, reposition the stats box,
# and save the canvas
# NOTE: the loop variable must not be called `MM2Hists`; this would overwrite the notebook-level
# dictionary with the overall histograms and break re-running the cells that use it
canvsKinBins = {}
for case, caseMM2Hists in MM2HistsKinBins.items():
  canvsKinBins[case] = {}
  for kinBin, MM2Hist in caseMM2Hists.items():
    canvsKinBins[case][kinBin] = ROOT.TCanvas(sanitize_filename(f"{selection}_{particle}_{channel}_{dataset}_mm2_{kinBin[0]}_{kinBin[1]}_{kinBin[2]}_fit_{case}{pdfFileNameSuffix}"), "")
    MM2Hist.SetMinimum(0)
    MM2Hist.Draw()
    canvsKinBins[case][kinBin].Update()  # needed otherwise TPaveStats object is not created
    stats = MM2Hist.FindObject("stats")
    stats.SetX1NDC(0.58)
    stats.SetX2NDC(0.98)
    stats.SetY1NDC(0.28)
    stats.SetY2NDC(0.93)
    canvsKinBins[case][kinBin].Draw()
    canvsKinBins[case][kinBin].SaveAs(".pdf")

### Plot fit parameters

Get values of fit parameters

In [None]:
# collect, for every case and fit parameter, the parameter values as a function of the kinematic bin center;
# in addition, the reduced chi^2 of each fit is stored (with zero uncertainty) under the name "#chi^{2}/ndf"
fitParValues = {}  # {case : {parameter name : [ [kinematic bin center, parameter value] ...] ...} ...}
for case, caseFitResults in fitResultsKinBins.items():
  caseParValues = {}
  fitParValues[case] = caseParValues
  for kinBin, fitResult in caseFitResults.items():
    _, kinBinMin, kinBinMax = kinBin
    kinBinCenter = (kinBinMin + kinBinMax) / 2
    for parIndex in range(fitResult.NTotalParameters()):
      parName = fitResult.ParName(parIndex)
      parValue = getFitParameter(fitResult, parName)
      caseParValues.setdefault(parName, []).append([kinBinCenter, parValue])
    reducedChi2 = ufloat(fitResult.Chi2() / fitResult.Ndf(), 0)
    caseParValues.setdefault("#chi^{2}/ndf", []).append([kinBinCenter, reducedChi2])
# print(fitParValues)

Postprocess fit-parameter values

In [None]:
# Replace "#hat{r}" by the fraction of the wide Gaussian and order the widths such that
# "#sigma_{1}" belongs to the narrow Gaussian.
# NOTE: the values are modified in place; running this cell a second time would transform the
# already converted values again
for case, caseParValues in fitParValues.items():
  # every entry is a list [kinematic bin center, parameter value]
  rHatEntries   = caseParValues["#hat{r}"]
  sigma1Entries = caseParValues["#sigma_{1}"]
  sigma2Entries = caseParValues["#sigma_{2}"]
  for kinBinIndex, rHatEntry in enumerate(rHatEntries):
    sigma1Entry = sigma1Entries[kinBinIndex]
    sigma2Entry = sigma2Entries[kinBinIndex]
    # sin^2 of "#hat{r}" is the fraction of Gaussian 2
    fraction = umath.sin(rHatEntry[1])**2
    if sigma1Entry[1] > sigma2Entry[1]:
      # Gaussian 1 is the wide one: exchange the widths and use the fraction of Gaussian 1 instead
      sigma1Entry[1], sigma2Entry[1] = sigma2Entry[1], sigma1Entry[1]
      #TODO add case for separate mean values
      fraction = 1 - fraction
    rHatEntry[1] = fraction
# print(fitParValues)

Plot fit-parameter values

In [None]:
# Plot every fit parameter as a function of the kinematic variable; the graphs of the different
# cases are overlaid in one TMultiGraph per parameter.
caseColors = {
  "Total"   : ROOT.kBlack,
  "Found"   : ROOT.kGreen + 2,
  "Missing" : ROOT.kRed + 1,
}
# horizontal shifts applied to the points of each case
kinVarOffsets = {  #TODO set these according to bin range
  "Total"   : 0,
  "Found"   : 0.1,
  "Missing" : 0.05,
}
# parameters that get a fixed y range and a dashed reference line: (y minimum, y maximum, reference value)
protonMassSq = 0.93827208816**2
referenceSettings = {
  "#mu"          : (0.86, 1.11, protonMassSq),  # reference is the squared proton mass
  "#chi^{2}/ndf" : (0,    10,   1),
}
# the ROOT objects are stored in lists so that they stay alive after the loop
canvsFitPar = []
parValueMultiGraphs = []
parValueGraphs = []
for parName in fitParValues["Total"].keys():
  canvFitPar = ROOT.TCanvas(sanitize_filename(f"{selection}_{particle}_{channel}_{dataset}_mm2_fitpar_{parName}_{kinematicBinning['variable']}{pdfFileNameSuffix}"), "")
  canvsFitPar.append(canvFitPar)
  parValueMultiGraph = ROOT.TMultiGraph()
  parValueMultiGraphs.append(parValueMultiGraph)
  graphsByCase = {}
  parValueGraphs.append(graphsByCase)
  for case, caseParValues in fitParValues.items():
    parValues = caseParValues[parName]  # list of [kinematic bin center, parameter value]
    offset = kinVarOffsets[case]
    xVals = array.array('d', [entry[0] + offset         for entry in parValues])
    yVals = array.array('d', [entry[1].nominal_value    for entry in parValues])
    yErrs = array.array('d', [entry[1].std_dev          for entry in parValues])
    graph = ROOT.TGraphErrors(len(xVals), xVals, yVals, ROOT.nullptr, yErrs)
    graphsByCase[case] = graph
    color = caseColors[case]
    graph.SetTitle(case)
    graph.SetMarkerStyle(ROOT.kFullCircle)
    graph.SetMarkerSize(markerSize)
    graph.SetMarkerColor(color)
    graph.SetLineColor(color)
    parValueMultiGraph.Add(graph)
  yAxisTitle = "Fraction Wide Gaussian" if parName == "#hat{r}" else parName
  parValueMultiGraph.SetTitle(f"{particle} ({channel})")
  parValueMultiGraph.GetXaxis().SetTitle(f"{kinematicBinning['label']} ({kinematicBinning['unit']})")
  parValueMultiGraph.GetYaxis().SetTitle(yAxisTitle)
  parValueMultiGraph.Draw("APZ")
  canvFitPar.BuildLegend()  # original remark: crashes for unknown reason
  if parName in referenceSettings:
    yMin, yMax, referenceValue = referenceSettings[parName]
    parValueMultiGraph.SetMinimum(yMin)
    parValueMultiGraph.SetMaximum(yMax)
    # indicate reference value by dashed horizontal line
    line = ROOT.TLine()
    line.SetLineStyle(ROOT.kDashed)
    line.DrawLine(parValueMultiGraph.GetXaxis().GetXmin(), referenceValue, parValueMultiGraph.GetXaxis().GetXmax(), referenceValue)
  canvFitPar.Draw()
  canvFitPar.SaveAs(".pdf")

### Calculate and plot efficiencies

In [None]:
# Calculate the efficiency in every kinematic bin from the fit results of all cases.
# Kinematic bins with too few counts are dropped separately for each case when the histograms are
# created, so a bin that exists for "Total" may be missing for another case. Such bins are skipped
# here instead of raising a KeyError.
efficienciesKinBins = {}
for kinBin in fitResultsKinBins["Total"].keys():
  casesWithoutFit = [case for case, caseFitResults in fitResultsKinBins.items() if kinBin not in caseFitResults]
  if casesWithoutFit:
    print(f"Warning: no fit result for case(s) {casesWithoutFit} in kinematic bin {kinBin[1]} < {kinBin[0]} < {kinBin[2]} {kinematicBinning['unit']}. Skipping bin.")
    continue
  # use a name different from `fitResults` in order not to overwrite the overall fit results
  kinBinFitResults = {case : caseFitResults[kinBin] for case, caseFitResults in fitResultsKinBins.items()}
  efficienciesKinBins[kinBin] = calculateEfficiency(kinBinFitResults, useNarrowGaussian)
for kinBin, efficiency in efficienciesKinBins.items():
  print(f"Efficiency in kinematic bin {kinBin[1]} < {kinBin[0]} < {kinBin[2]} {kinematicBinning['unit']} = {100 * efficiency}%")

In [None]:

# Plot the efficiency as a function of the kinematic variable together with the overall efficiency
# (dashed black line) and the weighted average of the efficiencies in the kinematic bins (dashed red line).
graphVals = [((kinBin[1] + kinBin[2]) / 2, efficiency) for (kinBin, efficiency) in efficienciesKinBins.items()]  # (bin center, efficiency)
xVals = array.array('d', [graphVal[0]               for graphVal in graphVals])
yVals = array.array('d', [graphVal[1].nominal_value for graphVal in graphVals])
yErrs = array.array('d', [graphVal[1].std_dev       for graphVal in graphVals])
# print(xVals, yVals, yErrs)
efficienciesKinBinsGraph = ROOT.TGraphErrors(len(graphVals), xVals, yVals, ROOT.nullptr, yErrs)
efficienciesKinBinsGraph.SetTitle(f"{particle} Track-Finding Efficiency ({channel})")
efficienciesKinBinsGraph.SetMarkerStyle(ROOT.kFullCircle)
efficienciesKinBinsGraph.SetMarkerSize(markerSize)
efficienciesKinBinsGraph.GetXaxis().SetTitle(f"{kinematicBinning['label']} ({kinematicBinning['unit']})")
efficienciesKinBinsGraph.GetYaxis().SetTitle("Efficiency")
efficienciesKinBinsGraph.SetMinimum(0)
efficienciesKinBinsGraph.SetMaximum(1)
canvEff = ROOT.TCanvas(sanitize_filename(f"{selection}_{particle}_{channel}_{dataset}_mm2_eff_{kinematicBinning['variable']}{pdfFileNameSuffix}"), "")
efficienciesKinBinsGraph.Draw("AP")
# indicate value from fit of overall distributions
line = ROOT.TLine()
line.SetLineStyle(ROOT.kDashed)
line.DrawLine(efficienciesKinBinsGraph.GetXaxis().GetXmin(), overallEff.nominal_value, efficienciesKinBinsGraph.GetXaxis().GetXmax(), overallEff.nominal_value)
# indicate weighted average of efficiencies in kinematic bins
# the weights are the inverse variances; points without a positive uncertainty cannot be weighted
# and are left out, so that they do not abort the cell with a division by zero
weightedPoints = [(yVal, 1 / (yErr**2)) for yVal, yErr in zip(yVals, yErrs) if yErr > 0]
meanEff = None
if weightedPoints:
  meanEff = np.average([yVal for yVal, _ in weightedPoints], weights = [weight for _, weight in weightedPoints])
  line.SetLineColor(ROOT.kRed + 1)
  line.DrawLine(efficienciesKinBinsGraph.GetXaxis().GetXmin(), meanEff, efficienciesKinBinsGraph.GetXaxis().GetXmax(), meanEff)
else:
  print("Warning: no efficiency value with positive uncertainty. Cannot calculate weighted average.")
canvEff.Draw()
canvEff.SaveAs(".pdf")

# Scratchpad

In [None]:
# Test of using a C++ functor as a TF1 and of calling this TF1 from the TFormula expression of another TF1.
# see https://root-forum.cern.ch/t/syntax-of-a-free-function-or-c-functor-for-tgraph-fitting/22292/3
# and https://root.cern/manual/python/#just-in-time-compilation-of-small-strings
# the functor evaluates p0^2 + (p1 + p2 * x)^2
ROOT.gInterpreter.ProcessLine('''
struct MyFunction {
  MyFunction() { }

  double operator() (double* vars, double* pars)
  {
    const double x  = vars[0];
    const double p0 = pars[0];
    const double p1 = pars[1];
    const double p2 = pars[2];

    const double linTerm = p1 + p2 * x;
    return p0 * p0 + linTerm * linTerm;
  }
};
''')
myFunc = ROOT.MyFunction()
func = ROOT.TF1("func", myFunc, -1, 1, 3)
# make function visible in Cling's global scope so it can be used in TFormula
# see https://root-forum.cern.ch/t/advanced-tformula-and-tf1-usage/36643/2
ROOT.gInterpreter.ProcessLine("TF1& func = *((TF1*)gROOT->GetFunction(\"func\"))")
#!!! parameter setting in TFormula expression does not work
fooFunc = ROOT.TF1("fooFunc", "func(x)", -1, 1, 3)
fooFunc.SetParameter(0, 100000)
fooFunc.SetParameter(1, 0)
fooFunc.SetParameter(2, 0)
print(fooFunc.Eval(1))
# fooFunc = ROOT.TF1("fooFunc", "func(x, [2..4])", -1, 1, 5)
# fooFunc.SetParameter(0, 10)
# fooFunc.SetParameter(1, -5)
# fooFunc.SetParameter(2, -1)
# fooFunc.SetParameter(3, -2)
# fooFunc.SetParameter(4, -3)
# draw the function on a dedicated canvas; without creating one here, the function would be drawn
# into whatever pad is current and `canv` would refer to a canvas left over from an earlier cell
canv = ROOT.TCanvas("canv", "", 800, 600)
fooFunc.Draw()
canv.Draw()