# Fit missing-mass squared distributions

In [None]:
# on ifarm, use Python 3.6.8
import array

import numpy as np
import ROOT
from pathvalidate import sanitize_filename, sanitize_filepath
from uncertainties import ufloat
from uncertainties.umath import *

import fitFunction  # defines fit function: double Gaussian on top of a 2nd-order polynomial
import makePlots    # defines helper functions to generate histograms from data trees

# apply the plot style provided by the makePlots helper module
makePlots.setupPlotStyle()
# NOTE(review): the commented-out lines below look like an earlier inline style setup that
# setupPlotStyle() presumably replaces -- confirm before removing them
# ROOT.gROOT.LoadMacro("~/rootlogon.C")
# ROOT.gROOT.ForceStyle()
# ROOT.gStyle.SetCanvasDefW(600)
# ROOT.gStyle.SetCanvasDefH(400)
# ROOT.gStyle.SetPalette(ROOT.kBird)
# # ROOT.gStyle.SetOptStat("ni")  # show only name and integral
# ROOT.gStyle.SetOptStat("i")  # show only integral
# ROOT.gStyle.SetStatFormat("8.8g")
# ROOT.gStyle.SetTitleColor(1, "X")  # fix that for some mysterious reason x-axis titles of 2D plots and graphs are white

## Load files and get input histograms for the 3 cases

In [None]:
# Analysis configuration; switch between alternatives by (un)commenting the respective lines.
# These names are module-level and are read by the cells and helper functions below.
# selection = "paul"
selection = "justin"
# particle = "Pi-"
# particle = "Pi+"
particle = "Proton"
# channel = "2pi"
channel = "4pi"
# dataset = "30370_acc_Pval"
dataset = "bggen_2017_01-ver03"
fitRange = (-0.5, 4)  # passed as fit range to fitFunction.fitDistribution()
rebinFactor = 20      # passed to TH1.Rebin() for all missing-mass squared histograms
markerSize = 0.75     # marker size used for the graphs
if selection == "paul":
  inFileName = f"../pmatt/trackeff_{particle}_{channel}{f'.{dataset}' if dataset != '' else ''}.root"
  #TODO fix names in Paul's code
  histNames = {
    "Total"   : "MissingMass/MissingMassVsBeamEnergy",
    "Found"   : "MissingMass/MissingMassVsBeamEnergy_Found",
    "Missing" : "MissingMass/MissingMassVsBeamEnergy_Missing"}
elif selection == "justin":
  # NOTE(review): this file name is hard-coded and does not depend on `particle`, `channel`, or `dataset`;
  # keep it in sync with the settings above by hand
  # inFileName = f"../ReactionEfficiency/pippippimpimpmiss.30370.root"
  inFileName = "../ReactionEfficiency/pippippimpimpmiss_bggen_2017_01-ver03.root"
  histNames = {
    "Total"   : "MissingMassSquared/MissingMassSquaredVsBeamEnergy",
    "Found"   : "MissingMassSquared/MissingMassSquaredVsBeamEnergy_Found",
    "Missing" : "MissingMassSquared/MissingMassSquaredVsBeamEnergy_Missing"}
  # histNames = {
  #   "Total"   : "MissingMassSquared/MissingMassSquaredVsBeamEnergy__2#pi^{#plus}2#pi^{#minus}p",
  #   "Found"   : "MissingMassSquared/MissingMassSquaredVsBeamEnergy_Found__2#pi^{#plus}2#pi^{#minus}p",
  #   "Missing" : "MissingMassSquared/MissingMassSquaredVsBeamEnergy_Missing__2#pi^{#plus}2#pi^{#minus}p"}
  # histNames = {
  #   "Total"   : "MissingMassSquared/MissingMassSquaredVsBeamEnergy__2#gamma2#pi^{#plus}2#pi^{#minus}p[#pi^{0}]",
  #   "Found"   : "MissingMassSquared/MissingMassSquaredVsBeamEnergy_Found__2#gamma2#pi^{#plus}2#pi^{#minus}p[#pi^{0}]",
  #   "Missing" : "MissingMassSquared/MissingMassSquaredVsBeamEnergy_Missing__2#gamma2#pi^{#plus}2#pi^{#minus}p[#pi^{0}]"}
else:
  raise ValueError(f"Unknown selection '{selection}'")
inFile = ROOT.TFile(inFileName)
# fail early with a clear message instead of running into null-object errors in later cells
if inFile.IsZombie() or not inFile.IsOpen():
  raise OSError(f"Cannot open input file '{inFileName}'")
MM2vsBeamEnergyHists = {case : inFile.Get(histName) for (case, histName) in histNames.items()}
# TFile.Get() does not raise if an object does not exist; it returns a null object that evaluates to False
histNamesNotFound = [histName for (case, histName) in histNames.items() if not MM2vsBeamEnergyHists[case]]
if histNamesNotFound:
  raise KeyError(f"Cannot find histogram(s) {histNamesNotFound} in file '{inFileName}'")

In [None]:
# quick visual check: draw the 2D histogram of the "Missing" case as a color map
# the canvas has to be created first so that the histogram is drawn into it
canv = ROOT.TCanvas("canv")
MM2vsBeamEnergyHists["Missing"].Draw("COLZ")
canv.Draw()

## Tracking efficiency from overall missing-mass squared distributions

### Get overall missing-mass squared distributions by projecting the 2D histograms and adjusting the binning

In [None]:

# project every 2D histogram onto its y axis
# bin range 0 to -1 is ROOT's default and selects all x bins; option "E" makes ROOT compute the bin errors
MM2Hists = {
  case : MM2vsBeamEnergyHist.ProjectionY("_py", 0, -1, "E")
  for (case, MM2vsBeamEnergyHist) in MM2vsBeamEnergyHists.items()
}
# alternatives that read 1D histograms directly from the file instead of projecting:
# MM2Hists = {case : inFile.Get("MissingMassSquared/MissingMassSquared" + ("" if case == "Total" else "_" + case)) for case in ["Total", "Found", "Missing"]}
# MM2Hists = {case : inFile.Get("MissingMassSquared/MissingMassSquared" + ("" if case == "Total" else "_" + case) + "__2#pi^{#plus}2#pi^{#minus}p") for case in ["Total", "Found", "Missing"]}
# MM2Hists = {case : inFile.Get("MissingMassSquared/MissingMassSquared" + ("" if case == "Total" else "_" + case) + "__2#gamma2#pi^{#plus}2#pi^{#minus}p[#pi^{0}]") for case in ["Total", "Found", "Missing"]}
canvs = []  # holds references to all canvases created in the loop
for case, MM2Hist in MM2Hists.items():
  # merge bins in place, then label the histogram using the new bin width
  MM2Hist.Rebin(rebinFactor)
  MM2Hist.SetTitle(f"{particle} ({channel}) {case}")
  MM2Hist.SetYTitle(f"Counts / {MM2Hist.GetBinWidth(1)} (GeV/c^{{2}})^{{2}}")
  # the canvas name doubles as the name of the output file
  canv = ROOT.TCanvas(f"{selection}_{particle}_{channel}_mm2_{dataset}_{case}", "")
  MM2Hist.Draw("HIST")
  canv.Draw()
  canv.SaveAs(".pdf")  # a file name that starts with "." makes ROOT prepend the canvas name
  canvs.append(canv)

In [None]:
# Split the overall distributions into the part tagged with the signal topology and the remainder (background).
# 1D histograms for the topology tag "2#pi^{#plus}2#pi^{#minus}p" are read directly from the input file
MM2SignalHists = {case : inFile.Get("MissingMassSquared/MissingMassSquared" + ("" if case == "Total" else "_" + case) + "__2#pi^{#plus}2#pi^{#minus}p") for case in ["Total", "Found", "Missing"]}
MM2BackgroundHists = {}
# adding up all background channels yields different distributions
# are topology tags disjoint?
# dir = inFile.Get("MissingMassSquared")
# for key in dir.GetListOfKeys():
#   obj = key.ReadObj()
#   if isinstance(obj, ROOT.TH1):
#     hist = obj
#     name = hist.GetName()
#     for case in ["Total", "Found", "Missing"]:
#       if not name.startswith("MissingMassSquared" + ("__" if case == "Total" else "_" + case + "__")):
#         continue
#       if name == "MissingMassSquared" + ("__" if case == "Total" else "_" + case + "__") + "2#pi^{#plus}2#pi^{#minus}p":
#         continue
#       if case in MM2BackgroundHists.keys():
#         MM2BackgroundHists[case].Add(hist)
#       else:
#         MM2BackgroundHists[case] = hist.Clone()
# NOTE: rebinding canvs drops the references to the canvases collected by the previous cell
canvs = []
# for case, hist in MM2BackgroundHists.items():
#   canv = ROOT.TCanvas(case, "")
#   hist.Rebin(rebinFactor)
#   hist.SetTitle(f"{particle} ({channel}) Background {case}")
#   hist.SetYTitle(f"Counts / {hist.GetBinWidth(1)}" + " (GeV/c^{2})^{2}")
#   hist.Draw()
#   canv.Draw()
#   canvs.append(canv)
for case in MM2Hists.keys():
  canv = ROOT.TCanvas("Foo" + case, "")
  # start from a copy of the overall distribution
  hist = MM2BackgroundHists[case] = MM2Hists[case].Clone()
  # Rebin() without a new name modifies the histogram in place and returns it;
  # NOTE: re-running this cell therefore rebins the signal histograms again
  histSig = MM2SignalHists[case].Rebin(rebinFactor)
  # histSig.SetLineColor(ROOT.kOrange + 1)
  # histSig.SetMarkerColor(ROOT.kOrange + 1)
  histSig.Draw()
  # background = overall distribution - signal-topology distribution
  hist.Add(histSig, -1)
  hist.SetTitle(f"{particle} ({channel}) Background {case}")
  hist.SetYTitle(f"Counts / {hist.GetBinWidth(1)}" + " (GeV/c^{2})^{2}")
  # hist.SetLineColor(ROOT.kOrange + 1)
  # hist.SetMarkerColor(ROOT.kOrange + 1)
  hist.Draw("SAME")
  canv.Draw()
  canvs.append(canv)
# NOTE(review): from here on MM2Hists refers to the background-only histograms, i.e. all cells
# below that use MM2Hists operate on them -- confirm that this is intended
MM2Hists = MM2BackgroundHists

### Fit missing-mass squared distributions for the 3 cases

In [None]:
# fit the overall distribution of each case; the two Gaussians are not forced to share a common mean
fitResults = {
  case : fitFunction.fitDistribution(MM2Hist, particle, fitRange, forceCommonGaussianMean = False)
  for (case, MM2Hist) in MM2Hists.items()
}

In [None]:
def fitStatusString(fitResult):
  """Return a one-line summary of minimizer status, validity, and covariance-matrix status of `fitResult`

  The covariance-matrix status code is translated via element [1] of the
  corresponding entry in fitFunction.COV_MATRIX_STATUS_CODE.
  """
  statusItems = [
    ("minimizer status",         fitResult.Status()),
    ("fit result is valid",      fitResult.IsValid()),
    ("covariance matrix status", fitFunction.COV_MATRIX_STATUS_CODE[fitResult.CovMatrixStatus()][1]),
  ]
  return ", ".join(f"{label} = {value}" for (label, value) in statusItems)

Show fit result

In [None]:
# draw the distribution of each case into its own canvas, reposition the statistics box, and save the canvas
canvsOverall = {}
for case, MM2Hist in MM2Hists.items():
  # the canvas name doubles as the name of the output file (see SaveAs() below)
  canvsOverall[case] = ROOT.TCanvas(f"{selection}_{particle}_{channel}_mm2_{dataset}_fit_{case}", "")
  MM2Hist.Draw()
  # MM2BackgroundHists[case].Draw("SAME")
  # MM2SignalHists[case].Draw("SAME")
  canvsOverall[case].Update()  # needed otherwise TPaveStats object is not created
  # move and resize the statistics box; coordinates are in NDC, i.e. relative to the pad
  stats = MM2Hist.FindObject("stats")
  stats.SetX1NDC(0.65)
  stats.SetX2NDC(0.98)
  stats.SetY1NDC(0.3)
  stats.SetY2NDC(0.92)
  canvsOverall[case].Draw()
  canvsOverall[case].SaveAs(".pdf")  # a file name that starts with "." makes ROOT prepend the canvas name
# print the fit status of all cases
for case, fitResult in fitResults.items():
  print(f"{case}: {fitStatusString(fitResult)}")

### Get overall signal yields and calculate average efficiency

In [None]:
def getFitParameter(fitResult, parName):
  """Return value and uncertainty of the fit parameter named `parName` as a single ufloat"""
  index = fitResult.Index(parName)  # look up the parameter by name
  value = fitResult.Parameter(index)
  error = fitResult.ParError(index)
  return ufloat(value, error)

In [None]:
def getSignalYield(fitResult):
  """Return the signal yield, i.e. the fit parameter named "A", as obtained from getFitParameter()"""
  signalYieldParName = "A"
  return getFitParameter(fitResult, signalYieldParName)

In [None]:
def calculateEfficiency(fitResults):  # dictionary with TFitResults for "Total", "Found", and "Missing"
  """Return the efficiency Found / (Found + Missing) calculated from the signal yields of the given fits

  The signal yield is extracted for every entry in `fitResults`, but only the
  yields of the "Found" and "Missing" cases enter the efficiency.
  """
  signalYields = {}
  for case, fitResult in fitResults.items():
    signalYields[case] = getSignalYield(fitResult)
  nmbFound   = signalYields["Found"]
  nmbMissing = signalYields["Missing"]
  return nmbFound / (nmbFound + nmbMissing)

In [None]:
# signal yields from the fits of the overall distributions and the resulting average efficiency
signalYields = {case : getSignalYield(fitResult) for (case, fitResult) in fitResults.items()}
overallEff = calculateEfficiency(fitResults)
#TODO one could perform a combined fit of all 3 histograms enforcing Found + Missing == Total
# cross-check: difference between the "Total" yield and the sum of the "Found" and "Missing" yields
# relative to the "Total" yield in percent
print(signalYields, f"mismatch = {100 * (signalYields['Total'] - signalYields['Found'] - signalYields['Missing']) / signalYields['Total']}%")
print(f"Overall efficiency = {100 * overallEff}%")
# the following lines apparently record the output of an earlier run:
# Proton 4pi: {'Total': 3592.270996616253+/-11.300877442244428, 'Found': 2291.460860466247+/-7.7572963246630025, 'Missing': 1387.0557172146116+/-14.375700847915482} mismatch = -2.4+/-0.6%
# Overall efficiency = 62.29+/-0.26%

## Tracking efficiencies as a function of beam energy from missing-mass squared distributions

### Fit missing-mass squared distributions in bins of beam energy

In [None]:
def getMissingMassSquaredBeamEnergyBins(
  MM2vsBeamEnergyHist,
  case,
  beamEnergyRange = (3.0, 12.0),  # [GeV]
  nmbBeamEnergyBins = 9,          # 1 GeV bin width
  minCounts = 1000                # minimum number of counts required in bin
):
  """Project a 2D histogram onto its y axis in equidistant, non-overlapping slices of its x axis (beam energy)

  Parameters:
    MM2vsBeamEnergyHist: 2D histogram with the beam energy on the x axis
    case:                label of the case; used only for the histogram titles
    beamEnergyRange:     (lower, upper) limit of the beam-energy range that is split into slices
    nmbBeamEnergyBins:   number of equidistant slices
    minCounts:           slices whose projection contains fewer counts are skipped with a warning

  Returns a dictionary {(energyBinMin, energyBinMax) : 1D histogram} that contains only the
  slices that passed the `minCounts` requirement.

  The function reads the module-level variables `rebinFactor`, `particle`, and `channel`.
  """
  energyBinWidth = (beamEnergyRange[1] - beamEnergyRange[0]) / float(nmbBeamEnergyBins)
  xAxis = MM2vsBeamEnergyHist.GetXaxis()
  MM2Hists = {}
  for energyBin in range(nmbBeamEnergyBins):
    # get distribution for given beam energy bin
    energyBinMin = beamEnergyRange[0] + energyBin * energyBinWidth
    energyBinMax = energyBinMin + energyBinWidth
    firstBin = xAxis.FindBin(energyBinMin)
    lastBin  = xAxis.FindBin(energyBinMax)
    # FindBin() returns the bin that contains the given value and ProjectionY() includes the last bin.
    # If the upper limit coincides with the lower edge of the found bin, this bin already belongs to
    # the next energy slice and must be excluded; otherwise, neighboring slices would share one bin
    # and its entries would be counted twice.
    if lastBin > firstBin and abs(xAxis.GetBinLowEdge(lastBin) - energyBinMax) < 1e-6 * xAxis.GetBinWidth(lastBin):
      lastBin -= 1
    MM2Hist = MM2vsBeamEnergyHist.ProjectionY(f"{MM2vsBeamEnergyHist.GetName()}_py_{energyBin}", firstBin, lastBin)
    MM2Hist.Rebin(rebinFactor)
    MM2Hist.SetTitle(f"{particle} ({channel}) {case}, {energyBinMin} < E_{{#gamma}} (GeV) < {energyBinMax}")
    MM2Hist.SetYTitle(f"Counts / {MM2Hist.GetBinWidth(1)} (GeV/c^{{2}})^{{2}}")
    # counts within the axis range, i.e. without underflow and overflow bins
    counts = MM2Hist.Integral(1, MM2Hist.GetNbinsX())
    if counts >= minCounts:
      MM2Hists[(energyBinMin, energyBinMax)] = MM2Hist
    else:
      print(f"Warning: {counts} counts in energy bin {(energyBinMin, energyBinMax)} are below the required minimum of {minCounts}. Skipping bin.")

  return MM2Hists

In [None]:
# slice the 2D histogram of each case into beam-energy bins using the default binning of the function
MM2HistsEBins = {
  case : getMissingMassSquaredBeamEnergyBins(MM2vsBeamEnergyHist, case)
  for (case, MM2vsBeamEnergyHist) in MM2vsBeamEnergyHists.items()
}

In [None]:
def fitMissingMassSquaredBeamEnergyBins(
  MM2HistsEBins,  # dictionary of dictionaries {case : {energy bin : histogram, ...}, ...}
  **kwargs
):
  """Fit every histogram in `MM2HistsEBins` and return the fit results in a dictionary of the same structure

  All keyword arguments are forwarded unchanged to fitFunction.fitDistribution().
  A progress message is printed before each fit.
  """
  fitResults = {}
  for case, histsInCase in MM2HistsEBins.items():
    fitResultsInCase = fitResults.setdefault(case, {})
    for energyBin, hist in histsInCase.items():
      print(f"Fitting case {case} for beam energy bin {energyBin} GeV")
      fitResultsInCase[energyBin] = fitFunction.fitDistribution(hist, **kwargs)
  return fitResults

In [None]:
# fit all beam-energy bins of all cases; here, both Gaussians are forced to share a common mean
fitResultsEBins = fitMissingMassSquaredBeamEnergyBins(
  MM2HistsEBins,
  particle = particle,
  fitRange = fitRange,
  forceCommonGaussianMean = True,
)

### Check whether fits converged

In [None]:
# print the fit status for every case and beam-energy bin
# The loop variable must not be named `fitResults`: loop variables at notebook top level are globals,
# so this loop would overwrite the variable that holds the fit results of the overall distributions.
for case, fitResultsInCase in fitResultsEBins.items():
  for energyBin, fitResult in fitResultsInCase.items():
    print(f"case {case}, energy bin {energyBin} GeV: {fitStatusString(fitResult)}")

### Show fits

In [None]:
# draw the distribution of every case and beam-energy bin into its own canvas,
# reposition the statistics box, and save the canvas
canvsEBins = {}
# The loop variable must not be named `MM2Hists`: loop variables at notebook top level are globals,
# so this loop would overwrite the variable that holds the overall distributions.
for case, MM2HistsInCase in MM2HistsEBins.items():
  canvsEBins[case] = {}
  for energyBin, MM2Hist in MM2HistsInCase.items():
    # the canvas name doubles as the name of the output file (see SaveAs() below)
    canvsEBins[case][energyBin] = ROOT.TCanvas(f"{selection}_{particle}_{channel}_mm2_{dataset}_fit_{case}_Egamma_{energyBin[0]}_{energyBin[1]}", "")
    MM2Hist.SetMinimum(0)
    MM2Hist.Draw()
    canvsEBins[case][energyBin].Update()  # needed otherwise TPaveStats object is not created
    # move and resize the statistics box; coordinates are in NDC, i.e. relative to the pad
    stats = MM2Hist.FindObject("stats")
    stats.SetX1NDC(0.58)
    stats.SetX2NDC(0.98)
    stats.SetY1NDC(0.28)
    stats.SetY2NDC(0.93)
    canvsEBins[case][energyBin].Draw()
    canvsEBins[case][energyBin].SaveAs(".pdf")  # a file name that starts with "." makes ROOT prepend the canvas name

### Plot fit parameters

Get values of fit parameters

In [None]:
# collect the values of all fit parameters and of chi^2/ndf for every case as a function of the beam energy
fitParValues = {}  # {case : {parameter name : [ [energy bin center, parameter value] ...] ...} ...}
for case in fitResultsEBins:
  fitParValues[case] = {}
  for energyBin, fitResult in fitResultsEBins[case].items():
    energyBinCenter = (energyBin[0] + energyBin[1]) / 2
    # all parameters of the fit function
    for parIndex in range(fitResult.NTotalParameters()):
      parName = fitResult.ParName(parIndex)
      parValue = getFitParameter(fitResult, parName)
      fitParValues[case].setdefault(parName, []).append([energyBinCenter, parValue])
    # reduced chi^2 is stored like a parameter with zero uncertainty
    parName  = "#chi^{2}/ndf"
    parValue = ufloat(fitResult.Chi2() / fitResult.Ndf(), 0)
    fitParValues[case].setdefault(parName, []).append([energyBinCenter, parValue])
# print(fitParValues)

Postprocess fit-parameter values

In [None]:
# Convert the fit parameter "r" into the fraction sin^2(r) of Gaussian 2 and order the two Gaussians by width.
# NOTE: the values in fitParValues are modified in place, so running this cell a second time
# applies the conversion to the already converted values.
for case in fitParValues.keys():
  for energyBinIndex, r in enumerate(fitParValues[case]["r"]):
    sigma1 = fitParValues[case]["#sigma_{1}"][energyBinIndex][1]
    sigma2 = fitParValues[case]["#sigma_{2}"][energyBinIndex][1]
    # calculate Gaussian 2 fraction
    ratio = sin(r[1])**2
    if sigma1 > sigma2:
      # make sure sigma_1 is the narrow Gaussian: exchange the widths and take the complementary fraction
      #TODO add case for separate mean values
      (fitParValues[case]["#sigma_{1}"][energyBinIndex][1],
       fitParValues[case]["#sigma_{2}"][energyBinIndex][1]) = (sigma2, sigma1)
      ratio = 1 - ratio
    # r is the [energy bin center, parameter value] list stored in fitParValues[case]["r"]
    r[1] = ratio

Plot fit-parameter values

In [None]:
# For every fit parameter, plot its values for the 3 cases as a function of the beam energy.
# line and marker color for each case
caseColors = {
  "Total"   : ROOT.kBlack,
  "Found"   : ROOT.kGreen + 2,
  "Missing" : ROOT.kRed + 1}
# offset that is added to the x values of each case (see xVals below);
# presumably this keeps the points of the 3 cases from being drawn on top of each other
energyOffsets = {
  "Total"   : 0,
  "Found"   : 0.1,
  "Missing" : 0.05}
# lists hold references to all ROOT objects created in the loop
canvsFitPar = []
parValueMultiGraphs = []
parValueGraphs = []
for parName in fitParValues["Total"].keys():
  # the canvas name doubles as the name of the output file (see SaveAs() below); it is sanitized
  # because parameter names such as "#chi^{2}/ndf" contain characters that are problematic in file names
  canvsFitPar.append(ROOT.TCanvas(sanitize_filename(f"{selection}_{particle}_{channel}_mm2_{dataset}_fit_par_{parName}"), ""))
  parValueMultiGraphs.append(ROOT.TMultiGraph())
  parValueGraphs.append({})
  for case in fitParValues.keys():
    # one graph per case; x values have no uncertainties, y uncertainties are the parameter uncertainties
    parValues = fitParValues[case][parName]
    xVals = array.array('d', [energy + energyOffsets[case] for energy, _ in parValues])
    yVals = array.array('d', [parValue.nominal_value for _, parValue in parValues])
    yErrs = array.array('d', [parValue.std_dev       for _, parValue in parValues])
    parValueGraphs[-1][case] = ROOT.TGraphErrors(len(xVals), xVals, yVals, ROOT.nullptr, yErrs)
    graph = parValueGraphs[-1][case]
    graph.SetTitle(case)
    graph.SetMarkerStyle(ROOT.kFullCircle)
    graph.SetMarkerSize(markerSize)
    graph.SetMarkerColor(caseColors[case])
    graph.SetLineColor(caseColors[case])
    parValueMultiGraphs[-1].Add(graph)
  parValueMultiGraph = parValueMultiGraphs[-1]
  parValueMultiGraph.SetTitle(f"{particle} ({channel})")
  parValueMultiGraph.GetXaxis().SetTitle("Beam Energy (GeV)")
  # the values stored under "r" were converted into a Gaussian fraction by the postprocessing cell
  parValueMultiGraph.GetYaxis().SetTitle(parName if parName != "r" else "Fraction Wide Gaussian")
  parValueMultiGraph.Draw("APZ")
  canvsFitPar[-1].BuildLegend()
  if parName == "#mu":
    # fixed y range around the squared proton mass
    parValueMultiGraph.SetMinimum(0.86)
    parValueMultiGraph.SetMaximum(1.11)
    # indicate nominal value m_p^2 value
    line = ROOT.TLine()
    line.SetLineStyle(ROOT.kDashed)
    protonMassSq = 0.93827208816**2
    line.DrawLine(parValueMultiGraph.GetXaxis().GetXmin(), protonMassSq, parValueMultiGraph.GetXaxis().GetXmax(), protonMassSq)
  if parName == "#chi^{2}/ndf":
    # fixed y range; points above 10 are outside the plotted range
    parValueMultiGraph.SetMinimum(0)
    parValueMultiGraph.SetMaximum(10)
    # indicate nominal value
    line = ROOT.TLine()
    line.SetLineStyle(ROOT.kDashed)
    line.DrawLine(parValueMultiGraph.GetXaxis().GetXmin(), 1, parValueMultiGraph.GetXaxis().GetXmax(), 1)
  canvsFitPar[-1].Draw()
  canvsFitPar[-1].SaveAs(".pdf")  # a file name that starts with "." makes ROOT prepend the canvas name

### Calculate and plot efficiencies

In [None]:
# Calculate the efficiency for every beam-energy bin.
# Energy bins are dropped separately for each case if they contain too few counts (see
# getMissingMassSquaredBeamEnergyBins()), so an energy bin that exists for "Total" may have no fit
# result for "Found" or "Missing". Such bins are skipped with a warning instead of raising a KeyError.
efficienciesEBins = {}
for energyBin in fitResultsEBins["Total"].keys():
  # The dictionary must not be named `fitResults`: that would overwrite the module-level variable
  # that holds the fit results of the overall distributions.
  fitResultsInBin = {
    case : fitResultsInCase[energyBin]
    for (case, fitResultsInCase) in fitResultsEBins.items()
    if energyBin in fitResultsInCase
  }
  casesWithoutFit = [case for case in ("Found", "Missing") if case not in fitResultsInBin]
  if casesWithoutFit:
    print(f"Warning: no fit result for case(s) {casesWithoutFit} in energy bin {energyBin} GeV. Skipping bin.")
    continue
  efficienciesEBins[energyBin] = calculateEfficiency(fitResultsInBin)
for energyBin, efficiency in efficienciesEBins.items():
  print(f"Efficiency in energy bin {energyBin} GeV = {100 * efficiency}%")

In [None]:

# Plot the efficiency as a function of the beam energy and compare it with the overall efficiency.
# list of (energy bin center, efficiency) tuples
graphVals = [((energyBin[0] + energyBin[1]) / 2, efficiency) for (energyBin, efficiency) in efficienciesEBins.items()]
xVals = array.array('d', [graphVal[0]               for graphVal in graphVals])
yVals = array.array('d', [graphVal[1].nominal_value for graphVal in graphVals])
yErrs = array.array('d', [graphVal[1].std_dev       for graphVal in graphVals])
# print(xVals, yVals, yErrs)
# x values have no uncertainties
efficienciesEBinsGraph = ROOT.TGraphErrors(len(graphVals), xVals, yVals, ROOT.nullptr, yErrs)
efficienciesEBinsGraph.SetTitle(f"{particle} Track-Finding Efficiency ({channel})")
efficienciesEBinsGraph.SetMarkerStyle(ROOT.kFullCircle)
efficienciesEBinsGraph.SetMarkerSize(markerSize)
efficienciesEBinsGraph.GetXaxis().SetTitle("Beam Energy (GeV)")
efficienciesEBinsGraph.GetYaxis().SetTitle("Efficiency")
# fixed y range; efficiencies outside of [0.3, 0.7] are outside the plotted range
efficienciesEBinsGraph.SetMinimum(0.3)
efficienciesEBinsGraph.SetMaximum(0.7)
# the canvas name doubles as the name of the output file (see SaveAs() below)
canvEff = ROOT.TCanvas(f"{selection}_{particle}_{channel}_mm2_{dataset}_eff", "")
efficienciesEBinsGraph.Draw("AP")
# indicate value from fit of overall distributions
line = ROOT.TLine()
line.SetLineStyle(ROOT.kDashed)
line.DrawLine(efficienciesEBinsGraph.GetXaxis().GetXmin(), overallEff.nominal_value, efficienciesEBinsGraph.GetXaxis().GetXmax(), overallEff.nominal_value)
# indicate weighted average of efficiencies in energy bins
# each efficiency is weighted with the inverse of its squared uncertainty
meanEff = np.average(yVals, weights = [1 / (yErr**2) for yErr in yErrs])
line.SetLineColor(ROOT.kRed + 1)
line.DrawLine(efficienciesEBinsGraph.GetXaxis().GetXmin(), meanEff, efficienciesEBinsGraph.GetXaxis().GetXmax(), meanEff)
canvEff.Draw()
canvEff.SaveAs(".pdf")  # a file name that starts with "." makes ROOT prepend the canvas name

# Scratchpad

In [None]:
# Experiment: wrap a C++ functor in a TF1 and use this TF1 inside the formula of another TF1
# see https://root-forum.cern.ch/t/syntax-of-a-free-function-or-c-functor-for-tgraph-fitting/22292/3
# and https://root.cern/manual/python/#just-in-time-compilation-of-small-strings
# the functor calculates p0^2 + (p1 + p2 * x)^2
ROOT.gInterpreter.ProcessLine('''
struct MyFunction {
  MyFunction() { }

  double operator() (double* vars, double* pars)
  {
    const double x  = vars[0];
    const double p0 = pars[0];
    const double p1 = pars[1];
    const double p2 = pars[2];

    const double linTerm = p1 + p2 * x;
    return p0 * p0 + linTerm * linTerm;
  }
};
''')
myFunc = ROOT.MyFunction()
# function with 3 parameters in the range [-1, 1]
func = ROOT.TF1("func", myFunc, -1, 1, 3)
# make function visible in Cling's global scope so it can be used in TFormula
# see https://root-forum.cern.ch/t/advanced-tformula-and-tf1-usage/36643/2
ROOT.gInterpreter.ProcessLine("TF1& func = *((TF1*)gROOT->GetFunction(\"func\"))")
#!!! parameter setting in TFormula expression does not work
fooFunc = ROOT.TF1("fooFunc", "func(x)", -1, 1, 3)
fooFunc.SetParameter(0, -1)
fooFunc.SetParameter(1, -2)
fooFunc.SetParameter(2, -3)
# fooFunc = ROOT.TF1("fooFunc", "func(x, [2..4])", -1, 1, 5)
# fooFunc.SetParameter(0, 10)
# fooFunc.SetParameter(1, -5)
# fooFunc.SetParameter(2, -1)
# fooFunc.SetParameter(3, -2)
# fooFunc.SetParameter(4, -3)
# Draw into a dedicated canvas. Without it, the function is drawn into whatever pad happens to be
# current and canv.Draw() displays a stale canvas from an earlier cell. The canvas gets its own name
# so that it does not replace the canvas named "canv" that was created further above.
canv = ROOT.TCanvas("canvScratch", "", 800, 600)
fooFunc.Draw()
canv.Draw()