In [9]:
import numpy as np
import ROOT

# Detector-level (reco) jet pT bin edges in GeV/c:
#   1 GeV/c steps from 5 to 40, 2 GeV/c steps from 40 to 60, then wide bins.
# Built directly as float64 arrays, which is the form handed to the ROOT
# histogram models further down.
pt_reco_bins = np.concatenate(
    (
        np.arange(5.0, 40.0, 1.0),   # 5, 6, ..., 39
        np.arange(40.0, 60.0, 2.0),  # 40, 42, ..., 58
        np.array([60.0, 64.0, 70.0, 90.0]),
    )
)

# Particle-level (MC truth) jet pT bin edges in GeV/c.
pt_mc_bins = np.array(
    [6.9, 8.2, 9.7, 11.5, 13.6, 16.1, 19.0, 22.5, 26.6, 31.4, 37.2, 44.0, 52.0]
)
# Alternative kept for reference: use the truth binning on the reco axis too.
# pt_reco_bins = pt_mc_bins.copy()

In [10]:
# import uproot
# input_file_path = "/home/prozorov/dev/star/jets_pp_2012/output/jets_embedding.root"

# input_file = uproot.open(input_file_path)
# tree = input_file["MatchedTree"]
# print(tree.keys())


In [None]:
test_fraction = 0.2  # Fraction of data to use for testing
split_seed = 12345   # Fixed seed so re-running the cell reproduces the same train/test split

print("Creating new response matrix...")

# Axis titles shared by the training and the test spectra.
measured_title = ";p_{t}, GeV/c; dN/dp_{t}"
truth_title = ";p_{t}, GeV/c;dN/dp_{t}"

# Histogram models for RDataFrame.
# The test spectra get their own models so that their final names
# ("MeasuredTest", "TruthTest") are fixed at booking time. Renaming a booked
# result with SetName() accesses the histogram and therefore starts the event
# loop before the remaining histograms are booked; every extra loop re-draws
# the random split column, so the histograms would no longer describe the
# same train/test partition.
MeasuredModel = ROOT.RDF.TH1DModel("Measured", measured_title, len(pt_reco_bins) - 1, pt_reco_bins)
MeasuredTestModel = ROOT.RDF.TH1DModel("MeasuredTest", measured_title, len(pt_reco_bins) - 1, pt_reco_bins)

TruthModel = ROOT.RDF.TH1DModel("Truth", truth_title, len(pt_mc_bins) - 1, pt_mc_bins)
TruthTestModel = ROOT.RDF.TH1DModel("TruthTest", truth_title, len(pt_mc_bins) - 1, pt_mc_bins)

ResponseModel = ROOT.RDF.TH2DModel("ResponseMatrix", "Response Matrix; Measured; Truth",
                                   len(pt_reco_bins) - 1, pt_reco_bins,
                                   len(pt_mc_bins) - 1, pt_mc_bins)

DetectorResolutionModel = ROOT.RDF.TH2DModel("DetectorResolution",
                                             "Detector Resolution; p_{T}^{mc}, GeV/c; p_{T}^{reco} - p_{T}^{mc}, GeV/c",
                                             1000, 0, 100, 1000, -100, 100)

# Open input file with RDataFrame
input_file_path = "/home/prozorov/dev/star/jets_pp_2012/output/jets_embedding.root"

# Default columns are passed as a list (ordered) rather than a set, whose
# iteration order is arbitrary. Every action below names its columns explicitly.
default_columns = ["mc_pt", "reco_pt", "mc_weight", "reco_trigger_match_HT2", "reco_trigger_match_JP2"]
df = ROOT.RDataFrame("MatchedTree", input_file_path, default_columns)

# Booked lazily: the value is fetched at the end, together with everything else.
n_entries_result = df.Count()

# Filter out invalid entries (-9 marks an unmatched jet) and require the HT2 trigger match.
# Minimum-bias alternative (no trigger requirement):
# df_filtered = df.Filter("mc_pt != -9 && reco_pt != -9")
df_filtered = df.Filter("mc_pt != -9 && reco_pt != -9 && reco_trigger_match_HT2")

# Add column for detector resolution (delta pT)
df_filtered = df_filtered.Define("delta_pt", "reco_pt - mc_pt")

# Add a random column for the train/test split. The column is drawn once per
# entry within an event loop, so train and test are disjoint as long as all
# results are produced in the same loop.
# NOTE(review): gRandom is a process-wide generator; this is only meant for
# single-threaded running (no ROOT.EnableImplicitMT()).
ROOT.gRandom.SetSeed(split_seed)
df_filtered = df_filtered.Define("random", "gRandom->Uniform()")

# Split into training and testing dataframes
df_train = df_filtered.Filter(f"random > {test_fraction}")
df_test = df_filtered.Filter(f"random <= {test_fraction}")

n_train_result = df_train.Count()
n_test_result = df_test.Count()

# Book histograms for the training set
Measured = df_train.Histo1D(MeasuredModel, "reco_pt", "mc_weight")
Truth = df_train.Histo1D(TruthModel, "mc_pt", "mc_weight")
ResponseMatrix = df_train.Histo2D(ResponseModel, "reco_pt", "mc_pt", "mc_weight")

# Book histograms for the testing set (already carrying their final names)
Measured_test = df_test.Histo1D(MeasuredTestModel, "reco_pt", "mc_weight")
Truth_test = df_test.Histo1D(TruthTestModel, "mc_pt", "mc_weight")

# Book the detector resolution histogram (all selected entries, train + test)
detector_resolution = df_filtered.Histo2D(DetectorResolutionModel, "mc_pt", "delta_pt", "mc_weight")

# Everything above is lazy. Fetching the first value runs ONE event loop that
# fills all counts and histograms from the same random split.
n_entries = n_entries_result.GetValue()
print(f"Loaded {n_entries} entries from {input_file_path}")
print(f"Training entries: {n_train_result.GetValue()}")
print(f"Testing entries: {n_test_result.GetValue()}")
print(f"Training histograms created")

Creating new response matrix...
Loaded 17578802 entries from /home/prozorov/dev/star/jets_pp_2012/output/jets_embedding.root
Training entries: 11101727
Testing entries: 2776151
Training histograms created


In [12]:
%jsroot on
can=ROOT.TCanvas("c", "", 1600, 600)
can.Draw()
can.Divide(3,1)
can.cd(1)
ROOT.gPad.SetLogy()
Measured.Draw("")
can.cd(2)
ROOT.gPad.SetLogy()
Truth.Draw("")
can.cd(3)
ROOT.gPad.SetLogz()
ResponseMatrix.Draw("COLZ")



In [13]:
# Wrap the filled training histograms in a RooUnfoldResponse.
# The same string is used as both the object name and its title.
response_name = "my_response"
response = ROOT.RooUnfoldResponse(response_name, response_name)
# NOTE(review): UseOverflow() is called before Setup(); confirm the installed
# RooUnfold version keeps the overflow flag across Setup().
response.UseOverflow()

# GetValue() hands over the concrete histograms behind the RDataFrame results.
measured_hist = Measured.GetValue()
truth_hist = Truth.GetValue()
response_hist = ResponseMatrix.GetValue()
response.Setup(measured_hist, truth_hist, response_hist)

<cppyy.gbl.RooUnfoldResponse object at 0x563465b5c8e0>



In [14]:
# Save the training/test histograms and the RooUnfoldResponse to a ROOT file.
#
# The file name is defined once and used for both the output file and the
# status message. It has to be "response.root" because that is the file the
# read-back and unfolding-check cells open; previously the histograms were
# written to "response_MB.root" while the message and the readers referred
# to "response.root".
response_file_name = "response.root"

response_file = ROOT.TFile(response_file_name, "RECREATE")
if not response_file or response_file.IsZombie():
    raise OSError(f"Could not open {response_file_name} for writing")

# Objects are written into the currently open file, in this order.
objects_to_save = (
    Measured,
    Truth,
    ResponseMatrix,
    detector_resolution,
    Measured_test,
    Truth_test,
    response,
)
for obj in objects_to_save:
    obj.Write()

response_file.Save()
response_file.Close()

print(f"\nResponse matrix created and saved to {response_file_name}")


Response matrix created and saved to response.root


In [15]:
# Read the saved histograms back and redraw them as a sanity check of the file.
responseFile=ROOT.TFile("response.root", "READ")

Measured = responseFile.Get("Measured")
Truth = responseFile.Get("Truth")
ResponseMatrix = responseFile.Get("ResponseMatrix")

# The test histograms are stored under the names "MeasuredTest" / "TruthTest"
# (not "Measured_test" / "Truth_test", which are only the Python variable
# names used when they were booked).
MeasuredTest = responseFile.Get("MeasuredTest")
TruthTest = responseFile.Get("TruthTest")

# TFile.Get returns a null object for a missing key; fail here with a clear
# message instead of crashing later on a null histogram.
for key_name, obj in (("Measured", Measured),
                      ("Truth", Truth),
                      ("ResponseMatrix", ResponseMatrix),
                      ("MeasuredTest", MeasuredTest),
                      ("TruthTest", TruthTest)):
    if not obj:
        raise KeyError(f"'{key_name}' not found in response.root")

responseFile.ls()

# Same three-panel layout as for the freshly filled histograms.
can=ROOT.TCanvas("c", "", 1600, 600)
can.Draw()
can.Divide(3,1)
can.cd(1)
ROOT.gPad.SetLogy()
Measured.Draw("")
can.cd(2)
ROOT.gPad.SetLogy()
Truth.Draw("")
can.cd(3)
ROOT.gPad.SetLogz()
ResponseMatrix.Draw("COLZ")



TFile**		response.root	
 TFile*		response.root	
  OBJ: TH1D	Measured	 : 0 at: 0x5634654b39a0
  OBJ: TH1D	Truth	 : 0 at: 0x5634651c4470
  OBJ: TH2D	ResponseMatrix	Response Matrix : 0 at: 0x56346635f5a0
  KEY: TH1D	Measured;1	
  KEY: TH1D	Truth;1	
  KEY: TH2D	ResponseMatrix;1	Response Matrix
  KEY: TH2D	DetectorResolution;1	Detector Resolution
  KEY: TH1D	MeasuredTest;1	
  KEY: TH1D	TruthTest;1	
  KEY: RooUnfoldResponse	my_response;1	my_response




Check unfolding
========================

In [16]:
from ROOT import RooUnfoldBayes
def divide_by_binwidth(hist):
    """
    Normalise a 1D histogram by its bin widths, in place.

    For every bin from 1 to ``hist.GetNbinsX()`` the bin content and then the
    bin error are divided by the bin width. Bins whose width is not positive
    are left unchanged. Bin 0 and bin ``GetNbinsX() + 1`` are never touched.

    Returns the same ``hist`` object that was passed in.
    """
    n_bins = hist.GetNbinsX()
    for index in range(1, n_bins + 1):
        width = hist.GetBinWidth(index)
        if not width > 0:
            continue
        # Content first, error second: the error is read only after the new
        # content has been stored.
        scaled_content = hist.GetBinContent(index) / width
        hist.SetBinContent(index, scaled_content)
        scaled_error = hist.GetBinError(index) / width
        hist.SetBinError(index, scaled_error)
    return hist


# Closure test: unfold the held-out "test" measured spectrum with the response
# stored in response.root and compare the result with the test truth spectrum.
responseFile=ROOT.TFile("response.root", "READ")
rooUnfoldResponse = responseFile.Get("my_response")

# Test-sample spectra, stored in the file under these key names.
MeasuredTest = responseFile.Get("MeasuredTest")
TruthTest = responseFile.Get("TruthTest")
TruthTest.SetLineColor(ROOT.kGreen)


# Iterative Bayesian unfolding; the third argument (4) is the number of iterations.
my_unfolding = RooUnfoldBayes(rooUnfoldResponse, MeasuredTest, 4)

unfolded = my_unfolding.Hunfold()
unfolded.SetLineColor(ROOT.kRed)
# divide by bin width (done after unfolding; each call modifies its histogram in place)
unfolded=divide_by_binwidth(unfolded)
MeasuredTest=divide_by_binwidth(MeasuredTest)
TruthTest=divide_by_binwidth(TruthTest)



# Dashed (line style 2) helper used for the reference lines in the ratio pad.
line = ROOT.TLine()
line.SetLineStyle(2)

canvas = ROOT.TCanvas("canvas", "canvas", 800, 600)
canvas.Divide(1, 2)
# Upper pad for histograms: top half of the canvas, log y, no bottom margin
# so that it sits directly on top of the ratio pad.
pad1 = canvas.cd(1)
pad1.SetPad(0.0, 0.5, 1.0, 1.0)
pad1.SetTopMargin(0.1)
pad1.SetBottomMargin(0.0)
pad1.SetLogy()

canvas.Draw()
canvas.cd(1)
# The first histogram drawn (measured) defines the axes of the upper pad.
MeasuredTest.Draw('hist')
unfolded.Draw('hist same')
TruthTest.Draw('hist same')

leg=ROOT.TLegend(0.5, 0.5, 0.9, 0.9)
leg.AddEntry(MeasuredTest, "Measured", "l")
leg.AddEntry(unfolded, "Unfolded", "l")
leg.AddEntry(TruthTest, "Truth", "l")
leg.Draw()
#========================
# Lower pad for ratio: bottom half of the canvas, no top margin.
pad2 = canvas.cd(2)
pad2.SetPad(0.0, 0.0, 1.0, 0.5)
pad2.SetTopMargin(0.0)
pad2.SetBottomMargin(0.3)

canvas.cd(2)
# create empty histogram for x range: a reset clone of the measured spectrum
# serves as the frame, so the ratio pad uses the measured (reco) binning on x.
temp=MeasuredTest.Clone("temp")
temp.Reset()
temp.GetYaxis().SetRangeUser(0.8, 1.2)
temp.GetYaxis().SetTitle("Unfolded / Truth")
temp.Draw()

# Ratio unfolded / truth, both already divided by bin width.
ratio_hist=unfolded.Clone("ratio_hist")


ratio_hist.Divide(TruthTest)
ratio_hist.Draw("E same")

# Reference lines across the x range of the ratio histogram: unity first,
# then 0.95 and 1.05 after switching the line colour to red.
line.DrawLine(ratio_hist.GetXaxis().GetXmin(), 1.0, ratio_hist.GetXaxis().GetXmax(), 1.0)
line.SetLineColor(ROOT.kRed)
line.DrawLine(ratio_hist.GetXaxis().GetXmin(), 0.95, ratio_hist.GetXaxis().GetXmax(), 0.95)
line.DrawLine(ratio_hist.GetXaxis().GetXmin(), 1.05, ratio_hist.GetXaxis().GetXmax(), 1.05)


<cppyy.gbl.TLine object at 0x56346506bf40>

Using response matrix priors
Priors:

Vector (14)  is as follows

     |        1  |
------------------
   0 |0.424008 
   1 |0.16275 
   2 |0.13851 
   3 |0.102644 
   4 |0.072327 
   5 |0.0457683 
   6 |0.0270764 
   7 |0.0154481 
   8 |0.00711279 
   9 |0.00298581 
  10 |0.00104067 
  11 |0.000271115 
  12 |5.15992e-05 
  13 |6.01501e-06 

Now unfolding...
Iteration : 0
Chi^2 of change 1.23193e-12
Iteration : 1
Chi^2 of change 1.90252e-13
Iteration : 2
Chi^2 of change 5.1031e-14
Iteration : 3
Chi^2 of change 1.88443e-14
Calculating covariances due to number of measured events


