In [None]:
import ROOT
import uproot
# Input ROOT file and the name of the tree read from it.
file_name = '/home/prozorov/dev/star/jets_pp_2012/output/jets_embedding.root'
tree_name = 'MatchedTree'  # Update this to your tree name

# Branches loaded into memory; everything else in the tree is ignored.
selected_branches = [
    'mc_pt',
    'mc_weight',
    'reco_pt',
    'reco_trigger_match_JP2',
    'reco_trigger_match_HT2',
    'deltaR',
]

my_file = uproot.open(file_name)
my_tree = my_file[tree_name]

# Only the first 10000 entries are read, returned through uproot's pandas backend.
all_jets = my_tree.arrays(selected_branches, library='pd', entry_stop=10000)

# Work on an explicit copy, then store both trigger-match columns as integers.
all_jets = all_jets.copy()
for trigger_column in ('reco_trigger_match_JP2', 'reco_trigger_match_HT2'):
    all_jets[trigger_column] = all_jets[trigger_column].astype(int)

In [None]:
import numpy as np

# Reconstructed-level pT bin edges (GeV/c): unit steps from 5 to 40,
# steps of 2 from 40 to 60, then the wider edges 60, 64, 70, 90.
pt_reco_bins = np.concatenate([
    np.arange(5, 40),
    np.arange(40, 60, 2),
    [60, 64, 70, 90],
]).astype(np.float64)

# Generator-level (MC) pT bin edges (GeV/c), listed explicitly because the
# widths are irregular.
pt_mc_bins = np.array(
    [
        5.0, 6.9, 8.2, 9.7, 11.5,
        13.6, 16.1, 19.0, 22.5, 26.6,
        31.4, 37.2, 44.0, 52.0, 70.0,
    ],
    dtype=np.float64,
)

In [None]:


test_fraction = 0.1  # Fraction of data to use for testing
split_seed = 12345   # Seed of the random train/test assignment (makes re-runs reproducible)

print("Creating new response matrix...")

# Histogram models for RDataFrame.
MeasuredModel = ROOT.RDF.TH1DModel("Measured", ";p_{t}, GeV/c; dN/dp_{t}", len(pt_reco_bins)-1, pt_reco_bins)

TruthModel = ROOT.RDF.TH1DModel("Truth", ";p_{t}, GeV/c;dN/dp_{t}", len(pt_mc_bins)-1, pt_mc_bins)

# The test-sample histograms get their own names. Reusing the training models
# would name them "Measured"/"Truth" as well, so they could not be told apart
# from the training histograms once written to a file.
MeasuredTestModel = ROOT.RDF.TH1DModel("Measured_test", ";p_{t}, GeV/c; dN/dp_{t}", len(pt_reco_bins)-1, pt_reco_bins)

TruthTestModel = ROOT.RDF.TH1DModel("Truth_test", ";p_{t}, GeV/c;dN/dp_{t}", len(pt_mc_bins)-1, pt_mc_bins)

ResponseModel = ROOT.RDF.TH2DModel("ResponseMatrix", "Response Matrix; Measured; Truth",
                                   len(pt_reco_bins)-1, pt_reco_bins,
                                   len(pt_mc_bins)-1, pt_mc_bins)

DetectorResolutionModel = ROOT.RDF.TH2DModel("DetectorResolution",
                                             "Detector Resolution; p_{T}^{mc}, GeV/c; p_{T}^{reco} - p_{T}^{mc}, GeV/c",
                                             1000, 0, 100, 1000, -100, 100)

# Open input file with RDataFrame
input_file_path = "/home/prozorov/dev/star/jets_pp_2012/output/jets_embedding.root"

# Create RDataFrame. The default columns are passed as a list: a Python set is
# unordered and is not a sequence.
df = ROOT.RDataFrame("MatchedTree", input_file_path, ["mc_pt", "reco_pt", "mc_weight"])
n_entries = df.Count().GetValue()
print(f"Loaded {n_entries} entries from {input_file_path}")

# Filter out invalid entries (-9 is used as the "no value" marker in both columns)
df_filtered = df.Filter("mc_pt != -9 && reco_pt != -9")
# Add column for detector resolution (delta pT)
df_filtered = df_filtered.Define("delta_pt", "reco_pt - mc_pt")
# Add a random column for train/test split. gRandom is seeded here so that
# re-running the cell yields the same split.
# NOTE(review): gRandom is a single global generator - this split assumes
# implicit multi-threading is not enabled.
ROOT.gRandom.SetSeed(split_seed)
df_filtered = df_filtered.Define("random", "gRandom->Uniform()")
# Split into training and testing dataframes
df_train = df_filtered.Filter(f"random > {test_fraction}")
df_test = df_filtered.Filter(f"random <= {test_fraction}")

# Book every result BEFORE requesting any value. RDataFrame results are lazy:
# each GetValue() on a not-yet-computed result starts a new event loop, and a
# new loop draws new random numbers. Booking first means the counts and all
# histograms below come from one loop and therefore from the same split.
n_train = df_train.Count()
n_test = df_test.Count()

# Histograms for the training set
Measured = df_train.Histo1D(MeasuredModel, "reco_pt", "mc_weight")
Truth = df_train.Histo1D(TruthModel, "mc_pt", "mc_weight")
ResponseMatrix = df_train.Histo2D(ResponseModel,  "reco_pt", "mc_pt", "mc_weight")

# Histograms for the testing set
Measured_test = df_test.Histo1D(MeasuredTestModel, "reco_pt", "mc_weight")
Truth_test = df_test.Histo1D(TruthTestModel, "mc_pt", "mc_weight")
# Detector resolution histogram (uses all valid entries, train and test)
detector_resolution = df_filtered.Histo2D(DetectorResolutionModel, "mc_pt", "delta_pt", "mc_weight")

# The first GetValue() runs the single event loop that fills everything booked above.
print(f"Training entries: {n_train.GetValue()}")
print(f"Testing entries: {n_test.GetValue()}")

print(f"Training histograms created")

In [None]:
%jsroot on
# One wide canvas with three pads: measured spectrum, truth spectrum, response matrix.
can = ROOT.TCanvas("c", "", 1600, 600)
can.Draw()
can.Divide(3, 1)

# (histogram, draw option, use a log z axis instead of a log y axis)
training_panels = (
    (Measured, "", False),
    (Truth, "", False),
    (ResponseMatrix, "COLZ", True),
)
for pad_number, (panel_hist, draw_option, log_z) in enumerate(training_panels, start=1):
    can.cd(pad_number)
    if log_z:
        ROOT.gPad.SetLogz()
    else:
        ROOT.gPad.SetLogy()
    panel_hist.Draw(draw_option)

In [None]:
# Build the RooUnfold response from the training histograms. The object has to
# be created first: no earlier cell defines `response`, so calling Setup() on
# it directly raises NameError on a fresh kernel.
response = ROOT.RooUnfoldResponse()
response.Setup(Measured.GetValue(), Truth.GetValue(), ResponseMatrix.GetValue())

In [None]:
# Name the RooUnfold response; this is the key it is stored under in the file.
response.SetName("my_response")

# Save histograms and the response object to file
response_file = ROOT.TFile("response.root", "RECREATE")
Measured.Write()
Truth.Write()
ResponseMatrix.Write()
detector_resolution.Write()
# Write the test histograms under explicit keys so they cannot collide with the
# training histograms and can be read back as "Measured_test" / "Truth_test".
Measured_test.Write("Measured_test")
Truth_test.Write("Truth_test")
# Store the response object itself so it can be read back as "my_response".
response.Write()
response_file.Save()
response_file.Close()

print("\nResponse matrix created and saved to response.root")

In [None]:
# Re-open the saved file and read the histograms back as a sanity check.
responseFile = ROOT.TFile("response.root", "READ")

Measured = responseFile.Get("Measured")
Truth = responseFile.Get("Truth")
ResponseMatrix = responseFile.Get("ResponseMatrix")

MeasuredTest = responseFile.Get("Measured_test")
TruthTest = responseFile.Get("Truth_test")
# List the file contents so missing or duplicated keys are visible.
responseFile.ls()

can = ROOT.TCanvas("c", "", 1600, 600)
can.Draw()
can.Divide(3, 1)
can.cd(1)
ROOT.gPad.SetLogy()
Measured.Draw("")
can.cd(2)
ROOT.gPad.SetLogy()
Truth.Draw("")
can.cd(3)
ROOT.gPad.SetLogz()
# The third pad was prepared (log z) but nothing was drawn in it.
ResponseMatrix.Draw("COLZ")



Check unfolding
========================

In [None]:
def divide_by_binwidth(hist):
    """
    Divide the content and error of every regular bin by its bin width.

    The histogram is modified in place. Bins 1..GetNbinsX() are processed
    (underflow and overflow are left untouched), and bins whose width is not
    positive are skipped.

    Parameters
    ----------
    hist : object exposing GetNbinsX, GetBinWidth, GetBinContent,
        GetBinError, SetBinContent and SetBinError (e.g. a ROOT TH1).

    Returns
    -------
    The same ``hist`` object, to allow ``h = divide_by_binwidth(h)``.
    """
    for bin_index in range(1, hist.GetNbinsX() + 1):
        bin_width = hist.GetBinWidth(bin_index)
        if not bin_width > 0:
            continue
        # Read both values before writing either one: for a histogram that does
        # not store explicit errors, the reported error is derived from the
        # current content, so reading it after SetBinContent would return the
        # error of the already-scaled bin.
        content = hist.GetBinContent(bin_index)
        error = hist.GetBinError(bin_index)
        hist.SetBinContent(bin_index, content / bin_width)
        hist.SetBinError(bin_index, error / bin_width)
    return hist


# Closure test: unfold the measured test spectrum and compare it with the
# test-sample truth.
responseFile = ROOT.TFile("response.root", "READ")
rooUnfoldResponse = responseFile.Get("my_response")
# Read from the handle opened above (`responseFile`), using the same
# "Measured_test" / "Truth_test" keys the read-back cell uses.
hist = responseFile.Get("Measured_test")
hist_truth = responseFile.Get("Truth_test")

# TFile.Get returns a null object for a missing key; fail here with a clear
# message instead of passing a null pointer on to RooUnfold.
for key_name, stored_object in (("my_response", rooUnfoldResponse),
                                ("Measured_test", hist),
                                ("Truth_test", hist_truth)):
    if not stored_object:
        raise RuntimeError(f"Object '{key_name}' not found in response.root")

# Iterative Bayesian unfolding with 5 iterations. The unfolding must use the
# raw measured counts, so it is done before any bin-width normalisation.
unfolding = ROOT.RooUnfoldBayes(rooUnfoldResponse, hist, 5)
unfolded = unfolding.Hunfold()
unfolded.SetLineColor(ROOT.kRed)
# Divide by bin width. All three spectra get the same normalisation, otherwise
# the truth curve is not comparable with the unfolded one.
unfolded = divide_by_binwidth(unfolded)
hist = divide_by_binwidth(hist)
hist_truth = divide_by_binwidth(hist_truth)
can = ROOT.TCanvas('can', '', 800, 600)
can.Draw()
can.SetLogy()
hist.Draw('hist')
unfolded.Draw('hist same')
hist_truth.SetLineColor(ROOT.kGreen)
hist_truth.Draw('hist same')



The parameters of `RooUnfoldSpec` are as follows:

- `name` and `title` - as with any other ROOT object
- `truth hist` and `truth observable` - the observable is needed because RooFit is used to define the range over which the unfolding is valid
- `reco hist` and `reco observable` - as above
- `response hist` - the response matrix histogram (we are unfolding, after all)
- `background hist` - optional
- `includeUnderflowOverflow`, `errorThreshold` - as described


In [None]:

# spec = ROOT.RooUnfoldSpec("unfold", "unfold",
#                           truthHist,"obs_truth",
#                           recoHist,"obs_reco",
#                           responseHist,
#                           recoBkgHist,
#                           dataHist,
#                           True,-1)



In [None]:
# unfolding = spec.makeFunc(ROOT.RooUnfolding.kBayes, 10)
# Hunfolded = unfolding.unfolding().Vunfold()
# Hunfolded.Print() # This should be a histogram.

In [None]:
# def make_hist(vec, template, title="", errors=None):
#     tmp = template.Clone(title)
#     tmp.SetTitle(title)
#     for b in range(tmp.GetNbinsX()+1):
#         if np.isnan(vec[b]): vec[b] =0
#         tmp.SetBinContent(b,vec[b])
#         if errors: tmp.SetBinError(b,errors[b])
#         else: tmp.SetBinError(b,0)
#     tmp.SetStats(0)
#     tmp.SetLineColor(1)
#     return tmp

In [None]:
# response = spec.makeFunc(ROOT.RooUnfolding.kInvert).unfolding().response()

# pur_vec = response.Vpurity() # a vector of the purity
# eff_vec = response.Vefficiency() # a vector of the efficiency

# c1 = ROOT.TCanvas()
# c1.Divide(2,1,.0001,0.0001)
# c1.cd(1)
# purity = make_hist(pur_vec, recoHist, "purity")
# purity.Draw()
# c1.cd(2)
# efficiency = make_hist(eff_vec, truthHist, "efficiency")
# efficiency.Draw()
# c1.Draw()

