In [11]:
import ROOT
from ROOT import TFile, TTree, TH1F, TCanvas, TAxis, TLegend, TTreeReader, TTreeReaderValue
import copy
import time
import math
import concurrent.futures
ROOT.gStyle.SetOptStat(0)

In [12]:
# ==== Implement weight of MCs ====
def get_weight(sample):
    lumi = 41.53*1000
    weight_xsec = {
        'QCD_Pt_300to470': lumi*6830/111229780,
        'QCD_Pt_470to600': lumi*552.1/27881028,
        'QCD_Pt_600to800': lumi*156.5/12807188,
        'QCD_Pt_800to1000': lumi*26.28/1906000,
        'QCD_Pt_1000to1400': lumi*7.47/1517308,
        'QCD_Pt_1400to1800': lumi*0.6484/776000,
        'QCD_Pt_1800to2400': lumi*0.08743/856734,
        'QCD_Pt_2400to3200': lumi*0.005236/1485988,
        'QCD_Pt_3200toInf': lumi*0.0001357/757837,
    }
    return weight_xsec[sample]

In [13]:
def histograming_sig(sample, batch_size, ibatch):
    weight = 1
    inFile = TFile.Open(f"/home/xyan13/Trijet/TrijetAna/TrijetAna/outputs_3_jets/{sample}_ML_study.root")
    outFile = TFile(f"{sample}_match_01_histos.root","RECREATE") # Well, we need this dummy declaration to make the clone work
    tree = inFile.Get("Events")
    variable2use = {}
    variable2use["m_jj"] = {"title": "m_{jj} [GeV]", "binning": 100, "lb": 0, "hb": 12000}
    variable2use["dijet_eta"] = {"title": "#eta_{jj}", "binning": 100, "lb": -3, "hb": 3}
    variable2use["dR_jj"] = {"title": "#Delta R_{jj}", "binning": 100, "lb": 0, "hb": 6}
    variable2use["dEta_jj"] = {"title": "#Delta #eta_{jj}", "binning": 100, "lb": 0, "hb": 3}
    variable2use["dPhi_jj"] = {"title": "#Delta #phi_{jj}", "binning": 100, "lb": 0, "hb": 3.2}
    variable2use["dR_j0j2"] = {"title": "#Delta R_{j0j2}", "binning": 100, "lb": 0, "hb": 6}
    variable2use["dEta_j0j2"] = {"title": "#Delta #eta_{j0j2}", "binning": 100, "lb": 0, "hb": 3}
    variable2use["dPhi_j0j2"] = {"title": "#Delta #phi_{j0j2}", "binning": 100, "lb": 0, "hb": 3.2}
    variable2use["dR_j1j2"] = {"title": "#Delta R_{j1j2}", "binning": 100, "lb": 0, "hb": 6}
    variable2use["dEta_j1j2"] = {"title": "#Delta #eta_{j1j2}", "binning": 100, "lb": 0, "hb": 3}
    variable2use["dPhi_j1j2"] = {"title": "#Delta #phi_{j1j2}", "binning": 100, "lb": 0, "hb": 3.2}
    variable2use["jet_eta_0"] = {"title": "#eta_{j0}", "binning": 100, "lb": -3, "hb": 3}
    variable2use["jet_eta_1"] = {"title": "#eta_{j1}", "binning": 100, "lb": -3, "hb": 3}
    variable2use["jet_eta_2"] = {"title": "#eta_{j2}", "binning": 100, "lb": -3, "hb": 3}
    variable2use["jet_ptoverm_0"] = {"title": "p^{j0}_{T}/m_{jj}", "binning": 100, "lb": 0, "hb": 3}
    variable2use["jet_ptoverm_1"] = {"title": "p^{j1}_{T}/m_{jj}", "binning": 100, "lb": 0, "hb": 3}
    variable2use["jet_ptoverm_2"] = {"title": "p^{j2}_{T}/m_{jj}", "binning": 100, "lb": 0, "hb": 3}
    variable2use["dR_jj_j"] = {"title": "#Delta R_{jj-j}", "binning": 100, "lb": 0, "hb": 6}
    variable2use["dEta_jj_j"] = {"title": "#Delta #eta_{jj-j}", "binning": 100, "lb": 0, "hb": 3}
    variable2use["dPhi_jj_j"] = {"title": "#Delta #phi_{jj-j}", "binning": 100, "lb": 0, "hb": 3.2}
    variable2use["jet_ptoverM_0"] = {"title": "p^{j0}_{T}/M_{jjj}", "binning": 100, "lb": 0, "hb": 1}
    variable2use["jet_ptoverM_1"] = {"title": "p^{j1}_{T}/M_{jjj}", "binning": 100, "lb": 0, "hb": 1}
    variable2use["jet_ptoverM_2"] = {"title": "p^{j2}_{T}/M_{jjj}", "binning": 100, "lb": 0, "hb": 1}
    variable2use["dijet_ptoverM"] = {"title": "p^{jj}_{T}/M_{jjj}", "binning": 100, "lb": 0, "hb": 1}
    variable2use["M_jjj"] = {"title": "M_{jjj}", "binning": 100, "lb": 0, "hb": 15000}

    histo_dic = {}
    for variable, param_dict in variable2use.items():
        histo_dic[variable] = TH1F(f"{sample}_{variable}",param_dict["title"],param_dict["binning"], param_dict["lb"], param_dict["hb"])
        histo_dic[variable].SetCanExtend(ROOT.TH1.kYaxis)
    
    count = 0
    start = time.time()
    evt_start = ibatch*batch_size
    evt_end = (ibatch+1)*batch_size
    if((ibatch+1)*batch_size > tree.GetEntries()):
        evt_end = tree.GetEntries()
    for ievt in range(evt_start, evt_end):
        tree.GetEntry(ievt)
        if(ievt % 100000 == 0):
            print(f"{round((ievt - evt_start)/ batch_size,2)*100}%")
        if(tree.gen_dijet_matched > 1): # unmatched
#         if(tree.gen_dijet_matched < 2): # matched
            continue
        count += 1
        for variable, hist in histo_dic.items():
            hist.Fill(getattr(tree, variable),weight)
    outFile.cd()
    outFile.Write()
    outFile.Close
    print(count)
    return 0

In [7]:
def histograming_bkg(sample, batch_size, ibatch):
    weight = get_weight(sample)
    inFile = TFile.Open(f"/home/xyan13/Trijet/TrijetAna/TrijetAna/outputs_3_jets/{sample}_ML_study.root")
    outFile = TFile(f"{sample}_{ibatch}_histos.root","RECREATE")
    tree = inFile.Get("Events")
    variable2use = {}
    variable2use["m_jj"] = {"title": "m_{jj} [GeV]", "binning": 100, "lb": 0, "hb": 8000}
    variable2use["dijet_eta"] = {"title": "#eta_{jj}", "binning": 100, "lb": -3, "hb": 3}
    variable2use["dR_jj"] = {"title": "#Delta R_{jj}", "binning": 100, "lb": 0, "hb": 6}
    variable2use["dEta_jj"] = {"title": "#Delta #eta_{jj}", "binning": 100, "lb": 0, "hb": 3}
    variable2use["dPhi_jj"] = {"title": "#Delta #phi_{jj}", "binning": 100, "lb": 0, "hb": 3.2}
    variable2use["dR_j0j2"] = {"title": "#Delta R_{j0j2}", "binning": 100, "lb": 0, "hb": 6}
    variable2use["dEta_j0j2"] = {"title": "#Delta #eta_{j0j2}", "binning": 100, "lb": 0, "hb": 3}
    variable2use["dPhi_j0j2"] = {"title": "#Delta #phi_{j0j2}", "binning": 100, "lb": 0, "hb": 3.2}
    variable2use["dR_j1j2"] = {"title": "#Delta R_{j1j2}", "binning": 100, "lb": 0, "hb": 6}
    variable2use["dEta_j1j2"] = {"title": "#Delta #eta_{j1j2}", "binning": 100, "lb": 0, "hb": 3}
    variable2use["dPhi_j1j2"] = {"title": "#Delta #phi_{j1j2}", "binning": 100, "lb": 0, "hb": 3.2}
    variable2use["jet_eta_0"] = {"title": "#eta_{j0}", "binning": 100, "lb": -3, "hb": 3}
    variable2use["jet_eta_1"] = {"title": "#eta_{j1}", "binning": 100, "lb": -3, "hb": 3}
    variable2use["jet_eta_2"] = {"title": "#eta_{j2}", "binning": 100, "lb": -3, "hb": 3}
    variable2use["jet_ptoverm_0"] = {"title": "p^{j0}_{T}/m_{jj}", "binning": 100, "lb": 0, "hb": 3}
    variable2use["jet_ptoverm_1"] = {"title": "p^{j1}_{T}/m_{jj}", "binning": 100, "lb": 0, "hb": 3}
    variable2use["jet_ptoverm_2"] = {"title": "p^{j2}_{T}/m_{jj}", "binning": 100, "lb": 0, "hb": 3}
    variable2use["dR_jj_j"] = {"title": "#Delta R_{jj-j}", "binning": 100, "lb": 0, "hb": 6}
    variable2use["dEta_jj_j"] = {"title": "#Delta #eta_{jj-j}", "binning": 100, "lb": 0, "hb": 3}
    variable2use["dPhi_jj_j"] = {"title": "#Delta #phi_{jj-j}", "binning": 100, "lb": 0, "hb": 3.2}
    variable2use["jet_ptoverM_0"] = {"title": "p^{j0}_{T}/M_{jjj}", "binning": 100, "lb": 0, "hb": 1}
    variable2use["jet_ptoverM_1"] = {"title": "p^{j1}_{T}/M_{jjj}", "binning": 100, "lb": 0, "hb": 1}
    variable2use["jet_ptoverM_2"] = {"title": "p^{j2}_{T}/M_{jjj}", "binning": 100, "lb": 0, "hb": 1}
    variable2use["dijet_ptoverM"] = {"title": "p^{jj}_{T}/M_{jjj}", "binning": 100, "lb": 0, "hb": 1}
    variable2use["M_jjj"] = {"title": "M_{jjj}", "binning": 100, "lb": 0, "hb": 12000}

    histo_dic = {}
    for variable, param_dict in variable2use.items():
        histo_dic[variable] = TH1F(f"{sample}_{variable}",param_dict["title"],param_dict["binning"], param_dict["lb"], param_dict["hb"])
        histo_dic[variable].SetCanExtend(ROOT.TH1.kYaxis)
    
    t_start = time.time()
    evt_start = ibatch*batch_size
    evt_end = (ibatch+1)*batch_size
    if((ibatch+1)*batch_size > tree.GetEntries()):
        evt_end = tree.GetEntries()
    for ievt in range(evt_start, evt_end):
        tree.GetEntry(ievt)
        if(ievt % 200000 == 0):
            # time check:
            t_end = time.time()
            speed = (t_end - t_start)/(ievt - evt_start+1)*1000
            t_remain = (evt_end - ievt) / 1000 * speed / 60
            print(f"Batch #{ibatch} >> Avg. speed: {speed}s/1k candidates, time remaining: {t_remain}mins\n".format(ibatch, speed, t_remain))
        for variable, hist in histo_dic.items():
            hist.Fill(getattr(tree, variable),weight)
    outFile.cd()
#     for variable, hist in histo_dic.items():
#         hist.Write()
    outFile.Write()
    outFile.Close
    return 0

In [None]:
# Backgrounds dryrun
if __name__ == '__main__':
    
    sample_list = ["QCD_Pt_300to470"]
                   
    for sample in sample_list:
        temp_file = TFile(f"/home/xyan13/Trijet/TrijetAna/TrijetAna/outputs_3_jets/{sample}_ML_study.root","READ")
        temp_tree = temp_file.Get("Events")
        tot_evts = temp_tree.GetEntries()
        
        with concurrent.futures.ProcessPoolExecutor() as executor:
            if "QCD" in sample:
                results = [executor.submit(histograming_bkg, sample, 150000, ibatch) for ibatch in range(0,1)]
            else:
                results = [executor.submit(histograming_sig, sample, 150000, ibatch) for ibatch in range(0,1)]
            status = [r.result() for r in results]
            print(status)

    print(f"Time used: {round(time.time() - main_start, 2)}")

In [8]:
# Backgrounds
if __name__ == '__main__':
    
    main_start = time.time()
    
#     sample_list = ["QCD_Pt_300to470"]
    sample_list = ["QCD_Pt_300to470", "QCD_Pt_470to600","QCD_Pt_600to800","QCD_Pt_800to1000","QCD_Pt_1000to1400","QCD_Pt_1400to1800","QCD_Pt_1800to2400","QCD_Pt_2400to3200","QCD_Pt_3200toInf"]
                   
    for sample in sample_list:
        temp_file = TFile(f"/home/xyan13/Trijet/TrijetAna/TrijetAna/outputs_3_jets/{sample}_ML_study.root","READ")
        temp_tree = temp_file.Get("Events")
        tot_evts = temp_tree.GetEntries()

        expect_time = 0.1 # in hrs
        known_speed = 2.5 # sec per 10k candidates
        evt_batch = int(expect_time * 3600 / known_speed * 10000)
        num_batch = math.ceil(tot_evts / evt_batch)
        print(f"Number of Candidates to be processed: {tot_evts}")
        print(f"Candidates to be processed per batch: {evt_batch}")
        print(f"Number of batches to be processed: {num_batch}")
        with concurrent.futures.ProcessPoolExecutor() as executor:
            if "QCD" in sample:
                results = [executor.submit(histograming_bkg, sample, evt_batch, ibatch) for ibatch in range(0,num_batch)]
            else:
                results = [executor.submit(histograming_sig, sample, evt_batch, ibatch) for ibatch in range(0,num_batch)]
            status = [r.result() for r in results]
            print(status)

    print(f"Time used: {round(time.time() - main_start, 2)}")

Number of Candidates to be processed: 13502853
Candidates to be processed per batch: 1440000
Number of batches to be processed: 10
Batch #0 >> Avg. speed: 57.74331092834473s/1k candidates, time remaining: 1385.8394622802734mins

Batch #5 >> Avg. speed: 5720.122814178467s/1k candidates, time remaining: 137282.9475402832mins

Batch #4 >> Avg. speed: 0.4027660350609307s/1k candidates, time remaining: 9.397874151421718mins

Batch #9 >> Avg. speed: 0.42279997854446644s/1k candidates, time remaining: 3.5434372935170098mins

Batch #8 >> Avg. speed: 0.34298458224056594s/1k candidates, time remaining: 7.774317197452828mins

Batch #3 >> Avg. speed: 0.3553008906842858s/1k candidates, time remaining: 8.053486855510478mins

Batch #7 >> Avg. speed: 0.3239126193761923s/1k candidates, time remaining: 7.12607762627623mins

Batch #2 >> Avg. speed: 0.3277981959079021s/1k candidates, time remaining: 7.211560309973846mins

Batch #1 >> Avg. speed: 0.30797155540238813s/1k candidates, time remaining: 6.570059


Batch #2 >> Avg. speed: 0.3399001029245828s/1k candidates, time remaining: 7.477802264340822mins

Batch #11 >> Avg. speed: 0.3249013010638555s/1k candidates, time remaining: 6.9312277560289175mins

Batch #6 >> Avg. speed: 0.3289129651174467s/1k candidates, time remaining: 7.01680992250553mins

Batch #1 >> Avg. speed: 0.3300111157194634s/1k candidates, time remaining: 7.040237135348552mins

Batch #0 >> Avg. speed: 0.3241334575244696s/1k candidates, time remaining: 6.698758122172372mins

Batch #5 >> Avg. speed: 0.3252618894013837s/1k candidates, time remaining: 6.722079047628596mins

Batch #10 >> Avg. speed: 0.3283264075508541s/1k candidates, time remaining: 6.785412422717652mins

Batch #15 >> Avg. speed: 0.335256289044864s/1k candidates, time remaining: 6.928629973593856mins

Batch #4 >> Avg. speed: 0.32156145202469383s/1k candidates, time remaining: 6.431229040493877mins

Batch #14 >> Avg. speed: 0.32674330129152973s/1k candidates, time remaining: 6.534866025830595mins

Batch #9 >> Av


Batch #1 >> Avg. speed: 0.307977692268621s/1k candidates, time remaining: 1.4372292305868979mins

Batch #5 >> Avg. speed: 0.30399676043692664s/1k candidates, time remaining: 1.2159870417477066mins

Batch #0 >> Avg. speed: 0.3061859962866407s/1k candidates, time remaining: 1.224743985146563mins

Batch #10 >> Avg. speed: 0.30742407134572425s/1k candidates, time remaining: 1.229696285382897mins

Batch #15 >> Avg. speed: 0.3084115027245682s/1k candidates, time remaining: 1.2336460108982727mins

Batch #4 >> Avg. speed: 0.30436674960069593s/1k candidates, time remaining: 1.0145558320023198mins

Batch #14 >> Avg. speed: 0.30656267600346804s/1k candidates, time remaining: 1.0218755866782268mins

Batch #9 >> Avg. speed: 0.31313614530175216s/1k candidates, time remaining: 1.0437871510058405mins

Batch #8 >> Avg. speed: 0.306088862972382s/1k candidates, time remaining: 0.816236967926352mins

Batch #12 >> Avg. speed: 0.2973748322182626s/1k candidates, time remaining: 0.5947496644365252mins

Batch


Batch #11 >> Avg. speed: 0.28559216564878276s/1k candidates, time remaining: 4.188685096182147mins

Batch #5 >> Avg. speed: 0.278442734132817s/1k candidates, time remaining: 3.898198277859438mins

Batch #0 >> Avg. speed: 0.2794563233356576s/1k candidates, time remaining: 3.9123885266992064mins

Batch #10 >> Avg. speed: 0.2798562467884321s/1k candidates, time remaining: 3.917987455038049mins

Batch #4 >> Avg. speed: 0.2816004465826305s/1k candidates, time remaining: 3.75467262110174mins

Batch #9 >> Avg. speed: 0.28316295233685385s/1k candidates, time remaining: 3.7755060311580513mins

Batch #8 >> Avg. speed: 0.2792229781620453s/1k candidates, time remaining: 3.5368243900525735mins

Batch #3 >> Avg. speed: 0.28032753615557926s/1k candidates, time remaining: 3.5508154579706708mins

Batch #2 >> Avg. speed: 0.27896485757052303s/1k candidates, time remaining: 3.3475782908462763mins

Batch #7 >> Avg. speed: 0.27945932012884345s/1k candidates, time remaining: 3.3535118415461214mins

Batch #1

Batch #0 >> Avg. speed: 61.925411224365234s/1k candidates, time remaining: 1273.7437834739685mins

Batch #0 >> Avg. speed: 0.2572910235443482s/1k candidates, time remaining: 4.43458231813587mins

Batch #0 >> Avg. speed: 0.26192538404781457s/1k candidates, time remaining: 3.641373997494067mins

Batch #0 >> Avg. speed: 0.2618837267478456s/1k candidates, time remaining: 2.7678491079979803mins

Batch #0 >> Avg. speed: 0.2610714262685396s/1k candidates, time remaining: 1.8890258166703962mins

Batch #0 >> Avg. speed: 0.26861667259693517s/1k candidates, time remaining: 1.0482317953641067mins

Batch #0 >> Avg. speed: 0.27749786259524123s/1k candidates, time remaining: 0.15789628381669227mins

[0]
Number of Candidates to be processed: 1340877
Candidates to be processed per batch: 1440000
Number of batches to be processed: 1
Batch #0 >> Avg. speed: 128.9980411529541s/1k candidates, time remaining: 2882.8417737841605mins

Batch #0 >> Avg. speed: 0.3079022688439872s/1k candidates, time remaining: 

In [14]:
# Signal
if __name__ == '__main__':
    
    main_start = time.time()
    sample_list = ["Res1ToRes2GluTo3Glu_M1-3000_R-0p3", "Res1ToRes2GluTo3Glu_M1-3000_R-0p7", 
                   "Res1ToRes2GluTo3Glu_M1-5000_R-0p3", "Res1ToRes2GluTo3Glu_M1-5000_R-0p7",
                   "Res1ToRes2GluTo3Glu_M1-7000_R-0p3", "Res1ToRes2GluTo3Glu_M1-7000_R-0p7"]
                   
    for sample in sample_list:
        temp_file = TFile(f"/home/xyan13/Trijet/TrijetAna/TrijetAna/outputs_3_jets/{sample}_ML_study.root","READ")
        temp_tree = temp_file.Get("Events")
        tot_evts = temp_tree.GetEntries()
        print(tot_evts)
        
        with concurrent.futures.ProcessPoolExecutor() as executor:
            if "QCD" in sample:
                results = [executor.submit(histograming_bkg, sample, evt_batch, ibatch) for ibatch in range(0,num_batch)]
            else:
                results = [executor.submit(histograming_sig, sample, tot_evts, ibatch) for ibatch in range(0,1)]
            status = [r.result() for r in results]
            print(status)

    print(f"Time used: {round(time.time() - main_start, 2)}")

138495
0.0%
72.0%
100683
[0]
144192
0.0%
69.0%
102287
[0]
147000
0.0%
68.0%
106497
[0]
147834
0.0%
68.0%
104539
[0]
148515
0.0%
67.0%
107108
[0]
139713
0.0%
72.0%
98461
[0]
Time used: 174.71
