In [None]:
import ROOT
from ROOT import TFile, TTree, TH1F, TCanvas, TAxis, TLegend, TTreeReader, TTreeReaderValue
import copy
import time
import math
import concurrent.futures
# Global ROOT style setting: option 0 turns off the statistics box on drawn histograms.
ROOT.gStyle.SetOptStat(0)

In [None]:
# ==== Implement weight of MCs ====
def get_weight(sample):
    """Look up the normalisation weight applied to every event of ``sample``.

    Each weight is ``lumi * numerator / denominator`` with
    ``lumi = 41.53 * 1000``.
    NOTE(review): the numerator looks like a cross section and the denominator
    like a generated-event count, with the luminosity converted from fb^-1 to
    pb^-1 -- confirm the units against the sample bookkeeping.

    Args:
        sample: one of the ``QCD_Pt_*`` sample names listed in the table below.

    Returns:
        The weight as a float.

    Raises:
        KeyError: if ``sample`` is not present in the table.
    """
    integrated_lumi = 41.53*1000
    # sample name -> (numerator, denominator) of the weight formula
    weight_inputs = {
        'QCD_Pt_300to470': (6830, 111229780),
        'QCD_Pt_470to600': (552.1, 27881028),
        'QCD_Pt_600to800': (156.5, 12807188),
        'QCD_Pt_800to1000': (26.28, 1906000),
        'QCD_Pt_1000to1400': (7.47, 1517308),
        'QCD_Pt_1400to1800': (0.6484, 776000),
        'QCD_Pt_1800to2400': (0.08743, 856734),
        'QCD_Pt_2400to3200': (0.005236, 1485988),
        'QCD_Pt_3200toInf': (0.0001357, 757837),
    }
    numerator, denominator = weight_inputs[sample]
    # Multiply first, then divide, so the floating-point result is unchanged.
    return integrated_lumi*numerator/denominator

In [None]:
def histograming(sample, batch_size, ibatch):
    """Fill weighted 1D histograms for one batch of entries of ``sample``.

    Reads entries ``[ibatch*batch_size, (ibatch+1)*batch_size)``, clamped to
    the number of entries, from the ``Events`` tree of the sample's
    ``*_ML_study.root`` input file. One ``TH1F`` per configured branch is
    filled with the weight from ``get_weight(sample)`` and written to
    ``{sample}_{ibatch}.root``.

    Args:
        sample: sample name; used for the weight lookup and for file names.
        batch_size: number of tree entries per batch.
        ibatch: zero-based index of the batch to process.

    Returns:
        0 on completion.

    Raises:
        OSError: if the input file cannot be opened.
        RuntimeError: if the input file contains no ``Events`` object.
        KeyError: propagated from ``get_weight`` for an unknown sample.
    """
    weight = get_weight(sample)
    in_path = f"/home/xyan13/Trijet/TrijetAna/TrijetAna/outputs/{sample}_ML_study.root"
    inFile = TFile.Open(in_path)
    if not inFile or inFile.IsZombie():
        raise OSError(f"Cannot open input file: {in_path}")

    # The output file is created before the histograms are booked so that it
    # is the current directory when they are constructed; outFile.Write()
    # below then writes them without explicit per-histogram Write() calls.
    outFile = TFile(f"{sample}_{ibatch}.root","RECREATE")
    try:
        tree = inFile.Get("Events")
        if not tree:
            raise RuntimeError(f"No 'Events' tree found in {in_path}")

        # Branch name -> axis title, number of bins, lower and upper edge.
        # NOTE(review): the eta-like ranges start at 0, so these branches
        # presumably hold absolute values -- confirm against the ntuple.
        variable2use = {}
        variable2use["m_jj"] = {"title": "m_{jj} [GeV]", "binning": 100, "lb": 0, "hb": 8000}
        variable2use["dijet_eta"] = {"title": "#eta_{jj}", "binning": 100, "lb": 0, "hb": 3}
        variable2use["dR_jj"] = {"title": "#Delta R_{jj}", "binning": 100, "lb": 0, "hb": 6}
        variable2use["dEta_jj"] = {"title": "#Delta #eta_{jj}", "binning": 100, "lb": 0, "hb": 3}
        variable2use["dPhi_jj"] = {"title": "#Delta #phi_{jj}", "binning": 100, "lb": 0, "hb": 6.5}
        variable2use["dR_j0j2"] = {"title": "#Delta R_{j0j2}", "binning": 100, "lb": 0, "hb": 6}
        variable2use["dEta_j0j2"] = {"title": "#Delta #eta_{j0j2}", "binning": 100, "lb": 0, "hb": 3}
        variable2use["dPhi_j0j2"] = {"title": "#Delta #phi_{j0j2}", "binning": 100, "lb": 0, "hb": 6.5}
        variable2use["dR_j1j2"] = {"title": "#Delta R_{j1j2}", "binning": 100, "lb": 0, "hb": 6}
        variable2use["dEta_j1j2"] = {"title": "#Delta #eta_{j1j2}", "binning": 100, "lb": 0, "hb": 3}
        variable2use["dPhi_j1j2"] = {"title": "#Delta #phi_{j1j2}", "binning": 100, "lb": 0, "hb": 6.5}
        variable2use["jet_eta_0"] = {"title": "#eta_{j0}", "binning": 100, "lb": 0, "hb": 3}
        variable2use["jet_eta_1"] = {"title": "#eta_{j1}", "binning": 100, "lb": 0, "hb": 3}
        variable2use["jet_eta_2"] = {"title": "#eta_{j2}", "binning": 100, "lb": 0, "hb": 3}
        variable2use["jet_ptoverm_0"] = {"title": "p^{j0}_{T}/m_{jj}", "binning": 100, "lb": 0, "hb": 3}
        variable2use["jet_ptoverm_1"] = {"title": "p^{j1}_{T}/m_{jj}", "binning": 100, "lb": 0, "hb": 3}
        variable2use["jet_ptoverm_2"] = {"title": "p^{j2}_{T}/m_{jj}", "binning": 100, "lb": 0, "hb": 3}
        variable2use["dR_jj_j"] = {"title": "#Delta R_{jj-j}", "binning": 100, "lb": 0, "hb": 6}
        variable2use["dEta_jj_j"] = {"title": "#Delta #eta_{jj-j}", "binning": 100, "lb": 0, "hb": 3}
        variable2use["dPhi_jj_j"] = {"title": "#Delta #phi_{jj-j}", "binning": 100, "lb": 0, "hb": 6.5}
        variable2use["jet_ptoverM_0"] = {"title": "p^{j0}_{T}/M_{jjj}", "binning": 100, "lb": 0, "hb": 3}
        variable2use["jet_ptoverM_1"] = {"title": "p^{j1}_{T}/M_{jjj}", "binning": 100, "lb": 0, "hb": 3}
        variable2use["jet_ptoverM_2"] = {"title": "p^{j2}_{T}/M_{jjj}", "binning": 100, "lb": 0, "hb": 1}
        variable2use["dijet_ptoverM"] = {"title": "p^{jj}_{T}/M_{jjj}", "binning": 100, "lb": 0, "hb": 1}
        variable2use["M_jjj"] = {"title": "M_{jjj}", "binning": 100, "lb": 0, "hb": 8000}

        histo_dic = {}
        for variable, param_dict in variable2use.items():
            histo_dic[variable] = TH1F(f"{sample}_{variable}",param_dict["title"],param_dict["binning"], param_dict["lb"], param_dict["hb"])
            # NOTE(review): kept from the original; confirm that extending
            # the Y axis is the intent for a 1D histogram.
            histo_dic[variable].SetCanExtend(ROOT.TH1.kYaxis)

        # Entry window of this batch; the last batch is clamped to the tree size.
        evt_start = ibatch*batch_size
        evt_end = min((ibatch+1)*batch_size, tree.GetEntries())
        for ievt in range(evt_start, evt_end):
            tree.GetEntry(ievt)
            if(ievt % 100000 == 0):
                # Progress is reported relative to the nominal batch size.
                print(f"{round((ievt - evt_start)/ batch_size,2)*100}%")
            for variable, hist in histo_dic.items():
                hist.Fill(getattr(tree, variable),weight)

        outFile.cd()
        outFile.Write()
    finally:
        # The original wrote `outFile.Close` without parentheses, which never
        # called the method. Close both files on success and on error.
        outFile.Close()
        inFile.Close()
    return 0

In [None]:
if __name__ == '__main__':
    # Driver: for every sample, split the tree into fixed-size batches and
    # histogram each batch in a separate worker process.

    main_start = time.time()

    sample_list = ["QCD_Pt_300to470"]
    #sample_list = ["QCD_Pt_470to600","QCD_Pt_600to800","QCD_Pt_800to1000","QCD_Pt_1000to1400","QCD_Pt_1400to1800","QCD_Pt_1800to2400","QCD_Pt_2400to3200","QCD_Pt_3200toInf"]

    # Batch size is chosen so that one batch takes about `expect_time` hours
    # at the measured processing speed. It does not depend on the sample, so
    # it is computed once outside the loop.
    expect_time = 0.2 # in hrs
    known_speed = 4 # sec per 10k candidates
    evt_batch = int(expect_time * 3600 / known_speed * 10000)

    for sample in sample_list:
        sample_path = f"/home/xyan13/Trijet/TrijetAna/TrijetAna/outputs/{sample}_ML_study.root"
        temp_file = TFile(sample_path,"READ")
        try:
            if temp_file.IsZombie():
                raise OSError(f"Cannot open input file: {sample_path}")
            temp_tree = temp_file.Get("Events")
            if not temp_tree:
                raise RuntimeError(f"No 'Events' tree found in {sample_path}")
            tot_evts = temp_tree.GetEntries()
        finally:
            # The file is only needed for the entry count; close it before the
            # worker processes are started so no open handle is left behind.
            temp_file.Close()

        num_batch = math.ceil(tot_evts / evt_batch)
        print(f"Number of Candidates to be processed: {tot_evts}")
        print(f"Candidates to be processed per batch: {evt_batch}")
        print(f"Number of batches to be processed: {num_batch}")
        with concurrent.futures.ProcessPoolExecutor() as executor:
            results = [executor.submit(histograming, sample, evt_batch, ibatch) for ibatch in range(0,num_batch)]
            # result() blocks until the batch is done and re-raises worker errors.
            status = [r.result() for r in results]
            print(status)

    print(f"Time used: {round(time.time() - main_start, 2)}")