In [2]:
import uproot
import pandas
import awkward as ak
import vector
import matplotlib.pyplot as plt
import os

luminosity_ifb = 10.

def createDirs(path):
    """
    Function to create any directories needed to store the output of a function.

    Args:
        path (str): the path of directories you want to exist

    Returns:
        void
    """
    base = path.split("/")[0]
    dirs = path.split("/")[:]
    tmp_dir = ""
    for folder in dirs:
        tmp_dir = tmp_dir + "/" + folder
        if not os.path.isdir(base+"/"+tmp_dir):
            try:
                os.mkdir(base+"/"+tmp_dir)
                print("creating: ", base+"/"+tmp_dir)
            except OSError as error:
                print(error)

# Define out input and output paths, make sure the output path exists.
ntuple_path = "./data/GamGam/"
output_path = "./histograms/GamGam_pythonic/"
createDirs(output_path)


creating:  .//./histograms/GamGam_pythonic


In [15]:
# now vectorially process each event, retrieve the needed info from the ntuple, check if the event passes our selection, and fill histograms for the output.
def eventLooper (tree, out_hists, is_data):
    """
    # TODO
    """

    # TODO include the scaleFactor_PHOTON in the event weight, and the photon_isTightID branch.
    branches = ["photon_pt", "photon_E", "photon_eta", "photon_phi", "photon_n", "mcWeight", "XSection", "SumWeights", "scaleFactor_PILEUP"]

    # Read in the TTree to an Awkward Array (allows for vectors of branch values for each event rather than a pandas dataframe which must strcitly tabular.)
    ## https://awkward-array.org/doc/main/ 
    df = ak.to_dataframe(uproot.concatenate(tree, filter_name = branches, library='ak'))

    # get an idea of what this object looks like:
    # note 'entries' are each event, 'subentries' are for each particle in the event.
    print(df)

    nentries = ak.num(df, axis=0)
    print("there are {} event in the TTree".format(nentries))

    #hist_pTGam_1 = setupHist1D(out_hists, "photon_pT_1", 100, 0., 500., "pT [GeV]")
    #hist_pTGam_2 = setupHist1D(out_hists, "photon_pT_2", 100, 0., 500., "pT [GeV]")
    #hist_EGam_1 = setupHist1D(out_hists, "photon_E_1", 100, 0., 500., "E [GeV]")
    #hist_EGam_2 = setupHist1D(out_hists, "photon_E_2", 100, 0., 500., "E [GeV]")
    #hist_etaGam_1 = setupHist1D(out_hists, "photon_eta_1", 10, -2.5, 2.5, "#eta")
    #hist_etaGam_2 = setupHist1D(out_hists, "photon_eta_2", 10, -2.5, 2.5, "#eta")
    #hist_phiGam_1 = setupHist1D(out_hists, "photon_phi_1", 10, -4., 4., "#phi")
    #hist_phiGam_2 = setupHist1D(out_hists, "photon_phi_2", 10, -4., 4., "#phi")
    #hist_mGamGam = setupHist1D(out_hists, "diphoton_mass", 100, 0., 1000., "m#gamma#gamma [GeV]")

    # read in the event weights
    if (is_data):
        df["histoweight"] = 1.0
    else:
        # MC event weighting to luminosity of data
        df["histoweight"] = df["mcWeight"] * df["XSection"]*1000. * luminosity_ifb / df["SumWweights"]
        # MC weight corrections for experimental effects
        df["histoweight"] *= df["scaleFactor_PILEUP"] # TODO multiply by the photon scale factor weight

    # Only interested in events that have 2 photons in.
    df.query("photon_n==2", inplace=True)    
    
    # Obtain the kinematic variables (note TTree is in MeV and I want GeV)
                # TODO we want to check that our photons 1/2 are ordered by their pT... can you alter the assignment here to ensure we have that?
    df["photon_1_pt"] = df["photon_pt"][:,0]*0.001
    df["photon_2_pt"] = df["photon_pt"][:,1]*0.001
    df["photon_2_E"] = df["photon_E"][:,1]*0.001
    df["photon_1_eta"] = df["photon_eta"][:,0]
    df["photon_2_eta"] = df["photon_eta"][:,1]
    df["photon_1_phi"] = df["photon_phi"][:,0]
    df["photon_2_phi"] = df["photon_phi"][:,1]

    # Need to check the photons are in the fiducial region
    #eta_expr = abs(df["photon_1_eta"]) < 2.37 and (abs(df["photon_1_eta"]) < 1.37 or abs(df["photon_1_eta"]) > 1.56) and \
    #           abs(df["photon_2_eta"]) < 2.37 and (abs(df["photon_2_eta"]) < 1.37 or abs(df["photon_2_eta"]) > 1.56)
    df["passEta"] = abs(df["photon_1_eta"]) < 2.37 and (abs(df["photon_1_eta"]) < 1.37 or abs(df["photon_1_eta"]) > 1.56) and \
               abs(df["photon_2_eta"]) < 2.37 and (abs(df["photon_2_eta"]) < 1.37 or abs(df["photon_2_eta"]) > 1.56)
    eta_expr.values
    df.query("passEta", inplace=True)
        
    # We need to apply the photon trigger requirements, approximated by requiring our photons to have photon 1(2) pT > 35(25) GeV
    df.query("photon_1_pt > 35. & photon_2_pt > 25.", inplace=True)

    # TODO we're also only interested in the case where our two photons have passed a Tight particle ID, to reduce misreconstruction backgrounds.
    # Can you use the boolean "photon_isTightID" vector branch to require this?..

    photon_1_p4 = vector.arr( { 'pt' : df['photon_1_pt'],
                           'eta' : df['photon_1_eta'],
                           'phi' : df['photon_1_phi'],
                           'E': df['photon_1_E']
                           } )
    photon_2_p4 = vector.arr( { 'pt' : df['photon_2_pt'],
                           'eta' : df['photon_2_eta'],
                           'phi' : df['photon_2_phi'],
                           'E': df['photon_2_E']
                           } )
    
    diphoton_p4 = photon_1_p4 + photon_2_p4
    df["diphoton_mass"] = diphoton_p4.m

    # Another requiremt for good photons is a pT/diphoton mass bound
    passRatio = (df['photon_1_pt']/df['diphoton_m'] > 0.35) & (df['photon_2_pt']/df['diphoton_m'] > 0.25)
    df["passEnergyRatio"] = passRatio
    #df['passEnergyRatio'] = df['photon_isTightID']
    #df.loc[(slice(None), 0), 'passEnergyRatio'] = passRatio.values
    #df.loc[(slice(None), 1), 'passEnergyRatio'] = passRatio.values
    df.query('passEnergyRatio', inplace=True)

    print(df)

        # Fill the histograms in the output file with the event values.
        #hist_pTGam_1.Fill(photon_1_pt, histoweight)
        #hist_pTGam_2.Fill(photon_2_pt, histoweight)
        #hist_EGam_1.Fill(photon_1_E, histoweight)
        #hist_EGam_2.Fill(photon_2_E, histoweight)
        #hist_etaGam_1.Fill(photon_1_eta, histoweight)
        #hist_etaGam_2.Fill(photon_2_eta, histoweight)
        #hist_phiGam_1.Fill(photon_1_phi, histoweight)
        #hist_phiGam_2.Fill(photon_2_phi, histoweight)
        #hist_mGamGam.Fill(diphoton_mass, histoweight)


In [16]:
# EventLoop analysis over the Data 

# write the file we will output our data histograms into (in .h5 and .csv format)
out_df_data_str = output_path+"data"

# read in our data ntuples
tree_data_str = ntuple_path+"/Data/data_*.GamGam.root:mini"

# loop over the data ntuple to process needed info into histograms
eventLooper(tree_data_str, out_df_data_str, True)
# write data histograms to root file for further analysis.
#out_hists_data.Write()
#out_hists_data.Close()

                  mcWeight  scaleFactor_PILEUP  photon_n     photon_pt  \
entry   subentry                                                         
0       0              0.0                 0.0         2  46690.222656   
        1              0.0                 0.0         2  29442.607422   
1       0              0.0                 0.0         2  60888.738281   
        1              0.0                 0.0         2  37795.011719   
2       0              0.0                 0.0         2  39186.210938   
...                    ...                 ...       ...           ...   
7798421 1              0.0                 0.0         2  35773.832031   
7798422 0              0.0                 0.0         2  72873.671875   
        1              0.0                 0.0         2  45556.425781   
7798423 0              0.0                 0.0         2  41198.300781   
        1              0.0                 0.0         2  39577.066406   

                  photon_eta  photon_

ValueError: The truth value of a Series is ambiguous. Use a.empty, a.bool(), a.item(), a.any() or a.all().

In [None]:
# EventLoop analysis over the signal MC ntuples gluon gluon fusion -> H -> gamma gamma

# write the file we will output our Higgs signal MC histograms into
out_hists_ggfHiggs = TFile.Open(output_path+"ggfHiggs.root", "RECREATE")

# read in our signal MC ntuples gluon gluon fusion -> H -> gamma gamma
tree_ggfMC = TChain("mini")
tree_ggfMC.Add(ntuple_path+"/MC/mc_343981.ggH125_gamgam.GamGam.root")

# loop over the data ntuple to process needed info into histograms
eventLooper(tree_ggfMC, out_hists_ggfHiggs, False)
# write data histograms to root file for further analysis.
out_hists_ggfHiggs.Write()
out_hists_ggfHiggs.Close()

In [None]:
# EventLoop analysis over the signal MC ntuples Vector-boson fusion -> H -> gamma gamma

# write the file we will output our Higgs signal MC histograms into
out_hists_vbfHiggs = TFile.Open(output_path+"VBFHiggs.root", "RECREATE")

# read in our signal MC ntuples gluon gluon fusion -> H -> gamma gamma
tree_vbfMC = TChain("mini")
tree_vbfMC.Add(ntuple_path+"/MC/mc_345041.VBFH125_gamgam.GamGam.root")

# loop over the data ntuple to process needed info into histograms
eventLooper(tree_vbfMC, out_hists_vbfHiggs, False)
# write data histograms to root file for further analysis.
out_hists_vbfHiggs.Write()
out_hists_vbfHiggs.Close()

In [None]:
# Now maybe we want to also have combined Signal MC Histograms containing both ggf and vbf production? can combine root files via hadd command:
# and we can run a bash command via python as so....
command = "hadd -f histograms/GamGam/allHiggs.root histograms/GamGam/ggfHiggs.root histograms/GamGam/VBFHiggs.root"
print(command)
os.system(command)