In [None]:
import uproot
import pandas as pd
import awkward as ak
import vector
import matplotlib.pyplot as plt
import os

import sys
sys.path.insert(0, '../utils/')

from utils import createDirs

luminosity_ifb = 10.

# Define out input and output paths, make sure the output path exists.
ntuple_path = "../data/GamGam/"
output_path = "../histograms/GamGam_pythonic/"
createDirs(output_path)


In [2]:
# now vectorially process each event, retrieve the needed info from the ntuple, check if the event passes our selection, and fill histograms for the output.
def eventProcessor (tree, out_hists, is_data, h5key):
    """
    # TODO
    """

    # TODO include the scaleFactor_PHOTON in the event weight, and the photon_isTightID branch.
    branches = ["photon_pt", "photon_E", "photon_eta", "photon_phi", "photon_n", "mcWeight", "XSection", "SumWeights", "scaleFactor_PILEUP"]

    # Read in the TTree to an Awkward Array (allows for vectors of branch values for each event rather than a pandas dataframe which must strcitly tabular.)
    ## https://awkward-array.org/doc/main/ 

    df = pd.DataFrame()

    akdf = uproot.concatenate(tree, filter_name = branches, library='ak')

    # get an idea of what this object looks like:
    print(akdf)

    # Note the photons are stored in descending pT order

    # only care about photons that are in the fiducial region
    photon_eta_mask = (abs(akdf["photon_eta"]) < 2.37) & ((abs(akdf["photon_eta"]) < 1.37) | (abs(akdf["photon_eta"]) > 1.56))
    features_photon = ["photon_pt", "photon_E", "photon_eta", "photon_phi"]
    for pf in features_photon:
        akdf[pf] = akdf[pf][photon_eta_mask]
    del photon_eta_mask

   # TODO we're also only interested photons that have passed a Tight particle ID, to reduce misreconstruction backgrounds.
    # Can we add a similar mask to require that all the photons pass the Tight Identification? via the photon_isTightID bool variable.

    # Need events that have at least 2 photons in.
    nphoton_mask = ak.num(akdf['photon_pt']) >= 2
    akdf = akdf[nphoton_mask]
    del nphoton_mask

    # We need to apply the photon trigger requirements, approximated by requiring our photons to have photon 1(2) pT > 35(25) GeV
    photon1_pt = akdf.photon_pt[:, 0]
    photon2_pt = akdf.photon_pt[:, 1]
    photon_1_pt_mask = (photon1_pt > 35_000)
    photon_2_pt_mask = (photon2_pt > 25_000)
    photon_pt_mask = photon_1_pt_mask & photon_2_pt_mask
    akdf = akdf[photon_pt_mask]
    del photon_1_pt_mask
    del photon_2_pt_mask
    del photon_pt_mask


    # Only interested in events that have exactly 2 photons in that pass the requirements, to fill our pandas dataframe.
    nphoton_mask = ak.num(akdf['photon_pt']) == 2
    akdf = akdf[nphoton_mask]
    del nphoton_mask

    ak.drop_none(akdf)
    
    # Obtain the kinematic variables in our PANDAS dataframe (note TTree is in MeV and I want GeV)
    df["photon_1_pt"] = akdf["photon_pt"][:,0]*0.001
    df["photon_2_pt"] = akdf["photon_pt"][:,1]*0.001
    df["photon_1_E"] = akdf["photon_E"][:,1]*0.001
    df["photon_2_E"] = akdf["photon_E"][:,1]*0.001
    df["photon_1_eta"] = akdf["photon_eta"][:,0]
    df["photon_2_eta"] = akdf["photon_eta"][:,1]
    df["photon_1_phi"] = akdf["photon_phi"][:,0]
    df["photon_2_phi"] = akdf["photon_phi"][:,1]
        
    # read in the event weights
    if (is_data):
        df["histoweight"] = 1.0
    else:
        # MC event weighting to luminosity of data
        df["histoweight"] = akdf["mcWeight"] * akdf["XSection"]*1000. * luminosity_ifb / akdf["SumWweights"]
        # MC weight corrections for experimental effects
        df["histoweight"] *= akdf["scaleFactor_PILEUP"] # TODO multiply by the photon scale factor weight


    # TODO we're also only interested in the case where our two photons have passed a Tight particle ID, to reduce misreconstruction backgrounds.
    # Can you use the boolean "photon_isTightID" vector branch to require this?..

    photon_1_p4 = vector.arr( { 'pt' : df['photon_1_pt'],
                           'eta' : df['photon_1_eta'],
                           'phi' : df['photon_1_phi'],
                           'E': df['photon_1_E']
                           } )
    photon_2_p4 = vector.arr( { 'pt' : df['photon_2_pt'],
                           'eta' : df['photon_2_eta'],
                           'phi' : df['photon_2_phi'],
                           'E': df['photon_2_E']
                           } )
    
    diphoton_p4 = photon_1_p4 + photon_2_p4
    df["diphoton_mass"] = diphoton_p4.m

    # Another requirement for the events is a pT/diphoton mass bound (doing this in the dataframe to demonstrate how to filter pdframes)
    passRatio = (df['photon_1_pt']/df['diphoton_mass'] > 0.35) & (df['photon_2_pt']/df['diphoton_mass'] > 0.25)
    df["passEnergyRatio"] = passRatio
    df.query('passEnergyRatio', inplace=True)
    print(df)

    # Write out final selected data for further analysis
    df.to_hdf(out_hists +".h5", key=h5key, mode="w")
    df.to_csv(out_hists +".csv", mode="w")




In [3]:
# EventLoop analysis over the Data 

# write the file we will output our data histograms into (in .h5 and .csv format)
out_df_data_str = output_path+"data"

# read in our data ntuples
tree_data_str = ntuple_path+"/Data/data_*.GamGam.root:mini"

# loop over the data ntuple to process needed info into histograms
eventProcessor(tree_data_str, out_df_data_str, True, 'data')


[{mcWeight: 0, scaleFactor_PILEUP: 0, photon_n: 2, photon_pt: [...], ...}, ...]
         photon_1_pt  photon_2_pt  photon_1_E  photon_2_E  photon_1_eta  \
0          46.690224    29.442608   51.918003   51.918003     -1.779089   
1          60.888741    37.795013   56.970108   56.970108      0.174119   
2          39.186214    25.327362   50.309616   50.309616      0.139337   
3          35.529690    29.579599   57.963634   57.963634      1.048340   
4          48.605839    29.717976   31.570139   31.570139     -0.448705   
...              ...          ...         ...         ...           ...   
7105497    42.760803    29.416094   31.302219   31.302219     -0.823499   
7105498    56.330582    27.199383   32.148697   32.148697     -0.142294   
7105499    36.401966    35.773834   50.212563   50.212563     -1.776634   
7105500    72.873672    45.556427   56.051189   56.051189      0.935952   
7105501    41.198303    39.577068   66.560402   66.560402      0.311521   

         photon_2_e

In [4]:
# EventLoop analysis over the signal MC ntuples gluon gluon fusion -> H -> gamma gamma

# write the file we will output our processed ggf Higgs events into (in .h5 and .csv format)
out_df_ggfHiggs_str = output_path+"ggfHiggs"

# read in our ggfHiggs ntuples
tree_ggfHiggs_str = ntuple_path+"/MC/mc_343981.ggH125_gamgam.GamGam.root:mini"

# loop over the ggfHiggs ntuple to process needed info into histograms
eventProcessor(tree_ggfHiggs_str, out_df_ggfHiggs_str, True, 'ggfHiggs')

[{mcWeight: 28.5, scaleFactor_PILEUP: 0, photon_n: 2, photon_pt: ..., ...}, ...]
        photon_1_pt  photon_2_pt  photon_1_E  photon_2_E  photon_1_eta  \
0         58.376011    56.773354   57.806873   57.806873     -0.482442   
2         82.018768    69.614159  121.158180  121.158180      1.024764   
3         60.740158    57.999924   73.122002   73.122002     -0.040543   
4         75.291000    55.593014   57.959332   57.959332     -0.036332   
5         48.024559    42.526234   68.603981   68.603981     -0.681367   
...             ...          ...         ...         ...           ...   
939530    68.995056    25.804670   51.422329   51.422329     -0.544884   
939532    56.199371    50.974934   51.262089   51.262089      0.960204   
939536    64.243874    59.020187   62.364948   62.364948      0.605870   
939537    62.599831    62.278881   73.880775   73.880775     -0.629353   
939538    80.016304    52.876263   57.308094   57.308094      0.132135   

        photon_2_eta  photon_1

In [5]:
# EventLoop analysis over the signal MC ntuples Vector-boson fusion -> H -> gamma gamma

# write the file we will output our processed VBF Higgs events into (in .h5 and .csv format)
out_df_vbfHiggs_str = output_path+"vbfHiggs"

# read in our vbfHiggs ntuples
tree_vbfHiggs_str = ntuple_path+"/MC/mc_345041.VBFH125_gamgam.GamGam.root:mini"

# loop over the vbfHiggs ntuple to process needed info into histograms
eventProcessor(tree_vbfHiggs_str, out_df_vbfHiggs_str, True, 'vbfHiggs')

[{mcWeight: 3.75, scaleFactor_PILEUP: 0, photon_n: 2, photon_pt: ..., ...}, ...]
        photon_1_pt  photon_2_pt  photon_1_E  photon_2_E  photon_1_eta  \
0         76.868210    46.170959   64.013298   64.013298     -0.195896   
1         79.918510    72.685318  128.432632  128.432632     -1.226758   
3         75.253479    26.408932   50.392307   50.392307      0.701149   
4         60.144890    40.534061   77.002548   77.002548      0.257373   
16        64.201637    41.124813   53.977917   53.977917      0.940091   
...             ...          ...         ...         ...           ...   
439898    70.614510    48.867386   50.941853   50.941853      0.440406   
439903    67.701111    53.527912   61.358067   61.358067      0.030924   
439908    56.288536    45.325504   54.074001   54.074001      0.693200   
439910    75.344986    51.030186  127.581200  127.581200     -1.239651   
439911    65.130150    63.108433   90.046631   90.046631      1.170829   

        photon_2_eta  photon_1

In [6]:
# Now maybe we want to also have combined Signal MC Histograms containing both ggf and vbf production? 

# and we can run a bash command via python as so....
command = "cat {0} {1} > {2}".format(out_df_ggfHiggs_str+".csv", out_df_vbfHiggs_str+".csv", output_path+"allHiggs.csv")
print(command)
os.system(command)

cat ./histograms/GamGam_pythonic/ggfHiggs.csv ./histograms/GamGam_pythonic/vbfHiggs.csv > ./histograms/GamGam_pythonic/allHiggs.csv


1