In [1]:
import uproot
import glob
import os
import fnmatch

In [2]:
def _resolve_sample(samples, sample):
    """Return the sample name to use, resolving shell-style wildcards.

    If ``sample`` is not literally contained in ``samples`` it is treated as
    an ``fnmatch`` pattern and the first matching entry is returned.

    Raises:
        ValueError: if ``sample`` is neither in ``samples`` nor matches any entry.
    """
    if sample in samples:
        return sample
    # try to find a matching sample with fnmatch
    matching = fnmatch.filter(samples, sample)
    if len(matching) == 0:
        raise ValueError("Sample {} not found in sample list".format(sample))
    print("Sample {} not found in sample list, but found {} matching samples: {}, will use the first one".format(sample, len(matching), matching))
    return matching[0]


def _branch_names_from_first_file(basepath, samples, channel, sample, tree_name):
    """List the branch names of ``tree_name`` in the first file of a sample.

    Files are searched under ``<basepath>/<sample>/<channel>/*``. The glob
    result is sorted so that "the first file" is the same on every run
    (``glob.glob`` itself returns entries in arbitrary order).

    Raises:
        ValueError: if the sample cannot be resolved (see ``_resolve_sample``).
        FileNotFoundError: if the glob pattern matches no file at all.
    """
    sample = _resolve_sample(samples, sample)
    pattern = os.path.join(basepath, sample, channel, "*")
    print(f"globing for {sample} using {pattern}")
    files = sorted(glob.glob(pattern))
    if not files:
        # fail with a clear message instead of an IndexError on files[0]
        raise FileNotFoundError(
            f"No files found for sample {sample} using {pattern}"
        )
    # the context manager closes the file handle once the names are read
    with uproot.open(files[0]) as uproot_file:
        return [branch.name for branch in uproot_file[tree_name].branches]


def get_quantities_from_sample(basepath, samples, channel, sample):
    """Return the branch names of the ``quantities`` tree of a sample.

    Args:
        basepath: directory that contains one sub-directory per sample.
        samples: collection of known sample names.
        channel: sub-directory of the sample that holds the files.
        sample: exact sample name, or an ``fnmatch`` pattern; for a pattern
            the first matching entry of ``samples`` is used.

    Returns:
        list of str: names of all branches of the ``quantities`` tree in the
        first (alphabetically sorted) file found for the sample.

    Raises:
        ValueError: if no sample matches ``sample``.
        FileNotFoundError: if no file is found for the sample and channel.
    """
    return _branch_names_from_first_file(basepath, samples, channel, sample, "quantities")


def get_shifts_from_sample(basepath, samples, channel, sample):
    """Return the branch names of the ``variations`` tree of a sample.

    Args:
        basepath: directory that contains one sub-directory per sample.
        samples: collection of known sample names.
        channel: sub-directory of the sample that holds the files.
        sample: exact sample name, or an ``fnmatch`` pattern; for a pattern
            the first matching entry of ``samples`` is used.

    Returns:
        list of str: names of all branches of the ``variations`` tree in the
        first (alphabetically sorted) file found for the sample.

    Raises:
        ValueError: if no sample matches ``sample``.
        FileNotFoundError: if no file is found for the sample and channel.
    """
    return _branch_names_from_first_file(basepath, samples, channel, sample, "variations")

In [6]:
# --- configuration of the ntuple set to inspect ---
era = 2018
sample = "*ZHToTauTau*"  # wildcard pattern, resolved against `samples` below
channel = "mt"
tag = "2022_09_v1"
basepath = f"/ceph/sbrommer/smhtt_ul/{tag}/ntuples/{era}/"

# Every entry directly below basepath is taken as one sample name.
samples = [
    os.path.basename(entry)
    for entry in glob.glob(os.path.join(basepath, "*"))
]
print(f"found {len(samples)} samples in {basepath}")

# Branch names of the "quantities" tree of the selected sample.
variables = get_quantities_from_sample(basepath, samples, channel, sample)
print(variables)

# Branch names of the "variations" tree of the selected sample.
shifts = get_shifts_from_sample(basepath, samples, channel, sample)
print(shifts)

found 75 samples in /ceph/sbrommer/smhtt_ul/2022_09_v1/ntuples/2018/
Sample *ZHToTauTau* not found in sample list, but found 1 matching samples: ['ZHToTauTau_M125_CP5_13TeV-powheg-pythia8_RunIISummer20UL18NanoAODv9-106X'], will use the first one
globing for ZHToTauTau_M125_CP5_13TeV-powheg-pythia8_RunIISummer20UL18NanoAODv9-106X using /ceph/sbrommer/smhtt_ul/2022_09_v1/ntuples/2018/ZHToTauTau_M125_CP5_13TeV-powheg-pythia8_RunIISummer20UL18NanoAODv9-106X/mt/*
['HTXS_Higgs_pt', 'HTXS_Higgs_y', 'HTXS_njets30', 'HTXS_stage1_2_cat_pTjet30GeV', 'HTXS_stage1_2_fine_cat_pTjet30GeV', 'HTXS_stage_0', 'THU_ggH_Mig01', 'THU_ggH_Mig12', 'THU_ggH_Mu', 'THU_ggH_PT120', 'THU_ggH_PT60', 'THU_ggH_Res', 'THU_ggH_VBF2j', 'THU_ggH_VBF3j', 'THU_ggH_qmtop', 'THU_qqH_25', 'THU_qqH_JET01', 'THU_qqH_Mjj1000', 'THU_qqH_Mjj120', 'THU_qqH_Mjj1500', 'THU_qqH_Mjj350', 'THU_qqH_Mjj60', 'THU_qqH_Mjj700', 'THU_qqH_PTH200', 'THU_qqH_TOT', 'beta_1', 'beta_2', 'bphi_1', 'bphi_2', 'bpt_1', 'bpt_2', 'btag_value_1', 'btag_va

In [8]:
def find_string_in_shapes(shapefile, search_string):
    """Print every key of a ROOT shapes file that contains ``search_string``.

    Args:
        shapefile: path of the ROOT file to open with ``uproot``.
        search_string: substring that a key must contain to be printed.

    Returns:
        None. The matching keys are printed one per line, in the order
        ``uproot`` reports them; a single empty line is printed if nothing
        matches.
    """
    # open via context manager so the file handle is released afterwards
    with uproot.open(shapefile) as uproot_file:
        available_hists = uproot_file.keys()
    print("\n".join(s for s in available_hists if search_string in s))
# List all keys of the shapes file that contain "anti_iso_CMS_scale_t_emb".
find_string_in_shapes(
    "/work/sbrommer/smhtt_ul/analysis/smhtt_ul/output/"
    "2018-mt-2022_09_v1-debugging_training_v11_3/"
    "tauid_shapes-2018-mt-2022_09_v1-debugging_training_v11_3.root",
    "anti_iso_CMS_scale_t_emb",
)

EMB#mt-Embedded-qqh#anti_iso_CMS_scale_t_emb_1prong_EraDown#mt_score;1
EMB#mt-Embedded-ggh#anti_iso_CMS_scale_t_emb_1prong_EraDown#mt_score;1
EMB#mt-Embedded-ztt#anti_iso_CMS_scale_t_emb_1prong_EraDown#mt_score;1
EMB#mt-Embedded-ff#anti_iso_CMS_scale_t_emb_1prong_EraDown#mt_score;1
EMB#mt-Embedded-tt#anti_iso_CMS_scale_t_emb_1prong_EraDown#mt_score;1
EMB#mt-Embedded-misc#anti_iso_CMS_scale_t_emb_1prong_EraDown#mt_score;1
EMB#mt-Embedded-xxh#anti_iso_CMS_scale_t_emb_1prong_EraDown#mt_score;1
EMB#mt-Embedded-qqh#anti_iso_CMS_scale_t_emb_1prong_EraUp#mt_score;1
EMB#mt-Embedded-ggh#anti_iso_CMS_scale_t_emb_1prong_EraUp#mt_score;1
EMB#mt-Embedded-ztt#anti_iso_CMS_scale_t_emb_1prong_EraUp#mt_score;1
EMB#mt-Embedded-ff#anti_iso_CMS_scale_t_emb_1prong_EraUp#mt_score;1
EMB#mt-Embedded-tt#anti_iso_CMS_scale_t_emb_1prong_EraUp#mt_score;1
EMB#mt-Embedded-misc#anti_iso_CMS_scale_t_emb_1prong_EraUp#mt_score;1
EMB#mt-Embedded-xxh#anti_iso_CMS_scale_t_emb_1prong_EraUp#mt_score;1
EMB#mt-Embedded-qqh#an