Bare copys av importsene

In [1]:
import glob
import re
import ROOT
import time
import numpy as np
import sys
import os
import shutil
import pandas as pd
import ctypes
import gc
import json
# check if having pandas, if not install it through pip
try:
    import pandas as pd
except:
    !{sys.executable} -m pip install pandas
    import pandas as pd
# ROOT's vector class
ROOT.gInterpreter.GenerateDictionary("ROOT::VecOps::RVec<vector<double>>", "vector;ROOT/RVec.hxx")
ROOT.gInterpreter.GenerateDictionary("ROOT::VecOps::RVec<vector<float>>", "vector;ROOT/RVec.hxx")
ROOT.gInterpreter.GenerateDictionary("ROOT::VecOps::RVec<vector<int>>", "vector;ROOT/RVec.hxx")

Welcome to JupyROOT 6.30/04


1

## Kompilerer  C++ biblioteket

Bruker helperFunctions.cxx til å kompilere C++ funkjsonene vi vil trenge

In [2]:
! g++ -shared -fPIC -o ./../commontools/helperFunctions.so ./../commontools/helperFunctions.cxx `root-config --cflags --glibs`

### Funksjoner

Nyttige funskjoner.

In [3]:
# Loading the c++ library 
def loadlibraries():
    ROOT.gSystem.AddDynamicPath("../commontools/.")
    ROOT.gROOT.ProcessLine(".include ./../commontools");
    #ROOT.gInterpreter.AddIncludePath("./../commontools");
    #ROOT.gInterpreter.Declare('#include \"./../commontools/helperFunctions.h\"') # Header with the definition of the myFilter function
    #ROOT.gROOT.ProcessLine(".L ./../commontools/helperFunctions.cxx+");

    ROOT.gInterpreter.Declare('#include "./../commontools/helperFunctions.h"')
    #ROOT.gSystem.Load("../commontools/helperFunctions_cxx.so") # Library with the myFilter function
    ROOT.gSystem.Load("../commontools/helperFunctions.so") # Library with the myFilter function
    
    ROOT.setGRL()

    #ROOT.readMetaData("physlite_metadata.csv")

Vi vil bruke Dask, et parallell computing program.

In [4]:
# Create a Dask connection
def create_connection():

    #cluster = SSHCluster(hosts=["localhost","technihadron.uio.no","zprime.uio.no","technicolor.uio.no"],
                         #worker_options={"nprocs":4,},)
    from dask.distributed import LocalCluster, Client
    # Point RDataFrame calls to Dask RDataFrame object
    RDataFrame = ROOT.RDF.Experimental.Distributed.Dask.RDataFrame
    RunGraphs = ROOT.RDF.Experimental.Distributed.RunGraphs
    cluster = LocalCluster(n_workers=n_workers, threads_per_worker=1, processes=True, memory_limit="10GiB")
    try:
        client = Client(cluster,timeout='2s')
    except TimeoutError:
        pass
    return client

In [5]:
# Function to execute all the defined 
# ROOT histograms and write them to file
def computeHistograms(allhistograms):
    #start = time.time()
    RunGraphs(allhistograms)
    #end = time.time()
    #fordi parallellisering
    start=1
    end=0
    #print("Used %.2f seconds"%(end-start))
    #tfile = ROOT.TFile("histograms_all_GravitonLoose.root","UPDATE")
    tfile = ROOT.TFile("histograms_all_GravitonLoose_MC3.root","UPDATE")
    #tfile = ROOT.TFile("histograms_all_GravitonTight_MC2.root","UPDATE")
    for h in allhistograms:
        h.Write()
        #del h
    tfile.Close()
    #tfile.Delete()
    return (end-start)

In [6]:
def dumpResults(histo,df_cut,new_col,ntup_col,isData,batchnum,makeHistograms=True,writeToHDF5=False,writeToROOT=False):
    # Specify the columns we would like to write to hdf5
    print("-"*40)
    print("Will make the following: \nHDF5: %s \nROOT: %s \nhistograms: %s"%("YES" if writeToHDF5 else "NO","YES" if writeToROOT else "NO","YES" if makeHistograms else "NO"))
    print("-"*40)
    numpy_col = []
    for col in new_col:
        # Can't convert non-flattend stuff to hdf5
        if 'jet_' in col: continue 
        if 'lep_' in col: continue    
        numpy_col.append(col)
    dids = list(df_cut.keys())
    for did in dids:
        if not writeToHDF5 and not writeToROOT: continue
        fname=did
        for ch in df_cut[did].keys():
            try:
                this_nev = df_cut[did][ch].Count().GetValue()
                #tot_nev[ch]["nev"] += this_nev
            except:
                print("Problems finding nev for %s and %s"%(did,ch))
                this_nev = 0
            if writeToROOT and ch == "4L":
                # One can also write content back to a ROOT file using Snapshot (will not do this now)
                print("Converting channel %s skim of DSID %s to skim %s.root for %i events"%(ch,did,fname,this_nev))
                start_ntup = time.time()
                df_cut[did][ch].Snapshot("CollectionTree","%s/%s_batch%i.root"%(MYDIR,did,batchnum),ntup_col,opts)
                stop_ntup = time.time()
                print("Used %.2f s on %i events to ntuple"%((stop_ntup-start_ntup),this_nev))
            if ch == "2L": continue
            if writeToHDF5:
                print("Converting channel %s skim of DSID %s to skim %s.hdf5 for %i events"%(ch,did,fname,this_nev))
                start_hdf5 = time.time()
                try:
                    pandas_df = pd.DataFrame(data=df_cut[did][ch].AsNumpy(numpy_col))
                    pandas_df.to_hdf(MYDIR+"%s.hdf5"%fname,"CollectionTree",mode='a',append=True)
                except:
                    print("Problems converting %s to hdf5"%did)
                stop_hdf5 = time.time()
                print("Used %.2f s on %i events to hdf5"%((stop_hdf5-start_hdf5),this_nev))
        try:
            del pandas_df
        except:
            print("WARNING \t No panda to delete")
        del df_cut[did]
    
    allhistograms = []
    for key in histo.keys():
        allhistograms.append(histo[key])
    if makeHistograms:
        print("Prepared %i histograms on %i events"%(len(allhistograms),n_entries))
        if len(allhistograms):
            start_hist = time.time()
            computeHistograms(allhistograms)
            stop_hist = time.time()
            print("Used %.2f s on %i events to hdf5"%((stop_hist-start_hist),this_nev))
    

## Sette opp workers

Hvor mange workers skal brukes. Setter til False og null i starten av anbefaling fra Eirik.

Ingen workers for at det skal funke.

With workers

In [7]:
doDask= False
n_workers = 100

In [8]:
# If not doing dask, still run multithreaded if n_workers > 1
print("0")
if not doDask:
    if n_workers:
        ROOT.EnableImplicitMT(n_workers)
    print("1")    
    RDataFrame = ROOT.RDataFrame
    print("2")
    RunGraphs = ROOT.RDF.RunGraphs
    print("3")
    loadlibraries()
    print("4")
if doDask:
    RDataFrame = ROOT.RDF.Experimental.Distributed.Dask.RDataFrame
    RunGraphs = ROOT.RDF.Experimental.Distributed.RunGraphs
    connection = create_connection()
    connection.run(loadlibraries)

0
1
2
3
4


## Laste inn og se på meta data
Bruker readmetadata.ipynb, så greit å lese. Metadataen må leses fra en csv fil. Finner cross-section, filter efficiency, number of generated events og k-factors. Må skalere variablene med integrated luminosity. Hver Dask worker må lese gjennom meta data og er derfor lagt til i loadlibraries fra forrige celle.

In [9]:
# Inspect the metadata by reading it directly into a pandas dataframe
pand = pd.read_csv("physlite_metadata.csv")
bool_series = pd.isnull(pand["category"]) 
# inspect first 5 entries
#pand[pand["category"]=="Higgs_4lep_notau"]
pand[pand["category"]=="Diboson_nom"]

Unnamed: 0,category,dataset_number,physics_short,crossSection,genFiltEff,kFactor,relUncertUP,relUncertDOWN,generator_name,etag,sumofweights
271,Diboson_nom,700566,Sh_2212_WlvWqq,116.83,0.43847,1.0,0.0,0.0,Sherpa(v.2.2.12.bbba1f7),e8462,1807121000000.0
272,Diboson_nom,700567,Sh_2212_WlvZbb,2.5961,1.0,1.0,0.0,0.0,Sherpa(v.2.2.12.bbba1f7),e8462,31370500000.0
273,Diboson_nom,700568,Sh_2212_WlvZqq,9.2309,1.0,1.0,0.0,0.0,Sherpa(v.2.2.12.bbba1f7),e8462,111588300000.0
274,Diboson_nom,700569,Sh_2212_WqqZll,3.5549,1.0,1.0,0.0,0.0,Sherpa(v.2.2.12.bbba1f7),e8462,42968190000.0
275,Diboson_nom,700570,Sh_2212_WqqZvv,7.0361,1.0,1.0,0.0,0.0,Sherpa(v.2.2.12.bbba1f7),e8462,85011020000.0
276,Diboson_nom,700571,Sh_2212_ZbbZll,1.0415,0.478826,1.0,0.0,0.0,Sherpa(v.2.2.12.bbba1f7),e8462,9099626000.0
277,Diboson_nom,700572,Sh_2212_ZqqZll,6.7319,0.26354,1.0,0.0,0.0,Sherpa(v.2.2.12.bbba1f7),e8462,57147130000.0
278,Diboson_nom,700573,Sh_2212_ZbbZvv,2.0093,0.491491,1.0,0.0,0.0,Sherpa(v.2.2.12.bbba1f7),e8462,17587980000.0
279,Diboson_nom,700574,Sh_2212_ZqqZvv,8.9452,0.392668,1.0,0.0,0.0,Sherpa(v.2.2.12.bbba1f7),e8462,78151330000.0
280,Diboson_nom,700600,Sh_2212_llll,1.3385,1.0,1.0,0.0,0.0,Sherpa(v.2.2.12.bbba1f7),e8462,388448500000.0


## Skaffe inputs
Vi vil lese alle simulerte samplesene fra shared directory på hepp02/03. The dsid (dataset-id) is a unique identifier for each simulated sample.

*Datasets* here refer to the various processes, each identified with a uniqe dataset-ID (DSID), which we talked about in one of the lectures and was briefly discussed when loading the metadata. The dictionary contains the path to where all of the input files for each DSID are stored. E.g. for DSID *513093* the input files are stored in:

## Preparing the input
As was discussed in one of the lectures some of the columns are not readable without having access to the ATLAS software and classes and can not easilly be read in a columnar data format. For this tuorial we are not interested in keeping these columns. The allowed types, which we will consider in the following are:

In [10]:
allowedTypes = [
    'ROOT::VecOps::RVec<vector<int>>',
    'ROOT::VecOps::RVec<float>',
    'ROOT::VecOps::RVec<UInt_t>',
    'ROOT::VecOps::RVec<UInt_t>'
    'ROOT::VecOps::RVec<int>',
    'ROOT::VecOps::RVec<vector<float>>',
    'ROOT::VecOps::RVec<char>',
    'ROOT::VecOps::RVec<unsigned short>',
    'ROOT::VecOps::RVec<unsigned int>',
    'ROOT::VecOps::RVec<unsigned char>',
    'ROOT::VecOps::RVec<double>',
    'ROOT::VecOps::RVec<set<unsigned int>>',
    'Float_t',
    'Char_t',
    'UInt_t',
    'Int_t',
    'ULong64_t',
    #'ROOT::VecOps::RVec<string>',
    #'ROOT::VecOps::RVec<vector<string>>',
    'ROOT::VecOps::RVec<ULong64_t>',
    'ROOT::VecOps::RVec<short>',
    'ROOT::VecOps::RVec<vector<vector<unsigned int> >>'
]
print("Done")

Done


Furthermore, the variables need to be *AuxDyn* for ROOT to be able to read them. Let's remove all columns which are not in allowed types (defined above) and not *AuxDyn*. 

Please ignore all the WARNINGS about *no dictionary for class <-----> is available* since this is just telling you than many of the variables need some special classes (*xAOD::xyz*) which are only available when setting up the full ATLAS software. You don't have to worry about these now since they are in any case not easilly accessible in a columnar data structure. 

In [11]:
"""
print("Came here 0")
auxDyn = []
auxDynAll = []
"""

'\nprint("Came here 0")\nauxDyn = []\nauxDynAll = []\n'

In [12]:
"""
print("Came here 1")
#df = ROOT.RDataFrame("CollectionTree", "/storage/shared/data/PHYSLITEforML/mc21_13p6TeV.601183.PhPy8EG_AZNLO_Wplusenu.deriv.DAOD_PHYSLITE.e8453_s3873_r13829_p5631/DAOD_PHYSLITE.33080366._000004.pool.root.1")
df = ROOT.RDataFrame("CollectionTree", "/storage/shared/data/PHYSLITEforML/mc21_13p6TeV.601183.PhPy8EG_AZNLO_Wplusenu.deriv.DAOD_PHYSLITE.e8453_s3873_r13829_p5631/DAOD_PHYSLITE.33080366._000001.pool.root.1")
"""

'\nprint("Came here 1")\n#df = ROOT.RDataFrame("CollectionTree", "/storage/shared/data/PHYSLITEforML/mc21_13p6TeV.601183.PhPy8EG_AZNLO_Wplusenu.deriv.DAOD_PHYSLITE.e8453_s3873_r13829_p5631/DAOD_PHYSLITE.33080366._000004.pool.root.1")\ndf = ROOT.RDataFrame("CollectionTree", "/storage/shared/data/PHYSLITEforML/mc21_13p6TeV.601183.PhPy8EG_AZNLO_Wplusenu.deriv.DAOD_PHYSLITE.e8453_s3873_r13829_p5631/DAOD_PHYSLITE.33080366._000001.pool.root.1")\n'

In [13]:
"""
tot_col = 0
colkeep = []
"""

'\ntot_col = 0\ncolkeep = []\n'

In [14]:
#a=df.GetColumnNames()

In [15]:
"""
for name in df.GetColumnNames():
    print("Came here 2")
    tot_col += 1
    if "AuxDyn" in str(name):
        datatype = df.GetColumnType(name)
        auxDynAll.append((str(name),datatype))
        if datatype in allowedTypes: 
            auxDyn.append((str(name),datatype))
            colkeep.append(name)
"""

'\nfor name in df.GetColumnNames():\n    print("Came here 2")\n    tot_col += 1\n    if "AuxDyn" in str(name):\n        datatype = df.GetColumnType(name)\n        auxDynAll.append((str(name),datatype))\n        if datatype in allowedTypes: \n            auxDyn.append((str(name),datatype))\n            colkeep.append(name)\n'

In [16]:
#print("Reduced data sets from %i to %i columns" %(tot_col,len(auxDyn)))

In [17]:
#print("Came here 0")
auxDyn = []
auxDynAll = []
# We can pick any file we like to compile a list of variables
# since they all have identical content.
#print("Came here 1")
df = ROOT.RDataFrame("CollectionTree", "/storage/shared/data/PHYSLITEforML/mc21_13p6TeV.601183.PhPy8EG_AZNLO_Wplusenu.deriv.DAOD_PHYSLITE.e8453_s3873_r13829_p5631/DAOD_PHYSLITE.33080366._000004.pool.root.1")
tot_col = 0
colkeep = []
for name in df.GetColumnNames():
    #print("Came here 2")
    tot_col += 1
    if "AuxDyn" in str(name):
        datatype = df.GetColumnType(name)
        auxDynAll.append((str(name),datatype))
        if datatype in allowedTypes: 
            auxDyn.append((str(name),datatype))
            colkeep.append(name)
print("Reduced data sets from %i to %i columns" %(tot_col,len(auxDyn)))

Reduced data sets from 901 to 622 columns




In [18]:
auxDyn

[('AnalysisElectronsAuxDyn.DFCommonElectronsECIDS',
  'ROOT::VecOps::RVec<char>'),
 ('AnalysisElectronsAuxDyn.DFCommonElectronsECIDSResult',
  'ROOT::VecOps::RVec<double>'),
 ('AnalysisElectronsAuxDyn.DFCommonElectronsLHLoose',
  'ROOT::VecOps::RVec<char>'),
 ('AnalysisElectronsAuxDyn.DFCommonElectronsLHLooseBL',
  'ROOT::VecOps::RVec<char>'),
 ('AnalysisElectronsAuxDyn.DFCommonElectronsLHLooseBLIsEMValue',
  'ROOT::VecOps::RVec<unsigned int>'),
 ('AnalysisElectronsAuxDyn.DFCommonElectronsLHLooseIsEMValue',
  'ROOT::VecOps::RVec<unsigned int>'),
 ('AnalysisElectronsAuxDyn.DFCommonElectronsLHMedium',
  'ROOT::VecOps::RVec<char>'),
 ('AnalysisElectronsAuxDyn.DFCommonElectronsLHMediumIsEMValue',
  'ROOT::VecOps::RVec<unsigned int>'),
 ('AnalysisElectronsAuxDyn.DFCommonElectronsLHTight',
  'ROOT::VecOps::RVec<char>'),
 ('AnalysisElectronsAuxDyn.DFCommonElectronsLHTightIsEMValue',
  'ROOT::VecOps::RVec<unsigned int>'),
 ('AnalysisElectronsAuxDyn.DFCommonElectronsLHVeryLoose',
  'ROOT::VecOp

In fact, a few more columns need to be removed since they contain links to xAOD objects

In [19]:
colkeep = []
# Need to remove a few more columns since they contain links to xAOD objects
for name,type in auxDyn:
    if "Link" in name or "detDescrTags" in name or "originalTruthParticle" in name or "originalTrackParticle" in name: continue
    colkeep.append("%s"%name.strip())
# The columns we would like to keep are now added in colkeep
#print(colkeep)
#rvec = ROOT.RVec('string')((colkeep[0], colkeep[1], colkeep[3]))
print("Reduced data sets from %i to %i columns" %(tot_col,len(colkeep)))

Reduced data sets from 901 to 492 columns


In [20]:
colkeep

['AnalysisElectronsAuxDyn.DFCommonElectronsECIDS',
 'AnalysisElectronsAuxDyn.DFCommonElectronsECIDSResult',
 'AnalysisElectronsAuxDyn.DFCommonElectronsLHLoose',
 'AnalysisElectronsAuxDyn.DFCommonElectronsLHLooseBL',
 'AnalysisElectronsAuxDyn.DFCommonElectronsLHLooseBLIsEMValue',
 'AnalysisElectronsAuxDyn.DFCommonElectronsLHLooseIsEMValue',
 'AnalysisElectronsAuxDyn.DFCommonElectronsLHMedium',
 'AnalysisElectronsAuxDyn.DFCommonElectronsLHMediumIsEMValue',
 'AnalysisElectronsAuxDyn.DFCommonElectronsLHTight',
 'AnalysisElectronsAuxDyn.DFCommonElectronsLHTightIsEMValue',
 'AnalysisElectronsAuxDyn.DFCommonElectronsLHVeryLoose',
 'AnalysisElectronsAuxDyn.DFCommonElectronsLHVeryLooseIsEMValue',
 'AnalysisElectronsAuxDyn.OQ',
 'AnalysisElectronsAuxDyn.ambiguityType',
 'AnalysisElectronsAuxDyn.author',
 'AnalysisElectronsAuxDyn.charge',
 'AnalysisElectronsAuxDyn.eta',
 'AnalysisElectronsAuxDyn.m',
 'AnalysisElectronsAuxDyn.neflowisol20',
 'AnalysisElectronsAuxDyn.phi',
 'AnalysisElectronsAuxDyn

# What to analyse
Before starting the analysis we need to define which files we want to process. When debugging or working on getting a finalized analysis it might be a good idea to just process a few files. Remember we classified the various DSIDs into processes in the *Readmetadata.ipynb* notbook. You can use these processes now to define what you want to run over. E.g. a nominal set of backgrounds would be to use the following:

In [21]:
isData=False
testing=False
# The recomended input for a complete description of the SM in pp collisions at 13.6 TeV.
if not isData:
    categories_to_use = ["singletop_nom","ttbar_dilep_nom","ttW_dilep","Diboson_nom","Triboson","Vgamma_nom","Wenu_nom",
                         "Wmunu_nom",
                         "Wtaunu_nom",
                         "Zee_nom",
                         "Zmumu_nom",
                         "Ztautau_nom",
                         "Znunu_nom",
                         "Zee_lowmll_nom",
                         "Zmumu_lowmll_nom",
                         "Ztautau_lowmll_nom",
                         "Higgs_tautau",
                         "Higgs_gamgam",
                         "Higgs_4lep_notau",
                         "Higgs_mumu", 
                         #"ttll_2L",
                         "ttll_4L",
                         "Zeejets_phpy", "Zmumujets_phpy", "Ztautaujets_phpy"
                        ]
    if testing:
        spec_file="mc21_13p6TeV_RDFspec-test.json"
    else:
        #spec_file="mc21_13p6TeV_RDFspec_jan08.json"
        #spec_file="mc16_13TeV_RDFspec_ZjetsPowhegPythia.json"
        #spec_file="mc21_13p6TeV_RDFspec_ZjetsSherpa.json"
        #spec_file="mc21_13p6TeV_RDFspec_ZjetsPowhegPythia.json"
        #spec_file="mc21_13p6TeV_RDFspec_ZjetsSherpa_Mar05.json"
        #spec_file="mc21_13p6TeV_RDFspec_ZjetsSherpa_Mar12.json"
        #spec_file="mc21_13p6TeV_RDFspec_ZjetsPowhegPythia_Mar05.json"
        spec_file="mc21_13p6TeV_RDFspec_ZjetsSherpa_Apr6.json"
else:
    categories_to_use=["data22"]
    if testing:
        spec_file="data22_13p6TeV_RDFspec_1files.json"
    else:
        spec_file="data22_13p6TeV_RDFspec_JAN24.json"
        
import json
categories_to_use = []
with open(spec_file, 'r') as json_file:
    data = json.load(json_file)
for dsid in data["samples"].keys():
    cat = data["samples"][dsid]["metadata"]["category"]
    if not cat in categories_to_use:
        categories_to_use.append(cat)
json_file.close()

In [22]:
only_process=[]
#only_process=["ttbar_dilep_nom"]
print(len(only_process))

0


In [23]:
if len(only_process):
    categories_to_use
    outfil = './mc21_13p6TeV_RDFspec_%s.json'%("_".join(only_process))
    spec_dir = {"samples":{}}
    for dsid in data["samples"].keys():
        cat = data["samples"][dsid]["metadata"]["category"]
        if cat in only_process:
            spec_dir["samples"][dsid] = data["samples"][dsid].copy()
    with open(outfil, 'w') as json_file:
        json.dump(spec_dir, json_file)
    json_file.close()
    print("Created spec file %s"%outfil)
    spec_file = outfil
    categories_to_use = only_process.copy()

We would like to create some histograms for some of the key-variables which we can look at later. We then need to define the number of bins and the range of the histograms. 

In [24]:
plot_dic = {"pt":{"nbin":1000,"min":0,"max":1000000},
            "eta":{"nbin":600,"min":-3,"max":3},
            "phi":{"nbin":800,"min":-4,"max":4},
            "type":{"nbin":2,"min":1,"max":2},
            "ch":{"nbin":3,"min":-1,"max":1},
            #"mass":{"nbin":1200,"min":0,"max":120},
            "mll":{"nbin":120,"min":0,"max":1200000},
            "mt2f12":{"nbin":800,"min":0,"max":800000} ,
            "mt2f13":{"nbin":800,"min":0,"max":800000} ,
            "mt2f14":{"nbin":800,"min":0,"max":800000} ,
            "mt2f23":{"nbin":800,"min":0,"max":800000} ,
            "mt2f24":{"nbin":800,"min":0,"max":800000} ,
            "mt2f34":{"nbin":800,"min":0,"max":800000} ,
            
            "el":{"nbin":5,"min":0,"max":5},
            "mu":{"nbin":5,"min":0,"max":5},
            "lep":{"nbin":5,"min":0,"max":5},
            "m4l":{"nbin":3000,"min":0,"max":3000000},
            "m2lf12":{"nbin":1200,"min":0,"max":1200000},
            "m2lf13":{"nbin":1200,"min":0,"max":1200000},
            "m2lf14":{"nbin":1200,"min":0,"max":1200000},
            "m2lf23":{"nbin":1200,"min":0,"max":1200000},
            "m2lf24":{"nbin":1200,"min":0,"max":1200000},
            "m2lf34":{"nbin":1200,"min":0,"max":1200000},
            
            "Zlm1":{"nbin":1200,"min":0,"max":1200000},
            "Zlm2":{"nbin":1200,"min":0,"max":1200000},
            "Zlm13":{"nbin":1200,"min":0,"max":1200000},
            "Zlm14":{"nbin":1200,"min":0,"max":1200000},
            "Zlm23":{"nbin":1200,"min":0,"max":1200000},
            "Zlm24":{"nbin":1200,"min":0,"max":1200000},
            "nbjet85":{"nbin": 20, "min": 0, "max": 20},
            "DeltaR12":{"nbin":1000,"min":0,"max":10} ,
            "DeltaR34":{"nbin":1000,"min":0,"max":10} ,
            "DeltaR13":{"nbin":1000,"min":0,"max":10} ,
            "DeltaR14":{"nbin":1000,"min":0,"max":10} ,
            "DeltaR23":{"nbin":1000,"min":0,"max":10} ,
            "DeltaR24":{"nbin":1000,"min":0,"max":10} ,
            "njets" :{"nbin":5, "min": 0, "max": 5} ,
            "DeltaPhi12":{"nbin": 400, "min": 0, "max": 4},
            "DeltaPhi13":{"nbin": 400, "min": 0, "max": 4},
            "DeltaPhi14":{"nbin": 400, "min": 0, "max": 4},
            "DeltaPhi23":{"nbin": 400, "min": 0, "max": 4},
            "DeltaPhi24":{"nbin": 400, "min": 0, "max": 4},
            "DeltaPhi34":{"nbin": 400, "min": 0, "max": 4},
            "unledmasses1":{"nbin":1200,"min":0,"max":1200000},
            "unledmasses2":{"nbin":1200,"min":0,"max":1200000},
            "SortedPt1":{"nbin":1000,"min":0,"max":1000000},
            "SortedPt2":{"nbin":1000,"min":0,"max":1000000},
            "SortedPt3":{"nbin":1000,"min":0,"max":1000000},
            "SortedPt4":{"nbin":1000,"min":0,"max":1000000}
            
}

## Decide if we want to only make histograms (default) or also write to hdf5 files
writeToHDF5 = False
makeHistograms = True
writeToROOT = True

## Analysen

Skal nå faktisk analysere og luke ut fra dataene. Skal være lik en *Practical Sessions* notebook. Må gjøre det om til 4 leptoner. Er foreløpig bare dilepton.

In [25]:
if 0:
    c = ROOT.TChain("CollectionTree")
    dofiles = []
    for i in inputDS.keys():
        fname = xsec_dic[i]["category"]
        """
        if not fname == "Diboson_nom":
            continue
        """
        print(inputDS[i])
        mappe = inputDS[i]
        for f in glob.glob(mappe+"/*.pool.root.1"):
            c.Add(f)
            #thisdf = RDataFrame("CollectionTree", f)
            #print("File %s has %i events" %(f.split("/")[-1],thisdf.Count().GetValue()))
            #thisdf = thisdf.Define("sig_el","AnalysisElectronsAuxDyn.eta > -2.47 && AnalysisElectronsAuxDyn.eta < 2.47 && AnalysisElectronsAuxDyn.pt > 7000 && AnalysisElectronsAuxDyn.DFCommonElectronsLHLooseBL")
            #thisdf = thisdf.Define("n_el", "ROOT::VecOps::Sum(sig_el,0.)")
            #thisdf = thisdf.Define("scalef", "1.0")
            #histo  = thisdf.Histo1D(("h_nel","h_nel",10,0,10),"n_el","scalef")
            #histo.Integral()
            dofiles.append(f)
    #thisdf = RDataFrame("CollectionTree", dofiles)
    #print("File %s has %i events" %(f.split("/")[-1],thisdf.Count().GetValue()))

In [26]:
#%%timeit
try:
    del pandas_df
    del df
    del df_cut
except:
    print("All OK")
opts = ROOT.RDF.RSnapshotOptions()
idid = 0


# Since we will dump things whenever we have > 1000 files open we will 
# append rather than recreate the outputfiles
opts.fMode = "UPDATE"
opts.fOverwriteIfExists = True;

# Below we will do basically four things on each of the input files: 
# 1) Define new columns/features/variables (augementation)
# 2) Apply some selections by filtering out the events we are interested in (slimming)
# 3) Make some histograms
# 4) create hdf5 files of the selected columns and events

# We define the path where we want to store the slimmed output (for later use in ML)
# Please change this to be using your specific path 
MYDIR = "/storage/carlpd/data/PHYSLITEforML/slimmed/" #<username>/
CHECK_FOLDER = os.path.isdir(MYDIR)
#If folder doesn't exist, then create it.
if not CHECK_FOLDER:
    os.makedirs(MYDIR)
    print("created folder : ", MYDIR)
else: 
    for file in glob.glob(MYDIR+'/*'): 
        os.remove(file)
print("INFO \t Will be saving ouput to %s"%(MYDIR))
GeV=0.001
histo = {}   
allhistograms = []
n_entries = 0
tot_files = 0
tot_time = 0
df = {}
df_cut = {}
intlumi = 1000.
new_col = []
ntup_col = []
dofiles = []
hasdone=[]
batchsize = 3000
#batchsize = 3
stopDataLoop = False
stopMCLoop= False
batchnum=1
categories_to_run= [1]
# Loop over all the DSIDS (i.e. all of the different prcoesses)

print("Got before for loop")
for icount in range(0,len(categories_to_run)):
    #isData = "data" in did
    idid += 1

    # Make a directory for the category. Delete content if already existing

    #print("Looking at did %i/%i"%(idid,len(inputDS.keys())))



    # How many files are there in the input folder for this particular DSID
    # Need to kee track of these since we can should avoid having > 1000 files open 
    # at the same time. Might happen if you're running many cores with DASK.
    #rfiles = glob.glob(inputDS[did]+"/*.pool.root.1")
    #tot_files += len(rfiles)

    # Defining the RDataFrame
    did="ALL"
    df[did] = ROOT.RDF.Experimental.FromSpec(spec_file)
    

    

    categories_to_use = []
    with open(spec_file) as user_file:
        spec_dic = json.load(user_file)
    for key in spec_dic["samples"]:
        thiscat = spec_dic["samples"][key]["metadata"]["category"]
        if not thiscat in categories_to_use:
            categories_to_use.append(thiscat)


    # Check how many simulated events for this process
    #print("Before GetValue")
    n_entries += df[did].Count().GetValue()
    #print("After GetValue")

    # Get the metadata for this particular DSID and calculate the scale factor
    sow = 0
    scalef = 1.0
    """
    #Eirik kom med denne
    for cat in categories_to_use:
    df_uncut = df.Filter(f'category == "{cat}"')
    df_uncut = df_uncut.Define("dummy","1.0")
    histo["h_cutflow_%s"%(cat)] = df_uncut.Histo1D(("h_cutflow_%s"%(cat),"h_cuflow_%s"%(cat),2,0,2),"dummy","scalef")
    """
    if not isData:
        #if sow > 0:
            #scalef = (xsec*kfac*filtereff*intlumi)/sow;
        df[did] = df[did].Define("eventweight",'EventInfoAuxDyn.mcEventWeights.at(0)')
        df[did] = df[did].Define("puweight",'EventInfoAuxDyn.PileupWeight_NOSYS')
        df[did] = df[did].DefinePerSample("xsec",'rdfsampleinfo_.GetD("xsec")')
        df[did] = df[did].DefinePerSample("sow", 'rdfsampleinfo_.GetD("sow")')
        df[did] = df[did].DefinePerSample("kFactor",'rdfsampleinfo_.GetD("kFactor")')
        df[did] = df[did].DefinePerSample("genFiltEff",'rdfsampleinfo_.GetD("genFiltEff")')
        #df[did] = df[did].Define("scalef","sf*eventweight*puweight")
        df[did] = df[did].Define("scalef","(xsec*kFactor*genFiltEff*eventweight*puweight)/sow")



        #print("Scale factor for %s is %f"%(did,scalef))
        #print("xsec = %.2f, kfac = %.2f, filtereff = %.2f, intlumi = %.0f, sow = %.2f"%(xsec,kfac,filtereff,intlumi,sow))
    else:
        df[did] = df[did].Define("eventweight","1.0")
        df[did] = df[did].Define("puweight","1.0")
        df[did] = df[did].Define("scalef","1.0")
        
    df[did] = df[did].DefinePerSample("category", 'rdfsampleinfo_.GetS("category")')
    for cat in categories_to_use:
        df_uncut = df[did].Filter(f'category == "{cat}"')
        df_uncut = df_uncut.Define("dummy","1.0")
        histo["h_cutflow_%s"%(cat)] = df_uncut.Histo1D(("h_cutflow_%s"%(cat),"h_cuflow_%s"%(cat),2,0,2),"dummy","scalef")



    #if sow == 0:






    # Create a new dictionary which will contain our filtered 
    # data frames, with the key indicating the selection
    # One can in principle make as many as one want of these 
    # selection and program will automatically make histograms
    # and write the output to a hdf5 file.
    df_cut[did] = {}
    print("Got to the filtration part")

    if isData:
        df[did] = df[did].Define("ev_passGRL","checkLB(EventInfoAuxDyn.runNumber,EventInfoAuxDyn.lumiBlock)")
        df[did] =df[did].Define("data", "1")
        df[did] = df[did].Define("cleanEvent","isCleanEvent(EventInfoAuxDyn.larFlags, \
                                                            EventInfoAuxDyn.tileFlags,\
                                                            EventInfoAuxDyn.sctFlags, \
                                                            EventInfoAuxDyn.coreFlags)")
        if not n_workers:
            df[did].Report().Print()
    else:
        df[did] = df[did].Define("ev_passGRL","1")
        df[did] = df[did].Define("cleanEvent", "1")
        df[did] =df[did].Define("data", "0")
        #df[did] = df[did].Define("bornMass","ComputeBornMass(BornLeptonsAuxDyn.px,\
                                                 #BornLeptonsAuxDyn.py, BornLeptonsAuxDyn.pz, \
                                                 #BornLeptonsAuxDyn.e,EventInfoAuxDyn.runNumber)")
        #df[did] = df[did].Filter("bornMass <= 105000","Z overlap")
    print("Got after first if isData")

    # Define our "good" electrons and "good" muons
    df_cut[did]["4L"] = df[did].Define("sig_el", "AnalysisElectronsAuxDyn.eta > -2.47 && AnalysisElectronsAuxDyn.eta < 2.47 && AnalysisElectronsAuxDyn.pt > 4500 && AnalysisElectronsAuxDyn.DFCommonElectronsLHLooseBL && \
                                        AnalysisElectronsAuxDyn.ptvarcone30_Nonprompt_All_MaxWeightTTVALooseCone_pt1000/AnalysisElectronsAuxDyn.pt < 0.15 && \
                                        AnalysisElectronsAuxDyn.topoetcone20/AnalysisElectronsAuxDyn.pt < 0.2")
    df_cut[did]["4L"] = df_cut[did]["4L"].Define("sig_mu", "AnalysisMuonsAuxDyn.eta > -2.7 && AnalysisMuonsAuxDyn.eta < 2.7 && AnalysisMuonsAuxDyn.pt > 5000 && (AnalysisMuonsAuxDyn.ptvarcone30_Nonprompt_All_MaxWeightTTVA_pt500 + 0.4*AnalysisMuonsAuxDyn.neflowisol20)/AnalysisMuonsAuxDyn.pt < 0.16")
    df_cut[did]["4L"] = df_cut[did]["4L"].Define('is_pvx','PrimaryVerticesAuxDyn.vertexType == 1')
    df_cut[did]["4L"] = df_cut[did]["4L"].Define("n_pvx", "ROOT::VecOps::Sum(is_pvx,0.)")
    df_cut[did]["4L"] = df_cut[did]["4L"].Filter('n_pvx >= 1', 'Primary Vertex')
    # Count them and require exactly two leptons (electron and muons)
    df_cut[did]["4L"] = df_cut[did]["4L"].Define("n_el", "ROOT::VecOps::Sum(sig_el,0.)")
    df_cut[did]["4L"] = df_cut[did]["4L"].Define("n_mu", "ROOT::VecOps::Sum(sig_mu,0.)")
    df_cut[did]["4L"] = df_cut[did]["4L"].Define("n_lep", "ROOT::VecOps::Sum(sig_el,0.)+ROOT::VecOps::Sum(sig_mu,0.)")
    #df_cut[did]["Pre4LFilt"] = df_cut[did]["4L"].Filter("1.0")
    df_cut[did]["4L"] = df_cut[did]["4L"].Filter('n_lep == 4', 'Exactly four leptons')
    #df_cut[did]["Pre4LFilt"] = df_cut[did]["4L"].Filter("1.0")
    #df_cut[did]["PrePassGRL"] = df_cut[did]["4L"].Filter("1.0")
    df_cut[did]["4L"] = df_cut[did]["4L"].Filter('ev_passGRL','Passed GRL')
    #df_cut[did]["PreCleanEvent"] = df_cut[did]["4L"].Filter("1.0")
    df_cut[did]["4L"] = df_cut[did]["4L"].Filter('cleanEvent','Passed cleanEvent')

    #Trying to find the Z-masses
    #print("Before Z")

    df_cut[did]["4L"] = df_cut[did]["4L"].Define("ChargeList", "getChargeList(AnalysisElectronsAuxDyn.charge[sig_el], AnalysisMuonsAuxDyn.charge[sig_mu])")
    df_cut[did]["4L"] = df_cut[did]["4L"].Define("FlavourList", "getFlavourList(AnalysisElectronsAuxDyn.charge[sig_el], AnalysisMuonsAuxDyn.charge[sig_mu])")
    df_cut[did]["4L"] = df_cut[did]["4L"].Define("PtList", "getChargeList(AnalysisElectronsAuxDyn.pt[sig_el], AnalysisMuonsAuxDyn.pt[sig_mu])")
    df_cut[did]["4L"] = df_cut[did]["4L"].Define("EtaList", "getChargeList(AnalysisElectronsAuxDyn.eta[sig_el], AnalysisMuonsAuxDyn.eta[sig_mu])")
    df_cut[did]["4L"] = df_cut[did]["4L"].Define("PhiList", "getChargeList(AnalysisElectronsAuxDyn.phi[sig_el], AnalysisMuonsAuxDyn.phi[sig_mu])")
    df_cut[did]["4L"] = df_cut[did]["4L"].Define("MList", "getMList(AnalysisElectronsAuxDyn.charge[sig_el], AnalysisMuonsAuxDyn.charge[sig_mu], getElecMass(), getMuonMass())")
    df_cut[did]["4L"] = df_cut[did]["4L"].Define("ev_Zlm1", "getLeptonsFromZ(ChargeList, FlavourList, PtList, EtaList, PhiList, MList).first")
    df_cut[did]["4L"] = df_cut[did]["4L"].Define("ev_Zlm2", "getLeptonsFromZ(ChargeList, FlavourList, PtList, EtaList, PhiList, MList).second")
    #print("After Z")
    
    #Cutting out after the trigger
    df_cut[did]["4L"] = df_cut[did]["4L"].Define("SortedPt", "sortedPtList(PtList)")
    #df_cut[did]["PreSortedPtFilters"] = df_cut[did]["4L"].Filter("1.0")
    df_cut[did]["4L"] = df_cut[did]["4L"].Define("ev_SortedPt1", "SortedPt[0]")
    df_cut[did]["4L"] = df_cut[did]["4L"].Define("ev_SortedPt2", "SortedPt[1]")
    df_cut[did]["4L"] = df_cut[did]["4L"].Define("ev_SortedPt3", "SortedPt[2]")
    df_cut[did]["4L"] = df_cut[did]["4L"].Define("ev_SortedPt4", "SortedPt[3]")
    
    for nc in ["ev_SortedPt1","ev_SortedPt2","ev_SortedPt3", "ev_SortedPt4"]:
        for i in range(0,3):
            if i == 0: 
                ch = "4E"
                cutstr = "n_el == 4"
            if i == 1: 
                ch = "4M"
                cutstr = "n_mu == 4"
            if i == 2: 
                ch = "EEMM"
                cutstr = "n_el == 2 && n_mu == 2"
            for cat_to_use in categories_to_use:
                histo["hprecutpT_%s_precut%s_%s"%(ch,nc,cat_to_use)] = df_cut[did]["4L"].Filter(cutstr).Filter(f'category=="{cat_to_use}"').Histo1D(("hprecutpT_%s_precut%s_%s"%(ch,nc,cat_to_use),"hprecutpT_%s_precut%s_%s"%(ch,nc,cat_to_use),3000,0,3000000),"%s"%nc,"scalef")
    
    
    df_cut[did]["4L"] = df_cut[did]["4L"].Filter("SortedPt[0]>27000")
    df_cut[did]["4L"] = df_cut[did]["4L"].Filter("SortedPt[1]>15000")
    df_cut[did]["4L"] = df_cut[did]["4L"].Filter("SortedPt[2]>10000")
    
    df_cut[did]["4L"] = df_cut[did]["4L"].Define("ev_m4lZ","ComputeInvariantMass4L(PtList,EtaList,PhiList,MList)")
    #cut to secure data within 1000GeV because Im not allowed to show data beyond that point
    if isData==True:
        df_cut[did]["4L"] = df_cut[did]["4L"].Filter("ev_m4lZ<1000000")
        
    
    """
    ch_lep=np.zeros(4)
    for i in range(0,4):
        if leptype[i]==11:
            ch_lep[i]=df_cut[did]["4L"]

    """
    #Getting the pairs
    df_cut[did]["4L"] = df_cut[did]["4L"].Define("PairedList", "getLeptonsPairsFromZ(ChargeList, FlavourList, PtList, EtaList, PhiList, MList)")
    df_cut[did]["4L"] = df_cut[did]["4L"].Define("isSFOS", "PairedList[0]>=0 && PairedList[1]>=0 && PairedList[2]>=0 && PairedList[3]>=0")
    df_cut[did]["4L"] = df_cut[did]["4L"].Filter("isSFOS")
    #Getting the last Ms thats not in pairs
    df_cut[did]["4L"] = df_cut[did]["4L"].Define("ev_Zlm13","getInvariantMass_ll_4(PtList,EtaList, PhiList, MList,PairedList[0], PairedList[2])")
    df_cut[did]["4L"] = df_cut[did]["4L"].Define("ev_Zlm14","getInvariantMass_ll_4(PtList,EtaList, PhiList, MList,PairedList[0], PairedList[3])")
    df_cut[did]["4L"] = df_cut[did]["4L"].Define("ev_Zlm23","getInvariantMass_ll_4(PtList,EtaList, PhiList, MList,PairedList[1], PairedList[2])")
    df_cut[did]["4L"] = df_cut[did]["4L"].Define("ev_Zlm24","getInvariantMass_ll_4(PtList,EtaList, PhiList, MList,PairedList[1], PairedList[3])")
    #Getting the deltaRs
    #The closest to Z (leading) are 1 and 2 and the next closest to z are 34
    df_cut[did]["4L"] = df_cut[did]["4L"].Define("ev_DeltaR12","deltaR_ll_4(PtList,EtaList, PhiList, MList,PairedList[0], PairedList[1])")
    df_cut[did]["4L"] = df_cut[did]["4L"].Define("ev_DeltaR34","deltaR_ll_4(PtList,EtaList, PhiList, MList,PairedList[2], PairedList[3])")
    df_cut[did]["4L"] = df_cut[did]["4L"].Define("ev_DeltaR13","deltaR_ll_4(PtList,EtaList, PhiList, MList,PairedList[0], PairedList[2])")
    df_cut[did]["4L"] = df_cut[did]["4L"].Define("ev_DeltaR14","deltaR_ll_4(PtList,EtaList, PhiList, MList,PairedList[0], PairedList[3])")
    df_cut[did]["4L"] = df_cut[did]["4L"].Define("ev_DeltaR23","deltaR_ll_4(PtList,EtaList, PhiList, MList,PairedList[1], PairedList[2])")
    df_cut[did]["4L"] = df_cut[did]["4L"].Define("ev_DeltaR24","deltaR_ll_4(PtList,EtaList, PhiList, MList,PairedList[1], PairedList[3])")
    #Getting deltaPhis
    df_cut[did]["4L"] = df_cut[did]["4L"].Define("ev_DeltaPhi12","deltaPhi_ll_4(PtList,EtaList, PhiList, MList,PairedList[0], PairedList[1])")
    df_cut[did]["4L"] = df_cut[did]["4L"].Define("ev_DeltaPhi34","deltaPhi_ll_4(PtList,EtaList, PhiList, MList,PairedList[2], PairedList[3])")
    df_cut[did]["4L"] = df_cut[did]["4L"].Define("ev_DeltaPhi13","deltaPhi_ll_4(PtList,EtaList, PhiList, MList,PairedList[0], PairedList[2])")
    df_cut[did]["4L"] = df_cut[did]["4L"].Define("ev_DeltaPhi14","deltaPhi_ll_4(PtList,EtaList, PhiList, MList,PairedList[0], PairedList[3])")
    df_cut[did]["4L"] = df_cut[did]["4L"].Define("ev_DeltaPhi23","deltaPhi_ll_4(PtList,EtaList, PhiList, MList,PairedList[1], PairedList[2])")
    df_cut[did]["4L"] = df_cut[did]["4L"].Define("ev_DeltaPhi24","deltaPhi_ll_4(PtList,EtaList, PhiList, MList,PairedList[1], PairedList[3])")
    #Requiring DeltaR>0.1
    
    for nc in ["ev_DeltaR12","ev_DeltaR34", "ev_DeltaR13","ev_DeltaR14","ev_DeltaR23", "ev_DeltaR24"]:
        for i in range(0,3):
            if i == 0: 
                ch = "4E"
                cutstr = "n_el == 4"
            if i == 1: 
                ch = "4M"
                cutstr = "n_mu == 4"
            if i == 2: 
                ch = "EEMM"
                cutstr = "n_el == 2 && n_mu == 2"
            for cat_to_use in categories_to_use:
                histo["hprecutDeltaR_%s_precut%s_%s"%(ch,nc,cat_to_use)] = df_cut[did]["4L"].Filter(cutstr).Filter(f'category=="{cat_to_use}"').Histo1D(("hprecutDeltaR_%s_precut%s_%s"%(ch,nc,cat_to_use),"hprecutpT_%s_precut%s_%s"%(ch,nc,cat_to_use),1000,0,10),"%s"%nc,"scalef")
    
    
    
    
    df_cut[did]["4L"] = df_cut[did]["4L"].Filter("ev_DeltaR13>0.1")
    df_cut[did]["4L"] = df_cut[did]["4L"].Filter("ev_DeltaR14>0.1")
    df_cut[did]["4L"] = df_cut[did]["4L"].Filter("ev_DeltaR23>0.1")
    df_cut[did]["4L"] = df_cut[did]["4L"].Filter("ev_DeltaR24>0.1")
    
    for nc in ["ev_DeltaR12","ev_DeltaR34", "ev_DeltaR13","ev_DeltaR14","ev_DeltaR23", "ev_DeltaR24"]:
        for i in range(0,3):
            if i == 0: 
                ch = "4E"
                cutstr = "n_el == 4"
            if i == 1: 
                ch = "4M"
                cutstr = "n_mu == 4"
            if i == 2: 
                ch = "EEMM"
                cutstr = "n_el == 2 && n_mu == 2"
            for cat_to_use in categories_to_use:
                histo["hpostcutDeltaR_%s_postcut%s_%s"%(ch,nc,cat_to_use)] = df_cut[did]["4L"].Filter(cutstr).Filter(f'category=="{cat_to_use}"').Histo1D(("hpostcutDeltaR_%s_postcut%s_%s"%(ch,nc,cat_to_use),"hpostcutpT_%s_postcut%s_%s"%(ch,nc,cat_to_use),1000,0,10),"%s"%nc,"scalef")
    
    
    for nc in ["ev_Zlm1", "ev_Zlm2"]:
        for i in range(0,3):
            if i == 0: 
                ch = "4E"
                cutstr = "n_el == 4"
            if i == 1: 
                ch = "4M"
                cutstr = "n_mu == 4"
            if i == 2: 
                ch = "EEMM"
                cutstr = "n_el == 2 && n_mu == 2"
            for cat_to_use in categories_to_use:
                histo["hprecutThreshold_%s_precut%s_%s"%(ch,nc,cat_to_use)] = df_cut[did]["4L"].Filter(cutstr).Filter(f'category=="{cat_to_use}"').Histo1D(("hprecutThreshold_%s_precut%s_%s"%(ch,nc,cat_to_use),"hprecutThreshold_%s_precut%s_%s"%(ch,nc,cat_to_use),3000,0,3000000),"%s"%nc,"scalef")
                #histo["hprecutThreshold_%s_precut%s_%s"%(ch,nc,cat_to_use)] = df_cut[did]["4L"].Filter(cutstr).Filter(f'category=="{cat_to_use}"').Histo1D(("hprecutThreshold_%s_precut%s_%s"%(ch,nc,cat_to_use),"hprecutThreshold_%s_precut%s_%s"%(ch,nc,cat_to_use),3000,0,3000000),"%s"%nc,"scalef")


        
        
        
    for i in range(0,3):
        nc="ev_m4lZ"
        if i == 0: 
            ch = "4E"
            cutstr = "n_el == 4"
        if i == 1: 
            ch = "4M"
            cutstr = "n_mu == 4"
        if i == 2: 
            ch = "EEMM"
            cutstr = "n_el == 2 && n_mu == 2"
        for cat_to_use in categories_to_use:
            histo["hprecutm4l_%s_precut%s_%s"%(ch,nc,cat_to_use)] = df_cut[did]["4L"].Filter(cutstr).Filter(f'category=="{cat_to_use}"').Histo1D(("hprecutm4l_%s_precut%s_%s"%(ch,nc,cat_to_use),"hprecutm4l_%s_precut%s_%s"%(ch,nc,cat_to_use),3000,0,3000000),"%s"%nc,"scalef")

    
    #df_cut[did]["PreTresholdMasses"] = df_cut[did]["4L"].Filter("1.0")
    #Higgs
    #df_cut[did]["4L"] = df_cut[did]["4L"].Filter("MassesInThreshold(ev_Zlm1/1000, ev_Zlm2/1000, ev_m4lZ/1000)")
    
    
    #Graviton 1
    """
    df_cut[did]["4L"] = df_cut[did]["4L"].Filter("ev_Zlm1>50000")
    df_cut[did]["4L"] = df_cut[did]["4L"].Filter("ev_Zlm1<106000")
    
    df_cut[did]["4L"] = df_cut[did]["4L"].Filter("ev_Zlm2>50000")
    df_cut[did]["4L"] = df_cut[did]["4L"].Filter("ev_Zlm2<115000")
    """
    
    df_cut[did]["4L"] = df_cut[did]["4L"].Filter("CloseToZ(ev_Zlm1, 20000.0)")
    #If Graviton
    df_cut[did]["4L"] = df_cut[did]["4L"].Filter("CloseToZ(ev_Zlm2, 20000.0)")


    #Jet time
    # This function does the linking from the jet collection to the flavour tagging
    # information. But this is not really needed since there is a 1-to-1 correspondence between the two
    # df_cut[did]["2L"] = df_cut[did]["2L"].Define("bjetprob","getTaggerProb(AnalysisJetsAuxDyn.btaggingLink.m_persKey,AnalysisJetsAuxDyn.btaggingLink.m_persIndex,BTagging_AntiKt4EMPFlowAuxDyn.DL1dv01_pb)")

    

    # For some reason bool variables are stored as char in xAOD. Needs to go through c++
    # to access them
    #df_cut[did]["4L"] = df_cut[did]["4L"].Define("passJVT","checkJVT(AnalysisJetsAuxDyn.NNJvtPass)")#checkJVT(AnalysisJetsAuxDyn.NNJvtPass)")
    #df_cut[did]["4L"] = df_cut[did]["4L"].Define("sig_jet","AnalysisJetsAuxDyn.eta > -4.5 && AnalysisJetsAuxDyn.eta < 4.5 && AnalysisJetsAuxDyn.pt > 20000")
    df_cut[did]["4L"] = df_cut[did]["4L"].Define("sig_jet","AnalysisJetsAuxDyn.eta > -4.5 && \
                                                            AnalysisJetsAuxDyn.eta < 4.5 && \
                                                            AnalysisJetsAuxDyn.pt > 20000 && \
                                                            checkJVT(AnalysisJetsAuxDyn.NNJvtPass) && \
                                                            JetLepOR(AnalysisElectronsAuxDyn.pt,\
                                                            AnalysisElectronsAuxDyn.eta,\
                                                            AnalysisElectronsAuxDyn.phi,\
                                                            AnalysisJetsAuxDyn.pt,\
                                                            AnalysisJetsAuxDyn.eta,\
                                                            AnalysisJetsAuxDyn.phi,\
                                                            AnalysisJetsAuxDyn.m,0.2,1) && \
                                                            JetLepOR(AnalysisMuonsAuxDyn.pt,\
                                                            AnalysisMuonsAuxDyn.eta,\
                                                            AnalysisMuonsAuxDyn.phi,\
                                                            AnalysisJetsAuxDyn.pt,\
                                                            AnalysisJetsAuxDyn.eta,\
                                                            AnalysisJetsAuxDyn.phi,\
                                                            AnalysisJetsAuxDyn.m,0.1,0)")
    #df_cut[did]["4L"] = df_cut[did]["4L"].Define("pass_JVT","checkJVT(AnalysisJetsAuxDyn.NNJvtPass[sig_jet])")
    # Make some jet-varibles
    df_cut[did]["4L"] = df_cut[did]["4L"].Define('jet_pt',"AnalysisJetsAuxDyn.pt[sig_jet]")
    df_cut[did]["4L"] = df_cut[did]["4L"].Define('jet_eta',"AnalysisJetsAuxDyn.eta[sig_jet]")
    df_cut[did]["4L"] = df_cut[did]["4L"].Define('jet_phi',"AnalysisJetsAuxDyn.phi[sig_jet]")
    df_cut[did]["4L"] = df_cut[did]["4L"].Define('jet_mass',"AnalysisJetsAuxDyn.m[sig_jet]")
    # Define the b-jets using the 85% WP
    df_cut[did]["4L"] = df_cut[did]["4L"].Define("jet_isbjet","AnalysisJetsAuxDyn.eta[sig_jet] > -2.5 && \
                                                           AnalysisJetsAuxDyn.eta[sig_jet] < 2.5 && \
                                                           BTagging_AntiKt4EMPFlowAuxDyn.DL1dv00_pb[sig_jet] > 0.85")
    df_cut[did]["4L"] = df_cut[did]["4L"].Define("ev_nbjet85","ROOT::VecOps::Sum(jet_isbjet,0.)")
    df_cut[did]["4L"] = df_cut[did]["4L"].Define("ev_njets","ROOT::VecOps::Sum(sig_jet,0.)")

    """
    #Define jets
    df_cut[did]["4L"] = df_cut[did]["4L"].Define("sig_jet", "AnalysisJetsAuxDyn.eta > -2.5 && AnalysisJetsAuxDyn.eta < 2.5 && AnalysisJetsAuxDyn.pt > 30000 && AnalysisJetsAuxDyn.DFCommonJetsLHLooseBL")
    df_cut[did]["4L"] = df_cut[did]["4L"].Define("n_jet", "ROOT::VecOps::Sum(sig_jet,0.)")
    """
    """
    df_cut[did]["4ALL"] = df_cut[did]["4L"]
    df_cut[did]["4ALL"] = df_cut[did]["4ALL"].Define("unledmasses", "unpairedSFOSmasses(PairedList,ChargeList, FlavourList, PtList, EtaList, PhiList, MList)")
    df_cut[did]["4ALL"] = df_cut[did]["4ALL"].Filter("unledmasses.first>5000")
    df_cut[did]["4ALL"] = df_cut[did]["4ALL"].Filter("unledmasses.second>5000")
    df_cut[did]["4ALL"] = df_cut[did]["4ALL"].Define("lep1_pt","PtList[0]")
    df_cut[did]["4ALL"] = df_cut[did]["4ALL"].Define("lep2_pt","PtList[1]")
    df_cut[did]["4ALL"] = df_cut[did]["4ALL"].Define("lep3_pt","PtList[2]")
    df_cut[did]["4ALL"] = df_cut[did]["4ALL"].Define("lep4_pt","PtList[3]")
    df_cut[did]["4ALL"] = df_cut[did]["4ALL"].Define("lep1_eta","EtaList[0]")
    df_cut[did]["4ALL"] = df_cut[did]["4ALL"].Define("lep2_eta","EtaList[1]")
    df_cut[did]["4ALL"] = df_cut[did]["4ALL"].Define("lep3_eta","EtaList[2]")
    df_cut[did]["4ALL"] = df_cut[did]["4ALL"].Define("lep4_eta","EtaList[3]")
    df_cut[did]["4ALL"] = df_cut[did]["4ALL"].Define("lep1_phi","PhiList[0]")
    df_cut[did]["4ALL"] = df_cut[did]["4ALL"].Define("lep2_phi","PhiList[1]")
    df_cut[did]["4ALL"] = df_cut[did]["4ALL"].Define("lep3_phi","PhiList[2]")
    df_cut[did]["4ALL"] = df_cut[did]["4ALL"].Define("lep4_phi","PhiList[3]")
    df_cut[did]["4ALL"] = df_cut[did]["4ALL"].Define("lep1_type","getActualIDFromFlavourList(FlavourList[0])")
    df_cut[did]["4ALL"] = df_cut[did]["4ALL"].Define("lep2_type","getActualIDFromFlavourList(FlavourList[1])")
    df_cut[did]["4ALL"] = df_cut[did]["4ALL"].Define("lep3_type","getActualIDFromFlavourList(FlavourList[2])")
    df_cut[did]["4ALL"] = df_cut[did]["4ALL"].Define("lep4_type","getActualIDFromFlavourList(FlavourList[3])")
    df_cut[did]["4ALL"] = df_cut[did]["4ALL"].Define("lep1_mass","MList[0]")
    df_cut[did]["4ALL"] = df_cut[did]["4ALL"].Define("lep2_mass","MList[1]")
    df_cut[did]["4ALL"] = df_cut[did]["4ALL"].Define("lep3_mass","MList[2]")
    df_cut[did]["4ALL"] = df_cut[did]["4ALL"].Define("lep4_mass","MList[3]")
    df_cut[did]["4ALL"] = df_cut[did]["4ALL"].Define("lep1_ch","ChargeList[0]")
    df_cut[did]["4ALL"] = df_cut[did]["4ALL"].Define("lep2_ch","ChargeList[1]")
    df_cut[did]["4ALL"] = df_cut[did]["4ALL"].Define("lep3_ch","ChargeList[2]")
    df_cut[did]["4ALL"] = df_cut[did]["4ALL"].Define("lep4_ch","ChargeList[3]")
    df_cut[did]["4ALL"] = df_cut[did]["4ALL"].Define("ev_m4l","ComputeInvariantMass4L(PtList,EtaList,PhiList,MList)")
    df_cut[did]["4ALL"] = df_cut[did]["4ALL"].Define('lep_pt',"getVector(AnalysisElectronsAuxDyn.pt[sig_el],AnalysisMuonsAuxDyn.pt[sig_mu])")
    df_cut[did]["4ALL"] = df_cut[did]["4ALL"].Define('lep_eta',"getVector(AnalysisElectronsAuxDyn.eta[sig_el],AnalysisMuonsAuxDyn.eta[sig_mu])")
    df_cut[did]["4ALL"] = df_cut[did]["4ALL"].Define('lep_phi',"getVector(AnalysisElectronsAuxDyn.phi[sig_el],AnalysisMuonsAuxDyn.phi[sig_mu])")
    df_cut[did]["4ALL"] = df_cut[did]["4ALL"].Define('lep_mass',"getVector(AnalysisElectronsAuxDyn.charge[sig_el],AnalysisMuonsAuxDyn.charge[sig_mu],0.511,105.66)")
     #Dilepton invariant mass
    df_cut[did]["4ALL"] = df_cut[did]["4ALL"].Define("ev_m2lf12","ComputeInvariantMass2L(lep_pt,lep_eta,lep_phi,lep_mass, 0, 1)")
    df_cut[did]["4ALL"] = df_cut[did]["4ALL"].Define("ev_m2lf13","ComputeInvariantMass2L(lep_pt,lep_eta,lep_phi,lep_mass, 0, 2)")
    df_cut[did]["4ALL"] = df_cut[did]["4ALL"].Define("ev_m2lf14","ComputeInvariantMass2L(lep_pt,lep_eta,lep_phi,lep_mass, 0, 3)")
    df_cut[did]["4ALL"] = df_cut[did]["4ALL"].Define("ev_m2lf23","ComputeInvariantMass2L(lep_pt,lep_eta,lep_phi,lep_mass, 1, 2)")
    df_cut[did]["4ALL"] = df_cut[did]["4ALL"].Define("ev_m2lf24","ComputeInvariantMass2L(lep_pt,lep_eta,lep_phi,lep_mass, 1, 3)")
    df_cut[did]["4ALL"] = df_cut[did]["4ALL"].Define("ev_m2lf34","ComputeInvariantMass2L(lep_pt,lep_eta,lep_phi,lep_mass, 2, 3)")
    #Transverse mass
    df_cut[did]["4ALL"] = df_cut[did]["4ALL"].Define("ev_mt2f12","calcMT2(lep_pt,lep_eta,lep_phi,lep_mass,MET_Core_AnalysisMETAuxDyn.sumet[0],MET_Core_AnalysisMETAuxDyn.mpx[0],MET_Core_AnalysisMETAuxDyn.mpy[0],0,1)")
    df_cut[did]["4ALL"] = df_cut[did]["4ALL"].Define("ev_mt2f13","calcMT2(lep_pt,lep_eta,lep_phi,lep_mass,MET_Core_AnalysisMETAuxDyn.sumet[0],MET_Core_AnalysisMETAuxDyn.mpx[0],MET_Core_AnalysisMETAuxDyn.mpy[0],0,2)")
    df_cut[did]["4ALL"] = df_cut[did]["4ALL"].Define("ev_mt2f14","calcMT2(lep_pt,lep_eta,lep_phi,lep_mass,MET_Core_AnalysisMETAuxDyn.sumet[0],MET_Core_AnalysisMETAuxDyn.mpx[0],MET_Core_AnalysisMETAuxDyn.mpy[0],0,3)")
    df_cut[did]["4ALL"] = df_cut[did]["4ALL"].Define("ev_mt2f23","calcMT2(lep_pt,lep_eta,lep_phi,lep_mass,MET_Core_AnalysisMETAuxDyn.sumet[0],MET_Core_AnalysisMETAuxDyn.mpx[0],MET_Core_AnalysisMETAuxDyn.mpy[0],1,2)")
    df_cut[did]["4ALL"] = df_cut[did]["4ALL"].Define("ev_mt2f24","calcMT2(lep_pt,lep_eta,lep_phi,lep_mass,MET_Core_AnalysisMETAuxDyn.sumet[0],MET_Core_AnalysisMETAuxDyn.mpx[0],MET_Core_AnalysisMETAuxDyn.mpy[0],1,3)")
    df_cut[did]["4ALL"] = df_cut[did]["4ALL"].Define("ev_mt2f34","calcMT2(lep_pt,lep_eta,lep_phi,lep_mass,MET_Core_AnalysisMETAuxDyn.sumet[0],MET_Core_AnalysisMETAuxDyn.mpx[0],MET_Core_AnalysisMETAuxDyn.mpy[0],2,3)")
    """
    
    
    
    # Require exactly 4 electrons and put this into a new RDataFrame
    df_cut[did]["4E"] = df_cut[did]["4L"].Filter("n_el == 4")
    # Then start defining some flat variables to be written to hdf5 and made histograms from
    df_cut[did]["4E"] = df_cut[did]["4E"].Define("unledmasses", "unpairedSFOSmasses(PairedList,ChargeList, FlavourList, PtList, EtaList, PhiList, MList)")
    df_cut[did]["4E"] = df_cut[did]["4E"].Define("ev_unledmasses1", "unledmasses.first")
    df_cut[did]["4E"] = df_cut[did]["4E"].Define("ev_unledmasses2", "unledmasses.second")
    for nc in ["ev_unledmasses1","ev_unledmasses2"]:
        ch = "4E"
        cutstr = "n_el == 4"

        for cat_to_use in categories_to_use:
            histo["hprecutUnledmasses_%s_precut%s_%s"%(ch,nc,cat_to_use)] = df_cut[did]["4E"].Filter(cutstr).Filter(f'category=="{cat_to_use}"').Histo1D(("hprecutUnledmasses_%s_precut%s_%s"%(ch,nc,cat_to_use),"hprecutUnledmasses_%s_precut%s_%s"%(ch,nc,cat_to_use),1000,0,1200000),"%s"%nc,"scalef")
    
    
    df_cut[did]["4E"] = df_cut[did]["4E"].Filter("unledmasses.first>5000")
    df_cut[did]["4E"] = df_cut[did]["4E"].Filter("unledmasses.second>5000")
    df_cut[did]["4E"] = df_cut[did]["4E"].Define("lep1_pt","AnalysisElectronsAuxDyn.pt[sig_el].at(0)")
    df_cut[did]["4E"] = df_cut[did]["4E"].Define("lep2_pt","AnalysisElectronsAuxDyn.pt[sig_el].at(1)")
    df_cut[did]["4E"] = df_cut[did]["4E"].Define("lep3_pt","AnalysisElectronsAuxDyn.pt[sig_el].at(2)")
    df_cut[did]["4E"] = df_cut[did]["4E"].Define("lep4_pt","AnalysisElectronsAuxDyn.pt[sig_el].at(3)")
    df_cut[did]["4E"] = df_cut[did]["4E"].Define("lep1_eta","AnalysisElectronsAuxDyn.eta[sig_el].at(0)")
    df_cut[did]["4E"] = df_cut[did]["4E"].Define("lep2_eta","AnalysisElectronsAuxDyn.eta[sig_el].at(1)")
    df_cut[did]["4E"] = df_cut[did]["4E"].Define("lep3_eta","AnalysisElectronsAuxDyn.eta[sig_el].at(2)")
    df_cut[did]["4E"] = df_cut[did]["4E"].Define("lep4_eta","AnalysisElectronsAuxDyn.eta[sig_el].at(3)")
    df_cut[did]["4E"] = df_cut[did]["4E"].Define("lep1_phi","AnalysisElectronsAuxDyn.phi[sig_el].at(0)")
    df_cut[did]["4E"] = df_cut[did]["4E"].Define("lep2_phi","AnalysisElectronsAuxDyn.phi[sig_el].at(1)")
    df_cut[did]["4E"] = df_cut[did]["4E"].Define("lep3_phi","AnalysisElectronsAuxDyn.phi[sig_el].at(2)")
    df_cut[did]["4E"] = df_cut[did]["4E"].Define("lep4_phi","AnalysisElectronsAuxDyn.phi[sig_el].at(3)")
    df_cut[did]["4E"] = df_cut[did]["4E"].Define("lep1_type","getElecPdgID()")
    df_cut[did]["4E"] = df_cut[did]["4E"].Define("lep2_type","getElecPdgID()")
    df_cut[did]["4E"] = df_cut[did]["4E"].Define("lep3_type","getElecPdgID()")
    df_cut[did]["4E"] = df_cut[did]["4E"].Define("lep4_type","getElecPdgID()")
    df_cut[did]["4E"] = df_cut[did]["4E"].Define("lep1_mass","getElecMass()")
    df_cut[did]["4E"] = df_cut[did]["4E"].Define("lep2_mass","getElecMass()") 
    df_cut[did]["4E"] = df_cut[did]["4E"].Define("lep3_mass","getElecMass()")
    df_cut[did]["4E"] = df_cut[did]["4E"].Define("lep4_mass","getElecMass()")
    df_cut[did]["4E"] = df_cut[did]["4E"].Define("lep1_ch","AnalysisElectronsAuxDyn.charge[sig_el].at(0)")
    df_cut[did]["4E"] = df_cut[did]["4E"].Define("lep2_ch","AnalysisElectronsAuxDyn.charge[sig_el].at(1)")
    df_cut[did]["4E"] = df_cut[did]["4E"].Define("lep3_ch","AnalysisElectronsAuxDyn.charge[sig_el].at(2)")
    df_cut[did]["4E"] = df_cut[did]["4E"].Define("lep4_ch","AnalysisElectronsAuxDyn.charge[sig_el].at(3)")
    # These variables are used when calculating object-specific variables (like mll and mt2 below)
    df_cut[did]["4E"] = df_cut[did]["4E"].Define('lep_pt',"AnalysisElectronsAuxDyn.pt[sig_el]")
    df_cut[did]["4E"] = df_cut[did]["4E"].Define('lep_eta',"AnalysisElectronsAuxDyn.eta[sig_el]")
    df_cut[did]["4E"] = df_cut[did]["4E"].Define('lep_phi',"AnalysisElectronsAuxDyn.phi[sig_el]")
    df_cut[did]["4E"] = df_cut[did]["4E"].Define('lep_mass',"getVector(AnalysisElectronsAuxDyn.charge[sig_el],0.511)")#,"abs(AnalysisElectronsAuxDyn.charge[sig_el])*0.511")
    # Use the above 4-vector variables to calculate mll and mt2
    #Fourlepton invariant mass
    df_cut[did]["4E"] = df_cut[did]["4E"].Define("ev_m4l","ComputeInvariantMass4L(lep_pt,lep_eta,lep_phi,lep_mass)")
    #Dilepton invariant mass
    df_cut[did]["4E"] = df_cut[did]["4E"].Define("ev_m2lf12","ComputeInvariantMass2L(lep_pt,lep_eta,lep_phi,lep_mass, 0, 1)")
    df_cut[did]["4E"] = df_cut[did]["4E"].Define("ev_m2lf13","ComputeInvariantMass2L(lep_pt,lep_eta,lep_phi,lep_mass, 0, 2)")
    df_cut[did]["4E"] = df_cut[did]["4E"].Define("ev_m2lf14","ComputeInvariantMass2L(lep_pt,lep_eta,lep_phi,lep_mass, 0, 3)")
    df_cut[did]["4E"] = df_cut[did]["4E"].Define("ev_m2lf23","ComputeInvariantMass2L(lep_pt,lep_eta,lep_phi,lep_mass, 1, 2)")
    df_cut[did]["4E"] = df_cut[did]["4E"].Define("ev_m2lf24","ComputeInvariantMass2L(lep_pt,lep_eta,lep_phi,lep_mass, 1, 3)")
    df_cut[did]["4E"] = df_cut[did]["4E"].Define("ev_m2lf34","ComputeInvariantMass2L(lep_pt,lep_eta,lep_phi,lep_mass, 2, 3)")
    #Transverse/Stranseverse mass
    df_cut[did]["4E"] = df_cut[did]["4E"].Define("ev_mt2f12","calcMT2(lep_pt,lep_eta,lep_phi,lep_mass,MET_Core_AnalysisMETAuxDyn.sumet[0],MET_Core_AnalysisMETAuxDyn.mpx[0],MET_Core_AnalysisMETAuxDyn.mpy[0],0,1)")
    df_cut[did]["4E"] = df_cut[did]["4E"].Define("ev_mt2f13","calcMT2(lep_pt,lep_eta,lep_phi,lep_mass,MET_Core_AnalysisMETAuxDyn.sumet[0],MET_Core_AnalysisMETAuxDyn.mpx[0],MET_Core_AnalysisMETAuxDyn.mpy[0],0,2)")
    df_cut[did]["4E"] = df_cut[did]["4E"].Define("ev_mt2f14","calcMT2(lep_pt,lep_eta,lep_phi,lep_mass,MET_Core_AnalysisMETAuxDyn.sumet[0],MET_Core_AnalysisMETAuxDyn.mpx[0],MET_Core_AnalysisMETAuxDyn.mpy[0],0,3)")
    df_cut[did]["4E"] = df_cut[did]["4E"].Define("ev_mt2f23","calcMT2(lep_pt,lep_eta,lep_phi,lep_mass,MET_Core_AnalysisMETAuxDyn.sumet[0],MET_Core_AnalysisMETAuxDyn.mpx[0],MET_Core_AnalysisMETAuxDyn.mpy[0],1,2)")
    df_cut[did]["4E"] = df_cut[did]["4E"].Define("ev_mt2f24","calcMT2(lep_pt,lep_eta,lep_phi,lep_mass,MET_Core_AnalysisMETAuxDyn.sumet[0],MET_Core_AnalysisMETAuxDyn.mpx[0],MET_Core_AnalysisMETAuxDyn.mpy[0],1,3)")
    df_cut[did]["4E"] = df_cut[did]["4E"].Define("ev_mt2f34","calcMT2(lep_pt,lep_eta,lep_phi,lep_mass,MET_Core_AnalysisMETAuxDyn.sumet[0],MET_Core_AnalysisMETAuxDyn.mpx[0],MET_Core_AnalysisMETAuxDyn.mpy[0],2,3)")
    #The deltaR
    """
    df_cut[did]["4E"] = df_cut[did]["4E"].Define("ev_DeltaRf12","deltaR_ll_4(lep_pt,lep_eta, lep_mass, lep_phi,0,1)")
    df_cut[did]["4E"] = df_cut[did]["4E"].Define("ev_DeltaRf13","deltaR_ll_4(lep_pt,lep_eta, lep_mass, lep_phi,0,2)")
    df_cut[did]["4E"] = df_cut[did]["4E"].Define("ev_DeltaRf14","deltaR_ll_4(lep_pt,lep_eta, lep_mass, lep_phi,0,3)")
    df_cut[did]["4E"] = df_cut[did]["4E"].Define("ev_DeltaRf23","deltaR_ll_4(lep_pt,lep_eta, lep_mass, lep_phi,1,2)")
    df_cut[did]["4E"] = df_cut[did]["4E"].Define("ev_DeltaRf24","deltaR_ll_4(lep_pt,lep_eta, lep_mass, lep_phi,1,3)")
    df_cut[did]["4E"] = df_cut[did]["4E"].Define("ev_DeltaRf34","deltaR_ll_4(lep_pt,lep_eta, lep_mass, lep_phi,2,3)")
    """
    print("Got after 4E")


    #Stransverse mass, hvis du har tid
    #df_cut[did]["4E"] = df_cut[did]["4E"].Define("ev_mt4","calcMT2(lep_pt,lep_eta,lep_phi,lep_mass,MET_Core_AnalysisMETAuxDyn.sumet[0],MET_Core_AnalysisMETAuxDyn.mpx[0],MET_Core_AnalysisMETAuxDyn.mpy[0],0,1)")
    """
    #Kutt kutt kutt
    df_cut[did]["4E"] =df_cut[did]["4E"].Filter("lep1_pt>20000")
    df_cut[did]["4E"] =df_cut[did]["4E"].Filter("lep1_pt>15000")
    """
    # Do exactly the same for events with exactly 2 muons
    df_cut[did]["4M"] = df_cut[did]["4L"].Filter("n_mu == 4")
    df_cut[did]["4M"] = df_cut[did]["4M"].Define("unledmasses", "unpairedSFOSmasses(PairedList,ChargeList, FlavourList, PtList, EtaList, PhiList, MList)")
    
    df_cut[did]["4M"] = df_cut[did]["4M"].Define("ev_unledmasses1", "unledmasses.first")
    df_cut[did]["4M"] = df_cut[did]["4M"].Define("ev_unledmasses2", "unledmasses.second")
    for nc in ["ev_unledmasses1","ev_unledmasses2"]:
        ch = "4M"
        cutstr = "n_mu == 4"

        for cat_to_use in categories_to_use:
            histo["hprecutUnledmasses_%s_precut%s_%s"%(ch,nc,cat_to_use)] = df_cut[did]["4M"].Filter(cutstr).Filter(f'category=="{cat_to_use}"').Histo1D(("hprecutUnledmasses_%s_precut%s_%s"%(ch,nc,cat_to_use),"hprecutpT_%s_precut%s_%s"%(ch,nc,cat_to_use),1000,0,1200000),"%s"%nc,"scalef")
    
    
    df_cut[did]["4M"] = df_cut[did]["4M"].Filter("unledmasses.first>5000")
    df_cut[did]["4M"] = df_cut[did]["4M"].Filter("unledmasses.second>5000")
    df_cut[did]["4M"] = df_cut[did]["4M"].Define("lep1_pt","AnalysisMuonsAuxDyn.pt[sig_mu].at(0)")
    df_cut[did]["4M"] = df_cut[did]["4M"].Define("lep2_pt","AnalysisMuonsAuxDyn.pt[sig_mu].at(1)")
    df_cut[did]["4M"] = df_cut[did]["4M"].Define("lep3_pt","AnalysisMuonsAuxDyn.pt[sig_mu].at(2)")
    df_cut[did]["4M"] = df_cut[did]["4M"].Define("lep4_pt","AnalysisMuonsAuxDyn.pt[sig_mu].at(3)")
    df_cut[did]["4M"] = df_cut[did]["4M"].Define("lep1_eta","AnalysisMuonsAuxDyn.eta[sig_mu].at(0)")
    df_cut[did]["4M"] = df_cut[did]["4M"].Define("lep2_eta","AnalysisMuonsAuxDyn.eta[sig_mu].at(1)")
    df_cut[did]["4M"] = df_cut[did]["4M"].Define("lep3_eta","AnalysisMuonsAuxDyn.eta[sig_mu].at(2)")
    df_cut[did]["4M"] = df_cut[did]["4M"].Define("lep4_eta","AnalysisMuonsAuxDyn.eta[sig_mu].at(3)")
    df_cut[did]["4M"] = df_cut[did]["4M"].Define("lep1_phi","AnalysisMuonsAuxDyn.phi[sig_mu].at(0)")
    df_cut[did]["4M"] = df_cut[did]["4M"].Define("lep2_phi","AnalysisMuonsAuxDyn.phi[sig_mu].at(1)")
    df_cut[did]["4M"] = df_cut[did]["4M"].Define("lep3_phi","AnalysisMuonsAuxDyn.phi[sig_mu].at(2)")
    df_cut[did]["4M"] = df_cut[did]["4M"].Define("lep4_phi","AnalysisMuonsAuxDyn.phi[sig_mu].at(3)")
    df_cut[did]["4M"] = df_cut[did]["4M"].Define("lep1_type","getMuonPdgID()")
    df_cut[did]["4M"] = df_cut[did]["4M"].Define("lep2_type","getMuonPdgID()")
    df_cut[did]["4M"] = df_cut[did]["4M"].Define("lep3_type","getMuonPdgID()")
    df_cut[did]["4M"] = df_cut[did]["4M"].Define("lep4_type","getMuonPdgID()")
    df_cut[did]["4M"] = df_cut[did]["4M"].Define("lep1_mass","getMuonMass()") 
    df_cut[did]["4M"] = df_cut[did]["4M"].Define("lep2_mass","getMuonMass()")
    df_cut[did]["4M"] = df_cut[did]["4M"].Define("lep3_mass","getMuonMass()")
    df_cut[did]["4M"] = df_cut[did]["4M"].Define("lep4_mass","getMuonMass()")
    df_cut[did]["4M"] = df_cut[did]["4M"].Define("lep1_ch","AnalysisMuonsAuxDyn.charge[sig_mu].at(0)")
    df_cut[did]["4M"] = df_cut[did]["4M"].Define("lep2_ch","AnalysisMuonsAuxDyn.charge[sig_mu].at(1)")
    df_cut[did]["4M"] = df_cut[did]["4M"].Define("lep3_ch","AnalysisMuonsAuxDyn.charge[sig_mu].at(2)")
    df_cut[did]["4M"] = df_cut[did]["4M"].Define("lep4_ch","AnalysisMuonsAuxDyn.charge[sig_mu].at(3)")
    # These variables are used when calculating object-specific variables (like mll and mt2 below)
    df_cut[did]["4M"] = df_cut[did]["4M"].Define('lep_pt',"AnalysisMuonsAuxDyn.pt[sig_mu]")
    df_cut[did]["4M"] = df_cut[did]["4M"].Define('lep_eta',"AnalysisMuonsAuxDyn.eta[sig_mu]")
    df_cut[did]["4M"] = df_cut[did]["4M"].Define('lep_phi',"AnalysisMuonsAuxDyn.phi[sig_mu]")
    df_cut[did]["4M"] = df_cut[did]["4M"].Define('lep_mass',"getVector(AnalysisMuonsAuxDyn.charge[sig_mu],105.66)")
    # Use the above 4-vector variables to calculate mll and mt2
    df_cut[did]["4M"] = df_cut[did]["4M"].Define("ev_m4l","ComputeInvariantMass4L(lep_pt,lep_eta,lep_phi,lep_mass)")
    #Dilepton invariant mass
    df_cut[did]["4M"] = df_cut[did]["4M"].Define("ev_m2lf12","ComputeInvariantMass2L(lep_pt,lep_eta,lep_phi,lep_mass, 0, 1)")
    df_cut[did]["4M"] = df_cut[did]["4M"].Define("ev_m2lf13","ComputeInvariantMass2L(lep_pt,lep_eta,lep_phi,lep_mass, 0, 2)")
    df_cut[did]["4M"] = df_cut[did]["4M"].Define("ev_m2lf14","ComputeInvariantMass2L(lep_pt,lep_eta,lep_phi,lep_mass, 0, 3)")
    df_cut[did]["4M"] = df_cut[did]["4M"].Define("ev_m2lf23","ComputeInvariantMass2L(lep_pt,lep_eta,lep_phi,lep_mass, 1, 2)")
    df_cut[did]["4M"] = df_cut[did]["4M"].Define("ev_m2lf24","ComputeInvariantMass2L(lep_pt,lep_eta,lep_phi,lep_mass, 1, 3)")
    df_cut[did]["4M"] = df_cut[did]["4M"].Define("ev_m2lf34","ComputeInvariantMass2L(lep_pt,lep_eta,lep_phi,lep_mass, 2, 3)")
    #Transverse mass
    df_cut[did]["4M"] = df_cut[did]["4M"].Define("ev_mt2f12","calcMT2(lep_pt,lep_eta,lep_phi,lep_mass,MET_Core_AnalysisMETAuxDyn.sumet[0],MET_Core_AnalysisMETAuxDyn.mpx[0],MET_Core_AnalysisMETAuxDyn.mpy[0],0,1)")
    df_cut[did]["4M"] = df_cut[did]["4M"].Define("ev_mt2f13","calcMT2(lep_pt,lep_eta,lep_phi,lep_mass,MET_Core_AnalysisMETAuxDyn.sumet[0],MET_Core_AnalysisMETAuxDyn.mpx[0],MET_Core_AnalysisMETAuxDyn.mpy[0],0,2)")
    df_cut[did]["4M"] = df_cut[did]["4M"].Define("ev_mt2f14","calcMT2(lep_pt,lep_eta,lep_phi,lep_mass,MET_Core_AnalysisMETAuxDyn.sumet[0],MET_Core_AnalysisMETAuxDyn.mpx[0],MET_Core_AnalysisMETAuxDyn.mpy[0],0,3)")
    df_cut[did]["4M"] = df_cut[did]["4M"].Define("ev_mt2f23","calcMT2(lep_pt,lep_eta,lep_phi,lep_mass,MET_Core_AnalysisMETAuxDyn.sumet[0],MET_Core_AnalysisMETAuxDyn.mpx[0],MET_Core_AnalysisMETAuxDyn.mpy[0],1,2)")
    df_cut[did]["4M"] = df_cut[did]["4M"].Define("ev_mt2f24","calcMT2(lep_pt,lep_eta,lep_phi,lep_mass,MET_Core_AnalysisMETAuxDyn.sumet[0],MET_Core_AnalysisMETAuxDyn.mpx[0],MET_Core_AnalysisMETAuxDyn.mpy[0],1,3)")
    df_cut[did]["4M"] = df_cut[did]["4M"].Define("ev_mt2f34","calcMT2(lep_pt,lep_eta,lep_phi,lep_mass,MET_Core_AnalysisMETAuxDyn.sumet[0],MET_Core_AnalysisMETAuxDyn.mpx[0],MET_Core_AnalysisMETAuxDyn.mpy[0],2,3)")
    #Hvis du har tid
    #df_cut[did]["4M"] = df_cut[did]["4M"].Define("ev_mt2","calcMT2(lep_pt,lep_eta,lep_phi,lep_mass,MET_Core_AnalysisMETAuxDyn.sumet[0],MET_Core_AnalysisMETAuxDyn.mpx[0],MET_Core_AnalysisMETAuxDyn.mpy[0],0,1)")
    #The deltaR

    # And finally, the slightly more complicated case of one electron and one muon
    df_cut[did]["EEMM"] = df_cut[did]["4L"].Filter("n_mu == 2 && n_el == 2")
    df_cut[did]["EEMM"] = df_cut[did]["EEMM"].Define("ev_unledmasses1", "1400") #Should not be used
    df_cut[did]["EEMM"] = df_cut[did]["EEMM"].Define("ev_unledmasses2", "1400")
    df_cut[did]["EEMM"] = df_cut[did]["EEMM"].Define("lep1_pt","AnalysisElectronsAuxDyn.pt[sig_el].at(0)")
    df_cut[did]["EEMM"] = df_cut[did]["EEMM"].Define("lep2_pt","AnalysisElectronsAuxDyn.pt[sig_el].at(1)")
    df_cut[did]["EEMM"] = df_cut[did]["EEMM"].Define("lep3_pt","AnalysisMuonsAuxDyn.pt[sig_mu].at(0)")
    df_cut[did]["EEMM"] = df_cut[did]["EEMM"].Define("lep4_pt","AnalysisMuonsAuxDyn.pt[sig_mu].at(1)")
    df_cut[did]["EEMM"] = df_cut[did]["EEMM"].Define("lep1_eta","AnalysisElectronsAuxDyn.eta[sig_el].at(0)")
    df_cut[did]["EEMM"] = df_cut[did]["EEMM"].Define("lep2_eta","AnalysisElectronsAuxDyn.eta[sig_el].at(1)")
    df_cut[did]["EEMM"] = df_cut[did]["EEMM"].Define("lep3_eta","AnalysisMuonsAuxDyn.eta[sig_mu].at(0)")
    df_cut[did]["EEMM"] = df_cut[did]["EEMM"].Define("lep4_eta","AnalysisMuonsAuxDyn.eta[sig_mu].at(1)")
    df_cut[did]["EEMM"] = df_cut[did]["EEMM"].Define("lep1_phi","AnalysisElectronsAuxDyn.phi[sig_el].at(0)")
    df_cut[did]["EEMM"] = df_cut[did]["EEMM"].Define("lep2_phi","AnalysisElectronsAuxDyn.phi[sig_el].at(1)")
    df_cut[did]["EEMM"] = df_cut[did]["EEMM"].Define("lep3_phi","AnalysisMuonsAuxDyn.phi[sig_mu].at(0)")
    df_cut[did]["EEMM"] = df_cut[did]["EEMM"].Define("lep4_phi","AnalysisMuonsAuxDyn.phi[sig_mu].at(1)")
    df_cut[did]["EEMM"] = df_cut[did]["EEMM"].Define("lep1_type","getElecPdgID()")
    df_cut[did]["EEMM"] = df_cut[did]["EEMM"].Define("lep2_type","getElecPdgID()")
    df_cut[did]["EEMM"] = df_cut[did]["EEMM"].Define("lep3_type","getMuonPdgID()")
    df_cut[did]["EEMM"] = df_cut[did]["EEMM"].Define("lep4_type","getMuonPdgID()")
    df_cut[did]["EEMM"] = df_cut[did]["EEMM"].Define("lep1_mass","getElecMass()")
    df_cut[did]["EEMM"] = df_cut[did]["EEMM"].Define("lep2_mass","getElecMass()") 
    df_cut[did]["EEMM"] = df_cut[did]["EEMM"].Define("lep3_mass","getMuonMass()") 
    df_cut[did]["EEMM"] = df_cut[did]["EEMM"].Define("lep4_mass","getMuonMass()") 
    df_cut[did]["EEMM"] = df_cut[did]["EEMM"].Define("lep1_ch","AnalysisElectronsAuxDyn.charge[sig_el].at(0)")
    df_cut[did]["EEMM"] = df_cut[did]["EEMM"].Define("lep2_ch","AnalysisElectronsAuxDyn.charge[sig_el].at(1)")
    df_cut[did]["EEMM"] = df_cut[did]["EEMM"].Define("lep3_ch","AnalysisMuonsAuxDyn.charge[sig_mu].at(0)")
    df_cut[did]["EEMM"] = df_cut[did]["EEMM"].Define("lep4_ch","AnalysisMuonsAuxDyn.charge[sig_mu].at(1)")
    # These variables are used when calculating object-specific variables (like mll and mt2 below)
    df_cut[did]["EEMM"] = df_cut[did]["EEMM"].Define('lep_pt',"getVector(AnalysisElectronsAuxDyn.pt[sig_el],AnalysisMuonsAuxDyn.pt[sig_mu])")
    df_cut[did]["EEMM"] = df_cut[did]["EEMM"].Define('lep_eta',"getVector(AnalysisElectronsAuxDyn.eta[sig_el],AnalysisMuonsAuxDyn.eta[sig_mu])")
    df_cut[did]["EEMM"] = df_cut[did]["EEMM"].Define('lep_phi',"getVector(AnalysisElectronsAuxDyn.phi[sig_el],AnalysisMuonsAuxDyn.phi[sig_mu])")
    df_cut[did]["EEMM"] = df_cut[did]["EEMM"].Define('lep_mass',"getVector(AnalysisElectronsAuxDyn.charge[sig_el],AnalysisMuonsAuxDyn.charge[sig_mu],0.511,105.66)")
    # Use the above 4-vector variables to calculate mll and mt2   
    df_cut[did]["EEMM"] = df_cut[did]["EEMM"].Define("ev_m4l","ComputeInvariantMass4L(lep_pt,lep_eta,lep_phi,lep_mass)")
    #Dilepton invariant mass
    df_cut[did]["EEMM"] = df_cut[did]["EEMM"].Define("ev_m2lf12","ComputeInvariantMass2L(lep_pt,lep_eta,lep_phi,lep_mass, 0, 1)")
    df_cut[did]["EEMM"] = df_cut[did]["EEMM"].Define("ev_m2lf13","ComputeInvariantMass2L(lep_pt,lep_eta,lep_phi,lep_mass, 0, 2)")
    df_cut[did]["EEMM"] = df_cut[did]["EEMM"].Define("ev_m2lf14","ComputeInvariantMass2L(lep_pt,lep_eta,lep_phi,lep_mass, 0, 3)")
    df_cut[did]["EEMM"] = df_cut[did]["EEMM"].Define("ev_m2lf23","ComputeInvariantMass2L(lep_pt,lep_eta,lep_phi,lep_mass, 1, 2)")
    df_cut[did]["EEMM"] = df_cut[did]["EEMM"].Define("ev_m2lf24","ComputeInvariantMass2L(lep_pt,lep_eta,lep_phi,lep_mass, 1, 3)")
    df_cut[did]["EEMM"] = df_cut[did]["EEMM"].Define("ev_m2lf34","ComputeInvariantMass2L(lep_pt,lep_eta,lep_phi,lep_mass, 2, 3)")
    #Transverse mass
    df_cut[did]["EEMM"] = df_cut[did]["EEMM"].Define("ev_mt2f12","calcMT2(lep_pt,lep_eta,lep_phi,lep_mass,MET_Core_AnalysisMETAuxDyn.sumet[0],MET_Core_AnalysisMETAuxDyn.mpx[0],MET_Core_AnalysisMETAuxDyn.mpy[0],0,1)")
    df_cut[did]["EEMM"] = df_cut[did]["EEMM"].Define("ev_mt2f13","calcMT2(lep_pt,lep_eta,lep_phi,lep_mass,MET_Core_AnalysisMETAuxDyn.sumet[0],MET_Core_AnalysisMETAuxDyn.mpx[0],MET_Core_AnalysisMETAuxDyn.mpy[0],0,2)")
    df_cut[did]["EEMM"] = df_cut[did]["EEMM"].Define("ev_mt2f14","calcMT2(lep_pt,lep_eta,lep_phi,lep_mass,MET_Core_AnalysisMETAuxDyn.sumet[0],MET_Core_AnalysisMETAuxDyn.mpx[0],MET_Core_AnalysisMETAuxDyn.mpy[0],0,3)")
    df_cut[did]["EEMM"] = df_cut[did]["EEMM"].Define("ev_mt2f23","calcMT2(lep_pt,lep_eta,lep_phi,lep_mass,MET_Core_AnalysisMETAuxDyn.sumet[0],MET_Core_AnalysisMETAuxDyn.mpx[0],MET_Core_AnalysisMETAuxDyn.mpy[0],1,2)")
    df_cut[did]["EEMM"] = df_cut[did]["EEMM"].Define("ev_mt2f24","calcMT2(lep_pt,lep_eta,lep_phi,lep_mass,MET_Core_AnalysisMETAuxDyn.sumet[0],MET_Core_AnalysisMETAuxDyn.mpx[0],MET_Core_AnalysisMETAuxDyn.mpy[0],1,3)")
    df_cut[did]["EEMM"] = df_cut[did]["EEMM"].Define("ev_mt2f34","calcMT2(lep_pt,lep_eta,lep_phi,lep_mass,MET_Core_AnalysisMETAuxDyn.sumet[0],MET_Core_AnalysisMETAuxDyn.mpx[0],MET_Core_AnalysisMETAuxDyn.mpy[0],2,3)")
    #print("INFO \t Saving CollectionTree to %s/%s.root"%(MYDIR,fname))
    #The deltaR
    """
    df_cut[did]["2E2M"] = df_cut[did]["EEMM"].Filter("isFirstPairElectrons(PairedList, FlavourList)")
    df_cut[did]["2M2E"] = df_cut[did]["EEMM"].Filter("isFirstPairElectrons(PairedList, FlavourList)==false")
    """
    """
    #Taking the EEEM case as well
    df_cut[did]["EEEM"] = df_cut[did]["4L"].Filter("n_mu == 1 && n_el == 3")
    df_cut[did]["EEEM"] = df_cut[did]["EEEM"].Define("ev_unledmasses1", "1400") #Should not be used
    df_cut[did]["EEEM"] = df_cut[did]["EEEM"].Define("ev_unledmasses2", "1400")
    df_cut[did]["EEEM"] = df_cut[did]["EEEM"].Define("lep1_pt","AnalysisElectronsAuxDyn.pt[sig_el].at(0)")
    df_cut[did]["EEEM"] = df_cut[did]["EEEM"].Define("lep2_pt","AnalysisElectronsAuxDyn.pt[sig_el].at(1)")
    df_cut[did]["EEEM"] = df_cut[did]["EEEM"].Define("lep3_pt","AnalysisElectronsAuxDyn.pt[sig_el].at(2)")
    df_cut[did]["EEEM"] = df_cut[did]["EEEM"].Define("lep4_pt","AnalysisMuonsAuxDyn.pt[sig_mu].at(0)")
    df_cut[did]["EEEM"] = df_cut[did]["EEEM"].Define("lep1_eta","AnalysisElectronsAuxDyn.eta[sig_el].at(0)")
    df_cut[did]["EEEM"] = df_cut[did]["EEEM"].Define("lep2_eta","AnalysisElectronsAuxDyn.eta[sig_el].at(1)")
    df_cut[did]["EEEM"] = df_cut[did]["EEEM"].Define("lep3_eta","AnalysisElectronsAuxDyn.eta[sig_el].at(2)")
    df_cut[did]["EEEM"] = df_cut[did]["EEEM"].Define("lep4_eta","AnalysisMuonsAuxDyn.eta[sig_mu].at(0)")
    df_cut[did]["EEEM"] = df_cut[did]["EEEM"].Define("lep1_phi","AnalysisElectronsAuxDyn.phi[sig_el].at(0)")
    df_cut[did]["EEEM"] = df_cut[did]["EEEM"].Define("lep2_phi","AnalysisElectronsAuxDyn.phi[sig_el].at(1)")
    df_cut[did]["EEEM"] = df_cut[did]["EEEM"].Define("lep3_phi","AnalysisElectronsAuxDyn.phi[sig_el].at(2)")
    df_cut[did]["EEEM"] = df_cut[did]["EEEM"].Define("lep4_phi","AnalysisMuonsAuxDyn.phi[sig_mu].at(0)")
    df_cut[did]["EEEM"] = df_cut[did]["EEEM"].Define("lep1_type","getElecPdgID()")
    df_cut[did]["EEEM"] = df_cut[did]["EEEM"].Define("lep2_type","getElecPdgID()")
    df_cut[did]["EEEM"] = df_cut[did]["EEEM"].Define("lep3_type","getElecPdgID()")
    df_cut[did]["EEEM"] = df_cut[did]["EEEM"].Define("lep4_type","getMuonPdgID()")
    df_cut[did]["EEEM"] = df_cut[did]["EEEM"].Define("lep1_mass","getElecMass()")
    df_cut[did]["EEEM"] = df_cut[did]["EEEM"].Define("lep2_mass","getElecMass()") 
    df_cut[did]["EEEM"] = df_cut[did]["EEEM"].Define("lep3_mass","getElecMass()") 
    df_cut[did]["EEEM"] = df_cut[did]["EEEM"].Define("lep4_mass","getMuonMass()") 
    df_cut[did]["EEEM"] = df_cut[did]["EEEM"].Define("lep1_ch","AnalysisElectronsAuxDyn.charge[sig_el].at(0)")
    df_cut[did]["EEEM"] = df_cut[did]["EEEM"].Define("lep2_ch","AnalysisElectronsAuxDyn.charge[sig_el].at(1)")
    df_cut[did]["EEEM"] = df_cut[did]["EEEM"].Define("lep3_ch","AnalysisElectronsAuxDyn.charge[sig_el].at(2)")
    df_cut[did]["EEEM"] = df_cut[did]["EEEM"].Define("lep4_ch","AnalysisMuonsAuxDyn.charge[sig_mu].at(0)")
    # These variables are used when calculating object-specific variables (like mll and mt2 below)
    df_cut[did]["EEEM"] = df_cut[did]["EEEM"].Define('lep_pt',"getVector(AnalysisElectronsAuxDyn.pt[sig_el],AnalysisMuonsAuxDyn.pt[sig_mu])")
    df_cut[did]["EEEM"] = df_cut[did]["EEEM"].Define('lep_eta',"getVector(AnalysisElectronsAuxDyn.eta[sig_el],AnalysisMuonsAuxDyn.eta[sig_mu])")
    df_cut[did]["EEEM"] = df_cut[did]["EEEM"].Define('lep_phi',"getVector(AnalysisElectronsAuxDyn.phi[sig_el],AnalysisMuonsAuxDyn.phi[sig_mu])")
    df_cut[did]["EEEM"] = df_cut[did]["EEEM"].Define('lep_mass',"getVector(AnalysisElectronsAuxDyn.charge[sig_el],AnalysisMuonsAuxDyn.charge[sig_mu],0.511,105.66)")
    # Use the above 4-vector variables to calculate mll and mt2   
    df_cut[did]["EEEM"] = df_cut[did]["EEEM"].Define("ev_m4l","ComputeInvariantMass4L(lep_pt,lep_eta,lep_phi,lep_mass)")
    #Dilepton invariant mass
    df_cut[did]["EEEM"] = df_cut[did]["EEEM"].Define("ev_m2lf12","ComputeInvariantMass2L(lep_pt,lep_eta,lep_phi,lep_mass, 0, 1)")
    df_cut[did]["EEEM"] = df_cut[did]["EEEM"].Define("ev_m2lf13","ComputeInvariantMass2L(lep_pt,lep_eta,lep_phi,lep_mass, 0, 2)")
    df_cut[did]["EEEM"] = df_cut[did]["EEEM"].Define("ev_m2lf14","ComputeInvariantMass2L(lep_pt,lep_eta,lep_phi,lep_mass, 0, 3)")
    df_cut[did]["EEEM"] = df_cut[did]["EEEM"].Define("ev_m2lf23","ComputeInvariantMass2L(lep_pt,lep_eta,lep_phi,lep_mass, 1, 2)")
    df_cut[did]["EEEM"] = df_cut[did]["EEEM"].Define("ev_m2lf24","ComputeInvariantMass2L(lep_pt,lep_eta,lep_phi,lep_mass, 1, 3)")
    df_cut[did]["EEEM"] = df_cut[did]["EEEM"].Define("ev_m2lf34","ComputeInvariantMass2L(lep_pt,lep_eta,lep_phi,lep_mass, 2, 3)")
    #Transverse mass
    df_cut[did]["EEEM"] = df_cut[did]["EEEM"].Define("ev_mt2f12","calcMT2(lep_pt,lep_eta,lep_phi,lep_mass,MET_Core_AnalysisMETAuxDyn.sumet[0],MET_Core_AnalysisMETAuxDyn.mpx[0],MET_Core_AnalysisMETAuxDyn.mpy[0],0,1)")
    df_cut[did]["EEEM"] = df_cut[did]["EEEM"].Define("ev_mt2f13","calcMT2(lep_pt,lep_eta,lep_phi,lep_mass,MET_Core_AnalysisMETAuxDyn.sumet[0],MET_Core_AnalysisMETAuxDyn.mpx[0],MET_Core_AnalysisMETAuxDyn.mpy[0],0,2)")
    df_cut[did]["EEEM"] = df_cut[did]["EEEM"].Define("ev_mt2f14","calcMT2(lep_pt,lep_eta,lep_phi,lep_mass,MET_Core_AnalysisMETAuxDyn.sumet[0],MET_Core_AnalysisMETAuxDyn.mpx[0],MET_Core_AnalysisMETAuxDyn.mpy[0],0,3)")
    df_cut[did]["EEEM"] = df_cut[did]["EEEM"].Define("ev_mt2f23","calcMT2(lep_pt,lep_eta,lep_phi,lep_mass,MET_Core_AnalysisMETAuxDyn.sumet[0],MET_Core_AnalysisMETAuxDyn.mpx[0],MET_Core_AnalysisMETAuxDyn.mpy[0],1,2)")
    df_cut[did]["EEEM"] = df_cut[did]["EEEM"].Define("ev_mt2f24","calcMT2(lep_pt,lep_eta,lep_phi,lep_mass,MET_Core_AnalysisMETAuxDyn.sumet[0],MET_Core_AnalysisMETAuxDyn.mpx[0],MET_Core_AnalysisMETAuxDyn.mpy[0],1,3)")
    df_cut[did]["EEEM"] = df_cut[did]["EEEM"].Define("ev_mt2f34","calcMT2(lep_pt,lep_eta,lep_phi,lep_mass,MET_Core_AnalysisMETAuxDyn.sumet[0],MET_Core_AnalysisMETAuxDyn.mpx[0],MET_Core_AnalysisMETAuxDyn.mpy[0],2,3)")
    #print("INFO \t Saving CollectionTree to %s/%s.root"%(MYDIR,fname))
    #The deltaR
    
    #Taking the EMMM case as well
    df_cut[did]["EMMM"] = df_cut[did]["4L"].Filter("n_mu == 3 && n_el == 1")
    df_cut[did]["EMMM"] = df_cut[did]["EMMM"].Define("ev_unledmasses1", "1400") #Should not be used
    df_cut[did]["EMMM"] = df_cut[did]["EMMM"].Define("ev_unledmasses2", "1400")
    df_cut[did]["EMMM"] = df_cut[did]["EMMM"].Define("lep1_pt","AnalysisElectronsAuxDyn.pt[sig_el].at(0)")
    df_cut[did]["EMMM"] = df_cut[did]["EMMM"].Define("lep2_pt","AnalysisMuonsAuxDyn.pt[sig_mu].at(0)")
    df_cut[did]["EMMM"] = df_cut[did]["EMMM"].Define("lep3_pt","AnalysisMuonsAuxDyn.pt[sig_mu].at(1)")
    df_cut[did]["EMMM"] = df_cut[did]["EMMM"].Define("lep4_pt","AnalysisMuonsAuxDyn.pt[sig_mu].at(2)")
    df_cut[did]["EMMM"] = df_cut[did]["EMMM"].Define("lep1_eta","AnalysisElectronsAuxDyn.eta[sig_el].at(0)")
    df_cut[did]["EMMM"] = df_cut[did]["EMMM"].Define("lep2_eta","AnalysisMuonsAuxDyn.eta[sig_mu].at(0)")
    df_cut[did]["EMMM"] = df_cut[did]["EMMM"].Define("lep3_eta","AnalysisMuonsAuxDyn.eta[sig_mu].at(1)")
    df_cut[did]["EMMM"] = df_cut[did]["EMMM"].Define("lep4_eta","AnalysisMuonsAuxDyn.eta[sig_mu].at(2)")
    df_cut[did]["EMMM"] = df_cut[did]["EMMM"].Define("lep1_phi","AnalysisElectronsAuxDyn.phi[sig_el].at(0)")
    df_cut[did]["EMMM"] = df_cut[did]["EMMM"].Define("lep2_phi","AnalysisMuonsAuxDyn.phi[sig_mu].at(0)")
    df_cut[did]["EMMM"] = df_cut[did]["EMMM"].Define("lep3_phi","AnalysisMuonsAuxDyn.phi[sig_mu].at(1)")
    df_cut[did]["EMMM"] = df_cut[did]["EMMM"].Define("lep4_phi","AnalysisMuonsAuxDyn.phi[sig_mu].at(2)")
    df_cut[did]["EMMM"] = df_cut[did]["EMMM"].Define("lep1_type","getElecPdgID()")
    df_cut[did]["EMMM"] = df_cut[did]["EMMM"].Define("lep2_type","getMuonPdgID()")
    df_cut[did]["EMMM"] = df_cut[did]["EMMM"].Define("lep3_type","getMuonPdgID()")
    df_cut[did]["EMMM"] = df_cut[did]["EMMM"].Define("lep4_type","getMuonPdgID()")
    df_cut[did]["EMMM"] = df_cut[did]["EMMM"].Define("lep1_mass","getElecMass()")
    df_cut[did]["EMMM"] = df_cut[did]["EMMM"].Define("lep2_mass","getMuonMass()") 
    df_cut[did]["EMMM"] = df_cut[did]["EMMM"].Define("lep3_mass","getMuonMass()") 
    df_cut[did]["EMMM"] = df_cut[did]["EMMM"].Define("lep4_mass","getMuonMass()") 
    df_cut[did]["EMMM"] = df_cut[did]["EMMM"].Define("lep1_ch","AnalysisElectronsAuxDyn.charge[sig_el].at(0)")
    df_cut[did]["EMMM"] = df_cut[did]["EMMM"].Define("lep2_ch","AnalysisMuonsAuxDyn.charge[sig_mu].at(0)")
    df_cut[did]["EMMM"] = df_cut[did]["EMMM"].Define("lep3_ch","AnalysisMuonsAuxDyn.charge[sig_mu].at(1)")
    df_cut[did]["EMMM"] = df_cut[did]["EMMM"].Define("lep4_ch","AnalysisMuonsAuxDyn.charge[sig_mu].at(2)")
    # These variables are used when calculating object-specific variables (like mll and mt2 below)
    df_cut[did]["EMMM"] = df_cut[did]["EMMM"].Define('lep_pt',"getVector(AnalysisElectronsAuxDyn.pt[sig_el],AnalysisMuonsAuxDyn.pt[sig_mu])")
    df_cut[did]["EMMM"] = df_cut[did]["EMMM"].Define('lep_eta',"getVector(AnalysisElectronsAuxDyn.eta[sig_el],AnalysisMuonsAuxDyn.eta[sig_mu])")
    df_cut[did]["EMMM"] = df_cut[did]["EMMM"].Define('lep_phi',"getVector(AnalysisElectronsAuxDyn.phi[sig_el],AnalysisMuonsAuxDyn.phi[sig_mu])")
    df_cut[did]["EMMM"] = df_cut[did]["EMMM"].Define('lep_mass',"getVector(AnalysisElectronsAuxDyn.charge[sig_el],AnalysisMuonsAuxDyn.charge[sig_mu],0.511,105.66)")
    # Use the above 4-vector variables to calculate mll and mt2   
    df_cut[did]["EMMM"] = df_cut[did]["EMMM"].Define("ev_m4l","ComputeInvariantMass4L(lep_pt,lep_eta,lep_phi,lep_mass)")
    #Dilepton invariant mass
    df_cut[did]["EMMM"] = df_cut[did]["EMMM"].Define("ev_m2lf12","ComputeInvariantMass2L(lep_pt,lep_eta,lep_phi,lep_mass, 0, 1)")
    df_cut[did]["EMMM"] = df_cut[did]["EMMM"].Define("ev_m2lf13","ComputeInvariantMass2L(lep_pt,lep_eta,lep_phi,lep_mass, 0, 2)")
    df_cut[did]["EMMM"] = df_cut[did]["EMMM"].Define("ev_m2lf14","ComputeInvariantMass2L(lep_pt,lep_eta,lep_phi,lep_mass, 0, 3)")
    df_cut[did]["EMMM"] = df_cut[did]["EMMM"].Define("ev_m2lf23","ComputeInvariantMass2L(lep_pt,lep_eta,lep_phi,lep_mass, 1, 2)")
    df_cut[did]["EMMM"] = df_cut[did]["EMMM"].Define("ev_m2lf24","ComputeInvariantMass2L(lep_pt,lep_eta,lep_phi,lep_mass, 1, 3)")
    df_cut[did]["EMMM"] = df_cut[did]["EMMM"].Define("ev_m2lf34","ComputeInvariantMass2L(lep_pt,lep_eta,lep_phi,lep_mass, 2, 3)")
    #Transverse mass
    df_cut[did]["EMMM"] = df_cut[did]["EMMM"].Define("ev_mt2f12","calcMT2(lep_pt,lep_eta,lep_phi,lep_mass,MET_Core_AnalysisMETAuxDyn.sumet[0],MET_Core_AnalysisMETAuxDyn.mpx[0],MET_Core_AnalysisMETAuxDyn.mpy[0],0,1)")
    df_cut[did]["EMMM"] = df_cut[did]["EMMM"].Define("ev_mt2f13","calcMT2(lep_pt,lep_eta,lep_phi,lep_mass,MET_Core_AnalysisMETAuxDyn.sumet[0],MET_Core_AnalysisMETAuxDyn.mpx[0],MET_Core_AnalysisMETAuxDyn.mpy[0],0,2)")
    df_cut[did]["EMMM"] = df_cut[did]["EMMM"].Define("ev_mt2f14","calcMT2(lep_pt,lep_eta,lep_phi,lep_mass,MET_Core_AnalysisMETAuxDyn.sumet[0],MET_Core_AnalysisMETAuxDyn.mpx[0],MET_Core_AnalysisMETAuxDyn.mpy[0],0,3)")
    df_cut[did]["EMMM"] = df_cut[did]["EMMM"].Define("ev_mt2f23","calcMT2(lep_pt,lep_eta,lep_phi,lep_mass,MET_Core_AnalysisMETAuxDyn.sumet[0],MET_Core_AnalysisMETAuxDyn.mpx[0],MET_Core_AnalysisMETAuxDyn.mpy[0],1,2)")
    df_cut[did]["EMMM"] = df_cut[did]["EMMM"].Define("ev_mt2f24","calcMT2(lep_pt,lep_eta,lep_phi,lep_mass,MET_Core_AnalysisMETAuxDyn.sumet[0],MET_Core_AnalysisMETAuxDyn.mpx[0],MET_Core_AnalysisMETAuxDyn.mpy[0],1,3)")
    df_cut[did]["EMMM"] = df_cut[did]["EMMM"].Define("ev_mt2f34","calcMT2(lep_pt,lep_eta,lep_phi,lep_mass,MET_Core_AnalysisMETAuxDyn.sumet[0],MET_Core_AnalysisMETAuxDyn.mpx[0],MET_Core_AnalysisMETAuxDyn.mpy[0],2,3)")
    #print("INFO \t Saving CollectionTree to %s/%s.root"%(MYDIR,fname))
    #The deltaR
    """
    print("Got after all the lepton changes")

    # If these variables have not yet been added 
    # to the list which will be used both to make 
    # histograms and writing to hdf5
    if not len(new_col):
        new_col.append("lep1_pt")
        new_col.append("lep2_pt")
        new_col.append("lep3_pt")
        new_col.append("lep4_pt")
        new_col.append("lep1_eta")
        new_col.append("lep2_eta")
        new_col.append("lep3_eta")
        new_col.append("lep4_eta")
        new_col.append("lep1_phi")
        new_col.append("lep2_phi")
        new_col.append("lep3_phi")
        new_col.append("lep4_phi")
        new_col.append("lep1_type")
        new_col.append("lep2_type")
        new_col.append("lep3_type")
        new_col.append("lep4_type")
        new_col.append("lep1_ch")
        new_col.append("lep2_ch")
        new_col.append("lep3_ch")
        new_col.append("lep4_ch")
        #new_col.append("lep1_mass")
        #new_col.append("lep2_mass")
        #new_col.append("lep3_mass")
        #new_col.append("lep4_mass")
        new_col.append("ev_m4l")
        new_col.append("ev_m2lf12")
        new_col.append("ev_m2lf13")
        new_col.append("ev_m2lf14")
        new_col.append("ev_m2lf23")
        new_col.append("ev_m2lf24")
        new_col.append("ev_m2lf34")
        #new_col.append("ev_mt2")

        new_col.append("ev_mt2f12")
        new_col.append("ev_mt2f13")
        new_col.append("ev_mt2f14")
        new_col.append("ev_mt2f23")
        new_col.append("ev_mt2f24")
        new_col.append("ev_mt2f34")
        
        new_col.append("n_el")
        new_col.append("n_mu")
        new_col.append("n_lep")

        new_col.append("ev_Zlm1")
        new_col.append("ev_Zlm2")
        new_col.append("ev_Zlm13")
        new_col.append("ev_Zlm14")
        new_col.append("ev_Zlm23")
        new_col.append("ev_Zlm24")
        
        
        new_col.append("ev_DeltaR12")
        new_col.append("ev_DeltaR34")
        new_col.append("ev_DeltaR23")
        new_col.append("ev_DeltaR13")
        new_col.append("ev_DeltaR14")
        new_col.append("ev_DeltaR24")
        
        new_col.append("ev_njets")
        new_col.append("ev_nbjet85")
        new_col.append("ev_DeltaPhi12")
        new_col.append("ev_DeltaPhi13")
        new_col.append("ev_DeltaPhi14")
        new_col.append("ev_DeltaPhi23")
        new_col.append("ev_DeltaPhi24")
        new_col.append("ev_DeltaPhi34")
        
        new_col.append("scalef")
        new_col.append("eventweight")
        new_col.append("ev_unledmasses1")
        new_col.append("ev_unledmasses2")
        new_col.append("ev_SortedPt1")
        new_col.append("ev_SortedPt2")
        new_col.append("ev_SortedPt3")
        new_col.append("ev_SortedPt4")
        print("Appended all to collumns")

    # For testing writing out a few events (ONLY in single threaded mode!!)
    """
    if not n_workers:
        #df[did] = df[did].Range(10)
        df_cut[did]["4E"].Report().Print()
        st=df.Display(["eventweight", "puweight", "sf", "scalef"]).Print()
        print(st)
    """

    # Book the histograms (nothing will be computed yet!)
    #print("Befor nc in new_col")
    for nc in new_col:
        #print("nc", nc)
        key = nc.split("_")[-1]
        if not key in plot_dic.keys():
            print("Could not find plot details for %s"%key)
            continue
        for did in df_cut.keys():
            #print("did",did)
            for ch in df_cut[did].keys():
                #print("ch", ch)
                if ch == "4L": continue
                #if ch in ["EMMM"]: continue
                #print("INFO \t Creating histogram h_%s_%s_%s"%(ch,nc,did))
                for cat_to_use in categories_to_use:
                    #print("nc, Cat_to_use", nc, cat_to_use)
                    #print("cat_to_use", cat_to_use)
                    histo["h_%s_%s_%s"%(ch,nc,cat_to_use)] = df_cut[did][ch].Filter(f'category=="{cat_to_use}"').Histo1D(("h_%s_%s_%s"%(ch,nc,cat_to_use),"h_%s_%s_%s"%(ch,nc,cat_to_use),plot_dic[key]["nbin"],plot_dic[key]["min"],plot_dic[key]["max"]),"%s"%nc,"scalef")
    print("Got to the end of one did")

    #print("Number of open files %i"%tot_files)



    """
    # If > 1000 files are open, compute the histograms and dump the files 
    if tot_files > batchsize:
        print("WARNING \t More than 1000 (%i) files are open. Let's clean up before continuing"%tot_files)
        for key in histo.keys():
            allhistograms.append(histo[key])
        if makeHistograms:
            print("Prepared %i histograms on %i events"%(len(allhistograms),n_entries))
            tot_time += computeHistograms(allhistograms)
        # Specify the columns we would like to write to hdf5
        numpy_col = []
        for col in new_col:
            numpy_col.append(col)
        dids = list(df_cut.keys())
        hdf5_events = 0
        for did in dids:
            if not writeToHDF5: continue
            fname = xsec_dic[did]["category"]
            for ch in df_cut[did].keys():
                if ch == "4L": continue
                # One can also write content back to a ROOT file using Snapshot (will not do this now)
                # df_cut[did][ch].Snapshot("CollectionTree","%s/%s.root"%(MYDIR,fname),numpy_col,opts)
                this_nev = df_cut[did][ch].Count().GetValue()
                hdf5_events += this_nev
                print("Converting channel %s skim of DSID %s to skim %s.hdf5 for %i events"%(ch,did,fname,this_nev))
                pandas_df = pd.DataFrame(data=df_cut[did][ch].AsNumpy(numpy_col))
                pandas_df.to_hdf(MYDIR+"%s.hdf5"%fname,"CollectionTree",mode='a',append=True)
            try:
                del pandas_df
            except:
                print("WARNING \t No panda to delete")
            del df_cut[did]
        # Clean everything before continuing
        df = {}
        df_cut = {}
        histo = {}
        allhistograms = []
        tot_files = 0
    """


    """
    # Done processing everything. Let's write to file and finish                                        
    for key in histo.keys():
        allhistograms.append(histo[key])
    numpy_col = []
    for col in new_col:
        numpy_col.append(col)
    if makeHistograms:
        print("Prepared %i histograms on %i events"%(len(allhistograms),n_entries))
        computeHistograms(allhistograms)

    start = time.time()
    hdf5_events = 0
    dids = list(df_cut.keys())
    for did in dids:
        if not writeToHDF5: continue
        fname = xsec_dic[did]["category"]
        for ch in df_cut[did].keys():
            if ch == "4L": continue
            # One can also write content back to a ROOT file using Snapshot (will not do this now)
            # df_cut[did][ch].Snapshot("CollectionTree","%s/%s.root"%(MYDIR,fname),numpy_col,opts)                                 
            this_nev = df_cut[did][ch].Count().GetValue()
            hdf5_events += this_nev
            print("Converting channel %s skim of DSID %s to skim %s.hdf5 for %i events"%(ch,did,fname,this_nev))
            pandas_df = pd.DataFrame(data=df_cut[did][ch].AsNumpy(numpy_col))
            pandas_df.to_hdf(MYDIR+"%s.hdf5"%fname,"CollectionTree",mode='a',append=True)
        try:
            del pandas_df
        except:
            print("WARNING \t No pandas DF to delete")
        #del df_cut[did]
    end = time.time()

    # If > 1000 files are open, compute the histograms and dump the files 
    if tot_files >= batchsize:
        print("WARNING \t More than %i (%i) files are open. Let's clean up before continuing"%(batchsize,tot_files))
        #thnev = df_cut[did]["2E"].Count().GetValue()
        #print("Writing %i events to .root" %thnev)
        #df_cut[did]["2E"].Snapshot("CollectionTree","%s/%s.root"%(MYDIR,fname),new_col,opts)
        start = time.time()
        dumpResults(histo,df_cut,new_col,ntup_col,isData,xsec_dic,batchnum,makeHistograms,writeToHDF5,writeToROOT)
        end = time.time()
        print("Used %.0f seconds on batch %i"%((end-start),batchnum))
        print("-"*40)
        print("Done with batch %i, so far processed : "%batchnum)
        for key in tot_nev.keys():
            print("%10s %20i %10i"%(key,tot_nev[key]["nev"],len(tot_nev[key]["nfiles"])))
        print("-"*40)
        batchnum += 1
        # Clean everything before continuing
        #df = {}
        #df_cut = {}
        histo = {}
        allhistograms = []
        tot_files = 0 
        """
#ROOT.RDF.SaveGraph(df_cut[did]["4E"], "mydot.dot")
print("Got to dump results")
dumpResults(histo,df_cut,new_col,ntup_col,isData,batchnum,makeHistograms,writeToHDF5,writeToROOT)     
print("Got past dump results")
            

if doDask:
    connection.close()

All OK
INFO 	 Will be saving ouput to /storage/carlpd/data/PHYSLITEforML/slimmed/
Got before for loop
Got to the filtration part
Got after first if isData
Got after 4E
Got after all the lepton changes
Appended all to collumns
Could not find plot details for scalef
Could not find plot details for eventweight
Got to the end of one did
Got to dump results
----------------------------------------
Will make the following: 
HDF5: NO 
ROOT: YES 
histograms: YES
----------------------------------------
Converting channel 4L skim of DSID ALL to skim ALL.root for 282319 events
Used 3030.96 s on 282319 events to ntuple
Prepared 5208 histograms on 572080000 events
Used 2.33 s on 141803 events to hdf5
Got past dump results




Vet ikke hva denne gjør

In [27]:
del df
df

NameError: name 'df' is not defined

In [None]:
for k in histo.keys():
    print(histo[k].GetName())