<a href="https://colab.research.google.com/github/GuillermoFidalgo/Matplotlib-SWC/blob/main/ATLAS-H4lpt.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# **If on Colab**

Please click on the badge above and then run the code below.

You might see a warning asking you to restart the runtime. This is expected. Just open the `Runtime` menu and click on `Restart runtime`.

You only have to do this once per notebook on Google Colab.

In [None]:
import uproot
import pandas as pd
import numpy as np

In [None]:
# Samples grouped by process. Every entry is a [folder, sample_name] pair;
# the sample name is what the ROOT file names are built from further down.
samples_dic = {
    'data': [
        ['data', 'data_A'],
        ['data', 'data_B'],
        ['data', 'data_C'],
        ['data', 'data_D'],
    ],
    'higgs': [
        ['mc', 'mc_345060.ggH125_ZZ4lep'],
        ['mc', 'mc_344235.VBFH125_ZZ4lep'],
    ],
    'zz': [
        ['mc', 'mc_363490.llll'],
    ],
    'other': [
        ['mc', 'mc_361106.Zee'],
        ['mc', 'mc_361107.Zmumu'],
    ],
}

In [None]:
# Open one ROOT file per sample and keep the object stored under 'myTree'
# (presumably a TTree; its branches are read in a later cell).
# `samples` records the sample names in the order they appear in samples_dic.
Tuples = {}
samples = []
processes = samples_dic.keys()
for p in processes:
    for folder, sample in samples_dic[p]:  # each entry is [folder name, sample name]
        samples.append(sample)
        Tuples[sample] = uproot.open(f'samples_FilterGem_{sample}.root')['myTree']

First we ask them to read the branches that start with `lep`

In [None]:
# Display the object that was loaded from the 'myTree' key for sample data_A.
Tuples['data_A']

In [None]:
# List the names (branches) available in the data_A tree.
Tuples['data_A'].keys()

In [None]:
# Read every branch of every sample into memory, keyed by sample name.
branches = {s: Tuples[s].arrays() for s in samples}

In [None]:
import matplotlib.pyplot as plt

In [None]:
# Quick look at the unselected m4l distribution of one data sample.
# Axis labels and a title make the figure readable on its own, and
# plt.show() keeps the cell from dumping the raw (counts, edges, patches) repr.
plt.hist(branches['data_A']['m4l'])
plt.xlabel('m4l [GeV]')
plt.ylabel('Events')
plt.title('data_A')
plt.show()

# Selection

- sum good leptons
- sum charge
- sum types

### Example of a simple selection

In [None]:
# Per-event boolean mask: True where the 'sum_good_lep' value equals 4.
good_lepton_count = branches['data_A']['sum_good_lep']
sum_leptons_test = (good_lepton_count == 4)

In [None]:
 branches['data_A']['good_lep']  # per-event 'good_lep' values for data_A; the full selection later requires entries equal to 1

In [None]:
# Display the mask: one boolean per event, True where sum_good_lep == 4.
sum_leptons_test

## All the selection for this analysis

In [None]:
# Build one boolean event mask per sample; selection_events[sample] is True
# for the events that pass every requirement below.
selection_events = {}
for s in samples:
    events = branches[s]

    # trigger: either of the two trigger flags fired
    trigger = (events['trigM'] == True) | (events['trigE'] == True)

    # Lepton multiplicity and total charge
    sum_leptons = events['sum_good_lep'] == 4
    sum_charge = events['sum_lep_charge'] == 0

    # Accepted values of goodlep_sumtypes: 44 (ee), 52 (mm) and 48 (em)
    lep_types = events['goodlep_sumtypes']
    sum_types_goodlep = (lep_types == 44) | (lep_types == 52) | (lep_types == 48)
    sum_lep_selection = sum_leptons & sum_charge & sum_types_goodlep

    # Select leptons with high transverse momentum: the first three leptons
    # must each exceed their threshold and be flagged as good (good_lep == 1)
    lep_pt = events['lep_pt']
    good_lep = events['good_lep']
    pt_0_selection = (lep_pt[:, 0] > 25000) & (good_lep[:, 0] == 1)
    pt_1_selection = (lep_pt[:, 1] > 15000) & (good_lep[:, 1] == 1)
    pt_2_selection = (lep_pt[:, 2] > 10000) & (good_lep[:, 2] == 1)
    high_pt_selection = pt_0_selection & pt_1_selection & pt_2_selection

    # sum_lep_selection already contains the lepton-type requirement
    final_selection = trigger & sum_lep_selection & high_pt_selection
    selection_events[s] = final_selection

In [None]:
# Number of events in each sample before any selection is applied.
for s in samples:
    n_initial = len(branches[s]['m4l'])
    print(s,'      Initial events: ', n_initial)

In [None]:
# Number of events in each sample that pass the selection mask.
for s in samples:
    m4l_selected = branches[s]['m4l'][selection_events[s]]
    print(s,'      After selection: ', len(m4l_selected))

In [None]:
# Simulated (MC) samples: every sample whose folder tag in samples_dic is not 'data'.
# Derived from samples_dic rather than slicing samples[4:], so the list stays
# correct if data samples are added or removed. Order follows samples_dic.
mc_samples = [sample
              for entries in samples_dic.values()
              for folder, sample in entries
              if folder != 'data']

In [None]:
# Real-data samples: every sample whose folder tag in samples_dic is 'data'.
# Derived from samples_dic rather than slicing samples[:4], so the list stays
# correct if data samples are added or removed. Order follows samples_dic.
data_samples = [sample
                for entries in samples_dic.values()
                for folder, sample in entries
                if folder == 'data']

In [None]:
# Selected m4l values of each MC sample, one entry per sample in mc_samples order.
stack_mc_list = [branches[s]['m4l'][selection_events[s]] for s in mc_samples]
for mc_selection_values in stack_mc_list:
    print(len(mc_selection_values))

In [None]:
# Per-event 'weight' values of the selected MC events, aligned with the
# per-sample m4l lists (same samples, same selection mask, same order).
stack_weights_list = [branches[s]['weight'][selection_events[s]] for s in mc_samples]
for mc_selection_weight in stack_weights_list:
    print(len(mc_selection_weight))

In [None]:
# Merge the selected m4l values of all data samples into one flat list,
# printing how many events each sample contributes.
stack_data_list = []
for d in data_samples:
    data_list = list(branches[d]['m4l'][selection_events[d]])
    print(d,len(data_list))
    stack_data_list.extend(data_list)

In [None]:
def plotData(data_var, range_ab, bins_samples, label='Data'):
    """Histogram ``data_var`` and draw it as black points with error bars.

    The points are drawn on the current matplotlib axes at the bin centres,
    with the square root of each bin count as the vertical error bar.

    Parameters
    ----------
    data_var : array-like
        Values to histogram.
    range_ab : sequence of two numbers
        Lower and upper edge of the histogram range, passed to ``np.histogram``.
    bins_samples : int or sequence
        Bin specification, passed to ``np.histogram``.
    label : str, optional
        Legend label for the plotted points (default ``'Data'``).

    Returns
    -------
    data_hist : numpy.ndarray
        Number of entries in each bin.
    bins : numpy.ndarray
        Bin edges (one more element than ``data_hist``).
    """
    data_hist, bins = np.histogram(data_var, range=range_ab, bins=bins_samples)
    print(data_hist, bins)
    data_hist_errors = np.sqrt(data_hist)
    bin_center = (bins[1:] + bins[:-1]) / 2
    plt.errorbar(x=bin_center, y=data_hist, yerr=data_hist_errors, fmt='ko', label=label)
    # Hand the histogram back so callers can reuse it instead of recomputing.
    return data_hist, bins

In [None]:
# Settings for the final plot.
var_name, units = 'm4l', ' [GeV]'   # branch to plot and the unit suffix for the x-axis label
rangos = [[80, 170]]                # histogram range(s) as [low, high]; only the first one is used
bines = 24                          # number of bins: 24 bins over 80-170 is 3.75 per bin

In [None]:
# Final figure: data points with error bars together with the stacked, weighted MC histograms.
plt.figure(figsize=(10,8))
plt.title(var_name)
# Data: binned with np.histogram inside plotData and drawn as points with error bars
plotData(stack_data_list, rangos[0], bines)
# MC: one stacked layer per sample in mc_samples, using the same range and binning as the data
h1=plt.hist(stack_mc_list, range=rangos[0], label=mc_samples, stacked=True, weights=stack_weights_list, bins=bines)
plt.ylabel('Events', fontsize=18)
plt.xlabel(var_name+units, fontsize=18)
# Uncomment the next line for a logarithmic y axis
#plt.yscale('log')
plt.tick_params(which='both', # apply to both major and minor ticks
                direction='in', # draw the ticks pointing into the axes
                top=True, # draw ticks on the top axis
                right=True, length=6, width=1) # also draw ticks on the right axis; set tick length and width
plt.yticks(fontsize=16)
plt.xticks(fontsize=16)
# Fixed y range; bins with more than 30 events would be cut off
plt.ylim(0,30)
plt.legend(fontsize=18,frameon=False)
plt.show()
plt.close()