# Notebook cell In [10]:
import uproot
import vector
import matplotlib.pyplot as plt
import awkward as ak
import numpy as np
from scipy import stats
import pandas as pd
import pyarrow.feather as feather

# Open the "Delphes" tree of the input ROOT file. The path is relative, so it
# is resolved against the current working directory.
tree2 = uproot.open('delphes_output_cWtil_real_final.root:Delphes')

# Read only the listed branches into a single array of events.
events_NP1 = tree2.arrays(['Event.Weight','Muon_size','Muon.PT','Muon.Eta','Muon.Phi', 
                    'Electron_size', 'Electron.PT', 'Electron.Eta','Electron.Phi', 
                    'Jet.PT', 'Jet.Eta', 'Jet.Phi', 'Jet.DeltaEta', 'Jet.DeltaPhi', 'Jet_size', 'Jet.Mass',
                    'Jet.BTag', 'MissingET.MET', 'MissingET_size'])


# Number of entries before any cut; passed to scaled_cross_section() in
# __main__() to normalise the surviving weights.
N_EVENTS_NP1 = len(events_NP1['Event.Weight'])

def initial_lepton_cuts(events_array):
    """Build the event-level lepton preselection mask.

    An event passes when exactly one electron and exactly one muon satisfy
    the per-object cuts:

    * muon: ``PT > 27`` and ``|Eta| < 2.5``
    * electron: ``PT > 27``, ``|Eta| < 2.47`` and ``|Eta|`` not strictly
      between 1.37 and 1.52

    Args:
        events_array: Event record exposing the per-event fields ``Muon.PT``,
            ``Muon.Eta``, ``Electron.PT`` and ``Electron.Eta``.

    Returns:
        One boolean per event.
    """
    # NOTE(review): no requirement is placed on the total lepton multiplicity,
    # so an event with extra leptons that fail these cuts is still accepted.
    # Confirm this is intended, since later stages index the first lepton.
    muon_abs_eta = abs(events_array['Muon.Eta'])
    electron_abs_eta = abs(events_array['Electron.Eta'])

    total_muon_mask = (events_array['Muon.PT'] > 27) & (muon_abs_eta < 2.5)

    # Electrons are rejected in the 1.37 < |eta| < 1.52 window.
    outside_eta_gap = (electron_abs_eta >= 1.52) | (electron_abs_eta <= 1.37)
    total_electron_mask = (
        (electron_abs_eta < 2.47)
        & outside_eta_gap
        & (events_array['Electron.PT'] > 27)
    )

    # Count the passing objects in each event and demand exactly one of each.
    one_good_electron = ak.sum(total_electron_mask, axis=1) == 1
    one_good_muon = ak.sum(total_muon_mask, axis=1) == 1
    return one_good_electron & one_good_muon

def initial_jet_cuts(events_array):
    """Build the event-level jet preselection mask.

    A jet is counted when it has ``PT > 25``, ``|Eta| <= 4.5`` and a
    ``BTag`` value different from 1. An event passes when at least two jets
    are counted.

    Args:
        events_array: Event record exposing the per-event fields ``Jet.PT``,
            ``Jet.Eta`` and ``Jet.BTag``.

    Returns:
        One boolean per event.
    """
    jet_kinematics_mask = (
        (events_array['Jet.PT'] > 25) & (abs(events_array['Jet.Eta']) <= 4.5)
    )
    # The kinematic cuts are already required, so the b-tag veto only needs
    # the tag itself: K & ~(K & B) is the same as K & ~B.
    not_btagged_mask = ~(events_array['Jet.BTag'] == 1)

    total_jet_mask = jet_kinematics_mask & not_btagged_mask
    return ak.sum(total_jet_mask, axis=1) >= 2

def transverse_mass_calc(dilepton_system, met):
    """Return sqrt((et + met)**2 - |pt + met|**2) for the given system.

    Args:
        dilepton_system: Object exposing ``et`` and ``pt`` attributes.
        met: Value added to both ``et`` and ``pt``.

    Returns:
        The square root of the difference of the two squared sums.
    """
    # NOTE(review): pt and met are added as plain numbers, not as vectors.
    # Confirm this is the intended transverse-mass definition.
    energy_term = (dilepton_system.et + met) ** 2
    momentum_term = np.abs(dilepton_system.pt + met) ** 2
    return np.sqrt(energy_term - momentum_term)

def invariant_mass_calc(obj1, obj2):
    """Return the ``mass`` attribute of ``obj1 + obj2``."""
    combined = obj1 + obj2
    return combined.mass

def jets_final_cuts(jets):
    """Build the event-level mask for the final dijet selection.

    Uses the jets at index 0 and 1 of every event and requires:

    * ``pt > 65`` for the first jet and ``pt > 35`` for the second,
    * an absolute rapidity difference above 2,
    * an invariant mass of the pair above 500.

    Args:
        jets: Per-event jet vectors; every event must hold at least two.

    Returns:
        One boolean per event.
    """
    first_jet = jets[:, 0]
    second_jet = jets[:, 1]

    passes_first_pt = first_jet.pt > 65
    passes_second_pt = second_jet.pt > 35
    passes_rapidity_gap = np.abs(first_jet.rapidity - second_jet.rapidity) > 2
    passes_dijet_mass = invariant_mass_calc(first_jet, second_jet) > 500

    return passes_first_pt & passes_second_pt & passes_dijet_mass & passes_rapidity_gap

def leptons_mass_cut(electrons, muons):
    """Require the electron-muon invariant mass to exceed 20.

    The pair is formed from the entry at index 0 of each collection.

    Returns:
        One boolean per event.
    """
    first_electron = electrons[:, 0]
    first_muon = muons[:, 0]
    return invariant_mass_calc(first_electron, first_muon) > 20

def missing_et_cut(missing_et):
    """Return the flattened mask of entries with missing ET above 30."""
    # presumably one MET entry per event, so the flattened mask lines up with
    # the event axis; verify against the input.
    passes_threshold = missing_et > 30
    return ak.flatten(passes_threshold)

def apply_cuts(array):
    """Run the full event selection and return the surviving weights and events.

    The cuts are applied in sequence, each on the events that survived the
    previous one:

    1. lepton and jet preselection (``initial_lepton_cuts`` and
       ``initial_jet_cuts``),
    2. final dijet cuts (``jets_final_cuts``),
    3. electron-muon invariant-mass cut (``leptons_mass_cut``),
    4. missing-ET cut (``missing_et_cut``).

    Args:
        array: Event record holding the ``Jet.*``, ``Electron.*``, ``Muon.*``,
            ``MissingET.MET`` and ``Event.Weight`` fields read by the cuts.

    Returns:
        A ``(filtered_weights, filtered_array)`` tuple: the ``Event.Weight``
        field of the selected events and the selected events themselves.
    """
    jets = vector.zip({'pt': array['Jet.PT'], 'eta': array['Jet.Eta'],
                       'phi': array['Jet.Phi'], 'mass': array['Jet.Mass']})
    electrons = vector.zip({'pt': array['Electron.PT'], 'eta': array['Electron.Eta'],
                            'phi': array['Electron.Phi'], 'mass': 0.000511})
    muons = vector.zip({'pt': array['Muon.PT'], 'eta': array['Muon.Eta'],
                        'phi': array['Muon.Phi'], 'mass': 0.10566})
    missing_et = array['MissingET.MET']

    # Preselection: evaluated on the full sample and combined once.
    initial_lepton_filter = initial_lepton_cuts(array)
    initial_jet_filter = initial_jet_cuts(array)
    preselection = initial_lepton_filter & initial_jet_filter
    filtered_array = array[preselection]
    jets = jets[preselection]
    electrons = electrons[preselection]
    muons = muons[preselection]
    missing_et = missing_et[preselection]

    # Dijet cuts: need the preselection first, since they index jets 0 and 1.
    total_jet_mask = jets_final_cuts(jets)
    filtered_array = filtered_array[total_jet_mask]
    electrons = electrons[total_jet_mask]
    muons = muons[total_jet_mask]
    missing_et = missing_et[total_jet_mask]

    # Dilepton invariant-mass cut.
    dilepton_mass_mask = leptons_mass_cut(electrons, muons)
    filtered_array = filtered_array[dilepton_mass_mask]
    missing_et = missing_et[dilepton_mass_mask]

    # Missing-ET cut.
    missing_et_mask = missing_et_cut(missing_et)
    filtered_array = filtered_array[missing_et_mask]

    # The weights are a field of the event record, so they follow the same
    # masks and need no separate filter chain.
    filtered_weights = filtered_array['Event.Weight']
    return filtered_weights, filtered_array

def check_cross_section(cross_x, weights):
    """Return the reciprocal of ``cross_x`` divided by the summed weights."""
    total_weight = np.sum(weights)
    ratio = cross_x / total_weight
    return 1 / ratio

def combine_arrays(array1, array2, array3):
    """Concatenate the three arrays, in the given order, with ``ak.concatenate``."""
    parts = [array1, array2, array3]
    return ak.concatenate(parts, mergebool=True, highlevel=True)

def scaled_cross_section(weights1, number, *, luminosity=139):
    """Scale weights by ``luminosity / number``.

    Args:
        weights1: Weights to scale (scalar or array).
        number: Number of events the weights are normalised to.
        luminosity: Integrated luminosity in fb^-1. Defaults to 139, the
            value that used to be hard-coded here.

    Returns:
        ``weights1`` multiplied by ``luminosity / number``.
    """
    normalisation = luminosity / number
    return weights1 * normalisation


def ak_to_numpy_array(root_observable):
    """Convert a jagged per-event array into a rectangular NumPy array.

    Every event is padded to the length of the longest one, and the padding
    slots are filled with 0.

    Args:
        root_observable: Awkward array with one variable-length list per
            event.

    Returns:
        A NumPy array with one row per event and one column per slot.
    """
    nested_counts = ak.num(root_observable)
    max_len = ak.max(nested_counts)
    if max_len is None:
        # ak.max returns None when there are no events; pad to width 0
        # instead of passing None on to ak.pad_none.
        max_len = 0

    # NOTE(review): padding uses 0, which cannot be told apart from a real
    # value of 0 afterwards. Confirm that is acceptable downstream.
    root_observable = ak.pad_none(root_observable, target=max_len, axis=1)
    root_observable = ak.fill_none(root_observable, 0)
    root_observable = ak.to_numpy(root_observable)

    return root_observable

def object_selector(filtered_events, filtered_weights):
    """Assemble the feature matrix and weight array for the selected events.

    The Eta and Phi fields of jets, electrons and muons are each converted
    with ``ak_to_numpy_array`` and concatenated along the last axis in the
    order Jet.Eta, Jet.Phi, Electron.Eta, Electron.Phi, Muon.Eta, Muon.Phi.
    The event count, the feature-matrix shape and the matrix itself are
    printed.

    Args:
        filtered_events: Event record holding the six fields listed above.
        filtered_weights: Weights of the same events.

    Returns:
        A ``(x_data, weight_data)`` tuple of NumPy arrays.
    """
    print("Number of events:", len(filtered_weights))

    # Column order matters: export_to_feather() names the columns in this order.
    feature_fields = (
        'Jet.Eta', 'Jet.Phi',
        'Electron.Eta', 'Electron.Phi',
        'Muon.Eta', 'Muon.Phi',
    )
    feature_blocks = [
        ak_to_numpy_array(filtered_events[field]) for field in feature_fields
    ]
    weight_data = ak_to_numpy_array(filtered_weights)

    x_data = np.concatenate(feature_blocks, axis=-1)
    print("x shape", x_data.shape)

    print(x_data)

    return x_data, weight_data

def export_to_feather(x_data, weight_data, filtered_events,
                      output_path="VBS_data.feather"):
    """Write the feature matrix and weights to a zstd-compressed Feather file.

    Columns are named ``<Object>_<Attr>_<i>`` for Jet, Electron and Muon with
    attributes Eta and Phi, where ``i`` runs up to the largest multiplicity
    of that object in ``filtered_events``. A ``Weight`` column is appended.

    Args:
        x_data: 2-D feature array, one row per event.
        weight_data: Weights assigned to the ``Weight`` column.
        filtered_events: Event record used only to size the column names.
        output_path: Destination file. Defaults to ``"VBS_data.feather"``,
            the path that used to be hard-coded here.
    """
    object_types = {
        "Jet": ("Eta", "Phi"),
        "Electron": ("Eta", "Phi"),
        "Muon": ("Eta", "Phi"),
    }

    # Largest per-event multiplicity of each object type.
    max_counts = {
        obj: np.max(ak.to_numpy(ak.num(filtered_events[f"{obj}.Eta"])))
        for obj in object_types
    }

    columns = [
        f"{obj}_{attr}_{i}"
        for obj, attrs in object_types.items()
        for attr in attrs
        for i in range(max_counts[obj])
    ]

    # NOTE(review): surplus names are dropped from the end without warning,
    # which would mislabel columns if x_data used a different layout.
    # Too few names still make the DataFrame constructor raise.
    if len(columns) > x_data.shape[1]:
        columns = columns[: x_data.shape[1]]

    df = pd.DataFrame(x_data, columns=columns)

    df["Weight"] = weight_data

    feather.write_feather(df, output_path, compression="zstd")

    print(f"Data saved to {output_path} with shape {df.shape}")

def __main__():
    """Select events, normalise their weights and export the result."""
    selected_weights, selected_events = apply_cuts(events_NP1)
    normalised_weights = scaled_cross_section(selected_weights, N_EVENTS_NP1)

    features, weights = object_selector(selected_events, normalised_weights)
    export_to_feather(features, weights, selected_events)


# Run the pipeline only when executed as a script.
if __name__ == '__main__':
    __main__()

# Output of the last run:
# FileNotFoundError: [Errno 2] No such file or directory: '/delphes_output_cWtil_real_final.root'