In [1]:
import pickle
import matplotlib.pyplot as plt
import numpy as np
# Apply the matplotlib style sheet "science.mplstyle". The path is relative,
# so presumably the file must be in the kernel's working directory — TODO confirm.
plt.style.use("science.mplstyle")

In [2]:
# Per-muon branches read from each input pickle (used for both the muon and the antimuon files).
muon_vars = ["Muon_pt", "Muon_eta", "Muon_phi", "Muon_charge", "Muon_pfRelIso03_all", "Muon_pfRelIso04_all"]
# Per-jet branches read from each input pickle.
jet_vars = ["Jet_pt", "Jet_eta", "Jet_phi", "Jet_mass", "Jet_nConstituents", "Jet_btagCSVV2", "Jet_btagDeepB", "Jet_btagDeepFlavB"]

# Directory holding the filtered_mu_*, filtered_amu_* and filtered_jet_* pickle files.
path_to_input = "/pscratch/sd/r/rmastand/dimuonAD/post_root_sim/"

# Dataset tag: interpolated into the input file names and used as the output file name.
# NOTE(review): the trailing numbers on the `code` lines are presumably file counts — TODO confirm.
code = "SM_SIM" # 49
#code = "DATA" # 28

# Analysis variant. "SUSY" enables the dijet cut and the dijet/Higgs composites;
# "eta" is the only other value handled when the output dict is assembled.
project_id = "SUSY"

# Output directory, one sub-directory per project_id.
path_to_output = f"/pscratch/sd/r/rmastand/dimuonAD/compiled_data/{project_id}"


#code = "BSM_XYY" # 2
#code = "BSM_HAA" # 11

In [3]:
# How many jets (the first ones listed in each event) get their own column set.
num_jets_to_save = 2


def _seed_columns(names):
    """Return a dict with one fresh one-element placeholder array per name.

    The placeholder only exists so that later np.hstack calls have something to
    append to; its single (uninitialised) element is dropped after loading.
    """
    return dict((name, np.empty((1,))) for name in names)


# Muon and antimuon accumulators share the same set of variables.
filtered_muons = _seed_columns(muon_vars)
filtered_amuons = _seed_columns(muon_vars)
# One accumulator dict per saved jet slot: filtered_jets[slot][variable].
filtered_jets = dict((slot, _seed_columns(jet_vars)) for slot in range(num_jets_to_save))


Load in the data

In [4]:
# Read the per-file pickles and append their contents to the accumulators.
# NOTE(review): pickle.load can execute arbitrary code; only run this on trusted files.
# NOTE(review): only files 0-9 are read, although the config cell annotates the
# dataset codes with other numbers — confirm that 10 is the intended file count.
for file_index in range(10):

    print(file_index)

    # Muons: every variable is a flat per-event array, so it can be appended directly.
    with open(f"{path_to_input}/filtered_mu_{code}_{file_index}", "rb") as input_file:
        loc_mu_feature = pickle.load(input_file)
    for mv in muon_vars:
        filtered_muons[mv] = np.hstack((filtered_muons[mv], loc_mu_feature[mv]))

    # Antimuons: same layout as the muons.
    with open(f"{path_to_input}/filtered_amu_{code}_{file_index}", "rb") as input_file:
        loc_amu_feature = pickle.load(input_file)
    for mv in muon_vars:
        filtered_amuons[mv] = np.hstack((filtered_amuons[mv], loc_amu_feature[mv]))

    # Jets: each event holds a variable number of jets, so every saved jet slot
    # is padded with NaN when the event has fewer jets than num_jets_to_save.
    with open(f"{path_to_input}/filtered_jet_{code}_{file_index}", "rb") as input_file:
        loc_jet_feature = pickle.load(input_file)
    for jv in jet_vars:
        loc_jet_vars = {jet_slot: [] for jet_slot in range(num_jets_to_save)}
        for event in loc_jet_feature[jv]:
            try: # this holds if the event entry is indexable (at least one jet)
                for jet_slot in range(num_jets_to_save):
                    try:
                        loc_jet_vars[jet_slot].append(event[jet_slot])
                    except IndexError: # fewer than num_jets_to_save jets
                        loc_jet_vars[jet_slot].append(np.nan)
            except TypeError: # entry is not indexable: no jets
                for jet_slot in range(num_jets_to_save):
                    loc_jet_vars[jet_slot].append(np.nan)
        for jet_slot in range(num_jets_to_save):
            filtered_jets[jet_slot][jv] = np.hstack((filtered_jets[jet_slot][jv], loc_jet_vars[jet_slot]))


# Drop the one-element placeholder that each accumulator was initialised with.
for mv in muon_vars:

    filtered_muons[mv] = filtered_muons[mv][1:]
    filtered_amuons[mv] = filtered_amuons[mv][1:]
    print(mv, filtered_muons[mv].shape)

for jv in jet_vars:

    for jet_slot in range(num_jets_to_save):

        filtered_jets[jet_slot][jv] = filtered_jets[jet_slot][jv][1:]
        print(jv, filtered_jets[jet_slot][jv].shape)


num_events = filtered_muons["Muon_pt"].shape[0]
print(f"Num. events: {num_events}")

0
1
2
3
4
5
6
7
8
9
Muon_pt (2123550,)
Muon_eta (2123550,)
Muon_phi (2123550,)
Muon_charge (2123550,)
Muon_pfRelIso03_all (2123550,)
Muon_pfRelIso04_all (2123550,)
Jet_pt (2123550,)
Jet_pt (2123550,)
Jet_eta (2123550,)
Jet_eta (2123550,)
Jet_phi (2123550,)
Jet_phi (2123550,)
Jet_mass (2123550,)
Jet_mass (2123550,)
Jet_nConstituents (2123550,)
Jet_nConstituents (2123550,)
Jet_btagCSVV2 (2123550,)
Jet_btagCSVV2 (2123550,)
Jet_btagDeepB (2123550,)
Jet_btagDeepB (2123550,)
Jet_btagDeepFlavB (2123550,)
Jet_btagDeepFlavB (2123550,)
Num. events: 2123550


Plot data

In [None]:

# Histogram bin edges, keyed by the second "_"-separated token of a variable
# name (e.g. "Muon_pfRelIso03_all" -> "pfRelIso03", "Dimu_mass" -> "mass").
bins = {
    "pt":np.linspace(0, 200, 100),
    "eta":np.linspace(-4, 4, 100),
    "phi":np.linspace(-3.2, 3.2, 100),
    "charge":np.linspace(-3.2, 3.2, 100),
    "pfRelIso03":np.linspace(0, 1, 100),
    "pfRelIso04":np.linspace(0, 1, 100),
    "mass":np.linspace(0, 200, 100),
    "nConstituents":np.linspace(0, 100, 100),
    "btagCSVV2":np.linspace(0, 1, 100),
    "btagDeepB":np.linspace(0, 1, 100),
    "btagDeepFlavB":np.linspace(0, 1, 100),
       }

# Axis labels. Raw strings keep the LaTeX backslashes literal without relying
# on Python passing unknown escape sequences through.
nice_labels = {
    "Muon_pt":"Muon $p_T$",
    "Muon_eta":r"Muon $\eta$",
    "Muon_phi":r"Muon $\phi$",
    "Muon_charge":"Charge",
    "Muon_pfRelIso03_all":"Muon R03",
    "Muon_pfRelIso04_all":"Muon R04",
    "Jet_pt":"Jet $p_T$",
    "Jet_eta":r"Jet $\eta$",
    "Jet_phi":r"Jet $\phi$",
    "Jet_mass":"Jet $M$",
    "Jet_nConstituents":"N const.",
    "Jet_btagCSVV2":"Jet_btagCSVV2.",
    "Jet_btagDeepB":"Jet_btagDeepB.",
    "Jet_btagDeepFlavB":"Jet_btagDeepFlavB.",
       }



# Muon vs. antimuon distributions, one figure per variable.
for mv in muon_vars:

    mv_bins = bins[mv.split("_")[1]]

    plt.figure()
    plt.hist(filtered_muons[mv], bins = mv_bins, histtype = "step", label = r"$\mu$", color = "purple")
    plt.hist(filtered_amuons[mv], bins = mv_bins, histtype = "step", label = r"$\overline{\mu}$", color = "green")
    plt.xlabel(nice_labels[mv])
    plt.ylabel("Counts")
    plt.legend()
    plt.show()

# Distributions for a single saved jet slot (log-scaled counts).
jet_index = 0
for jv in jet_vars:

    plt.figure()
    plt.hist(filtered_jets[jet_index][jv], bins = bins[jv.split("_")[1]], histtype = "step", color = "pink")
    plt.xlabel(nice_labels[jv])
    plt.ylabel("Counts")
    plt.yscale("log")
    plt.show()




## Cut on the number of jets

In [None]:

if project_id == "SUSY":

    # An event passes only if neither of the first two jet slots is NaN in Jet_pt.
    jet0_missing = np.isnan(filtered_jets[0]["Jet_pt"])
    jet1_missing = np.isnan(filtered_jets[1]["Jet_pt"])
    dijet_filter = ~(jet0_missing | jet1_missing)

    print(dijet_filter)

    # Apply the same event mask to the muon and antimuon columns...
    for mv in muon_vars:
        for lepton_table in (filtered_muons, filtered_amuons):
            lepton_table[mv] = lepton_table[mv][dijet_filter]
        print(mv, filtered_muons[mv].shape)

    # ...and to every variable of every saved jet slot.
    for jv in jet_vars:
        for i in range(num_jets_to_save):
            slot_table = filtered_jets[i]
            slot_table[jv] = slot_table[jv][dijet_filter]
            print(jv, slot_table[jv].shape)

## Assemble dimuon system

In [None]:
muon_mass = 0.1056583755 # GeV

def assemble_m_inv(a_M, a_pt, a_eta, a_phi, b_M, b_pt, b_eta, b_phi):
    """Combine two particles into their parent ("mother") system.

    Each particle is given as (mass, pT, eta, phi). The inputs may be scalars
    or numpy arrays; the arithmetic is element-wise.

    Parameters
    ----------
    a_M, a_pt, a_eta, a_phi : mass, transverse momentum, pseudorapidity and
        azimuthal angle of the first particle.
    b_M, b_pt, b_eta, b_phi : the same quantities for the second particle.

    Returns
    -------
    (mother_M, mother_pt, mother_eta, mother_phi) of the summed four-momentum.
    mother_phi is in (-pi, pi].
    """

    # E^2 = m^2 + |p|^2, with |p| = pT * cosh(eta)
    a_E = np.sqrt(a_M**2 + (a_pt*np.cosh(a_eta))**2)
    b_E = np.sqrt(b_M**2 + (b_pt*np.cosh(b_eta))**2)

    a_px = a_pt*np.cos(a_phi)
    b_px = b_pt*np.cos(b_phi)

    a_py = a_pt*np.sin(a_phi)
    b_py = b_pt*np.sin(b_phi)

    a_pz = a_pt*np.sinh(a_eta)
    b_pz = b_pt*np.sinh(b_eta)

    mother_E = a_E + b_E
    mother_px = a_px + b_px
    mother_py = a_py + b_py
    mother_pz = a_pz + b_pz

    # Floating-point rounding can push m^2 marginally below zero (e.g. two
    # collinear massless particles), which would make the square root NaN.
    # Clamp at zero; np.maximum still propagates NaN inputs unchanged.
    mother_M2 = mother_E**2 - mother_px**2 - mother_py**2 - mother_pz**2
    mother_M = np.sqrt(np.maximum(mother_M2, 0.0))
    mother_pt = np.sqrt(mother_px**2 + mother_py**2)
    mother_eta = np.arcsinh(mother_pz/mother_pt)
    # arctan2 keeps the quadrant; arctan(py/px) folds px < 0 onto the wrong
    # half-plane, which breaks any later px/py reconstruction from (pt, phi).
    mother_phi = np.arctan2(mother_py, mother_px)


    return mother_M, mother_pt, mother_eta, mother_phi


# get the dimuon system
# get the dimuon system
dimu_M, dimu_pt, dimu_eta, dimu_phi = assemble_m_inv(muon_mass, filtered_muons["Muon_pt"], filtered_muons["Muon_eta"], filtered_muons["Muon_phi"],
                                           muon_mass, filtered_amuons["Muon_pt"], filtered_amuons["Muon_eta"], filtered_amuons["Muon_phi"])


# Composite (multi-particle) observables, keyed "<System>_<quantity>".
filtered_composites = {
            "Dimu_mass":dimu_M,
            "Dimu_pt":dimu_pt,
            "Dimu_eta":dimu_eta,
            }


if project_id == "SUSY":

    # get the dijet system from the first two saved jets
    dijet_M, dijet_pt, dijet_eta, dijet_phi = assemble_m_inv(filtered_jets[0]["Jet_mass"], filtered_jets[0]["Jet_pt"], filtered_jets[0]["Jet_eta"], filtered_jets[0]["Jet_phi"],
                                                   filtered_jets[1]["Jet_mass"], filtered_jets[1]["Jet_pt"], filtered_jets[1]["Jet_eta"], filtered_jets[1]["Jet_phi"])

    # get the higgs system (dimuon + dijet)
    H_M, H_pt, H_eta, H_phi = assemble_m_inv(dimu_M, dimu_pt, dimu_eta, dimu_phi, dijet_M, dijet_pt, dijet_eta, dijet_phi)

    filtered_composites["Dijet_mass"] = dijet_M
    filtered_composites["Dijet_pt"] = dijet_pt
    filtered_composites["Dijet_eta"] = dijet_eta
    # Each key stores the matching quantity: "H_pt" must exist for the output
    # step, and "Dijet_mass" must not be overwritten by a Higgs quantity.
    filtered_composites["H_mass"] = H_M
    filtered_composites["H_pt"] = H_pt
    filtered_composites["H_eta"] = H_eta
               



In [None]:
# Plot each composite observable for events where both saved jets have a
# DeepB b-tag score of at least 0.6.
extra_filter =  (filtered_jets[0]["Jet_btagDeepB"]>=0.6) & (filtered_jets[1]["Jet_btagDeepB"]>=0.6)
for key in filtered_composites.keys():

    plt.figure()
    # NOTE(review): the legend label is "$\mu$" for every composite, including
    # dijet and Higgs quantities — confirm that this is intended.
    # Raw string so the LaTeX backslash is not parsed as a Python escape.
    plt.hist(filtered_composites[key][extra_filter], bins = bins[key.split("_")[1]], histtype = "step", label = r"$\mu$", color = "purple")
    plt.xlabel(key)
    plt.ylabel("Counts")
    plt.legend()
    plt.show()
    

Assemble arrays

In [None]:
"""
full_data_array = np.empty((num_events, 11))

full_data_array[:,0] = filtered_jets["Jet_pt"]
full_data_array[:,1] = filtered_jets["Jet_eta"]
full_data_array[:,2] = filtered_jets["Jet_phi"]
full_data_array[:,3] = filtered_jets["Jet_mass"]
full_data_array[:,4] = filtered_muons["Muon_pfRelIso03_all"]
full_data_array[:,5] = filtered_amuons["Muon_pfRelIso03_all"]
full_data_array[:,6] = filtered_muons["Muon_pfRelIso04_all"]
full_data_array[:,7] = filtered_amuons["Muon_pfRelIso04_all"]
full_data_array[:,8] = filtered_dimuons["Dimu_pt"]
full_data_array[:,9] = filtered_dimuons["Dimu_eta"]
full_data_array[:,10] = filtered_dimuons["Dimu_mass"]

print(full_data_array)
"""

# Columns to be written out, keyed by output name. Which columns are kept
# depends on project_id; insertion order is preserved in the resulting dict.
data_to_output = {}

if project_id == "SUSY":

    # Dijet, b-tag, Higgs-candidate and dimuon observables.
    data_to_output["dijet_pt"] = filtered_composites["Dijet_pt"]
    data_to_output["dijet_eta"] = filtered_composites["Dijet_eta"]
    data_to_output["dijet_mass"] = filtered_composites["Dijet_mass"]
    data_to_output["jet0_btag"] = filtered_jets[0]["Jet_btagDeepB"]
    data_to_output["jet1_btag"] = filtered_jets[1]["Jet_btagDeepB"]
    data_to_output["higgs_pt"] = filtered_composites["H_pt"]
    data_to_output["higgs_eta"] = filtered_composites["H_eta"]
    data_to_output["higgs_mass"] = filtered_composites["H_mass"]
    data_to_output["dimu_pt"] = filtered_composites["Dimu_pt"]
    data_to_output["dimu_eta"] = filtered_composites["Dimu_eta"]
    data_to_output["dimu_mass"] = filtered_composites["Dimu_mass"]

elif project_id == "eta":

    # Single-muon kinematics and isolation plus the dimuon observables.
    data_to_output["muon_pt"] = filtered_muons["Muon_pt"]
    data_to_output["amuon_pt"] = filtered_amuons["Muon_pt"]
    data_to_output["muon_eta"] = filtered_muons["Muon_eta"]
    data_to_output["amuon_eta"] = filtered_amuons["Muon_eta"]
    data_to_output["muon_iso03"] = filtered_muons["Muon_pfRelIso03_all"]
    data_to_output["amuon_iso03"] = filtered_amuons["Muon_pfRelIso03_all"]
    data_to_output["muon_iso04"] = filtered_muons["Muon_pfRelIso04_all"]
    data_to_output["amuon_iso04"] = filtered_amuons["Muon_pfRelIso04_all"]
    data_to_output["dimu_pt"] = filtered_composites["Dimu_pt"]
    data_to_output["dimu_eta"] = filtered_composites["Dimu_eta"]
    data_to_output["dimu_mass"] = filtered_composites["Dimu_mass"]

else:
    # Without this branch an unrecognised project_id would silently leave
    # data_to_output empty.
    raise ValueError(f"Unknown project_id: {project_id!r}")
    



In [None]:


 # Write the assembled column dict to <path_to_output>/<code>.
 # pickle.dump takes (object, file): the object saved is data_to_output.
 # NOTE(review): the output directory must already exist — confirm, or create it beforehand.
 with open(f"{path_to_output}/{code}", "wb") as ofile:
     pickle.dump(data_to_output, ofile)