In [None]:
import awkward as ak
import os
import pickle

import matplotlib.pyplot as plt
import numpy as np
plt.style.use("../science.mplstyle")

from helpers.physics_functions import assemble_m_inv

In [None]:

# Dataset to compile; must be one of the keys of `num_files_dict` below.
# Alternative used in this notebook: "SM_SIM"
data_id = "skimmed_data_2016H_30555"
# Name of the output subdirectory. Alternative noted by the author: "BSM"
project_id = "lowmass"

# Input (precompiled) and output (compiled) directories; both keep a trailing slash.
path_to_input = "/global/cfs/cdirs/m3246/rmastand/dimuonAD/precompiled_data/" + data_id + "/"
path_to_output = "/global/cfs/cdirs/m3246/rmastand/dimuonAD/compiled_data/" + project_id + "/"

# Number of input files to loop over, per dataset id.
num_files_dict = dict(
    skimmed_data_2016H_30555=28,
    SM_SIM=49,
    BSM_HAA=11,
    BSM_XYY=2,
)
num_files = num_files_dict[data_id]


In [None]:
 
# Open the first muon and jet files only to discover which branches are available.
# NOTE: pickle.load can execute arbitrary code, so only point this at trusted files.
with open(f"{path_to_input}/all_mu_0", "rb") as input_file:
    loc_mu_feature = pickle.load(input_file)

with open(f"{path_to_input}/all_jet_0", "rb") as input_file:
    loc_jet_feature = pickle.load(input_file)

jet_vars = list(loc_jet_feature.keys())

# Split the muon-file keys: anything containing "HLT" is treated as a trigger flag,
# everything else as a regular muon variable.
triggers_HLT, muon_vars = [], []
for var in loc_mu_feature.keys():
    if "HLT" in var:
        triggers_HLT.append(var)
    else:
        muon_vars.append(var)

# Print each group under its heading, followed by a blank line.
for heading, to_show in (
    ("Muon vars", (muon_vars,)),
    ("Trigger vars", (triggers_HLT, len(triggers_HLT))),
    ("Jet vars", (jet_vars,)),
):
    print(heading)
    print(*to_show)
    print()


In [None]:
# Switches for what gets compiled. Each flag is defined exactly once here.
run_jet = False       # if True: also require >= 1 jet and store the hardest-jet observables
run_samesign = True   # if True: also store a "<key>_samesign" copy of every observable

# Output container: observable name -> list that receives one array per input file.
all_data = {
      "dimu_pt": [],
      "dimu_eta": [],
     "dimu_phi": [],
     "n_muons": [],
     "n_jets": [],
      "dimu_mass": [],
    }

# Output suffix -> muon branch name; stored as "mu0_<suffix>" and "mu1_<suffix>".
single_mu_vars_to_add = {
    "ip3d":"Muon_ip3d",
    "jetiso":"Muon_jetRelIso",
    "eta":"Muon_eta",
    "pt": "Muon_pt",
    "phi":"Muon_phi",
    "iso04":"Muon_pfRelIso04_all",
}

for key in single_mu_vars_to_add:
    all_data[f"mu0_{key}"] = []
    all_data[f"mu1_{key}"] = []

# one entry per trigger flag found in the first muon file
for key in triggers_HLT:
    all_data[key] = []

# Output name -> jet branch name. Only implemented for hardest jet.
hardest_jet_vars_to_add = {
    "hardest_jet_btag":"Jet_btagDeepB",
    "hardest_jet_pt":"Jet_pt",
    "hardest_jet_eta":"Jet_eta",
    "hardest_jet_phi": "Jet_phi",
    "hardest_jet_mass":"Jet_mass",
}

if run_jet:
    for key in hardest_jet_vars_to_add:
        all_data[key] = []

if run_samesign:
    # Snapshot the keys first: the dict grows while the same-sign twins are added.
    for key in list(all_data):
        all_data[key+"_samesign"] = []
print(all_data.keys())

In [None]:
 for i in range(0,28):
    
    print(f"Analyzing file {i+1} of {num_files}...")
    
    # LOAD IN DATA
    
    with open(f"{path_to_input}/all_mu_{i}", "rb") as input_file:
        loc_mu_feature = pickle.load(input_file)
        
    with open(f"{path_to_input}/all_jet_{i}", "rb") as input_file:
        loc_jet_feature = pickle.load(input_file)
        
    
    if run_jet:
        # 2 hard muons that pass tight ID and jet
        event_filter = (np.sum(loc_mu_feature["Muon_tightId"], axis = 1) >= 2) & (ak.count(loc_jet_feature["Jet_mass"], axis = 1) >= 1)
    else:
        # 2 hard muons that pass tight ID
        event_filter = (np.sum(loc_mu_feature["Muon_tightId"], axis = 1) >= 2) 
   
    # helper function to grab tight muons
    def pull_tight_muons(feature):
        return loc_mu_feature[feature][loc_mu_feature["Muon_tightId"]][event_filter]
    
    dimu_mass, dimu_pt, dimu_eta, dimu_phi = assemble_m_inv(muon_mass, pull_tight_muons("Muon_pt")[:,0], pull_tight_muons("Muon_eta")[:,0], pull_tight_muons("Muon_phi")[:,0], 
                                   muon_mass, pull_tight_muons("Muon_pt")[:,1],  pull_tight_muons("Muon_eta")[:,1],  pull_tight_muons("Muon_phi")[:,1])
    
    total_charge = pull_tight_muons("Muon_charge")[:,0] + pull_tight_muons("Muon_charge")[:,1]
    
    # filters for opp-sign and same-sign muons; must apply *after* the event filter
    samesign_filter = np.abs(total_charge) == 2
    oppsign_filter = np.abs(total_charge) == 0

    # variables that have already had the event filter applied
    all_data["dimu_mass"].append(dimu_mass[oppsign_filter].to_numpy(allow_missing = True))
    all_data["dimu_pt"].append(dimu_pt[oppsign_filter].to_numpy(allow_missing = True))
    all_data["dimu_eta"].append(dimu_eta[oppsign_filter].to_numpy(allow_missing = True))
    all_data["dimu_phi"].append(dimu_phi[oppsign_filter].to_numpy(allow_missing = True))
    
    if run_samesign:
        all_data["dimu_mass_samesign"].append(dimu_mass[samesign_filter].to_numpy(allow_missing = True))
        all_data["dimu_pt_samesign"].append(dimu_pt[samesign_filter].to_numpy(allow_missing = True))
        all_data["dimu_eta_samesign"].append(dimu_eta[samesign_filter].to_numpy(allow_missing = True))
        all_data["dimu_phi_samesign"].append(dimu_phi[samesign_filter].to_numpy(allow_missing = True))
    # variables that need the event filter
    for mv in triggers_HLT:
        try:
            trigger_data = loc_mu_feature[mv][event_filter]
        except:
            trigger_data = ak.Array([False for i in range(sum(event_filter))])
        all_data[f"{mv}"].append(trigger_data[oppsign_filter].to_numpy(allow_missing = True))
        all_data[f"{mv}_samesign"].append(trigger_data[samesign_filter].to_numpy(allow_missing = True))
    
    
    all_data["n_jets"].append(ak.count(loc_jet_feature["Jet_mass"][event_filter], axis = 1)[oppsign_filter].to_numpy(allow_missing = True))
    if run_samesign:
        all_data["n_jets_samesign"].append(ak.count(loc_jet_feature["Jet_mass"][event_filter], axis = 1)[samesign_filter].to_numpy(allow_missing = True))
    
    all_data["n_muons"].append(ak.count(pull_tight_muons("Muon_charge"), axis = 1)[oppsign_filter].to_numpy(allow_missing = True))
    if run_samesign:
        all_data["n_muons_samesign"].append(ak.count(pull_tight_muons("Muon_charge"), axis = 1)[samesign_filter].to_numpy(allow_missing = True))
    
  
    """
    JET VARS
    """

    # only implemented for hardest jet
    if run_jet:
        for jet_var in hardest_jet_vars_to_add.keys(): 
            
            all_data[f"{jet_var}"].append(ak.firsts(loc_jet_feature[hardest_jet_vars_to_add[jet_var]][event_filter])[oppsign_filter].to_numpy(allow_missing = True))
            if run_samesign:
                all_data[f"{jet_var}_samesign"].append(ak.firsts(loc_jet_feature[hardest_jet_vars_to_add[jet_var]][event_filter][event_filter])[samesign_filter].to_numpy(allow_missing = True))

                
    for single_mu_var in single_mu_vars_to_add.keys(): 

        all_data[f"mu0_{single_mu_var}"].append(pull_tight_muons(single_mu_vars_to_add[single_mu_var])[oppsign_filter][:,0].to_numpy(allow_missing = True))
        if run_samesign:
            all_data[f"mu0_{single_mu_var}_samesign"].append(pull_tight_muons(single_mu_vars_to_add[single_mu_var])[samesign_filter][:,0].to_numpy(allow_missing = True))
        all_data[f"mu1_{single_mu_var}"].append(pull_tight_muons(single_mu_vars_to_add[single_mu_var])[oppsign_filter][:,1].to_numpy(allow_missing = True))
        if run_samesign:
            all_data[f"mu1_{single_mu_var}_samesign"].append(pull_tight_muons(single_mu_vars_to_add[single_mu_var])[samesign_filter][:,1].to_numpy(allow_missing = True))


print("Done!")
    
    

In [None]:
# Merge the per-file chunks of every observable into a single array and report its shape.
# Only values are reassigned (no keys added or removed), so iterating over items() is safe.
for key, per_file_chunks in all_data.items():
    all_data[key] = np.hstack(per_file_chunks)
    print(key, all_data[key].shape)


In [None]:
from helpers.plotting import *
from helpers.physics_functions import *
from helpers.data_transforms import *

import yaml
# Read the analysis window definitions from the workflow file.
with open("workflow.yaml", "r") as file:
    workflow = yaml.safe_load(file)

# SB_* / SR_* mass boundaries from the "upsilon" entry of workflow.yaml
SB_left = float(workflow["upsilon"]["SB_left"])
SR_left = float(workflow["upsilon"]["SR_left"])
SR_right = float(workflow["upsilon"]["SR_right"])
SB_right = float(workflow["upsilon"]["SB_right"])

# Fit / binning settings. These must be set before get_bins is called, because
# `num_bins_SR` is one of its arguments.
bkg_fit_type = "quintic"
num_bins_SR = 12
bkg_fit_function = bkg_fit_quintic
plot_bins_all, plot_bins_SR, plot_bins_left, plot_bins_right, plot_centers_all, plot_centers_SR, plot_centers_SB = get_bins(SR_left, SR_right, SB_left, SB_right, num_bins_SR= num_bins_SR)
x = np.linspace(SB_left, SB_right, 100) # plot curve fit


In [None]:

# Events whose dimuon mass lies between the outer window edges.
# Optional extra cut tried by the author: & (all_data["mu0_iso04"] >= 0.55) & (all_data["mu1_iso04"] >= 0.55)
in_SB = (all_data["dimu_mass"] >= SB_left) & (all_data["dimu_mass"] <= SB_right)

# One entry per figure: (observable key, bin edges, x-axis label, log-scale y axis?)
observables_to_compare = [
    ("dimu_mass", np.linspace(SB_left, SB_right, 50), "$m$ [GeV]", True),
    ("dimu_pt", np.linspace(0, 150, 50), "dimu $p_T$ [GeV]", False),
]

# For each trigger, overlay the normalised distribution of all selected events with
# that of the events that fired the trigger.
for trig in ["HLT_DoubleMu0", "HLT_Mu8", "HLT_Mu17_Mu8"]:

    passed_trig = all_data[trig][in_SB]

    for obs_key, bins, xlabel, log_y in observables_to_compare:

        events_all = all_data[obs_key][in_SB]
        events_trig = events_all[passed_trig]

        plt.figure()
        plt.hist(events_all, bins = bins, label = f"baseline ({len(events_all)} events)", histtype = "step", density = True)
        plt.hist(events_trig, bins = bins, label = f"trigger ({len(events_trig)} events)", histtype = "step", density = True)
        if log_y:
            plt.yscale("log")
        plt.ylabel("Density")
        plt.legend()
        plt.xlabel(xlabel)
        plt.title(trig)
        plt.show()

In [None]:
# Background fit over the mass window for events that fired one trigger, with an
# optional lower bound on the isolation variable of both muons.
iso_cut = 0
trig = "HLT_DoubleMu18NoFiltersNoVtx"
# The same isolation threshold is applied to both muons, matching the single
# "mu_iso > {iso_cut}" value reported in the plot title.
in_SB = (
    (all_data["dimu_mass"] >= SB_left) & (all_data["dimu_mass"] <= SB_right)
    & (all_data["mu0_iso04"] >= iso_cut) & (all_data["mu1_iso04"] >= iso_cut)
)

# masses of the selected events that also fired `trig`
masses = all_data["dimu_mass"][in_SB][all_data[trig][in_SB]]

popt_0, pcov, _, _, _ = curve_fit_m_inv(masses, bkg_fit_type, SR_left, SR_right, plot_bins_left, plot_bins_right, plot_centers_SB)

plt.figure()
plt.plot(x, bkg_fit_function(x, *popt_0), lw = 3, linestyle = "dashed")
# value shown in the legend: square root of the test statistic
sig = np.sqrt(calculate_test_statistic(masses, bkg_fit_function, bkg_fit_type, plot_bins_SR, plot_centers_SR, SR_left, SR_right, popt_0, pcov))
plt.hist(masses, bins = plot_bins_all, lw = 2, histtype = "step", density = False, label = f"sig: {round(sig,3)}")
plt.legend()
plt.title(f"{trig}, mu_iso > {iso_cut}, {len(masses)} events")
# mark the SR edges
plt.axvline(SR_right)
plt.axvline(SR_left)
plt.xlabel("M")
plt.ylabel("Counts")
plt.show()

In [None]:
print(triggers_HLT)
masses = all_data["dimu_mass"]

# Dimuon mass between 7.8 and 15, with both muons having iso04 >= 0.55
event_filter = (
    (all_data["dimu_mass"] >= 7.8) & (all_data["dimu_mass"] <= 15)
    & (all_data["mu0_iso04"]>= 0.55) & (all_data["mu1_iso04"]>= 0.55)
)

# HLT_DoubleMu0 decision for the selected events
passed_DoubleMu0 = all_data["HLT_DoubleMu0"][event_filter]

# One figure per observable: all selected events vs. those passing / failing the trigger.
# Each entry: (values, bin edges, x-axis label, log-scale y axis?)
for values, bins, xlabel, use_log_y in (
    (masses, np.linspace(7.8, 15, 100), "m [GeV]", True),
    (all_data["dimu_pt"], np.linspace(0, 100, 100), "pT [GeV]", False),
):
    selected = values[event_filter]
    plt.figure()
    plt.hist(selected, bins = bins, histtype = "step", density = True, label = "all events")
    plt.hist(selected[passed_DoubleMu0], bins = bins, histtype = "step", density = True, label = "pass trigger")
    plt.hist(selected[~passed_DoubleMu0], bins = bins, histtype = "step", density = True, label = "fail trigger")
    plt.ylabel("density")
    plt.xlabel(xlabel)
    plt.title("HLT_DoubleMu0, mu_iso > 0.55")
    if use_log_y:
        plt.yscale("log")
    plt.legend()
    plt.show()



# Disabled exploratory code, kept as a bare string literal so that none of it runs.
# NOTE(review): `target_M` and `delta` are assigned only inside this string, yet the
# deltaR cut-scan cell further down reads both names.
"""
in_SB = (all_data["dimu_mass"] >= 7.8) & (all_data["dimu_mass"] <= 15)

plt.figure()
plt.hist(all_data["dimu_pt"][in_SB], bins = np.linspace(0, 100, 100), histtype = "step", density = True, label = "no trig")
plt.hist(all_data["dimu_pt"][in_SB][all_data["HLT_Mu17_Mu8"][in_SB]], bins = np.linspace(0, 100, 100), histtype = "step", density = True,label = "trig")
plt.ylabel("density")
plt.xlabel("pt[GeV]")
plt.legend()
plt.show()





print(len(masses[in_SB]))
print(len(masses[in_SB][all_data["HLT_Mu17_Mu8"][in_SB]]))

target_M = 0.547 # GeV
delta = 0.2 #GeV


plt.figure()

plt.hist(all_data["dimu_mass"], bins = np.linspace(target_M - delta, target_M + delta, 60), histtype = "step", label = "opp.sign", density = False)
plt.hist(all_data["dimu_mass_samesign"], bins = np.linspace(target_M - delta, target_M + delta, 60), histtype = "step", label = "same sign", density = False)

plt.axvline(target_M, color = "r")

plt.legend()
plt.ylabel("Counts")
plt.xlabel("dimu $M$ [GeV]")
plt.show()
"""

## Calculate supplementary observables

In [None]:
# Derived observables, computed for the opposite-sign sample (empty suffix) and,
# when enabled, for the same-sign sample ("_samesign" suffix).
sign_suffixes = [""]
if run_samesign:
    sign_suffixes.append("_samesign")

for sfx in sign_suffixes:
    # separation between the two muons, from their phi and eta
    all_data["mumu_deltaR" + sfx] = calculate_deltaR(
        all_data["mu0_phi" + sfx], all_data["mu1_phi" + sfx],
        all_data["mu0_eta" + sfx], all_data["mu1_eta" + sfx],
    )
    # pT difference: mu0 minus mu1
    all_data["mumu_deltapT" + sfx] = all_data["mu0_pt" + sfx] - all_data["mu1_pt" + sfx]

    if run_jet:
        # separation between the dimuon system and the hardest jet
        all_data["dimujet_deltaR" + sfx] = calculate_deltaR(
            all_data["dimu_phi" + sfx], all_data["hardest_jet_phi" + sfx],
            all_data["dimu_eta" + sfx], all_data["hardest_jet_eta" + sfx],
        )


Make cuts on the minimum deltaR

In [None]:
# Scan a lower cut on the muon-muon deltaR and show how it reshapes several observables
# inside a narrow dimuon-mass window.
nb = 40

# Centre and half-width of the mass window, in GeV. Both names were previously assigned
# only inside the disabled (triple-quoted) block earlier in the notebook, so they are
# defined here with those same values.
target_M = 0.547
delta = 0.2
mass_low, mass_high = target_M - delta, target_M + delta

keys_to_plot =  ["mu0_eta", "mu1_eta", "dimu_eta", "mumu_deltaR", "mumu_deltapT", "dimu_mass"]
# raw strings, so the LaTeX backslashes are not read as Python escape sequences
labels = [r"hardest mu $\eta$", r"second mu $\eta$", r"dimu $\eta$", r"$\mu-\mu$ $\Delta R$", r"$\mu-\mu$ $\Delta p_T$", r"dimu $M$"]
bins = [np.linspace(-2.5, 2.5, nb), np.linspace(-2.5, 2.5, nb),np.linspace(-2.5, 2.5, nb), np.linspace(0, .25, nb),np.linspace(0, 50, nb), np.linspace(mass_low, mass_high, nb)]
n_features = len(keys_to_plot)

eta_min, eta_max = -8, 8
delta_R_min_values = [0., 0.005, 0.01, 0.015, 0.02, 0.025]
colors = ["purple", "blue", "green", "orange", "red", "magenta"]

# Which sample to scan: "_samesign" for same-sign pairs, "" for opposite-sign pairs.
sign_suffix = "_samesign"


def deltaR_scan_filter(delta_R_min, suffix):
    """Mask of events with deltaR above `delta_R_min`, mass in the window and mu0 eta in range.

    `suffix` selects the sample: "" (opposite sign) or "_samesign".
    """
    return (
        (all_data["mumu_deltaR" + suffix] > delta_R_min)
        & (all_data["dimu_mass" + suffix] > mass_low)
        & (all_data["dimu_mass" + suffix] < mass_high)
        & (all_data["mu0_eta" + suffix] < eta_max)
        & (all_data["mu0_eta" + suffix] > eta_min)
    )


# The selection does not depend on the plotted feature, so build each mask only once.
scan_filters = [deltaR_scan_filter(delta_R_min, sign_suffix) for delta_R_min in delta_R_min_values]

fig, ax = plt.subplots(1, n_features, figsize = (3*n_features, 3))

for i, key in enumerate(keys_to_plot):

    for delta_R_min, scan_filter, color in zip(delta_R_min_values, scan_filters, colors):
        ax[i].hist(all_data[key + sign_suffix][scan_filter], bins= bins[i], histtype = "step", lw = 2, label = "deltaR > " + str(delta_R_min), density = False, color = color)

    ax[i].set_xlabel(labels[i])
    ax[i].set_yticks([])

# a single legend, placed on the deltaR panel
ax[3].legend(fontsize = 10)
ax[0].set_ylabel("Counts")

plt.subplots_adjust(wspace=0)
plt.show()




In [None]:

# Tag the output name by whether the jet observables were compiled.
save_id = f"{data_id}_jet" if run_jet else f"{data_id}_nojet"

# The write-out below is wrapped in a string literal, so nothing is saved at present.
"""
with open(f"{path_to_output}/{save_id}", "wb") as output_file:
        pickle.dump(all_data, output_file)
"""