In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import os

In [None]:
# Folder that receives the saved histogram images.
OUT_DIR = "./Histograms_of_Variables/"

# Create the folder up front; a pre-existing folder is not an error.
os.makedirs(name=OUT_DIR, exist_ok=True)

In [None]:
# Load the two input tables from disk:
#   * `dataframe` is read from a comma-separated file,
#   * `data` is read from a space-separated file.
dataframe = pd.read_csv("dataframe.csv", sep=",")
data = pd.read_csv("Output_ZPrimeBoostedAnalysis/data.csv", sep=" ")

# Bare expression so the notebook renders `data` as the cell output.
data

In [None]:
# Split `dataframe` into one frame per sample, keyed on the "ID" column.
signal_data = dataframe[dataframe["ID"].eq("signal")]
ttbar_bkg = dataframe[dataframe["ID"].eq("ttbar_bkg")]
lepton_bkg = dataframe[dataframe["ID"].eq("lepton_bkg")]
V_jets_bkg = dataframe[dataframe["ID"].eq("V_jets_bkg")]
single_top_bkg = dataframe[dataframe["ID"].eq("single_top_bkg")]

# Every row whose "label" column equals 0
# (presumably the union of the background samples -- confirm).
bkg = dataframe[dataframe["label"].eq(0)]

In [None]:
# Histogram settings per variable: name -> [range_low, range_high, number_of_bins].
ranges_bins = {
    "etmiss": [0, 400, 30],
    "leadbjet_eta": [-3, 3, 30],
    "leadbjet_pt": [0, 400, 20],
    "leadjet_eta": [-3, 3, 30],
    "leadjet_pt": [0, 400, 20],
    "Q_leadlep": [-1.5, 1.5, 7],
    "leadleptE": [0, 500, 30],
    "leadleptEta": [-3, 3, 30],
    "leadleptID": [6, 20, 15],
    "leadleptPhi": [-3.2, 3.2, 32],
    "leadleptPt": [0, 400, 20],
    "largeRjet_eta_total": [-3, 3, 20],
    "largeRjet_m_total": [50, 250, 30],
    "largeRjet_phi_total": [-3.2, 3.2, 12],
    "largeRjet_pt_total": [250, 700, 30],
    "largeRjet_tau32_total": [0, 1, 20],
    "leadTopLRjet_eta": [-3, 3, 20],
    "leadTopLRjet_m": [100, 250, 25],
    "leadTopLRjet_phi": [-3.2, 3.2, 12],
    "leadTopLRjet_pt": [250, 700, 30],
    "leadTopLRjet_Tau32": [0, 1, 20],
    "mtw": [0, 250, 30],
    "n_bjets": [-0.5, 3.5, 4],
    "n_jets": [-0.5, 3.5, 4],
    "n_TopLRjets": [-0.5, 3.5, 4],
    "ttbarMLR": [400, 1600, 20],
}
# Entries currently switched off (kept for reference):
#   "leadTopLRjet_syst_pt": [250, 700, 30],
#   "n_leadLRjets": [-0.5, 3.5, 4],

In [None]:
# Variables to histogram; one figure is produced per entry, in this order.
plot_features = ['etmiss', 'mtw', 'leadleptPt', 'leadleptEta',
       'leadleptE', 'leadleptPhi', 'Q_leadlep', 'n_TopLRjets', "leadleptID",
       'leadTopLRjet_pt', 'leadTopLRjet_eta', 'leadTopLRjet_phi',
       'leadTopLRjet_m', 'leadTopLRjet_Tau32', 'n_jets', 'leadjet_pt',
       'leadjet_eta', 'n_bjets', 'leadbjet_pt', 'leadbjet_eta', 'ttbarMLR']

# Unit suffix appended to the variable name on the x axis ("" = no unit).
# Tau32 is a dimensionless ratio (it is binned on [0, 1]), so it carries no unit.
x_label = {'etmiss': "[GeV]", 'mtw': "[GeV]", 'leadleptPt': "[GeV]", 'leadleptEta': "", 'leadleptID': "",
       'leadleptE': "[GeV]", 'leadleptPhi': "", 'Q_leadlep': "", 'n_TopLRjets': "",
       'leadTopLRjet_pt': "[GeV]", 'leadTopLRjet_eta': "", 'leadTopLRjet_phi': "",
       'leadTopLRjet_m': "[GeV]", 'leadTopLRjet_Tau32': "", 'n_jets': "", 'leadjet_pt': "[GeV]",
       'leadjet_eta': "", 'n_bjets': "", 'leadbjet_pt': "[GeV]", 'leadbjet_eta': "", 'ttbarMLR': "[GeV]"}

# Background samples in stacking order (bottom of the stack first),
# paired with the label shown in the legend.
stacked_backgrounds = [
    (lepton_bkg, "lepton_bkg"),
    (V_jets_bkg, "v_jets_bkg"),
    (single_top_bkg, "single_top_bkg"),
    (ttbar_bkg, "ttbar_bkg"),
]

# For every variable: stack the weighted MC backgrounds, put the weighted signal
# on top of the full background stack, overlay the unweighted data counts at the
# bin centres, then save and show the figure.
for feature in plot_features:
    range_low, range_high, n_bins = ranges_bins[feature]

    # Histogram the data first and reuse its bin edges for every MC sample so
    # that all histograms in the figure share exactly the same binning.
    data_counts, bin_edges = np.histogram(data[feature], bins=n_bins, range=(range_low, range_high))
    bin_centres = 0.5 * (bin_edges[:-1] + bin_edges[1:])

    fig, ax = plt.subplots()

    # Running per-bin total of everything drawn so far. `hist` returns the
    # counts of the sample itself (without `bottom`), so it is accumulated here.
    stack_height = np.zeros(n_bins)
    for sample, legend_name in stacked_backgrounds:
        sample_counts, _, _ = ax.hist(sample[feature], bins=bin_edges, histtype="step", linewidth=1,
                                      weights=sample["scaleweight"], bottom=stack_height,
                                      label=legend_name)
        stack_height = stack_height + sample_counts

    # The signal sits on top of the complete background stack, so the top of
    # the bars is the sig+bkg expectation that the data points are compared to.
    ax.hist(signal_data[feature], bins=bin_edges, histtype="bar", linewidth=1,
            weights=signal_data["scaleweight"], bottom=stack_height, label="signal")

    ax.scatter(bin_centres, data_counts, label="data", marker="d", color="black")

    ax.set_xlabel(f"{feature} {x_label[feature]}")
    ax.set_ylabel("Events")
    ax.legend()
    ax.set_title("MC simulation sig+bkg vs. data (" + feature + ")")

    fig.savefig(os.path.join(OUT_DIR, "plot_" + feature + ".png"))
    plt.show()
    # Release the figure explicitly so figures do not pile up across iterations.
    plt.close(fig)