In [None]:
import ROOT
import os
from ROOT import gROOT
import pandas as pd
import numpy as np

In [None]:
# Factory that builds an RDataFrame from a CSV file.
# Newer ROOT releases expose it as ROOT.RDF.FromCSV and have dropped the older
# ROOT.RDF.MakeCsvDataFrame spelling; older releases only provide the latter.
# Prefer the new name and fall back to the old one so the notebook runs on both.
# The alias keeps its original name because later cells call it.
try:
    MakeCsvDataFrame = ROOT.RDF.FromCSV
except AttributeError:
    MakeCsvDataFrame = ROOT.RDF.MakeCsvDataFrame

In [None]:
# Open the two classified-event CSV files as RDataFrames.
# Later cells use `hh_file` as the signal sample and `tt_file` as the background.
tt_file, hh_file = (
    MakeCsvDataFrame(csv_path)
    for csv_path in (
        '/localdata/Athar/output_files/tt_classified.csv',
        '/localdata/Athar/output_files/hh_classified.csv',
    )
)

In [None]:
# Scan a grid of lower thresholds on the classifier score `prob_sig`. For each
# threshold, estimate the significance S / sqrt(B), where S is the expected number
# of hh events and B the expected number of ttbar events passing the threshold.
# The per-threshold significance and the selected events are written to CSV.
cut = np.linspace(0.90, 0.99, 30)
Lumi = 3000  # presumably integrated luminosity in fb^-1 (cross sections are in fb) -- confirm
# TTbar variables
BR_tt = 0.375  # Total branching ratio for the ttbar process
x_sect_tt = 984500  # fb
total_events_tt = 1_400_000  # denominator of the ttbar acceptance

# HH variables
BR_hh = 0.0937  # Total branching ratio for the hh process
x_sect_hh = 40  # fb
total_events_hh = 900_000  # denominator of the hh acceptance

max_sigma = 0
cut_index = 0  # despite the name, holds the cut VALUE giving the best sigma
num_tt_at_max = 0  # raw ttbar count at the best cut
num_hh_at_max = 0  # raw hh count at the best cut
sigma_value = []
cut_value = []
signal_events_dict = {}  # cut value -> DataFrame of hh (signal) events passing it
background_events_dict = {}  # cut value -> DataFrame of ttbar (background) events passing it

for i in cut:
    prob_sig_cut = 'prob_sig > {}'.format(i)
    tt_cut = tt_file.Filter(prob_sig_cut)  # ttbar (background) events passing the cut
    hh_cut = hh_file.Filter(prob_sig_cut)  # hh (signal) events passing the cut

    # Materialise the surviving events as pandas DataFrames
    tt_cut_data = tt_cut.AsNumpy()
    tt_cut_df = pd.DataFrame(tt_cut_data)

    hh_cut_data = hh_cut.AsNumpy()
    hh_cut_df = pd.DataFrame(hh_cut_data)

    # Each row is one surviving event, so the frame length is the event count.
    # Reading it here avoids re-running the RDataFrame event loop that a
    # separate Count().GetValue() call would trigger.
    num_hh = len(hh_cut_df)
    num_tt = len(tt_cut_df)
    print('number of tt events: ', num_tt)
    print('number of hh events: ', num_hh)

    accp_tt = num_tt / total_events_tt
    accp_hh = num_hh / total_events_hh
    print('acceptance of hh: ', accp_hh)
    print('acceptance of tt: ', accp_tt)

    # Calculate expected events
    N_exp_tt = Lumi * BR_tt * x_sect_tt * accp_tt
    N_exp_hh = Lumi * BR_hh * x_sect_hh * accp_hh
    print('No. of expected tt events: ', N_exp_tt)
    print('No. of expected hh events: ', N_exp_hh)

    if N_exp_tt > 0:
        sigma = N_exp_hh / N_exp_tt ** 0.5
    else:
        # No background event survived: S / sqrt(B) is undefined. Record NaN
        # instead of raising ZeroDivisionError; NaN never compares greater than
        # max_sigma, so this cut cannot be picked as the optimum.
        sigma = float('nan')
        print('No tt events pass this cut; sigma is undefined.')
    print("semi-lepton Sigma: " + str(sigma) + " with a cut > " + str(i))
    sigma_value.append(sigma)
    cut_value.append(i)

    # Keep the selected events for this cut. The frames are freshly built on
    # every iteration, so no defensive copy is needed.
    signal_events_dict[i] = hh_cut_df
    background_events_dict[i] = tt_cut_df

    if sigma > max_sigma:
        max_sigma = sigma
        cut_index = i
        # Remember the counts of the best cut. After the loop, tt_cut / hh_cut
        # refer to the LAST cut scanned, not the best one.
        num_tt_at_max = num_tt
        num_hh_at_max = num_hh

print('No. of tt events at maximum sigma: ', num_tt_at_max)
print('No. of hh events at maximum sigma: ', num_hh_at_max)
print('The maximum sigma value is:')
print("semi-lepton Sigma: " + str(max_sigma) + " with a cut > " + str(cut_index))
bb = pd.DataFrame({"cut_value" : cut_value, "sigma_value" : sigma_value})
bb.to_csv("/localdata/Athar/plot_data/analyzed_data.csv", index=False)


# Save the selected events of each cut to separate CSV files
for i in cut:
    signal_events_dict[i].to_csv(f"/localdata/Athar/cut_data/signal_events_bin_{i}.csv", index=False)
    background_events_dict[i].to_csv(f"/localdata/Athar/cut_data/background_events_bin_{i}.csv", index=False)