In [3]:
import ROOT
import os
from ROOT import gROOT
import pandas as pd
import numpy as np

Welcome to JupyROOT 6.24/06


In [4]:
MakeCsvDataFrame = ROOT.RDF.MakeCsvDataFrame

In [5]:
tt_trial_1 = MakeCsvDataFrame('/localdata/Athar/CNN/output_files/tt_feature_2.csv')
hh_trial_1 = MakeCsvDataFrame('/localdata/Athar/CNN/output_files/hh_feature_2.csv')
tt_trial_2 = MakeCsvDataFrame('/localdata/Athar/CNN/output_files/tt_feature_3.csv')
hh_trial_2 = MakeCsvDataFrame('/localdata/Athar/CNN/output_files/hh_feature_3.csv')
tt_trial_3 = MakeCsvDataFrame('/localdata/Athar/CNN/output_files/tt_feature_4.csv')
hh_trial_3 = MakeCsvDataFrame('/localdata/Athar/CNN/output_files/hh_feature_4.csv')
tt_trial_4 = MakeCsvDataFrame('/localdata/Athar/CNN/output_files/tt_feature_6.csv')
hh_trial_4 = MakeCsvDataFrame('/localdata/Athar/CNN/output_files/hh_feature_6.csv')
tt_trial_5 = MakeCsvDataFrame('/localdata/Athar/CNN/output_files/tt_feature_7.csv')
hh_trial_5 = MakeCsvDataFrame('/localdata/Athar/CNN/output_files/hh_feature_7.csv')

In [11]:
#trial 1 (signal) Using variables m,pt,eta,phi,P,pair_dr

cut = np.linspace(0.90, 0.99,30)
Lumi = 3000
# TTbar variables
BR_tt = 0.375  # Total branching ratio for the ttbar process
x_sect_tt = 984500  # fb
total_events_tt = 1_400_000

# HH variables
BR_hh = 0.0937  # Total branching ratio for the hh process
x_sect_hh = 40  # fb
total_events_hh = 900_000

max_sigma = 0
cut_index = 0
sigma_value = []
cut_value = []
signal_events_dict = {}  # Dictionary to store signal event rows for each bin
background_events_dict = {}  # Dictionary to store background event rows for each bin

for i in cut:
    prob_sig_cut = 'prob_sig > {}'.format(i)
    tt_cut = tt_trial_1.Filter(prob_sig_cut)  # Filter signal events
    hh_cut = hh_trial_1.Filter(prob_sig_cut)  # Filter background events

    # Extract relevant data from RDataFrame and convert to pandas DataFrame
    tt_cut_data = tt_cut.AsNumpy()
    tt_cut_df = pd.DataFrame(tt_cut_data)

    hh_cut_data = hh_cut.AsNumpy()
    hh_cut_df = pd.DataFrame(hh_cut_data)

    num_hh = hh_cut.Count().GetValue()
    num_tt = tt_cut.Count().GetValue()
    print('number of tt events: ', num_tt)
    print('number of hh events: ', num_hh)

    accp_tt = num_tt / total_events_tt
    accp_hh = num_hh / total_events_hh
    print('acceptance of hh: ', accp_hh)
    print('acceptance of tt: ', accp_tt)

    # Calculate expected events
    N_exp_tt = Lumi * BR_tt * x_sect_tt * accp_tt
    N_exp_hh = Lumi * BR_hh * x_sect_hh * accp_hh
    print('No. of expected tt events: ', N_exp_tt)
    print('No. of expected hh events: ', N_exp_hh)

    sigma = N_exp_hh / (N_exp_tt)**(1/2)
    print("semi-lepton Sigma: " + str(sigma) + " with a cut > " + str(i))
    sigma_value.append(sigma)
    cut_value.append(i)

    # Store copies of the DataFrames in the dictionaries for each bin
    signal_events_dict[i] = hh_cut_df.copy()
    background_events_dict[i] = tt_cut_df.copy()

    if sigma > max_sigma:
        max_sigma = sigma
        cut_index = i

print('No. of tt events at maximum sigma: ', tt_cut.Count().GetValue())
print('No. of hh events at maximum sigma: ', hh_cut.Count().GetValue())
print('The maximum sigma value is:')
print("semi-lepton Sigma: " + str(max_sigma) + " with a cut > " + str(cut_index))
bb = pd.DataFrame({"cut_value" : cut_value, "sigma_value" : sigma_value})
bb.to_csv("/localdata/Athar/CNN/plot_files/plot_data/feature_2.csv", index=False)


# Save the data for each bin to separate CSV files
for i in cut:
    signal_events_dict[i].to_csv(f"/localdata/Athar/CNN/plot_files/cut_data/cut_1/signal_events_bin_{i}.csv", index=False)
    background_events_dict[i].to_csv(f"/localdata/Athar/CNN/plot_files/cut_data/cut_1/background_events_bin_{i}.csv", index=False)

number of tt events:  1080
number of hh events:  25675
acceptance of hh:  0.028527777777777777
acceptance of tt:  0.0007714285714285715
No. of expected tt events:  854405.3571428572
No. of expected hh events:  320.7663333333333
semi-lepton Sigma: 0.34702183216946664 with a cut > 0.9
number of tt events:  1026
number of hh events:  25300
acceptance of hh:  0.02811111111111111
acceptance of tt:  0.0007328571428571428
No. of expected tt events:  811685.0892857143
No. of expected hh events:  316.0813333333333
semi-lepton Sigma: 0.350836738224812 with a cut > 0.9031034482758621
number of tt events:  975
number of hh events:  24871
acceptance of hh:  0.027634444444444444
acceptance of tt:  0.0006964285714285715
No. of expected tt events:  771338.1696428572
No. of expected hh events:  310.72169333333335
semi-lepton Sigma: 0.35379294105164155 with a cut > 0.9062068965517242
number of tt events:  935
number of hh events:  24480
acceptance of hh:  0.0272
acceptance of tt:  0.0006678571428571429


number of tt events:  39
number of hh events:  5418
acceptance of hh:  0.00602
acceptance of tt:  2.7857142857142858e-05
No. of expected tt events:  30853.526785714286
No. of expected hh events:  67.68888
semi-lepton Sigma: 0.3853584806838876 with a cut > 0.99
No. of tt events at maximum sigma:  39
No. of hh events at maximum sigma:  5418
The maximum sigma value is:
semi-lepton Sigma: 0.41538741718918765 with a cut > 0.9806896551724138


In [10]:
#trial 2 (signal) Using variables m,pt,eta,phi,pair_dr,Hness,Tness

cut = np.linspace(0.90, 0.99,30)
Lumi = 3000
# TTbar variables
BR_tt = 0.375  # Total branching ratio for the ttbar process
x_sect_tt = 984500  # fb
total_events_tt = 1_400_000

# HH variables
BR_hh = 0.0937  # Total branching ratio for the hh process
x_sect_hh = 40  # fb
total_events_hh = 900_000

max_sigma = 0
cut_index = 0
sigma_value = []
cut_value = []
signal_events_dict = {}  # Dictionary to store signal event rows for each bin
background_events_dict = {}  # Dictionary to store background event rows for each bin

for i in cut:
    prob_sig_cut = 'prob_sig > {}'.format(i)
    tt_cut = tt_trial_2.Filter(prob_sig_cut)  # Filter signal events
    hh_cut = hh_trial_2.Filter(prob_sig_cut)  # Filter background events

    # Extract relevant data from RDataFrame and convert to pandas DataFrame
    tt_cut_data = tt_cut.AsNumpy()
    tt_cut_df = pd.DataFrame(tt_cut_data)

    hh_cut_data = hh_cut.AsNumpy()
    hh_cut_df = pd.DataFrame(hh_cut_data)

    num_hh = hh_cut.Count().GetValue()
    num_tt = tt_cut.Count().GetValue()
    print('number of tt events: ', num_tt)
    print('number of hh events: ', num_hh)

    accp_tt = num_tt / total_events_tt
    accp_hh = num_hh / total_events_hh
    print('acceptance of hh: ', accp_hh)
    print('acceptance of tt: ', accp_tt)

    # Calculate expected events
    N_exp_tt = Lumi * BR_tt * x_sect_tt * accp_tt
    N_exp_hh = Lumi * BR_hh * x_sect_hh * accp_hh
    print('No. of expected tt events: ', N_exp_tt)
    print('No. of expected hh events: ', N_exp_hh)

    sigma = N_exp_hh / (N_exp_tt)**(1/2)
    print("semi-lepton Sigma: " + str(sigma) + " with a cut > " + str(i))
    sigma_value.append(sigma)
    cut_value.append(i)

    # Store copies of the DataFrames in the dictionaries for each bin
    signal_events_dict[i] = hh_cut_df.copy()
    background_events_dict[i] = tt_cut_df.copy()

    if sigma > max_sigma:
        max_sigma = sigma
        cut_index = i

print('No. of tt events at maximum sigma: ', tt_cut.Count().GetValue())
print('No. of hh events at maximum sigma: ', hh_cut.Count().GetValue())
print('The maximum sigma value is:')
print("semi-lepton Sigma: " + str(max_sigma) + " with a cut > " + str(cut_index))
bb = pd.DataFrame({"cut_value" : cut_value, "sigma_value" : sigma_value})
bb.to_csv("/localdata/Athar/CNN/plot_files/plot_data/feature_3.csv", index=False)


# Save the data for each bin to separate CSV files
for i in cut:
    signal_events_dict[i].to_csv(f"/localdata/Athar/CNN/plot_files/cut_data/cut_3/signal_events_bin_{i}.csv", index=False)
    background_events_dict[i].to_csv(f"/localdata/Athar/CNN/plot_files/cut_data/cut_3/background_events_bin_{i}.csv", index=False)

number of tt events:  915
number of hh events:  24300
acceptance of hh:  0.027
acceptance of tt:  0.0006535714285714286
No. of expected tt events:  723871.2053571428
No. of expected hh events:  303.588
semi-lepton Sigma: 0.35682391215641135 with a cut > 0.9
number of tt events:  864
number of hh events:  23866
acceptance of hh:  0.02651777777777778
acceptance of tt:  0.0006171428571428571
No. of expected tt events:  683524.2857142857
No. of expected hh events:  298.1658933333334
semi-lepton Sigma: 0.36064589139864117 with a cut > 0.9031034482758621
number of tt events:  826
number of hh events:  23488
acceptance of hh:  0.02609777777777778
acceptance of tt:  0.00059
No. of expected tt events:  653461.875
No. of expected hh events:  293.44341333333335
semi-lepton Sigma: 0.36300636363956235 with a cut > 0.9062068965517242
number of tt events:  792
number of hh events:  23049
acceptance of hh:  0.02561
acceptance of tt:  0.0005657142857142857
No. of expected tt events:  626563.9285714285


number of tt events:  27
number of hh events:  4553
acceptance of hh:  0.005058888888888889
acceptance of tt:  1.9285714285714285e-05
No. of expected tt events:  21360.133928571428
No. of expected hh events:  56.882146666666664
semi-lepton Sigma: 0.3892010400082815 with a cut > 0.99
No. of tt events at maximum sigma:  27
No. of hh events at maximum sigma:  4553
The maximum sigma value is:
semi-lepton Sigma: 0.42509398384466174 with a cut > 0.9775862068965517


In [13]:
#trial 3 (signal) Using variables m,pt,eta,phi,P,pair_dr,Hness,Tness

cut = np.linspace(0.97, 0.99,10)
Lumi = 3000
# TTbar variables
BR_tt = 0.375  # Total branching ratio for the ttbar process
x_sect_tt = 984500  # fb
total_events_tt = 1_400_000

# HH variables
BR_hh = 0.0937  # Total branching ratio for the hh process
x_sect_hh = 40  # fb
total_events_hh = 900_000

max_sigma = 0
cut_index = 0
sigma_value = []
cut_value = []
signal_events_dict = {}  # Dictionary to store signal event rows for each bin
background_events_dict = {}  # Dictionary to store background event rows for each bin

for i in cut:
    prob_sig_cut = 'prob_sig > {}'.format(i)
    tt_cut = tt_trial_3.Filter(prob_sig_cut)  # Filter signal events
    hh_cut = hh_trial_3.Filter(prob_sig_cut)  # Filter background events

    # Extract relevant data from RDataFrame and convert to pandas DataFrame
    tt_cut_data = tt_cut.AsNumpy()
    tt_cut_df = pd.DataFrame(tt_cut_data)

    hh_cut_data = hh_cut.AsNumpy()
    hh_cut_df = pd.DataFrame(hh_cut_data)

    num_hh = hh_cut.Count().GetValue()
    num_tt = tt_cut.Count().GetValue()
    print('number of tt events: ', num_tt)
    print('number of hh events: ', num_hh)

    accp_tt = num_tt / total_events_tt
    accp_hh = num_hh / total_events_hh
    print('acceptance of hh: ', accp_hh)
    print('acceptance of tt: ', accp_tt)

    # Calculate expected events
    N_exp_tt = Lumi * BR_tt * x_sect_tt * accp_tt
    N_exp_hh = Lumi * BR_hh * x_sect_hh * accp_hh
    print('No. of expected tt events: ', N_exp_tt)
    print('No. of expected hh events: ', N_exp_hh)

    sigma = N_exp_hh / (N_exp_tt)**(1/2)
    print("semi-lepton Sigma: " + str(sigma) + " with a cut > " + str(i))
    sigma_value.append(sigma)
    cut_value.append(i)

    # Store copies of the DataFrames in the dictionaries for each bin
    signal_events_dict[i] = hh_cut_df.copy()
    background_events_dict[i] = tt_cut_df.copy()

    if sigma > max_sigma:
        max_sigma = sigma
        cut_index = i

print('No. of tt events at maximum sigma: ', tt_cut.Count().GetValue())
print('No. of hh events at maximum sigma: ', hh_cut.Count().GetValue())
print('The maximum sigma value is:')
print("semi-lepton Sigma: " + str(max_sigma) + " with a cut > " + str(cut_index))
bb = pd.DataFrame({"cut_value" : cut_value, "sigma_value" : sigma_value})
bb.to_csv("/localdata/Athar/CNN/plot_files/plot_data/feature_4.csv", index=False)


# Save the data for each bin to separate CSV files
for i in cut:
    signal_events_dict[i].to_csv(f"/localdata/Athar/CNN/plot_files/cut_data/cut_4/signal_events_bin_{i}.csv", index=False)
    background_events_dict[i].to_csv(f"/localdata/Athar/CNN/plot_files/cut_data/cut_4/background_events_bin_{i}.csv", index=False)

number of tt events:  200
number of hh events:  13159
acceptance of hh:  0.01462111111111111
acceptance of tt:  0.00014285714285714287
No. of expected tt events:  158223.2142857143
No. of expected hh events:  164.39977333333334
semi-lepton Sigma: 0.4133006734742924 with a cut > 0.97
number of tt events:  180
number of hh events:  12514
acceptance of hh:  0.013904444444444445
acceptance of tt:  0.00012857142857142858
No. of expected tt events:  142400.89285714287
No. of expected hh events:  156.34157333333334
semi-lepton Sigma: 0.41430304187417993 with a cut > 0.9722222222222222
number of tt events:  155
number of hh events:  11865
acceptance of hh:  0.013183333333333333
acceptance of tt:  0.00011071428571428571
No. of expected tt events:  122622.99107142857
No. of expected hh events:  148.2334
semi-lepton Sigma: 0.42331154906342494 with a cut > 0.9744444444444444
number of tt events:  131
number of hh events:  11121
acceptance of hh:  0.012356666666666667
acceptance of tt:  9.357142857

In [16]:
#trial 4 (signal) Using variables m,pt,eta,phi,P,pair_dr,Hness,Tness

cut = np.linspace(0.97, 0.99,10)
Lumi = 3000
# TTbar variables
BR_tt = 0.375  # Total branching ratio for the ttbar process
x_sect_tt = 984500  # fb
total_events_tt = 1_400_000

# HH variables
BR_hh = 0.0937  # Total branching ratio for the hh process
x_sect_hh = 40  # fb
total_events_hh = 900_000

max_sigma = 0
cut_index = 0
sigma_value = []
cut_value = []
signal_events_dict = {}  # Dictionary to store signal event rows for each bin
background_events_dict = {}  # Dictionary to store background event rows for each bin

for i in cut:
    prob_sig_cut = 'prob_sig > {}'.format(i)
    tt_cut = tt_trial_4.Filter(prob_sig_cut)  # Filter signal events
    hh_cut = hh_trial_4.Filter(prob_sig_cut)  # Filter background events

    # Extract relevant data from RDataFrame and convert to pandas DataFrame
    tt_cut_data = tt_cut.AsNumpy()
    tt_cut_df = pd.DataFrame(tt_cut_data)

    hh_cut_data = hh_cut.AsNumpy()
    hh_cut_df = pd.DataFrame(hh_cut_data)

    num_hh = hh_cut.Count().GetValue()
    num_tt = tt_cut.Count().GetValue()
    print('number of tt events: ', num_tt)
    print('number of hh events: ', num_hh)

    accp_tt = num_tt / total_events_tt
    accp_hh = num_hh / total_events_hh
    print('acceptance of hh: ', accp_hh)
    print('acceptance of tt: ', accp_tt)

    # Calculate expected events
    N_exp_tt = Lumi * BR_tt * x_sect_tt * accp_tt
    N_exp_hh = Lumi * BR_hh * x_sect_hh * accp_hh
    print('No. of expected tt events: ', N_exp_tt)
    print('No. of expected hh events: ', N_exp_hh)

    sigma = N_exp_hh / (N_exp_tt)**(1/2)
    print("semi-lepton Sigma: " + str(sigma) + " with a cut > " + str(i))
    sigma_value.append(sigma)
    cut_value.append(i)

    # Store copies of the DataFrames in the dictionaries for each bin
    signal_events_dict[i] = hh_cut_df.copy()
    background_events_dict[i] = tt_cut_df.copy()

    if sigma > max_sigma:
        max_sigma = sigma
        cut_index = i

print('No. of tt events at maximum sigma: ', tt_cut.Count().GetValue())
print('No. of hh events at maximum sigma: ', hh_cut.Count().GetValue())
print('The maximum sigma value is:')
print("semi-lepton Sigma: " + str(max_sigma) + " with a cut > " + str(cut_index))
bb = pd.DataFrame({"cut_value" : cut_value, "sigma_value" : sigma_value})
bb.to_csv("/localdata/Athar/CNN/plot_files/plot_data/feature_5.csv", index=False)


# Save the data for each bin to separate CSV files
for i in cut:
    signal_events_dict[i].to_csv(f"/localdata/Athar/CNN/plot_files/cut_data/cut_5/signal_events_bin_{i}.csv", index=False)
    background_events_dict[i].to_csv(f"/localdata/Athar/CNN/plot_files/cut_data/cut_5/background_events_bin_{i}.csv", index=False)

number of tt events:  238
number of hh events:  14578
acceptance of hh:  0.016197777777777776
acceptance of tt:  0.00017
No. of expected tt events:  188285.625
No. of expected hh events:  182.1278133333333
semi-lepton Sigma: 0.41972775774035526 with a cut > 0.97
number of tt events:  212
number of hh events:  13969
acceptance of hh:  0.01552111111111111
acceptance of tt:  0.00015142857142857143
No. of expected tt events:  167716.60714285713
No. of expected hh events:  174.51937333333333
semi-lepton Sigma: 0.4261432470965824 with a cut > 0.9722222222222222
number of tt events:  192
number of hh events:  13254
acceptance of hh:  0.014726666666666667
acceptance of tt:  0.00013714285714285713
No. of expected tt events:  151894.2857142857
No. of expected hh events:  165.58664000000002
semi-lepton Sigma: 0.4248685414234925 with a cut > 0.9744444444444444
number of tt events:  167
number of hh events:  12612
acceptance of hh:  0.014013333333333333
acceptance of tt:  0.00011928571428571428
No.

FileNotFoundError: [Errno 2] No such file or directory: '/localdata/Athar/CNN/plot_files/cut_data/cut_5/signal_events_bin_0.97.csv'

In [8]:
#trial 5b (signal) Using variables m,pt,eta,phi,P,pair_dr,Hness,Tness

cut = np.linspace(0.96, 0.97,10)
Lumi = 3000
# TTbar variables
BR_tt = 0.375  # Total branching ratio for the ttbar process
x_sect_tt = 984500  # fb
total_events_tt = 1_400_000

# HH variables
BR_hh = 0.0937  # Total branching ratio for the hh process
x_sect_hh = 40  # fb
total_events_hh = 900_000

max_sigma = 0
cut_index = 0
sigma_value = []
cut_value = []
signal_events_dict = {}  # Dictionary to store signal event rows for each bin
background_events_dict = {}  # Dictionary to store background event rows for each bin

for i in cut:
    prob_sig_cut = 'prob_sig > {}'.format(i)
    tt_cut = tt_trial_5.Filter(prob_sig_cut)  # Filter signal events
    hh_cut = hh_trial_5.Filter(prob_sig_cut)  # Filter background events

    # Extract relevant data from RDataFrame and convert to pandas DataFrame
    tt_cut_data = tt_cut.AsNumpy()
    tt_cut_df = pd.DataFrame(tt_cut_data)

    hh_cut_data = hh_cut.AsNumpy()
    hh_cut_df = pd.DataFrame(hh_cut_data)

    num_hh = hh_cut.Count().GetValue()
    num_tt = tt_cut.Count().GetValue()
    print('number of tt events: ', num_tt)
    print('number of hh events: ', num_hh)

    accp_tt = num_tt / total_events_tt
    accp_hh = num_hh / total_events_hh
    print('acceptance of hh: ', accp_hh)
    print('acceptance of tt: ', accp_tt)

    # Calculate expected events
    N_exp_tt = Lumi * BR_tt * x_sect_tt * accp_tt
    N_exp_hh = Lumi * BR_hh * x_sect_hh * accp_hh
    print('No. of expected tt events: ', N_exp_tt)
    print('No. of expected hh events: ', N_exp_hh)

    sigma = N_exp_hh / (N_exp_tt)**(1/2)
    print("semi-lepton Sigma: " + str(sigma) + " with a cut > " + str(i))
    sigma_value.append(sigma)
    cut_value.append(i)

    # Store copies of the DataFrames in the dictionaries for each bin
    signal_events_dict[i] = hh_cut_df.copy()
    background_events_dict[i] = tt_cut_df.copy()

    if sigma > max_sigma:
        max_sigma = sigma
        cut_index = i

print('No. of tt events at maximum sigma: ', tt_cut.Count().GetValue())
print('No. of hh events at maximum sigma: ', hh_cut.Count().GetValue())
print('The maximum sigma value is:')
print("semi-lepton Sigma: " + str(max_sigma) + " with a cut > " + str(cut_index))
bb = pd.DataFrame({"cut_value" : cut_value, "sigma_value" : sigma_value})
bb.to_csv("/localdata/Athar/CNN/plot_files/plot_data/feature_7b.csv", index=False)


# Save the data for each bin to separate CSV files
for i in cut:
    signal_events_dict[i].to_csv(f"/localdata/Athar/CNN/plot_files/cut_data/cut_6/signal_events_bin_{i}.csv", index=False)
    background_events_dict[i].to_csv(f"/localdata/Athar/CNN/plot_files/cut_data/cut_6/background_events_bin_{i}.csv", index=False)

number of tt events:  253
number of hh events:  13723
acceptance of hh:  0.015247777777777778
acceptance of tt:  0.0001807142857142857
No. of expected tt events:  200152.36607142855
No. of expected hh events:  171.44601333333335
semi-lepton Sigma: 0.3832189941443403 with a cut > 0.96
number of tt events:  239
number of hh events:  13469
acceptance of hh:  0.014965555555555555
acceptance of tt:  0.0001707142857142857
No. of expected tt events:  189076.74107142855
No. of expected hh events:  168.27270666666666
semi-lepton Sigma: 0.3869854408494273 with a cut > 0.961111111111111
number of tt events:  232
number of hh events:  13243
acceptance of hh:  0.014714444444444445
acceptance of tt:  0.00016571428571428572
No. of expected tt events:  183538.92857142858
No. of expected hh events:  165.44921333333335
semi-lepton Sigma: 0.3861896323083258 with a cut > 0.9622222222222222
number of tt events:  222
number of hh events:  12979
acceptance of hh:  0.014421111111111112
acceptance of tt:  0.00

FileNotFoundError: [Errno 2] No such file or directory: '/localdata/Athar/CNN/plot_files/cut_data/cut_6/signal_events_bin_0.96.csv'