In [None]:
import os

import pandas as pd
import numpy as np
import json

import plotly.express as px
import plotly.graph_objects as go
from tqdm import tqdm

# When the kernel is started inside a "notebooks" folder, move up one level so
# that the relative "models/..." paths used by the cells below resolve from the
# parent directory. os.path.basename is used instead of splitting on "/" so the
# check also works where the path separator is not "/" (e.g. Windows).
if os.path.basename(os.getcwd()) == "notebooks":
    os.chdir("..")

In [None]:
# Choose the appropriate folders.
#
# Each active entry is the leading part of a run-directory name under
# "models/"; get_workdir() resolves it to an actual directory. The cells below
# split the prefix on "_" and read field 1 as the detector and fields 2 and 3
# (divided by 10) as the ECM and BKG values.
#
# The plotting cells index exactly four runs (columns ..._rf1 to ..._rf4), in
# the order listed here, so keep exactly one group of four entries uncommented.
PREFIXES = [

    # "run_ATLAS_130_55",
    # "run_ATLAS_136_55",
    # "run_CMS_130_55",
    # "run_CMS_136_55",

    # "run_ATLAS_130_60",
    # "run_ATLAS_136_60",
    # "run_CMS_130_60",
    # "run_CMS_136_60",

    # "run_ATLAS_130_65",
    # "run_ATLAS_136_65",
    # "run_CMS_130_65",
    # "run_CMS_136_65",

    # "run_ATLAS_130_70",
    # "run_ATLAS_136_70",
    # "run_CMS_130_70",
    # "run_CMS_136_70",
    
    # "run_ATLAS_130_80",
    # "run_ATLAS_136_80",
    # "run_CMS_130_80",
    # "run_CMS_136_80",

    # "Newrun_ATLAS_136_55",
    # "Newrun_ATLAS_136_60",
    # "Newrun_ATLAS_136_65",
    # "Newrun_ATLAS_136_70",

    # "Newrun_CMS_136_55",
    # "Newrun_CMS_136_60",
    # "Newrun_CMS_136_65",
    # "Newrun_CMS_136_70",

    # "Newrun_ATLAS_140_55",
    # "Newrun_ATLAS_140_60",
    # "Newrun_ATLAS_140_65",
    # "Newrun_ATLAS_140_70",

    # Currently selected runs.
    "run_ATLAS_140_55",
    "run_ATLAS_140_60",
    "run_ATLAS_140_65",
    "run_ATLAS_140_70",
]


def get_workdir(prefix="run_ATLAS_130_65", models_dir="models"):
    """Return the name of the last run directory whose name starts with ``prefix``.

    Directories inside ``models_dir`` are compared as plain strings and the
    lexicographically greatest match is returned (presumably run names end in
    a sortable timestamp, making this the most recent run -- TODO confirm).

    Args:
        prefix: Leading part of the run-directory name to look for.
        models_dir: Folder that contains the run directories. Defaults to
            ``"models"``, relative to the current working directory.

    Returns:
        The matching directory name (not the full path).

    Raises:
        IndexError: If no directory in ``models_dir`` starts with ``prefix``.
        FileNotFoundError: If ``models_dir`` itself does not exist
            (propagated from ``os.listdir``).
    """
    # Only directories qualify: a stray file sharing the prefix (an archive,
    # a log, ...) must never be returned as a work directory.
    candidates = [
        entry
        for entry in os.listdir(models_dir)
        if entry.startswith(prefix)
        and os.path.isdir(os.path.join(models_dir, entry))
    ]
    if not candidates:
        # Same exception type the bare `sorted(...)[-1]` lookup produced on an
        # empty match, but with a message that says what was searched for.
        raise IndexError(
            f"no run directory starting with {prefix!r} found in {models_dir!r}"
        )
    return max(candidates)

In [None]:
# Gather the random-forest precision / recall / threshold curves of every run
# listed in PREFIXES into one wide table. Columns belonging to the k-th prefix
# carry the suffix k (e.g. "recall_rf1" for the first prefix).
dataframes = []
combined_data = {}
wanted_keys = ["precision_rf", "recall_rf", "threshold_rf"]

for run_number, prefix in enumerate(tqdm(PREFIXES), start=1):
    workdir = get_workdir(prefix)
    print(workdir)
    # Second "_"-separated field of the prefix; read later by the plot cells.
    detector = prefix.split("_")[1]

    with open(f"models/{workdir}/pr_curves.json") as f:
        curves = json.load(f)

    for key, value in curves.items():
        # Skip every entry that is not one of the three curves of interest.
        if key not in wanted_keys:
            continue
        # Scalars are wrapped so that every column is backed by a list.
        column = value if isinstance(value, list) else [value]
        combined_data[f"{key}{run_number}"] = column

# Length of the longest curve across all runs.
max_length = max(map(len, combined_data.values()))

# Right-pad the shorter curves with None so all columns share one length.
for column in combined_data.values():
    column.extend([None] * (max_length - len(column)))

# One column per (curve, run) pair.
df = pd.DataFrame(combined_data)

# print(combined_data)

# Display the DataFrame
# print(df)
        

In [None]:
## Different background cuts for the NEW analysis - load the appropriate
## folders from the PREFIXES ("Newrun...") before running this cell.
fig = go.Figure()

# One precision-recall curve per run: the k-th label is paired with the
# columns recall_rfk / precision_rfk / threshold_rfk of `df`.
background_cut_labels = ["5.5 TeV", "6 TeV", "6.5 TeV", "7 TeV"]
for curve_number, cut_label in enumerate(background_cut_labels, start=1):
    fig.add_trace(
        go.Scatter(
            x=df[f"recall_rf{curve_number}"],
            y=df[f"precision_rf{curve_number}"],
            # The threshold is carried along only to be shown on hover.
            customdata=df[f"threshold_rf{curve_number}"],
            hovertemplate="Threshold=%{customdata}<br>Recall=%{x}<br>Precision=%{y}",
            mode="lines",
            name=cut_label,
        )
    )

fig.update_layout(
    xaxis_title="Recall",
    yaxis_title="Precision",
    width=1200 * (2 / 3),
    height=800 * (2 / 3),
    # The detector name doubles as the title, placed inside the plot area.
    title=detector,
    title_x=0.88,
    title_y=0.78,
    # Horizontal legend sitting on top of the plot, right-aligned.
    legend=dict(
        title="Background production: mHatMin phase space cut",
        orientation="h",
        yanchor="bottom",
        y=1,
        xanchor="right",
        x=1.01,
    ),
)

fig.write_image("New-PR-curve-All_ATLAS_140.pdf")
fig.show()

In [None]:
## S/B for the NEW analysis - load the appropriate folders from the PREFIXES ("Newrun_...")
#
# For every run in PREFIXES this cell collects the per-cut counts, the fit
# results and the PR curves, tags them with the run parameters parsed from the
# prefix, and finally plots S/B against the discriminator cut.
#
# The per-run frames use their own names (counts_wide / counts_long) so the
# global `df` holding the PR-curve table is left untouched for the PR-curve
# plotting cells.
data = []
fits = []
pr = []

# Offset added to the background cut to obtain the mSuu tag. It starts at 2 and
# shrinks by 0.5 per prefix, so the tag depends on the order of PREFIXES.
# (Named msuu_offset rather than `iter` to avoid shadowing the built-in.)
msuu_offset = 2
for prefix in tqdm(PREFIXES):
    workdir = get_workdir(prefix)
    print(workdir)

    # Prefix fields: [1] detector, [2] ECM * 10, [3] background cut * 10.
    prefix_fields = prefix.split("_")
    detector = prefix_fields[1]
    ecm = int(prefix_fields[2]) / 10
    bkg = int(prefix_fields[3]) / 10
    msuu = bkg + msuu_offset

    counts_wide = pd.read_csv(f"models/{workdir}/counts_rf.csv")
    counts_long = (
        counts_wide
        .drop(columns=["0.99"])
        # One row per (label, cut) pair instead of one column per cut.
        .melt(id_vars=["Unnamed: 0"], var_name="cut", value_name="counts")
        # String-valued mSuu / BKG so plotly treats them as categories.
        .assign(detector=detector, ECM=ecm, mSuu=f"{msuu} TeV", BKG=f"{bkg} TeV")
    )
    data.append(counts_long)

    # Both JSON files receive the same numeric run tags.
    for json_name, records in (("fits.json", fits), ("pr_curves.json", pr)):
        with open(f"models/{workdir}/{json_name}", "r") as f:
            record = json.load(f)
        record.update(detector=detector, ECM=ecm, mSuu=msuu, BKG=bkg)
        records.append(record)

    msuu_offset -= 0.5


counts_df = pd.concat(data).rename(columns={"Unnamed: 0": "label"})
fits_df = pd.json_normalize(fits)
pr_df = pd.json_normalize(pr)

counts_df.to_csv("models/counts.csv", index=False)

fig = px.line(
    counts_df[counts_df["label"] == "S/B"],
    x="cut",
    y="counts",
    color="BKG",
    markers=True,
)

# Drop the automatic y-axis title; a horizontal "S/B" annotation replaces it.
fig.for_each_yaxis(lambda a: a.title.update(text=""))

fig.update_layout(
    annotations=[
        dict(
            x=-0.1,  # left of the plot area (negative paper coordinate)
            y=0.5,  # vertically centred
            xref='paper',
            yref='paper',
            text='S/B',
            showarrow=False,
            textangle=0,  # keep text horizontal
            font=dict(size=12)
        )
    ],
    margin=dict(l=20)  # left margin
)

# Log-scaled y axis with explicit, plainly formatted tick labels.
tickvals = [0.0005, 0.001, 0.002, 0.005, 0.01, 0.02, 0.05, 0.1, 0.2, 0.5, 1, 2, 5, 10, 20]
ticktext = [str(value) for value in tickvals]

fig.update_layout(
    yaxis=dict(
        type='log',
        tickvals=tickvals,
        ticktext=ticktext
    )
)

fig.update_layout(
    title=detector,
    title_x=0.535,
    title_y=0.89 if detector == "ATLAS" else 0.864,  # title height depends on the detector
    width=600,
    height=600,
    legend=dict(
        title="Background production: <br>mHatMin phase space cut",
        yanchor="top",
        y=0.99,
        xanchor="left",
        x=0.01,
    ),
)

fig.update_xaxes(title="Discriminator cut")

fig.write_image("NEW_ATLAS_sb_vs_cut_140.pdf")
fig.show()
# fig.for_each_annotation(lambda a: a.update(text=a.text.split("=")[-1]))

In [None]:
## Different Suu masses - load the appropriate folders from the PREFIXES (run_...)
# NOTE(review): `df` must be the PR-curve table with the recall_rfk /
# precision_rfk / threshold_rfk columns; re-run the PR-curve loading cell
# first if `df` has been reassigned since.
fig = go.Figure()

# One precision-recall curve per run: the k-th label is paired with the
# columns recall_rfk / precision_rfk / threshold_rfk of `df`.
suu_mass_labels = ["7 TeV", "7.5 TeV", "8 TeV", "8.5 TeV"]
for curve_number, mass_label in enumerate(suu_mass_labels, start=1):
    fig.add_trace(
        go.Scatter(
            x=df[f"recall_rf{curve_number}"],
            y=df[f"precision_rf{curve_number}"],
            # The threshold is carried along only to be shown on hover.
            customdata=df[f"threshold_rf{curve_number}"],
            hovertemplate="Threshold=%{customdata}<br>Recall=%{x}<br>Precision=%{y}",
            mode="lines",
            name=mass_label,
        )
    )

fig.update_layout(
    xaxis_title="Recall",
    yaxis_title="Precision",
    width=1200 * (2 / 3),
    height=800 * (2 / 3),
    # The detector name doubles as the title, placed inside the plot area.
    title=detector,
    title_x=0.85,
    title_y=0.75,
    # Horizontal legend sitting on top of the plot.
    legend=dict(
        title=r"$S_{uu} \text{ mass}$",
        orientation="h",
        yanchor="bottom",
        y=1,
        xanchor="right",
        x=0.6,
    ),
)

fig.write_image("PR-curve-All_ATLAS_140.pdf")
fig.show()