In [20]:
import torch
import numpy as np
import pandas as pd
import uproot as up
import awkward as ak
import os

import plotly.graph_objects as go
from plotly.subplots import make_subplots

In [2]:
# Scale factor applied to the normalised track momenta when the table is built
# (presumably the 8 GeV beam energy expressed in MeV -- TODO confirm units).
E0 = 8000

# Collect the reconstructed tracks of every file in `prefix` whose name ends in 'lt'.
tracks = []
prefix = '/Users/avencast/Desktop/Work/GNN/c0.999'
# os.listdir() returns names in arbitrary order; iterate in sorted order so the
# order of `tracks` (and everything derived from it) is reproducible between runs.
for lt in sorted(os.listdir(prefix)):
    if lt.endswith('lt'):
        # NOTE(review): torch.load unpickles arbitrary Python objects -- only point
        # `prefix` at directories whose files you trust.
        tracks.extend(torch.load(os.path.join(prefix, lt)))

In [3]:
# Columns of the reconstructed-track table, in output order.
columns = ['run_num', 'evt_num', 'rec_ID', 'rec_Pi', 'rec_Pf', 'rec_PAvg', 'rec_PStd', 'rec_Vx', 'rec_Vy', 'rec_Vz']

# One value list per column, plus the (entry, subentry) tuple of every row.
data = {col: [] for col in columns}
index_list = []

for entry, event in enumerate(tracks):
    # Within an event, rank the tracks by average momentum, largest first.
    ranked = sorted(event, key=lambda trk: trk.p_avg, reverse=True)

    for subentry, track in enumerate(ranked):
        index_list.append((entry, subentry))
        # Values listed in the same order as `columns`; momenta are scaled by E0
        # and rec_ID is filled with the placeholder -1.
        row = (
            track.run_num,
            track.evt_num,
            -1,
            track.p_i * E0,
            track.p_f * E0,
            track.p_avg * E0,
            track.p_std * E0,
            track.vertex_hit[0],
            track.vertex_hit[1],
            track.vertex_hit[2],
        )
        for col, value in zip(columns, row):
            data[col].append(value)

# Index built from the accumulated (entry, subentry) tuples.
multi_index = pd.MultiIndex.from_tuples(index_list, names=['entry', 'subentry'])

# Assemble the frame once, order it by run and event, and replace the
# MultiIndex with a plain 0..N-1 row index.
df = (
    pd.DataFrame(data, index=multi_index)
    .sort_values(by=['run_num', 'evt_num'], ascending=[True, True])
    .reset_index(drop=True)
)

In [4]:
# Display the reconstructed-track table built above (rich repr of the DataFrame).
df

Unnamed: 0,run_num,evt_num,rec_ID,rec_Pi,rec_Pf,rec_PAvg,rec_PStd,rec_Vx,rec_Vy,rec_Vz
0,0,0,-1,4740.072727,4875.734806,4839.976788,71.587175,-2.505000,-20.471136,38.779999
1,0,0,-1,2957.506180,2899.734497,2941.440821,29.748932,-2.505000,-20.471136,38.779999
2,0,0,-1,258.518219,258.518219,258.518219,0.000000,-4.140000,-20.905239,53.779999
3,0,1,-1,4327.688217,4800.971031,4684.294701,208.616868,-24.840000,-6.581889,22.779999
4,0,1,-1,4327.688217,3182.059765,3494.213581,482.072979,-24.840000,-6.581889,22.779999
...,...,...,...,...,...,...,...,...,...,...
277705,4,19997,-1,402.962506,393.375933,393.890947,7.205875,-23.415001,-11.389974,7.780000
277706,4,19998,-1,1250.067234,1250.067234,1250.067234,0.000000,1.215000,20.739008,38.779999
277707,4,19999,-1,1961.612821,2069.119215,2005.029917,39.818004,-43.080002,0.777081,22.779999
277708,4,19999,-1,1760.736704,1689.650536,1744.330168,29.190138,-38.895000,-0.972536,7.780000


In [5]:
# Commented-out single-file variant kept from the original cell:
# with up.open("/Users/avencast/CLionProjects/darkshine-simulation/workspace/Tracker_GNN.root:truth") as f:

# "truth" trees to concatenate, in the order they are passed to uproot.
truth_sources = [
    "/Users/avencast/Desktop/Work/GNN/Tracker_20.root:truth",
    "/Users/avencast/Desktop/Work/GNN/Tracker_200.root:truth",
    "/Users/avencast/Desktop/Work/GNN/Tracker_100.root:truth",
    "/Users/avencast/Desktop/Work/GNN/Tracker_50.root:truth",
    "/Users/avencast/Desktop/Work/GNN/Tracker_500.root:truth",
]

# The regex only matches names that do not contain "tag_" (case-insensitive).
truth_arrays = up.concatenate(
    truth_sources,
    filter_name="/^(?:(?!tag_).)*$/i",
    library="ak",
)

# Convert to pandas, order by run, event and descending rec_E, then renumber
# the rows 0..N-1.
truth = (
    ak.to_dataframe(truth_arrays)
    .sort_values(by=['run_num', 'evt_num', 'rec_E'], ascending=[True, True, False])
    .reset_index(drop=True)
)

In [6]:
# Display the truth table loaded above (rich repr of the DataFrame).
truth

Unnamed: 0,run_num,evt_num,rec_ID,rec_E,rec_Vx,rec_Vy,rec_Vz,rec_Px,rec_Py,rec_Pz
0,0,0,-11.0,4693.527832,-1.971450,-20.188143,22.877148,-154.253555,10.251737,4691.492188
1,0,0,11.0,2972.704346,-1.971716,-20.193453,22.839268,-102.542465,-8.515394,2971.434326
2,0,0,11.0,257.371124,-1.200308,-20.197134,-0.175000,-7.228745,-1.836229,257.773743
3,0,1,11.0,4938.062988,-24.196095,-6.718469,3.710038,-159.285721,7.503434,4935.999023
4,0,1,-11.0,3033.025635,-24.196095,-6.718469,3.710038,-106.958908,-8.111062,3031.639648
...,...,...,...,...,...,...,...,...,...,...
297782,4,19998,11.0,1111.764404,-1.429825,14.575148,0.175000,80.792732,172.691559,1095.813232
297783,4,19998,-11.0,701.334473,-1.432059,14.374350,0.175000,49.133247,-285.283722,639.363708
297784,4,19999,11.0,3467.862305,-38.790752,0.029675,0.010933,-343.884735,553.274597,3406.646973
297785,4,19999,11.0,2181.076172,-38.836823,0.006593,0.175000,-401.216125,47.327873,2143.853516


In [7]:
# Turn each frame's row index into an ordinary column so that it survives the
# merge: 'idx1' for the reconstructed table, 'idx2' for the truth table.
df1_reset = df.reset_index().rename(columns={'index': 'idx1'})
df2_reset = truth.reset_index().rename(columns={'index': 'idx2'})

# Inner join on the event identity: every reconstructed row is paired with
# every truth row that shares its (run_num, evt_num).
common_rows = df1_reset.merge(df2_reset, how='inner', on=['run_num', 'evt_num'])

# Pairs of original row numbers (reconstructed, truth) for each merged row.
common_indices = common_rows.loc[:, ['idx1', 'idx2']]

common_rows

Unnamed: 0,idx1,run_num,evt_num,rec_ID_x,rec_Pi,rec_Pf,rec_PAvg,rec_PStd,rec_Vx_x,rec_Vy_x,rec_Vz_x,idx2,rec_ID_y,rec_E,rec_Vx_y,rec_Vy_y,rec_Vz_y,rec_Px,rec_Py,rec_Pz
0,0,0,0,-1,4740.072727,4875.734806,4839.976788,71.587175,-2.505,-20.471136,38.779999,0,-11.0,4693.527832,-1.971450,-20.188143,22.877148,-154.253555,10.251737,4691.492188
1,0,0,0,-1,4740.072727,4875.734806,4839.976788,71.587175,-2.505,-20.471136,38.779999,1,11.0,2972.704346,-1.971716,-20.193453,22.839268,-102.542465,-8.515394,2971.434326
2,0,0,0,-1,4740.072727,4875.734806,4839.976788,71.587175,-2.505,-20.471136,38.779999,2,11.0,257.371124,-1.200308,-20.197134,-0.175000,-7.228745,-1.836229,257.773743
3,1,0,0,-1,2957.506180,2899.734497,2941.440821,29.748932,-2.505,-20.471136,38.779999,0,-11.0,4693.527832,-1.971450,-20.188143,22.877148,-154.253555,10.251737,4691.492188
4,1,0,0,-1,2957.506180,2899.734497,2941.440821,29.748932,-2.505,-20.471136,38.779999,1,11.0,2972.704346,-1.971716,-20.193453,22.839268,-102.542465,-8.515394,2971.434326
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
839851,277708,4,19999,-1,1760.736704,1689.650536,1744.330168,29.190138,-38.895,-0.972536,7.780000,297785,11.0,2181.076172,-38.836823,0.006593,0.175000,-401.216125,47.327873,2143.853516
839852,277708,4,19999,-1,1760.736704,1689.650536,1744.330168,29.190138,-38.895,-0.972536,7.780000,297786,-11.0,1802.488281,-38.778221,-0.043160,0.175000,-26.256176,-223.125000,1788.947144
839853,277709,4,19999,-1,801.025569,426.067710,695.638001,192.120314,-39.555,0.811657,7.780000,297784,11.0,3467.862305,-38.790752,0.029675,0.010933,-343.884735,553.274597,3406.646973
839854,277709,4,19999,-1,801.025569,426.067710,695.638001,192.120314,-39.555,0.811657,7.780000,297785,11.0,2181.076172,-38.836823,0.006593,0.175000,-401.216125,47.327873,2143.853516


In [39]:
def plot_multiplicity(
        rec_bin_counts,
        rec_bin_center,
        truth_bin_counts,
        truth_bin_center,
        x_title,
        log_y=False,
):
    """Draw reconstructed vs. truth bin counts with a ratio panel underneath.

    The figure has two stacked panels sharing the x axis: the upper one shows
    both count series as markers, the lower one shows
    ``rec_bin_counts / truth_bin_counts`` on a logarithmic y axis. The figure
    is displayed with ``fig.show()``; nothing is returned.

    Parameters
    ----------
    rec_bin_counts, truth_bin_counts
        Per-bin counts. They must provide ``.max()`` / ``.min()`` and support
        element-wise division (e.g. pandas Series or NumPy arrays).
    rec_bin_center, truth_bin_center
        x positions for the reconstructed and truth markers. The ratio is
        drawn at ``truth_bin_center``.
    x_title
        Title of the shared x axis (shown under the ratio panel).
    log_y
        If true, the upper panel uses a logarithmic y axis.
    """
    y_max = max(rec_bin_counts.max(), truth_bin_counts.max())
    y_min = min(rec_bin_counts.min(), truth_bin_counts.min())

    fig = make_subplots(
        rows=2, cols=1,
        shared_xaxes=True,
        vertical_spacing=0.0,
        row_heights=[0.75, 0.25],
    )
    fig.add_trace(
        go.Scatter(
            x=rec_bin_center,
            y=rec_bin_counts,
            name='Reconstructed',
            mode='markers',
            marker_symbol='circle',
            marker_size=14,
            marker_color='#D43E33',
        ), row=1, col=1
    )
    fig.add_trace(
        go.Scatter(
            x=truth_bin_center,
            y=truth_bin_counts,
            name='Truth',
            mode='markers',
            marker_symbol='star-diamond',
            marker_size=14,
            marker_color='#5857AD',
        ), row=1, col=1
    )
    fig.add_trace(
        go.Scatter(
            x=truth_bin_center,
            y=rec_bin_counts / truth_bin_counts,
            name='Recon/Truth',
            mode='markers',
            marker_symbol='triangle-up',
            marker_size=14,
            marker_color='rgba(0,100,80,0.4)',
        ), row=2, col=1
    )
    # Reference line at ratio = 1 in the lower panel.
    fig.add_hline(y=1.0, line_width=1, line_dash="dash", line_color="grey", row=2, col=1)

    y_axis_attr = dict(linecolor="#666666", zerolinecolor='rgba(0,0,0,0)', linewidth=2, mirror=True)
    # For a log axis the range is given as log10 exponents; the 1e-6 offset
    # keeps log10 finite when the smallest count is 0.
    if log_y:
        upper_range = [np.log10(y_min + 1e-6) + 0.5, np.log10(y_max) + 2.5]
    else:
        upper_range = [0.1 * y_min, y_max * 1.5]
    fig.update_yaxes(
        **y_axis_attr, title_text="A.U.", type="log" if log_y else "linear", row=1, col=1,
        range=upper_range,
    )
    # Ratio panel is always logarithmic; [-1, 1] are log10 exponents.
    fig.update_yaxes(
        **y_axis_attr, title_text="Reco/Truth", type="log", row=2, col=1,
        range=[-1, 1],
    )
    x_axis_attr = dict(
        linecolor="#666666", gridcolor='#d9d9d9', zerolinecolor='rgba(0,0,0,0)', linewidth=2,
        showline=True, showgrid=False
    )
    fig.update_xaxes(**x_axis_attr, mirror=True, row=1, col=1)
    fig.update_xaxes(**x_axis_attr, mirror=False, title_text=x_title, row=2, col=1)

    # Annotations, positioned in paper coordinates.
    x_base, y_base = 0.05, 0.97
    fig.add_annotation(
        # Closing tags fixed: the original string opened <b> and <i> a second
        # time instead of closing them.
        text=r'<i><b>DarkSHINE</b></i>', showarrow=False, xref='paper', x=x_base, yref='paper', y=y_base,
        font=dict(size=34, family='Cambria'),
    )
    fig.add_annotation(
        text=r'Simulation', showarrow=False, xref='paper', x=x_base + 0.405, yref='paper', y=y_base - 0.0037,
        font=dict(size=31, family='Cambria'),
    )

    y_base -= 0.075
    fig.add_annotation(
        text=r'$\Large{E^{e}_{0} = 8~\text{GeV},~10^{6}~\text{EOT}}$', showarrow=False, xref='paper', x=x_base,
        yref='paper', y=y_base,
    )

    fig.update_layout(
        legend=dict(
            orientation="v",
            yanchor="top",
            y=0.99,
            xanchor="right",
            x=0.98,
            font=dict(size=14),
        ),
        width=800,
        height=800,
        paper_bgcolor='rgba(0,0,0,0)',
        plot_bgcolor='rgba(0,0,0,0)',
    )

    fig.show()


def binning(df_in):
    """Histogram the number of rows per (run_num, evt_num) group.

    Ten unit-width bins with edges 0.5, 1.5, ..., 10.5 are used, so group
    sizes 1..10 each get their own bin; larger groups fall outside the last
    edge and are not counted.

    Returns
    -------
    bin_counts : pandas.Series
        Number of groups per bin, ordered by bin and labelled with the left
        bin edge.
    bin_center : numpy.ndarray
        Centre of each bin (1.0, 2.0, ..., 10.0).
    """
    edges = np.linspace(0.5, 10.5, 11)

    # Rows per event, as a frame with a 'count' column.
    per_event = (
        df_in.groupby(['run_num', 'evt_num'])
        .size()
        .loc[lambda sizes: sizes > 0]
        .reset_index(name='count')
    )

    # Assign every event to a bin; bins are labelled by their left edge.
    binned = pd.cut(per_event['count'], bins=edges, labels=edges[:-1]).rename('binned')
    histogram = binned.value_counts().sort_index()

    centers = edges[:-1] + np.diff(edges) / 2
    return histogram, centers


# Selection thresholds: lower bound on rec_PAvg / rec_E and upper bound on
# rec_PStd (same units as those columns -- presumably MeV, TODO confirm).
E_cut = 300
Std_min = 1500

# Rows that pass the selection in each table.
rec_selected = df[(df['rec_PAvg'] > E_cut) & (df['rec_PStd'] < Std_min)]
truth_selected = truth[truth['rec_E'] > E_cut]

rec_bin_counts, rec_bin_center = binning(rec_selected)
truth_bin_counts, truth_bin_center = binning(truth_selected)

plot_multiplicity(
    rec_bin_counts,
    rec_bin_center,
    truth_bin_counts,
    truth_bin_center,
    log_y=False,
    x_title='Multiplicity',
)

In [38]:
def select_nth_row(df, group_columns, nth):
    """Return the nth row (1-based) of every group that has at least nth rows.

    Parameters
    ----------
    df : pandas.DataFrame
        Input table; the row order inside each group decides which row is
        "the nth".
    group_columns : list of str
        Columns that define a group.
    nth : int
        1-based position inside the group. It is passed to ``GroupBy.nth`` as
        ``nth - 1``, so values below 1 index from the end of each group.

    Returns
    -------
    pandas.DataFrame
        One row per group that contains at least ``nth`` rows.
    """
    # GroupBy.nth already omits groups that are too short to have an nth row,
    # so a separate groupby.filter pass (one Python call per group) is not
    # needed before it.
    return df.groupby(group_columns).nth(nth - 1)  # nth - 1: zero-based indexing

def binning(df_in, nth, col='rec_PAvg'):
    """Histogram ``col`` for the nth row of every (run_num, evt_num) group.

    Parameters
    ----------
    df_in : pandas.DataFrame
        Input table with 'run_num', 'evt_num' and ``col`` columns. It is not
        modified. The row order inside each event decides which row is the nth.
    nth : int
        1-based position of the row to take from each event; events with fewer
        rows contribute nothing.
    col : str
        Column to histogram.

    Returns
    -------
    bin_counts : pandas.Series
        Entries per bin, ordered by bin and labelled with the left bin edge.
    bin_center : numpy.ndarray
        Centre of each bin.
    """
    # 100 edges between 0 and 8000 give 99 equal-width bins; values outside
    # (0, 8000] fall in no bin and are not counted.
    bins = np.linspace(0, 8000, 100)

    # select_nth_row builds a new frame and leaves df_in untouched, so no
    # defensive deep copy of the whole input is needed.
    selected = select_nth_row(df_in, ['run_num', 'evt_num'], nth)

    # Bin the column directly instead of writing a helper column onto the
    # selected frame; the Series keeps the name 'binned' used previously.
    binned = pd.cut(selected[col], bins=bins, labels=bins[:-1]).rename('binned')
    bin_counts = binned.value_counts().sort_index()
    bin_center = bins[:-1] + (bins[1:] - bins[:-1]) / 2

    return bin_counts, bin_center

# Selection thresholds: lower bound on rec_PAvg / rec_E and upper bound on
# rec_PStd (same units as those columns -- presumably MeV, TODO confirm).
E_cut = 300
Std_min = 1500

# The selections do not depend on nth, so build them once.
rec_selected = df[(df['rec_PAvg'] > E_cut) & (df['rec_PStd'] < Std_min)]
truth_selected = truth[truth['rec_E'] > E_cut]

# One figure each for the 1st, 2nd and 3rd row of every event.
for nth in [1, 2, 3]:
    rec_bin_counts, rec_bin_center = binning(rec_selected, nth, col='rec_PAvg')
    truth_bin_counts, truth_bin_center = binning(truth_selected, nth, col='rec_E')
    plot_multiplicity(
        rec_bin_counts,
        rec_bin_center,
        truth_bin_counts,
        truth_bin_center,
        log_y=False,
        x_title=f'$P_{{{nth}}} \\text{{ [MeV]}}$',
    )