In [45]:
import json
import numpy as np
import pandas as pd
import uproot
import matplotlib.pyplot as plt
import mplhep as mh

from hist.hist import Hist
from hist.axis import Regular
from hist.axis import IntCategory
from hist.axis import StrCategory

from pathlib import Path

from typing import Optional, Union

from matplotlib.colors import Colormap, ListedColormap
from matplotlib.colors import LogNorm

In [46]:
from functools import cache
from functools import cached_property

@cache
def get_segment(ring: int, station: int, sector: int, subsector: int) -> int:
    """
    https://github.com/cms-sw/cmssw/blob/CMSSW_13_3_0_pre3/Geometry/RPCGeometry/src/RPCGeomServ.cc#L361-L368
    """
    nsub = 3 if ring == 1 and station > 1 else 6
    return subsector + nsub * (sector - 1)


@cache
def get_roll_name(region: int, ring: int, station: int, sector: int, layer: int,
             subsector: int, roll: int
) -> str:
    """
    https://github.com/cms-sw/cmssw/blob/CMSSW_13_3_0_pre3/Geometry/RPCGeometry/src/RPCGeomServ.cc#L11-L87
    """
    if region == 0:
        name = f'W{ring:+d}_RB{station}'

        if station <= 2:
            name += 'in' if layer == 1 else 'out'
        else:
            if sector == 4 and station == 4:
                name += ['--', '-', '+', '++'][subsector - 1]
            elif (station == 3) or (station == 4 and sector not in (4, 9, 11)):
                name += '-' if subsector == 1 else '+'
        name += f'_S{sector:0>2d}_'
        name += ['Backward', 'Middle', 'Forward'][roll - 1]
    else:
        segment = get_segment(ring, station, sector, subsector)
        name = f'RE{station * region:+d}_R{ring}_CH{segment:0>2d}_'
        name += ['A', 'B', 'C', 'D', 'E'][roll - 1]
    return name

In [47]:
from tqdm import tqdm

def load_data(
    input_path: Path,
    columns: list,
    roll_blacklist_path: Optional[Path] = None,
) -> dict:
    #############################################################
    ##     COLUMNS
    ##     'is_fiducial', 'is_matched', 
    ##     'region', 'ring', 'station', 'sector', 'layer', 'subsector', 'roll', 
    ##     'run', 'cls', 'bx', 'event',
    ##     'tag_pt', 'tag_eta', 'tag_phi', 
    ##     'probe_pt', 'probe_eta', 'probe_phi', 'probe_time', 'probe_dxdz', 'probe_dydz', 
    ##     'dimuon_pt', 'dimuon_mass', 
    ##     'residual_x', 'residual_y', 'pull_x', 'pull_y', 'pull_x_v2', 'pull_y_v2', 
    #############################################################
    data = uproot.open(f"{str(input_path)}:tree").arrays(columns, library='np')
    
    fiducial_mask = data['is_fiducial']
    for key, values in data.items():
        data[key] = data[key][fiducial_mask]

    data['roll_name'] = np.array([
        get_roll_name(
            data['region'][idx], data['ring'][idx], data['station'][idx],
            data['sector'][idx], data['layer'][idx], data['subsector'][idx], data['roll'][idx]
        ) for idx in range(len(data['region']))
    ])

    if roll_blacklist_path is None:
        roll_blacklist = set()
    else:
        with open(roll_blacklist_path) as stream:
            roll_blacklist = set(json.load(stream))
    
    is_blacklist = np.vectorize(lambda item: item in roll_blacklist)
    blacklist_mask = is_blacklist(data['roll_name'])

    for key, values in data.items():
        data[key] = data[key][~blacklist_mask]

    return data

def load_region_data(
    data: dict,
    region: str
):
    if region == "all":
        is_region = np.vectorize(lambda item: type(item) is str)
    elif region == "barrel":
        is_region = np.vectorize(lambda item: item.startswith('W'))
    elif region == "disk123":
        is_region = np.vectorize(lambda item: item.startswith('RE') and ~(item.startswith('RE+4') or item.startswith('RE-4')))
    elif region == "disk4":
        is_region = np.vectorize(lambda item: item.startswith('RE') and (item.startswith('RE+4') or item.startswith('RE-4')))

    region_mask = is_region(data['roll_name'])

    region_data = {}
    for key, values in data.items():
        region_data[key] = data[key][region_mask]

    return region_data

def load_event_mean_data(
    input_path: Path,
    keys: list
) -> pd.DataFrame:

    data = uproot.open(f"{str(input_path)}:tree").arrays(keys + ['event', 'run'], library='np')

    event_mean_data = {}
    for key in keys: event_mean_data[key] = []

    runs = np.unique(data['run'])
    for run in tqdm(runs):
        run_mask = (data['run'] == run)
        events_run = data['event'][run_mask]
        for key in keys:
            values_run = data[key][run_mask]
            for event in np.unique(events_run):
                values_run_event = values_run[events_run == event]
                values_run_event = values_run_event[values_run_event > -300]
                event_mean_data[key].append(np.mean(values_run_event, dtype=np.float16))
            
    event_mean_data = pd.DataFrame.from_dict(data=event_mean_data, orient='columns')

    return event_mean_data

In [48]:
#########################################
##    Keys:
##    'cls', 'bx',
##    'probe_pt', 'probe_eta', 'probe_phi', 'probe_time',
##    'dimuon_pt', 'dimuon_mass', 
##    'residual_x', 'residual_y', 'pull_x', 'pull_y'
#########################################
working_dir = Path('/users/eigen1907/Workspace/Workspace-RPC/240425-TnP_RPC24/TnP_Plotting')


df_data = load_event_mean_data(
    input_path = working_dir / 'data' / 'SingleMuon__Run2022C.root',
    keys = ['cls', 'dimuon_mass']
)

df_data.to_csv(working_dir / 'data' / 'SingleMuon__Run2022C_mean.csv', index=False)

100%|██████████| 35/35 [00:53<00:00,  1.53s/it]


In [49]:
working_dir = Path('/users/eigen1907/Workspace/Workspace-RPC/240425-TnP_RPC24/TnP_Plotting')


df_data = load_event_mean_data(
    input_path = working_dir / 'data' / 'Run2022.root',
    keys = ['cls', 'dimuon_mass']
)

df_data.to_csv(working_dir / 'data' / 'Run2022_mean.csv', index=False)

100%|██████████| 358/358 [5:32:28<00:00, 55.72s/it]    


In [50]:
working_dir = Path('/users/eigen1907/Workspace/Workspace-RPC/240425-TnP_RPC24/TnP_Plotting')


df_data = load_event_mean_data(
    input_path = working_dir / 'data' / 'Run2023.root',
    keys = ['cls', 'dimuon_mass']
)

df_data.to_csv(working_dir / 'data' / 'Run2023_mean.csv', index=False)

100%|██████████| 98/98 [1:55:47<00:00, 70.89s/it]   


In [51]:
working_dir = Path('/users/eigen1907/Workspace/Workspace-RPC/240425-TnP_RPC24/TnP_Plotting')


df_data = load_event_mean_data(
    input_path = working_dir / 'data' / 'Run3.root',
    keys = ['cls', 'dimuon_mass']
)

df_data.to_csv(working_dir / 'data' / 'Run3_mean.csv', index=False)

100%|██████████| 456/456 [7:07:03<00:00, 56.19s/it]    
