In [18]:
from typing import Optional, Union
from collections import defaultdict
from pathlib import Path
from typing import Optional, Union
import json
import numpy as np
import numpy.typing as npt
import pandas as pd
import uproot
import matplotlib.pyplot as plt
from matplotlib.colors import Colormap
from matplotlib.patches import Polygon
from matplotlib.collections import PatchCollection
from mpl_toolkits.axes_grid1 import make_axes_locatable
import mplhep as mh
from hist.hist import Hist
from hist.axis import StrCategory
#from RPCDPGAnalysis.NanoAODTnP.RPCGeomServ import RPCRoll # type: ignore

In [19]:
def get_segment(ring: int, station: int, sector: int, subsector: int) -> int:
    """
    https://github.com/cms-sw/cmssw/blob/CMSSW_13_3_0_pre3/Geometry/RPCGeometry/src/RPCGeomServ.cc#L361-L368
    """
    nsub = 3 if ring == 1 and station > 1 else 6
    return subsector + nsub * (sector - 1)


def get_roll_name(region: int, ring: int, station: int, sector: int, layer: int,
             subsector: int, roll: int
) -> str:
    """
    https://github.com/cms-sw/cmssw/blob/CMSSW_13_3_0_pre3/Geometry/RPCGeometry/src/RPCGeomServ.cc#L11-L87
    """
    if region == 0:
        name = f'W{ring:+d}_RB{station}'

        if station <= 2:
            name += 'in' if layer == 1 else 'out'
        else:
            if sector == 4 and station == 4:
                name += ['--', '-', '+', '++'][subsector - 1]
            elif (station == 3) or (station == 4 and sector not in (4, 9, 11)):
                name += '-' if subsector == 1 else '+'
        name += f'_S{sector:0>2d}_'
        name += ['Backward', 'Middle', 'Forward'][roll - 1]
    else:
        segment = get_segment(ring, station, sector, subsector)
        name = f'RE{station * region:+d}_R{ring}_CH{segment:0>2d}_'
        name += ['A', 'B', 'C', 'D', 'E'][roll - 1]
    return name

In [20]:
from dataclasses import dataclass
from functools import singledispatchmethod

@dataclass
class LumiBlockChecker:
    """
    https://twiki.cern.ch/twiki/bin/view/CMSPublic/SWGuideGoodLumiSectionsJSONFile
    """
    cert: dict[np.uint32, npt.NDArray[np.uint32]]

    @staticmethod
    def _transform_lumi_ranges(lumi: list[tuple[int, int]]
    ) -> npt.NDArray[np.uint32]:
        """
        """
        flat_lumi = np.array(lumi, dtype=np.uint32).flatten()
        # [first, last] to (first, last]
        flat_lumi[::2] -= 1
        return flat_lumi

    @classmethod
    def from_dict(cls, cert: dict[int, list[tuple[int, int]]]):
        flat_cert = {np.uint32(run): cls._transform_lumi_ranges(lumi_ranges)
                     for run, lumi_ranges in cert.items()}
        return cls(flat_cert)

    @classmethod
    def from_json(cls, path):
        with open(path) as stream:
            cert = json.load(stream)
        return cls.from_dict(cert)

    @staticmethod
    def _get_lumi_mask(lumi_arr: npt.NDArray[np.uint32],
                     ranges: npt.NDArray[np.uint32]
    ) -> npt.NDArray[np.bool_]:
        """
        """
        # odd(even) indices indicate good(bad) lumi blocks
        indices = np.searchsorted(ranges, lumi_arr)
        mask = (indices & 0x1).astype(bool)
        return mask

    @singledispatchmethod
    def get_lumi_mask(self, run, lumi: npt.NDArray[np.uint32]):
        raise NotImplementedError(f'expected np.uint32, npt.NDArray[np.uint32]'
                                  f' or int but got {type(run)}')

    @get_lumi_mask.register(int)
    @get_lumi_mask.register(np.uint32)
    def _(self,
          run: np.uint32,
          lumi: npt.NDArray[np.uint32]
    ) -> npt.NDArray[np.bool_]:
        """
        """
        if isinstance(run, int):
            run = np.uint32(run)

        if run in self.cert:
            mask = self._get_lumi_mask(lumi, ranges=self.cert[run])
        else:
            mask = np.full_like(lumi, fill_value=False, dtype=bool)
        return mask

    @get_lumi_mask.register(np.ndarray)
    def _(self,
          run: npt.NDArray[np.uint32],
          lumi: npt.NDArray[np.uint32]
    ) -> npt.NDArray[np.bool_]:
        """
        """
        mask = np.full_like(lumi, fill_value=False, dtype=bool)
        for each in np.unique(run):
            run_mask = run == each
            mask[run_mask] = self.get_lumi_mask(each, lumi[run_mask])
        return mask

In [22]:
import awkward as ak

def read_nanoaod(path,
                 cert_path: str,
                 treepath: str = 'Events',
                 name: str = 'rpcTnP',
):
    tree = uproot.open(f'{path}:{treepath}')

    aliases = {key.removeprefix(f'{name}_'): key
               for key in tree.keys()
               if key.startswith(name)}
    # number of measurements
    aliases['size'] = f'n{name}'
    expressions = list(aliases.keys()) + ['run', 'luminosityBlock']
    cut = f'(n{name} > 0)'

    data: dict[str, np.ndarray] = tree.arrays(
        expressions=expressions,
        aliases=aliases,
        cut=cut,
        library='np'
    )

    run = data.pop('run')
    lumi_block = data.pop('luminosityBlock')
    size = data.pop('size')

    lumi_block_checker = LumiBlockChecker.from_json(cert_path)
    mask = lumi_block_checker.get_lumi_mask(run, lumi_block)
    data = {key: value[mask] for key, value in data.items()}
    data = {key: np.concatenate(value) for key, value in data.items()} 
    data['run'] = np.repeat(run[mask], size[mask])
    data = ak.Array(data)
    data['roll_name'] = np.array([get_roll_name(row.region, row.ring, row.station,
                              row.sector, row.layer, row.subsector,
                              row.roll)
                for row in data], dtype='<U8')
    return data


def flatten_nanoaod(input_path: Path,
                    cert_path: Path,
                    geom_path: Path,
                    output_path: Path,
                    name: str = 'rpcTnP',
):
    data = read_nanoaod(
        path=input_path,
        cert_path=cert_path,
        treepath='Events',
        name=name
    )

    name_arr = [get_roll_name(row.region, row.ring, row.station,
                              row.sector, row.layer, row.subsector,
                              row.roll)
                for row in data]
    name_arr = np.array(name_arr)

    geom = pd.read_csv(geom_path)

    roll_axis = StrCategory(geom['roll_name'].tolist())
    h_total = Hist(roll_axis) # type: ignore
    h_passed = h_total.copy()

    h_total.fill(name_arr[data.is_fiducial])
    h_passed.fill(name_arr[data.is_fiducial & data.is_matched].tolist())

    with uproot.writing.recreate(output_path) as output_file:
        output_file['tree'] = data
        output_file['total'] = h_total
        output_file['passed'] = h_passed


data = read_nanoaod(path="/u/user/sjws5411/Workspace/Efficiency/CMSSW_14_1_0_pre2/Workspace-RPC/240425-TnP_RPC24/TnP_Plotting/datasample/2022C_output_1.root",
                        cert_path="/u/user/sjws5411/Workspace/Efficiency/CMSSW_14_1_0_pre2/src/RPCDPGAnalysis/NanoAODTnP/data/cert/Cert_Collisions2022_eraC_355862_357482_Golden.json")


print(data)
print(data.fields)


flatten_nanoaod(
    input_path = "/u/user/sjws5411/Workspace/Efficiency/CMSSW_14_1_0_pre2/Workspace-RPC/240425-TnP_RPC24/TnP_Plotting/datasample/2022C_output_1.root",
    cert_path = "/u/user/sjws5411/Workspace/Efficiency/CMSSW_14_1_0_pre2/src/RPCDPGAnalysis/NanoAODTnP/data/cert/Cert_Collisions2022_eraC_355862_357482_Golden.json",
    geom_path = "/u/user/sjws5411/Workspace/Efficiency/CMSSW_14_1_0_pre2/Workspace-RPC/240425-TnP_RPC24/TnP_Plotting/geometry/run3.csv",
    output_path = "/u/user/sjws5411/Workspace/Efficiency/CMSSW_14_1_0_pre2/Workspace-RPC/240425-TnP_RPC24/TnP_Plotting/plotting/output.root",
)

[{is_fiducial: False, is_matched: False, region: 0, ring: -1, ...}, ..., {...}]
['is_fiducial', 'is_matched', 'region', 'ring', 'station', 'sector', 'layer', 'subsector', 'roll', 'cls', 'bx', 'tag_pt', 'tag_eta', 'tag_phi', 'probe_pt', 'probe_eta', 'probe_phi', 'probe_time', 'probe_dxdz', 'probe_dydz', 'dimuon_pt', 'dimuon_mass', 'residual_x', 'residual_y', 'pull_x', 'pull_y', 'pull_x_v2', 'pull_y_v2', 'run', 'roll_name']


TypeError: fields of a record must be NumPy types, though the record itself may be in a jagged array

    field 'roll_name' has type string