In [1]:
import awkward as ak
import numpy as np
import pyarrow as pa
import pyarrow.parquet as pq
from coffea.nanoevents import NanoEventsFactory, NanoAODSchema, PFNanoAODSchema

In [2]:
# Input: wildcard path selecting the skimmed parquet files to convert.
src = "/project01/ndcms/cmoore24/skims/parquet_nano_test/hgg/keep0*"
# Output: single parquet file written after flattening/normalizing the columns.
dst = "/project01/ndcms/cmoore24/skims/parquet_nano_test/hgg/test.parquet"

In [3]:
# Load the parquet input addressed by `src` into one awkward array.
events = ak.from_parquet(src)

In [4]:
# Flatten `events` into a {column name: array} mapping. A top-level field that
# itself has sub-fields contributes one "<field>_<subfield>" entry per
# sub-field; a field without sub-fields is stored under its own name.
# Parameters are stripped from every stored array.
out = {}
for branch in events.fields:
    leaves = events[branch].fields
    if not leaves:
        out[branch] = ak.without_parameters(events[branch])
        continue
    for leaf in leaves:
        out[f"{branch}_{leaf}"] = ak.without_parameters(events[branch][leaf])

In [5]:
def norm(x):
    """Narrow 64-bit numeric leaves to 32 bits and fill missing values.

    The decision is driven entirely by the array's type string,
    ``str(ak.type(x))``:

    1. ``float64`` -> ``float32``, ``uint64`` -> ``uint32``,
       ``int64`` -> ``int32`` (via ``ak.values_astype``).
    2. If the type string contains ``?``, missing values are replaced through
       ``ak.fill_none`` with ``0.0`` (float types), ``0`` (integer types),
       ``False`` (bool) or ``""`` (anything else).

    Parameters
    ----------
    x
        Array accepted by ``ak.type``, ``ak.values_astype`` and
        ``ak.fill_none``.

    Returns
    -------
    The converted array, or ``x`` itself when neither step applies.
    """
    s = str(ak.type(x))
    if "float64" in s:
        x = ak.values_astype(x, np.float32)
    # "uint64" contains the substring "int64", so the unsigned case must be
    # tested first and exclusively; otherwise unsigned data is needlessly
    # pushed through a signed int32 cast before being cast to uint32.
    if "uint64" in s:
        x = ak.values_astype(x, np.uint32)
    elif "int64" in s:
        x = ak.values_astype(x, np.int32)
    # NOTE(review): narrowing to 32 bits silently truncates values that do not
    # fit (e.g. large 64-bit identifiers) — confirm this is acceptable for
    # every column that reaches this function.
    # NOTE(review): only the "?" spelling is detected here; a type rendered as
    # "option[...]" would not be filled — confirm such columns cannot occur.
    if "?" in s:
        if "float" in s:
            x = ak.fill_none(x, 0.0)
        elif "int" in s:  # substring match, so this covers "uint" as well
            x = ak.fill_none(x, 0)
        elif "bool" in s:
            x = ak.fill_none(x, False)
        else:
            x = ak.fill_none(x, "")
    return x

In [6]:
# Rebuild `out` with every column passed through `norm`.
out = {k: norm(v) for k, v in out.items()}

In [7]:
# Find columns whose largest per-entry count is zero and remove them from
# `out`. Columns for which the count cannot be computed at all (the call
# raises) are kept untouched.
drop_empty = []
for name, column in out.items():
    try:
        longest = ak.max(ak.num(column))
    except Exception:
        continue
    if not isinstance(longest, (int, np.integer)):
        # e.g. no integer maximum was produced; nothing to decide on
        continue
    if longest == 0:
        drop_empty.append(name)
for name in drop_empty:
    out.pop(name, None)

In [8]:
# Combine the flat columns into one record array and convert it to an Arrow
# table. The *_to32 flags request 32-bit offsets for lists, strings and
# bytestrings; extensionarray=False asks for plain Arrow types rather than
# awkward's extension types.
table = ak.to_arrow_table(
    ak.zip(out, depth_limit=1),
    list_to32=True,
    string_to32=True,
    bytestring_to32=True,
    extensionarray=False,
)

In [9]:
# Collect the names of columns whose Arrow type is unsuitable for the later
# read-back:
#   * struct / map / union columns are always rejected;
#   * list columns are rejected unless their element type maps to a callable
#     whose instance exposes a non-None `dtype` attribute.
bad = []
for column in table.schema:
    arrow_type = column.type
    if not (pa.types.is_list(arrow_type) or pa.types.is_large_list(arrow_type)):
        if (
            pa.types.is_struct(arrow_type)
            or pa.types.is_map(arrow_type)
            or pa.types.is_union(arrow_type)
        ):
            bad.append(column.name)
        continue

    element_type = arrow_type.value_type
    try:
        # to_pandas_dtype() may give a numpy dtype, numpy scalar type, or
        # Python type; coffea expects it to be callable, so instantiate it.
        instance = element_type.to_pandas_dtype()()
        usable = getattr(instance, "dtype", None) is not None
    except Exception:
        usable = False
    if not usable:
        bad.append(column.name)

In [10]:
# Null-typed columns are flagged for removal as well.
bad.extend(
    column.name for column in table.schema if pa.types.is_null(column.type)
)

In [11]:
# De-duplicate the flagged names, drop those columns from `out`, and rebuild
# the Arrow table only when something was actually removed.
bad = sorted(set(bad))
for name in bad:
    out.pop(name, None)
if bad:
    table = ak.to_arrow_table(
        ak.zip(out, depth_limit=1),
        list_to32=True,
        string_to32=True,
        bytestring_to32=True,
        extensionarray=False,
    )

In [12]:
# Write the Arrow table to `dst` as a snappy-compressed parquet file.
pq.write_table(table, dst, compression="snappy")

In [13]:
import coffea.nanoevents.transforms as _t

# Capture the genuine coffea transform exactly once. When this cell is run a
# second time, `_t.nestedindex` is already the wrapper installed below;
# capturing that again would make the wrapper call itself (it resolves
# `_orig_nestedindex` as a global) and recurse until RecursionError. The
# wrapper therefore carries the original function as an attribute, and that
# attribute is preferred when present.
_orig_nestedindex = getattr(_t.nestedindex, "_nestedindex_original", _t.nestedindex)

def _nestedindex_patch(stack):
    """Convert awkward entries of `stack` to numpy, then run the original transform.

    Parameters
    ----------
    stack : mutable sequence
        Modified in place: every ``ak.Array`` or ``ak.contents.Content`` entry
        that ``ak.to_numpy`` can convert is replaced by the resulting numpy
        array; all other entries are left as they are.

    Returns
    -------
    Whatever the original ``nestedindex`` returns for the adjusted stack.
    """
    # Coerce any Awkward arrays/layouts on the stack into numpy arrays
    for i, x in enumerate(stack):
        try:
            if isinstance(x, ak.Array):
                stack[i] = ak.to_numpy(x)
            # low-level layout (Content) – wrap then convert
            elif hasattr(ak, "contents") and isinstance(x, ak.contents.Content):
                stack[i] = ak.to_numpy(ak.Array(x))
        except Exception:
            # If something isn't convertible, leave it as-is and let coffea handle/raise
            pass
    return _orig_nestedindex(stack)

# Remember the wrapped function on the wrapper so a re-run can recover it.
_nestedindex_patch._nestedindex_original = _orig_nestedindex
_t.nestedindex = _nestedindex_patch

In [14]:
# Re-open the file just written through coffea using the NanoAOD schema.
# NOTE(review): mode='virtual' presumably defers column reads until access — confirm.
evts = NanoEventsFactory.from_parquet(dst, schemaclass=NanoAODSchema, mode='virtual').events()



In [None]:
# Materialize `evts`; presumably this forces every virtual column to be read,
# exercising the full read-back path — confirm.
ak.materialize(evts)