In [None]:
import uproot
import awkward as ak
import pandas as pd
import numpy as np

In [None]:
# Selection thresholds read by the cells below.
# min_pt: lower bound applied to the particle "pt" column; particles below it
#   are dropped. NOTE(review): unit is presumably GeV - confirm against the
#   simulation output.
min_pt = 1
# min_distance: minimum separation between two consecutive hits of a particle
#   for the pair to be recorded as a doublet. NOTE(review): unit is presumably
#   mm (same unit as the tx/ty/tz hit columns) - confirm.
min_distance = 10

In [None]:
# Load the simulated particles and keep only charged ones with pt >= min_pt.
# NOTE(review): this is an absolute, machine-specific path; consider moving it
# to a configurable data directory so the notebook runs elsewhere.
particles_path = "/Users/andreas/cern/source/acts/acts/dev3/odd_output/particles_simulation.root"
particle_branches = ["event_id", "particle_id", "q", "pt"]

# Read inside a context manager so the ROOT file handle is closed as soon as
# the requested branches are in memory (previously the file was never closed).
with uproot.open(particles_path) as particles_file:
    particle_arrays = particles_file["particles"].arrays(particle_branches, library="ak")

particles = ak.to_dataframe(particle_arrays, how="inner")

# Neutral particles (q == 0) and particles below the pt threshold are removed.
is_charged = particles["q"] != 0
passes_pt = particles["pt"] >= min_pt
particles = particles[is_charged & passes_pt].reset_index(drop=True)
particles

In [None]:
# Load the simulated hits with the columns needed to build module connections.
# NOTE(review): this is an absolute, machine-specific path; consider moving it
# to a configurable data directory so the notebook runs elsewhere.
hits_path = "/Users/andreas/cern/source/acts/acts/dev3/odd_output/hits.root"
hit_branches = [
    "event_id",
    "particle_id",
    "index",
    "geometry_id",
    "volume_id",
    "layer_id",
    "tx",
    "ty",
    "tz",
]

# Read inside a context manager so the ROOT file handle is closed as soon as
# the requested branches are in memory (previously the file was never closed).
with uproot.open(hits_path) as hits_file:
    hit_arrays = hits_file["hits"].arrays(hit_branches, library="ak")

hits = ak.to_dataframe(hit_arrays, how="inner").reset_index(drop=True)
hits

In [None]:
# Attach the particle columns to every hit of a selected particle.
join_keys = ["event_id", "particle_id"]

# convert_dtypes() switches both frames to pandas' NA-aware dtypes.
particles = particles.convert_dtypes()
hits = hits.convert_dtypes()

hits_by_particle = hits.set_index(join_keys)
selected_particles = particles.set_index(join_keys)

# A right join keeps every selected particle; hits whose particle was filtered
# out are dropped, and a particle without any hit still yields one row whose
# hit columns are missing.
reduced_hits = hits_by_particle.join(selected_particles, how="right")
reduced_hits

In [None]:
# Collect the distinct module connections seen along the particle trajectories:
#   doublet = (geometry_id of hit i, geometry_id of hit i+1)
#   triplet = two neighbouring kept doublets, written as three geometry ids
# Hit pairs closer together than min_distance are not recorded as doublets.
doublet_set = set()
triplet_set = set()

for _, track_hits in reduced_hits.groupby(["event_id", "particle_id"]):
    # Fewer than two rows cannot form a pair (this also covers particles that
    # only carry the hit-less placeholder row produced by the right join).
    if len(track_hits) < 2:
        continue

    # Order the hits by their "index" column before pairing neighbours.
    track_hits = track_hits.sort_values("index")

    # Work on plain positional sequences instead of concatenating two shifted
    # frames: pairing then never depends on index-label alignment, and the
    # module ids are taken straight from their column rather than from a
    # mixed-dtype row produced by iterrows().
    module_ids = track_hits["geometry_id"].tolist()
    positions = track_hits[["tx", "ty", "tz"]].values.astype(float)
    step_lengths = np.linalg.norm(positions[:-1] - positions[1:], axis=1)

    # Written as "not (< min_distance)" so a NaN distance keeps the doublet,
    # exactly like the `if distance < min_distance: continue` test it replaces.
    keep_step = ~(step_lengths < min_distance)

    doublet_sequence = [
        (first_module, second_module)
        for first_module, second_module, keep in zip(module_ids[:-1], module_ids[1:], keep_step)
        if keep
    ]
    doublet_set.update(doublet_sequence)

    # NOTE(review): triplets are built from neighbouring *kept* doublets. When
    # a short step in between was dropped, the triplet bridges that gap, so its
    # second leg (a[1], b[1]) is not itself a recorded doublet. Confirm this is
    # the intended treatment of close-by hits.
    triplet_set.update(
        (a[0], a[1], b[1])
        for a, b in zip(doublet_sequence[:-1], doublet_sequence[1:])
    )

print("doublets", len(doublet_set))
print("triplets", len(triplet_set))

In [None]:
# Write each collection of module tuples to its own CSV file (one tuple per
# row, no index column).
csv_exports = (
    (doublet_set, ["module_a", "module_b"], "module_doublets.csv"),
    (triplet_set, ["module_a", "module_b", "module_c"], "module_triplets.csv"),
)
for module_tuples, column_names, csv_name in csv_exports:
    pd.DataFrame(module_tuples, columns=column_names).to_csv(csv_name, index=False)