In [None]:
from pathlib import Path

import numpy as np
import pandas as pd
from scipy.spatial import cKDTree

In [None]:
# Load the processed MERSCOPE WT transcript table, then preview its first rows
transcripts = pd.read_parquet(
    path="../../data/MERSCOPE_WT_1/processed_data/transcripts.parquet",
)
transcripts.head()

In [None]:
# Spatial resolution
def min_positive_diff(series):
    """Return the smallest positive gap between distinct values of ``series``.

    Missing values are dropped before the gaps are measured.  When no
    positive gap exists (for example, fewer than two distinct values remain)
    ``np.nan`` is returned instead.
    """
    # Distinct, non-missing values in ascending order.
    distinct_sorted = np.sort(series.dropna().unique())

    # Gap between each value and its successor; keep strictly positive gaps.
    gaps = np.diff(distinct_sorted)
    positive_gaps = gaps[gaps > 0]

    if len(positive_gaps) == 0:
        return np.nan
    return positive_gaps.min()

# Per-axis resolution: the smallest positive gap between distinct coordinates.
x_resolution, y_resolution = (
    min_positive_diff(transcripts[axis_column])
    for axis_column in ("global_x", "global_y")
)

print("X resolution:", x_resolution)
print("Y resolution:", y_resolution)

In [None]:
# Mean distance from each Camk2a transcript to its nearest Camk2a neighbours.
# NOTE: this cell rebinds `transcripts` to the Camk2a subset; re-run the loading
# cell before re-running any cell that expects the full transcript table.
N_NEIGHBORS = 4  # neighbours averaged per transcript (the zero-distance self-match is excluded)
MAX_NN_DISTANCE = 25  # cutoff in global_x/global_y units; larger mean distances are not saved
NN_OUTPUT_PATH = "../../output/benchmark/benchmark_collapse.parquet"

transcripts = transcripts[transcripts["target"] == "Camk2a"].copy()

# Extract the coordinates once and reuse them for both building and querying.
coords = transcripts[["global_x", "global_y"]].to_numpy()
tree = cKDTree(coords)

# Every query point is itself in the tree, so its closest hit sits at distance 0
# (the point itself or an exact duplicate).  Request one extra neighbour and
# drop that first column before averaging.
distances, _ = tree.query(coords, k=N_NEIGHBORS + 1)
nn_distances = distances[:, 1:].mean(axis=1)

# Create the output directory first so the write does not fail when it is missing.
Path(NN_OUTPUT_PATH).parent.mkdir(parents=True, exist_ok=True)
pd.DataFrame(
    {"nn_distances": nn_distances[nn_distances < MAX_NN_DISTANCE]}
).to_parquet(NN_OUTPUT_PATH)