# Experimental code for calculating Ripleys K metrics

In [None]:
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd

import os
# cell border experimental code: 


import numpy as np
from scipy.spatial import ConvexHull

In [None]:
df = pd.read_csv('data/cluster_position _Detailed.csv', skiprows=3)
df

Unnamed: 0,Position X,Position Y,Position Z,Unit,Category,Collection,Time,ID,Unnamed: 8
0,49529.578,44229.441,1.116,µm,Surface,Position,1,0,
1,49467.621,44290.391,0.977,µm,Surface,Position,1,1,
2,49515.867,44246.172,0.776,µm,Surface,Position,1,2,
3,49315.445,44443.277,2.803,µm,Surface,Position,1,3,
4,49557.926,44204.316,2.187,µm,Surface,Position,1,4,
...,...,...,...,...,...,...,...,...,...
20219,47583.918,41356.871,3.285,µm,Surface,Position,1,20219,
20220,47669.691,41268.441,-1.012,µm,Surface,Position,1,20220,
20221,47700.023,41240.758,0.904,µm,Surface,Position,1,20221,
20222,47647.992,41296.988,0.550,µm,Surface,Position,1,20222,


In [11]:
pts = df[['Position X', 'Position Y']].values             # shape (N,2) array

# 2) Compute the convex hull
hull = ConvexHull(pts)

# hull.simplices is an (E,2) array of index pairs (i, j)
# each representing an edge between pts[i] and pts[j].
edges = hull.simplices

# 3) Define a vectorized distance‐to‐segment function
def point_to_segment_distances(points, seg_start, seg_end):
    """
    points: (N,2) array
    seg_start, seg_end: each (2,) array
    returns: (N,) array of distances from each point to the segment
    """
    v = seg_end - seg_start                      # (2,)
    w = points - seg_start                       # (N,2)
    # projection factor of each point onto the infinite line
    t = np.einsum('ij,j->i', w, v) / np.dot(v, v)
    # clamp to [0,1] so we “snap” to the segment
    t_clamped = np.clip(t, 0.0, 1.0)             # (N,)
    proj = seg_start + t_clamped[:,None] * v     # (N,2)
    # euclidean distance from each point to its projection
    return np.linalg.norm(points - proj, axis=1)

# 4) Loop over edges, keep the minimum distance per point
all_dists = np.full(len(pts), np.inf)
for i, j in edges:
    start, end = pts[i], pts[j]
    d = point_to_segment_distances(pts, start, end)
    all_dists = np.minimum(all_dists, d)

# 5) Attach back to your DataFrame
df['dist_to_hull'] = all_dists

# 6) (Optional) Inspect
df

Unnamed: 0,Position X,Position Y,Position Z,Unit,Category,Collection,Time,ID,Unnamed: 8,dist_to_hull
0,49529.578,44229.441,1.116,µm,Surface,Position,1,0,,50.176982
1,49467.621,44290.391,0.977,µm,Surface,Position,1,1,,89.600726
2,49515.867,44246.172,0.776,µm,Surface,Position,1,2,,57.978679
3,49315.445,44443.277,2.803,µm,Surface,Position,1,3,,31.777766
4,49557.926,44204.316,2.187,µm,Surface,Position,1,4,,31.090851
...,...,...,...,...,...,...,...,...,...,...
20219,47583.918,41356.871,3.285,µm,Surface,Position,1,20219,,130.581855
20220,47669.691,41268.441,-1.012,µm,Surface,Position,1,20220,,135.363585
20221,47700.023,41240.758,0.904,µm,Surface,Position,1,20221,,108.228682
20222,47647.992,41296.988,0.550,µm,Surface,Position,1,20222,,153.115935


In [12]:
import numpy as np
import pandas as pd
from sklearn.neighbors import NearestNeighbors

# 0) Example: Suppose you have a set of cell centroids (x, y) in micrometers
#    Here, we simulate 100 random cell positions in a 500×500 µm area:
np.random.seed(0)
n_cells = 100
coords = np.random.rand(n_cells, 2) * 500

# Put them into a DataFrame for clarity:
df = pd.DataFrame(coords, columns=['x', 'y'])

# ---------------------------------------------------------------------
# 1) Compute NND (Nearest-Neighbor Distance) for each cell
# ---------------------------------------------------------------------
# Use NearestNeighbors with n_neighbors=2 (first neighbor is itself)
nbrs = NearestNeighbors(n_neighbors=2, algorithm='ball_tree').fit(coords)
distances, indices = nbrs.kneighbors(coords)

# distances[:, 0] == 0 (distance to itself)
# distances[:, 1] == distance to the nearest OTHER cell
df['NND'] = distances[:, 1]

# ---------------------------------------------------------------------
# 2) Compute LD ("Local Density"): count of neighbors within radius r
# ---------------------------------------------------------------------
radius = 5.0  # e.g., 50 µm

# Use NearestNeighbors with radius search
nbrs_radius = NearestNeighbors(radius=radius, algorithm='ball_tree').fit(coords)
# radius_neighbors returns a list of neighbor-index arrays for each point
indices_within_radius = nbrs_radius.radius_neighbors(coords, return_distance=False)

# For each cell i, LD_i = (# of neighbors within radius r) - 1 (exclude the cell itself)
df['LD_50um'] = [len(neighbors) - 1 for neighbors in indices_within_radius]

# ---------------------------------------------------------------------
# View the first few rows with the new features
# ---------------------------------------------------------------------
df.head(10)


Unnamed: 0,x,y,NND,LD_50um
0,274.406752,357.594683,33.516522,0
1,301.381688,272.441591,14.696908,0
2,211.8274,322.947057,19.750703,0
3,218.793606,445.8865,0.864418,1
4,481.83138,191.720759,34.484572,0
5,395.862519,264.44746,21.561993,0
6,284.022281,462.798319,4.362548,1
7,35.518029,43.56465,45.477121,0
8,10.109199,416.309923,52.369101,0
9,389.078375,435.006074,28.109228,0


In [13]:
import numpy as np
import pandas as pd
from scipy.spatial import ConvexHull
from sklearn.neighbors import NearestNeighbors

# ---------------------------------------------------------------------
# ASSUMPTION: `df` is your existing DataFrame with columns
# 'Position X' and 'Position Y' (and any other columns you need).
# ---------------------------------------------------------------------

# 1) Extract (X, Y) coordinates from your DataFrame
pts = df[['Position X', 'Position Y']].values  # shape (N, 2)

# ---------------------------------------------------------------------
# PART A: COMPUTE distance‐to‐CONVEX HULL FOR EACH POINT
# ---------------------------------------------------------------------

# 2) Build the convex hull of all points
hull = ConvexHull(pts)
edges = hull.simplices  # Each row is a pair of indices [i, j] forming an edge

# 3) Define a vectorized function that returns the distance from every point to a single segment
def point_to_segment_distances(points, seg_start, seg_end):
    """
    points:   (N, 2) array of (x, y) coordinates
    seg_start, seg_end: each a length‐2 array giving the endpoints of one hull edge
    returns:  (N,) array of distances from each point to this line segment
    """
    v = seg_end - seg_start                      # Vector along the segment
    w = points - seg_start                       # Vectors from seg_start to each point
    # Projection factor t of each point onto the infinite line:  t = (w·v)/(v·v)
    t = np.einsum('ij,j->i', w, v) / np.dot(v, v)
    # Clamp t into [0, 1] so points outside the segment project to an endpoint
    t_clamped = np.clip(t, 0.0, 1.0)             # (N,)
    proj = seg_start + t_clamped[:, None] * v    # (N, 2): projected points on the segment
    # Euclidean distance from each point to its projection
    return np.linalg.norm(points - proj, axis=1)

# 4) Initialize an array of “infinite” distances, one per point
all_dists = np.full(len(pts), np.inf)

# 5) For each hull edge, compute the point‐to‐segment distances and keep the minimum
for i, j in edges:
    start, end = pts[i], pts[j]
    d = point_to_segment_distances(pts, start, end)
    all_dists = np.minimum(all_dists, d)

# 6) Attach these min‐distances back to the DataFrame
df['dist_to_hull'] = all_dists

# ---------------------------------------------------------------------
# PART B: COMPUTE Nearest‐Neighbor Distance (NND) and Local Density (LD)
# ---------------------------------------------------------------------

# 7) Nearest‐Neighbor Distance (excluding the point itself):
nbrs = NearestNeighbors(n_neighbors=2, algorithm='ball_tree').fit(pts)
distances, _indices = nbrs.kneighbors(pts)
# distances[:, 0] is always 0 (distance to itself); distances[:, 1] is the nearest OTHER point
df['NND'] = distances[:, 1]

# 8) Local Density: count how many neighbors lie within a fixed radius r
radius = 5.0  # <-- adjust this radius (in the same units as Position X/Y) to suit your needs
nbrs_radius = NearestNeighbors(radius=radius, algorithm='ball_tree').fit(pts)
indices_within_radius = nbrs_radius.radius_neighbors(pts, return_distance=False)
# For each point, #neighbors within radius minus one (to exclude the point itself)
df['LD'] = [len(neighbors) - 1 for neighbors in indices_within_radius]

# ---------------------------------------------------------------------
# 9) Inspect the first few rows to confirm the new columns
# ---------------------------------------------------------------------
df.head()


KeyError: "None of [Index(['Position X', 'Position Y'], dtype='object')] are in the [columns]"