# Distance to edge tool 

This code creates a mathematical edge of the tumor (the convex hull of the DAPI positions), then calculates the shortest distance from each DAPI position to that edge and adds the result as a new column. The data must contain the X and Y DAPI position columns (`POSITION X` and `POSITION Y`).

In [20]:
import os
import glob
import numpy as np
import pandas as pd
from scipy.spatial import ConvexHull

# Collect every CSV that lives in the input folder
csv_files = glob.glob('csv_for_tools/*.csv')

# Stop early when the folder holds no CSV at all
if len(csv_files) == 0:
    raise FileNotFoundError("No CSV files found in 'csv_for_tools/'")

# The first match is the file this notebook works on; report which one it is
first_csv, *_ = csv_files
print(f"Reading: {first_csv}")

# Load the chosen file into a DataFrame
df = pd.read_csv(first_csv)

# Preview the first rows to confirm the file loaded as expected
df.head()


Reading: csv_for_tools/merged_for_tools.csv


Unnamed: 0,POSITION X,POSITION Y,POSITION Z,UNIT,CATEGORY,COLLECTION,TIME,SET 1,ID,UNNAMED: 9,CLUSTER,CD68,LY6G
0,51257.254,42903.758,26,µm,Surface,Position,1,Class A,3,0,0,0,0
1,51619.285,42545.438,12,µm,Surface,Position,1,Class A,4,0,0,0,0
2,51489.516,42677.035,14,µm,Surface,Position,1,Class A,5,0,0,0,0
3,51618.191,42549.02,16,µm,Surface,Position,1,Class A,9,0,0,0,0
4,51434.113,42733.695,17,µm,Surface,Position,1,Class A,10,0,0,0,0


# Check data before continuing

Make sure the data has a `POSITION X` and a `POSITION Y` column; otherwise the code below will not work.

In [21]:

# 1) Pull the X/Y coordinates out of the DataFrame as an (N,2) array
coord_columns = ['POSITION X', 'POSITION Y']
pts = df[coord_columns].to_numpy()

# 2) Build the convex hull around all points
hull = ConvexHull(pts)

# In 2-D, hull.simplices has shape (E,2): every row is a pair of indices
# (i, j) into pts, describing the hull edge that runs from pts[i] to pts[j].
edges = hull.simplices

# 3) Define a vectorized distance‐to‐segment function
def point_to_segment_distances(points, seg_start, seg_end):
    """
    Euclidean distance from each point to one line segment.

    points: (N,2) array
    seg_start, seg_end: each (2,) array
    returns: (N,) array of distances from each point to the segment

    A zero-length segment (seg_start == seg_end) is treated as a single
    point, so the result is the distance to that point rather than NaN.
    """
    points = np.asarray(points)
    seg_start = np.asarray(seg_start)
    seg_end = np.asarray(seg_end)

    v = seg_end - seg_start                      # (2,) segment direction
    w = points - seg_start                       # (N,2) offsets from the start
    seg_len_sq = np.dot(v, v)

    # Degenerate segment: dividing by seg_len_sq would give 0/0 = NaN for
    # every point, so fall back to the plain point-to-point distance.
    if seg_len_sq == 0:
        return np.linalg.norm(w, axis=1)

    # projection factor of each point onto the infinite line
    t = np.einsum('ij,j->i', w, v) / seg_len_sq
    # clamp to [0,1] so we "snap" to the segment instead of the infinite line
    t_clamped = np.clip(t, 0.0, 1.0)             # (N,)
    proj = seg_start + t_clamped[:, None] * v    # (N,2) closest point on segment
    # euclidean distance from each point to its projection
    return np.linalg.norm(points - proj, axis=1)

# 4) Walk every hull edge and keep, per point, the smallest distance seen so far.
#    Starting from +inf guarantees the first edge always replaces the initial value.
all_dists = np.full(len(pts), np.inf)
for edge in edges:
    edge_start = pts[edge[0]]
    edge_end = pts[edge[1]]
    all_dists = np.minimum(
        all_dists,
        point_to_segment_distances(pts, edge_start, edge_end),
    )

# 5) Store the per-point result as a new column on the DataFrame
df['Distance_to_Edge'] = all_dists

# 6) (Optional) Display the DataFrame to check the new column
df

Unnamed: 0,POSITION X,POSITION Y,POSITION Z,UNIT,CATEGORY,COLLECTION,TIME,SET 1,ID,UNNAMED: 9,CLUSTER,CD68,LY6G,Distance_to_Edge
0,51257.254,42903.758,26,µm,Surface,Position,1,Class A,3,0,0,0,0,41.876219
1,51619.285,42545.438,12,µm,Surface,Position,1,Class A,4,0,0,0,0,121.859653
2,51489.516,42677.035,14,µm,Surface,Position,1,Class A,5,0,0,0,0,133.299273
3,51618.191,42549.020,16,µm,Surface,Position,1,Class A,9,0,0,0,0,120.332282
4,51434.113,42733.695,17,µm,Surface,Position,1,Class A,10,0,0,0,0,137.870283
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
115110,48725.254,40357.273,24,µm,Surface,Position,1,Class B,115202,0,0,0,0,84.913215
115111,48741.340,40339.059,24,µm,Surface,Position,1,Class B,115203,0,0,0,0,71.230821
115112,48718.805,40368.750,25,µm,Surface,Position,1,Class B,115206,0,0,0,0,94.444411
115113,48733.887,40358.613,24,µm,Surface,Position,1,Class B,115207,0,0,0,0,88.341651


# Review the output above before continuing

If the output above looks correct, move on to the next cell. Running the next cell will overwrite the CSV file, adding the edge-distance column to it. Make sure you are OK with this before running the next cell.

In [22]:
# Write back to the exact file that was loaded into `df` (`first_csv`).
# Globbing the folder again could pick a different file: glob returns its
# matches in arbitrary order, and the folder contents may have changed
# since the data was read.
csv_path = first_csv

# Overwrites the input CSV in place; index=False keeps the DataFrame's
# row index from being written as an extra column.
df.to_csv(csv_path, index=False)