Code developed by Atharva Weling. Sample images came from h2114153_tissues out of the Sheffield_Samples folder, available on the README file.

In [None]:
pip install opencv-python

In [2]:
import cv2
import numpy as np
import os

In [8]:
# Access folder of tissues and create pairs for comparison
tissue_folder = '/content/sample_tissues'
tissue_paths = [os.path.join(tissue_folder, fname) for fname in os.listdir(tissue_folder) if fname.endswith('.tif')]
tissue_pairs = []

for i in range(len(tissue_paths)):
    for j in range(i + 1, len(tissue_paths)):
        tissue_pairs.append([tissue_paths[i], tissue_paths[j]])

print(tissue_pairs)

[['/content/sample_tissues/h2114153_h&e.tif_3.tif', '/content/sample_tissues/h2114153_h&e.tif_4.tif'], ['/content/sample_tissues/h2114153_h&e.tif_3.tif', '/content/sample_tissues/h2114153_h&e.tif_1.tif'], ['/content/sample_tissues/h2114153_h&e.tif_3.tif', '/content/sample_tissues/h2114153_melan.tif_1.tif'], ['/content/sample_tissues/h2114153_h&e.tif_3.tif', '/content/sample_tissues/h2114153_h&e.tif_2.tif'], ['/content/sample_tissues/h2114153_h&e.tif_3.tif', '/content/sample_tissues/h2114153_melan.tif_2.tif'], ['/content/sample_tissues/h2114153_h&e.tif_4.tif', '/content/sample_tissues/h2114153_h&e.tif_1.tif'], ['/content/sample_tissues/h2114153_h&e.tif_4.tif', '/content/sample_tissues/h2114153_melan.tif_1.tif'], ['/content/sample_tissues/h2114153_h&e.tif_4.tif', '/content/sample_tissues/h2114153_h&e.tif_2.tif'], ['/content/sample_tissues/h2114153_h&e.tif_4.tif', '/content/sample_tissues/h2114153_melan.tif_2.tif'], ['/content/sample_tissues/h2114153_h&e.tif_1.tif', '/content/sample_tissu

In [13]:
def preprocess_tissue(tissue_path):
    # Load image
    img = cv2.imread(tissue_path)
    # Convert to grayscale
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    # Apply GaussianBlur to reduce noise
    blurred = cv2.GaussianBlur(gray, (5, 5), 0)
    # Apply thresholding
    _, thresh = cv2.threshold(blurred, 127, 255, cv2.THRESH_BINARY)
    return thresh

def get_contour(tissue):
    # Find contours
    contours, _ = cv2.findContours(tissue, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    if contours:
        # Return the largest contour
        return max(contours, key=cv2.contourArea)
    return None

def match_shapes(contour1, contour2):
    if contour1 is not None and contour2 is not None:
        # Match shapes
        return cv2.matchShapes(contour1, contour2, cv2.CONTOURS_MATCH_I1, 0.0)
    return float('inf')  # If one of the contours is None

def compare_tissues(tissue_pairs):
    scores = {}

    for tissue_pair in tissue_pairs:
      contour_pair = []
      for tissue in tissue_pair:
        image = preprocess_tissue(tissue)
        contour = get_contour(image)
        contour_pair.append(contour)
      score = match_shapes(contour_pair[0], contour_pair[1])
      scores[' & '.join([path.split('/')[-1] for path in tissue_pair])] = score

    # Sort by score (remember; lower score means more similar)
    sorted_scores = sorted(scores.items(), key=lambda item: item[1])
    return sorted_scores

sorted_tissues = compare_tissues(tissue_pairs)

# Print optimal sorted results
for tissue_pair, score in sorted_tissues:
  if score < 0.2: # arbitrary threshold for similarity for now
    print(f"{tissue_pair}: {score}")

h2114153_h&e.tif_3.tif & h2114153_melan.tif_1.tif: 0.012034721791042924
h2114153_h&e.tif_4.tif & h2114153_melan.tif_2.tif: 0.026204239509839966
h2114153_h&e.tif_3.tif & h2114153_h&e.tif_2.tif: 0.11638169514902086
h2114153_melan.tif_1.tif & h2114153_h&e.tif_2.tif: 0.12841641694006378
h2114153_h&e.tif_1.tif & h2114153_h&e.tif_2.tif: 0.18670999440210478
