# Part A
You should only need to run Part A once; after that, you can simply load the saved images, as is done in Part B.

In [2]:
import matplotlib.pyplot as plt
import matplotlib.cm as cm
from PIL import Image, ImageOps
Image.MAX_IMAGE_PIXELS = None
import pandas as pd
import random
import numpy as np
import os

In [3]:
%matplotlib widget

# Load the morphology image (path is relative to the notebook's working directory)
img = Image.open("../../data_processed/morphology_focus.tif")

# Pull the image's pixel data into a NumPy array
image_array = np.array(img)

# The 99th-percentile intensity is the reference used for rescaling
quantile_value = np.quantile(image_array, q=0.99)

# Rescale so that the 99th-percentile intensity maps to 1
scaled_image_array = np.divide(image_array, quantile_value)

# Clamp the rescaled intensities into the [0, 1] range
img2 = scaled_image_array.clip(min=0, max=1)

# Optional preview of the whole scaled image (slow) - uncomment both lines to show it
# plt.imshow(img2, cmap='gray')
# plt.show()

In [5]:
# Load the cell boundary table (one row per boundary vertex)
cell_boundaries = pd.read_csv("../../data_processed/cell_boundaries.csv.gz")

# Divisor that converts vertex coordinates into image pixel indices.
# NOTE(review): presumably the image pixel size in the units of vertex_x / vertex_y
# (e.g. microns per pixel) - confirm against the data source.
PIXEL_SIZE = 0.2125

# Vectorised form of int(x / PIXEL_SIZE): divide the whole column at once, then cast to
# integer, which truncates toward zero exactly like int() does.
cell_boundaries['vertex_x_trans'] = (cell_boundaries['vertex_x'] / PIXEL_SIZE).astype('int64')
cell_boundaries['vertex_y_trans'] = (cell_boundaries['vertex_y'] / PIXEL_SIZE).astype('int64')

In [6]:
# Read the cluster table and record how many rows (cells) it contains
clusters = pd.read_csv("../../data_processed/clusters.csv")
ncells = len(clusters)
ncells

36553

In [7]:
# Reuse the pixel array already built from `img` when the image was loaded, rather than
# converting the (very large) image a second time and holding two full copies in memory.
# Nothing in this notebook writes to either array, so sharing the data is safe.
img_pix = image_array

In [9]:
random.seed(2024)

ncells_subset = 1000

# Draw a reproducible random subset of cells (never more than the number available)
cells_subset = random.sample(range(ncells), min(ncells_subset, ncells))

# NOTE(review): the same integer i is used below both as a `cell_id` value in
# cell_boundaries and as a row label in clusters. This assumes the two tables are
# aligned that way - confirm against the data.

# create the root directory for images once, up front (including any missing parents)
cell_dir = os.path.join(os.getcwd(), '../../data_processed/cell_images_py')
os.makedirs(cell_dir, exist_ok=True)

# group the boundary vertices of the sampled cells once, instead of scanning the whole
# boundary table again for every cell inside the loop
sampled_bounds = cell_boundaries.loc[cell_boundaries["cell_id"].isin(cells_subset)]
bounds_by_cell = {cell_id: group for cell_id, group in sampled_bounds.groupby("cell_id")}

for position, i in enumerate(cells_subset):
    # extract the boundary vertices for the selected cell
    bounds_i = bounds_by_cell.get(i)
    if bounds_i is None:
        print(f'Skipping cell {i}: no boundary vertices found')
        continue

    # extract the cluster value for the selected cell
    clustval_i = clusters.loc[i, "Cluster"]

    # extract the pixel intensities for the bounding box of the cell boundary;
    # the +1 keeps the last row/column, which a plain min:max slice would drop
    y_min, y_max = bounds_i["vertex_y_trans"].min(), bounds_i["vertex_y_trans"].max()
    x_min, x_max = bounds_i["vertex_x_trans"].min(), bounds_i["vertex_x_trans"].max()
    img_sub = img_pix[y_min:y_max + 1, x_min:x_max + 1]
    if img_sub.size == 0:
        print(f'Skipping cell {i}: bounding box lies outside the image')
        continue

    # normalise the pixel intensities according to the 99th percentile and clip to [0, 1]
    scale = np.quantile(img_sub, 0.99)
    if scale > 0:
        img_sub_norm = np.clip(img_sub / scale, 0, 1)
    else:
        # a zero 99th percentile would mean dividing by zero; save a black image instead
        img_sub_norm = np.zeros(img_sub.shape)

    # as an example, report (and optionally display) the first five selected cells
    if position < 5:
        print(f'Displaying image for cell {i}')
        # plt.imshow(img_sub_norm, cmap="gray")  # Uncomment this line and below to display sample cell images
        # plt.show()

    # create directory for cluster if it doesn't exist
    clust_dir = os.path.join(cell_dir, f'cluster_{clustval_i}')
    os.makedirs(clust_dir, exist_ok=True)

    # save extracted image as a png file; vmin/vmax are fixed because imsave otherwise
    # rescales to the array's own min/max, which would undo the percentile normalisation
    plt.imsave(os.path.join(clust_dir, f'cell_{i}.png'), img_sub_norm,
               cmap="gray", vmin=0, vmax=1)

Displaying image for cell 30807
Displaying image for cell 11908
Displaying image for cell 19906
Displaying image for cell 13120
Displaying image for cell 26875


# Part B

In [10]:
import matplotlib.pyplot as plt
import matplotlib.cm as cm
from PIL import Image, ImageOps
Image.MAX_IMAGE_PIXELS = None
import pandas as pd
import random
import numpy as np
import os

In [11]:
# Absolute path of the directory holding the saved per-cluster cell images
_cell_images_relative = '../../data_processed/cell_images_py'
CELL_IMAGES = os.path.abspath(_cell_images_relative)

In [20]:
def list_cluster_files(cluster):
    """Return the absolute paths of the files saved for one cluster, sorted by name.

    Parameters
    ----------
    cluster : int or str
        Cluster label; files are listed from ``<CELL_IMAGES>/cluster_<cluster>``.

    Returns
    -------
    list of str
        One absolute path per directory entry, in sorted order so that the result does
        not depend on the arbitrary ordering of ``os.listdir``.
    """
    cluster_dir = os.path.join(CELL_IMAGES, f'cluster_{cluster}')
    # os.listdir returns bare file names, so each one must be joined to cluster_dir;
    # calling os.path.abspath on the bare name would resolve it against the current
    # working directory and yield a path that does not exist.
    return [
        os.path.abspath(os.path.join(cluster_dir, name))
        for name in sorted(os.listdir(cluster_dir))
    ]


cluster_A_files = list_cluster_files(8)
cluster_B_files = list_cluster_files(13)