# Cropping and identifying HSC in .czi files

First, we install (if needed) and import the required modules.

In [1]:
# Create an automated chunk for package installation (if necessary)
print('''"The following packages are necessary for proper script functioning: opencv-python, pandas, aicsimageio. 
Would you like to install them? Yes/No''')
response = input()
if response == "Yes":
    print("Installing necessary packages. Please wait")
    %pip install opencv-python
    %pip install pandas
    %pip install aicsimageio
    print("Successful package installation")
else:
    pass
  

"The following packages are necessary for proper script functioning: opencv-python, pandas, aicsimageio. 
Would you like to install them? Yes/No
No


In [2]:
# Importing necessary packages
# Creating folders to save files coming from each image
from os import listdir, mkdir
# To work with Excel files
import pandas as pd
# Rectangle drawing over red channel
import cv2
# Working with .czi files
from aicsimageio import AICSImage
# To work with regular expressions
import re

Next, we set the working directories

In [3]:
# Root working folder; the "young/" and "aged/" subfolders are derived from it
training_images_dir = 'C:/Users/cdedi/Desktop/Master images/'
young_dir = f"{training_images_dir}young/"
aged_dir = f"{training_images_dir}aged/"

In [5]:
# Crops every annotated HSC out of its .czi image.
#
# For each coordinate spreadsheet ("young_*.xlsx" / "aged_*.xlsx") found in
# training_images_dir this cell:
#   1. opens the matching .czi file from the "young/" or "aged/" subfolder,
#   2. saves, per Z slice, an overview image with a rectangle around each HSC,
#   3. saves one small .tiff per channel for every HSC that fits in the image,
#   4. records array, age, sample, reference name and polarity of every HSC
#      and writes the summary table to "numpys.xlsx".

# Half the side of the square crop, in pixels (crop is 2 * cropsize wide)
cropsize = 12

# Reading every coordinate spreadsheet once. "numpys.xlsx" is the table written
# at the end of this cell, so it is skipped when it already exists
datasets = []
for i in listdir(training_images_dir):
    if ".xlsx" not in i or "numpys.xlsx" in i:
        continue
    # The age group decides both the .czi subfolder and the output names
    if "aged_" in i:
        age_prefix = "aged"
    elif "young_" in i:
        age_prefix = "young"
    else:
        # Without an age tag there is no way to locate the .czi file
        print("Skipping " + i + ": file name contains neither 'young_' nor 'aged_'")
        continue
    # Opening the file (closed again on leaving the block) and removing
    # spaces from the column names
    with pd.ExcelFile(training_images_dir + i) as xls:
        coord = xls.parse(index_col=None, na_values=['NA'])
    coord.columns = coord.columns.str.replace(' ', '')
    if len(coord) == 0:
        # Nothing to crop and no sample name to read
        continue
    datasets.append((i, age_prefix, coord))

# Establishing counter for total HSCs
total_hsc = sum(len(coord) for _, _, coord in datasets)

# Creating lists for our datasets (Numpy array, young/aged, sample and polarity)
hsc_list = [0] * total_hsc
hsc_sample = [0] * total_hsc
hsc_age = [0] * total_hsc
hsc_polarity = [0] * total_hsc
hsc_excluded = total_hsc * ["No"]
hsc_points = [0] * total_hsc

# Creating a counter for iteration
hsc_counter = 0
for i, age_prefix, coord in datasets:
    # Selecting .czi from their specific folders
    czi_folder = training_images_dir + age_prefix + "/"
    age_label = "Young" if age_prefix == "young" else "Aged"
    # Recording the sample name, stripping the file extension with a regular
    # expression. Row 1 is used as before; row 0 is the fallback for
    # spreadsheets that hold a single HSC
    name_row = 1 if len(coord) > 1 else 0
    current_sample = re.sub(r'\.lsm$', '', str(coord.at[name_row, "ItemName"]))
    # Creating directory for each sample
    sample_folder_name = age_prefix + " " + current_sample
    sample_dir = training_images_dir + sample_folder_name
    if sample_folder_name not in listdir(training_images_dir):
        mkdir(sample_dir)
    # Common beginning of every file written for this sample
    file_stem = sample_dir + "/" + age_prefix + "_" + current_sample
    # Files already present, so the folder is listed once per sample instead
    # of once per HSC
    existing_files = set(listdir(sample_dir))
    # Opening the .czi file (thanks Dídac!)
    img = AICSImage(czi_folder + current_sample + ".czi")
    img_data = img.data
    # The last two axes are indexed as (Y, X) below, so they give the image size
    img_height, img_width = img_data.shape[-2], img_data.shape[-1]
    # Saving all the arrays corresponding to a single HSC (single channel) in the hsc_list
    for j in range(len(coord)):
        # Plain Python ints for both OpenCV points and array indexing
        pos_x = int(coord.at[j, "PositionX"])
        pos_y = int(coord.at[j, "PositionY"])
        pos_z = int(coord.at[j, "PositionZ"])
        slice_name = age_prefix + "_" + current_sample + "_slice " + str(pos_z) + ".tiff"
        slice_path = sample_dir + "/" + slice_name
        if slice_name not in existing_files:
            # Copy, so the rectangle is not drawn into the image data that is
            # cropped further down
            rectangled = img_data[0, 0, 0, pos_z, :, :].copy()
        else:
            # Reusing the saved overview keeps rectangles of earlier HSCs
            rectangled = cv2.imread(slice_path)
        cv2.rectangle(rectangled, (pos_x - cropsize, pos_y - cropsize), (pos_x + cropsize, pos_y + cropsize), (255, 255, 255), 1)
        cv2.imwrite(slice_path, rectangled)
        existing_files.add(slice_name)
        hsc_age[hsc_counter] = age_label
        # Storing sample name in the hsc_samples variable
        hsc_sample[hsc_counter] = current_sample
        # HSCs whose crop window would leave the image are not cropped; they
        # are reported in the 'Excluded?' column instead
        outside_image = (
            pos_y - cropsize < 0
            or pos_y + cropsize > img_height
            or pos_x - cropsize < 0
            or pos_x + cropsize > img_width
        )
        if outside_image:
            hsc_list[hsc_counter] = "NA"
            hsc_excluded[hsc_counter] = "Yes: " + str(coord.at[j, "Name"]) + " from " + current_sample + ", slice " + str(pos_z)
        else:
            # Saving images of each channel (corresponding to np arrays)
            hsc_list[hsc_counter] = img_data[0, 0, 0:5, pos_z, (pos_y - cropsize):(pos_y + cropsize), (pos_x - cropsize):(pos_x + cropsize)]
            hsc_now = hsc_list[hsc_counter]
            # At most five channels are kept; fewer are handled as well
            for k in range(hsc_now.shape[0]):
                im = hsc_now[k, :, :]
                cv2.imwrite(file_stem + "_" + str(j + 1) + "_ch0" + str(k) + ".tiff", im)
        # Storing the polarity of each HSC
        hsc_polarity[hsc_counter] = coord.at[j, "POLARITY"]
        hsc_points[hsc_counter] = coord.at[j, "Name"]
        # Updating the counter for next HSC
        hsc_counter += 1

# Create and store dataset
# NOTE(review): the 'Numpy' column holds whole arrays; confirm that the way
# they end up in the Excel sheet is sufficient for the later training step
hsc_dataset = pd.DataFrame(list(zip(hsc_list, hsc_excluded, hsc_age, hsc_sample, hsc_points, hsc_polarity)),  columns =['Numpy', 'Excluded?', 'Status', 'Sample', 'Reference', "Polarity"])
hsc_dataset.to_excel(training_images_dir + "numpys.xlsx", index=False)