This notebook converts single-channel grayscale images into 5-channel images by adding blank (all-zero) channels. 
This is done to make the images compatible with the pre-trained models that expect 5-channel input. 
Unfortunately, the code in this notebook will need to be adapted to match each new dataset.    

Note that the data used here has four channels, but the model needs a 5-channel input, so one blank channel is appended to each crop.

In [1]:
import os
import pathlib
import shutil
import sys

import cv2

# show the image
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import tifffile as tiff

## Import paths

In [2]:
# Locate the single-cell feature table; strict resolution raises right away
# if the parquet file does not exist.
data_file_dir = pathlib.Path(
    "../../data/extracted_features/run_20231017ChromaLive_6hr_4ch_MaxIP_sc.parquet"
)
data_file_dir = data_file_dir.resolve(strict=True)

# Load the whole table into memory.
cp_feature_data = pd.read_parquet(path=data_file_dir)
# Report (rows, columns), then preview the first rows as the cell output.
print(cp_feature_data.shape)
cp_feature_data.head(n=5)

(197389, 2119)


Unnamed: 0,Metadata_plate,Metadata_Well,Metadata_number_of_singlecells,Metadata_compound,Metadata_dose,Metadata_control,Metadata_ImageNumber,Metadata_Cells_Number_Object_Number,Metadata_Cytoplasm_Parent_Cells,Metadata_Cytoplasm_Parent_Nuclei,...,Nuclei_TrackObjects_DistanceTraveled_50,Nuclei_TrackObjects_FinalAge_50,Nuclei_TrackObjects_IntegratedDistance_50,Nuclei_TrackObjects_Label_50,Nuclei_TrackObjects_Lifetime_50,Nuclei_TrackObjects_Linearity_50,Nuclei_TrackObjects_ParentImageNumber_50,Nuclei_TrackObjects_ParentObjectNumber_50,Nuclei_TrackObjects_TrajectoryX_50,Nuclei_TrackObjects_TrajectoryY_50
0,1,E-10,4928,Staurosporine,78.13,test,1457,1,1,4,...,0.0,,0.0,4,1,1.0,0,0,0,0
1,1,E-10,4928,Staurosporine,78.13,test,1457,2,2,5,...,0.0,,0.0,5,1,1.0,0,0,0,0
2,1,E-10,4928,Staurosporine,78.13,test,1457,3,3,6,...,0.0,,0.0,6,1,1.0,0,0,0,0
3,1,E-10,4928,Staurosporine,78.13,test,1457,4,4,7,...,0.0,,0.0,7,1,1.0,0,0,0,0
4,1,E-10,4928,Staurosporine,78.13,test,1457,5,5,8,...,0.0,,0.0,8,1,1.0,0,0,0,0


In [3]:
# Split the table into metadata and feature columns.
#
# Columns holding image file/path names ("Name"), bounding boxes
# ("BoundingBox") or object centers ("Center_") describe where a cell is,
# not what it looks like, so they are tagged with a "Metadata_" prefix.
metadata_keywords = ("Name", "BoundingBox", "Center_")


def add_metadata_prefix(column: str) -> str:
    """Return ``column`` prefixed with ``Metadata_`` when it names metadata.

    A column is prefixed when it contains any of ``metadata_keywords``.
    Columns that already start with ``Metadata_`` are returned unchanged, so
    re-running this cell (or a column matching several keywords) never
    produces a ``Metadata_Metadata_...`` name.
    """
    if column.startswith("Metadata_"):
        return column
    if any(keyword in column for keyword in metadata_keywords):
        return "Metadata_" + column
    return column


# a single rename pass replaces the three chained, non-idempotent renames
cp_feature_data = cp_feature_data.rename(columns=add_metadata_prefix)
# get columns that contain Metadata
metadata_columns = [col for col in cp_feature_data.columns if "Metadata" in col]
metadata_df = cp_feature_data[metadata_columns]
# everything that is not metadata is treated as a feature
feature_df = cp_feature_data.drop(columns=metadata_columns)
metadata_df.head()

Unnamed: 0,Metadata_plate,Metadata_Well,Metadata_number_of_singlecells,Metadata_compound,Metadata_dose,Metadata_control,Metadata_ImageNumber,Metadata_Cells_Number_Object_Number,Metadata_Cytoplasm_Parent_Cells,Metadata_Cytoplasm_Parent_Nuclei,...,Metadata_Nuclei_AreaShape_BoundingBoxArea,Metadata_Nuclei_AreaShape_BoundingBoxMaximum_X,Metadata_Nuclei_AreaShape_BoundingBoxMaximum_Y,Metadata_Nuclei_AreaShape_BoundingBoxMinimum_X,Metadata_Nuclei_AreaShape_BoundingBoxMinimum_Y,Metadata_Nuclei_AreaShape_Center_X,Metadata_Nuclei_AreaShape_Center_Y,Metadata_Nuclei_Location_Center_X,Metadata_Nuclei_Location_Center_Y,Metadata_Nuclei_Location_Center_Z
0,1,E-10,4928,Staurosporine,78.13,test,1457,1,1,4,...,2279.0,825.0,77.0,782.0,24.0,802.447719,50.757895,802.447719,50.757895,0.0
1,1,E-10,4928,Staurosporine,78.13,test,1457,2,2,5,...,2200.0,1803.0,106.0,1753.0,62.0,1777.088889,82.44127,1777.088889,82.44127,0.0
2,1,E-10,4928,Staurosporine,78.13,test,1457,3,3,6,...,3180.0,229.0,151.0,176.0,91.0,201.611366,120.887171,201.611366,120.887171,0.0
3,1,E-10,4928,Staurosporine,78.13,test,1457,4,4,7,...,2880.0,1464.0,171.0,1424.0,99.0,1442.588672,132.805942,1442.588672,132.805942,0.0
4,1,E-10,4928,Staurosporine,78.13,test,1457,5,5,8,...,1855.0,1600.0,156.0,1565.0,103.0,1582.456189,127.952712,1582.456189,127.952712,0.0


In [4]:
# Crop a square window around every nucleus center, stack the four channels,
# pad with blank channels up to the model's input depth and save one TIFF per
# cell. Cells whose window does not fit inside the image are omitted.
total_counter = 0
ommited_counter = 0
# define the pseudo radius: every crop is (2 * radius) x (2 * radius) pixels
radius = 50
# number of channels the pre-trained model expects as input
model_channels = 5
# directory the channel images are read from (loop invariant)
image_path = pathlib.Path(
    "../../../live_cell_timelapse_apoptosis/2.cellprofiler_ic_processing/illum_directory/20231017ChromaLive_6hr_4ch_MaxIP/"
)
# root directory for the saved crops; one sub-directory is created per well
crop_root = pathlib.Path("../../data/processed_images/crops")

# file-name columns of the channels, in the order they are stacked
channel_columns = [
    "Metadata_Image_FileName_DNA",
    "Metadata_Image_FileName_488_1",
    "Metadata_Image_FileName_488_2",
    "Metadata_Image_FileName_561",
]
# only these columns are needed per cell; selecting them up front fails fast
# on a missing column and avoids building a very wide row object per cell
required_columns = channel_columns + [
    "Metadata_Nuclei_Location_Center_X",
    "Metadata_Nuclei_Location_Center_Y",
    "Metadata_Well",
    "Metadata_Nuclei_Number_Object_Number",
]


def pad_channels(image: np.ndarray, n_channels: int) -> np.ndarray:
    """Append all-zero channels on the last axis until there are ``n_channels``.

    The blank channels use the dtype of ``image`` so the pixel data is not
    silently upcast to float64 by the concatenation. Images that already have
    at least ``n_channels`` channels are returned unchanged.
    """
    missing = n_channels - image.shape[-1]
    if missing <= 0:
        return image
    blank = np.zeros(image.shape[:-1] + (missing,), dtype=image.dtype)
    return np.concatenate((image, blank), axis=-1)


# single-entry cache: consecutive cells from the same field of view reuse the
# images already in memory instead of re-reading four files per cell
loaded_names = None
loaded_images = None

for values in metadata_df[required_columns].itertuples(index=False, name=None):
    total_counter += 1
    image_information = dict(zip(required_columns, values))

    channel_names = tuple(image_information[col] for col in channel_columns)
    if channel_names != loaded_names:
        # strict resolution raises FileNotFoundError for a missing image
        loaded_images = [
            tiff.imread((image_path / name).resolve(strict=True))
            for name in channel_names
        ]
        loaded_names = channel_names

    # check if crop is an edge case
    # Where edge case is cells that are too close to the edge of the image to crop
    # This ensures that all crops are the same dimensions and can be used in the model
    raw_x = image_information["Metadata_Nuclei_Location_Center_X"]
    raw_y = image_information["Metadata_Nuclei_Location_Center_Y"]
    height, width = loaded_images[0].shape[:2]
    fits_in_image = False
    # a missing (NaN) center cannot be cropped and is treated as an edge case
    if np.isfinite(raw_x) and np.isfinite(raw_y):
        center_x = int(raw_x)
        center_y = int(raw_y)
        fits_in_image = (
            center_y - radius >= 0
            and center_y + radius <= height
            and center_x - radius >= 0
            and center_x + radius <= width
        )
    if not fits_in_image:
        print(
            f"Image {image_information['Metadata_Image_FileName_DNA']} is an edge case. Ommitting..."
        )
        ommited_counter += 1
        continue

    # merge the channels to a single image of shape (2*radius, 2*radius, channels)
    window = (
        slice(center_y - radius, center_y + radius),
        slice(center_x - radius, center_x + radius),
    )
    image_merge = np.stack([image[window] for image in loaded_images], axis=-1)
    image_merge = pad_channels(image_merge, model_channels)

    # save images to disk
    image_save_path = crop_root / str(image_information["Metadata_Well"])
    image_save_path.mkdir(parents=True, exist_ok=True)
    # build the name from stem + suffix so a ".tif" source (or any other
    # extension) still gets a unique per-cell name instead of overwriting
    dna_name = pathlib.Path(image_information["Metadata_Image_FileName_DNA"])
    file_name = (
        f"{dna_name.stem}"
        f"_{image_information['Metadata_Nuclei_Number_Object_Number']}"
        f"_crop{dna_name.suffix or '.tiff'}"
    )
    image_save_path = image_save_path / file_name
    tiff.imwrite(image_save_path, image_merge)
    print(f"Image saved to {image_save_path}")

FileNotFoundError: [Errno 2] No such file or directory: '../../../live_cell_timelapse_apoptosis/2.cellprofiler_ic_processing/illum_directory/20231017ChromaLive_6hr_4ch_MaxIP/E-10_F0001_T0001_Z0001_C01_illumcorrect.tiff'

In [None]:
# Summarize how many single-cell crops were attempted, omitted and saved.
saved_counter = total_counter - ommited_counter
print(f"Total cell images: {total_counter}")
print(f"Ommited cell images: {ommited_counter}")
print(f"Total saved cell images: {saved_counter}")
# an empty metadata table leaves total_counter at 0; avoid dividing by zero
if total_counter > 0:
    print(
        f"{round((saved_counter / total_counter * 100), 2)}% of the images were saved"
    )
else:
    print("No cell images were processed")