#Script for removing ROIs from images, and removing cores
#The ROIs were generated by inspecting the images and selecting areas with noise, bubbles, tissue folding and other artifacts
#Some cores were removed if the proportion of cells was too low or if they had staining problems

In [43]:
import numpy as np
import tifffile
import skimage
from tifffile import imread
import pandas as pd
import os
import glob
import matplotlib.pyplot as plt
import re

In [44]:
#--- Project root and per-slide input folders ---
basePath = "D:/users/fperez/NKI_TMAs_AF/"  #Root folder of the project; slide folders (TMA_*) are searched here
maskPath = "whole-cell-segmentation2/"  #Sub-folder of each slide that holds the segmentation masks
maskFileName = "_Probabilities_cytomask2.tiff"  #File-name suffix of the mask images
cropCoordsPath = "dearray/cropCoords/"  #Sub-folder of each slide that holds the crop coordinates of its cores
cropCoordsFileName = "*_cropCoords.csv"  #Glob pattern of the crop-coordinate files
outputsubfolder = "quantification2"  #Not referenced by the code visible in this script

#--- Hand-drawn ROI annotations ---
ROIsPath = "ROI_Labels/"  #Folder (under basePath) with one sub-folder of ROI files per slide
ROIsuffix = ".txt"  #Extension of the ROI files

#--- Lookup tables ---
OMEzises = "D:/users/fperez/NKI_TMAs_AF/devNKI-scripts/utils/Ometif_sizes.csv"  #Sizes of the OME-TIFFs, one row per slide
Cores_to_ignore1 = "D:/users/fperez/NKI_TMAs_AF/devNKI-scripts/Total_cores_to_ignore.csv"  #Cores to drop (read without a header row)
Cores_to_ignore2 = "D:/users/fperez/NKI_TMAs_AF/devNKI-scripts/Total_extra-cores_to_ignore.csv"  #Extra cores to drop (read with a header row)

#--- Quantification tables: <folder>/<prefix><slide><suffix> ---
quantification_folder = "Cell-segment2_QC/"
quantification_prefix = "annotated_"
quantification_suffix = ".csv"  #Suffix of the input table
quantification_suffix_output = "_ROIs-label.csv"  #Suffix of the table written by this script

In [45]:
#Slide folders to process: every entry under the project root whose name starts with "TMA_"
slides = glob.glob(basePath + "TMA_*")

#Lookup tables shared by all slides
Ome_sizes = pd.read_csv(OMEzises)  #First row is the header (Var1 = slide, Var2/Var3 = size columns used below)
channelNames = pd.read_csv(basePath + "channel_list.csv")

#Cores excluded from the final output; the first table has no header row, the second one has
cores2ignore1 = pd.read_csv(Cores_to_ignore1, header=None)
cores2ignore2 = pd.read_csv(Cores_to_ignore2)

#Flag the cells that fall inside hand-annotated ROIs and drop the ignored cores, slide by slide.
#For every slide folder in `slides` this loop:
#  1. reads the slide's quantification table and adds an Inside_ROI column (0 by default),
#  2. paints the slide's ROI rectangles into a slide-sized boolean matrix,
#  3. crops that matrix to each core and sets Inside_ROI = 1 for the cells whose mask label overlaps a ROI,
#  4. removes the cores listed in the two ignore tables and writes the table to a new CSV.
#Raises ValueError when a slide has no size entry, when masks and crop files cannot be paired,
#or when a mask and its crop area differ in size by more than one pixel.
for i in slides:
    #basename copes with both "/" and "\\" separators, whichever glob returned
    slide = os.path.basename(os.path.normpath(i))

    print(slide)
    #Defining input and output files
    maskfiles = glob.glob(basePath + slide + "/" + maskPath + "*" + maskFileName)
    cropfiles = glob.glob(basePath + slide + "/" + cropCoordsPath + "*" + cropCoordsFileName)
    ROIfiles = glob.glob(basePath + ROIsPath + slide + "/" + "*" + ROIsuffix)
    quantification_file = (basePath + quantification_folder + "/" + quantification_prefix + slide + quantification_suffix)
    quantification_output = (basePath + quantification_folder + "/" + quantification_prefix + slide + quantification_suffix_output)

    #Masks and crop files are paired by position in the two glob results.
    #NOTE(review): glob does not guarantee any ordering; this assumes both listings come back
    #in the same core order - confirm, or match the files by core name instead.
    if len(cropfiles) < len(maskfiles):
        raise ValueError("Slide {}: found {} masks but only {} crop-coordinate files".format(
            slide, len(maskfiles), len(cropfiles)))
    if len(cropfiles) > len(maskfiles):
        print("Warning: slide {} has {} crop-coordinate files for {} masks".format(
            slide, len(cropfiles), len(maskfiles)))

    #Reading quantification file and adding column for Inside_ROI status
    quantification_table = pd.read_csv(quantification_file)
    quantification_file2 = quantification_table.rename(columns={'Unnamed: 0':'Row_number'})
    quantification_file2["Inside_ROI"] = 0

    #Generating slide-matrix full of False with the size of ometiff slide
    Ome_sizes_slide = Ome_sizes[Ome_sizes.Var1 == slide]
    if Ome_sizes_slide.empty:
        raise ValueError("No OME-TIFF size entry found for slide: " + slide)
    width = Ome_sizes_slide['Var2'].values[0] #OMEwidth
    heigh = Ome_sizes_slide['Var3'].values[0]
    #Only ROI membership is needed, so a boolean matrix is enough and much smaller than an int one
    slide_zeros = np.zeros((heigh,width), dtype=bool)

    #Marking ROI positions in slide-matrix
    #Columns 3 and 4 are used as the x/y start of each rectangle, columns 5 and 6 as its x/y extent
    for r_file in ROIfiles:
        rois = pd.read_table(r_file, header=None)
        for start_x, start_y, size_x, size_y in zip(rois[3], rois[4], rois[5], rois[6]):
            slide_zeros[start_y:start_y + size_y, start_x:start_x + size_x] = True

    #For each core, crop roi slide-matrix, find overlapping cells in mask with ROIs
    for m in range(len(maskfiles)):
        mask_name = os.path.basename(maskfiles[m])
        mask = plt.imread(maskfiles[m])
        #The core number is the first "_"-separated token of the file name without its "core"/"Core" prefix
        core_number = int(re.sub(r'[Cc]ore', '', mask_name.split("_")[0]))
        #First row of the crop file: start_x, start_y, end_x, end_y
        crop_area = pd.read_csv(cropfiles[m], header=None)
        start_x = crop_area[0][0]
        start_y = crop_area[1][0]
        end_x = crop_area[2][0]
        end_y = crop_area[3][0]

        #For some mask there is not exact overlap due to re-croping by hand, adding value of 1
        if ((end_x - start_x) != mask.shape[1]):
            if ((end_x - start_x + 1) != mask.shape[1]):
                #Raising (instead of the interactive exit() helper) reliably stops the run with a failure
                raise ValueError(
                    "Areas not the same for: {} & {} (shape of mask: {}, shape of crop area: {})".format(
                        mask_name, os.path.basename(cropfiles[m]), mask.shape[1], end_x - start_x))
            end_x += 1
            end_y += 1
        croped_rois = slide_zeros[start_y:end_y,start_x:end_x]
        if (croped_rois.shape[0] != mask.shape[0]):
            raise ValueError("Something bad with: " + mask_name)

        if croped_rois.any():
            #Extract cells Ids and in quantification_file add value of 1 to Inside_ROI column
            cell_ids = np.unique(mask[croped_rois])
            #Label 0 is the mask background, not a cell, so it must never flag a row
            cell_ids = cell_ids[cell_ids != 0]
            matching_rows = ((quantification_file2["CoreId"] == core_number) & (quantification_file2["CellId"].isin(cell_ids))).values
            quantification_file2.loc[matching_rows, ["Inside_ROI"]] = 1

    #Ignoring cores pre-selected in final output
    slide_cores2ignore = cores2ignore1[cores2ignore1.loc[:,0] == slide][1].values
    slide_cores2ignore2 = cores2ignore2[cores2ignore2.loc[:,"Slide"] == slide]["Core"].values
    slide_cores2ignoreall = np.concatenate((slide_cores2ignore, slide_cores2ignore2))
    quantification_file2 = quantification_file2[~quantification_file2["CoreId"].isin(slide_cores2ignoreall)]

    #Save new quantification file
    quantification_file2.to_csv(quantification_output, index=False)

TMA_18_810
TMA_31_1020
TMA_33_576
TMA_34_504
TMA_41_812
TMA_42_961
TMA_43_616
TMA_44_810
TMA_45_312
TMA_46_325
