## TIF Image Processing for ark-analysis

This reads in component images from Akoya Inform and MASK files created by CellPose 2.0 and arranges them in a format that ark-analysis would prefer. We do this in order to call the ark.measure functions directly for normalized total-intensities, to run pixie (optional) and so we might use mantis-viewer.

Note: ROI refers to an Akoya-stamped region of interest and, for our purposes, is equivalent to the 'fov' (field of view) terminology used in the ark-analysis documentation.

For each ROI sample image in the given Panel and component files dir, 
1. read in all the component_data.tif filenames
2. split each 10-channel tif into one single-channel tif per channel (written to a `TIFs` subfolder)
3. copy everything into a directory structure with one subfolder for each roi/fov. 

e.g. 

fov0 (foldername '091221 P9HuP28 #01 S18-20937 F4_[10347,43351]')    
>TIFs  
 >>CD3.tif  
 >>CD4.tif  
 >>CD8.tif  
 >> ...
 
>091221 P9HuP28 #01 S18-20937 F4_[10347,43351]_feature_0.tif  
>091221 P9HuP28 #01 S18-20937 F4_[10347,43351]_feature_1.tif  
   
 
 Note: this is similar to the structure for mantis-viewer outlined here: https://github.com/angelolab/ark-analysis



### Dependencies:  
1. Cell segmentation must be done (Cellpose 2.0)
2. component_data.tif files are spectrally unmixed multichannel images from Akoya InForm



In [None]:
import pandas as pd
import numpy as np
import os
import matplotlib.pyplot as plt
# %matplotlib inline 

#from skimage import io, color, filters, exposure
#from skimage.transform import resize, rotate
import tifffile as tif
import shutil
from skimage import io

In [None]:

# Channel labels per panel, listed in the same order as the planes of the
# component tif (label i names plane i).
channels_p28 = [
    'CD3', 'pSTAT3 Y705', 'CD4', 'pSTAT5', 'pSTAT3 S727',
    'pSTAT1', 'SOX10S100', 'CD8', 'DAPI', 'Autofluorescence',
]

channels_p68 = [
    'pSTAT6', 'SOX10S100', 'pSTAT4', 'CD4', 'CD3',
    'pSTAT1 S727', 'pSTAT2', 'CD8', 'DAPI', 'Autofluorescence',
]

# Plane indices of the "cyto" markers, looked up by label so they cannot
# drift away from the lists above.
cyto_channels_p28 = [
    channels_p28.index(label) for label in ('CD3', 'CD4', 'SOX10S100', 'CD8')
]  # -> [0, 2, 6, 7]
cyto_channels_p68 = [
    channels_p68.index(label) for label in ('SOX10S100', 'CD4', 'CD3', 'CD8')
]  # -> [1, 3, 4, 7]

# Panel currently being processed
channels = channels_p68
cyto_channels = cyto_channels_p68

print(channels)

In [None]:
# Project root; every input and output location below is built from it.
root_dir = '/Users/annmstrange/Documents/Projects/Tumor IF'
# sample_data_file = os.path.join(root_dir,'Analysis_Py/TumorIF_SampleData.xlsx')

# Component image exports. Alternatives used for other runs:
#   os.path.join(root_dir, 'Panel2/Export P28 3x')          # 3x
#   os.path.join(root_dir, "Panel2/Export P28 full40x")
#   os.path.join(root_dir, "Panel2/Export P28 21 full40x")
components_dir = os.path.join(root_dir, "Panel3/Export P68 full40x")

# Masks. Alternatives used for other runs:
#   os.path.join(root_dir, 'Panel2/CellPose_moreCyto/Masks')
#   os.path.join(root_dir, "Panel2/CellPose40x_23/Masks")
#   os.path.join(root_dir, "Panel2/CellPose40x_21/Masks")
masks_dir = os.path.join(root_dir, "Panel3/CellPose40x_23/Masks")

# Output location; target_dir and tifs_dir point at the same folder.
# Alternatives used for other runs:
#   os.path.join(root_dir, 'Panel2/ark-analysis_23')
#   os.path.join(root_dir, 'Panel2/ark-analysis_21')
target_dir = os.path.join(root_dir, 'Panel3/ark-analysis_23')
tifs_dir = os.path.join(root_dir, 'Panel3/ark-analysis_23')

# Quick check: one True/False per directory, in the order listed
print(*(os.path.exists(path)
        for path in (components_dir, masks_dir, target_dir, tifs_dir)))

In [None]:
# Create the output folder (and any missing parents). exist_ok=True makes
# re-running the cell harmless and avoids the check-then-create race; unlike
# the exists() check it also raises if the path is an existing *file*.
os.makedirs(target_dir, exist_ok=True)

In [None]:
# For OneDrive file locations, uncomment the best line to use here

# !cd "cd ~/OneDrive\ -\ The\ University\ of\ Colorado\ Denver/Documents/Projects/Tumor\ IF/Panel2_Analysis "
#!cd "Panel2/020522 P28BatchAnalysis3x"

# We'll need 3x (low res files) from 020522\ P28BatchAnalysis3x/components, composites
#    binary masks, maybe qpTiff_as_tifs, binary_seg_maps

In [None]:
# # how os.path works...
# dir_name = os.path.basename(os.path.join(root_dir,"Panel2/020522 P28BatchAnalysis3x"))
# print(dir_name)
# print(os.path.dirname(dir_name)) 
# filename_suffix="tif"


In [None]:
# code for recursively looking at files in subfolders
# builds lists of files we want to process
# simplest:
import os
import fnmatch
import re

def get_files_in_folder(src, pattern):
    '''
    Recursively collect the files below src whose names match pattern.

    Args:
        src: full path of the folder to walk (all subfolders are visited)
        pattern: string like '*_composite_image.tif' to use with fnmatch.filter
    Returns: sorted list of full filenames (empty if nothing matches)
    '''
    matches = [
        os.path.join(folder, name)
        for folder, _subfolders, names in os.walk(src)
        for name in fnmatch.filter(names, pattern)
    ]
    matches.sort()
    return matches
 
 
# Every component image below components_dir, sorted by full path
fname_component = get_files_in_folder(components_dir, '*_component_data.tif')

print(f'found {len(fname_component)} files matching the pattern')
print(fname_component[0])



In [None]:
# get a component from our data: the first file found is used as the sample
# for the single-file steps further down
component_file1 = fname_component[0]
component_file1

In [None]:
# to get code working, first get the 1st image from the list
#
# Read with tifffile directly (imported at the top as `tif`): the `plugin`
# argument of skimage.io.imread is deprecated, and skimage's wrapper may move
# a leading 3- or 4-plane axis to the end, which would break plane indexing.
img_component = tif.imread(fname_component[0])

In [None]:
# Check the shapes: the first dimension is expected to be the channel planes

print('Note: we should get 10 layers here in first dim, same as ImageJ BioFormats import')
print(f'img_component shape {img_component.shape}')


In [None]:
# get MASK files
#
# masks_dir is already built from root_dir, so it is passed as-is; joining
# root_dir onto it again would double the prefix if root_dir were relative.
mask_filenames_list = get_files_in_folder(masks_dir, 'MASK_*rgb.tif')
print("{} mask files found ".format(len(mask_filenames_list)))
mask_filenames_list[0]

## Copy Mask files to ark format

In [None]:

# target file should look like:
# (target_dir is already built from root_dir, so it is not joined onto
# root_dir a second time)
print(target_dir)

for mask_file in mask_filenames_list:
    # Derive every output name from the mask's base name, computed once.
    mask_name = os.path.basename(mask_file)
    roi_name = mask_name.replace('MASK_', '')
    sample_roi = roi_name.replace('_rgb.tif', '')
    print(sample_roi)

    # One folder per roi/fov under target_dir; print whether it existed
    # before this run, then create it if needed.
    target_dir1 = os.path.join(target_dir, sample_roi)
    print(target_dir1, os.path.exists(target_dir1))
    os.makedirs(target_dir1, exist_ok=True)

    ark_mask_fn0 = roi_name.replace('_rgb.tif', '_feature_0.tif')
    ark_mask_fn1 = roi_name.replace('_rgb.tif', '_feature_1.tif')
    print('target file {}'.format(os.path.join(target_dir1, mask_name)))
    print(' also for ark {}'.format(ark_mask_fn0))

    # The same mask is stored three times: under its original name and under
    # both ark feature names.
    for out_name in (mask_name, ark_mask_fn0, ark_mask_fn1):
        shutil.copy(mask_file, os.path.join(target_dir1, out_name))
        


### Split up the TIFs by roi(fov) and channel

In [None]:
# Display the sample component file that the single-file trial run uses.
component_file1

In [None]:
# NOTE(review): bare expression that is not the last statement of its cell,
# so it presumably displays nothing — looks like a leftover; confirm.
component_file1


def copy_components_for_ark(component_file1, tif_dir, channel_names=None):
    '''
    Split one multichannel component tif into single-channel tifs for ark.

    Writes <tif_dir>/<roi>/TIFs/<channel>.tif for each channel label, where
    <roi> is the component file name without '_component_data.tif'.
    Existing files of the same name are overwritten.

    Args:
        component_file1 is location of the Polaris-generated 10 channel tif
        tif_dir is the directory where we'll copy single channel tifs
           with folder structure by roi (aka fov in MIBI terms)
        channel_names is an optional list of labels, one per image plane in
           plane order; when omitted the notebook-level `channels` list is used
    Returns:
        True once every channel has been written
    Raises:
        IndexError if the image has fewer planes than there are labels
           (checked before anything is written)
    '''
    names = channels if channel_names is None else channel_names

    # tifffile is called directly (file-level alias `tif`): the `plugin`
    # argument of skimage.io.imread is deprecated, and skimage's wrapper may
    # move a leading 3- or 4-plane axis to the end, breaking the plane
    # indexing below.
    img_component = tif.imread(component_file1)
    print("processing {}".format(os.path.basename(component_file1)))

    # Fail before creating any output instead of part-way through the loop.
    if img_component.ndim < 3 or img_component.shape[0] < len(names):
        raise IndexError(
            "{} has shape {} but {} channel labels were given".format(
                os.path.basename(component_file1), img_component.shape, len(names)))

    roi_sub_folder = os.path.basename(component_file1).replace('_component_data.tif', '')
    # Kept in its own name so the tif_dir argument is not overwritten.
    roi_tif_dir = os.path.join(tif_dir, roi_sub_folder, "TIFs")
    os.makedirs(roi_tif_dir, exist_ok=True)

    # Plane i of the stack is saved under label i.
    for i, name in enumerate(names):
        img_i = img_component[i, :, :]
        tif.imwrite(os.path.join(roi_tif_dir, name + '.tif'), img_i, photometric='minisblack')

    return True
    
# test for one: trial run on the single sample file before the full batch
copy_components_for_ark(component_file1, tifs_dir)    

In [None]:

# Split every component file that was found into per-channel tifs under
# tifs_dir (this includes the file already handled by the trial run).
for comp_file in fname_component:    
    copy_components_for_ark(comp_file, tifs_dir)   
    

In [None]:
# files processed

