In [None]:
# multiplex image data extraction pipeline
# =========================================

# the latest version of this notebook's template can be found here:
# https://gitlab.com/bue/jinxif/-/blob/master/jupyter/mpleximage_data_extraction_pipeline.ipynb
# wget https://gitlab.com/bue/jinxif/-/raw/master/jupyter/mpleximage_data_extraction_pipeline.ipynb

# all jinxif functions have a docstring.
# e.g. use help(segment.segment_spawn) to get detailed information about the segment_spawn function.


In [None]:
# load libraries
# from jinxif import _version
# from jinxif import afsub
# from jinxif import basic
# from jinxif import config
# from jinxif import feat
# from jinxif import imgmeta
# from jinxif import ometiff
# from jinxif import regist
# from jinxif import sane
# from jinxif import segment
# from jinxif import thresh
# from jinxif import util

from mplexable import _version
from mplexable import afsub
from mplexable import basic
from mplexable import config
from mplexable import feat
from mplexable import imgmeta
from mplexable import ometiff
from mplexable import regist
from mplexable import sane
from mplexable import segment
from mplexable import thresh
from mplexable import util

# report the version of the package the modules above are imported from.
# the modules are imported from mplexable (the jinxif imports are commented out),
# so the message has to name mplexable.
print('you are running mplexable version:', _version.__version__)

In [None]:
# batch and slide constants, handed to the function calls in the cells below
ls_slide = ['RS-mTMA-5']  # slide ids to be processed
s_batch = 'RS-mTMA-20210707'  # batch id

In [None]:
# utility to handle file naming convention

# this step is specific for the chinlab workflow.
# by naming convention (specified in ~/.jinxif/config.py) the marker from all channels other than DAPI
# (which is always in channel 1) are listed between the first and second underscore and separated by dots.
# unfortunately, the software we use to generate the raw image tiff files from the microscopes czi files 
# (Zeiss Zen Blue) turns dots into underscores.
# this function call will generate a "rename dictionary" that can be used as input for util.dchange_fname, 
# to rename the tiff files back to the specified naming convention standard.  

util.underscore_to_dot(
    # file system, taken from the naming convention dictionary
    s_format_rawdir = config.d_nconv['s_format_rawdir'],  # '{}{}/' # s_rawdir, s_slide
    s_rawdir = config.d_nconv['s_rawdir'],  # 'RawImages/'
    # file name start and end pattern
    s_end = 'ORG.tif',
    s_start = 'R',
    # the underscores that should be turned back into dots
    ei_underscore_to_dot = {1,2,3},
    # slides to process
    es_slide = ls_slide,
)


In [None]:
# utility to rename files to be naming convention conform (specified in ~/.jinxif/config.py) 

# define renaming rules
ds_rename = {'_oldstring_': '_newstring_'}  # old string : new string

# dry run first: keep b_test commented out, so that the default (b_test=True) applies. check output!
# only when the function does what you want, uncomment b_test = False and re-run the cell.
# afterwards, comment b_test out again, so that the default dry run setting is active again.

for s_slide in ls_slide:
    util.dchange_fname(
        s_wd = f"{config.d_nconv['s_rawdir']}{s_slide}/",  # 'RawImages/', # s_slide
        #b_test = False,
        ds_rename = ds_rename,
    )


In [None]:
# 1.1 check original czi image file names

# these functions help to check the original czi folder for completeness of czi image files.
# thereby, the file naming convention (specified in ~/.jinxif/config.py) is enforced.
# additionally, marker names will be checked against correct labeling, as specified in ~/.jinxif/config.py.
# at least one original czi is required and used for scene position metadata extraction. 
# original czi files might be used for exposure time metadata extraction.

# run function and check output!
# if filenames or markers are non-conform, correct them manually.
# if markers are entirely missing, specify them in ~/.jinxif/config.py

for s_slide in ls_slide:
    # parse the file names in the slide's original czi folder
    df_img_slide = basic.parse_czi_original(
        s_wd = config.d_nconv['s_format_czidir_original'].format(
            config.d_nconv['s_czidir'],  # s_czidir
            s_slide,  # s_slide
        ),  # '{}{}/original/'
    )
    # sanity checks on the parsed images: image count first, then marker labels
    sane.count_images(df_img = df_img_slide)
    sane.check_markers(
        es_markerpartition_standard = config.es_markerpartition_standard,
        es_markerdapiblank_standard = config.es_markerdapiblank_standard,
        df_img = df_img_slide,
    )


In [None]:
# 1.2 check splitscene czi image file names

# these functions help to check the splitscenes czi folder for completeness of czi image files.
# thereby, the file naming convention (specified in ~/.jinxif/config.py) is enforced.
# additionally, marker names will be checked against correct labeling, as specified in ~/.jinxif/config.py.
# splitscenes czi files might be used for exposure time metadata extraction.
# note: splitscenes czi files can not be used for scene position metadata extraction!

# run function and check output!
# if filenames or markers are non-conform, correct them manually.
# if markers are entirely missing, specify them in ~/.jinxif/config.py

for s_slide in ls_slide:
    # parse the file names in the slide's splitscenes czi folder
    df_img_slide = basic.parse_czi_splitscene(
        s_wd = config.d_nconv['s_format_czidir_splitscene'].format(
            config.d_nconv['s_czidir'],  # s_czidir
            s_slide,  # s_slide
        ),  # '{}{}/splitscenes/'
    )
    # sanity checks on the parsed images: image count first, then marker labels
    sane.count_images(df_img = df_img_slide)
    sane.check_markers(
        es_markerpartition_standard = config.es_markerpartition_standard,
        es_markerdapiblank_standard = config.es_markerdapiblank_standard,
        df_img = df_img_slide,
    )


In [None]:
# 1.3 check raw tiff image file names

# these functions help to check the raw tiff folder for completeness of tiff image files.
# thereby, the file naming convention (specified in ~/.jinxif/config.py) is enforced.
# additionally, marker names will be checked against correct labeling, as specified in ~/.jinxif/config.py.
# note: raw tiff files can not be used for exposure time or scene position metadata extraction!

# run function and check output!
# if filenames or markers are non-conform, 
# correct them with the utility util.dchange_fname function in the jupyter cell above.
# if markers are entirely missing, specify them in ~/.jinxif/config.py

for s_slide in ls_slide:
    print(f'process slide: {s_slide} ...')
    # parse the file names in the slide's raw tiff folder
    df_img_slide = basic.parse_tiff_raw(
        s_wd = config.d_nconv['s_format_rawdir'].format(
            config.d_nconv['s_rawdir'],  # s_rawdir
            s_slide,  # s_slide
        ),  # 'RawImages/{}/'
    )
    # sanity checks on the parsed images: image count first, then marker labels
    sane.count_images(df_img = df_img_slide)
    sane.check_markers(
        es_markerpartition_standard = config.es_markerpartition_standard,
        es_markerdapiblank_standard = config.es_markerdapiblank_standard,
        df_img = df_img_slide,
    )


In [None]:
# 1.4 generate qc images from the raw tiff images

# at s_qcdir check the generated qc png images! 
# fix whatever is necessary

# NOTE(review): the config key 's_color_dapi_jinxif' still carries the jinxif name.
# confirm that this key exists in the d_nconv dictionary of the imported mplexable config.
sane.visualize_raw_images_spawn(
    # input
    s_color = config.d_nconv['s_color_dapi_jinxif'],  # 'c1'
    es_slide = ls_slide,
    # file system
    s_qcdir = config.d_nconv['s_qcdir'],  # 'QC/'
    s_format_rawdir = config.d_nconv['s_format_rawdir'],  # '{}{}/' # s_rawdir, s_slide
    s_rawdir = config.d_nconv['s_rawdir'],  # 'RawImages/'
    # processing
    s_slurm_account = 'gray_lab',
    s_slurm_time = '36:00:00',
    s_slurm_mem = '64G',
    s_slurm_partition = 'exacloud',
    s_type_processing = 'slurm',
)


In [None]:
# 2.0 extract image metadata

# the metadata is by default extracted from the original czi files.
# alternatively, exposure time can be extracted from the splitscenes czi files (by setting b_exposuretime_splitscene to True).
# with default naming convention, the original files are expected to be under {s_czidir}/{s_slide_id}/original/
# and the splitscenes czi files are expected to be under {s_czidir}/{s_slide_id}/splitscenes/
# additionally, the czi image filenames themselves have to follow the correct naming convention.

imgmeta.fetch_meta_batch(
    es_slide = ls_slide,
    # which metadata to extract from which czi files
    b_sceneposition_original = True,
    b_exposuretime_splitscene = True,
    b_exposuretime_original = True,
    s_sceneposition_round = 'R1_',  # file matching pattern, can but does not have to be round.
    # file system
    s_metadir = config.d_nconv['s_metadir'],  # 'MetaImages/'
    s_format_czidir_splitscene = config.d_nconv['s_format_czidir_splitscene'],  # '{}{}/splitscenes/' # s_czidir, s_slide
    s_format_czidir_original = config.d_nconv['s_format_czidir_original'],  # '{}{}/original/' # s_czidir, s_slide
    s_czidir = config.d_nconv['s_czidir'],  # 'CziImages/'
)


In [None]:
# 2.1 TMA layout (optional)

# for each slide that is a tissue microarray (TMA), 
# run util.tma_grid to layout the TMA, based on the extracted scene position information.
# utilize {s_metadir}/{s_slide}_ScenePositions_coor_{r_sampler}.png 
# and {s_metadir}/{s_slide}_ScenePositions_coor.csv to map alphanumeric coordinate labels (scene_coor column).
# if necessary, in {s_metadir}/{s_slide}_ScenePositions_coor.csv the scene_coor can manually be changed.
# when editing {s_metadir}/{s_slide}_ScenePositions_coor.csv, if needed, additional columns can be added. 
# when done, rename 
# {s_metadir}/{s_slide}_ScenePositions_coor.csv to
# {s_metadir}/{s_slide}_ScenePositions_coor_ok.csv
# so, that the work is not overwritten when util.tma_grid is run again.

# one (slide id, i_core_yaxis, i_core_xaxis) entry per TMA slide.
# 'slide-id-tma' is a template placeholder, replace it with a real slide id before running.
for s_slide, i_core_yaxis, i_core_xaxis in [
        ('slide-id-tma', None, None),
    ]:
    util.tma_grid(
        s_metadir = config.d_nconv['s_metadir'],  # 'MetaImages/'
        r_sampler = 0.98,
        i_core_xaxis = i_core_xaxis,
        i_core_yaxis = i_core_yaxis,
        s_slide = s_slide,
    )


In [None]:
# 3.1 utility for template ddd_crop (helper function)
 
# run this function to generate a template ddd_crop dictionary.
# such template ddd_crop dictionaries are helpful for writing the ddd_crop specification at 3.3.

# 'slide-id' is a template placeholder, replace it with the real slide id(s) before running.
util.template_dddcrop(
    ls_slide = ['slide-id'],
    s_type_coor = 'xywh',  # xywh xyxy yxyx None
    # file system
    s_format_metafile_tmacoorcsv = '{}{}_ScenePositions_coor_ok.csv',  # s_metadir, s_slide  # for alphanumeric TMA coordinates!
    s_metadir = config.d_nconv['s_metadir'],  # 'MetaImages/'
    s_format_rawdir = config.d_nconv['s_format_rawdir'],  # '{}{}/' # s_rawdir, s_slide  # to get slide_mscene
    s_rawdir = config.d_nconv['s_rawdir'],  # 'RawImages/'
)


In [None]:
# 3.2 utility for grid cropping (helper function)

# run this function for huge tissue slide_mscenes, that should be grid cropped to smaller slide_pxscenes.
# the d_crop dictionary output can be used to write the ddd_crop specification at 3.3.

util.gridcrop(
    # no cropped rectangle will be wider or higher than i_max pixel!
    i_max = 20000,
    # whole tissue square crop coordinates.
    # the li_xywh coordinates you can get with irfanview or similar software.
    li_xywh = [0,0, 30000,30000],
)


In [None]:
# 3.3  specify ddd_crop

# this variable is needed for registration and downstream!
# every slide mscene has to be mapped to one or more pxscene(s).
# if no cropping at all is required, [0,0, 0,0, 'nnnn'] can be replaced by None.
# you can use the utilities at 3.1 and 3.2 to help you specify this variable.

# run!
# crop specification: slide -> microscopy scene (mscene) -> pixel scene (pxscene) -> crop coordinates.
# each crop coordinate list has four integers followed by the coordinate type label ('xywh' for all entries here).
# this dictionary is handed to regist.save_cropcoor, regist.regist_spawn, and afsub.afsub_spawn further down.
ddd_crop = {
    'RS-mTMA-5': {
        'Scene-A01': {'sceneA01': [426,880,6000,6000, 'xywh']},
        'Scene-A04': {'sceneA04': [136,2373,6000,3000, 'xywh']},
        'Scene-A05': {'sceneA05': [315,2614,6000,4000, 'xywh']},
        'Scene-A06': {'sceneA06': [45,1007,3000,3000, 'xywh']},
        'Scene-A07': {'sceneA07': [754,424,6000,6000, 'xywh']},
        'Scene-A08': {'sceneA08': [397,934,6000,6000, 'xywh']},
        'Scene-A09': {'sceneA09': [357,502,6000,6000, 'xywh']},
        'Scene-A11': {'sceneA11': [317,338,6000,6000, 'xywh']},
        'Scene-B01': {'sceneB01': [277,1109,6000,6000, 'xywh']},
        'Scene-B04': {'sceneB04': [127,158,5500,5500, 'xywh']},
        'Scene-B05': {'sceneB05': [568,1204,6000,6000, 'xywh']},
        'Scene-B06': {'sceneB06': [1374,565,6000,6000, 'xywh']},
        'Scene-B07': {'sceneB07': [674,442,6000,6000, 'xywh']},
        'Scene-B08': {'sceneB08': [1260,1502,6000,6000, 'xywh']},
        'Scene-B09': {'sceneB09': [560,850,6000,6000, 'xywh']},
        'Scene-B10': {'sceneB10': [889,877,6000,4000, 'xywh']},
        'Scene-B11': {'sceneB11': [842,740,6000,6000, 'xywh']},
        'Scene-C01': {'sceneC01': [20,14,5500,5500, 'xywh']},
        'Scene-C03': {'sceneC03': [337,331,6000,6000, 'xywh']},
        'Scene-C04': {'sceneC04': [42,222,6000,6000, 'xywh']},
        'Scene-C05': {'sceneC05': [1115,51,5500,5500, 'xywh']},
        'Scene-C06': {'sceneC06': [18,21,5500,5500, 'xywh']},
        'Scene-C07': {'sceneC07': [965,746,6000,6000, 'xywh']},
        'Scene-C08': {'sceneC08': [180,126,5500,5500, 'xywh']},
        'Scene-C09': {'sceneC09': [918,1072,6000,6000, 'xywh']},
        'Scene-C10': {'sceneC10': [64,756,5500,6000, 'xywh']},
        'Scene-C11': {'sceneC11': [1165,743,6000,6000, 'xywh']},
        'Scene-C12': {'sceneC12': [141,772,5500,6000, 'xywh']},
        'Scene-D01': {'sceneD01': [1193,29,5500,5500, 'xywh']},
        'Scene-D03': {'sceneD03': [1552,436,6000,6000, 'xywh']},
        'Scene-D04': {'sceneD04': [27,231,5000,6000, 'xywh']},
        'Scene-D05': {'sceneD05': [1211,425,6000,6000, 'xywh']},
        'Scene-D06': {'sceneD06': [531,1162,6000,6000, 'xywh']},
        'Scene-D07': {'sceneD07': [229,1046,6000,6000, 'xywh']},
        'Scene-D08': {'sceneD08': [157,194,6000,5500, 'xywh']},
        'Scene-D09': {'sceneD09': [746,28,6000,5500, 'xywh']},
        'Scene-D10': {'sceneD10': [25,1143,6000,6000, 'xywh']},  # missing!  # NOTE(review): entry is still active although marked as missing - confirm that this is intended.
        'Scene-D11': {'sceneD11': [1332,1061,6000,6000, 'xywh']},
        'Scene-E02': {'sceneE02': [762,306,6000,6000, 'xywh']},
        'Scene-E03': {'sceneE03': [110,321,6000,6000, 'xywh']},
        'Scene-E04': {'sceneE04': [998,550,6000,6000, 'xywh']},
        'Scene-E05': {'sceneE05': [1563,1519,6000,6000, 'xywh']},
        'Scene-E06': {'sceneE06': [808,1369,6000,6000, 'xywh']},
        'Scene-E07': {'sceneE07': [46,1150,6000,6000, 'xywh']},
        'Scene-E08': {'sceneE08': [188,84,5500,5500, 'xywh']},
        'Scene-E09': {'sceneE09': [959,1137,6000,6000, 'xywh']},
        'Scene-E10': {'sceneE10': [39,19,6000,5500, 'xywh']},
        'Scene-E11': {'sceneE11': [19,20,5500,5500, 'xywh']},
        'Scene-E12': {'sceneE12': [446,942,6000,6000, 'xywh']},
        'Scene-F02': {'sceneF02': [1389,1085,6000,6000, 'xywh']},
        'Scene-F03': {'sceneF03': [874,13,6500,6500, 'xywh']},
        'Scene-F04': {'sceneF04': [645,11,3000,3000, 'xywh']},
        'Scene-F05': {'sceneF05': [12,719,6000,6000, 'xywh']},
        'Scene-F07': {'sceneF07': [371,1415,5000,5000, 'xywh']},
        'Scene-F08': {'sceneF08': [861,136,6000,5500, 'xywh']},
        'Scene-F09': {'sceneF09': [1027,880,6000,6000, 'xywh']},
        'Scene-F10': {'sceneF10': [352,153,6000,5500, 'xywh']},
        #'Scene-F11': {'sceneF11': [1373,497,5500,4500, 'xywh']},  # dropped! tissue lost and does not register.
        'Scene-F12': {'sceneF12': [682,123,6000,5500, 'xywh']},
        'Scene-G01': {'sceneG01': [58,419,6000,6000, 'xywh']},
        'Scene-G02': {'sceneG02': [946,564,6000,6000, 'xywh']},
        'Scene-G04': {'sceneG04': [445,1528,6000,6000, 'xywh']},
        'Scene-G05': {'sceneG05': [1250,979,6000,6000, 'xywh']},
        'Scene-G06': {'sceneG06': [185,616,6000,6000, 'xywh']},
        'Scene-G08': {'sceneG08': [667,301,6000,6000, 'xywh']},
        'Scene-G09': {'sceneG09': [991,673,6000,6000, 'xywh']},
        'Scene-G10': {'sceneG10': [1018,195,6000,5500, 'xywh']},
        'Scene-G11': {'sceneG11': [4,206,5500,5500, 'xywh']},
        'Scene-G12': {'sceneG12': [583,317,6000,6000, 'xywh']},
        'Scene-H03': {'sceneH03': [1110,79,6000,5500, 'xywh']},
        'Scene-H04': {'sceneH04': [622,75,6000,5000, 'xywh']},
        'Scene-H05': {'sceneH05': [321,581,5000,5000, 'xywh']},
        'Scene-H06': {'sceneH06': [776,850,6000,5000, 'xywh']},
        'Scene-H07': {'sceneH07': [144,879,5500,6000, 'xywh']},
        'Scene-H08': {'sceneH08': [20,203,5500,5500, 'xywh']},
        'Scene-H09': {'sceneH09': [915,1050,5500,5500, 'xywh']},
        'Scene-H10': {'sceneH10': [758,790,6000,6000, 'xywh']},
        'Scene-H11': {'sceneH11': [1474,578,6000,6000, 'xywh']},
        'Scene-H12': {'sceneH12': [732,840,6000,6000, 'xywh']},
        'Scene-I01': {'sceneI01': [1056,23,6000,6000, 'xywh']},
        'Scene-I02': {'sceneI02': [218,1152,5500,6000, 'xywh']},
        'Scene-I03': {'sceneI03': [1389,20,6000,6000, 'xywh']},
        'Scene-I04': {'sceneI04': [214,1273,5500,6000, 'xywh']},
        'Scene-I05': {'sceneI05': [16,628,5500,6000, 'xywh']},
        'Scene-I06': {'sceneI06': [849,24,6000,6000, 'xywh']},
        'Scene-I07': {'sceneI07': [585,129,6000,5500, 'xywh']},
        'Scene-I08': {'sceneI08': [1019,27,6000,6000, 'xywh']},
        'Scene-I09': {'sceneI09': [1013,564,6000,6000, 'xywh']},
        'Scene-I10': {'sceneI10': [210,909,5500,6000, 'xywh']},
        'Scene-I11': {'sceneI11': [25,346,6000,6000, 'xywh']},
        'Scene-I12': {'sceneI12': [562,1007,6000,6000, 'xywh']},
    },
}


In [None]:
# 3.4 write ddd_crop dictionary to file

# this is just for backup, to save your work done at 3.3.
# result json file will be stored in s_rawdir.

regist.save_cropcoor(
    s_rawdir = config.d_nconv['s_rawdir'],  # 'RawImages/'
    ddd_crop = ddd_crop,
    s_batch = s_batch,
)


In [None]:
# 4.0 register images 

# register and possibly crop each slide specified in es_slide.
# every slide in es_slide has to be specified in ddd_crop.

# spawn the registration for every slide in ls_slide, cropping as specified in ddd_crop.
# the regex and glob patterns below describe how round, marker, microscopy channel,
# slide and scene are encoded in the raw tiff file names; reference round patterns are hard coded to R1.
regist.regist_spawn(
    ddd_crop = ddd_crop,
    es_slide = ls_slide,
    s_type_registration = 'matlab',
    # file extension
    s_regex_ext = r'_(ORG.tif)$',  # regex file extension.
    # staining round
    s_regex_round_ref = r'^(R\d+Q?_).+$',  # regex round of raw reference round tiffs.
    s_regex_round_nonref = r'^(R\d+Q?_).+$',  # regex round of raw non-reference round tiffs.
    # staining marker
    s_regex_marker_ref = r'^.+_(.+\..+\..+\.[^_]+)_.+$',  # regex of reference round markers in raw tiffs.
    s_regex_marker_nonref = r'^.+_(.+\..+\..+\.[^_]+)_.+$',  # regex of non-reference round markers in raw tiffs.
    # microscopy channel
    s_regex_micchannel_ref = r'^.*_(c\d+)_.*$',  # regex of reference round microscopy channels/colors.
    s_regex_micchannel_nonref = r'^.*_(c\d+)_.*$',  # regex of non-reference round microscopy channels/colors.
    # dapi images only
    s_glob_img_dapiref = 'R1_*_{}_*-{}_c1_ORG.tif', # glob pattern of raw dapi exclusive reference round tiff, fetching slide and microscopy scene id.
    s_glob_img_dapinonref = 'R*_*_{}_*-{}_c1_ORG.tif', # glob pattern of raw dapi non-reference round tiffs, fetching slide and microscopy scene id.
    # non-dapi images (possibly dapi images too)
    s_glob_img_ref = 'R1_*_{}_*-{}_c*_ORG.tif', # glob pattern of raw exclusive reference round tiffs, fetching slide and microscopy scene id.
    s_glob_img_nonref = 'R*_*_{}_*-{}_c*_ORG.tif', # glob pattern of raw non-reference round tiffs, fetching slide and microscopy scene id.
    # registration
    # NOTE(review): the i_ prefix suggests an integer, but a string is handed over here.
    # confirm against help(regist.regist_spawn) whether str(10000) or the plain integer 10000 is expected.
    i_npoint = str(10000),  # number of key points used for registration,
    # processing
    s_type_processing = 'slurm',
    s_slurm_partition = 'exacloud',
    s_slurm_mem = '64G',
    s_slurm_time = '36:00:00',
    s_slurm_account = 'gray_lab',
    # filesystem
    s_rawdir = config.d_nconv['s_rawdir'],  #'RawImages/',
    s_format_rawdir = config.d_nconv['s_format_rawdir'],  #'{}{}/',  # s_rawdir, s_slide
    s_qcregistration_dir = 'QC/RegistrationPlots/',
    s_regdir = config.d_nconv['s_regdir'],  #'RegisteredImages/',
)


In [None]:
# 4.1 qc plots from registered images

# generate qc png plots from the dapi channel registered tiff images, and possibly other channels.
# check generated qc png plots in the s_qcdir/s_regdir/ folder.
# fix whatever is necessary.

# NOTE(review): the config key 's_color_dapi_jinxif' still carries the jinxif name.
# confirm that this key exists in the d_nconv dictionary of the imported mplexable config.
regist.visualize_reg_images_spawn(
    # input
    s_color = config.d_nconv['s_color_dapi_jinxif'],  # 'c1'
    es_slide = ls_slide,
    # file system
    s_qcdir = config.d_nconv['s_qcdir'],  # 'QC/'
    s_format_regdir = config.d_nconv['s_format_regdir'],  # '{}{}/' # s_regdir, s_slide_pxscene
    s_regdir = config.d_nconv['s_regdir'],  # 'RegisteredImages/'
    # processing
    s_slurm_account = 'gray_lab',
    s_slurm_time = '36:00:00',
    s_slurm_mem = '32G',
    s_slurm_partition = 'exacloud',
    s_type_processing = 'slurm',
)


In [None]:
# 5.0 exposure qc

# generate a batch wide exposure time png and csv matrix.

# check s_metadir/s_batch_exposure_time_ms_matrix.png and 
# s_metadir/s_batch_exposure_time_ms_matrix.csv for wrong exposure time values. 
# in the csv, the marker exposure time mean value in the far right column,
# and the per slide or slide_scene summed up exposure time value at the bottom row 
# are very helpful to find errors.

# if you find errors, specify in ddd_etc at 5.2 how to do exposure time correction!

imgmeta.exposure_matrix(
    s_metadir = config.d_nconv['s_metadir'],  # 'MetaImages/'
    tr_figsize = (32,20),  # (w,h)
    es_slide = ls_slide,
    s_batch = s_batch,
)


In [None]:
# 5.1 utility for template ddd_etc (helper function)

# specify all slide_pxscene that have to be exposure time corrected.

# list here every slide_pxscene that needs exposure time correction. left empty in this notebook.
util.template_dddetc(ls_slidepxscene = [])


In [None]:
# 5.2 specify  ddd_etc

# empty dictionary, if nothing to correct.
# else, the dictionary format should look like this:
# ddd_etc = {'slide_scene': {'marker': {'is': 7,'should_be': 4}},}

# bue 20210120: since this batch has only one slide, it is hard to detect errors on slide level. 
# above all, cores have the same exposure time. 
# no et correction needed!

# run!
ddd_etc = dict()  # empty dictionary, because there is nothing to exposure time correct


In [None]:
# 5.3 write ddd_etc dictionary to file

# this is just for backup, to save your work done at 5.2. 
# result json file will be stored in s_metadir.

regist.save_exposuretimecorrect(
    s_metadir = config.d_nconv['s_metadir'],  # 'MetaImages/'
    ddd_etc = ddd_etc,
    s_batch = s_batch,
)


In [None]:
# 5.4 exposure time correct registered images

# run, if necessary.

regist.exposure_time_correct_spawn(
    # input
    s_imagetype_original = 'ORG',
    ddd_etc = ddd_etc,
    es_slide = ls_slide,
    # file system
    s_format_regdir = config.d_nconv['s_format_regdir'],  # '{}{}/' # s_regdir, s_slide_pxscene
    s_regdir = config.d_nconv['s_regdir'],  # 'RegisteredImages/'
    # processing
    s_slurm_account = 'gray_lab',
    s_slurm_time = '36:00:00',
    s_slurm_mem = '32G',
    s_slurm_partition = 'exacloud',
    s_type_processing = 'slurm',
)


In [None]:
# 6.0 generate marker table

# the whole batch, every slide_pxscene, will have the same marker table.
# for s_slide_pxscene choose one slide_pxscene to generate the marker table.

# result csv will be stored in s_regdir.

# take the alphabetically first slide, mscene, and pxscene specified in ddd_crop
s_slide = sorted(ddd_crop)[0]
s_mscene = sorted(ddd_crop[s_slide])[0]
s_pxscene = sorted(ddd_crop[s_slide][s_mscene])[0]
s_slide_pxscene = f'{s_slide}_{s_pxscene}'

# generate the marker table
basic.marker_table(
    s_batch = s_batch,
    s_slide_pxscene = s_slide_pxscene,  # this is a slide_pxscene defined in ddd_crop above
    s_format_regdir = config.d_nconv['s_format_regdir'],  # '{}{}/' # s_regdir, s_slide_pxscene
    s_regdir = config.d_nconv['s_regdir'],  # 'RegisteredImages/'
)


In [None]:
# 7.0 auto fluorescent subtract images

# specify the quenching round markers in ds_early and ds_late according to the marker table displayed at 6.0.
# if there is only one quenching round, use empty dictionary for ds_early.
# if there is no quenching round, you can not do auto fluorescent subtraction.

# spawn the auto fluorescence subtraction for every slide in ls_slide.
# ds_early and ds_late map the microscopy channel to the quenching round marker label.
afsub.afsub_spawn(
    es_slide = ls_slide,
    ddd_crop = ddd_crop,
    ddd_etc = ddd_etc,
    ds_early = {'c2':'R0c2','c3':'R0c3','c4':'R0c4','c5':'R0c5'},  # adjust according to the marker table.
    ds_late = {'c2':'R5Qc2','c3':'R5Qc3','c4':'R5Qc4','c5':'R5Qc5'},  # adjust according to the marker table.
    es_exclude_color = {'c1','c5'},
    # empty set. note that the literal {} is an empty dictionary, not a set,
    # so set() is used here, like for the other empty es_ parameters in this notebook.
    es_exclude_marker = set(),
    b_8bit = False,
    # processing
    s_type_processing = 'slurm',
    s_slurm_partition = 'exacloud',
    s_slurm_mem = '64G',
    s_slurm_time = '36:00:00',
    s_slurm_account = 'gray_lab',
    # file system
    s_metadir = config.d_nconv['s_metadir'],  #'MetaImages/',
    s_regdir = config.d_nconv['s_regdir'],  #'RegisteredImages/',
    s_format_regdir = config.d_nconv['s_format_regdir'],  #'{}{}/', # s_regdir, s_slide_pxscene
    s_afsubdir = config.d_nconv['s_afsubdir'],  #'SubtractedRegisteredImages/',
    s_format_afsubdir = config.d_nconv['s_format_afsubdir'],  #'{}{}/', # s_afsubdir, s_slide_pxscene
)


In [None]:
# 7.1 qc plots from auto fluorescent subtracted registered images (optional)

# generate qc png plots from the af subtracted registered images.

# call the qc plot function once per microscopy channel.
# the s_regdir parameters are deliberately pointed to the af subtracted image folder,
# so that the plots are generated from the af subtracted registered images.
for s_color in ['c1','c2','c3','c4','c5']:
    regist.visualize_reg_images_spawn(
        es_slide = ls_slide,
        s_color = s_color,
        # processing
        s_type_processing = 'slurm',
        s_slurm_partition = 'exacloud',
        s_slurm_mem = '36G',
        s_slurm_time = '36:00:00',  # hours:minutes:seconds, written like in every other slurm call of this notebook.
        s_slurm_account = 'gray_lab',
        # file system
        s_regdir = config.d_nconv['s_afsubdir'],  #'SubtractedRegisteredImages/'
        s_format_regdir = config.d_nconv['s_format_afsubdir'],  #'{}{}/',  # s_afsubdir, s_slide_pxscene
        s_qcdir = config.d_nconv['s_qcdir'],  #'QC/'
    )


In [None]:
# 8.0 segmentation

# run s_task nuc or cell (or nuccell) for nucleus and/or cell segmentation.
# output nuc segmentation: s_segdir/s_slide_CellposeSegmentation/{slide_scene}_nuc{nuc_diam}_NucleiSegmentationBasins.tif
# output cell segmentation s_segdir/s_slide_CellposeSegmentation/{slide_scene}_{seg_marker}_nuc{nuc_diam}_cell{cell_diam}_matched_CellSegmentationBasins.tif

# then run s_task match to match nucleus and cell labels.
# output cell segmentation s_segdir/s_slide_CellposeSegmentation/{slide_scene}_{seg_marker}_cell{nuc_diam}_CellSegmentationBasins.tif

segment.segment_spawn(
    # input
    s_task = 'match',  # known segmentation tasks are nuc, cell, or nuccell, and match.
    es_slide = ls_slide,
    # segmentation
    s_type_data = 'cmif',  # implemented are 'cmif' and 'codex'.
    es_rare_marker = set(),  # markers will be slightly enhanced.
    es_seg_marker = {'Ecad'},  # {'Ecad'}, # specify the cell segmentation marker(s). if None, no cell segmentation and label matching will be done.
    s_dapi_round = 'R1',  # the same round that images were registered to.
    i_cell_diam = 30,  # number of pixel for minimal cell diameter. microscope dependent!
    i_nuc_diam = 30,  # number of pixel for minimal nucleus diameter. microscope dependent!
    # gpu
    # if None, CPU will be used. anything else will use GPU.
    # if s_type_processing is 'slurm', s_gpu will be used in slurm command call.
    s_gpu = 'gpu:v100:1',
    # file system
    s_format_segdir_cellpose = config.d_nconv['s_format_segdir_cellpose'],  # '{}{}_CellposeSegmentation/' # s_segdir, s_slide
    s_segdir = config.d_nconv['s_segdir'],  # 'Segmentation/'
    s_format_regdir = config.d_nconv['s_format_regdir'],  # '{}{}/' # s_regdir, s_slide_pxscene
    s_regdir = config.d_nconv['s_regdir'],  # 'RegisteredImages/'
    # processing
    s_slurm_account = 'gray_lab',
    s_slurm_time = '12:00:00',
    s_slurm_mem = '128G',
    s_slurm_partition = 'exacloud',
    s_type_processing = 'slurm',
)


In [None]:
# 9.0 auto threshold markers

# loop will process registered, and autofluorescent subtracted registered images.
# result csv will be stored in s_regdir and s_afsubdir, respectively.

# first pass: registered images. second pass: af subtracted registered images.
for s_inputdir, s_format_inputdir in zip(
        (config.d_nconv['s_regdir'], config.d_nconv['s_afsubdir']),
        (config.d_nconv['s_format_regdir'], config.d_nconv['s_format_afsubdir']),
    ):
    thresh.auto_thresh_spawn(
        es_slide = ls_slide,
        # file system: the s_afsubdir parameters receive the input folder of the current pass
        s_format_afsubdir = s_format_inputdir,  # '{}{}/' # s_afsubdir, s_slide_pxscene
        s_afsubdir = s_inputdir,  # 'RegisteredImages/' or 'SubtractedRegisteredImages/'
        # processing
        s_slurm_account = 'gray_lab',
        s_slurm_time = '36:00:00',
        s_slurm_mem = '32G',
        s_slurm_partition = 'exacloud',
        s_type_processing = 'slurm',
    )


In [None]:
# 10.0 feature extraction

# loop will process registered, and autofluorescent subtracted registered images.

# result csv will be stored in s_segdir/s_slide_CellposeSegmentation/
# output registered images: features_{slide_scene}_{seg_marker}_raw_MeanIntensity_Shape_Centroid_registeredimages.csv
# output af subtracted registered images: features_{slide_scene}_{seg_marker}_raw_MeanIntensity_Shape_Centroid_subtractedregisteredimages.csv

# first pass: registered images. second pass: af subtracted registered images.
for s_inputdir, s_format_inputdir in zip(
        (config.d_nconv['s_regdir'], config.d_nconv['s_afsubdir']),
        (config.d_nconv['s_format_regdir'], config.d_nconv['s_format_afsubdir']),
    ):
    feat.extract_features_spawn(
        es_slide = ls_slide,
        # feature extraction
        i_shrink = 0,  # optional, e.g. for patching against bleed through at 10.1 by setting es_custom_markerpartition, or es_shrink_marker and i_shrink.
        i_mem = 2,  # number of pixel for membrane. microscope dependent!
        i_exp = 5,  # number of pixel for cytoplasm doughnut. microscope dependent!
        es_seg_marker = {'Ecad'},  # {'Ecad'}, # use the same cell segmentation marker(s) specified at 8.0.
        # input and output directory: the s_afsubdir parameters receive the input folder of the current pass
        s_format_segdir_cellpose = config.d_nconv['s_format_segdir_cellpose'],
        s_segdir = config.d_nconv['s_segdir'],  # 'Segmentation/'
        s_format_afsubdir = s_format_inputdir,  # '{}{}/' # s_afsubdir, s_slide_pxscene
        s_afsubdir = s_inputdir,  # 'RegisteredImages/' or 'SubtractedRegisteredImages/'
        # processing
        s_slurm_account = 'gray_lab',
        s_slurm_time = '36:00:00',
        s_slurm_mem = '256G',  # this function eats RAM like no other!
        s_slurm_partition = 'exacloud',
        s_type_processing = 'slurm',
    )


In [None]:
# 10.1 feature filtering and patching

# loop will process registered, and autofluorescent subtracted registered images.
# important: depends on in which channel s_thresh_marker was, i_thresh_manual value might not be independent of autofluorescent subtraction.

# result csv will be stored in s_segdir/s_slide_CellposeSegmentation/:
# features_{slide}_CentroidXY.csv
# features_{slide}_{s_thresh}_patched_MeanIntensity_Shape_{es_dapipartition_filter}_registeredimages.csv 
# features_{slide}_{s_thresh}_patched_MeanIntensity_Shape_{es_dapipartition_filter}_subtractedregisteredimages.csv

# result png will be stored in s_qcdir/Segmentation/:
# {slide}_DAPI_rounds_registeredimages_lineplot.png
# {slide_scene}_DAPIn.DAPIm.DAPIo.DAPIp.{s_thresh}_thresh_registeredimages_scatter.png
# {slide_scene}_DAPIn.DAPIm.DAPIo.DAPIp.{s_thresh}_thresh_subtractedregisteredimages_scatter.png

# about the 3 related parameters, es_seg_marker, di_seg_marker, and des_cytoplasm_marker:
# you can specify as many celltype specific segmentation markers in es_seg_marker (specified and used at 8.0 and used at 10.0) and di_seg_marker,  
# as you have in the panel, and for which you have specified cytoplasm marker in des_cytoplasm_marker (specified at ~/.jinxif/config.py).
# di_seg_marker.keys() has to be a member of es_seg_marker and des_cytoplasm_marker.keys().
# the thing is, marker specified in des_cytoplasm_marker (values) will extract the mean value from the segmented cytoplasm, 
# instead of extracting the mean value from an i_exp thick doughnut around the nucleus.
# if you're not specifying any marker at des_cytoplasm_marker, setting di_seg_marker will have no effect.

# about di_seg_marker:
# set to None, if no cell segmentation was done.
# specify the cell segmentation marker (dictionary key) to be used to detect cytoplasm positive cells. this has to be a marker from es_seg_marker set specified at 8.0. 
# specify the as min threshold (dictionary value) for the segmentation marker. this value is microscope setting dependent!
# read above about the 3 related parameters, es_seg_marker, di_seg_marker, and des_cytoplasm_marker!

# image sets to be filtered, one tuple per set:
# (input directory, input directory format string, di_seg_marker).
# di_seg_marker is e.g. {'Ecad': 1000}. read above about di_seg_marker!
lt_filter_input = [
    (config.d_nconv['s_regdir'], config.d_nconv['s_format_regdir'], {'Ecad': 1000}),
    (config.d_nconv['s_afsubdir'], config.d_nconv['s_format_afsubdir'], {'Ecad': 1000}),
]

for s_inputdir, s_format_inputdir, di_seg_marker in lt_filter_input:
    # filter settings. the dictionaries are built fresh for every image set.
    d_filter_setting = dict(
        es_slide = ls_slide,
        # for nuclei filtering, specify DAPI from the first and last round,
        # and optionally weak rounds in between. e.g. {'DAPI1_nuclei','DAPI16_nuclei'}
        es_dapipartition_filter = {'DAPI1_nuclei','DAPI8_nuclei'},
        # e.g. {'Ecad': 1000}. read above about di_seg_marker!
        di_seg_marker = di_seg_marker,
        # number of pixel for cytoplasm doughnut. microscope dependent!
        i_exp = 5,
        # number of pixel for membrane. microscope dependent!
        i_mem = 2,
        # optional, for patching against bleed through. use the same setting as in 10.0.
        i_shrink = 0,
        # optional, specify marker to be patched against bleed through.
        es_shrink_marker = set(),
        # optional, marker_partition from the raw feature extracted csv,
        # that should additionally be ported to the patched csv.
        es_custom_markerpartition = set(),
        # read above about the 3 related parameters es_seg_marker, di_seg_marker, and des_cytoplasm_marker!
        des_cytoplasm_marker = config.des_cytoplasmmarker_standard,
        # used dapi from the same round that images were registered to.
        s_tissue_dapi = 'DAPI1',
        # 300 - 600
        i_tissue_dapi_thresh = 500,
        # 65536
        i_tissue_area_thresh = 50000,
        # specify according to your dapi and s_thresh_marker.
        # for shape determination, we use dapi from round 2 because it is less blurry than dapi from round 1.
        ds_shape = {
            'DAPI2_nuclei_area': 'nuclei_area',
            'DAPI2_nuclei_eccentricity': 'nuclei_eccentricity',
            'Ecad_cell_area': 'cell_area',
            'Ecad_cell_eccentricity': 'cell_eccentricity',
            'Ecad_cell_euler': 'cell_euler',
            'Ecad_cytoplasm_area': 'cytoplasm_area',
            'Ecad_cytoplasm_eccentricity': 'cytoplasm_eccentricity',
            'Ecad_cytoplasm_euler': 'cytoplasm_euler',
        },
        # specify the dapi round for centroid coordinate detection.
        # we use the same round that images were registered to.
        # NOTE(review): s_tissue_dapi above is described the same way but is set to DAPI1 - confirm the round.
        ds_centroid = {
            'DAPI2_nuclei_centroid-0': 'DAPI_Y',
            'DAPI2_nuclei_centroid-1': 'DAPI_X',
        },
    )

    # processing
    d_filter_processing = dict(
        s_type_processing = 'slurm',
        s_slurm_partition = 'exacloud',
        s_slurm_mem = '64G',
        s_slurm_time = '36:00:00',
        s_slurm_account = 'gray_lab',
    )

    # file system
    d_filter_path = dict(
        s_afsubdir = s_inputdir,  # 'RegisteredImages/', or 'SubtractedRegisteredImages/'
        s_format_afsubdir = s_format_inputdir,  # '{}{}/', # s_afsubdir, slide_scene
        s_segdir = config.d_nconv['s_segdir'],  # 'Segmentation/'
        s_format_segdir_cellpose = config.d_nconv['s_format_segdir_cellpose'],  # '{}{}_CellposeSegmentation/', # s_segdir, s_slide
        s_qcdir = config.d_nconv['s_qcdir'],  # 'QC/'
    )

    # spawn the filter job for this image set.
    feat.filter_features_spawn(**d_filter_setting, **d_filter_processing, **d_filter_path)


In [None]:
# 10.2 feature correct labels

# it only makes sense to run this function, if s_thresh_marker is not None!

# function expands cytoplasm negative cells from the matched nucleus cell label file by i_exp. 
# output in s_segdir/s_slide_CellposeSegmentation/:
# {slide_scene}_{seg_marker}_nuc{cell_diam}_cell{nuc_diam}__matched_{exp}_CellSegmentationBasins.tif
# celltouch_{slide}_{s_thresh_marker}_segmentation.json

# label correction settings
d_correct_setting = dict(
    es_slide = ls_slide,
    # e.g. {'Ecad': 1000}. choose same value as in 10.1 and in relation to s_afsubdir!
    di_seg_marker = {'Ecad': 1000},
    # choose same value as in 10.1.
    i_exp = 5,
)

# processing
d_correct_processing = dict(
    s_type_processing = 'slurm',
    s_slurm_partition = 'exacloud',
    s_slurm_mem = '64G',
    s_slurm_time = '36:00:00',
    s_slurm_account = 'gray_lab',
)

# file system
d_correct_path = dict(
    s_afsubdir = config.d_nconv['s_regdir'],  # 'RegisteredImages/', or 'SubtractedRegisteredImages/'
    s_segdir = config.d_nconv['s_segdir'],  # 'Segmentation/'
    s_format_segdir_cellpose = config.d_nconv['s_format_segdir_cellpose'],  # '{}{}_CellposeSegmentation/', # s_segdir, s_slide
)

# spawn the label correction jobs.
feat.feature_correct_labels_spawn(**d_correct_setting, **d_correct_processing, **d_correct_path)


In [None]:
# 11.0 qc plots for z projection, segmentation basin and tissue edge distance

# qc plot settings
d_zproj_setting = dict(
    es_slide = ls_slide,
    # choose same value as in 8.0.
    es_seg_marker = {'Ecad'},
    # the three tissue parameters: choose same values as in 10.1.
    s_tissue_dapi = 'DAPI1',
    i_tissue_dapi_thresh = 500,
    i_tissue_area_thresh = 50000,
)

# processing
d_zproj_processing = dict(
    s_type_processing = 'slurm',
    s_slurm_partition = 'exacloud',
    s_slurm_mem = '64G',
    s_slurm_time = '36:00:00',
    s_slurm_account = 'gray_lab',
)

# file system
d_zproj_path = dict(
    s_segdir = config.d_nconv['s_segdir'],  # 'Segmentation/'
    s_format_segdir_cellpose = config.d_nconv['s_format_segdir_cellpose'],  # '{}{}_CellposeSegmentation/', # s_segdir, s_slide
    s_qcdir = config.d_nconv['s_qcdir'],  # 'QC/'
)

# spawn the qc plot jobs.
segment.nuccell_zprojlabel_imgs_spawn(**d_zproj_setting, **d_zproj_processing, **d_zproj_path)


In [None]:
# 12.0 work directory tidy up

# the function generates a scripts folder and puts all scripts that were run by the pipeline into it.
# additionally, the function deletes all slurm-*.out and slurp-*.out output files because they are no longer needed.
# NOTE(review): presumably this should only be run after all spawned jobs have finished,
# since their output files get deleted - confirm.

# called without arguments, so the function's own defaults apply.
util.sweep()


In [None]:
# ometiff  (optional)

# function generates for each specified slide_pxscene
# a (non-pyramidal) multichannel tiff with ome metadata (not totally complete).
# + http://www.openmicroscopy.org/ome-files/

# user input: these three values are microscope and experiment dependent and have no sensible default.
# they have to be set before this cell is run!
r_pixel_size_um = None  # float, e.g. 0.325. microscope dependent!
s_lab = None  # string, experimenter_group
s_email_leader = None  # string, experimenter

# fail early with a readable message, instead of spawning jobs with missing metadata.
if r_pixel_size_um is None or s_lab is None or s_email_leader is None:
    raise ValueError(
        'r_pixel_size_um, s_lab, and s_email_leader have to be set in this cell before ometiff.ometiff_spawn can be run.'
    )

# ddd_crop and ddd_etc are not defined in this cell; they have to exist in the notebook namespace already.
ometiff.ometiff_spawn(
    es_slide = ls_slide,  # slide filter
    es_slide_pxscene = None,  # scene filter. if None, all scenes from the es_slide specified slides will be generated.
    es_exclude_round = {'R0'},  # rounds to be excluded from the multichannel ometiff.
    ddd_crop = ddd_crop,  # to map slide_mscene to slide_pxscene
    ddd_etc = ddd_etc,  # exposure time correction
    # microscopy
    r_pixel_size_um = r_pixel_size_um,  # set above. microscope dependent!
    # experiment
    s_batch_id = s_batch,  # experiment
    s_lab = s_lab,  # set above. experimenter_group
    s_email_leader = s_email_leader,  # set above. experimenter
    # output image
    b_8bit = False,  # if False, output will be like input tiffs, 16[bit].
    # processing
    s_type_processing = 'slurm',
    s_slurm_partition = 'exacloud',
    s_slurm_mem = '64G',
    s_slurm_time = '36:00:00',
    s_slurm_account = 'gray_lab',
    # file system
    s_afsubdir = config.d_nconv['s_regdir'],  # 'SubtractedRegisteredImages/' or 'RegisteredImages/'
    s_format_afsubdir = config.d_nconv['s_format_regdir'],  # '{}{}/' # s_afsubdir, s_slide_pxscene
    s_metadir = config.d_nconv['s_metadir'],  # 'MetaImages/'
    s_ometiffdir = config.d_nconv['s_ometiffdir'],  # 'OmeTiffImages/'
)


In [None]:
# compress files (optional)

# to save disk space, raw tiff and czi files can be compressed.
# compression needs time and processing power.
# decompression is relatively quick.

# what to compress. switch the wanted file types to True.
d_compress_setting = dict(
    es_slide = ls_slide,
    b_tiff_raw = False,  # compress raw tiff files?
    b_czi_original = False,  # compress original czi files?
    b_czi_splitscene = False,  # compress splitscene czi files?
)

# processing
d_compress_processing = dict(
    s_type_processing = 'slurm',
    s_slurm_partition = 'exacloud',
    s_slurm_mem = '64G',
    s_slurm_time = '36:00:00',
    s_slurm_account = 'gray_lab',
)

# file system
d_compress_path = dict(
    s_rawdir = config.d_nconv['s_rawdir'],  # 'RawImages/'
    s_format_rawdir = config.d_nconv['s_format_rawdir'],  # '{}{}/', # s_rawdir, s_slide
    s_czidir = config.d_nconv['s_czidir'],  # 'CziImages/'
    s_format_czidir_original = config.d_nconv['s_format_czidir_original'],  # '{}{}/original/', # s_czidir, s_slide
    s_format_czidir_splitscene = config.d_nconv['s_format_czidir_splitscene'],  # '{}{}/splitscene/', # s_czidir, s_slide
)

# spawn the compression jobs.
util.compress_xz_spawn(**d_compress_setting, **d_compress_processing, **d_compress_path)


In [None]:
# de-compress raw tiff.xz and czi.xz files (do not run, if not necessary)

# compression needs time and processing power.
# decompression is relatively quick.

# what to de-compress. switch the wanted file types to True.
d_decompress_setting = dict(
    es_slide = ls_slide,  # run this function only for the necessary slides!
    b_tiff_raw = False,  # de-compress raw tiff files?
    b_czi_original = False,  # de-compress original czi files?
    b_czi_splitscene = False,  # de-compress splitscene czi files?
)

# processing
d_decompress_processing = dict(
    s_type_processing = 'slurm',
    s_slurm_partition = 'exacloud',
    s_slurm_mem = '64G',
    s_slurm_time = '36:00:00',
    s_slurm_account = 'gray_lab',
)

# file system
d_decompress_path = dict(
    s_rawdir = config.d_nconv['s_rawdir'],  # 'RawImages/'
    s_format_rawdir = config.d_nconv['s_format_rawdir'],  # '{}{}/', # s_rawdir, s_slide
    s_czidir = config.d_nconv['s_czidir'],  # 'CziImages/'
    s_format_czidir_original = config.d_nconv['s_format_czidir_original'],  # '{}{}/original/', # s_czidir, s_slide
    s_format_czidir_splitscene = config.d_nconv['s_format_czidir_splitscene'],  # '{}{}/splitscene/', # s_czidir, s_slide
)

# spawn the de-compression jobs.
util.decompress_xz_spawn(**d_decompress_setting, **d_decompress_processing, **d_decompress_path)
