## RUN: Feature Embedding

This notebook runs the ISLA and CBE pipeline based on segmented cells.

### Prep

In [None]:
### Imports

# Generic
from __future__ import division
import os, sys, pickle
import numpy as np

# Internal
from katachi.pipelines import feature_embedding as feat

In [None]:
### Function to parse relevant IDs from IDR bulk data

def parse_from_IDR(dir_path, target):
    """Return the sample IDs in `dir_path` that match the requested `target`.

    A sample is any sub-directory of `dir_path` whose name is exactly 10
    characters long. Its images are the files inside it whose names start
    with the sample ID and end with '.tif'.

    Parameters
    ----------
    dir_path : str
        Directory containing one sub-directory per sample.
    target : str
        Either the special value 'membranes_only', which selects samples
        that have at least one image and whose images all contain 'lynEGFP'
        in their file name, or a suffix, which selects samples that have at
        least one image whose name ends with `target + '.tif'`.

    Returns
    -------
    relevant_samples : list of str
        Names of the matching sample directories, in `os.listdir` order.
    """

    relevant_samples = []
    for d in os.listdir(dir_path):

        # Only 10-character sub-directories count as samples
        sample_dir = os.path.join(dir_path, d)
        if len(d) != 10 or not os.path.isdir(sample_dir):
            continue

        # Get image files
        images = [i for i in os.listdir(sample_dir)
                  if i.startswith(d) and i.endswith('.tif')]

        # Special case for membranes only
        if target == 'membranes_only':
            # `all` is True for an empty sequence, so explicitly require at
            # least one image; otherwise image-less samples would be selected
            is_relevant = bool(images) and all('lynEGFP' in img for img in images)

        # All other cases
        else:
            is_relevant = any(img.endswith(target + '.tif') for img in images)

        if is_relevant:
            relevant_samples.append(d)

    return relevant_samples

### Feature Embedding for `cldnB:lyn-EGFP`

#### Reference Shape Space

In [None]:
# Target directory
dir_path = r'data\experimentA\image_data'

# Parse relevant IDs from IDR bulk data
relevant_samples = parse_from_IDR(dir_path, 'membranes_only')
# Parenthesized form prints the same text on Python 2 and is also valid on
# Python 3, where the bare print statement is a SyntaxError.
print("Found %i relevant samples!" % len(relevant_samples))

# Main (positional) arguments for feat.feature_extraction
suffix_seg = 'seg'
suffix_int = 'seg'
num_LMs    = 2000
downsample = ('ddds', 200000)
clustering = ('kmeans', 20)
features   = ["kNN-distsManh"]

# Keyword arguments for feat.feature_extraction
feat_kwargs = {
    'select_IDs'      : relevant_samples,
    'recurse'         : True,
    'compute_TFOR'    : True,
    'compute_CFOR'    : True,
    'processes'       : 14,
    'dask_graph_path' : None,
    'profiling'       : True,
    'verbose'         : True,
}

# Shape space: preprocessing function to generate outline shell
from scipy.ndimage import binary_erosion
def seg_shell_local(img, seg, cell_idx, iterations=1):
    """Return a boolean mask of the outline shell of cell `cell_idx` in `seg`.

    The shell is made up of the elements labeled `cell_idx` that are removed
    by `iterations` rounds of binary erosion. `img` is accepted but not used.
    """
    cell_mask = (seg == cell_idx)
    eroded_mask = binary_erosion(cell_mask, iterations=iterations)
    return np.logical_xor(cell_mask, eroded_mask)
# Landmark assignment: use the outline shell function for local preprocessing
feat_kwargs['assign_landmarks_kwargs'] = {
    "local_prep_func"   : seg_shell_local,
    "local_prep_params" : 1,
}

# Additional settings for TFOR
feat_kwargs['perform_CBE_TFOR_kwargs'] = {
    "presample"       : ('kmeans', 500),
    "save_presampled" : True,
    "suffix_out"      : {
        'PRES' : '_kmeansPRES',
        'DS'   : '_DDDS',
        'CBE'  : '_CBEmanh',
        'META' : '-' + suffix_int,
    },
}

# Additional settings for pdCFOR
feat_kwargs['perform_CBE_CFOR_kwargs'] = {
    "presample"       : ('kmeans', 500),
    "save_presampled" : True,
    "cfor"            : ('PD', 3),
    "suffix_out"      : {
        'PRES' : '_kmeansPRES',
        'CFOR' : '_pdCFOR',
        'DS'   : '_DDDS',
        'CBE'  : '_CBEmanh',
        'META' : '-' + suffix_int,
    },
}

# Run landmark and feature extraction pipeline
feat.feature_extraction(
    dir_path, suffix_seg, suffix_int, num_LMs,
    downsample, clustering, features,
    **feat_kwargs
)

#### Retrieve Reference CBE Cluster Objects for Shape Embeddings on Other Samples

In [None]:
### Retrieve reference CBE cluster objects from one of the metadata files

# Select reference sample
# This can be any of the membrane-only samples; the CBE cluster objects
# saved in their metadata are naturally all the same!
dir_path = r'data\experimentA\image_data'
ref_ID = '0B51F8B46C'

# Find the corresponding metadata file (first match in os.walk order)
meta_fname = ref_ID + "_stack_metadata.pkl"
meta_path = next((os.path.join(root, meta_fname)
                  for root, dirs, files in os.walk(dir_path)
                  if meta_fname in files),
                 None)

# Fail loudly if nothing was found; otherwise `meta_path` would be undefined
# (or silently left over from an earlier run of this cell)
if meta_path is None:
    raise IOError("Metadata file '%s' not found under '%s'!"
                  % (meta_fname, dir_path))

# Open the file and retrieve the objects
# NOTE: unpickling can execute arbitrary code; only load metadata files
#       that were produced by a trusted run of the pipeline.
with open(meta_path, 'rb') as metafile:
    meta_dict = pickle.load(metafile)
clust_TFOR = meta_dict['clustobj-TFOR_kmeansPRES_DDDS_CBEmanh-seg']
clust_pdCFOR = meta_dict['clustobj-kmeansPRES_pdCFOR_DDDS_CBEmanh-seg']

### Feature Embedding for `cldnB:lyn-EGFP + cxcr4b:NLS-tdTomato`

#### Shape Space

In [None]:
# Target directory
dir_path = r'data\experimentA\image_data'

# Parse relevant IDs from IDR bulk data
relevant_samples = parse_from_IDR(dir_path, 'NLStdTomato')
# Parenthesized form prints the same text on Python 2 and is also valid on
# Python 3, where the bare print statement is a SyntaxError.
print("Found %i relevant samples!" % len(relevant_samples))

# Main (positional) arguments for feat.feature_extraction
suffix_seg = 'seg'
suffix_int = 'seg'
num_LMs    = 2000
downsample = ('ddds', 200000)
# Reuse the reference cluster objects loaded from the metadata file above
clustering = (('previous', 'previous'),
              (clust_TFOR, clust_pdCFOR))
features   = ["kNN-distsManh"]

# Keyword arguments for feat.feature_extraction
feat_kwargs = {
    'select_IDs'      : relevant_samples,
    'recurse'         : True,
    'compute_TFOR'    : True,
    'compute_CFOR'    : True,
    'processes'       : 14,
    'dask_graph_path' : None,
    'profiling'       : True,
    'verbose'         : True,
}

# Shape space: preprocessing function to generate outline shell
from scipy.ndimage import binary_erosion
def seg_shell_local(img, seg, cell_idx, iterations=1):
    """Return a boolean mask of the outline shell of cell `cell_idx` in `seg`.

    The shell is made up of the elements labeled `cell_idx` that are removed
    by `iterations` rounds of binary erosion. `img` is accepted but not used.
    """
    cell_mask = (seg == cell_idx)
    eroded_mask = binary_erosion(cell_mask, iterations=iterations)
    return np.logical_xor(cell_mask, eroded_mask)
# Landmark assignment: use the outline shell function for local preprocessing
feat_kwargs['assign_landmarks_kwargs'] = {
    "local_prep_func"   : seg_shell_local,
    "local_prep_params" : 1,
}

# Additional settings for TFOR
feat_kwargs['perform_CBE_TFOR_kwargs'] = {
    "presample"       : ('kmeans', 500),
    "save_presampled" : True,
    "suffix_out"      : {
        'PRES' : '_kmeansPRES',
        'DS'   : '_DDDS',
        'CBE'  : '_CBEmanh',
        'META' : '-' + suffix_int,
    },
}

# Additional settings for pdCFOR
feat_kwargs['perform_CBE_CFOR_kwargs'] = {
    "presample"       : ('kmeans', 500),
    "save_presampled" : True,
    "cfor"            : ('PD', 3),
    "suffix_out"      : {
        'PRES' : '_kmeansPRES',
        'CFOR' : '_pdCFOR',
        'DS'   : '_DDDS',
        'CBE'  : '_CBEmanh',
        'META' : '-' + suffix_int,
    },
}

# Run landmark and feature extraction pipeline
feat.feature_extraction(
    dir_path, suffix_seg, suffix_int, num_LMs,
    downsample, clustering, features,
    **feat_kwargs
)

#### Nuclei Space

In [None]:
# Target directory
dir_path = r'data\experimentA\image_data'

# Parse relevant IDs from IDR bulk data
relevant_samples = parse_from_IDR(dir_path, 'NLStdTomato')
# Parenthesized form prints the same text on Python 2 and is also valid on
# Python 3, where the bare print statement is a SyntaxError.
print("Found %i relevant samples!" % len(relevant_samples))

# Main (positional) arguments for feat.feature_extraction
suffix_seg = 'seg'
suffix_int = 'NLStdTomato'
num_LMs    = 2000
downsample = ('ddds', 200000)
clustering = ('kmeans', 20)
features   = ["kNN-distsManh"]

# Keyword arguments for feat.feature_extraction
feat_kwargs = {
    'select_IDs'      : relevant_samples,
    'recurse'         : True,
    'compute_TFOR'    : True,
    'compute_CFOR'    : True,
    'processes'       : 14,
    'dask_graph_path' : None,
    'profiling'       : True,
    'verbose'         : True,
}

# Preprocessing function; mean background subtraction
def mean_bgsub_local(img, seg, cell_idx):
    """Subtract the mean of `img` from `img` and set below-mean entries to 0.

    `seg` and `cell_idx` are accepted but not used. The result of the
    subtraction is returned; `img` itself is not written to.
    """
    background = img.mean()
    below_background = img < background
    result = img - background
    result[below_background] = 0
    return result
# Landmark assignment: use mean background subtraction for local preprocessing
feat_kwargs['assign_landmarks_kwargs'] = {
    "local_prep_func" : mean_bgsub_local,
}

# Additional settings for TFOR
feat_kwargs['perform_CBE_TFOR_kwargs'] = {
    "presample"       : ('kmeans', 500),
    "save_presampled" : True,
    "suffix_out"      : {
        'PRES' : '_kmeansPRES',
        'DS'   : '_DDDS',
        'CBE'  : '_CBEmanh',
        'META' : '-' + suffix_int,
    },
}

# Additional settings for pdCFOR
feat_kwargs['perform_CBE_CFOR_kwargs'] = {
    "presample"       : ('kmeans', 500),
    "save_presampled" : True,
    "cfor"            : ('PD', 3),
    "suffix_out"      : {
        'PRES' : '_kmeansPRES',
        'CFOR' : '_pdCFOR',
        'DS'   : '_DDDS',
        'CBE'  : '_CBEmanh',
        'META' : '-' + suffix_int,
    },
}

# Run landmark and feature extraction pipeline with pdCFOR
feat.feature_extraction(
    dir_path, suffix_seg, suffix_int, num_LMs,
    downsample, clustering, features,
    **feat_kwargs
)

### Feature Embedding for `cldnB:lyn-EGFP + Actb2:mKate-Rab11a`

#### Shape Space

In [None]:
# Target directory
dir_path = r'data\experimentA\image_data'

# Parse relevant IDs from IDR bulk data
relevant_samples = parse_from_IDR(dir_path, 'mKate2rab11')
# Parenthesized form prints the same text on Python 2 and is also valid on
# Python 3, where the bare print statement is a SyntaxError.
print("Found %i relevant samples!" % len(relevant_samples))

# Main (positional) arguments for feat.feature_extraction
suffix_seg = 'seg'
suffix_int = 'seg'
num_LMs    = 2000
downsample = ('ddds', 200000)
# Reuse the reference cluster objects loaded from the metadata file above
clustering = (('previous', 'previous'),
              (clust_TFOR, clust_pdCFOR))
features   = ["kNN-distsManh"]

# Keyword arguments for feat.feature_extraction
feat_kwargs = {
    'select_IDs'      : relevant_samples,
    'recurse'         : True,
    'compute_TFOR'    : True,
    'compute_CFOR'    : True,
    'processes'       : 14,
    'dask_graph_path' : None,
    'profiling'       : True,
    'verbose'         : True,
}

# Shape space: preprocessing function to generate outline shell
from scipy.ndimage import binary_erosion
def seg_shell_local(img, seg, cell_idx, iterations=1):
    """Return a boolean mask of the outline shell of cell `cell_idx` in `seg`.

    The shell is made up of the elements labeled `cell_idx` that are removed
    by `iterations` rounds of binary erosion. `img` is accepted but not used.
    """
    cell_mask = (seg == cell_idx)
    eroded_mask = binary_erosion(cell_mask, iterations=iterations)
    return np.logical_xor(cell_mask, eroded_mask)
# Landmark assignment: use the outline shell function for local preprocessing
feat_kwargs['assign_landmarks_kwargs'] = {
    "local_prep_func"   : seg_shell_local,
    "local_prep_params" : 1,
}

# Additional settings for TFOR
feat_kwargs['perform_CBE_TFOR_kwargs'] = {
    "presample"       : ('kmeans', 500),
    "save_presampled" : True,
    "suffix_out"      : {
        'PRES' : '_kmeansPRES',
        'DS'   : '_DDDS',
        'CBE'  : '_CBEmanh',
        'META' : '-' + suffix_int,
    },
}

# Additional settings for pdCFOR
feat_kwargs['perform_CBE_CFOR_kwargs'] = {
    "presample"       : ('kmeans', 500),
    "save_presampled" : True,
    "cfor"            : ('PD', 3),
    "suffix_out"      : {
        'PRES' : '_kmeansPRES',
        'CFOR' : '_pdCFOR',
        'DS'   : '_DDDS',
        'CBE'  : '_CBEmanh',
        'META' : '-' + suffix_int,
    },
}

# Run landmark and feature extraction pipeline
feat.feature_extraction(
    dir_path, suffix_seg, suffix_int, num_LMs,
    downsample, clustering, features,
    **feat_kwargs
)

#### Rab11 Space

In [None]:
# Target directory
dir_path = r'data\experimentA\image_data'

# Parse relevant IDs from IDR bulk data
relevant_samples = parse_from_IDR(dir_path, 'mKate2rab11')
# Parenthesized form prints the same text on Python 2 and is also valid on
# Python 3, where the bare print statement is a SyntaxError.
print("Found %i relevant samples!" % len(relevant_samples))

# Main (positional) arguments for feat.feature_extraction
suffix_seg = 'seg'
suffix_int = 'mKate2rab11'
num_LMs    = 2000
downsample = ('ddds', 200000)
clustering = ('kmeans', 20)
features   = ["kNN-distsManh"]

# Keyword arguments for feat.feature_extraction
feat_kwargs = {
    'select_IDs'      : relevant_samples,
    'recurse'         : True,
    'compute_TFOR'    : True,
    'compute_CFOR'    : True,
    'processes'       : 14,
    'dask_graph_path' : None,
    'profiling'       : True,
    'verbose'         : True,
}

# Preprocessing function; mean background subtraction
def mean_bgsub_local(img, seg, cell_idx):
    """Subtract the mean of `img` from `img` and set below-mean entries to 0.

    `seg` and `cell_idx` are accepted but not used. The result of the
    subtraction is returned; `img` itself is not written to.
    """
    background = img.mean()
    below_background = img < background
    result = img - background
    result[below_background] = 0
    return result
# Landmark assignment: use mean background subtraction for local preprocessing
feat_kwargs['assign_landmarks_kwargs'] = {
    "local_prep_func" : mean_bgsub_local,
}

# Additional settings for TFOR
feat_kwargs['perform_CBE_TFOR_kwargs'] = {
    "presample"       : ('kmeans', 500),
    "save_presampled" : True,
    "suffix_out"      : {
        'PRES' : '_kmeansPRES',
        'DS'   : '_DDDS',
        'CBE'  : '_CBEmanh',
        'META' : '-' + suffix_int,
    },
}

# Additional settings for pdCFOR
feat_kwargs['perform_CBE_CFOR_kwargs'] = {
    "presample"       : ('kmeans', 500),
    "save_presampled" : True,
    "cfor"            : ('PD', 3),
    "suffix_out"      : {
        'PRES' : '_kmeansPRES',
        'CFOR' : '_pdCFOR',
        'DS'   : '_DDDS',
        'CBE'  : '_CBEmanh',
        'META' : '-' + suffix_int,
    },
}

# Run landmark and feature extraction pipeline with pdCFOR
feat.feature_extraction(
    dir_path, suffix_seg, suffix_int, num_LMs,
    downsample, clustering, features,
    **feat_kwargs
)

### Feature Embedding for `cldnB:lyn-EGFP + RNA:mKate2-Rab5a`

#### Shape Space

In [None]:
# Target directory
dir_path = r'data\experimentA\image_data'

# Parse relevant IDs from IDR bulk data
relevant_samples = parse_from_IDR(dir_path, 'mKate2rab5')
# Parenthesized form prints the same text on Python 2 and is also valid on
# Python 3, where the bare print statement is a SyntaxError.
print("Found %i relevant samples!" % len(relevant_samples))

# Main (positional) arguments for feat.feature_extraction
suffix_seg = 'seg'
suffix_int = 'seg'
num_LMs    = 2000
downsample = ('ddds', 200000)
# Reuse the reference cluster objects loaded from the metadata file above
clustering = (('previous', 'previous'),
              (clust_TFOR, clust_pdCFOR))
features   = ["kNN-distsManh"]

# Keyword arguments for feat.feature_extraction
feat_kwargs = {
    'select_IDs'      : relevant_samples,
    'recurse'         : True,
    'compute_TFOR'    : True,
    'compute_CFOR'    : True,
    'processes'       : 14,
    'dask_graph_path' : None,
    'profiling'       : True,
    'verbose'         : True,
}

# Shape space: preprocessing function to generate outline shell
from scipy.ndimage import binary_erosion
def seg_shell_local(img, seg, cell_idx, iterations=1):
    """Return a boolean mask of the outline shell of cell `cell_idx` in `seg`.

    The shell is made up of the elements labeled `cell_idx` that are removed
    by `iterations` rounds of binary erosion. `img` is accepted but not used.
    """
    cell_mask = (seg == cell_idx)
    eroded_mask = binary_erosion(cell_mask, iterations=iterations)
    return np.logical_xor(cell_mask, eroded_mask)
# Landmark assignment: use the outline shell function for local preprocessing
feat_kwargs['assign_landmarks_kwargs'] = {
    "local_prep_func"   : seg_shell_local,
    "local_prep_params" : 1,
}

# Additional settings for TFOR
feat_kwargs['perform_CBE_TFOR_kwargs'] = {
    "presample"       : ('kmeans', 500),
    "save_presampled" : True,
    "suffix_out"      : {
        'PRES' : '_kmeansPRES',
        'DS'   : '_DDDS',
        'CBE'  : '_CBEmanh',
        'META' : '-' + suffix_int,
    },
}

# Additional settings for pdCFOR
feat_kwargs['perform_CBE_CFOR_kwargs'] = {
    "presample"       : ('kmeans', 500),
    "save_presampled" : True,
    "cfor"            : ('PD', 3),
    "suffix_out"      : {
        'PRES' : '_kmeansPRES',
        'CFOR' : '_pdCFOR',
        'DS'   : '_DDDS',
        'CBE'  : '_CBEmanh',
        'META' : '-' + suffix_int,
    },
}

# Run landmark and feature extraction pipeline
feat.feature_extraction(
    dir_path, suffix_seg, suffix_int, num_LMs,
    downsample, clustering, features,
    **feat_kwargs
)

#### Rab5 Space

In [None]:
# Target directory
dir_path = r'data\experimentA\image_data'

# Parse relevant IDs from IDR bulk data
relevant_samples = parse_from_IDR(dir_path, 'mKate2rab5')
# Parenthesized form prints the same text on Python 2 and is also valid on
# Python 3, where the bare print statement is a SyntaxError.
print("Found %i relevant samples!" % len(relevant_samples))

# Main (positional) arguments for feat.feature_extraction
suffix_seg = 'seg'
suffix_int = 'mKate2rab5'
num_LMs    = 2000
downsample = ('ddds', 200000)
clustering = ('kmeans', 20)
features   = ["kNN-distsManh"]

# Keyword arguments for feat.feature_extraction
feat_kwargs = {
    'select_IDs'      : relevant_samples,
    'recurse'         : True,
    'compute_TFOR'    : True,
    'compute_CFOR'    : True,
    'processes'       : 14,
    'dask_graph_path' : None,
    'profiling'       : True,
    'verbose'         : True,
}

# Preprocessing function; mean background subtraction
def mean_bgsub_local(img, seg, cell_idx):
    """Subtract the mean of `img` from `img` and set below-mean entries to 0.

    `seg` and `cell_idx` are accepted but not used. The result of the
    subtraction is returned; `img` itself is not written to.
    """
    background = img.mean()
    below_background = img < background
    result = img - background
    result[below_background] = 0
    return result
# Landmark assignment: use mean background subtraction for local preprocessing
feat_kwargs['assign_landmarks_kwargs'] = {
    "local_prep_func" : mean_bgsub_local,
}

# Additional settings for TFOR
feat_kwargs['perform_CBE_TFOR_kwargs'] = {
    "presample"       : ('kmeans', 500),
    "save_presampled" : True,
    "suffix_out"      : {
        'PRES' : '_kmeansPRES',
        'DS'   : '_DDDS',
        'CBE'  : '_CBEmanh',
        'META' : '-' + suffix_int,
    },
}

# Additional settings for pdCFOR
feat_kwargs['perform_CBE_CFOR_kwargs'] = {
    "presample"       : ('kmeans', 500),
    "save_presampled" : True,
    "cfor"            : ('PD', 3),
    "suffix_out"      : {
        'PRES' : '_kmeansPRES',
        'CFOR' : '_pdCFOR',
        'DS'   : '_DDDS',
        'CBE'  : '_CBEmanh',
        'META' : '-' + suffix_int,
    },
}

# Run landmark and feature extraction pipeline with pdCFOR
feat.feature_extraction(
    dir_path, suffix_seg, suffix_int, num_LMs,
    downsample, clustering, features,
    **feat_kwargs
)

### Feature Embedding for `cldnB:lyn-EGFP + RNA:mKate2-GM130(rat)`

#### Shape Space

In [None]:
# Target directory
dir_path = r'data\experimentA\image_data'

# Parse relevant IDs from IDR bulk data
relevant_samples = parse_from_IDR(dir_path, 'mKate2GM130')
# Parenthesized form prints the same text on Python 2 and is also valid on
# Python 3, where the bare print statement is a SyntaxError.
print("Found %i relevant samples!" % len(relevant_samples))

# Main (positional) arguments for feat.feature_extraction
suffix_seg = 'seg'
suffix_int = 'seg'
num_LMs    = 2000
downsample = ('ddds', 200000)
# Reuse the reference cluster objects loaded from the metadata file above
clustering = (('previous', 'previous'),
              (clust_TFOR, clust_pdCFOR))
features   = ["kNN-distsManh"]

# Keyword arguments for feat.feature_extraction
feat_kwargs = {
    'select_IDs'      : relevant_samples,
    'recurse'         : True,
    'compute_TFOR'    : True,
    'compute_CFOR'    : True,
    'processes'       : 14,
    'dask_graph_path' : None,
    'profiling'       : True,
    'verbose'         : True,
}

# Shape space: preprocessing function to generate outline shell
from scipy.ndimage import binary_erosion
def seg_shell_local(img, seg, cell_idx, iterations=1):
    """Return a boolean mask of the outline shell of cell `cell_idx` in `seg`.

    The shell is made up of the elements labeled `cell_idx` that are removed
    by `iterations` rounds of binary erosion. `img` is accepted but not used.
    """
    cell_mask = (seg == cell_idx)
    eroded_mask = binary_erosion(cell_mask, iterations=iterations)
    return np.logical_xor(cell_mask, eroded_mask)
# Landmark assignment: use the outline shell function for local preprocessing
feat_kwargs['assign_landmarks_kwargs'] = {
    "local_prep_func"   : seg_shell_local,
    "local_prep_params" : 1,
}

# Additional settings for TFOR
feat_kwargs['perform_CBE_TFOR_kwargs'] = {
    "presample"       : ('kmeans', 500),
    "save_presampled" : True,
    "suffix_out"      : {
        'PRES' : '_kmeansPRES',
        'DS'   : '_DDDS',
        'CBE'  : '_CBEmanh',
        'META' : '-' + suffix_int,
    },
}

# Additional settings for pdCFOR
feat_kwargs['perform_CBE_CFOR_kwargs'] = {
    "presample"       : ('kmeans', 500),
    "save_presampled" : True,
    "cfor"            : ('PD', 3),
    "suffix_out"      : {
        'PRES' : '_kmeansPRES',
        'CFOR' : '_pdCFOR',
        'DS'   : '_DDDS',
        'CBE'  : '_CBEmanh',
        'META' : '-' + suffix_int,
    },
}

# Run landmark and feature extraction pipeline
feat.feature_extraction(
    dir_path, suffix_seg, suffix_int, num_LMs,
    downsample, clustering, features,
    **feat_kwargs
)

#### GM130 Space

In [None]:
# Target directory
dir_path = r'data\experimentA\image_data'

# Parse relevant IDs from IDR bulk data
relevant_samples = parse_from_IDR(dir_path, 'mKate2GM130')
# Parenthesized form prints the same text on Python 2 and is also valid on
# Python 3, where the bare print statement is a SyntaxError.
print("Found %i relevant samples!" % len(relevant_samples))

# Main (positional) arguments for feat.feature_extraction
suffix_seg = 'seg'
suffix_int = 'mKate2GM130'
num_LMs    = 2000
downsample = ('ddds', 200000)
clustering = ('kmeans', 20)
features   = ["kNN-distsManh"]

# Keyword arguments for feat.feature_extraction
feat_kwargs = {
    'select_IDs'      : relevant_samples,
    'recurse'         : True,
    'compute_TFOR'    : True,
    'compute_CFOR'    : True,
    'processes'       : 14,
    'dask_graph_path' : None,
    'profiling'       : True,
    'verbose'         : True,
}

# Preprocessing function; mean background subtraction
def mean_bgsub_local(img, seg, cell_idx):
    """Subtract the mean of `img` from `img` and set below-mean entries to 0.

    `seg` and `cell_idx` are accepted but not used. The result of the
    subtraction is returned; `img` itself is not written to.
    """
    background = img.mean()
    below_background = img < background
    result = img - background
    result[below_background] = 0
    return result
# Landmark assignment: use mean background subtraction for local preprocessing
feat_kwargs['assign_landmarks_kwargs'] = {
    "local_prep_func" : mean_bgsub_local,
}

# Additional settings for TFOR
feat_kwargs['perform_CBE_TFOR_kwargs'] = {
    "presample"       : ('kmeans', 500),
    "save_presampled" : True,
    "suffix_out"      : {
        'PRES' : '_kmeansPRES',
        'DS'   : '_DDDS',
        'CBE'  : '_CBEmanh',
        'META' : '-' + suffix_int,
    },
}

# Additional settings for pdCFOR
feat_kwargs['perform_CBE_CFOR_kwargs'] = {
    "presample"       : ('kmeans', 500),
    "save_presampled" : True,
    "cfor"            : ('PD', 3),
    "suffix_out"      : {
        'PRES' : '_kmeansPRES',
        'CFOR' : '_pdCFOR',
        'DS'   : '_DDDS',
        'CBE'  : '_CBEmanh',
        'META' : '-' + suffix_int,
    },
}

# Run landmark and feature extraction pipeline with pdCFOR
feat.feature_extraction(
    dir_path, suffix_seg, suffix_int, num_LMs,
    downsample, clustering, features,
    **feat_kwargs
)

### Feature Embedding for `cldnB:lyn-EGFP + lexOP:CDMPR-tagRFPt`

#### Shape Space

In [None]:
# Target directory
dir_path = r'data\experimentA\image_data'

# Parse relevant IDs from IDR bulk data
relevant_samples = parse_from_IDR(dir_path, 'CDMPRtagRFPt')
# Parenthesized form prints the same text on Python 2 and is also valid on
# Python 3, where the bare print statement is a SyntaxError.
print("Found %i relevant samples!" % len(relevant_samples))

# Main (positional) arguments for feat.feature_extraction
suffix_seg = 'seg'
suffix_int = 'seg'
num_LMs    = 2000
downsample = ('ddds', 200000)
# Reuse the reference cluster objects loaded from the metadata file above
clustering = (('previous', 'previous'),
              (clust_TFOR, clust_pdCFOR))
features   = ["kNN-distsManh"]

# Keyword arguments for feat.feature_extraction
feat_kwargs = {
    'select_IDs'      : relevant_samples,
    'recurse'         : True,
    'compute_TFOR'    : True,
    'compute_CFOR'    : True,
    'processes'       : 14,
    'dask_graph_path' : None,
    'profiling'       : True,
    'verbose'         : True,
}

# Shape space: preprocessing function to generate outline shell
from scipy.ndimage import binary_erosion
def seg_shell_local(img, seg, cell_idx, iterations=1):
    """Return a boolean mask of the outline shell of cell `cell_idx` in `seg`.

    The shell is made up of the elements labeled `cell_idx` that are removed
    by `iterations` rounds of binary erosion. `img` is accepted but not used.
    """
    cell_mask = (seg == cell_idx)
    eroded_mask = binary_erosion(cell_mask, iterations=iterations)
    return np.logical_xor(cell_mask, eroded_mask)
# Landmark assignment: use the outline shell function for local preprocessing
feat_kwargs['assign_landmarks_kwargs'] = {
    "local_prep_func"   : seg_shell_local,
    "local_prep_params" : 1,
}

# Additional settings for TFOR
feat_kwargs['perform_CBE_TFOR_kwargs'] = {
    "presample"       : ('kmeans', 500),
    "save_presampled" : True,
    "suffix_out"      : {
        'PRES' : '_kmeansPRES',
        'DS'   : '_DDDS',
        'CBE'  : '_CBEmanh',
        'META' : '-' + suffix_int,
    },
}

# Additional settings for pdCFOR
feat_kwargs['perform_CBE_CFOR_kwargs'] = {
    "presample"       : ('kmeans', 500),
    "save_presampled" : True,
    "cfor"            : ('PD', 3),
    "suffix_out"      : {
        'PRES' : '_kmeansPRES',
        'CFOR' : '_pdCFOR',
        'DS'   : '_DDDS',
        'CBE'  : '_CBEmanh',
        'META' : '-' + suffix_int,
    },
}

# Run landmark and feature extraction pipeline
feat.feature_extraction(
    dir_path, suffix_seg, suffix_int, num_LMs,
    downsample, clustering, features,
    **feat_kwargs
)

#### CDMPR Space

In [None]:
# Target directory
dir_path = r'data\experimentA\image_data'

# Parse relevant IDs from IDR bulk data
relevant_samples = parse_from_IDR(dir_path, 'CDMPRtagRFPt')
# Parenthesized form prints the same text on Python 2 and is also valid on
# Python 3, where the bare print statement is a SyntaxError.
print("Found %i relevant samples!" % len(relevant_samples))

# Main (positional) arguments for feat.feature_extraction
suffix_seg = 'seg'
suffix_int = 'CDMPRtagRFPt'
num_LMs    = 2000
downsample = ('ddds', 200000)
clustering = ('kmeans', 20)
features   = ["kNN-distsManh"]

# Keyword arguments for feat.feature_extraction
feat_kwargs = {
    'select_IDs'      : relevant_samples,
    'recurse'         : True,
    'compute_TFOR'    : True,
    'compute_CFOR'    : True,
    'processes'       : 14,
    'dask_graph_path' : None,
    'profiling'       : True,
    'verbose'         : True,
}

# Preprocessing function; mean background subtraction
def mean_bgsub_local(img, seg, cell_idx):
    """Subtract the mean of `img` from `img` and clip negative results to zero.

    Args:
        img: Array supporting `.mean()`, element-wise comparison/subtraction
            and boolean-mask assignment (e.g. a numpy array).
        seg: Unused; accepted to match the preprocessing call signature.
        cell_idx: Unused; accepted to match the preprocessing call signature.

    Returns:
        A new array holding `img - img.mean()`, with every element whose
        original value was below the mean set to 0. `img` is not modified.
    """
    background = img.mean()
    below_background = img < background
    corrected = img - background
    corrected[below_background] = 0
    return corrected
# Add the preprocessing hook and the CBE settings for both reference frames
# to the keyword arguments in a single step
feat_kwargs.update({
    # Preprocessing callable registered under assign_landmarks_kwargs
    'assign_landmarks_kwargs': {"local_prep_func": mean_bgsub_local},

    # Additional settings for TFOR
    'perform_CBE_TFOR_kwargs': {
        "presample": ('kmeans', 500),
        "save_presampled": True,
        "suffix_out": {
            'PRES': '_kmeansPRES',
            'DS': '_DDDS',
            'CBE': '_CBEmanh',
            'META': '-' + suffix_int,
        },
    },

    # Additional settings for pdCFOR
    'perform_CBE_CFOR_kwargs': {
        "presample": ('kmeans', 500),
        "save_presampled": True,
        "cfor": ('PD', 3),
        "suffix_out": {
            'PRES': '_kmeansPRES',
            'CFOR': '_pdCFOR',
            'DS': '_DDDS',
            'CBE': '_CBEmanh',
            'META': '-' + suffix_int,
        },
    },
})

# Run landmark and feature extraction pipeline with pdCFOR
feat.feature_extraction(
    dir_path, suffix_seg, suffix_int, num_LMs,
    downsample, clustering, features,
    **feat_kwargs
)

### Feature Embedding for `cldnB:lyn-EGFP + LexOP:B4GalT1(1-55Q)-tagRFPt`

#### Shape Space

In [None]:
# Target directory. Built with os.path.join so the separator matches the host
# OS; the former raw-string literal hard-coded Windows backslashes.
dir_path = os.path.join('data', 'experimentA', 'image_data')

# Parse relevant IDs from IDR bulk data: keeps the samples that contain an
# image file ending in 'b4galT1tagRFPt.tif'
relevant_samples = parse_from_IDR(dir_path, 'b4galT1tagRFPt')
# Parenthesized single-argument print is valid both as the Python 2 statement
# and as the Python 3 function call
print("Found %i relevant samples!" % len(relevant_samples))

# Main arguments (passed positionally to feat.feature_extraction below)
suffix_seg = 'seg'
suffix_int = 'seg'
num_LMs    = 2000
downsample = ('ddds', 200000)
# NOTE(review): clust_TFOR and clust_pdCFOR are not assigned in this cell;
# presumably they come from the reference shape space run earlier in the
# notebook - confirm they exist when running on a fresh kernel.
clustering = (('previous', 'previous'),
              (clust_TFOR, clust_pdCFOR))
features   = ["kNN-distsManh"]

# Keyword arguments (unpacked into feat.feature_extraction below)
feat_kwargs = dict()
feat_kwargs['select_IDs']      = relevant_samples
feat_kwargs['recurse']         = True
feat_kwargs['compute_TFOR']    = True
feat_kwargs['compute_CFOR']    = True
feat_kwargs['processes']       = 14
feat_kwargs['dask_graph_path'] = None
feat_kwargs['profiling']       = True
feat_kwargs['verbose']         = True

# Shape space: preprocessing function to generate outline shell
from scipy.ndimage import binary_erosion
def seg_shell_local(img, seg, cell_idx, iterations=1):
    """Return the outline shell of one labelled cell in a segmentation.

    Args:
        img: Unused; accepted to match the preprocessing call signature.
        seg: Label array that is compared element-wise against `cell_idx`.
        cell_idx: Label value selecting the cell of interest.
        iterations: Number of binary erosion iterations applied to the mask.

    Returns:
        Boolean array that is True where the cell mask (`seg == cell_idx`)
        and its eroded version differ.
    """
    cell_mask = (seg == cell_idx)
    eroded_mask = binary_erosion(cell_mask, iterations=iterations)
    return np.logical_xor(cell_mask, eroded_mask)
# Add the preprocessing hook and the CBE settings for both reference frames
# to the keyword arguments in a single step
feat_kwargs.update({
    # Outline-shell preprocessing registered under assign_landmarks_kwargs.
    # NOTE(review): the value 1 is presumably the `iterations` argument of
    # seg_shell_local - verify how katachi forwards local_prep_params.
    'assign_landmarks_kwargs': {
        "local_prep_func": seg_shell_local,
        "local_prep_params": 1,
    },

    # Additional settings for TFOR
    'perform_CBE_TFOR_kwargs': {
        "presample": ('kmeans', 500),
        "save_presampled": True,
        "suffix_out": {
            'PRES': '_kmeansPRES',
            'DS': '_DDDS',
            'CBE': '_CBEmanh',
            'META': '-' + suffix_int,
        },
    },

    # Additional settings for pdCFOR
    'perform_CBE_CFOR_kwargs': {
        "presample": ('kmeans', 500),
        "save_presampled": True,
        "cfor": ('PD', 3),
        "suffix_out": {
            'PRES': '_kmeansPRES',
            'CFOR': '_pdCFOR',
            'DS': '_DDDS',
            'CBE': '_CBEmanh',
            'META': '-' + suffix_int,
        },
    },
})

# Run landmark and feature extraction pipeline
feat.feature_extraction(
    dir_path, suffix_seg, suffix_int, num_LMs,
    downsample, clustering, features,
    **feat_kwargs
)

#### B4GalT1 Space

In [None]:
# Target directory. Built with os.path.join so the separator matches the host
# OS; the former raw-string literal hard-coded Windows backslashes.
dir_path = os.path.join('data', 'experimentA', 'image_data')

# Parse relevant IDs from IDR bulk data: keeps the samples that contain an
# image file ending in 'b4galT1tagRFPt.tif'
relevant_samples = parse_from_IDR(dir_path, 'b4galT1tagRFPt')
# Parenthesized single-argument print is valid both as the Python 2 statement
# and as the Python 3 function call
print("Found %i relevant samples!" % len(relevant_samples))

# Main arguments (passed positionally to feat.feature_extraction below)
suffix_seg = 'seg'
suffix_int = 'b4galT1tagRFPt'
num_LMs    = 2000
downsample = ('ddds', 200000)
clustering = ('kmeans', 20)
features   = ["kNN-distsManh"]

# Keyword arguments (unpacked into feat.feature_extraction below)
feat_kwargs = dict()
feat_kwargs['select_IDs']      = relevant_samples
feat_kwargs['recurse']         = True
feat_kwargs['compute_TFOR']    = True
feat_kwargs['compute_CFOR']    = True
feat_kwargs['processes']       = 14
feat_kwargs['dask_graph_path'] = None
feat_kwargs['profiling']       = True
feat_kwargs['verbose']         = True

# Preprocessing function; mean background subtraction
def mean_bgsub_local(img, seg, cell_idx):
    """Subtract the mean of `img` from `img` and clip negative results to zero.

    Args:
        img: Array supporting `.mean()`, element-wise comparison/subtraction
            and boolean-mask assignment (e.g. a numpy array).
        seg: Unused; accepted to match the preprocessing call signature.
        cell_idx: Unused; accepted to match the preprocessing call signature.

    Returns:
        A new array holding `img - img.mean()`, with every element whose
        original value was below the mean set to 0. `img` is not modified.
    """
    background = img.mean()
    below_background = img < background
    corrected = img - background
    corrected[below_background] = 0
    return corrected
# Add the preprocessing hook and the CBE settings for both reference frames
# to the keyword arguments in a single step
feat_kwargs.update({
    # Preprocessing callable registered under assign_landmarks_kwargs
    'assign_landmarks_kwargs': {"local_prep_func": mean_bgsub_local},

    # Additional settings for TFOR
    'perform_CBE_TFOR_kwargs': {
        "presample": ('kmeans', 500),
        "save_presampled": True,
        "suffix_out": {
            'PRES': '_kmeansPRES',
            'DS': '_DDDS',
            'CBE': '_CBEmanh',
            'META': '-' + suffix_int,
        },
    },

    # Additional settings for pdCFOR
    'perform_CBE_CFOR_kwargs': {
        "presample": ('kmeans', 500),
        "save_presampled": True,
        "cfor": ('PD', 3),
        "suffix_out": {
            'PRES': '_kmeansPRES',
            'CFOR': '_pdCFOR',
            'DS': '_DDDS',
            'CBE': '_CBEmanh',
            'META': '-' + suffix_int,
        },
    },
})

# Run landmark and feature extraction pipeline with pdCFOR
feat.feature_extraction(
    dir_path, suffix_seg, suffix_int, num_LMs,
    downsample, clustering, features,
    **feat_kwargs
)

### Feature Embedding for `cldnB:lyn-EGFP + atoh1a:dtomato`

#### Shape Space

In [None]:
# Target directory. Built with os.path.join so the separator matches the host
# OS; the former raw-string literal hard-coded Windows backslashes.
dir_path = os.path.join('data', 'experimentA', 'image_data')

# Parse relevant IDs from IDR bulk data: keeps the samples that contain an
# image file ending in 'atoh1a.tif'
relevant_samples = parse_from_IDR(dir_path, 'atoh1a')
# Parenthesized single-argument print is valid both as the Python 2 statement
# and as the Python 3 function call
print("Found %i relevant samples!" % len(relevant_samples))

# Main arguments (passed positionally to feat.feature_extraction below)
suffix_seg = 'seg'
suffix_int = 'seg'
num_LMs    = 2000
downsample = ('ddds', 200000)
# NOTE(review): clust_TFOR and clust_pdCFOR are not assigned in this cell;
# presumably they come from the reference shape space run earlier in the
# notebook - confirm they exist when running on a fresh kernel.
clustering = (('previous', 'previous'),
              (clust_TFOR, clust_pdCFOR))
features   = ["kNN-distsManh"]

# Keyword arguments (unpacked into feat.feature_extraction below)
feat_kwargs = dict()
feat_kwargs['select_IDs']      = relevant_samples
feat_kwargs['recurse']         = True
feat_kwargs['compute_TFOR']    = True
feat_kwargs['compute_CFOR']    = True
feat_kwargs['processes']       = 14
feat_kwargs['dask_graph_path'] = None
feat_kwargs['profiling']       = True
feat_kwargs['verbose']         = True

# Shape space: preprocessing function to generate outline shell
from scipy.ndimage import binary_erosion
def seg_shell_local(img, seg, cell_idx, iterations=1):
    """Return the outline shell of one labelled cell in a segmentation.

    Args:
        img: Unused; accepted to match the preprocessing call signature.
        seg: Label array that is compared element-wise against `cell_idx`.
        cell_idx: Label value selecting the cell of interest.
        iterations: Number of binary erosion iterations applied to the mask.

    Returns:
        Boolean array that is True where the cell mask (`seg == cell_idx`)
        and its eroded version differ.
    """
    cell_mask = (seg == cell_idx)
    eroded_mask = binary_erosion(cell_mask, iterations=iterations)
    return np.logical_xor(cell_mask, eroded_mask)
# Add the preprocessing hook and the CBE settings for both reference frames
# to the keyword arguments in a single step
feat_kwargs.update({
    # Outline-shell preprocessing registered under assign_landmarks_kwargs.
    # NOTE(review): the value 1 is presumably the `iterations` argument of
    # seg_shell_local - verify how katachi forwards local_prep_params.
    'assign_landmarks_kwargs': {
        "local_prep_func": seg_shell_local,
        "local_prep_params": 1,
    },

    # Additional settings for TFOR
    'perform_CBE_TFOR_kwargs': {
        "presample": ('kmeans', 500),
        "save_presampled": True,
        "suffix_out": {
            'PRES': '_kmeansPRES',
            'DS': '_DDDS',
            'CBE': '_CBEmanh',
            'META': '-' + suffix_int,
        },
    },

    # Additional settings for pdCFOR
    'perform_CBE_CFOR_kwargs': {
        "presample": ('kmeans', 500),
        "save_presampled": True,
        "cfor": ('PD', 3),
        "suffix_out": {
            'PRES': '_kmeansPRES',
            'CFOR': '_pdCFOR',
            'DS': '_DDDS',
            'CBE': '_CBEmanh',
            'META': '-' + suffix_int,
        },
    },
})

# Run landmark and feature extraction pipeline
feat.feature_extraction(
    dir_path, suffix_seg, suffix_int, num_LMs,
    downsample, clustering, features,
    **feat_kwargs
)

### Feature Embedding for `cldnB:lyn-EGFP + 6xUAS:tagRFPt-UtrCH`

#### Shape Space

In [None]:
# Target directory. Built with os.path.join so the separator matches the host
# OS; the former raw-string literal hard-coded Windows backslashes.
dir_path = os.path.join('data', 'experimentA', 'image_data')

# Parse relevant IDs from IDR bulk data: keeps the samples that contain an
# image file ending in 'tagRFPtUtrCH.tif'
relevant_samples = parse_from_IDR(dir_path, 'tagRFPtUtrCH')
# Parenthesized single-argument print is valid both as the Python 2 statement
# and as the Python 3 function call
print("Found %i relevant samples!" % len(relevant_samples))

# Main arguments (passed positionally to feat.feature_extraction below)
suffix_seg = 'seg'
suffix_int = 'seg'
num_LMs    = 2000
downsample = ('ddds', 200000)
# NOTE(review): clust_TFOR and clust_pdCFOR are not assigned in this cell;
# presumably they come from the reference shape space run earlier in the
# notebook - confirm they exist when running on a fresh kernel.
clustering = (('previous', 'previous'),
              (clust_TFOR, clust_pdCFOR))
features   = ["kNN-distsManh"]

# Keyword arguments (unpacked into feat.feature_extraction below)
feat_kwargs = dict()
feat_kwargs['select_IDs']      = relevant_samples
feat_kwargs['recurse']         = True
feat_kwargs['compute_TFOR']    = True
feat_kwargs['compute_CFOR']    = True
feat_kwargs['processes']       = 14
feat_kwargs['dask_graph_path'] = None
feat_kwargs['profiling']       = True
feat_kwargs['verbose']         = True

# Shape space: preprocessing function to generate outline shell
from scipy.ndimage import binary_erosion
def seg_shell_local(img, seg, cell_idx, iterations=1):
    """Return the outline shell of one labelled cell in a segmentation.

    Args:
        img: Unused; accepted to match the preprocessing call signature.
        seg: Label array that is compared element-wise against `cell_idx`.
        cell_idx: Label value selecting the cell of interest.
        iterations: Number of binary erosion iterations applied to the mask.

    Returns:
        Boolean array that is True where the cell mask (`seg == cell_idx`)
        and its eroded version differ.
    """
    cell_mask = (seg == cell_idx)
    eroded_mask = binary_erosion(cell_mask, iterations=iterations)
    return np.logical_xor(cell_mask, eroded_mask)
# Add the preprocessing hook and the CBE settings for both reference frames
# to the keyword arguments in a single step
feat_kwargs.update({
    # Outline-shell preprocessing registered under assign_landmarks_kwargs.
    # NOTE(review): the value 1 is presumably the `iterations` argument of
    # seg_shell_local - verify how katachi forwards local_prep_params.
    'assign_landmarks_kwargs': {
        "local_prep_func": seg_shell_local,
        "local_prep_params": 1,
    },

    # Additional settings for TFOR
    'perform_CBE_TFOR_kwargs': {
        "presample": ('kmeans', 500),
        "save_presampled": True,
        "suffix_out": {
            'PRES': '_kmeansPRES',
            'DS': '_DDDS',
            'CBE': '_CBEmanh',
            'META': '-' + suffix_int,
        },
    },

    # Additional settings for pdCFOR
    'perform_CBE_CFOR_kwargs': {
        "presample": ('kmeans', 500),
        "save_presampled": True,
        "cfor": ('PD', 3),
        "suffix_out": {
            'PRES': '_kmeansPRES',
            'CFOR': '_pdCFOR',
            'DS': '_DDDS',
            'CBE': '_CBEmanh',
            'META': '-' + suffix_int,
        },
    },
})

# Run landmark and feature extraction pipeline
feat.feature_extraction(
    dir_path, suffix_seg, suffix_int, num_LMs,
    downsample, clustering, features,
    **feat_kwargs
)

#### F-Actin Space

In [None]:
# Target directory. Built with os.path.join so the separator matches the host
# OS; the former raw-string literal hard-coded Windows backslashes.
dir_path = os.path.join('data', 'experimentA', 'image_data')

# Parse relevant IDs from IDR bulk data: keeps the samples that contain an
# image file ending in 'tagRFPtUtrCH.tif'
relevant_samples = parse_from_IDR(dir_path, 'tagRFPtUtrCH')
# Parenthesized single-argument print is valid both as the Python 2 statement
# and as the Python 3 function call
print("Found %i relevant samples!" % len(relevant_samples))

# Main arguments (passed positionally to feat.feature_extraction below)
suffix_seg = 'seg'
suffix_int = 'tagRFPtUtrCH'
num_LMs    = 2000
downsample = ('ddds', 200000)
clustering = ('kmeans', 20)
features   = ["kNN-distsManh"]

# Keyword arguments (unpacked into feat.feature_extraction below)
feat_kwargs = dict()
feat_kwargs['select_IDs']      = relevant_samples
feat_kwargs['recurse']         = True
feat_kwargs['compute_TFOR']    = True
feat_kwargs['compute_CFOR']    = True
feat_kwargs['processes']       = 14
feat_kwargs['dask_graph_path'] = None
feat_kwargs['profiling']       = True
feat_kwargs['verbose']         = True

# Preprocessing function; mean background subtraction
def mean_bgsub_local(img, seg, cell_idx):
    """Subtract the mean of `img` from `img` and clip negative results to zero.

    Args:
        img: Array supporting `.mean()`, element-wise comparison/subtraction
            and boolean-mask assignment (e.g. a numpy array).
        seg: Unused; accepted to match the preprocessing call signature.
        cell_idx: Unused; accepted to match the preprocessing call signature.

    Returns:
        A new array holding `img - img.mean()`, with every element whose
        original value was below the mean set to 0. `img` is not modified.
    """
    background = img.mean()
    below_background = img < background
    corrected = img - background
    corrected[below_background] = 0
    return corrected
# Add the preprocessing hook and the CBE settings for both reference frames
# to the keyword arguments in a single step
feat_kwargs.update({
    # Preprocessing callable registered under assign_landmarks_kwargs
    'assign_landmarks_kwargs': {"local_prep_func": mean_bgsub_local},

    # Additional settings for TFOR
    'perform_CBE_TFOR_kwargs': {
        "presample": ('kmeans', 500),
        "save_presampled": True,
        "suffix_out": {
            'PRES': '_kmeansPRES',
            'DS': '_DDDS',
            'CBE': '_CBEmanh',
            'META': '-' + suffix_int,
        },
    },

    # Additional settings for pdCFOR
    'perform_CBE_CFOR_kwargs': {
        "presample": ('kmeans', 500),
        "save_presampled": True,
        "cfor": ('PD', 3),
        "suffix_out": {
            'PRES': '_kmeansPRES',
            'CFOR': '_pdCFOR',
            'DS': '_DDDS',
            'CBE': '_CBEmanh',
            'META': '-' + suffix_int,
        },
    },
})

# Run landmark and feature extraction pipeline with pdCFOR
feat.feature_extraction(
    dir_path, suffix_seg, suffix_int, num_LMs,
    downsample, clustering, features,
    **feat_kwargs
)

### Feature Embedding for `cldnB:lyn-EGFP + LysoTracker Deep Red`

#### Shape Space

In [None]:
# Target directory. Built with os.path.join so the separator matches the host
# OS; the former raw-string literal hard-coded Windows backslashes.
dir_path = os.path.join('data', 'experimentA', 'image_data')

# Parse relevant IDs from IDR bulk data: keeps the samples that contain an
# image file ending in 'lysotrackerdeepred.tif'
relevant_samples = parse_from_IDR(dir_path, 'lysotrackerdeepred')
# Parenthesized single-argument print is valid both as the Python 2 statement
# and as the Python 3 function call
print("Found %i relevant samples!" % len(relevant_samples))

# Main arguments (passed positionally to feat.feature_extraction below)
suffix_seg = 'seg'
suffix_int = 'seg'
num_LMs    = 2000
downsample = ('ddds', 200000)
# NOTE(review): clust_TFOR and clust_pdCFOR are not assigned in this cell;
# presumably they come from the reference shape space run earlier in the
# notebook - confirm they exist when running on a fresh kernel.
clustering = (('previous', 'previous'),
              (clust_TFOR, clust_pdCFOR))
features   = ["kNN-distsManh"]

# Keyword arguments (unpacked into feat.feature_extraction below)
feat_kwargs = dict()
feat_kwargs['select_IDs']      = relevant_samples
feat_kwargs['recurse']         = True
feat_kwargs['compute_TFOR']    = True
feat_kwargs['compute_CFOR']    = True
feat_kwargs['processes']       = 14
feat_kwargs['dask_graph_path'] = None
feat_kwargs['profiling']       = True
feat_kwargs['verbose']         = True

# Shape space: preprocessing function to generate outline shell
from scipy.ndimage import binary_erosion
def seg_shell_local(img, seg, cell_idx, iterations=1):
    """Return the outline shell of one labelled cell in a segmentation.

    Args:
        img: Unused; accepted to match the preprocessing call signature.
        seg: Label array that is compared element-wise against `cell_idx`.
        cell_idx: Label value selecting the cell of interest.
        iterations: Number of binary erosion iterations applied to the mask.

    Returns:
        Boolean array that is True where the cell mask (`seg == cell_idx`)
        and its eroded version differ.
    """
    cell_mask = (seg == cell_idx)
    eroded_mask = binary_erosion(cell_mask, iterations=iterations)
    return np.logical_xor(cell_mask, eroded_mask)
# Add the preprocessing hook and the CBE settings for both reference frames
# to the keyword arguments in a single step
feat_kwargs.update({
    # Outline-shell preprocessing registered under assign_landmarks_kwargs.
    # NOTE(review): the value 1 is presumably the `iterations` argument of
    # seg_shell_local - verify how katachi forwards local_prep_params.
    'assign_landmarks_kwargs': {
        "local_prep_func": seg_shell_local,
        "local_prep_params": 1,
    },

    # Additional settings for TFOR
    'perform_CBE_TFOR_kwargs': {
        "presample": ('kmeans', 500),
        "save_presampled": True,
        "suffix_out": {
            'PRES': '_kmeansPRES',
            'DS': '_DDDS',
            'CBE': '_CBEmanh',
            'META': '-' + suffix_int,
        },
    },

    # Additional settings for pdCFOR
    'perform_CBE_CFOR_kwargs': {
        "presample": ('kmeans', 500),
        "save_presampled": True,
        "cfor": ('PD', 3),
        "suffix_out": {
            'PRES': '_kmeansPRES',
            'CFOR': '_pdCFOR',
            'DS': '_DDDS',
            'CBE': '_CBEmanh',
            'META': '-' + suffix_int,
        },
    },
})

# Run landmark and feature extraction pipeline
feat.feature_extraction(
    dir_path, suffix_seg, suffix_int, num_LMs,
    downsample, clustering, features,
    **feat_kwargs
)

#### Lysosome Space

In [None]:
# Target directory. Built with os.path.join so the separator matches the host
# OS; the former raw-string literal hard-coded Windows backslashes.
dir_path = os.path.join('data', 'experimentA', 'image_data')

# Parse relevant IDs from IDR bulk data: keeps the samples that contain an
# image file ending in 'lysotrackerdeepred.tif'
relevant_samples = parse_from_IDR(dir_path, 'lysotrackerdeepred')
# Parenthesized single-argument print is valid both as the Python 2 statement
# and as the Python 3 function call
print("Found %i relevant samples!" % len(relevant_samples))

# Main arguments (passed positionally to feat.feature_extraction below)
suffix_seg = 'seg'
suffix_int = 'lysotrackerdeepred'
num_LMs    = 2000
downsample = ('ddds', 200000)
clustering = ('kmeans', 20)
features   = ["kNN-distsManh"]

# Keyword arguments (unpacked into feat.feature_extraction below)
feat_kwargs = dict()
feat_kwargs['select_IDs']      = relevant_samples
feat_kwargs['recurse']         = True
feat_kwargs['compute_TFOR']    = True
feat_kwargs['compute_CFOR']    = True
feat_kwargs['processes']       = 14
feat_kwargs['dask_graph_path'] = None
feat_kwargs['profiling']       = True
feat_kwargs['verbose']         = True

# Preprocessing function; mean background subtraction
def mean_bgsub_local(img, seg, cell_idx):
    """Subtract the mean of `img` from `img` and clip negative results to zero.

    Args:
        img: Array supporting `.mean()`, element-wise comparison/subtraction
            and boolean-mask assignment (e.g. a numpy array).
        seg: Unused; accepted to match the preprocessing call signature.
        cell_idx: Unused; accepted to match the preprocessing call signature.

    Returns:
        A new array holding `img - img.mean()`, with every element whose
        original value was below the mean set to 0. `img` is not modified.
    """
    background = img.mean()
    below_background = img < background
    corrected = img - background
    corrected[below_background] = 0
    return corrected
# Add the preprocessing hook and the CBE settings for both reference frames
# to the keyword arguments in a single step
feat_kwargs.update({
    # Preprocessing callable registered under assign_landmarks_kwargs
    'assign_landmarks_kwargs': {"local_prep_func": mean_bgsub_local},

    # Additional settings for TFOR
    'perform_CBE_TFOR_kwargs': {
        "presample": ('kmeans', 500),
        "save_presampled": True,
        "suffix_out": {
            'PRES': '_kmeansPRES',
            'DS': '_DDDS',
            'CBE': '_CBEmanh',
            'META': '-' + suffix_int,
        },
    },

    # Additional settings for pdCFOR
    'perform_CBE_CFOR_kwargs': {
        "presample": ('kmeans', 500),
        "save_presampled": True,
        "cfor": ('PD', 3),
        "suffix_out": {
            'PRES': '_kmeansPRES',
            'CFOR': '_pdCFOR',
            'DS': '_DDDS',
            'CBE': '_CBEmanh',
            'META': '-' + suffix_int,
        },
    },
})

# Run landmark and feature extraction pipeline with pdCFOR
feat.feature_extraction(
    dir_path, suffix_seg, suffix_int, num_LMs,
    downsample, clustering, features,
    **feat_kwargs
)

### Feature Embedding for `pea3 smFISH`

#### Shape Space

In [None]:
# Target directory. Built with os.path.join so the separator matches the host
# OS; the former raw-string literal hard-coded Windows backslashes.
dir_path = os.path.join('data', 'experimentB', 'image_data')

# Parse relevant IDs from IDR bulk data: keeps the samples that contain an
# image file ending in 'pea3smFISH.tif'
relevant_samples = parse_from_IDR(dir_path, 'pea3smFISH')
# Parenthesized single-argument print is valid both as the Python 2 statement
# and as the Python 3 function call
print("Found %i relevant samples!" % len(relevant_samples))

# Main arguments (passed positionally to feat.feature_extraction below)
suffix_seg = 'seg'
suffix_int = 'seg'
num_LMs    = 2000
downsample = ('ddds', 200000)
# NOTE(review): clust_TFOR and clust_pdCFOR are not assigned in this cell;
# presumably they come from the reference shape space run earlier in the
# notebook - confirm they exist when running on a fresh kernel.
clustering = (('previous', 'previous'),
              (clust_TFOR, clust_pdCFOR))
features   = ["kNN-distsManh"]

# Keyword arguments (unpacked into feat.feature_extraction below)
feat_kwargs = dict()
feat_kwargs['select_IDs']      = relevant_samples
feat_kwargs['recurse']         = True
feat_kwargs['compute_TFOR']    = True
feat_kwargs['compute_CFOR']    = True
feat_kwargs['processes']       = 14
feat_kwargs['dask_graph_path'] = None
feat_kwargs['profiling']       = True
feat_kwargs['verbose']         = True

# Shape space: preprocessing function to generate outline shell
from scipy.ndimage import binary_erosion
def seg_shell_local(img, seg, cell_idx, iterations=1):
    """Return the outline shell of one labelled cell in a segmentation.

    Args:
        img: Unused; accepted to match the preprocessing call signature.
        seg: Label array that is compared element-wise against `cell_idx`.
        cell_idx: Label value selecting the cell of interest.
        iterations: Number of binary erosion iterations applied to the mask.

    Returns:
        Boolean array that is True where the cell mask (`seg == cell_idx`)
        and its eroded version differ.
    """
    cell_mask = (seg == cell_idx)
    eroded_mask = binary_erosion(cell_mask, iterations=iterations)
    return np.logical_xor(cell_mask, eroded_mask)
# Add the preprocessing hook and the CBE settings for both reference frames
# to the keyword arguments in a single step
feat_kwargs.update({
    # Outline-shell preprocessing registered under assign_landmarks_kwargs.
    # NOTE(review): the value 1 is presumably the `iterations` argument of
    # seg_shell_local - verify how katachi forwards local_prep_params.
    'assign_landmarks_kwargs': {
        "local_prep_func": seg_shell_local,
        "local_prep_params": 1,
    },

    # Additional settings for TFOR
    'perform_CBE_TFOR_kwargs': {
        "presample": ('kmeans', 500),
        "save_presampled": True,
        "suffix_out": {
            'PRES': '_kmeansPRES',
            'DS': '_DDDS',
            'CBE': '_CBEmanh',
            'META': '-' + suffix_int,
        },
    },

    # Additional settings for pdCFOR
    'perform_CBE_CFOR_kwargs': {
        "presample": ('kmeans', 500),
        "save_presampled": True,
        "cfor": ('PD', 3),
        "suffix_out": {
            'PRES': '_kmeansPRES',
            'CFOR': '_pdCFOR',
            'DS': '_DDDS',
            'CBE': '_CBEmanh',
            'META': '-' + suffix_int,
        },
    },
})

# Run landmark and feature extraction pipeline
feat.feature_extraction(
    dir_path, suffix_seg, suffix_int, num_LMs,
    downsample, clustering, features,
    **feat_kwargs
)