## RUN: Segmentation

This notebook runs the segmentations based on membrane data.

The IDR data already includes the segmentations, so by default this does nothing when run. To actually rerun the pipeline when segmented images are already present, change the `ignore_old` keyword argument in `seg.full_segmentation` to `False`. Note that the already existing segmentations will be overwritten by the new ones (though everything should stay the same unless parameters are being changed).

Optionally, linear unmixing of bleed-through from a secondary channel into the membrane channel can be performed prior to segmentation. By default, this only happens for the `cxcr4b:NLS-tdTomato` data.

### Prep

In [None]:
### Imports

# Generic
from __future__ import division
import os, sys, pickle
import numpy as np

# Internal
from katachi.pipelines import initialization as init
from katachi.pipelines import segmentation as seg
#from katachi.pipelines import feature_extraction as feat
#from katachi.pipelines import covariate_extraction as cov
#from katachi.utilities import loading as ld

In [None]:
### Function to parse relevant IDs from IDR bulk data

def parse_from_IDR(dir_path, target):
    """Select the sample IDs within an IDR bulk-data directory that match `target`.

    A sample is any sub-directory of `dir_path` whose name is exactly 10
    characters long. The image files of a sample are the files inside that
    sub-directory whose names start with the sample ID and end with '.tif'.

    Parameters
    ----------
    dir_path : str
        Directory containing one sub-directory per sample.
    target : str
        Either the special value 'membranes_only', which selects samples
        where every image file name contains 'lynEGFP', or a channel label,
        which selects samples with at least one image file name ending in
        `target + '.tif'`.

    Returns
    -------
    relevant_samples : list of str
        Matching sample IDs, sorted alphabetically. Samples without any
        image files are never included.
    """

    # Get all samples; sorted because os.listdir returns arbitrary order
    samples = sorted(d for d in os.listdir(dir_path) if len(d)==10
                     and os.path.isdir(os.path.join(dir_path, d)))

    # Select relevant samples
    relevant_samples = []
    for d in samples:

        # Get image files
        images = [i for i in os.listdir(os.path.join(dir_path, d))
                  if i.startswith(d) and i.endswith('.tif')]

        # A sample without images can never be relevant. Without this guard,
        # all() over an empty list is True and empty sample directories would
        # be reported as 'membranes_only' samples.
        if not images:
            continue

        # Special case for membranes only
        if target=='membranes_only':
            if all('lynEGFP' in img for img in images):
                relevant_samples.append(d)

        # All other cases
        else:
            if any(img.endswith(target+'.tif') for img in images):
                relevant_samples.append(d)

    return relevant_samples

### Segment `cldnB:lyn-EGFP`

In [None]:
# Target directory
# (built with os.path.join so the relative path resolves on any OS,
#  not only with Windows backslash separators)
dir_path = os.path.join('data', 'experimentA', 'image_data')

# Parse relevant IDs from IDR bulk data
# ('membranes_only' selects samples whose image files all contain 'lynEGFP')
relevant_samples = parse_from_IDR(dir_path, 'membranes_only')
print("Found %i relevant samples!" % len(relevant_samples))

# Additional arguments
segment_params = {'median_size'     : 3,
                  'gaussian_sigma'  : 3,
                  'max_offset'      : 10,
                  'offset_step'     : 1,
                  'clean_small'     : 1000,
                  'clean_big'       : 1000000,
                  'expansion_sigma' : 3}

# Run segmentation pipeline
# NOTE: With ignore_old=True, nothing is redone when segmentations already
#       exist (see notebook introduction); set it to False to force a rerun.
seg.full_segmentation(dir_path, 'lynEGFP', IDs=relevant_samples,
                      lin_unmix=False,
                      recurse=True, ignore_old=True, processes=14,
                      subprocesses=1, profiling=True, verbose=True,
                      segment_params=segment_params)

### Segment `cldnB:lyn-EGFP + cxcr4b:NLS-tdTomato`

In [None]:
# Target directory
# (built with os.path.join so the relative path resolves on any OS,
#  not only with Windows backslash separators)
dir_path = os.path.join('data', 'experimentA', 'image_data')

# Parse relevant IDs from IDR bulk data
# (samples with at least one image file ending in 'NLStdTomato.tif')
relevant_samples = parse_from_IDR(dir_path, 'NLStdTomato')
print("Found %i relevant samples!" % len(relevant_samples))

# Additional arguments
segment_params = {'median_size'     : 3,
                  'gaussian_sigma'  : 3,
                  'max_offset'      : 20,
                  'offset_step'     : 1,
                  'clean_small'     : 1000,
                  'clean_big'       : 1000000,
                  'expansion_sigma' : 3}

# Parameters forwarded to the linear unmixing step
# NOTE(review): the meaning of the three values is defined inside katachi
#               and is not visible from this notebook -- confirm there.
unmix_params = (0.0, 1.0, 20)

# Run segmentation pipeline
# This is the only data set for which linear unmixing is enabled; the
# secondary channel is passed via lin_unmix.
# NOTE: With ignore_old=True, nothing is redone when segmentations already
#       exist (see notebook introduction); set it to False to force a rerun.
seg.full_segmentation(dir_path, 'lynEGFP', IDs=relevant_samples,
                      lin_unmix='NLStdTomato',
                      recurse=True, ignore_old=True, processes=14,
                      subprocesses=1, profiling=True, verbose=True,
                      unmix_params=unmix_params, segment_params=segment_params)

### Segment `cldnB:lyn-EGFP + Actb2:mKate-Rab11a`

In [None]:
# Target directory
# (built with os.path.join so the relative path resolves on any OS,
#  not only with Windows backslash separators)
dir_path = os.path.join('data', 'experimentA', 'image_data')

# Parse relevant IDs from IDR bulk data
# (samples with at least one image file ending in 'mKate2rab11.tif')
relevant_samples = parse_from_IDR(dir_path, 'mKate2rab11')
print("Found %i relevant samples!" % len(relevant_samples))

# Additional arguments
segment_params = {'median_size'     : 3,
                  'gaussian_sigma'  : 3,
                  'max_offset'      : 40,
                  'offset_step'     : 2,
                  'clean_small'     : 1000,
                  'clean_big'       : 1000000,
                  'expansion_sigma' : 3}

# Run segmentation pipeline
# NOTE: With ignore_old=True, nothing is redone when segmentations already
#       exist (see notebook introduction); set it to False to force a rerun.
seg.full_segmentation(dir_path, 'lynEGFP', IDs=relevant_samples,
                      lin_unmix=False,
                      recurse=True, ignore_old=True, processes=14,
                      subprocesses=1, profiling=True, verbose=True,
                      segment_params=segment_params)

### Segment `cldnB:lyn-EGFP + RNA:mKate2-Rab5a`

In [None]:
# Target directory
# (built with os.path.join so the relative path resolves on any OS,
#  not only with Windows backslash separators)
dir_path = os.path.join('data', 'experimentA', 'image_data')

# Parse relevant IDs from IDR bulk data
# (samples with at least one image file ending in 'mKate2rab5.tif')
relevant_samples = parse_from_IDR(dir_path, 'mKate2rab5')
print("Found %i relevant samples!" % len(relevant_samples))

# Additional arguments
segment_params = {'median_size'     : 3,
                  'gaussian_sigma'  : 3,
                  'max_offset'      : 40,
                  'offset_step'     : 2,
                  'clean_small'     : 1000,
                  'clean_big'       : 1000000,
                  'expansion_sigma' : 3}

# Run segmentation pipeline
# NOTE: With ignore_old=True, nothing is redone when segmentations already
#       exist (see notebook introduction); set it to False to force a rerun.
seg.full_segmentation(dir_path, 'lynEGFP', IDs=relevant_samples,
                      lin_unmix=False,
                      recurse=True, ignore_old=True, processes=14,
                      subprocesses=1, profiling=True, verbose=True,
                      segment_params=segment_params)

### Segment `cldnB:lyn-EGFP + RNA:mKate2-GM130(rat)`

In [None]:
# Target directory
# (built with os.path.join so the relative path resolves on any OS,
#  not only with Windows backslash separators)
dir_path = os.path.join('data', 'experimentA', 'image_data')

# Parse relevant IDs from IDR bulk data
# (samples with at least one image file ending in 'mKate2GM130.tif')
relevant_samples = parse_from_IDR(dir_path, 'mKate2GM130')
print("Found %i relevant samples!" % len(relevant_samples))

# Additional arguments
segment_params = {'median_size'     : 3,
                  'gaussian_sigma'  : 3,
                  'max_offset'      : 40,
                  'offset_step'     : 2,
                  'clean_small'     : 1000,
                  'clean_big'       : 1000000,
                  'expansion_sigma' : 3}

# Run segmentation pipeline
# NOTE: With ignore_old=True, nothing is redone when segmentations already
#       exist (see notebook introduction); set it to False to force a rerun.
seg.full_segmentation(dir_path, 'lynEGFP', IDs=relevant_samples,
                      lin_unmix=False,
                      recurse=True, ignore_old=True, processes=14,
                      subprocesses=1, profiling=True, verbose=True,
                      segment_params=segment_params)

### Segment `cldnB:lyn-EGFP + lexOP:CDMPR-tagRFPt`

In [None]:
# Target directory
# (built with os.path.join so the relative path resolves on any OS,
#  not only with Windows backslash separators)
dir_path = os.path.join('data', 'experimentA', 'image_data')

# Parse relevant IDs from IDR bulk data
# (samples with at least one image file ending in 'CDMPRtagRFPt.tif')
relevant_samples = parse_from_IDR(dir_path, 'CDMPRtagRFPt')
print("Found %i relevant samples!" % len(relevant_samples))

# Additional arguments
segment_params = {'median_size'     : 3,
                  'gaussian_sigma'  : 3,
                  'max_offset'      : 40,
                  'offset_step'     : 2,
                  'clean_small'     : 1000,
                  'clean_big'       : 1000000,
                  'expansion_sigma' : 3}

# Run segmentation pipeline
# NOTE: With ignore_old=True, nothing is redone when segmentations already
#       exist (see notebook introduction); set it to False to force a rerun.
seg.full_segmentation(dir_path, 'lynEGFP', IDs=relevant_samples,
                      lin_unmix=False,
                      recurse=True, ignore_old=True, processes=14,
                      subprocesses=1, profiling=True, verbose=True,
                      segment_params=segment_params)

### Segment `cldnB:lyn-EGFP + LexOP:B4GalT1(1-55Q)-tagRFPt`

In [None]:
# Target directory
# (built with os.path.join so the relative path resolves on any OS,
#  not only with Windows backslash separators)
dir_path = os.path.join('data', 'experimentA', 'image_data')

# Parse relevant IDs from IDR bulk data
# (samples with at least one image file ending in 'b4galT1tagRFPt.tif')
relevant_samples = parse_from_IDR(dir_path, 'b4galT1tagRFPt')
print("Found %i relevant samples!" % len(relevant_samples))

# Additional arguments
segment_params = {'median_size'     : 3,
                  'gaussian_sigma'  : 3,
                  'max_offset'      : 40,
                  'offset_step'     : 2,
                  'clean_small'     : 1000,
                  'clean_big'       : 1000000,
                  'expansion_sigma' : 3}

# Run segmentation pipeline
# NOTE: With ignore_old=True, nothing is redone when segmentations already
#       exist (see notebook introduction); set it to False to force a rerun.
seg.full_segmentation(dir_path, 'lynEGFP', IDs=relevant_samples,
                      lin_unmix=False,
                      recurse=True, ignore_old=True, processes=14,
                      subprocesses=1, profiling=True, verbose=True,
                      segment_params=segment_params)

### Segment `cldnB:lyn-EGFP + atoh1a:dtomato`

In [None]:
# Target directory
# (built with os.path.join so the relative path resolves on any OS,
#  not only with Windows backslash separators)
dir_path = os.path.join('data', 'experimentA', 'image_data')

# Parse relevant IDs from IDR bulk data
# (samples with at least one image file ending in 'atoh1a.tif')
relevant_samples = parse_from_IDR(dir_path, 'atoh1a')
print("Found %i relevant samples!" % len(relevant_samples))

# Additional arguments
segment_params = {'median_size'     : 3,
                  'gaussian_sigma'  : 3,
                  'max_offset'      : 10,
                  'offset_step'     : 1,
                  'clean_small'     : 1000,
                  'clean_big'       : 1000000,
                  'expansion_sigma' : 3}

# Run segmentation pipeline
# NOTE: With ignore_old=True, nothing is redone when segmentations already
#       exist (see notebook introduction); set it to False to force a rerun.
seg.full_segmentation(dir_path, 'lynEGFP', IDs=relevant_samples,
                      lin_unmix=False,
                      recurse=True, ignore_old=True, processes=14,
                      subprocesses=1, profiling=True, verbose=True,
                      segment_params=segment_params)

### Segment `cldnB:lyn-EGFP + 6xUAS:tagRFPt-UtrCH`

In [None]:
# Target directory
# (built with os.path.join so the relative path resolves on any OS,
#  not only with Windows backslash separators)
dir_path = os.path.join('data', 'experimentA', 'image_data')

# Parse relevant IDs from IDR bulk data
# (samples with at least one image file ending in 'tagRFPtUtrCH.tif')
relevant_samples = parse_from_IDR(dir_path, 'tagRFPtUtrCH')
print("Found %i relevant samples!" % len(relevant_samples))

# Additional arguments
segment_params = {'median_size'     : 3,
                  'gaussian_sigma'  : 3,
                  'max_offset'      : 10,
                  'offset_step'     : 1,
                  'clean_small'     : 1000,
                  'clean_big'       : 1000000,
                  'expansion_sigma' : 3}

# Run segmentation pipeline
# NOTE: With ignore_old=True, nothing is redone when segmentations already
#       exist (see notebook introduction); set it to False to force a rerun.
seg.full_segmentation(dir_path, 'lynEGFP', IDs=relevant_samples,
                      lin_unmix=False,
                      recurse=True, ignore_old=True, processes=14,
                      subprocesses=1, profiling=True, verbose=True,
                      segment_params=segment_params)

### Segment `cldnB:lyn-EGFP + LysoTracker Deep Red`

In [None]:
# Target directory
# (built with os.path.join so the relative path resolves on any OS,
#  not only with Windows backslash separators)
dir_path = os.path.join('data', 'experimentA', 'image_data')

# Parse relevant IDs from IDR bulk data
# (samples with at least one image file ending in 'lysotrackerdeepred.tif')
relevant_samples = parse_from_IDR(dir_path, 'lysotrackerdeepred')
print("Found %i relevant samples!" % len(relevant_samples))

# Additional arguments
segment_params = {'median_size'     : 3,
                  'gaussian_sigma'  : 3,
                  'max_offset'      : 40,
                  'offset_step'     : 2,
                  'clean_small'     : 1000,
                  'clean_big'       : 1000000,
                  'expansion_sigma' : 3}

# Run segmentation pipeline
# NOTE: With ignore_old=True, nothing is redone when segmentations already
#       exist (see notebook introduction); set it to False to force a rerun.
seg.full_segmentation(dir_path, 'lynEGFP', IDs=relevant_samples,
                      lin_unmix=False,
                      recurse=True, ignore_old=True, processes=14,
                      subprocesses=1, profiling=True, verbose=True,
                      segment_params=segment_params)

### Segment `pea3 smFISH`

In [None]:
# Target directory (note: experimentB, unlike the other segmentation cells)
# (built with os.path.join so the relative path resolves on any OS,
#  not only with Windows backslash separators)
dir_path = os.path.join('data', 'experimentB', 'image_data')

# Parse relevant IDs from IDR bulk data
# (samples with at least one image file ending in 'pea3smFISH.tif')
relevant_samples = parse_from_IDR(dir_path, 'pea3smFISH')
print("Found %i relevant samples!" % len(relevant_samples))

# Additional arguments
segment_params = {'median_size'     : 3,
                  'gaussian_sigma'  : 3,
                  'max_offset'      : 20,
                  'offset_step'     : 1,
                  'clean_small'     : 1000,
                  'clean_big'       : 1000000,
                  'expansion_sigma' : 3}

# Run segmentation pipeline
# NOTE: With ignore_old=True, nothing is redone when segmentations already
#       exist (see notebook introduction); set it to False to force a rerun.
seg.full_segmentation(dir_path, 'lynEGFP', IDs=relevant_samples,
                      lin_unmix=False,
                      recurse=True, ignore_old=True, processes=14,
                      subprocesses=1, profiling=True, verbose=True,
                      segment_params=segment_params)