## RUN: Atlas Construction

This notebook runs the atlas prediction pipeline across the different datasets.

Model selection and hyperparameter optimization were done in `DEV_Atlas.ipynb`.

### Prep

In [None]:
### Imports

# Generic
from __future__ import division
import os, sys, pickle
import numpy as np
import matplotlib.pyplot as plt

# Modules
from katachi.pipelines import atlas_construction as ac

In [None]:
### Function to parse relevant IDs from IDR bulk data

def parse_from_IDR(dir_path, target):
    """Return the sample IDs found in `dir_path` that match `target`.

    A sample is any sub-directory of `dir_path` whose name is exactly 10
    characters long. For each sample, only `.tif` files whose name starts
    with the sample ID are considered.

    Parameters
    ----------
    dir_path : str
        Directory containing one sub-directory per sample.
    target : str
        Channel suffix to look for. A sample is selected if at least one of
        its images ends with `target + '.tif'`. The special value
        'membranes_only' instead selects samples that have at least one
        image and in which every image name contains 'lynEGFP'.

    Returns
    -------
    list of str
        Names of the matching sample directories, in `os.listdir` order.
    """
    
    # Get all samples
    samples = [d for d in os.listdir(dir_path) if len(d)==10
               and os.path.isdir(os.path.join(dir_path, d))]
    
    # Select relevant samples
    relevant_samples = []
    for d in samples:
        
        # Get image files
        images = [i for i in os.listdir(os.path.join(dir_path, d))
                  if i.startswith(d) and i.endswith('.tif')]
        
        # Special case for membranes only
        if target=='membranes_only':
            # `all()` of an empty sequence is True, so explicitly require at
            # least one image; otherwise a sample directory without any
            # images would wrongly be reported as membranes-only.
            if images and all('lynEGFP' in img for img in images):
                relevant_samples.append(d)
            
        # All other cases
        else:
            if any(img.endswith(target+'.tif') for img in images):
                relevant_samples.append(d)
    
    return relevant_samples

### Atlas Construction based on TFOR

**tagRFPtUtrCH**

In [None]:
### Predict tagRFPtUtrCH channel for all prims that do not have it

# Target directories
# (training and prediction data live in the same directory; os.path.join
#  keeps the path valid on any OS instead of hard-coding backslashes)
train_dirpath   = os.path.join('data', 'experimentA', 'image_data')
predict_dirpath = os.path.join('data', 'experimentA', 'image_data')

# Target IDs (samples that have an image ending in 'tagRFPtUtrCH.tif')
train_IDs = parse_from_IDR(train_dirpath, 'tagRFPtUtrCH')
print("Found %i training IDs!" % len(train_IDs))  # valid in Python 2 and 3

# Channels
# NOTE(review): two reference names are given (with/without 'linUnmix');
# presumably the pipeline uses whichever exists for a sample -- confirm.
ref_channel = ['lynEGFP_seg_LMs_TFOR_kmeansPRES_DDDS_CBEmanh',
               'lynEGFP_linUnmix_seg_LMs_TFOR_kmeansPRES_DDDS_CBEmanh']
sec_channel = 'tagRFPtUtrCH_LMs_TFOR_kmeansPRES_DDDS_CBEmanh'

# Core settings
outlier_removal_ref = 'isolation_forest'
outlier_removal_sec = 'isolation_forest'
outlier_removal_cov = 'percentile_thresh'
# Evaluates to 'img.cell.tagRFPtUtrCH.mean_total'
covariates_to_use   = 'img.cell.'+sec_channel.split('_')[0]+'.mean_total'
regressor           = 'MO-SVR'

# Additional parameters
outlier_params_ref = { 'isoforest_params' : {'contamination':0.05}}
outlier_params_sec = { 'isoforest_params' : {'contamination':0.05}}
outlier_params_cov = { 'bounds'     : 'lower',
                       'percentile' : 33 }
regressor_params   = { 'kernel'  : 'rbf',
                       'C'       : 20.0,
                       'epsilon' :  0.5,
                       'gamma'   :  1.0 / 20.0 * 0.01 }  # i.e. ~5e-4
atlas_params       = { 'zscore_X'       : True,
                       'zscore_y'       : True,
                       'pca_X'          : True,
                       'pca_y'          : True,
                       'rezscore_X'     : False,
                       'rezscore_y'     : False,
                       'subselect_X'    : 20,
                       'subselect_y'    : 20,
                       'add_covariates' : None }

# Additional arguments
recurse     = True
ignore_self = False
processes   = 14
profiling   = True
verbose     = True

# Run prediction pipeline
# NOTE(review): predict_IDs=None presumably selects every sample found in
# predict_dirpath -- confirm against atlas_construction.
ac.atlas_construction(train_dirpath, predict_dirpath,
                      ref_channel, sec_channel,
                      train_IDs=train_IDs, predict_IDs=None,
                      recurse=recurse, ignore_self=ignore_self,
                      processes=processes, profiling=profiling, verbose=verbose,
                      outlier_removal_ref=outlier_removal_ref,
                      outlier_removal_sec=outlier_removal_sec,
                      outlier_removal_cov=outlier_removal_cov,
                      covariates_to_use=covariates_to_use,
                      regressor=regressor,
                      outlier_params_ref=outlier_params_ref,
                      outlier_params_sec=outlier_params_sec,
                      outlier_params_cov=outlier_params_cov,
                      regressor_params=regressor_params,
                      atlas_params=atlas_params)

**NLStdTomato**

In [None]:
### Predict NLStdTomato channel for all prims that do not have it

# Target directories
# (training and prediction data live in the same directory; os.path.join
#  keeps the path valid on any OS instead of hard-coding backslashes)
train_dirpath   = os.path.join('data', 'experimentA', 'image_data')
predict_dirpath = os.path.join('data', 'experimentA', 'image_data')

# Target IDs (samples that have an image ending in 'NLStdTomato.tif')
train_IDs = parse_from_IDR(train_dirpath, 'NLStdTomato')
print("Found %i training IDs!" % len(train_IDs))  # valid in Python 2 and 3

# Channels
# NOTE(review): two reference names are given (with/without 'linUnmix');
# presumably the pipeline uses whichever exists for a sample -- confirm.
ref_channel = ['lynEGFP_seg_LMs_TFOR_kmeansPRES_DDDS_CBEmanh',
               'lynEGFP_linUnmix_seg_LMs_TFOR_kmeansPRES_DDDS_CBEmanh']
sec_channel = 'NLStdTomato_LMs_TFOR_kmeansPRES_DDDS_CBEmanh'

# Core settings
outlier_removal_ref = 'isolation_forest'
outlier_removal_sec = 'isolation_forest'
outlier_removal_cov = 'percentile_thresh'
# Evaluates to 'img.cell.NLStdTomato.mean_total'
covariates_to_use   = 'img.cell.'+sec_channel.split('_')[0]+'.mean_total'
regressor           = 'MO-SVR'

# Additional parameters
# (C and gamma differ from the other TFOR cells in this notebook)
outlier_params_ref = { 'isoforest_params' : {'contamination':0.05}}
outlier_params_sec = { 'isoforest_params' : {'contamination':0.05}}
outlier_params_cov = { 'bounds'     : 'lower',
                       'percentile' : 33 }
regressor_params   = { 'kernel'  : 'rbf',
                       'C'       : 10.0,
                       'epsilon' :  0.5,
                       'gamma'   :  1.0 / 20.0 * 0.1 }  # i.e. ~5e-3
atlas_params       = { 'zscore_X'       : True,
                       'zscore_y'       : True,
                       'pca_X'          : True,
                       'pca_y'          : True,
                       'rezscore_X'     : False,
                       'rezscore_y'     : False,
                       'subselect_X'    : 20,
                       'subselect_y'    : 20,
                       'add_covariates' : None }

# Additional arguments
recurse     = True
ignore_self = False
processes   = 10
profiling   = True
verbose     = True

# Run prediction pipeline
# NOTE(review): predict_IDs=None presumably selects every sample found in
# predict_dirpath -- confirm against atlas_construction.
ac.atlas_construction(train_dirpath, predict_dirpath,
                      ref_channel, sec_channel,
                      train_IDs=train_IDs, predict_IDs=None,
                      recurse=recurse, ignore_self=ignore_self,
                      processes=processes, profiling=profiling, verbose=verbose,
                      outlier_removal_ref=outlier_removal_ref,
                      outlier_removal_sec=outlier_removal_sec,
                      outlier_removal_cov=outlier_removal_cov,
                      covariates_to_use=covariates_to_use,
                      regressor=regressor,
                      outlier_params_ref=outlier_params_ref,
                      outlier_params_sec=outlier_params_sec,
                      outlier_params_cov=outlier_params_cov,
                      regressor_params=regressor_params,
                      atlas_params=atlas_params)

**b4galT1tagRFPt**

In [None]:
### Predict b4galT1tagRFPt channel for all prims that do not have it

# Target directories
# (training and prediction data live in the same directory; os.path.join
#  keeps the path valid on any OS instead of hard-coding backslashes)
train_dirpath   = os.path.join('data', 'experimentA', 'image_data')
predict_dirpath = os.path.join('data', 'experimentA', 'image_data')

# Target IDs (samples that have an image ending in 'b4galT1tagRFPt.tif')
train_IDs = parse_from_IDR(train_dirpath, 'b4galT1tagRFPt')
print("Found %i training IDs!" % len(train_IDs))  # valid in Python 2 and 3

# Channels
# NOTE(review): two reference names are given (with/without 'linUnmix');
# presumably the pipeline uses whichever exists for a sample -- confirm.
ref_channel = ['lynEGFP_seg_LMs_TFOR_kmeansPRES_DDDS_CBEmanh',
               'lynEGFP_linUnmix_seg_LMs_TFOR_kmeansPRES_DDDS_CBEmanh']
sec_channel = 'b4galT1tagRFPt_LMs_TFOR_kmeansPRES_DDDS_CBEmanh'

# Core settings
outlier_removal_ref = 'isolation_forest'
outlier_removal_sec = 'isolation_forest'
outlier_removal_cov = 'percentile_thresh'
# Evaluates to 'img.cell.b4galT1tagRFPt.mean_total'
covariates_to_use   = 'img.cell.'+sec_channel.split('_')[0]+'.mean_total'
regressor           = 'MO-SVR'

# Additional parameters
outlier_params_ref = { 'isoforest_params' : {'contamination':0.05}}
outlier_params_sec = { 'isoforest_params' : {'contamination':0.05}}
outlier_params_cov = { 'bounds'     : 'lower',
                       'percentile' : 33 }
regressor_params   = { 'kernel'  : 'rbf',
                       'C'       : 20.0,
                       'epsilon' :  0.5,
                       'gamma'   :  1.0 / 20.0 * 0.01 }  # i.e. ~5e-4
atlas_params       = { 'zscore_X'       : True,
                       'zscore_y'       : True,
                       'pca_X'          : True,
                       'pca_y'          : True,
                       'rezscore_X'     : False,
                       'rezscore_y'     : False,
                       'subselect_X'    : 20,
                       'subselect_y'    : 20,
                       'add_covariates' : None }

# Additional arguments
recurse     = True
ignore_self = False
processes   = 10
profiling   = True
verbose     = True

# Run prediction pipeline
# NOTE(review): predict_IDs=None presumably selects every sample found in
# predict_dirpath -- confirm against atlas_construction.
ac.atlas_construction(train_dirpath, predict_dirpath,
                      ref_channel, sec_channel,
                      train_IDs=train_IDs, predict_IDs=None,
                      recurse=recurse, ignore_self=ignore_self,
                      processes=processes, profiling=profiling, verbose=verbose,
                      outlier_removal_ref=outlier_removal_ref,
                      outlier_removal_sec=outlier_removal_sec,
                      outlier_removal_cov=outlier_removal_cov,
                      covariates_to_use=covariates_to_use,
                      regressor=regressor,
                      outlier_params_ref=outlier_params_ref,
                      outlier_params_sec=outlier_params_sec,
                      outlier_params_cov=outlier_params_cov,
                      regressor_params=regressor_params,
                      atlas_params=atlas_params)

**CDMPRtagRFPt**

In [None]:
### Predict CDMPRtagRFPt channel for all prims that do not have it

# Target directories
# (training and prediction data live in the same directory; os.path.join
#  keeps the path valid on any OS instead of hard-coding backslashes)
train_dirpath   = os.path.join('data', 'experimentA', 'image_data')
predict_dirpath = os.path.join('data', 'experimentA', 'image_data')

# Target IDs (samples that have an image ending in 'CDMPRtagRFPt.tif')
train_IDs = parse_from_IDR(train_dirpath, 'CDMPRtagRFPt')
print("Found %i training IDs!" % len(train_IDs))  # valid in Python 2 and 3

# Channels
# NOTE(review): two reference names are given (with/without 'linUnmix');
# presumably the pipeline uses whichever exists for a sample -- confirm.
ref_channel = ['lynEGFP_seg_LMs_TFOR_kmeansPRES_DDDS_CBEmanh',
               'lynEGFP_linUnmix_seg_LMs_TFOR_kmeansPRES_DDDS_CBEmanh']
sec_channel = 'CDMPRtagRFPt_LMs_TFOR_kmeansPRES_DDDS_CBEmanh'

# Core settings
outlier_removal_ref = 'isolation_forest'
outlier_removal_sec = 'isolation_forest'
outlier_removal_cov = 'percentile_thresh'
# Evaluates to 'img.cell.CDMPRtagRFPt.mean_total'
covariates_to_use   = 'img.cell.'+sec_channel.split('_')[0]+'.mean_total'
regressor           = 'MO-SVR'

# Additional parameters
outlier_params_ref = { 'isoforest_params' : {'contamination':0.05}}
outlier_params_sec = { 'isoforest_params' : {'contamination':0.05}}
outlier_params_cov = { 'bounds'     : 'lower',
                       'percentile' : 33 }
regressor_params   = { 'kernel'  : 'rbf',
                       'C'       : 20.0,
                       'epsilon' :  0.5,
                       'gamma'   :  1.0 / 20.0 * 0.01 }  # i.e. ~5e-4
atlas_params       = { 'zscore_X'       : True,
                       'zscore_y'       : True,
                       'pca_X'          : True,
                       'pca_y'          : True,
                       'rezscore_X'     : False,
                       'rezscore_y'     : False,
                       'subselect_X'    : 20,
                       'subselect_y'    : 20,
                       'add_covariates' : None }

# Additional arguments
recurse     = True
ignore_self = False
processes   = 10
profiling   = True
verbose     = True

# Run prediction pipeline
# NOTE(review): predict_IDs=None presumably selects every sample found in
# predict_dirpath -- confirm against atlas_construction.
ac.atlas_construction(train_dirpath, predict_dirpath,
                      ref_channel, sec_channel,
                      train_IDs=train_IDs, predict_IDs=None,
                      recurse=recurse, ignore_self=ignore_self,
                      processes=processes, profiling=profiling, verbose=verbose,
                      outlier_removal_ref=outlier_removal_ref,
                      outlier_removal_sec=outlier_removal_sec,
                      outlier_removal_cov=outlier_removal_cov,
                      covariates_to_use=covariates_to_use,
                      regressor=regressor,
                      outlier_params_ref=outlier_params_ref,
                      outlier_params_sec=outlier_params_sec,
                      outlier_params_cov=outlier_params_cov,
                      regressor_params=regressor_params,
                      atlas_params=atlas_params)

**mKate2GM130**

In [None]:
### Predict mKate2GM130 channel for all prims that do not have it

# Target directories
# (training and prediction data live in the same directory; os.path.join
#  keeps the path valid on any OS instead of hard-coding backslashes)
train_dirpath   = os.path.join('data', 'experimentA', 'image_data')
predict_dirpath = os.path.join('data', 'experimentA', 'image_data')

# Target IDs (samples that have an image ending in 'mKate2GM130.tif')
train_IDs = parse_from_IDR(train_dirpath, 'mKate2GM130')
print("Found %i training IDs!" % len(train_IDs))  # valid in Python 2 and 3

# Channels
# NOTE(review): two reference names are given (with/without 'linUnmix');
# presumably the pipeline uses whichever exists for a sample -- confirm.
ref_channel = ['lynEGFP_seg_LMs_TFOR_kmeansPRES_DDDS_CBEmanh',
               'lynEGFP_linUnmix_seg_LMs_TFOR_kmeansPRES_DDDS_CBEmanh']
sec_channel = 'mKate2GM130_LMs_TFOR_kmeansPRES_DDDS_CBEmanh'

# Core settings
outlier_removal_ref = 'isolation_forest'
outlier_removal_sec = 'isolation_forest'
outlier_removal_cov = 'percentile_thresh'
# Evaluates to 'img.cell.mKate2GM130.mean_total'
covariates_to_use   = 'img.cell.'+sec_channel.split('_')[0]+'.mean_total'
regressor           = 'MO-SVR'

# Additional parameters
outlier_params_ref = { 'isoforest_params' : {'contamination':0.05}}
outlier_params_sec = { 'isoforest_params' : {'contamination':0.05}}
outlier_params_cov = { 'bounds'     : 'lower',
                       'percentile' : 33 }
regressor_params   = { 'kernel'  : 'rbf',
                       'C'       : 20.0,
                       'epsilon' :  0.5,
                       'gamma'   :  1.0 / 20.0 * 0.01 }  # i.e. ~5e-4
atlas_params       = { 'zscore_X'       : True,
                       'zscore_y'       : True,
                       'pca_X'          : True,
                       'pca_y'          : True,
                       'rezscore_X'     : False,
                       'rezscore_y'     : False,
                       'subselect_X'    : 20,
                       'subselect_y'    : 20,
                       'add_covariates' : None }

# Additional arguments
recurse     = True
ignore_self = False
processes   = 10
profiling   = True
verbose     = True

# Run prediction pipeline
# NOTE(review): predict_IDs=None presumably selects every sample found in
# predict_dirpath -- confirm against atlas_construction.
ac.atlas_construction(train_dirpath, predict_dirpath,
                      ref_channel, sec_channel,
                      train_IDs=train_IDs, predict_IDs=None,
                      recurse=recurse, ignore_self=ignore_self,
                      processes=processes, profiling=profiling, verbose=verbose,
                      outlier_removal_ref=outlier_removal_ref,
                      outlier_removal_sec=outlier_removal_sec,
                      outlier_removal_cov=outlier_removal_cov,
                      covariates_to_use=covariates_to_use,
                      regressor=regressor,
                      outlier_params_ref=outlier_params_ref,
                      outlier_params_sec=outlier_params_sec,
                      outlier_params_cov=outlier_params_cov,
                      regressor_params=regressor_params,
                      atlas_params=atlas_params)

**lysotrackerdeepred**

In [None]:
### Predict lysotrackerdeepred channel for all prims that do not have it

# Target directories
# (training and prediction data live in the same directory; os.path.join
#  keeps the path valid on any OS instead of hard-coding backslashes)
train_dirpath   = os.path.join('data', 'experimentA', 'image_data')
predict_dirpath = os.path.join('data', 'experimentA', 'image_data')

# Target IDs (samples that have an image ending in 'lysotrackerdeepred.tif')
train_IDs = parse_from_IDR(train_dirpath, 'lysotrackerdeepred')
print("Found %i training IDs!" % len(train_IDs))  # valid in Python 2 and 3

# Channels
# NOTE(review): two reference names are given (with/without 'linUnmix');
# presumably the pipeline uses whichever exists for a sample -- confirm.
ref_channel = ['lynEGFP_seg_LMs_TFOR_kmeansPRES_DDDS_CBEmanh',
               'lynEGFP_linUnmix_seg_LMs_TFOR_kmeansPRES_DDDS_CBEmanh']
sec_channel = 'lysotrackerdeepred_LMs_TFOR_kmeansPRES_DDDS_CBEmanh'

# Core settings
outlier_removal_ref = 'isolation_forest'
outlier_removal_sec = 'isolation_forest'
outlier_removal_cov = 'percentile_thresh'
# Evaluates to 'img.cell.lysotrackerdeepred.mean_total'
covariates_to_use   = 'img.cell.'+sec_channel.split('_')[0]+'.mean_total'
regressor           = 'MO-SVR'

# Additional parameters
outlier_params_ref = { 'isoforest_params' : {'contamination':0.05}}
outlier_params_sec = { 'isoforest_params' : {'contamination':0.05}}
outlier_params_cov = { 'bounds'     : 'lower',
                       'percentile' : 33 }
regressor_params   = { 'kernel'  : 'rbf',
                       'C'       : 20.0,
                       'epsilon' :  0.5,
                       'gamma'   :  1.0 / 20.0 * 0.01 }  # i.e. ~5e-4
atlas_params       = { 'zscore_X'       : True,
                       'zscore_y'       : True,
                       'pca_X'          : True,
                       'pca_y'          : True,
                       'rezscore_X'     : False,
                       'rezscore_y'     : False,
                       'subselect_X'    : 20,
                       'subselect_y'    : 20,
                       'add_covariates' : None }

# Additional arguments
recurse     = True
ignore_self = False
processes   = 10
profiling   = True
verbose     = True

# Run prediction pipeline
# NOTE(review): predict_IDs=None presumably selects every sample found in
# predict_dirpath -- confirm against atlas_construction.
ac.atlas_construction(train_dirpath, predict_dirpath,
                      ref_channel, sec_channel,
                      train_IDs=train_IDs, predict_IDs=None,
                      recurse=recurse, ignore_self=ignore_self,
                      processes=processes, profiling=profiling, verbose=verbose,
                      outlier_removal_ref=outlier_removal_ref,
                      outlier_removal_sec=outlier_removal_sec,
                      outlier_removal_cov=outlier_removal_cov,
                      covariates_to_use=covariates_to_use,
                      regressor=regressor,
                      outlier_params_ref=outlier_params_ref,
                      outlier_params_sec=outlier_params_sec,
                      outlier_params_cov=outlier_params_cov,
                      regressor_params=regressor_params,
                      atlas_params=atlas_params)

**mKate2rab5**

In [None]:
### Predict mKate2rab5 channel for all prims that do not have it

# Target directories
# (training and prediction data live in the same directory; os.path.join
#  keeps the path valid on any OS instead of hard-coding backslashes)
train_dirpath   = os.path.join('data', 'experimentA', 'image_data')
predict_dirpath = os.path.join('data', 'experimentA', 'image_data')

# Target IDs (samples that have an image ending in 'mKate2rab5.tif')
train_IDs = parse_from_IDR(train_dirpath, 'mKate2rab5')
print("Found %i training IDs!" % len(train_IDs))  # valid in Python 2 and 3

# Channels
# NOTE(review): two reference names are given (with/without 'linUnmix');
# presumably the pipeline uses whichever exists for a sample -- confirm.
ref_channel = ['lynEGFP_seg_LMs_TFOR_kmeansPRES_DDDS_CBEmanh',
               'lynEGFP_linUnmix_seg_LMs_TFOR_kmeansPRES_DDDS_CBEmanh']
sec_channel = 'mKate2rab5_LMs_TFOR_kmeansPRES_DDDS_CBEmanh'

# Core settings
outlier_removal_ref = 'isolation_forest'
outlier_removal_sec = 'isolation_forest'
outlier_removal_cov = 'percentile_thresh'
# Evaluates to 'img.cell.mKate2rab5.mean_total'
covariates_to_use   = 'img.cell.'+sec_channel.split('_')[0]+'.mean_total'
regressor           = 'MO-SVR'

# Additional parameters
outlier_params_ref = { 'isoforest_params' : {'contamination':0.05}}
outlier_params_sec = { 'isoforest_params' : {'contamination':0.05}}
outlier_params_cov = { 'bounds'     : 'lower',
                       'percentile' : 33 }
regressor_params   = { 'kernel'  : 'rbf',
                       'C'       : 20.0,
                       'epsilon' :  0.5,
                       'gamma'   :  1.0 / 20.0 * 0.01 }  # i.e. ~5e-4
atlas_params       = { 'zscore_X'       : True,
                       'zscore_y'       : True,
                       'pca_X'          : True,
                       'pca_y'          : True,
                       'rezscore_X'     : False,
                       'rezscore_y'     : False,
                       'subselect_X'    : 20,
                       'subselect_y'    : 20,
                       'add_covariates' : None }

# Additional arguments
recurse     = True
ignore_self = False
processes   = 10
profiling   = True
verbose     = True

# Run prediction pipeline
# NOTE(review): predict_IDs=None presumably selects every sample found in
# predict_dirpath -- confirm against atlas_construction.
ac.atlas_construction(train_dirpath, predict_dirpath,
                      ref_channel, sec_channel,
                      train_IDs=train_IDs, predict_IDs=None,
                      recurse=recurse, ignore_self=ignore_self,
                      processes=processes, profiling=profiling, verbose=verbose,
                      outlier_removal_ref=outlier_removal_ref,
                      outlier_removal_sec=outlier_removal_sec,
                      outlier_removal_cov=outlier_removal_cov,
                      covariates_to_use=covariates_to_use,
                      regressor=regressor,
                      outlier_params_ref=outlier_params_ref,
                      outlier_params_sec=outlier_params_sec,
                      outlier_params_cov=outlier_params_cov,
                      regressor_params=regressor_params,
                      atlas_params=atlas_params)

**mKate2rab11**

In [None]:
### Predict mKate2rab11 channel for all prims that do not have it

# Target directories
# (training and prediction data live in the same directory; os.path.join
#  keeps the path valid on any OS instead of hard-coding backslashes)
train_dirpath   = os.path.join('data', 'experimentA', 'image_data')
predict_dirpath = os.path.join('data', 'experimentA', 'image_data')

# Target IDs (samples that have an image ending in 'mKate2rab11.tif')
train_IDs = parse_from_IDR(train_dirpath, 'mKate2rab11')
print("Found %i training IDs!" % len(train_IDs))  # valid in Python 2 and 3

# Channels
# NOTE(review): two reference names are given (with/without 'linUnmix');
# presumably the pipeline uses whichever exists for a sample -- confirm.
ref_channel = ['lynEGFP_seg_LMs_TFOR_kmeansPRES_DDDS_CBEmanh',
               'lynEGFP_linUnmix_seg_LMs_TFOR_kmeansPRES_DDDS_CBEmanh']
sec_channel = 'mKate2rab11_LMs_TFOR_kmeansPRES_DDDS_CBEmanh'

# Core settings
outlier_removal_ref = 'isolation_forest'
outlier_removal_sec = 'isolation_forest'
outlier_removal_cov = 'percentile_thresh'
# Evaluates to 'img.cell.mKate2rab11.mean_total'
covariates_to_use   = 'img.cell.'+sec_channel.split('_')[0]+'.mean_total'
regressor           = 'MO-SVR'

# Additional parameters
outlier_params_ref = { 'isoforest_params' : {'contamination':0.05}}
outlier_params_sec = { 'isoforest_params' : {'contamination':0.05}}
outlier_params_cov = { 'bounds'     : 'lower',
                       'percentile' : 33 }
regressor_params   = { 'kernel'  : 'rbf',
                       'C'       : 20.0,
                       'epsilon' :  0.5,
                       'gamma'   :  1.0 / 20.0 * 0.01 }  # i.e. ~5e-4
atlas_params       = { 'zscore_X'       : True,
                       'zscore_y'       : True,
                       'pca_X'          : True,
                       'pca_y'          : True,
                       'rezscore_X'     : False,
                       'rezscore_y'     : False,
                       'subselect_X'    : 20,
                       'subselect_y'    : 20,
                       'add_covariates' : None }

# Additional arguments
recurse     = True
ignore_self = False
processes   = 10
profiling   = True
verbose     = True

# Run prediction pipeline
# NOTE(review): predict_IDs=None presumably selects every sample found in
# predict_dirpath -- confirm against atlas_construction.
ac.atlas_construction(train_dirpath, predict_dirpath,
                      ref_channel, sec_channel,
                      train_IDs=train_IDs, predict_IDs=None,
                      recurse=recurse, ignore_self=ignore_self,
                      processes=processes, profiling=profiling, verbose=verbose,
                      outlier_removal_ref=outlier_removal_ref,
                      outlier_removal_sec=outlier_removal_sec,
                      outlier_removal_cov=outlier_removal_cov,
                      covariates_to_use=covariates_to_use,
                      regressor=regressor,
                      outlier_params_ref=outlier_params_ref,
                      outlier_params_sec=outlier_params_sec,
                      outlier_params_cov=outlier_params_cov,
                      regressor_params=regressor_params,
                      atlas_params=atlas_params)

### Atlas Construction based on CFOR

**tagRFPtUtrCH**

In [None]:
### Predict tagRFPtUtrCH channel for all prims that do not have it

# Target directories
# (training and prediction data live in the same directory; os.path.join
#  keeps the path valid on any OS instead of hard-coding backslashes)
train_dirpath   = os.path.join('data', 'experimentA', 'image_data')
predict_dirpath = os.path.join('data', 'experimentA', 'image_data')

# Target IDs (samples that have an image ending in 'tagRFPtUtrCH.tif')
train_IDs = parse_from_IDR(train_dirpath, 'tagRFPtUtrCH')
print("Found %i training IDs!" % len(train_IDs))  # valid in Python 2 and 3

# Channels (CFOR-based feature spaces)
# NOTE(review): two reference names are given (with/without 'linUnmix');
# presumably the pipeline uses whichever exists for a sample -- confirm.
ref_channel = ['lynEGFP_seg_LMs_kmeansPRES_pdCFOR_DDDS_CBEmanh',
               'lynEGFP_linUnmix_seg_LMs_kmeansPRES_pdCFOR_DDDS_CBEmanh']
sec_channel = 'tagRFPtUtrCH_LMs_kmeansPRES_pdCFOR_DDDS_CBEmanh'

# Core settings
outlier_removal_ref = 'isolation_forest'
outlier_removal_sec = 'isolation_forest'
outlier_removal_cov = 'percentile_thresh'
# Evaluates to 'img.cell.tagRFPtUtrCH.mean_total'
covariates_to_use   = 'img.cell.'+sec_channel.split('_')[0]+'.mean_total'
regressor           = 'MT-ENetCV'

# Additional parameters
outlier_params_ref = { 'isoforest_params' : {'contamination':0.05}}
outlier_params_sec = { 'isoforest_params' : {'contamination':0.05}}
outlier_params_cov = { 'bounds'     : 'lower',
                       'percentile' : 33 }
# No regressor overrides are passed for this regressor
regressor_params   = { }
atlas_params       = { 'zscore_X'       : True,
                       'zscore_y'       : True,
                       'pca_X'          : True,
                       'pca_y'          : True,
                       'rezscore_X'     : False,
                       'rezscore_y'     : False,
                       'subselect_X'    : 20,
                       'subselect_y'    : 20,
                       'add_covariates' : None }

# Additional arguments
recurse     = True
ignore_self = False
processes   = 10
profiling   = True
verbose     = True

# Run prediction pipeline
# NOTE(review): predict_IDs=None presumably selects every sample found in
# predict_dirpath -- confirm against atlas_construction.
ac.atlas_construction(train_dirpath, predict_dirpath,
                      ref_channel, sec_channel,
                      train_IDs=train_IDs, predict_IDs=None,
                      recurse=recurse, ignore_self=ignore_self,
                      processes=processes, profiling=profiling, verbose=verbose,
                      outlier_removal_ref=outlier_removal_ref,
                      outlier_removal_sec=outlier_removal_sec,
                      outlier_removal_cov=outlier_removal_cov,
                      covariates_to_use=covariates_to_use,
                      regressor=regressor,
                      outlier_params_ref=outlier_params_ref,
                      outlier_params_sec=outlier_params_sec,
                      outlier_params_cov=outlier_params_cov,
                      regressor_params=regressor_params,
                      atlas_params=atlas_params)

**NLStdTomato**

In [None]:
### Predict NLStdTomato channel for all prims that do not have it

# Target directories
# (training and prediction data live in the same directory; os.path.join
#  keeps the path valid on any OS instead of hard-coding backslashes)
train_dirpath   = os.path.join('data', 'experimentA', 'image_data')
predict_dirpath = os.path.join('data', 'experimentA', 'image_data')

# Target IDs (samples that have an image ending in 'NLStdTomato.tif')
train_IDs = parse_from_IDR(train_dirpath, 'NLStdTomato')
print("Found %i training IDs!" % len(train_IDs))  # valid in Python 2 and 3

# Channels (CFOR-based feature spaces)
# NOTE(review): two reference names are given (with/without 'linUnmix');
# presumably the pipeline uses whichever exists for a sample -- confirm.
ref_channel = ['lynEGFP_seg_LMs_kmeansPRES_pdCFOR_DDDS_CBEmanh',
               'lynEGFP_linUnmix_seg_LMs_kmeansPRES_pdCFOR_DDDS_CBEmanh']
sec_channel = 'NLStdTomato_LMs_kmeansPRES_pdCFOR_DDDS_CBEmanh'

# Core settings
outlier_removal_ref = 'isolation_forest'
outlier_removal_sec = 'isolation_forest'
outlier_removal_cov = 'percentile_thresh'
# Evaluates to 'img.cell.NLStdTomato.mean_total'
covariates_to_use   = 'img.cell.'+sec_channel.split('_')[0]+'.mean_total'
regressor           = 'MT-ENetCV'

# Additional parameters
outlier_params_ref = { 'isoforest_params' : {'contamination':0.05}}
outlier_params_sec = { 'isoforest_params' : {'contamination':0.05}}
outlier_params_cov = { 'bounds'     : 'lower',
                       'percentile' : 33 }
# No regressor overrides are passed for this regressor
regressor_params   = { }
atlas_params       = { 'zscore_X'       : True,
                       'zscore_y'       : True,
                       'pca_X'          : True,
                       'pca_y'          : True,
                       'rezscore_X'     : False,
                       'rezscore_y'     : False,
                       'subselect_X'    : 20,
                       'subselect_y'    : 20,
                       'add_covariates' : None }

# Additional arguments
recurse     = True
ignore_self = False
processes   = 10
profiling   = True
verbose     = True

# Run prediction pipeline
# NOTE(review): predict_IDs=None presumably selects every sample found in
# predict_dirpath -- confirm against atlas_construction.
ac.atlas_construction(train_dirpath, predict_dirpath,
                      ref_channel, sec_channel,
                      train_IDs=train_IDs, predict_IDs=None,
                      recurse=recurse, ignore_self=ignore_self,
                      processes=processes, profiling=profiling, verbose=verbose,
                      outlier_removal_ref=outlier_removal_ref,
                      outlier_removal_sec=outlier_removal_sec,
                      outlier_removal_cov=outlier_removal_cov,
                      covariates_to_use=covariates_to_use,
                      regressor=regressor,
                      outlier_params_ref=outlier_params_ref,
                      outlier_params_sec=outlier_params_sec,
                      outlier_params_cov=outlier_params_cov,
                      regressor_params=regressor_params,
                      atlas_params=atlas_params)

**b4galT1tagRFPt**

In [None]:
### Predict b4galT1tagRFPt channel for all prims that do not have it

# Target directories
# (training and prediction data live in the same directory; os.path.join
#  keeps the path valid on any OS instead of hard-coding backslashes)
train_dirpath   = os.path.join('data', 'experimentA', 'image_data')
predict_dirpath = os.path.join('data', 'experimentA', 'image_data')

# Target IDs (samples that have an image ending in 'b4galT1tagRFPt.tif')
train_IDs = parse_from_IDR(train_dirpath, 'b4galT1tagRFPt')
print("Found %i training IDs!" % len(train_IDs))  # valid in Python 2 and 3

# Channels (CFOR-based feature spaces)
# NOTE(review): two reference names are given (with/without 'linUnmix');
# presumably the pipeline uses whichever exists for a sample -- confirm.
ref_channel = ['lynEGFP_seg_LMs_kmeansPRES_pdCFOR_DDDS_CBEmanh',
               'lynEGFP_linUnmix_seg_LMs_kmeansPRES_pdCFOR_DDDS_CBEmanh']
sec_channel = 'b4galT1tagRFPt_LMs_kmeansPRES_pdCFOR_DDDS_CBEmanh'

# Core settings
outlier_removal_ref = 'isolation_forest'
outlier_removal_sec = 'isolation_forest'
outlier_removal_cov = 'percentile_thresh'
# Evaluates to 'img.cell.b4galT1tagRFPt.mean_total'
covariates_to_use   = 'img.cell.'+sec_channel.split('_')[0]+'.mean_total'
regressor           = 'MT-ENetCV'

# Additional parameters
outlier_params_ref = { 'isoforest_params' : {'contamination':0.05}}
outlier_params_sec = { 'isoforest_params' : {'contamination':0.05}}
outlier_params_cov = { 'bounds'     : 'lower',
                       'percentile' : 33 }
# No regressor overrides are passed for this regressor
regressor_params   = { }
atlas_params       = { 'zscore_X'       : True,
                       'zscore_y'       : True,
                       'pca_X'          : True,
                       'pca_y'          : True,
                       'rezscore_X'     : False,
                       'rezscore_y'     : False,
                       'subselect_X'    : 20,
                       'subselect_y'    : 20,
                       'add_covariates' : None }

# Additional arguments
recurse     = True
ignore_self = False
processes   = 10
profiling   = True
verbose     = True

# Run prediction pipeline
# NOTE(review): predict_IDs=None presumably selects every sample found in
# predict_dirpath -- confirm against atlas_construction.
ac.atlas_construction(train_dirpath, predict_dirpath,
                      ref_channel, sec_channel,
                      train_IDs=train_IDs, predict_IDs=None,
                      recurse=recurse, ignore_self=ignore_self,
                      processes=processes, profiling=profiling, verbose=verbose,
                      outlier_removal_ref=outlier_removal_ref,
                      outlier_removal_sec=outlier_removal_sec,
                      outlier_removal_cov=outlier_removal_cov,
                      covariates_to_use=covariates_to_use,
                      regressor=regressor,
                      outlier_params_ref=outlier_params_ref,
                      outlier_params_sec=outlier_params_sec,
                      outlier_params_cov=outlier_params_cov,
                      regressor_params=regressor_params,
                      atlas_params=atlas_params)

**CDMPRtagRFPt**

In [None]:
### Predict CDMPRtagRFPt channel for all prims that do not have it

# Target directories
# (training and prediction data live in the same directory; os.path.join
#  keeps the path valid on any OS instead of hard-coding backslashes)
train_dirpath   = os.path.join('data', 'experimentA', 'image_data')
predict_dirpath = os.path.join('data', 'experimentA', 'image_data')

# Target IDs (samples that have an image ending in 'CDMPRtagRFPt.tif')
train_IDs = parse_from_IDR(train_dirpath, 'CDMPRtagRFPt')
print("Found %i training IDs!" % len(train_IDs))  # valid in Python 2 and 3

# Channels (CFOR-based feature spaces)
# NOTE(review): two reference names are given (with/without 'linUnmix');
# presumably the pipeline uses whichever exists for a sample -- confirm.
ref_channel = ['lynEGFP_seg_LMs_kmeansPRES_pdCFOR_DDDS_CBEmanh',
               'lynEGFP_linUnmix_seg_LMs_kmeansPRES_pdCFOR_DDDS_CBEmanh']
sec_channel = 'CDMPRtagRFPt_LMs_kmeansPRES_pdCFOR_DDDS_CBEmanh'

# Core settings
outlier_removal_ref = 'isolation_forest'
outlier_removal_sec = 'isolation_forest'
outlier_removal_cov = 'percentile_thresh'
# Evaluates to 'img.cell.CDMPRtagRFPt.mean_total'
covariates_to_use   = 'img.cell.'+sec_channel.split('_')[0]+'.mean_total'
regressor           = 'MT-ENetCV'

# Additional parameters
outlier_params_ref = { 'isoforest_params' : {'contamination':0.05}}
outlier_params_sec = { 'isoforest_params' : {'contamination':0.05}}
outlier_params_cov = { 'bounds'     : 'lower',
                       'percentile' : 33 }
# No regressor overrides are passed for this regressor
regressor_params   = { }
atlas_params       = { 'zscore_X'       : True,
                       'zscore_y'       : True,
                       'pca_X'          : True,
                       'pca_y'          : True,
                       'rezscore_X'     : False,
                       'rezscore_y'     : False,
                       'subselect_X'    : 20,
                       'subselect_y'    : 20,
                       'add_covariates' : None }

# Additional arguments
recurse     = True
ignore_self = False
processes   = 10
profiling   = True
verbose     = True

# Run prediction pipeline
# NOTE(review): predict_IDs=None presumably selects every sample found in
# predict_dirpath -- confirm against atlas_construction.
ac.atlas_construction(train_dirpath, predict_dirpath,
                      ref_channel, sec_channel,
                      train_IDs=train_IDs, predict_IDs=None,
                      recurse=recurse, ignore_self=ignore_self,
                      processes=processes, profiling=profiling, verbose=verbose,
                      outlier_removal_ref=outlier_removal_ref,
                      outlier_removal_sec=outlier_removal_sec,
                      outlier_removal_cov=outlier_removal_cov,
                      covariates_to_use=covariates_to_use,
                      regressor=regressor,
                      outlier_params_ref=outlier_params_ref,
                      outlier_params_sec=outlier_params_sec,
                      outlier_params_cov=outlier_params_cov,
                      regressor_params=regressor_params,
                      atlas_params=atlas_params)

**mKate2GM130**

In [None]:
### Predict mKate2GM130 channel for all prims that do not have it

# Data locations (training and prediction read from the same directory)
train_dirpath = predict_dirpath = r'data\experimentA\image_data'

# Training samples: those with an image file ending in the target name
train_IDs = parse_from_IDR(train_dirpath, 'mKate2GM130')
print("Found %i training IDs!" % len(train_IDs))

# Secondary (target) channel and the reference channel candidates
sec_channel = 'mKate2GM130_LMs_kmeansPRES_pdCFOR_DDDS_CBEmanh'
ref_channel = [
    'lynEGFP_seg_LMs_kmeansPRES_pdCFOR_DDDS_CBEmanh',
    'lynEGFP_linUnmix_seg_LMs_kmeansPRES_pdCFOR_DDDS_CBEmanh'
]

# Method selection
regressor           = 'MT-ENetCV'
outlier_removal_cov = 'percentile_thresh'
outlier_removal_sec = 'isolation_forest'
outlier_removal_ref = 'isolation_forest'

# Covariate name is derived from the part of sec_channel before the first '_'
covariates_to_use = ''.join(['img.cell.',
                             sec_channel.split('_')[0],
                             '.mean_total'])

# Method parameters
regressor_params   = {}
outlier_params_cov = {'bounds': 'lower', 'percentile': 33}
outlier_params_sec = {'isoforest_params': {'contamination': 0.05}}
outlier_params_ref = {'isoforest_params': {'contamination': 0.05}}
atlas_params = {
    'zscore_X': True,     'zscore_y': True,
    'pca_X': True,        'pca_y': True,
    'rezscore_X': False,  'rezscore_y': False,
    'subselect_X': 20,    'subselect_y': 20,
    'add_covariates': None
}

# Execution options
processes   = 10
recurse     = True
ignore_self = False
profiling   = verbose = True

# Hand everything over to the atlas construction pipeline
ac.atlas_construction(
    train_dirpath,
    predict_dirpath,
    ref_channel,
    sec_channel,
    train_IDs=train_IDs,
    predict_IDs=None,
    recurse=recurse,
    ignore_self=ignore_self,
    processes=processes,
    profiling=profiling,
    verbose=verbose,
    regressor=regressor,
    regressor_params=regressor_params,
    covariates_to_use=covariates_to_use,
    outlier_removal_ref=outlier_removal_ref,
    outlier_params_ref=outlier_params_ref,
    outlier_removal_sec=outlier_removal_sec,
    outlier_params_sec=outlier_params_sec,
    outlier_removal_cov=outlier_removal_cov,
    outlier_params_cov=outlier_params_cov,
    atlas_params=atlas_params
)

**lysotrackerdeepred**

In [None]:
### Predict lysotrackerdeepred channel for all prims that do not have it

# Data locations (training and prediction read from the same directory)
train_dirpath = predict_dirpath = r'data\experimentA\image_data'

# Training samples: those with an image file ending in the target name
train_IDs = parse_from_IDR(train_dirpath, 'lysotrackerdeepred')
print("Found %i training IDs!" % len(train_IDs))

# Secondary (target) channel and the reference channel candidates
sec_channel = 'lysotrackerdeepred_LMs_kmeansPRES_pdCFOR_DDDS_CBEmanh'
ref_channel = [
    'lynEGFP_seg_LMs_kmeansPRES_pdCFOR_DDDS_CBEmanh',
    'lynEGFP_linUnmix_seg_LMs_kmeansPRES_pdCFOR_DDDS_CBEmanh'
]

# Method selection
regressor           = 'MT-ENetCV'
outlier_removal_cov = 'percentile_thresh'
outlier_removal_sec = 'isolation_forest'
outlier_removal_ref = 'isolation_forest'

# Covariate name is derived from the part of sec_channel before the first '_'
covariates_to_use = ''.join(['img.cell.',
                             sec_channel.split('_')[0],
                             '.mean_total'])

# Method parameters
regressor_params   = {}
outlier_params_cov = {'bounds': 'lower', 'percentile': 33}
outlier_params_sec = {'isoforest_params': {'contamination': 0.05}}
outlier_params_ref = {'isoforest_params': {'contamination': 0.05}}
atlas_params = {
    'zscore_X': True,     'zscore_y': True,
    'pca_X': True,        'pca_y': True,
    'rezscore_X': False,  'rezscore_y': False,
    'subselect_X': 20,    'subselect_y': 20,
    'add_covariates': None
}

# Execution options
processes   = 10
recurse     = True
ignore_self = False
profiling   = verbose = True

# Hand everything over to the atlas construction pipeline
ac.atlas_construction(
    train_dirpath,
    predict_dirpath,
    ref_channel,
    sec_channel,
    train_IDs=train_IDs,
    predict_IDs=None,
    recurse=recurse,
    ignore_self=ignore_self,
    processes=processes,
    profiling=profiling,
    verbose=verbose,
    regressor=regressor,
    regressor_params=regressor_params,
    covariates_to_use=covariates_to_use,
    outlier_removal_ref=outlier_removal_ref,
    outlier_params_ref=outlier_params_ref,
    outlier_removal_sec=outlier_removal_sec,
    outlier_params_sec=outlier_params_sec,
    outlier_removal_cov=outlier_removal_cov,
    outlier_params_cov=outlier_params_cov,
    atlas_params=atlas_params
)

**mKate2rab5**

In [None]:
### Predict mKate2rab5 channel for all prims that do not have it

# Data locations (training and prediction read from the same directory)
train_dirpath = predict_dirpath = r'data\experimentA\image_data'

# Training samples: those with an image file ending in the target name
train_IDs = parse_from_IDR(train_dirpath, 'mKate2rab5')
print("Found %i training IDs!" % len(train_IDs))

# Secondary (target) channel and the reference channel candidates
sec_channel = 'mKate2rab5_LMs_kmeansPRES_pdCFOR_DDDS_CBEmanh'
ref_channel = [
    'lynEGFP_seg_LMs_kmeansPRES_pdCFOR_DDDS_CBEmanh',
    'lynEGFP_linUnmix_seg_LMs_kmeansPRES_pdCFOR_DDDS_CBEmanh'
]

# Method selection
regressor           = 'MT-ENetCV'
outlier_removal_cov = 'percentile_thresh'
outlier_removal_sec = 'isolation_forest'
outlier_removal_ref = 'isolation_forest'

# Covariate name is derived from the part of sec_channel before the first '_'
covariates_to_use = ''.join(['img.cell.',
                             sec_channel.split('_')[0],
                             '.mean_total'])

# Method parameters
regressor_params   = {}
outlier_params_cov = {'bounds': 'lower', 'percentile': 33}
outlier_params_sec = {'isoforest_params': {'contamination': 0.05}}
outlier_params_ref = {'isoforest_params': {'contamination': 0.05}}
atlas_params = {
    'zscore_X': True,     'zscore_y': True,
    'pca_X': True,        'pca_y': True,
    'rezscore_X': False,  'rezscore_y': False,
    'subselect_X': 20,    'subselect_y': 20,
    'add_covariates': None
}

# Execution options
processes   = 10
recurse     = True
ignore_self = False
profiling   = verbose = True

# Hand everything over to the atlas construction pipeline
ac.atlas_construction(
    train_dirpath,
    predict_dirpath,
    ref_channel,
    sec_channel,
    train_IDs=train_IDs,
    predict_IDs=None,
    recurse=recurse,
    ignore_self=ignore_self,
    processes=processes,
    profiling=profiling,
    verbose=verbose,
    regressor=regressor,
    regressor_params=regressor_params,
    covariates_to_use=covariates_to_use,
    outlier_removal_ref=outlier_removal_ref,
    outlier_params_ref=outlier_params_ref,
    outlier_removal_sec=outlier_removal_sec,
    outlier_params_sec=outlier_params_sec,
    outlier_removal_cov=outlier_removal_cov,
    outlier_params_cov=outlier_params_cov,
    atlas_params=atlas_params
)

**mKate2rab11**

In [None]:
### Predict mKate2rab11 channel for all prims that do not have it

# Data locations (training and prediction read from the same directory)
train_dirpath = predict_dirpath = r'data\experimentA\image_data'

# Training samples: those with an image file ending in the target name
train_IDs = parse_from_IDR(train_dirpath, 'mKate2rab11')
print("Found %i training IDs!" % len(train_IDs))

# Secondary (target) channel and the reference channel candidates
sec_channel = 'mKate2rab11_LMs_kmeansPRES_pdCFOR_DDDS_CBEmanh'
ref_channel = [
    'lynEGFP_seg_LMs_kmeansPRES_pdCFOR_DDDS_CBEmanh',
    'lynEGFP_linUnmix_seg_LMs_kmeansPRES_pdCFOR_DDDS_CBEmanh'
]

# Method selection
regressor           = 'MT-ENetCV'
outlier_removal_cov = 'percentile_thresh'
outlier_removal_sec = 'isolation_forest'
outlier_removal_ref = 'isolation_forest'

# Covariate name is derived from the part of sec_channel before the first '_'
covariates_to_use = ''.join(['img.cell.',
                             sec_channel.split('_')[0],
                             '.mean_total'])

# Method parameters
regressor_params   = {}
outlier_params_cov = {'bounds': 'lower', 'percentile': 33}
outlier_params_sec = {'isoforest_params': {'contamination': 0.05}}
outlier_params_ref = {'isoforest_params': {'contamination': 0.05}}
atlas_params = {
    'zscore_X': True,     'zscore_y': True,
    'pca_X': True,        'pca_y': True,
    'rezscore_X': False,  'rezscore_y': False,
    'subselect_X': 20,    'subselect_y': 20,
    'add_covariates': None
}

# Execution options
processes   = 10
recurse     = True
ignore_self = False
profiling   = verbose = True

# Hand everything over to the atlas construction pipeline
ac.atlas_construction(
    train_dirpath,
    predict_dirpath,
    ref_channel,
    sec_channel,
    train_IDs=train_IDs,
    predict_IDs=None,
    recurse=recurse,
    ignore_self=ignore_self,
    processes=processes,
    profiling=profiling,
    verbose=verbose,
    regressor=regressor,
    regressor_params=regressor_params,
    covariates_to_use=covariates_to_use,
    outlier_removal_ref=outlier_removal_ref,
    outlier_params_ref=outlier_params_ref,
    outlier_removal_sec=outlier_removal_sec,
    outlier_params_sec=outlier_params_sec,
    outlier_removal_cov=outlier_removal_cov,
    outlier_params_cov=outlier_params_cov,
    atlas_params=atlas_params
)