## RUN: Initialization

This notebook prepares image data for the rest of the pipeline by assigning unique IDs, splitting channels and generating a metadata file. 

For data downloaded from the Image Data Resource (IDR) repository, only the metadata file needs to be generated.

### Prep

In [None]:
### Imports

# Generic
from __future__ import division
import os, sys, pickle
import numpy as np

# Internal
from katachi.pipelines import initialization as init

In [None]:
### Function to parse relevant samples from IDR bulk data

def parse_from_IDR(dir_path, target, id_length=10):
    """Return the sample directories in `dir_path` that match `target`.

    A sample is a sub-directory of `dir_path` whose name is exactly
    `id_length` characters long. Its image files are the directory entries
    that start with the sample name and end with '.tif'.

    Parameters
    ----------
    dir_path : str
        Directory holding one sub-directory per sample.
    target : str
        Either the special value 'membranes_only', which selects samples
        that have at least one image file and whose image file names all
        contain 'lynEGFP', or any other string, which selects samples with
        at least one image file name ending in `target + '.tif'`.
    id_length : int, optional
        Required length of a sample directory name. Default is 10.

    Returns
    -------
    relevant_samples : list of str
        Names (not full paths) of the matching sample directories, sorted
        so that the result does not depend on the arbitrary ordering of
        `os.listdir`.
    """

    relevant_samples = []

    # Sort the listing so repeated runs yield the samples in the same order
    for d in sorted(os.listdir(dir_path)):

        # Only directories with an ID-length name count as samples
        sample_path = os.path.join(dir_path, d)
        if len(d) != id_length or not os.path.isdir(sample_path):
            continue

        # Get image files
        images = [i for i in os.listdir(sample_path)
                  if i.startswith(d) and i.endswith('.tif')]

        # Special case for membranes only
        if target == 'membranes_only':
            # all() is True for an empty sequence, so a sample without any
            # image files must be excluded explicitly
            is_relevant = bool(images) and all('lynEGFP' in img
                                               for img in images)

        # All other cases
        else:
            is_relevant = any(img.endswith(target + '.tif') for img in images)

        if is_relevant:
            relevant_samples.append(d)

    return relevant_samples

### Initialize `cldnB:lyn-EGFP`

In [None]:
# Initialize the membrane-only samples (single 'lynEGFP' channel).

# Target directory (joined with os.path so the separator fits the host OS)
dir_path = os.path.join('data', 'experimentA', 'image_data')

# Parse relevant samples from IDR bulk data
relevant_samples = parse_from_IDR(dir_path, 'membranes_only')
# Parenthesized single-argument print works on both Python 2 and Python 3
print("Found %i relevant samples!" % len(relevant_samples))

# Additional arguments
id_path      = r'other/IDs.txt'
# NOTE(review): fname_suffix is assigned but not passed to initialize_dir below
fname_suffix =  "_8bit.tif"
meta_dict    = {'channels'   : ['lynEGFP'],
                'resolution' : [0.225, 0.099, 0.099],
                'date'       : '20180726',
                'microscope' : 'ZEISS-LSM880-AIRY-FAST',
                'condition'  : 'wild-type',
                'notes'      : '_none_'}

# Run initialization pipeline on the selected IDR samples only
init.initialize_dir(dir_path, id_path, meta_dict,
                    IDR_data=True, IDR_IDs=relevant_samples,
                    ignore_old=False, verbose=True)

### Initialize `cldnB:lyn-EGFP + cxcr4b:NLS-tdTomato`

In [None]:
# Initialize the samples that have an image file ending in 'NLStdTomato.tif'.

# Target directory (joined with os.path so the separator fits the host OS)
dir_path = os.path.join('data', 'experimentA', 'image_data')

# Parse relevant samples from IDR bulk data
relevant_samples = parse_from_IDR(dir_path, 'NLStdTomato')
# Parenthesized single-argument print works on both Python 2 and Python 3
print("Found %i relevant samples!" % len(relevant_samples))

# Additional arguments
id_path      = r'other/IDs.txt'
# NOTE(review): fname_suffix is assigned but not passed to initialize_dir below
fname_suffix =  "_8bit.tif"
meta_dict    = {'channels'   : ['lynEGFP', 'NLStdTomato'],
                'resolution' : [0.225, 0.102, 0.102],
                'date'       : '20161222',
                'microscope' : 'ZEISS-LSM880-AIRY-FAST',
                'condition'  : 'wild-type',
                'notes'      : '_none_'}

# Run initialization pipeline on the selected IDR samples only
init.initialize_dir(dir_path, id_path, meta_dict,
                    IDR_data=True, IDR_IDs=relevant_samples,
                    ignore_old=False, verbose=True)

### Initialize `cldnB:lyn-EGFP + Actb2:mKate2-Rab11a`

In [None]:
# Initialize the samples that have an image file ending in 'mKate2rab11.tif'.

# Target directory (joined with os.path so the separator fits the host OS)
dir_path = os.path.join('data', 'experimentA', 'image_data')

# Parse relevant samples from IDR bulk data
relevant_samples = parse_from_IDR(dir_path, 'mKate2rab11')
# Parenthesized single-argument print works on both Python 2 and Python 3
print("Found %i relevant samples!" % len(relevant_samples))

# Additional arguments
id_path      = r'other/IDs.txt'
# NOTE(review): fname_suffix is assigned but not passed to initialize_dir below
fname_suffix =  "_8bit.tif"
meta_dict    = {'channels'   : ['lynEGFP', 'mKate2rab11'],
                'resolution' : [0.225, 0.099, 0.099],
                'date'       : '20170906',
                'microscope' : 'ZEISS-LSM880-AIRY-FAST',
                'condition'  : 'wild-type',
                'notes'      : '_none_'}

# Run initialization pipeline on the selected IDR samples only
init.initialize_dir(dir_path, id_path, meta_dict,
                    IDR_data=True, IDR_IDs=relevant_samples,
                    ignore_old=False, verbose=True)

### Initialize `cldnB:lyn-EGFP + RNA:mKate2-Rab5a`

In [None]:
# Initialize the samples that have an image file ending in 'mKate2rab5.tif'.

# Target directory (joined with os.path so the separator fits the host OS)
dir_path = os.path.join('data', 'experimentA', 'image_data')

# Parse relevant samples from IDR bulk data
relevant_samples = parse_from_IDR(dir_path, 'mKate2rab5')
# Parenthesized single-argument print works on both Python 2 and Python 3
print("Found %i relevant samples!" % len(relevant_samples))

# Additional arguments
id_path      = r'other/IDs.txt'
# NOTE(review): fname_suffix is assigned but not passed to initialize_dir below
fname_suffix =  "_8bit.tif"
meta_dict    = {'channels'   : ['lynEGFP', 'mKate2rab5'],
                'resolution' : [0.225, 0.099, 0.099],
                'date'       : '20171014',
                'microscope' : 'ZEISS-LSM880-AIRY-FAST',
                'condition'  : 'wild-type',
                'notes'      : '_none_'}

# Run initialization pipeline on the selected IDR samples only
init.initialize_dir(dir_path, id_path, meta_dict,
                    IDR_data=True, IDR_IDs=relevant_samples,
                    ignore_old=False, verbose=True)

### Initialize `cldnB:lyn-EGFP + RNA:mKate2-GM130(rat)`

In [None]:
# Initialize the samples that have an image file ending in 'mKate2GM130.tif'.

# Target directory (joined with os.path so the separator fits the host OS)
dir_path = os.path.join('data', 'experimentA', 'image_data')

# Parse relevant samples from IDR bulk data
relevant_samples = parse_from_IDR(dir_path, 'mKate2GM130')
# Parenthesized single-argument print works on both Python 2 and Python 3
print("Found %i relevant samples!" % len(relevant_samples))

# Additional arguments
id_path      = r'other/IDs.txt'
# NOTE(review): fname_suffix is assigned but not passed to initialize_dir below
fname_suffix =  "_8bit.tif"
meta_dict    = {'channels'   : ['lynEGFP', 'mKate2GM130'],
                'resolution' : [0.225, 0.099, 0.099],
                'date'       : '20170930',
                'microscope' : 'ZEISS-LSM880-AIRY-FAST',
                'condition'  : 'wild-type',
                'notes'      : '_none_'}

# Run initialization pipeline on the selected IDR samples only
init.initialize_dir(dir_path, id_path, meta_dict,
                    IDR_data=True, IDR_IDs=relevant_samples,
                    ignore_old=False, verbose=True)

### Initialize `cldnB:lyn-EGFP + lexOP:CDMPR-tagRFPt`

In [None]:
# Initialize the samples that have an image file ending in 'CDMPRtagRFPt.tif'.

# Target directory (joined with os.path so the separator fits the host OS)
dir_path = os.path.join('data', 'experimentA', 'image_data')

# Parse relevant samples from IDR bulk data
relevant_samples = parse_from_IDR(dir_path, 'CDMPRtagRFPt')
# Parenthesized single-argument print works on both Python 2 and Python 3
print("Found %i relevant samples!" % len(relevant_samples))

# Additional arguments
id_path      = r'other/IDs.txt'
# NOTE(review): fname_suffix is assigned but not passed to initialize_dir below
fname_suffix =  "_8bit.tif"
meta_dict    = {'channels'   : ['lynEGFP', 'CDMPRtagRFPt'],
                'resolution' : [0.225, 0.099, 0.099],
                'date'       : '20171212',
                'microscope' : 'ZEISS-LSM880-AIRY-FAST',
                'condition'  : 'wild-type',
                'notes'      : '_none_'}

# Run initialization pipeline on the selected IDR samples only
init.initialize_dir(dir_path, id_path, meta_dict,
                    IDR_data=True, IDR_IDs=relevant_samples,
                    ignore_old=False, verbose=True)

### Initialize `cldnB:lyn-EGFP + LexOP:B4GalT1(1-55Q)-tagRFPt`

In [None]:
# Initialize the samples that have an image file ending in 'b4galT1tagRFPt.tif'.

# Target directory (joined with os.path so the separator fits the host OS)
dir_path = os.path.join('data', 'experimentA', 'image_data')

# Parse relevant samples from IDR bulk data
relevant_samples = parse_from_IDR(dir_path, 'b4galT1tagRFPt')
# Parenthesized single-argument print works on both Python 2 and Python 3
print("Found %i relevant samples!" % len(relevant_samples))

# Additional arguments
id_path      = r'other/IDs.txt'
# NOTE(review): fname_suffix is assigned but not passed to initialize_dir below
fname_suffix =  "_8bit.tif"
meta_dict    = {'channels'   : ['lynEGFP', 'b4galT1tagRFPt'],
                'resolution' : [0.225, 0.099, 0.099],
                'date'       : '20171211',
                'microscope' : 'ZEISS-LSM880-AIRY-FAST',
                'condition'  : 'wild-type',
                'notes'      : '_none_'}

# Run initialization pipeline on the selected IDR samples only
init.initialize_dir(dir_path, id_path, meta_dict,
                    IDR_data=True, IDR_IDs=relevant_samples,
                    ignore_old=False, verbose=True)

### Initialize `cldnB:lyn-EGFP + atoh1a:dtomato`

In [None]:
# Initialize the samples that have an image file ending in 'atoh1a.tif'.

# Target directory (joined with os.path so the separator fits the host OS)
dir_path = os.path.join('data', 'experimentA', 'image_data')

# Parse relevant samples from IDR bulk data
relevant_samples = parse_from_IDR(dir_path, 'atoh1a')
# Parenthesized single-argument print works on both Python 2 and Python 3
print("Found %i relevant samples!" % len(relevant_samples))

# Additional arguments
id_path      = r'other/IDs.txt'
# NOTE(review): fname_suffix is assigned but not passed to initialize_dir below
fname_suffix =  "_8bit.tif"
meta_dict    = {'channels'   : ['lynEGFP', 'atoh1a'],
                'resolution' : [0.225, 0.099, 0.099],
                'date'       : '20170406',
                'microscope' : 'ZEISS-LSM880-AIRY-FAST',
                'condition'  : 'wild-type',
                'notes'      : '_none_'}

# Run initialization pipeline on the selected IDR samples only
init.initialize_dir(dir_path, id_path, meta_dict,
                    IDR_data=True, IDR_IDs=relevant_samples,
                    ignore_old=False, verbose=True)

### Initialize `cldnB:lyn-EGFP + 6xUAS:tagRFPt-UtrCH`

In [None]:
# Initialize the samples that have an image file ending in 'tagRFPtUtrCH.tif'.

# Target directory (joined with os.path so the separator fits the host OS)
dir_path = os.path.join('data', 'experimentA', 'image_data')

# Parse relevant samples from IDR bulk data
relevant_samples = parse_from_IDR(dir_path, 'tagRFPtUtrCH')
# Parenthesized single-argument print works on both Python 2 and Python 3
print("Found %i relevant samples!" % len(relevant_samples))

# Additional arguments
id_path      = r'other/IDs.txt'
# NOTE(review): fname_suffix is assigned but not passed to initialize_dir below
fname_suffix =  "_8bit.tif"
meta_dict    = {'channels'   : ['lynEGFP', 'tagRFPtUtrCH'],
                'resolution' : [0.225, 0.102, 0.102],
                'date'       : '20161222',
                'microscope' : 'ZEISS-LSM880-AIRY-FAST',
                'condition'  : 'wild-type',
                'notes'      : '_none_'}

# Run initialization pipeline on the selected IDR samples only
init.initialize_dir(dir_path, id_path, meta_dict,
                    IDR_data=True, IDR_IDs=relevant_samples,
                    ignore_old=False, verbose=True)

### Initialize `cldnB:lyn-EGFP + LysoTracker Deep Red`

In [None]:
# Initialize the samples that have an image file ending in
# 'lysotrackerdeepred.tif'.

# Target directory (joined with os.path so the separator fits the host OS)
dir_path = os.path.join('data', 'experimentA', 'image_data')

# Parse relevant samples from IDR bulk data
relevant_samples = parse_from_IDR(dir_path, 'lysotrackerdeepred')
# Parenthesized single-argument print works on both Python 2 and Python 3
print("Found %i relevant samples!" % len(relevant_samples))

# Additional arguments
id_path      = r'other/IDs.txt'
# NOTE(review): fname_suffix is assigned but not passed to initialize_dir below
fname_suffix =  "_8bit.tif"
meta_dict    = {'channels'   : ['lynEGFP', 'lysotrackerdeepred'],
                'resolution' : [0.225, 0.099, 0.099],
                'date'       : '20170906',
                'microscope' : 'ZEISS-LSM880-AIRY-FAST',
                'condition'  : 'wild-type',
                'notes'      : '_none_'}

# Run initialization pipeline on the selected IDR samples only
init.initialize_dir(dir_path, id_path, meta_dict,
                    IDR_data=True, IDR_IDs=relevant_samples,
                    ignore_old=False, verbose=True)

### Initialize `pea3 smFISH`

In [None]:
# Initialize the samples that have an image file ending in 'pea3smFISH.tif'.
# Unlike the other cells, this one reads from the experimentB directory and
# lists 'pea3smFISH' before 'lynEGFP' in the channel metadata.

# Target directory (joined with os.path so the separator fits the host OS)
dir_path = os.path.join('data', 'experimentB', 'image_data')

# Parse relevant samples from IDR bulk data
relevant_samples = parse_from_IDR(dir_path, 'pea3smFISH')
# Parenthesized single-argument print works on both Python 2 and Python 3
print("Found %i relevant samples!" % len(relevant_samples))

# Additional arguments
id_path      = r'other/IDs.txt'
# NOTE(review): fname_suffix is assigned but not passed to initialize_dir below
fname_suffix =  "_8bit.tif"
meta_dict    = {'channels'   : ['pea3smFISH', 'lynEGFP'],
                'resolution' : [0.187, 0.085, 0.085],
                'date'       : '20180727',
                'microscope' : 'ZEISS-LSM880-AIRY-FAST',
                'condition'  : 'wild-type',
                'notes'      : 'Sample fixed for smFISH staining.'}

# Run initialization pipeline on the selected IDR samples only
init.initialize_dir(dir_path, id_path, meta_dict,
                    IDR_data=True, IDR_IDs=relevant_samples,
                    ignore_old=False, verbose=True)