### Projecting and aggregating FOVs from `PublicationQuality` directory
__Keith Cheveralls__<br>
__Fall 2019__

This notebook contains ad hoc scripts to generate aggregated z-projections of all FOVs from various subdirectories of the `PlateMicroscopy` directory.

Its primary application was to generate and aggregate z-projections of all stacks appearing in the `PublicationQuality` subdirectory of each plate directory. This corresponded to about 4500 FOVs. These aggregated z-projections were then manually sorted (into bad/neutral/good categories) and used to train a regression model to predict an FOV 'score'.

This notebook was superseded in late October 2019 by the API and management methods for the `PlateMicroscopy` directory found in the `opencell-process` repo. These methods are a more rigorous and better-documented way to process and aggregate all of the raw, processed, or publication quality FOVs. 

In [None]:
import os
import re
import sys
import glob
import json
import shutil
import pickle
import datetime
import numpy as np
import pandas as pd
import tifffile
from matplotlib import pyplot as plt

In [None]:
sys.path.append('/Users/keith.cheveralls/projects/opencell-process/')
from pipeline_process.imaging import utils, viz

In [None]:
%load_ext autoreload
%autoreload 2

In [None]:
# mountpoint of the PlateMicroscopy directory (on either Flexo or IBM)
# NOTE: the trailing slash matters: a later cell strips this prefix from each filepath
# and takes the first remaining path component as the plate directory name
flexo_root = '/Volumes/MicroscopyData/ML_group/Plate_Microscopy/'
# sanity check: evaluates to True only if the mountpoint currently exists as a directory
os.path.isdir(flexo_root)

In [None]:
# list every directory under the PlateMicroscopy mountpoint
# (the walk is slow, so the full listing is pickled for reuse)
walk_cache_path = '20191016_os_walk_plate_microscopy.p'
os_walk = list(os.walk(flexo_root))
with open(walk_cache_path, 'wb') as cache_file:
    pickle.dump(os_walk, cache_file)

In [None]:
# restore the pickled os.walk listing of plate_microscopy instead of re-walking
# NOTE: only unpickle a cache file that you generated yourself
with open('./20191016_os_walk_plate_microscopy.p', 'rb') as cache_file:
    os_walk = pickle.load(cache_file)

In [None]:
# number of (dirpath, dirnames, filenames) entries in the walk, i.e. one per directory visited
len(os_walk)

In [None]:
# collect one publication-quality subdirectory per plate directory:
# for every plate directory that contains a 'PublicationQuality' subdirectory,
# use 'Updated_PublicationQuality' if it is also present, else 'PublicationQuality'
# (plate directories without a 'PublicationQuality' subdirectory are skipped entirely)
plate_dirname_pattern = 'mNG96wp[0-9]{1,2}$'

dirpaths = []
for dirpath, dirnames, filenames in os_walk:
    is_plate_dir = re.match(plate_dirname_pattern, dirpath.split(os.sep)[-1])
    if is_plate_dir and 'PublicationQuality' in dirnames:
        subdir = (
            'Updated_PublicationQuality'
            if 'Updated_PublicationQuality' in dirnames
            else 'PublicationQuality'
        )
        dirpaths.append(os.path.join(dirpath, subdir))

In [None]:
# collect every subdirectory whose name contains '_IJClean'
# and that sits directly inside a '_Thawed' plate directory
# (as of 2019-10-08, most of these directories do not have a 'PublicationQuality' subdirectory)
thawed_plate_dirname_pattern = 'mNG96wp[0-9]{1,2}_Thawed$'

dirpaths = []
for dirpath, dirnames, filenames in os_walk:
    if re.match(thawed_plate_dirname_pattern, dirpath.split(os.sep)[-1]):
        dirpaths.extend(
            os.path.join(dirpath, dirname) for dirname in dirnames if '_IJClean' in dirname
        )

In [None]:
# collect every subdirectory whose name contains '_Processed'
# and that sits directly inside a (non-thawed) plate directory
# (all of the FOVs, not just the 'good' FOVs, appear in the _Processed subdirectories)
plate_dirname_pattern = 'mNG96wp[0-9]{1,2}$'

dirpaths = []
for dirpath, dirnames, filenames in os_walk:
    if re.match(plate_dirname_pattern, dirpath.split(os.sep)[-1]):
        dirpaths.extend(
            os.path.join(dirpath, dirname) for dirname in dirnames if '_Processed' in dirname
        )

In [None]:
# aggregate all of the images from all of the subdirectories listed in `dirpaths`
# (PublicationQuality, IJClean, or Processed, depending on which cell above was run)

all_fovs = []
for dirpath in dirpaths:
    for filepath in glob.glob(os.path.join(dirpath, '*.tif')):
        filename = filepath.split(os.sep)[-1]

        # the first three underscore-separated fields of the filename are read as
        # the well_id, the fov_num, and the target_name, in that order
        filename_parts = filename.split('_')
        if len(filename_parts) < 3:
            # report and skip filenames that cannot be parsed, rather than
            # recording an entry with stale values left over from the previous file
            print(filename)
            continue

        well_id, fov_num, target_name = filename_parts[:3]
        all_fovs.append({
            'filepath': filepath,
            'well_id': well_id,
            'fov_num': fov_num,
            'target_name': target_name,
        })

In [None]:
# order the FOVs by filepath, then display how many there are
all_fovs = sorted(all_fovs, key=lambda entry: entry['filepath'])
len(all_fovs)

In [None]:
# number of distinct FOV filenames (ignoring the directory each file lives in)
len({fov['filepath'].split(os.sep)[-1] for fov in all_fovs})

In [None]:
# subset of FOVs whose target is one of the two controls (CLTA and BCAP31)
control_targets = ['CLTA', 'BCAP31']
control_fovs = [fov for fov in all_fovs if fov['target_name'] in control_targets]
len(control_fovs)

### Generating z-projections

In [None]:
def make_projs(filepath):
    """
    Load the image at `filepath`, take its maximum along the first axis,
    and return the autogained first two planes of the result as (dapi, gfp).

    The loaded image is expected to have shape (z, channels, x, y),
    so that the maximum is a z-projection and planes 0 and 1 are channels.
    """
    stack = utils.load(filepath)
    max_proj = stack.max(axis=0)

    dapi_proj, gfp_proj = max_proj[0], max_proj[1]
    return (
        utils.autogain(dapi_proj, percentile=.99),
        utils.autogain(gfp_proj, percentile=.99),
    )

In [None]:
# generate z-projections for a subset of all aggregated FOVs
# (this was for the initial attempt at FOV annotation using a notebook-based UI)

# take every tenth FOV (this yields 452 FOVs)
random_fovs = all_fovs[::10]

# create the directory structure expected by the confluency annotator:
# one subdirectory per channel underneath the root directory
root_dir = './random-FOVs'
os.makedirs(root_dir, exist_ok=True)
os.makedirs(os.path.join(root_dir, 'GFP'), exist_ok=True)
os.makedirs(os.path.join(root_dir, 'DAPI'), exist_ok=True)

# create the projections
for ind, fov in enumerate(random_fovs):
    print('%d: %s' % (ind, fov['filepath']))

    filepath_in = fov['filepath']
    filename_in = filepath_in.split(os.sep)[-1]

    dapi, gfp = make_projs(filepath_in)

    # write each channel's projection to '<root_dir>/<channel>/<name>_<channel>_PROJ.tif'
    # NOTE(review): newer tifffile releases name this function `imwrite` -
    # confirm against the installed version
    for channel, proj in (('DAPI', dapi), ('GFP', gfp)):
        filename_out = filename_in.replace('.tif', '_%s_PROJ.tif' % channel)
        tifffile.imsave(os.path.join(root_dir, channel, filename_out), proj)

In [None]:
# save the metadata for the subsampled FOVs (every tenth entry of `all_fovs`,
# the same subset that the projection cell above calls `random_fovs`)
# in the './random-FOVs' directory that the projection cell writes to
metadata_dirpath = './random-FOVs'
os.makedirs(metadata_dirpath, exist_ok=True)
with open(os.path.join(metadata_dirpath, 'metadata.json'), 'w') as file:
    json.dump(all_fovs[::10], file)

In [None]:
# write the DAPI and GFP z-projections of every aggregated FOV,
# using one output subdirectory per plate directory
# (this mirrors the top level of the PlateMicroscopy directory itself)

root_dir = '/Users/keith.cheveralls/image-data/PlateMicroscopyProjections-uint16-thawed'
os.makedirs(root_dir, exist_ok=True)

current_plate_dir = None
for ind, fov in enumerate(all_fovs):

    filepath_in = fov['filepath']
    filename_in = filepath_in.split(os.sep)[-1]

    # the first path component below the mountpoint, e.g. 'mNG96wp1'
    plate_dir = filepath_in.replace(flexo_root, '').split(os.sep)[0]

    # announce each plate once, when the plate directory changes
    if plate_dir != current_plate_dir:
        print('Processing plate %s' % plate_dir)
        current_plate_dir = plate_dir

    dirpath_out = os.path.join(root_dir, plate_dir)
    os.makedirs(dirpath_out, exist_ok=True)

    dapi_filepath_out = os.path.join(dirpath_out, filename_in.replace('.tif', '_DAPI_PROJ.tif'))
    gfp_filepath_out = os.path.join(dirpath_out, filename_in.replace('.tif', '_GFP_PROJ.tif'))

    # skip FOVs whose two projections were already written (makes the cell resumable)
    if all(os.path.isfile(path) for path in (dapi_filepath_out, gfp_filepath_out)):
        continue

    print('%d: %s' % (ind, filepath_in))

    stack = utils.load(filepath_in)
    max_proj = stack.max(axis=0)

    # the projections are saved as-is; to populate the 'PlateMicroscopyProjections-uint8'
    # directory instead, pass each plane through utils.autogain(..., percentile=.99) first
    tifffile.imsave(dapi_filepath_out, max_proj[0])
    tifffile.imsave(gfp_filepath_out, max_proj[1])

In [None]:
# copy all of the DAPI projections from the per-plate subdirectories into one directory
# (for the GFP projections, change the glob pattern and the destination accordingly)
root = '/Users/keith.cheveralls/image-data/PlateMicroscopyProjections-uint8/'
dst_dirpath = '/Users/keith.cheveralls/image-data/PlateMicroscopyProjections-uint8-all-DAPI/'

# shutil.copy does not create missing directories, so make sure the destination exists
os.makedirs(dst_dirpath, exist_ok=True)

# every entry directly under `root` is treated as a plate subdirectory
dirpaths = glob.glob('%s*' % root)
for dirpath in dirpaths:
    print(dirpath)
    for filepath in glob.glob(os.path.join(dirpath, '*_DAPI_PROJ.tif')):
        # NOTE: files that share a filename across plate subdirectories overwrite one another
        filename = filepath.split(os.sep)[-1]
        dst_filepath = os.path.join(dst_dirpath, filename)
        shutil.copy(filepath, dst_filepath)