In [None]:
import os
import re
import sys
import glob
import json
import shutil
import skimage
import imageio
import tifffile
import datetime
import requests
import jsonschema
import numpy as np
import pandas as pd
import seaborn as sns
from scipy import ndimage
from matplotlib import pyplot as plt

%load_ext autoreload
%autoreload 1

sys.path.append('/Users/keith.cheveralls/projects/opencell/')
%aimport opencell.imaging.images
from opencell.imaging.images import RawPipelineTIFF

sys.path.append('/Users/keith.cheveralls/projects/dragonfly-automation/')
import dragonfly_automation.utils

%aimport dragonfly_automation.qc.pipeline_plate_qc
from dragonfly_automation.fov_models import PipelineFOVScorer
from dragonfly_automation.qc.pipeline_plate_qc import PipelinePlateQC

In [None]:
# resize images for the SOP
# (one-off utility that is disabled by default; set RESIZE_SOP_IMAGES = True to write
# 4x-downscaled '_RESZ4.JPG' copies next to the original images)
RESIZE_SOP_IMAGES = False

jpgs = glob.glob('/Users/keith.cheveralls/Box/KCC-slides/automated-pipeline-microscopy-SOP/*.JPG')
if RESIZE_SOP_IMAGES:
    # import the submodule explicitly rather than relying on `import skimage` to expose it
    import skimage.transform

    for jpg in jpgs:
        # the glob also matches outputs of a previous run; skip them so that
        # they are not downscaled a second time
        if jpg.endswith('_RESZ4.JPG'):
            continue
        im = imageio.imread(jpg)

        # average over 4x4 pixel blocks, leaving any trailing (channel) axis untouched
        factors = (4, 4) + (1,) * (im.ndim - 2)
        resized = skimage.transform.downscale_local_mean(im, factors)

        # the block means are floats; round and cast back to the source dtype before saving
        # NOTE(review): confirm this matches the intended JPEG output
        imageio.imsave(jpg.replace('.JPG', '_RESZ4.JPG'), np.rint(resized).astype(im.dtype))

In [None]:
# get target names from API
# (the timeout keeps the cell from hanging indefinitely if the local API server is not responding,
# and raise_for_status surfaces HTTP errors here instead of as a confusing failure further down)
result = requests.get('http://localhost:5000/lines', params={'plate_id': 'P0019'}, timeout=30)
result.raise_for_status()
pd.DataFrame(data=result.json())[['cell_line_id', 'plate_id', 'well_id', 'target_name']].sort_values(by='well_id')

In [None]:
# local acquisition directories (logs only)
# NOTE: because the pattern ends with a slash, each returned path ends with a separator too
pml_dirs = sorted(
    glob.glob('/Users/keith.cheveralls/image-data/raw-pipeline-microscopy/PML*/')
)
len(pml_dirs)

In [None]:
# real acquisition directories (on ESS)
# NOTE: this rebinds pml_dirs, replacing the list of local directories if that cell was run
pml_dirs = sorted(
    glob.glob('/Volumes/ml_group/raw-pipeline-microscopy/PML*')
)
len(pml_dirs)

### Create the external_metadata files for existing experiments

In [None]:
# load the google sheet export, exposing its 'id' column under the name 'pml_id'
google_sheet = (
    pd.read_csv('/Users/keith.cheveralls/Downloads/2020-01-17-Pipeline-microscopy-master-key-PMLs.csv')
    .rename(columns={'id': 'pml_id'})
)
google_sheet

In [None]:
# generate the metadata JSON files from the google sheet
# (note that these files are created when the acquisition is initialized;
# they were regenerated to eliminate variation in e.g., parental_line_id)

# the metadata attributes for canonical half-plate or full-plate imaging
columns = ['pml_id', 'parental_line', 'plate_id', 'platemap_type', 'imaging_round_id']

# set to True to replace metadata.json files that already exist
overwrite = False
for path in pml_dirs:

    # the directory name is exactly the pml_id
    # (normpath strips any trailing separator first; otherwise a path like '.../PML0123/'
    # would yield an empty name and never match the google sheet)
    pml_id = os.path.basename(os.path.normpath(path))

    # retrieve the entry in the google sheet for this pml_id
    if pml_id not in google_sheet.pml_id.values:
        print('pml_id %s not found in google sheet' % pml_id)
        continue

    # if the sheet has several rows for this pml_id, only the first one is used
    metadata = google_sheet.loc[google_sheet.pml_id == pml_id].iloc[0][columns]

    # if there is a custom platemap, the metadata should contain only the pml_id and the platemap_type
    if metadata['platemap_type'] == 'custom':
        metadata = metadata[['pml_id', 'platemap_type']]

    # never clobber an existing file unless explicitly requested
    dst_filepath = os.path.join(path, 'metadata.json')
    if os.path.isfile(dst_filepath) and not overwrite:
        print('metadata.json already exists in %s' % path)
    else:
        with open(dst_filepath, 'w') as file:
            json.dump(dict(metadata), file)

In [None]:
# update all of the custom platemaps so that the parental_line is always consistent
# (this is a dry run unless overwrite is True: each platemap is loaded and edited in memory,
# but it is only written back to disk when overwrite is set)
overwrite = False
for dirpath in pml_dirs:
    filepaths = glob.glob(os.path.join(dirpath, '*platemap.csv'))
    if len(filepaths) == 0:
        # no custom platemap in this acquisition directory
        continue
    if len(filepaths) > 1:
        print('Warning: more than one platemap in %s' % dirpath)

    # only the first match is processed
    filepath = filepaths[0]
    print(filepath)

    platemap = pd.read_csv(filepath).assign(parental_line='czML0383')

    # errors='ignore' makes this a no-op when the column is absent
    platemap = platemap.drop(columns=['electroporation_id'], errors='ignore')

    if overwrite:
        platemap.to_csv(filepath, index=False)

### Load all of the existing external metadata files

In [None]:
# collect the contents of every metadata.json, one row (dict) per acquisition directory
rows = []
for path in pml_dirs:

    # normpath strips any trailing separator, so the last path component
    # is always the directory name (and never an empty string)
    dirname = os.path.basename(os.path.normpath(path))

    # skip directories without a metadata file rather than failing on them
    filepath = os.path.join(path, 'metadata.json')
    if not os.path.isfile(filepath):
        print('No metadata.json file in %s' % path)
        continue

    with open(filepath) as file:
        metadata = json.load(file)

    # the pml_id recorded in the file, if any, should match the directory name
    if metadata.get('pml_id') is not None and metadata['pml_id'] != dirname:
        print('Warning: pml_id %s does not match directory name %s' % (metadata['pml_id'], dirname))

    row = {'dirname': dirname}
    row.update(metadata)
    rows.append(row)

In [None]:
# tabulate the per-directory metadata dicts collected in `rows` and preview the result
all_md = pd.DataFrame(rows)
all_md.head()

### Concat all raw TIFF metadata

In [None]:
# construct the FOV metadata for every acquisition directory and collect the results in mds
# (overwrite=True is passed to construct_fov_metadata on every call)
# note: after the loop, qc refers to the PipelinePlateQC instance of the last directory
mds = []
for pml_dir in pml_dirs:
    print(pml_dir)
    qc = PipelinePlateQC(pml_dir)
    mds.append(qc.construct_fov_metadata(overwrite=True))

In [None]:
# stack the per-directory FOV metadata row-wise (without sorting the columns)
# note: this rebinds all_md, which an earlier cell uses for the external metadata table
all_md = pd.concat(mds, sort=False)
all_md.shape

In [None]:
# rows with any NANs
has_nan = all_md.isna().any(axis=1)
all_md.loc[has_nan]

In [None]:
# unique values
# (NaNs are removed before sorting, because sorted() raises a TypeError when a column
# mixes strings with NaN; the number of missing values is reported separately)
cols = ('parental_line', 'imaging_round_id', 'platemap_type', 'plate_id', 'pml_id', 'site_num')
for col in cols:
    values = all_md[col]
    line = '%s: %s' % (col, np.array(sorted(values.dropna().unique())))

    num_missing = values.isna().sum()
    if num_missing:
        line += ' (plus %d NaN)' % num_missing
    print(line)

In [None]:
# fix existing metadata filenames
# (the rename itself is commented out, so as written this loop only prints each directory;
# uncommenting it renames raw-tiff-metadata.csv to fov-metadata.csv in every directory)
for pml_dir in pml_dirs:
    print(pml_dir)
    # os.rename(os.path.join(pml_dir, 'raw-tiff-metadata.csv'), os.path.join(pml_dir, 'fov-metadata.csv'))

In [None]:
# test construct_fov_metadata after files have been renamed
# (uses the local PML0227 directory and passes renamed=True; this rebinds both qc and md)
qc = PipelinePlateQC('/Users/keith.cheveralls/image-data/raw-pipeline-microscopy/PML0227/')
md = qc.construct_fov_metadata(renamed=True, overwrite=True)

In [None]:
# display the most recently constructed FOV metadata
md

### Debugging

In [None]:
# debugging target: PML0234 on the mounted ml_group volume
# (this rebinds qc; the cells below operate on whichever qc was created last)
qc = PipelinePlateQC('/Volumes/ml_group/raw-pipeline-microscopy/PML0234/')

In [None]:
# debugging target: PML0219 on the mounted ml_group volume
# (this rebinds qc; the cells below operate on whichever qc was created last)
qc = PipelinePlateQC('/Volumes/ml_group/raw-pipeline-microscopy/PML0219/')

In [None]:
# debugging target: PML0227 in the local image-data directory
# (this rebinds qc; the cells below operate on whichever qc was created last)
qc = PipelinePlateQC('/Users/keith.cheveralls/image-data/raw-pipeline-microscopy/PML0227/')

In [None]:
# summarize the acquisition that qc currently points to
qc.summarize()

In [None]:
# plot the counts and scores for the acquisition that qc currently points to
qc.plot_counts_and_scores()

In [None]:
# reconstruct the FOV metadata (this time with renamed=False) and total the manually_flagged values
# (presumably a boolean flag per FOV, making the sum a count of flagged FOVs - TODO confirm)
md = qc.construct_fov_metadata(renamed=False, overwrite=True)
md.manually_flagged.sum()

### Finding the cell layer center

In [None]:
# an image with diffuse-ish nuclear GFP signal
# (NOTE: this path is overridden by the assignment below before it is ever used;
# comment out the second assignment to load this image instead)
path = '/Users/keith.cheveralls/image-data/raw-pipeline-microscopy/PML0234/raw_data/MMStack_165-B5-21.ome.tif'

# nuclear GFP with 0.5um step size
path = '/Users/keith.cheveralls/image-data/H6_9_PCNA.ome.tif'

# open the TIFF, parse and validate its MicroManager metadata, then split it into channels
tiff = RawPipelineTIFF(path)
tiff.parse_micromanager_metadata()
tiff.validate_micromanager_metadata()
tiff.split_channels()

In [None]:
def profile_com(profile):
    '''
    Center of mass, in index units, of the above-average part of a 1D profile

    The mean of the profile is subtracted and the negative values are clipped to zero,
    so that only the samples above the mean contribute to the weighted mean position.

    Parameters
    ----------
    profile : 1D array-like of numbers (numpy arrays, lists, and tuples are accepted);
        it is never modified

    Returns
    -------
    The center of mass as a (generally fractional) index into the profile,
    or NaN if the profile is empty or has no samples above its mean (e.g., it is constant)
    '''
    profile = np.asarray(profile, dtype=float)
    if profile.size == 0:
        return np.nan

    # the subtraction yields a new array, so the clipping below cannot touch the caller's data
    p = profile - profile.mean()
    p[p < 0] = 0

    total = p.sum()
    if total == 0:
        # nothing lies above the mean, so the center of mass is undefined
        return np.nan

    x = np.arange(len(p))
    return (p * x).sum() / total

In [None]:
# cell layer center using Hoechst
# (the last line displays the center returned by find_cell_layer
# next to the center of mass of the returned profile, for comparison)
bot, top, cen, profile_405 = tiff.find_cell_layer(tiff.laser_405, -7, 7, 0.2)
cen, profile_com(profile_405)

In [None]:
# cell layer center using GFP
# (same comparison as for Hoechst, but note the different range arguments: -5, 5 instead of -7, 7;
# this also rebinds bot, top, and cen)
bot, top, cen, profile_488 = tiff.find_cell_layer(tiff.laser_488, -5, 5, 0.2)
cen, profile_com(profile_488)

In [None]:
# we observe a four-slice offset in the cell layer center between hoechst and GFP
# this corresponds to 0.8um, which is more than what christian measured (which was 0.5um)
# (both profiles are mean-subtracted; the GFP profile is plotted from its third sample onward)
# NOTE(review): the slice below drops two samples, while the comment above mentions
# a four-slice offset - confirm which offset is intended
plt.plot(profile_405[:] - profile_405.mean())
plt.plot(profile_488[2:] - profile_488.mean())

In [None]:
# testing output of ndimage.shift
# note that lower interpolation orders (below 3) result in some smoothing of the shifted image
# (the 405 stack is cropped to the first 255 elements of its last two axes,
# then shifted by half an element along the first axis using linear interpolation)
im = tiff.stacks[tiff.laser_405][:, :255, :255]
out = ndimage.shift(im, (.5, 0, 0), order=1)

In [None]:
# show slice 33 of the original (left) and of the shifted stack (right) side by side
plt.figure(figsize=(16, 12))
plt.imshow(np.hstack((im[33], out[33])))