In [None]:
import os
import re
import sys
import glob
import json
import dask
import shutil
import pickle
import hashlib
import skimage
import datetime
import tifffile
import numpy as np
import pandas as pd

import dask.diagnostics
from matplotlib import pyplot as plt

In [None]:
# Show the version of the standalone `tifffile` package next to the version
# of the copy reached through `skimage.external`.
# NOTE(review): assumes the installed scikit-image still exposes
# `skimage.external.tifffile` after a bare `import skimage` - confirm.
tifffile.__version__, skimage.external.tifffile.__version__

In [None]:
# Make the parent directory importable (presumably where the `pipeline_process`
# package lives - TODO confirm). The membership check keeps `sys.path` free of
# duplicate entries when this cell is re-run.
if '../' not in sys.path:
    sys.path.append('../')
from pipeline_process.imaging import image, plate_microscopy_api, utils, viz

In [None]:
# Enable IPython's autoreload extension. Mode 2 reloads all modules before
# each cell is executed, so edits to imported modules are picked up without
# restarting the kernel.
%load_ext autoreload
%autoreload 2

In [None]:
# ESS 'PlateMicroscopy' directory
# Root of the 'PlateMicroscopy' directory on ESS
ess_root = '/Volumes/ml_group/PlateMicroscopy/'
# Sanity check: displays True only if the path exists and is a directory
# (presumably the ESS volume must be mounted first - TODO confirm)
os.path.isdir(ess_root)

In [None]:
# hashing times
# with dask on ESS: 1200 rows in 30 seconds
# without dask on ESS: 50 rows in 30 seconds

### Creating an instance of the PlateMicroscopy API

In [None]:
# Construct the API from the ESS root directory and a second path argument
# (presumably a local cache directory, judging by its name - TODO confirm)
api = plate_microscopy_api.PlateMicroscopyAPI(ess_root, '../plate-microscopy-cache/20191025-ess/')

In [None]:
# Quick overview: the length of `api.os_walk`, the shape of the metadata table
# `api.md`, and the sum of its `is_raw` column (i.e., the number of rows
# flagged as raw, assuming `is_raw` is boolean)
len(api.os_walk), api.md.shape, api.md.is_raw.sum()

### Parsing the metadata text files

As far as I can tell, there's nothing in these text files (which are actually JSON files) that's not also in the IJMetadata and MicroManagerMetadata TIFF tags.

In [None]:
# Load the JSON-formatted `_metadata.txt` file that accompanies the first raw TIFF.
# The row is selected directly from `api.md` rather than from `d_raw`, because
# `d_raw` is only defined further down in the notebook and this cell would
# otherwise fail when the notebook is run top to bottom on a fresh kernel.
# NOTE(review): assumes `api.src_filepath` needs only columns present in
# `api.md` (not the `exp_id` column that is added to `d_raw` later) - confirm.
first_raw_row = api.md.loc[api.md.is_raw].iloc[0]
metadata_filepath = api.src_filepath(first_raw_row).replace('.ome.tif', '_metadata.txt')
with open(metadata_filepath, 'r') as file:
    d = json.load(file)

In [None]:
# Inspect the 'Summary' entry of the parsed metadata file
d['Summary']

In [None]:
# List the (key, value) pairs of one frame's metadata in sorted order
# (effectively sorted by key, since the keys of a dict are unique)
sorted(d['FrameKey-0--1-0'].items())

### Parsing raw TIFF metadata

This is essential, because tifffile.imread does not always work. Known issues:
- some raw TIFFs have a negative DAPI channel index (indices are -1 and 0 for DAPI and GFP)
- some raw TIFFs have extra pages at the beginning with no metadata
- some raw TIFFs have extra pages at the end, possibly with valid metadata (according to Nathan)

In [None]:
# Work on an independent copy of the metadata rows flagged as raw
d_raw = api.md.loc[api.md.is_raw].copy()

# The experiment ID is whatever precedes the first underscore in `exp_dir`
d_raw['exp_id'] = [dirname.partition('_')[0] for dirname in d_raw.exp_dir]

In [None]:
# test parsing a raw file: the last row of `d_raw` is passed to the API
# along with the ESS root as the source and a local directory as the destination
api.parse_raw_file(d_raw.iloc[-1], src_root=ess_root, dst_root='/Users/keith.cheveralls/image-data/PM-test')

In [None]:
# Wrap a local copy of a raw TIFF in a RawPipelineImage
# and parse its MicroManager metadata
d = image.RawPipelineImage('/Users/keith.cheveralls/image-data/H1_1_RABGGTB.ome.tif')
d.parse_micromanager_metadata()

In [None]:
# a raw TIFF from plate1: parse its MicroManager metadata
d = image.RawPipelineImage('/Users/keith.cheveralls/image-data/A1_1_ATL2.ome.tif')
d.parse_micromanager_metadata()

In [None]:
# a raw TIFF from plate1 thawed: parse its MicroManager metadata,
# then run the validation step on the result
d = image.RawPipelineImage('/Users/keith.cheveralls/image-data/E7_9_RAB14.ome.tif')
d.parse_micromanager_metadata()
d.validate_mm_metadata()

In [None]:
# problematic file from plate14 with an extra page that has no tags
# (one of the known issues listed at the top of this section)
d = image.RawPipelineImage('/Users/keith.cheveralls/image-data/A1_1_CTRL1.ome.tif')
d.parse_micromanager_metadata()

In [None]:
# Validate the MicroManager metadata of the image currently bound to `d`
d.validate_mm_metadata()

### Generating projections

This is deprecated/unused.

In [None]:
# Rebind `d_raw` to a fresh copy of the raw rows of `api.md`
# (note: this discards any columns added to the previous `d_raw`, such as `exp_id`)
d_raw = api.md.loc[api.md.is_raw].copy()

In [None]:
# Report, for each plate, how many projections exist on ESS relative to
# the number of raw files, as a way of 'watching' progress
projections_root = '/Volumes/ml_group/PlateMicroscopy-metadata/'

# number of raw files per plate directory, in descending order of plate_dir
counts = (
    d_raw.groupby('plate_dir')
    .count()
    .sort_values(by='plate_dir', ascending=False)
    .filename
)

for plate_dir, num_raw in counts.items():
    plate_path = os.path.join(projections_root, plate_dir)

    # plates without a projections directory are reported as zero
    if os.path.isdir(plate_path):
        tif_paths = glob.glob(os.path.join(plate_path, '*.tif'))
    else:
        tif_paths = []
    n = len(tif_paths)

    # NOTE(review): presumably six projection TIFFs are written per raw file,
    # hence the division by 6 - confirm
    print('%s: %s/%s' % (plate_dir, n/6, num_raw))

In [None]:
# load the cached raw metadata after generating projections on 2019-10-26
# (read from a local CSV with the default pandas parsing options)
d_raw_c = pd.read_csv('/Users/keith.cheveralls/image-data/2019-10-26_PlateMicroscopy-raw-metadata.csv')

### Refactoring Nathan's method to select in-focus stacks

Still in development.

In [None]:
# a raw stack
stack = tifffile.imread('/Users/keith.cheveralls/image-data/MMStack_601-E2-1.ome.tif')
# keep the first 131 planes along axis 0
# NOTE(review): presumably these are the DAPI z-slices and 131 is the
# number of slices per channel - confirm
dapi_stack = stack[:131, :, :]
# display the shape of the full stack
stack.shape

In [None]:
# Shape of the maximum projection of the DAPI stack along axis 1
dapi_stack.max(axis=1).shape

In [None]:
# a stack from Nathan (this rebinds `stack` and `dapi_stack`)
stack = tifffile.imread('/Users/keith.cheveralls/image-data/A9_1_BAG6.ome.tif')
# keep the first 131 planes along axis 0
# NOTE(review): presumably these are the DAPI z-slices and 131 is the
# number of slices per channel - confirm
dapi_stack = stack[:131, :, :]
# display the shape of the full stack
stack.shape

In [None]:
# Display the plane at index 25 along axis 0 of the DAPI stack
viz.imshow(dapi_stack[25, :, :])

In [None]:
# Display the maximum projection of the DAPI stack along axis 2 (the last axis)
viz.imshow(dapi_stack.max(axis=2))

In [None]:
# Alternative focus measure (variance of the Laplacian), currently disabled:
# blur_vals = np.array([cv2.Laplacian(zslice, cv2.CV_64F).var() for zslice in dapi_stack])

# Mean intensity of each plane along axis 0 of the DAPI stack, as floats
# (despite its name, `sum_vals` holds per-plane means, not sums)
sum_vals = np.fromiter((plane.mean() for plane in dapi_stack), dtype=float)

In [None]:
# Plot the per-slice intensity profile.
# To simulate one z-slice that is underexposed by a factor of two, uncomment:
# sum_vals[30] = sum_vals[30]/2
plt.plot(sum_vals)

In [None]:
# check derivative for spikes due to isolated unexposed z-slices:
# the largest absolute difference between consecutive values of the profile
np.abs(np.diff(sum_vals)).max()

In [None]:
# Calculate the mean and standard deviation of the intensity profile in z,
# treating the baseline-subtracted, normalized profile as a distribution
# over the slice index.
# Note: `sum_vals` is normalized in place, so cells that use `sum_vals`
# will see the normalized profile if they are re-run after this one.
sum_vals -= sum_vals.min()
sum_vals /= sum_vals.sum()
x = np.arange(len(sum_vals))

# first moment (mean slice index) and raw second moment
xm = (x * sum_vals).sum()
xv = (x * x * sum_vals).sum()

# The standard deviation is computed from the centered second moment rather
# than as sqrt(xv - xm**2): the two are mathematically equal, but the
# difference form can round to a slightly negative number (and hence NaN)
# when the profile is concentrated on a single slice, whereas the centered
# sum of non-negative terms cannot.
xs = np.sqrt((sum_vals * (x - xm)**2).sum())
xm, xs

In [None]:
# The interval extending two standard deviations on either side of the
# mean slice index
xm - 2*xs, xm + 2*xs