In [None]:
import os
import re
import io
import sys
import glob
import enum
import json
import dask
import xlrd
import base64
import imageio
import requests
import datetime
import numpy as np
import pandas as pd
import sqlalchemy as db

from scipy import ndimage
from matplotlib import pyplot as plt

%load_ext autoreload
%autoreload 1

sys.path.append('..')
%aimport opencell.imaging
%aimport opencell.imaging.managers
%aimport opencell.imaging.processors
%aimport opencell.file_utils
%aimport opencell.database.operations

from opencell import constants, file_utils
from opencell.cli import database as db_cli
from opencell.cli import imaging as imaging_cli
from opencell.database import models
from opencell.database import operations as ops
from opencell.database import utils as db_utils
from opencell.imaging import processors
from opencell.imaging import utils as im_utils
from opencell.imaging.images import RawPipelineTIFF

In [None]:
def plt_hist(vals, **kwargs):
    '''
    Draw a histogram of `vals` as a line plot using pyplot

    All keyword arguments are forwarded to np.histogram;
    each count is plotted at the right-hand edge of its bin
    '''
    histogram = np.histogram(vals, **kwargs)
    bin_counts, bin_edges = histogram
    right_edges = bin_edges[1:]
    plt.plot(right_edges, bin_counts)

In [None]:
def profile_com(profile):
    '''
    Center of mass of the above-average part of a 1D intensity profile

    The profile mean is subtracted and negative values are clipped to zero,
    so that only the above-average samples contribute to the weighted mean
    of the sample indices.

    profile : 1D sequence or array of numbers

    Returns the center of mass as a (fractional) index into the profile,
    or np.nan when the center of mass is undefined: the profile is empty,
    contains NaNs, or has no sample above its mean (e.g., it is constant).
    '''
    profile = np.asarray(profile, dtype=float)
    if profile.size == 0:
        return np.nan

    # the subtraction creates a new array, so the caller's data is never modified
    weights = profile - profile.mean()
    weights[weights < 0] = 0

    total = weights.sum()
    if not total > 0:
        # covers both an all-zero weight vector and a NaN total;
        # returning early avoids a 0/0 division and its RuntimeWarning
        return np.nan

    positions = np.arange(len(weights))
    return (weights * positions).sum() / total

### All profiles from opencell database

In [None]:
# database URL obtained from a credentials JSON file (the path is relative to the working directory)
url = db_utils.url_from_credentials('../db-credentials-cap.json')
engine = db.create_engine(url)

# `Session` is a scoped session built from a session factory bound to the engine;
# the cells below call `Session.query(...)` on it directly
session_factory = db.orm.sessionmaker(bind=engine)
Session = db.orm.scoped_session(session_factory)

In [None]:
# instantiate one FOV processor for every FOV in opencelldb
# (`processors` has to be imported by name in the import cell:
# `%aimport opencell.imaging.processors` alone only binds the top-level
# `opencell` package, which left this cell with a NameError on a fresh kernel)
fovs = Session.query(models.MicroscopyFOV).all()
ps = [processors.FOVProcessor.from_database(fov) for fov in fovs]

In [None]:
# construct FOV metadata: one record per FOV processor
# (fov_id, plate_id, well_id, target_name, and the z-step size)
rows = [
    {
        'fov_id': processor.fov_id,
        'plate_id': processor.plate_id,
        'well_id': processor.well_id,
        'target_name': processor.target_name,
        'step_size': processor.z_step_size(),
    }
    for processor in ps
]

In [None]:
# one row per FOV, one column per metadata field
fov_metadata = pd.DataFrame(rows)
fov_metadata.head()

In [None]:
def all_results(kind):
    '''
    Collect every FOV result of the given kind into a single dataframe

    Each row combines identifiers read from the result's FOV
    (fov_id, line_id, pml_id) with the entries of the result's `data` column,
    which must therefore be a dict (not a list). If `data` contains a key
    with the same name as one of the identifiers, the value from `data` wins.
    '''
    query = Session.query(models.MicroscopyFOVResult)
    query = query.filter(models.MicroscopyFOVResult.kind == kind)

    records = []
    for result in query.all():
        identifiers = {
            'fov_id': result.fov.id,
            'line_id': result.fov.cell_line_id,
            'pml_id': result.fov.dataset.pml_id,
        }
        records.append({**identifiers, **result.data})

    return pd.DataFrame(data=records)

In [None]:
# aggregate the z-profile results of all FOVs into one dataframe
fov_profiles = all_results(kind='z-profiles')
fov_profiles.shape

In [None]:
# join the FOV metadata to the z-profiles on fov_id,
# keeping only the FOVs that appear in both tables
data = fov_metadata.merge(fov_profiles, on='fov_id', how='inner')
data.shape

In [None]:
# calculate the center of mass of the mean intensity profile in each channel

# initialize with NaN (rather than None) so that the columns have a float dtype
for channel in ('405', '488'):
    data['com_%s' % channel] = np.nan

for ind, row in data.iterrows():
    for channel in ('405', '488'):
        try:
            profile = row[channel]['mean']
            data.at[ind, 'com_%s' % channel] = profile_com(profile)
        except Exception as error:
            # best-effort: a row without a usable profile keeps a NaN center of mass;
            # catching Exception (not a bare except) keeps the loop interruptible
            print('No center of mass for row %s, channel %s (%r)' % (ind, channel, error))

# convert from a z-slice index to a distance by multiplying by the z-step size
data['com_405'] = data.com_405 * data.step_size
data['com_488'] = data.com_488 * data.step_size

# offset of the 488 center relative to the 405 center
data['delta'] = data.com_488 - data.com_405

In [None]:
# calculate the total depth of each z-stack (number of z-slices times the z-step size)

# create both depth columns up front (the original initialized depth_405 twice
# and depth_488 never), so that each exists with a float dtype even if
# no profile can be read for that channel
for channel in ('405', '488'):
    data['depth_%s' % channel] = np.nan

for ind, row in data.iterrows():
    for channel in ('405', '488'):
        try:
            profile = row[channel]['mean']
            data.at[ind, 'depth_%s' % channel] = len(profile)
        except Exception as error:
            # best-effort: a row without a usable profile keeps a NaN depth;
            # catching Exception (not a bare except) keeps the loop interruptible
            print('No depth for row %s, channel %s (%r)' % (ind, channel, error))

# convert from a number of z-slices to a distance
data['depth_405'] = data.depth_405 * data.step_size
data['depth_488'] = data.depth_488 * data.step_size

In [None]:
# save the merged table, including the derived com_*/depth_*/delta columns,
# to a CSV file in the current working directory
data.to_csv('2020-01-23-all-z-profiles.csv')

In [None]:
# histogram of distances of the cell layer center from the bottom of the stack,
# one curve per channel (rows without a center of mass are left out)
for column in ('com_405', 'com_488'):
    plt_hist(data[column].dropna(), bins=np.arange(0, 15, .2), density=True)

In [None]:
# the fraction (between 0 and 1, not a percentage) of all FOVs whose 405 cell layer
# center is less than 5 (in the units of step_size) from the bottom of the stack;
# FOVs without a center of mass still count toward the denominator
(data.com_405 < 5).mean()

In [None]:
# histogram of distances of the cell layer center from the top of the stack
# (rows without a center of mass are dropped separately for each channel)
d = data.loc[~data.com_405.isna()]
plt_hist(d.depth_405 - d.com_405, bins=np.arange(0, 30, .2), density=True)

# use the filtered rows here too (the original filtered into `d`
# but then plotted from the unfiltered `data`)
d = data.loc[~data.com_488.isna()]
plt_hist(d.depth_488 - d.com_488, bins=np.arange(0, 30, .2), density=True)

In [None]:
# the distribution of the difference between the 405 and 488 centers of mass
# is centered on zero
# (missing values are dropped and the result cast to float first: with an integer
# `bins`, np.histogram derives the bin range from the data and raises a ValueError
# if that range is not finite)
plt_hist((data.com_405 - data.com_488).dropna().astype(float), bins=100, density=True)

In [None]:
# inspect the rows for a single target
data[data['target_name'] == 'PCNA']

### Development with cherry-picked example TIFFs

In [None]:
# NOTE: `path` is assigned twice in this cell and only the last uncommented assignment
# takes effect; (un)comment the lines below to switch between the example images

# an image with diffuse-ish nuclear GFP signal (this is PRKDC in D01 on P0014)
path = '/Users/keith.cheveralls/image-data/raw-pipeline-microscopy/PML0234/raw_data/MMStack_165-B5-21.ome.tif'
# path = '/Users/keith.cheveralls/image-data/raw-pipeline-microscopy/PML0234/raw_data/MMStack_114-B6-6.ome.tif'

# nuclear GFP with 0.5um step size
#path = '/Users/keith.cheveralls/image-data/H6_9_PCNA.ome.tif'
path = '/Users/keith.cheveralls/image-data/E1_15_PTMA.ome.tif'

# load the selected TIFF, then parse and validate its MicroManager metadata and
# split it by channel; the cells below read the per-channel stacks via
# `tiff.stacks[tiff.laser_405]` and pass `tiff.laser_405`/`tiff.laser_488` to find_cell_layer
tiff = RawPipelineTIFF(path)
tiff.parse_micromanager_metadata()
tiff.validate_micromanager_metadata()
tiff.split_channels()

In [None]:
# cell layer center using Hoechst
# NOTE(review): the meaning of the numeric arguments (-7, 7, 0.2) is defined by
# RawPipelineTIFF.find_cell_layer and is not visible here - confirm before changing them
bot, top, cen, profile_405 = tiff.find_cell_layer(tiff.laser_405, -7, 7, 0.2)
# compare the center returned by find_cell_layer with the center of mass of the returned profile
cen, profile_com(profile_405)

In [None]:
# cell layer center using GFP
# NOTE(review): the numeric arguments differ from the Hoechst cell (-5, 5 here instead of -7, 7);
# their meaning is defined by RawPipelineTIFF.find_cell_layer - confirm that the difference is intended
# note that bot, top, and cen from the Hoechst cell are overwritten here
bot, top, cen, profile_488 = tiff.find_cell_layer(tiff.laser_488, -5, 5, 0.2)
# compare the center returned by find_cell_layer with the center of mass of the returned profile
cen, profile_com(profile_488)

In [None]:
# overlay the two z-profiles, each rescaled to the range [0, 1]
# we observe a four-slice offset in the cell layer center between hoechst and GFP
# this corresponds to 0.8um, which is more than what christian measured (which was 0.5um)
# NOTE(review): the 488 profile is shifted by five slices below, not four - confirm which is intended
for z_profile, first_slice in ((profile_405, 0), (profile_488, 5)):
    lowest = z_profile.min()
    span = z_profile.max() - lowest
    plt.plot((z_profile[first_slice:] - lowest)/span)

In [None]:
# number of z-slices per micron for a 0.5um z-step
slices_per_um = 1/.5
plt.figure(figsize=(12, 12))
# show the first 512 rows and columns of the 405 z-slice that lies
# 5um (here, 10 slices) before slice 13
# NOTE(review): slice 13 is presumably the cell layer center found above - confirm
plt.imshow(tiff.stacks[tiff.laser_405][13 - int(5*slices_per_um), :512, :512])

In [None]:
# testing ndimage.shift
# note that lower interpolation orders (below 3) result in some smoothing of the shifted image
# crop the 405 stack to its first 255 rows and columns
im = tiff.stacks[tiff.laser_405][:, :255, :255]
# shift by half a sample along the first axis only, using linear (order=1) spline interpolation
# (the first axis is presumably z - confirm against RawPipelineTIFF)
out = ndimage.shift(im, (.5, 0, 0), order=1)

In [None]:
# slice 33 of the original (left) and of the shifted (right) stack, side by side
plt.figure(figsize=(16, 12))
plt.imshow(np.concatenate([im[33], out[33]], axis=1))

### Refactoring Nathan's method to select in-focus stacks

In [None]:
# a raw stack
# NOTE(review): `tifffile` is not imported in the import cell at the top of this notebook,
# so this cell raises a NameError on a fresh kernel unless it is imported elsewhere - confirm
stack = tifffile.imread('/Users/keith.cheveralls/image-data/MMStack_10-B9-10.ome.tif')
# keep the first 131 planes of the stack (presumably the DAPI/405 channel - confirm)
dapi_stack = stack[:131, :, :]
stack.shape

In [None]:
# shape of the maximum-intensity projection along axis 1
dapi_stack.max(axis=1).shape

In [None]:
# show the maximum-intensity projection along axis 2
# NOTE(review): `viz` is not imported in the import cell at the top of this notebook,
# so this cell raises a NameError on a fresh kernel unless it is imported elsewhere - confirm
viz.imshow(dapi_stack.max(axis=2))

In [None]:
# intensity profile in z: the mean intensity of each z-slice
# (despite its name, sum_vals holds per-slice means, not sums)
# blur_vals = np.array([cv2.Laplacian(zslice, cv2.CV_64F).var() for zslice in dapi_stack])
sum_vals = np.array([np.mean(zslice) for zslice in dapi_stack], dtype=float)
plt.plot(sum_vals)

In [None]:
# suppose one z-slice is underexposed
# (uncomment the next line to simulate this; it divides the value of slice 30 by ten)
# sum_vals[30] = sum_vals[30]/10
# keep only the above-average part of the profile and normalize it to sum to one
dist = sum_vals - sum_vals.mean()
dist[dist < 0] = 0
dist /= dist.sum()
# the distribution is scaled by 30, presumably only to make it visible next to the cumulative sum
plt.plot(dist * 30)
plt.plot(np.cumsum(dist))
# index of the first z-slice at which the cumulative distribution exceeds one half
np.argwhere(np.cumsum(dist) > .5).min()

In [None]:
# check derivative for spikes due to isolated unexposed z-slices:
# the largest absolute change in the profile between consecutive z-slices
np.abs(np.diff(sum_vals)).max()

In [None]:
# calculate the mean and standard deviation of the intensity profile in z,
# treating the min-subtracted, normalized profile as a distribution over the z-slice index
# note: sum_vals is modified in place, so earlier cells that use it
# will see the normalized values if they are re-run after this cell
sum_vals -= sum_vals.min()
sum_vals /= sum_vals.sum()
x = np.arange(len(sum_vals))
# first moment (the mean z-slice index)
xm = (x * sum_vals).sum()
# second raw moment (not yet the variance)
xv = (x * x * sum_vals).sum()
# standard deviation: square root of the second raw moment minus the squared mean
xs = np.sqrt(xv - xm**2)
xm, xs

In [None]:
# the interval of two standard deviations on either side of the mean z-slice index
xm - 2*xs, xm + 2*xs