In [None]:
import glob
import json
import os
import sys

import numpy as np
import pandas as pd
import seaborn as sns
from matplotlib import pyplot as plt
import matplotlib.colors as mpl_colors

from scipy import interpolate
import FlowCytometryTools as fct

In [None]:
%load_ext autoreload
%autoreload 2
%aimport -fct -np -pd -plt

In [None]:
# Put the parent directory on the import path so the local `pipeline_process`
# package resolves; the guard keeps re-runs of this cell from stacking
# duplicate entries on sys.path.
if '../' not in sys.path:
    sys.path.append('../')
from pipeline_process import common
from pipeline_process.facs import constants, manager, processor, unmixer

FITC = constants.FITC

# Root directory handed to FACSManager. Set the FACS_BOX_ROOT environment
# variable to point at a different location; the fallback is the original
# hard-coded path, so existing setups behave exactly as before.
BOX_ROOT = os.environ.get('FACS_BOX_ROOT', '/Users/keith.cheveralls/Box-cache/')

In [None]:
# initialize a manager rooted at BOX_ROOT; it is used below to look up the
# sample and control directory paths for each plate
m = manager.FACSManager(BOX_ROOT)

In [None]:
# build a processor for plate 'P0014' from the directory paths the manager
# returns for it
plate_dirpaths = m.get_sample_and_control_dirpaths('P0014')
p = processor.FACSProcessor(*plate_dirpaths, verbose=False)

In [None]:
# process a single well ('D1') with plotting enabled, after opening an 8x6 figure
# NOTE(review): the batch loop unpacks process_sample() as (stat, dist), so `s`
# is presumably that pair — confirm
plt.figure(figsize=(8,6))
s = p.process_sample('D1', show_plots=True)

In [None]:
# plate ids are the keys of manager.CONTROL_DIRNAMES
all_plate_ids = [plate_id for plate_id in manager.CONTROL_DIRNAMES.keys()]

### Process all plates

In [None]:
# run every well of every plate through a fresh processor, tagging each
# result with the well and plate it came from
stats, dists = [], []
for plate_id in all_plate_ids:
    print('Loading plate %s' % plate_id)

    plate_dirpaths = m.get_sample_and_control_dirpaths(plate_id)
    p = processor.FACSProcessor(*plate_dirpaths, verbose=False)

    for well_id in p.well_ids:
        stat, dist = p.process_sample(well_id, show_plots=False)
        # label both outputs identically so they can be matched up later
        for record in (stat, dist):
            record['well_id'] = well_id
            record['plate_id'] = plate_id
        stats.append(stat)
        dists.append(dist)

In [None]:
# cache stats: tabulate the per-well results and write them to a dated CSV
stats_table = pd.DataFrame(data=stats)
stats_table.to_csv('2019-07-16_all-facs-results.csv')

In [None]:
# cache dists
def to_jsonable(dists):
    '''
    Return a JSON-serializable copy of a list of distribution records.

    Parameters
    ----------
    dists : list of dict-like rows (anything exposing .items())

    Returns
    -------
    A new list of plain dicts in which
      - numpy arrays and numpy scalars are converted to built-in Python
        types via .tolist(),
      - other non-string iterables (e.g. tuples) are converted to lists,
      - strings, None, and non-iterable scalars pass through unchanged.

    The input rows are left untouched, so the in-memory data stays usable
    after caching.
    '''
    def coerce(val):
        # arrays and numpy scalars both expose .tolist(), which yields
        # built-in types that the json module can serialize
        if hasattr(val, 'tolist'):
            return val.tolist()
        # strings must stay intact: list('D1') would split into ['D', '1']
        if val is None or isinstance(val, (str, bytes)):
            return val
        try:
            return list(val)
        except TypeError:
            # non-iterable scalars (int, float, bool) are already JSON-friendly
            return val

    return [{key: coerce(val) for key, val in row.items()} for row in dists]

# write the JSON-coerced distributions to a dated cache file; relies on the
# `json` module being imported in the notebook's import cell
with open('2019-07-16_dists-cache.json', 'w') as cache_file:
    json.dump(to_jsonable(dists), cache_file)

### Some plots

In [None]:
# tabulate the per-well stats and drop every row that has a missing value,
# then show the remaining table size
d = pd.DataFrame(data=stats).dropna(axis=0, how='any')
d.shape

In [None]:
# area vs log median, drawn as translucent points
_ = plt.scatter(d['area'], d['rel_median_log'], alpha=.1)

In [None]:
# area vs log median, as a 2D histogram with power-law colour scaling
area_bins = np.arange(0, .4, .01)
log_median_bins = np.arange(0, 1.5, .03)
_ = plt.hist2d(
    d['area'],
    d['rel_median_log'],
    bins=(area_bins, log_median_bins),
    norm=mpl_colors.PowerNorm(.5))

In [None]:
# raw mean vs raw std, as a 2D histogram with power-law colour scaling
mean_bins = np.arange(3000, 7000, 90)
std_bins = np.arange(0, 1500, 30)
_ = plt.hist2d(
    d['raw_mean'],
    d['raw_std'],
    bins=(mean_bins, std_bins),
    norm=mpl_colors.PowerNorm(.7))

In [None]:
# rel_mean_hlog vs rel_percentile99_hlog, as a 2D histogram with power-law
# colour scaling
mean_hlog_bins = np.arange(1000, 6000, 90)
percentile99_hlog_bins = np.arange(2000, 8000, 60)
_ = plt.hist2d(
    d['rel_mean_hlog'],
    d['rel_percentile99_hlog'],
    bins=(mean_hlog_bins, percentile99_hlog_bins),
    norm=mpl_colors.PowerNorm(.7))

### Compare control distributions from different plates

In [None]:
# overlay the reference curve (x_ref, y_ref) of each flagged plate on one plot
# NOTE(review): these plates are looked up by integer, whereas the cells above
# pass ids like 'P0014' — confirm the manager accepts both forms
plate_nums_weird_controls = [1, 6, 10, 11, 19]
for plate_num in plate_nums_weird_controls:
    print('Loading plate %s' % plate_num)
    control_dirpaths = m.get_sample_and_control_dirpaths(plate_num)
    p = processor.FACSProcessor(*control_dirpaths)
    plt.plot(p.x_ref, p.y_ref, label=plate_num)
plt.legend()