In [None]:
import glob #filenames and pathnames utility
import os   #operating sytem utility

import flowgatenist as flow
#from flowgatenist import gaussian_mixture as nist_gmm
import flowgatenist.batch_process as batch_p

import matplotlib.pyplot as plt
from matplotlib import colors
#from matplotlib.backends.backend_pdf import PdfPages

import numpy as np
import pandas as pd
#from scipy import special
#from scipy import misc
from scipy import stats

#import pystan
import pickle

import seaborn as sns
sns.set()

%load_ext autoreload
%autoreload 2

%matplotlib inline

Indicate the directory where the data is stored:

In [None]:
# Start from the directory the notebook is running in; the following cells
# derive the cytometry and experiment directories from this path.
notebook_dir = os.getcwd()
notebook_dir

In [None]:
# The cytometry data directory is the parent of the notebook directory.
# os.path.dirname handles the platform's path separator; slicing at the last
# "\\" only worked on Windows (elsewhere rfind returns -1 and merely drops the
# final character of the path).
cytometry_directory = os.path.dirname(notebook_dir)
cytometry_directory

In [None]:
# The main experiment directory is the parent of the cytometry directory.
# os.path.dirname is used instead of slicing at the last "\\" so the notebook
# also works with non-Windows path separators.
main_directory = os.path.dirname(cytometry_directory)
# Make the experiment directory current so the layout file can be globbed there.
os.chdir(main_directory)
main_directory

In [None]:
# The plate identifier is everything from the first "plate_" to the end of the
# cytometry directory path.
plate_index = cytometry_directory.find("plate_")
if plate_index < 0:
    # Without this check, find() == -1 would silently yield the last character
    # of the path as the plate identifier.
    raise ValueError(f'"plate_" not found in cytometry directory path: {cytometry_directory}')
plate_str = cytometry_directory[plate_index:]
plate_str

In [None]:
# Locate the plate layout CSV for this plate in the current (experiment) directory.
layout_pattern = '*cytom-' + plate_str + '*.csv'
layout_matches = glob.glob(layout_pattern)
if not layout_matches:
    # Fail with a readable message instead of a bare IndexError from [0].
    raise FileNotFoundError(f"No plate layout file matching '{layout_pattern}' in {os.getcwd()}")
layout_file = layout_matches[0]
layout_file

In [None]:
# Load the full plate layout table from the CSV file found above.
plate_layout_0 = pd.read_csv(layout_file)

In [None]:
#plate_layout_0.dropna(inplace=True)

In [None]:
def _first_not_none(values):
    """Return the first entry of `values` that is not the string "none", or None if there is no such entry."""
    return next((value for value in values if value != "none"), None)


# Inducer name: first layout entry that is not "none".
inducerId = plate_layout_0['inducerId'].tolist()
inducer = _first_not_none(inducerId)
if inducer is None:
    # Previously unreachable: the message never printed and `inducer` was left
    # undefined when every entry was "none".
    print("No Inducer Identified")

# Inducer units: first layout entry that is not "none".
inducerUnits = plate_layout_0['inducerUnits'].tolist()
inducerU = _first_not_none(inducerUnits)
if inducerU is None:
    print("No Inducer Units Identified")

# Strain name taken from the first row of the layout; used in plot titles below.
l_strain = plate_layout_0['strain'].tolist()
exStrain = l_strain[0]

In [None]:
# Show the inducer units picked up from the layout.
inducerU

In [None]:
# Keep only the wells whose strain entry is not "none"; work on an independent copy
# so that columns can be added without touching the original layout table.
has_strain = plate_layout_0['strain'] != "none"
plate_layout = plate_layout_0.loc[has_strain].copy()

In [None]:
# Display the layout restricted to wells that have a strain.
plate_layout

Read in cytometry data:

In [None]:
# Directory of plate 1, where the background fit file is looked up in the next cell.
# NOTE(review): the substitution is hard-coded to 'plate_2'; if this notebook is run
# for a different plate the path is returned unchanged -- confirm this is intended.
plate_1_dir = cytometry_directory.replace('plate_2', 'plate_1')
plate_1_dir

In [None]:
# Load the pickled Stan sampling result for the background signal from the plate 1 directory.
os.chdir(plate_1_dir)
back_fit_pattern = '*BL1-A-MEF*.stan_samp_pkl'
back_fit_matches = glob.glob(back_fit_pattern)
if not back_fit_matches:
    # Fail with a readable message instead of a bare IndexError from [0].
    raise FileNotFoundError(f"No background fit file matching '{back_fit_pattern}' in {plate_1_dir}")
back_fit_file = back_fit_matches[0]
sm_back, stan_back_fit = batch_p.unpickle_stan_sampling(file=back_fit_file)

In [None]:
# Extract the 'mu' variable from the background fit (presumably the sampled draws --
# TODO confirm) and average it into a single background level. This value is
# subtracted from the fluorescence signal in the statistics cell further down.
stan_back_fit_samples = stan_back_fit.stan_variable('mu')
back_mu = np.mean(stan_back_fit_samples)
back_mu

In [None]:
# Return to this plate's cytometry directory; the data files below are globbed
# relative to the current directory.
os.chdir(cytometry_directory)
cytometry_directory

In [None]:
# For every well, look for a pickled data file whose name ends in '<well>.fcs_pkl'.
# The first match is used; wells without a match get an empty string.
well_matches = [glob.glob('*' + well + '.fcs_pkl') for well in plate_layout['well']]
coli_file = [matches[0] if len(matches) > 0 else '' for matches in well_matches]
plate_layout['coli_file'] = coli_file

In [None]:
# Display the layout with the matched data file for each well.
plate_layout

In [None]:
# Sample label for each well: '<plasmid>-<inducer concentration>_<well>'.
plate_layout['sample'] = [
    plasmid + '-' + str(concentration) + '_' + well
    for plasmid, concentration, well in zip(plate_layout['plasmid'],
                                            plate_layout['inducerConcentration'],
                                            plate_layout['well'])
]

# The variant column is a copy of the plasmid column.
plate_layout['variant'] = plate_layout['plasmid']

In [None]:
# Sort the layout in place by plasmid, then inducer concentration. The cells below
# iterate over the rows in this order and assign their results back positionally,
# so this must run before the data files are loaded.
plate_layout.sort_values(by=['plasmid', 'inducerConcentration'], inplace=True)

In [None]:
# Display the sorted layout.
plate_layout

In [None]:
# Cap on events per well; only used by the (disabled) truncation line below.
max_points = 30000
# One entry per layout row, in row order: the unpickled data object, or None
# when no data file was found for the well.
coli_data = []

for file in plate_layout['coli_file']:
    if file == '':
        coli_data.append(None)
        continue
    # Use a context manager so the file handle is closed after loading.
    # NOTE(review): pickle.load can execute arbitrary code; only load files
    # produced by this analysis pipeline.
    with open(file, 'rb') as pkl_file:
        data = pickle.load(pkl_file)
    #data.flow_frame = data.flow_frame[:max_points]
    coli_data.append(data)

In [None]:
# Sample labels in layout order; the displayed length is the number of wells.
samples = plate_layout['sample']
len(samples)

In [None]:
%%time
# Collect each well's flow_frame (None for wells without a data file) and pass the
# list to the central 2D Gaussian gating routine.
# NOTE(review): later cells read an `is_central` column from these frames,
# presumably added by this call -- confirm against flowgatenist.batch_process.
df_list = [ data.flow_frame if data is not None else None for data in coli_data ]
batch_p.central_2d_guassian(df_list, alpha=0.3)

In [None]:
%%time 
for data in coli_data:
    data.save_as_pickle()

Plot the central Gaussian gating results (used for the mean fluorescence):

In [None]:
# Grid of panels, four per row, one panel per well.
no_row = int(np.ceil(len(coli_data)/4))
plt.rcParams["figure.figsize"] = [4*4, 4*no_row]
fig, axs = plt.subplots(no_row, 4)
axs = axs.flatten()

for ax, data in zip(axs, coli_data):
    if data is None:
        # No data file for this well: leave the panel empty.
        continue

    # Singlet events with strictly positive scatter values (required for log10).
    singlets = data.flow_frame
    singlets = singlets[singlets.is_singlet]
    singlets = singlets[singlets['FSC-A']>0]
    singlets = singlets[singlets['SSC-A']>0]

    # 2D histogram of log10 forward vs. side scatter for all singlets.
    ax.grid(False)
    ax.hist2d(np.log10(singlets['FSC-A']), np.log10(singlets['SSC-A']),
              norm=colors.LogNorm(), bins=50)

    # Overlay the events flagged as central.
    central = singlets[singlets.is_central]
    ax.plot(np.log10(central['FSC-A']), np.log10(central['SSC-A']),
            'o', alpha=0.1, ms=2)

Plot histograms of calibrated ('MEF') data to check that things worked ok:

In [None]:
# Calibrated fluorescence channel and the histogram range / binning.
fl_channel = 'BL1-A-MEF'
x_min, x_max = -3000, 150000
bins = np.linspace(x_min, x_max, 200)

# Per-well results, appended in layout order (one entry per row of plate_layout).
geo_mean_list, geo_mean_err_list, count_list = [], [], []
mean_list, mean_err_list, central_count_list = [], [], []

sns.set()
plt.rcParams["figure.figsize"] = [16, 4]

# Gate columns drawn in each histogram panel, in drawing order, with their opacity.
gate_styles = [('is_cell', 0.3), ('is_singlet', 0.3), ('is_central', 0.5)]

for data, samp in zip(coli_data, plate_layout['sample']):
    # One figure per well: linear y-axis on the left, log y-axis on the right.
    fig, axs = plt.subplots(1, 2)

    if data is None:
        # No data file: record placeholders so the lists stay aligned with the layout.
        geo_mean_list.append(np.nan)
        geo_mean_err_list.append(np.nan)
        count_list.append(0)

        mean_list.append(np.nan)
        mean_err_list.append(np.nan)
        central_count_list.append(0)
        continue

    axs[1].set_yscale('log')
    events = data.flow_frame

    # Geometric mean of the background-subtracted singlet signal (positive values only),
    # with its error propagated from the standard error of the log signal.
    singlet_signal = events[events.is_singlet][fl_channel].copy() - back_mu
    singlet_signal = singlet_signal[singlet_signal>0]
    log_signal = np.log(singlet_signal)
    geo_mean = np.exp(log_signal.mean())
    geo_mean_err = log_signal.std()/np.sqrt(len(singlet_signal)) * geo_mean
    geo_mean_list.append(geo_mean)
    geo_mean_err_list.append(geo_mean_err)
    count_list.append(len(singlet_signal))

    # Arithmetic mean and standard error of the background-subtracted central signal.
    central_signal = events[events.is_central][fl_channel].copy() - back_mu
    mean_list.append(np.mean(central_signal))
    mean_err_list.append(stats.sem(central_signal))
    central_count_list.append(len(central_signal))

    # Overlaid histograms of the raw channel values for each gate.
    label = samp + ', YFP signal'
    for ax in axs:
        ax.text(0.5, 0.9, label, horizontalalignment='center', verticalalignment='center',
                transform=ax.transAxes)
        for gate, alpha in gate_styles:
            ax.hist(events[events[gate]][fl_channel], density=False, bins=bins, alpha=alpha)

In [None]:
# Attach the per-well statistics to the layout. The lists were filled in layout row
# order, so they are assigned positionally; the background level is a single value
# repeated for every row.
summary_columns = {
    'geo_mean': geo_mean_list,
    'geo_mean_err': geo_mean_err_list,
    'singlet_count': count_list,
    'mean': mean_list,
    'mean_err': mean_err_list,
    'central_count': central_count_list,
    'background_signal': back_mu,
}
for column, values in summary_columns.items():
    plate_layout[column] = values

In [None]:
# Display the layout with the fluorescence statistics added.
plate_layout

In [None]:
# Summary of the singlet counts across wells: mean, standard deviation, minimum, maximum.
singlet_counts = plate_layout["singlet_count"]
for summarize in (singlet_counts.mean, singlet_counts.std, singlet_counts.min, singlet_counts.max):
    print(summarize())

In [None]:
# Convert every concentration given in mmol/L to umol/L (x1000) so that all rows
# share the same units.
inducerConc = plate_layout['inducerConcentration'].tolist()
inducerUnits = plate_layout['inducerUnits'].tolist()
for i, units in enumerate(inducerUnits):
    if units == "mmol/L":
        inducerUnits[i] = "umol/L"
        inducerConc[i] = inducerConc[i]*1000
plate_layout['inducerConcentration'] = inducerConc
plate_layout['inducerUnits'] = inducerUnits

# inducerU was read from the layout before this conversion and is used in the axis
# labels of the plots below; keep it in step with the converted column so the
# labels do not claim mmol/L for values that are now in umol/L.
if inducerU == "mmol/L":
    inducerU = "umol/L"

In [None]:
# Sorted array of the distinct plasmids on the plate; one figure is drawn per plasmid below.
plasmids = np.unique(plate_layout['plasmid'].values)
plasmids

In [None]:
# The experiment name is the last component of the main directory path.
# os.path.basename handles the platform's separator; slicing after the last "\\"
# returned the whole path on non-Windows systems, which then ended up in the
# summary file name.
experiment = os.path.basename(main_directory)
experiment

In [None]:
# Largest concentration and smallest non-zero concentration; used for the axis
# limits and the symlog linear threshold in the plots below.
concentrations = plate_layout['inducerConcentration']
maxConc = concentrations.max()
minConc = concentrations[concentrations>0].min()

In [None]:
# Show the inducer name and the units string used in the axis labels below.
inducer, inducerU

In [None]:
plt.rcParams["figure.figsize"] = [16, 8]

# One 2x2 figure per plasmid showing the geometric mean and mean fluorescence
# against inducer concentration:
#   top-left: linear axes            top-right: symlog x
#   bottom-left: log y               bottom-right: symlog x, log y
for plasmid in plasmids:
    plasmid_rows = plate_layout[plate_layout['plasmid']==plasmid]
    fig, axs = plt.subplots(2, 2)
    (ax_linear, ax_symlog_x), (ax_log_y, ax_symlog_x_log_y) = axs

    for column in ['geo_mean', 'mean']:
        # Same data and axis labels on all four panels.
        for ax in axs.flatten():
            ax.plot(plasmid_rows['inducerConcentration'], plasmid_rows[column], 'o')
            ax.set_xlabel(f"ligand concentration ({inducerU})", size=12)
            ax.set_ylabel('YFP mean and geometric mean (MEF)', size=12)

        # Titles on the top row only.
        for ax in (ax_linear, ax_symlog_x):
            ax.set_title(exStrain + ', plasmid: ' + plasmid, size=14)

        # Right column: symlog x-axis, linear below the smallest non-zero concentration.
        for ax in (ax_symlog_x, ax_symlog_x_log_y):
            ax.set_xscale('symlog', linthresh=minConc)
            ax.set_xlim(-minConc/4, maxConc*2)

        # Bottom row: log y-axis.
        for ax in (ax_log_y, ax_symlog_x_log_y):
            ax.set_yscale('log')

In [None]:
# Show the experiment name that prefixes the summary file name below.
experiment

In [None]:
# Show the directory the summary file is written to below.
cytometry_directory

In [None]:
# Save the summary table as a pickle in the cytometry directory, named
# '<experiment>_<plate>_summary.frame_pkl'.
os.chdir(cytometry_directory)
summary_suffix = f'_{plate_str}_summary.frame_pkl'
plate_pickle_file = experiment + summary_suffix
with open(plate_pickle_file, 'wb') as summary_out:
    pickle.dump(plate_layout, summary_out)

In [None]:
plt.rcParams["figure.figsize"] = [16, 8]

# One 2x2 figure per plasmid showing the singlet and central event counts
# against inducer concentration:
#   top-left: linear axes            top-right: symlog x
#   bottom-left: log y               bottom-right: symlog x, log y
for plasmid in plasmids:
    plasmid_rows = plate_layout[plate_layout['plasmid']==plasmid]
    fig, axs = plt.subplots(2, 2)
    (ax_linear, ax_symlog_x), (ax_log_y, ax_symlog_x_log_y) = axs

    for column in ['singlet_count', 'central_count']:
        # Same data and axis labels on all four panels.
        for ax in axs.flatten():
            ax.plot(plasmid_rows['inducerConcentration'], plasmid_rows[column], 'o')
            ax.set_xlabel(f"ligand concentration ({inducerU})", size=12)
            ax.set_ylabel('Singlet Count', size=12)

        # Titles on the top row only.
        for ax in (ax_linear, ax_symlog_x):
            ax.set_title(exStrain + ', plasmid: ' + plasmid, size=14)

        # Right column: symlog x-axis, linear below the smallest non-zero concentration.
        for ax in (ax_symlog_x, ax_symlog_x_log_y):
            ax.set_xscale('symlog', linthresh=minConc)
            ax.set_xlim(-minConc/4, maxConc*2)

        # Bottom row: log y-axis.
        for ax in (ax_log_y, ax_symlog_x_log_y):
            ax.set_yscale('log')

In [None]:
# List the columns of the summary table.
plate_layout.columns.values

In [None]:
# Show the experiment name once more for reference.
experiment