In [None]:
import glob #filenames and pathnames utility
import os   #operating sytem utility
import warnings

import flowgatenist as flow
#from flowgatenist import gaussian_mixture as nist_gmm
import flowgatenist.batch_process as batch_p

from Bio.Seq import Seq

import matplotlib.pyplot as plt
from matplotlib import colors
import matplotlib.dates
#from matplotlib.backends.backend_pdf import PdfPages

import numpy as np
import pandas as pd
from scipy.optimize import curve_fit
#from scipy import special
#from scipy import misc

import cmdstanpy
import gsf_ims_fitness.stan_utility as stan_utility
import pickle

import seaborn as sns
# Apply seaborn's default theme; the style is refined further below.
sns.set()

# Reload imported modules automatically before each cell runs, so edits to
# local packages are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

# Render matplotlib figures inline in the notebook output.
%matplotlib inline

# Global default style: seaborn "white", then "ticks" with inward-pointing
# tick marks drawn on all four sides of the axes.
sns.set_style("white")
sns.set_style(
    "ticks",
    {
        'xtick.direction': 'in',
        'xtick.top': True,
        'ytick.direction': 'in',
        'ytick.right': True,
    },
)

# Font sizes and legend frame colour shared by every figure in the notebook.
plt.rcParams.update({
    'axes.labelsize': 16,
    'xtick.labelsize': 14,
    'ytick.labelsize': 14,
    'legend.fontsize': 12,
    'legend.edgecolor': 'k',
})

Record the directory where the notebook is saved (taken from the kernel's current working directory):

In [None]:
# Remember the kernel's current working directory; later cells call
# os.chdir(notebook_dir) to return here after visiting other folders.
notebook_dir = os.getcwd()
notebook_dir

In [None]:
# Parent folder of the notebook directory.
# os.path.dirname splits on the platform's own path separator, so this works on
# POSIX as well as on Windows. Searching for a hard-coded "\\" only works for
# Windows-style paths: on other systems rfind() returns -1 and the slice merely
# drops the last character of the path.
main_directory = os.path.dirname(notebook_dir)
main_directory

In [None]:
# Collect the entries in main_directory whose names contain '_Cytom',
# in sorted order, as a NumPy array of names relative to main_directory.
os.chdir(main_directory)
data_directories = np.array(sorted(glob.glob('*_Cytom*')))
data_directories

In [None]:
# Load the pickled summary table that sits next to the notebook.
os.chdir(notebook_dir)
file_name = 'Ligafy_cytom_data_all.pkl'
# NOTE: unpickling can execute arbitrary code -- only load files from a trusted source.
# The context manager closes the file handle as soon as loading finishes
# (a bare pickle.load(open(...)) leaves it open until garbage collection).
with open(file_name, 'rb') as summary_file:
    full_summary = pickle.load(summary_file)

In [None]:
# Background signal estimated from the 'pAN-1201' samples
# (presumably the non-fluorescent control -- confirm against the plate layout):
# the average of the per-sample 'mean' column, and its standard deviation.
df = full_summary[full_summary.variant == 'pAN-1201']
background_means = df['mean']

non_fluorescent_mean = background_means.mean()
non_fluorescent_mean_err = background_means.std()

ok_to_fail_fit = []
non_fluorescent_mean, non_fluorescent_mean_err

In [None]:
# List the distinct inducer IDs present in the summary table.
np.unique(full_summary.inducerId)

In [None]:
%%time
# Plate rows whose samples are loaded for plotting.
rows_to_plot = ['B', 'E']

# Both dictionaries are keyed by (variant, ligand):
conc_dict = {}        # list of inducer concentrations, sorted ascending
cytom_data_dict = {}  # list of 'BL1-A-MEF' arrays, one per sample, same order

for date_plate, df_0 in full_summary.groupby('date_plate'):
    # The text before the first underscore selects the data directory;
    # the remainder names the plate sub-folder.
    date, _, plate = date_plate.partition('_')

    for row in rows_to_plot:
        df = df_0[df_0.row == row]
        if len(df) == 0:
            continue
        df = df.sort_values(by='inducerConcentration')

        # Only rows that contain exactly one ligand (besides 'none') are used.
        lig_list = np.unique(df.inducerId)
        lig_list = lig_list[lig_list != 'none']
        if len(lig_list) != 1:
            continue

        ligand = lig_list[0]
        variant = df.iloc[-1].variant
        if (variant, ligand) in conc_dict:
            # A later row for the same pair overwrites the earlier one.
            print(f'duplicate row for {(variant, ligand)}')
        conc_dict[(variant, ligand)] = list(df.inducerConcentration)

        # First data directory whose name contains the date string.
        matching_dirs = [d for d in data_directories if date in d]
        if not matching_dirs:
            raise FileNotFoundError(
                f'no data directory containing {date!r} found in {main_directory!r}')
        direct = matching_dirs[0]

        # Build the plate folder path explicitly instead of chaining os.chdir()
        # calls, so the kernel's working directory is never left inside a data
        # folder (in particular not when loading one of the files fails).
        plate_directory = os.path.join(main_directory, direct, plate.replace('-', '_'))

        cytom_data = []
        for file in df.coli_file:
            # NOTE: unpickling can execute arbitrary code -- trusted files only.
            with open(os.path.join(plate_directory, file), 'rb') as pickle_file:
                cytom_df = pickle.load(pickle_file).flow_frame
            # Keep only the events flagged as central.
            cytom_df = cytom_df[cytom_df.is_central]
            data = cytom_df['BL1-A-MEF'].values
            cytom_data.append(data)
        cytom_data_dict[(variant, ligand)] = cytom_data

In [None]:
# Show which (variant, ligand) pairs were loaded.
list(cytom_data_dict.keys())

In [None]:
# Subtract background and trim to keep only positive values:
# NOTE: this cell overwrites the entries of cytom_data_dict, so it is not
# idempotent -- running it a second time subtracts the background again.
# Reload the data before re-running it.
# The loop variables `data`, `variant` and `ligand` remain defined afterwards
# and are read by the quantile check in the next cell.
for (variant, ligand), cytom_data in cytom_data_dict.items():
    new_data = []
    for data in cytom_data:
        data = data - non_fluorescent_mean
        # Only strictly positive values can be shown on the log-scaled plots below.
        data = data[data>0]
        new_data.append(data)
    # Replacing the value of an existing key is safe while iterating over items().
    cytom_data_dict[(variant, ligand)] = new_data

In [None]:
# Quick range check on the last array processed: `data`, `variant` and `ligand`
# are the loop variables left over from the background-subtraction cell, so this
# cell only works when run directly after it.
np.quantile(data, [0.0001, 0.9999]), (variant, ligand)

In [None]:
# Stacked, peak-normalised histograms of fluorescence: one figure per
# (variant, ligand) pair, one histogram per inducer concentration.
os.chdir(notebook_dir)
# Saving is currently disabled (see the commented savefig call), so there is no
# need to change into the output folder: doing so failed when the folder did
# not exist and left the kernel there if plotting raised. The output path is
# built explicitly instead.
plot_dir = os.path.join(notebook_dir, 'cytometry_histogram_plots')

plt.rcParams["figure.figsize"] = [8, 4]

# Quantiles of the pooled data that set the range of the shared bins.
bin_quantiles = [0.005, 0.995]
num_bins = 80

# Per-sample quantiles outside which events are not drawn.
hist_quantiles = [0.03, 0.99]

for (variant, ligand), cytom_data in cytom_data_dict.items():
    fig, ax = plt.subplots()
    ax.set_xscale('log')

    # Logarithmically spaced bin edges shared by every histogram in the figure.
    all_data = np.concatenate(cytom_data)
    bin_min_max = np.log10(np.quantile(all_data, bin_quantiles))
    bins = np.logspace(bin_min_max[0], bin_min_max[1], num_bins)
    concentrations = conc_dict[(variant, ligand)]

    # Iterate from the last entry to the first; the last entry is drawn
    # at the top of the stack and the first entry sits at the bottom.
    for i, (x, conc) in enumerate(zip(cytom_data[::-1], concentrations[::-1])):
        label = f'{conc:.4f} μmol/L'
        level = len(cytom_data) - i - 1

        # Normalise by the tallest bar of the untrimmed histogram ...
        counts, _ = np.histogram(x, bins=bins)
        bar_max = counts.max()

        # ... but draw only the events strictly inside the chosen quantile range.
        lower, upper = np.quantile(x, hist_quantiles)
        keep = (x > lower) & (x < upper)
        x = x[keep]
        weights = np.full(len(x), 1/bar_max)

        ax.hist(x, bins=bins, weights=weights, bottom=level*0.7, alpha=0.7, label=label, ec='none')

    # Leave a little room below the lowest histogram.
    ylim = ax.get_ylim()
    ax.set_ylim(ylim[0]-0.25, ylim[1])
    ax.set_xlabel('GFP Fluorescence (MEF)')
    ax.set_title(f'{variant}, {ligand}', size=16)
    ax.legend(loc='upper left', bbox_to_anchor=(1.02, 1), ncol=1)

    fig_file = os.path.join(plot_dir, f'{variant}_{ligand}.png')
    #fig.savefig(fig_file)

In [None]:
from  scipy.stats import gaussian_kde

In [None]:
# Twelve colours from seaborn's current palette, used one per curve in the
# kernel-density plots below.
hist_colors = sns.color_palette(n_colors=12)

In [None]:
# Kernel-density version of the stacked fluorescence distributions: one figure
# per (variant, ligand) pair, one peak-normalised curve per inducer
# concentration, saved as SVG in the 'cytometry_histogram_plots' folder.
os.chdir(notebook_dir)
# Create the output folder if needed and save through an explicit path, rather
# than changing into the folder: the chdir failed on a fresh checkout and left
# the kernel in the wrong directory whenever plotting raised.
plot_dir = os.path.join(notebook_dir, 'cytometry_histogram_plots')
os.makedirs(plot_dir, exist_ok=True)

plt.rcParams["figure.figsize"] = [8, 4]

# Quantiles of the pooled data that set the plotted x-range.
bin_quantiles = [0.003, 0.997]
# Number of points at which each density curve is evaluated.
num_kde_points = 100
# Vertical offset between consecutive curves (each curve has a peak height of 1).
curve_spacing = 0.7

for (variant, ligand), cytom_data in cytom_data_dict.items():
    fig, ax = plt.subplots()
    # The curves are drawn on ax2 in log10 units on a linear axis; ax is the
    # log-scaled axis that carries the visible tick labels, so ax2's own ticks
    # are hidden.
    ax2 = ax.twiny()
    ax.set_xscale('log')
    ax2.tick_params(top=False, labeltop=False, bottom=False, labelbottom=False)

    all_data = np.concatenate(cytom_data)
    bin_min_max = np.log10(np.quantile(all_data, bin_quantiles))
    # The evaluation grid is the same for every curve in the figure.
    x_plot = np.linspace(bin_min_max[0], bin_min_max[1], num_kde_points)
    concentrations = conc_dict[(variant, ligand)]

    # Switch the legend to mmol/L when the largest value reaches 1000 μmol/L.
    if max(concentrations) >= 1000:
        concentrations = np.array(concentrations)/1000
        conc_units = 'mmol/L'
    else:
        conc_units = 'μmol/L'

    # Iterate from the last entry to the first; the last entry is drawn
    # at the top of the stack and the first entry sits at the bottom.
    for i, (x, conc) in enumerate(zip(cytom_data[::-1], concentrations[::-1])):
        # Cycle through the palette so that no sample is silently dropped when
        # there are more samples than colours.
        color = hist_colors[i % len(hist_colors)]
        label = f'{conc:.4f} {conc_units}'
        bottom = (len(cytom_data) - i - 1)*curve_spacing

        # Density of log10(fluorescence), scaled to a peak height of 1.
        x = np.log10(x)
        y_plot = gaussian_kde(x)(x_plot)
        y_plot = y_plot/y_plot.max() + bottom

        ax2.plot(x_plot, y_plot, color=color, label=label)
        ax2.fill_between(x_plot, y_plot, bottom, color=color, alpha=0.5)

    # Leave a little room below the lowest curve.
    ylim = ax2.get_ylim()
    ax2.set_ylim(ylim[0]-0.25, ylim[1])

    # Make the log-scaled axis span exactly the range shown on the log10 axis.
    xlim = ax2.get_xlim()
    ax.set_xlim(10**xlim[0], 10**xlim[1])

    ax.set_xlabel('GFP Fluorescence (MEF)')
    ax.set_title(f'{variant}, {ligand}', size=16)
    ax2.legend(loc='upper left', bbox_to_anchor=(1.02, 1), ncol=1)

    fig_file = os.path.join(plot_dir, f'{variant}_{ligand}.svg')
    fig.savefig(fig_file, bbox_inches='tight')