# Fig. 1 Introducing the spatial pattern 

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib as mpl
import pickle
from scipy.optimize import minimize, basinhopping
from tqdm import tqdm

  from pandas.core.computation.check import NUMEXPR_INSTALLED


In [9]:
%matplotlib qt
#%matplotlib inline

In [2]:
"""plot style"""
linewidth = 4
mpl.rc('axes', linewidth=linewidth)
mpl.rc('font', family='Arial')
fontsize = 24


colors = {'no_inj': [0.8, 0.8, 0.8],
         'mock': [0.4, 0.4, 0.4],
         'e.coli': [0, 0.4, 0],
         'complete': [0, 0.8, 0]}

def style_axes(ax, fontsize=24):
    plt.minorticks_off()
    ax.spines['top'].set_visible(False)
    ax.spines['right'].set_visible(False)
    ax.xaxis.set_tick_params(labelsize=20)
    ax.yaxis.set_tick_params(labelsize=20)
    for tick in ax.xaxis.get_major_ticks():
        tick.label1.set_fontsize(fontsize)
    for tick in ax.yaxis.get_major_ticks():
        tick.label1.set_fontsize(fontsize)
    plt.tight_layout()
    
    return ax


In [3]:
def log_normal_dist(x, mu, sigma):
    return (1 / (sigma * x * np.sqrt(2 * np.pi))) * np.exp(-(np.log(x) - mu) ** 2 / 2 / sigma ** 2)


def fit_log_normal(x):
    mu = np.mean(np.log(x))
    sigma = np.std(np.log(x))
    
    return mu, sigma


def double_log_normal_likelihood(x, mu1, sigma1, mu2, sigma2, n_samples=100_000, n_bins=100):
    X1 = np.exp(np.random.normal(loc=mu1, scale=sigma1, size=n_samples))
    X2 = np.exp(np.random.normal(loc=mu2, scale=sigma2, size=n_samples))
    X = X1 + X2
    
    bins = np.logspace(0, mu2 + 3 * sigma2, n_bins)
    counts, _ = np.histogram(X, bins)
    # use a pseudo count of 1 to make the log likelihood well behaved
    counts[counts == 0] = 1
    prob_dens = counts / np.sum(counts) / np.diff(bins)
    
    bins = bins[:-1]
    
    lh = np.zeros(len(np.array(x)))
    for i in range(len(lh)):
        index = np.where(np.abs(x[i] - bins) == np.min(np.abs(x[i] - bins)))[0][0]
        lh[i] = prob_dens[index]
        
    return lh


def double_log_normal_neg_log_likelihood(p, x):
    mu1, sigma1, mu2, sigma2 = p
    lh = double_log_normal_likelihood(x, mu1, sigma1, mu2, sigma2, n_samples=n_samples)
    nllh = -1 * np.sum(np.log(lh))
    
    return nllh


"""testing mle for lognormal-gamma convolution model"""
def likelihood(x, mu, sigma, k, theta, n_samples=100_000, n_bins=100, pseudo_count=None):
    X0 = np.exp(np.random.normal(loc=mu, scale=sigma, size=n_samples))
    R = np.random.gamma(shape=k, scale=theta, size=n_samples)
    Xt = X0 + R

    bins = np.logspace(0, 6, n_bins)
    counts, _ = np.histogram(Xt, bins)
    if pseudo_count is not None:
        counts[counts == 0] = pseudo_count
    prob_dens = counts / np.sum(counts) / np.diff(bins)
    bins = bins[:-1]
    
    lh = np.zeros(len(np.array(x)))
    for i in range(len(lh)):
        index = np.where(np.abs(x[i] - bins) == np.min(np.abs(x[i] - bins)))[0][0]
        lh[i] = prob_dens[index]
        
    return lh
    

def neg_log_likelihood(p, args):
    k, theta = p
    x, mu, sigma, n_samples = args
    lh = likelihood(x, mu, sigma, k, theta, n_samples=n_samples, pseudo_count=1)
    nllh = -1 * np.sum(np.log(lh))
    
    return nllh
    
    
def neg_log_likelihood_fit4(p, x):
    k, theta, mu, sigma = p
    lh = likelihood(x, mu, sigma, k, theta, n_samples=n_samples, pseudo_count=1)
    nllh = -1 * np.sum(np.log(lh))
    
    return nllh
    

In [4]:
"""high dose data"""
# 18h-18C 24 hpi E. coli
df_inj = pd.read_pickle(r'/media/brandon/Data2/Brandon/fly_immune/Lightsheet_Z1/df_emL3_24.pkl')
# 18h-18C 24 hpi mock
df_mock = pd.read_pickle(r'/media/brandon/Data2/Brandon/fly_immune/Lightsheet_Z1/df_emL3_mock.pkl')
# 18h-18C 24 hpi no injection
df_no = pd.read_pickle(r'/media/brandon/Data2/Brandon/fly_immune/Lightsheet_Z1/df_emL3_noinj.pkl')

"""older 10x diluted data"""
# 3h-22C 24 hpi mock
df_e_mock = pd.read_pickle(r'/media/brandon/Data1/Brandon/fly_immune/diptericin_paper/manual_analysis/df_eL3_ctl.pkl')
# 18h-24C 24 hpi no injection
df_m_no = pd.read_pickle(r'/media/brandon/Data1/Brandon/fly_immune/diptericin_paper/manual_analysis/df_mL3_24_noinj.pkl')

df_inj


Unnamed: 0,experiment_id,y,x,dpt_gfp,mem_mch
0,0,240.70486,1269.328656,5927.750994,9018.343675
1,0,396.410604,1258.712355,4821.583134,8898.338107
2,0,592.812167,1260.481739,6446.630867,8355.653142
3,0,713.130241,634.119998,2272.979316,8869.243437
4,0,859.989067,605.809863,2142.789181,9112.609387
...,...,...,...,...,...
6126,19,4787.100318,1505.411505,1899.669849,8088.492442
6127,19,4660.289762,1561.294123,2062.060461,9410.208433
6128,19,4838.684273,1720.344651,930.402546,6819.805887
6129,19,4666.737756,1165.817135,879.618934,7400.588703


## Scatter plot of median intensities

In [5]:
"""threshold for dividing samples based on median intensity"""
partial_thresh = 4000

In [14]:
"""scatter plot of median intensities"""
plt.figure(figsize=(8,6))
sigma_x = 0.02

# no injection --- pool all samples
x = 0
color = colors['no_inj']
all_medians = []
df = df_no
medians = df.groupby(by='experiment_id').median().dpt_gfp.values
all_medians.extend(medians)

df = df_m_no
medians = df.groupby(by='experiment_id').median().dpt_gfp.values
all_medians.extend(medians)

plt.plot(x * np.ones(len(all_medians)) + sigma_x * np.random.normal(size=len(all_medians)), 
         all_medians, 'ko', markerfacecolor=color, markersize=16, alpha=0.75)

# mock injection --- pool all samples
x = 1
color = colors['mock']
all_medians = []
df = df_mock
medians = df.groupby(by='experiment_id').median().dpt_gfp.values
all_medians.extend(medians)

df = df_e_mock
medians = df.groupby(by='experiment_id').median().dpt_gfp.values
all_medians.extend(medians)

plt.plot(x * np.ones(len(all_medians)) + sigma_x * np.random.normal(size=len(all_medians)), 
         all_medians, 'ko', markerfacecolor=color, markersize=16, alpha=0.75)

# injected group --- color by partial/complete split
x = 2
all_medians = df_inj.groupby(by='experiment_id').median().dpt_gfp.values
partial_ids = np.where(all_medians < partial_thresh)[0]
complete_ids = np.where(all_medians >= partial_thresh)[0]

# partial
color = colors['e.coli']
sel = partial_ids
plt.plot(x * np.ones(len(all_medians[sel])) + sigma_x * np.random.normal(size=len(all_medians[sel])), 
         all_medians[sel], 'ko', markerfacecolor=color, markersize=16, alpha=0.75)

# complete
color = colors['complete']
sel = complete_ids
plt.plot(x * np.ones(len(all_medians[sel])) + sigma_x * np.random.normal(size=len(all_medians[sel])), 
         all_medians[sel], 'ko', markerfacecolor=color, markersize=16, alpha=0.75)

plt.xticks([0, 1, 2], labels=['no \ninjection', 'mock', '$E. coli$'], fontsize=fontsize)
plt.xlim([-0.5, x + 0.5])
plt.yticks([1e3, 1e4])
plt.ylim([3e2, 2e4])
plt.ylabel('\n \n \n \n \n median diptericin-gfp \nfluorescence intensity \nper cell (a.u.)', fontsize=fontsize)
plt.yscale('log')
ax = plt.gca()
ax = style_axes(ax)


In [15]:
#plt.savefig(r'/home/brandon/Documents/Code/diptericin-paper/figures/Fig1-SpatialPattern/median_inten_dot_plot.pdf')

In [8]:
"""divide samples based on median intensity"""
partial_thresh = 4000
partial_ids = np.where(all_medians < partial_thresh)[0]
complete_ids = np.where(all_medians >= partial_thresh)[0]

In [16]:
partial_ids

array([ 0,  1,  2,  3,  5,  6,  8, 11, 13, 16, 18, 19])

## Full intensity distributions

In [9]:
n_bins = 20 + 1


In [11]:
# assemble mock injection distribution
all_mock_intens = []
df = df_mock
intens = df.dpt_gfp.values
all_mock_intens.extend(intens)

df = df_e_mock
intens = df.dpt_gfp.values
all_mock_intens.extend(intens)

mock_bins = np.logspace(np.min(np.log10(all_mock_intens)), np.max(np.log10(all_mock_intens)), n_bins)
counts, _ = np.histogram(all_mock_intens, mock_bins)

mock_prob_dens = counts / np.sum(counts) / np.diff(mock_bins)
mock_bins = mock_bins[:-1]


In [82]:
"""combine both mock dataframes into one"""
tmp_df = df_e_mock.copy()
tmp_df.experiment_id += df_mock.experiment_id.max() + 1
df_all_mock = pd.concat((df_mock, tmp_df), axis=0)

In [9]:
df_partial = df_inj[[eid in partial_ids for eid in df_inj.experiment_id]]
df_complete = df_inj[[eid in complete_ids for eid in df_inj.experiment_id]]

plt.figure(figsize=(6, 6))

"""partial"""
# plot mock first
plt.plot(mock_bins, mock_prob_dens, '--', linewidth=2 * linewidth, color=colors['mock'], label='mock')

# plot individual partial dists
df = df_partial
for i, eid in enumerate(df.experiment_id.unique()):
    sub_df = df[df.experiment_id == eid]
    intens = sub_df.dpt_gfp.astype('float32').values
    bins = np.logspace(np.min(np.log10(intens)), np.max(np.log10(intens)), int(n_bins / 2))
    counts, _ = np.histogram(intens, bins)
    prob_dens = counts / np.sum(counts) / np.diff(bins)
    bins = bins[:-1]
    bins = bins[prob_dens > 1e-7]
    prob_dens = prob_dens[prob_dens > 1e-7]
    if i == 0:
        label = 'individual larvae'
    else:
        label = '_nolabel_'
    plt.plot(bins, prob_dens, '-', color=colors['e.coli'], linewidth=linewidth, alpha=0.35, label=label)

# now plot a pooled distribution
intens = df.dpt_gfp.astype('float32').values
bins = np.logspace(np.min(np.log10(intens)), np.max(np.log10(intens)), n_bins)
counts, _ = np.histogram(intens, bins)
prob_dens = counts / np.sum(counts) / np.diff(bins)
bins = bins[:-1]

#plt.plot(bins, prob_dens, '-', color=colors['e.coli'], linewidth=2 * linewidth)
plt.plot(bins, prob_dens, 'ko', markerfacecolor=colors['e.coli'], markeredgewidth=2, markersize=18, alpha=0.75, label='pooled larvae')

plt.xscale('log')
plt.yscale('log')
plt.xlabel('diptericin-gfp \nfluorescence intensity (a.u.)', fontsize=fontsize)
plt.ylabel('probability density (1/a.u.)', fontsize=fontsize)
plt.title('partial responders', fontsize=fontsize, fontweight='bold')
plt.legend(fontsize=0.75 * fontsize)
ax = plt.gca()
ax = style_axes(ax)




In [151]:
"""for mock, plot prob dens with error bars and lognormal fit"""
df = df_mock
n_bins = 20 + 1
n_bootstraps = 100
plt.figure(figsize=(6, 6))


# construct array of individual prob dens for error bars
intens = df.dpt_gfp.astype('float32').values
bins = np.logspace(np.min(np.log10(intens)), np.max(np.log10(intens)), n_bins)

eids = df.experiment_id.unique()
prob_dens_bootstrap = np.zeros((n_bootstraps, len(bins) -1))
for n in range(n_bootstraps):
    these_ids = np.random.choice(eids, size=len(eids))
    these_intens = []
    for j in range(len(these_ids)):
        these_intens.extend(df[df.experiment_id == these_ids[j]].dpt_gfp.values.astype('float'))
    these_intens = np.array(sorted(these_intens))
    counts, _ = np.histogram(these_intens, bins)
    prob_dens = counts / np.sum(counts) / np.diff(bins)
    prob_dens_bootstrap[n] = prob_dens
    
sigma_prob_dens = np.nanstd(prob_dens_bootstrap, axis=0)

# now plot a pooled distribution
intens = df.dpt_gfp.astype('float32').values
counts, _ = np.histogram(intens, bins)
prob_dens = counts / np.sum(counts) / np.diff(bins)
bins = bins[:-1]

plt.errorbar(bins, prob_dens, sigma_prob_dens, ecolor='k', elinewidth=3, capsize=3, marker='o', 
             markersize=14, linewidth=0, markeredgecolor='k', markerfacecolor=colors['e.coli'] + [0.5], 
             barsabove=True, label='$E.coli$')

mu_partial, sigma_partial = fit_log_normal(intens[intens > 0])
x = np.logspace(np.log10(bins[0]), np.log10(bins[-1]), 1000)
px_fit = log_normal_dist(x, mu_partial, sigma_partial)
plt.plot(x, px_fit, 'k-', linewidth=2, label='log-normal fit')

plt.xscale('log')
plt.yscale('log')
plt.xlabel('diptericin-gfp \nfluorescence intensity (a.u.)', fontsize=fontsize)
plt.ylabel('probability density (1/a.u.)', fontsize=fontsize)
plt.title('partial responders', fontsize=fontsize, fontweight='bold')
plt.legend(fontsize=0.75 * fontsize)
plt.xlim([200, 65000])
ax = plt.gca()
ax = style_axes(ax)

In [161]:
"""plot prob dens with error bars and lognormal fit"""
df_partial = df_inj[[eid in partial_ids for eid in df_inj.experiment_id]]
df_complete = df_inj[[eid in complete_ids for eid in df_inj.experiment_id]]
n_bins = 10 + 1
n_bootstraps = 100
plt.figure(figsize=(5.5, 5.5))

"""partial"""
df = df_partial

# construct array of individual prob dens for error bars
intens = df.dpt_gfp.astype('float32').values
bins = np.logspace(np.min(np.log10(intens)), np.max(np.log10(intens)), n_bins)

eids = df.experiment_id.unique()
prob_dens_bootstrap = np.zeros((n_bootstraps, len(bins) -1))
for n in range(n_bootstraps):
    these_ids = np.random.choice(eids, size=len(eids))
    these_intens = []
    for j in range(len(these_ids)):
        these_intens.extend(df[df.experiment_id == these_ids[j]].dpt_gfp.values.astype('float'))
    these_intens = np.array(sorted(these_intens))
    counts, _ = np.histogram(these_intens, bins)
    prob_dens = counts / np.sum(counts) / np.diff(bins)
    prob_dens_bootstrap[n] = prob_dens
    
sigma_prob_dens = np.nanstd(prob_dens_bootstrap, axis=0)

# now plot a pooled distribution
intens = df.dpt_gfp.astype('float32').values
counts, _ = np.histogram(intens, bins)
prob_dens = counts / np.sum(counts) / np.diff(bins)
bins = bins[:-1]

plt.errorbar(bins, prob_dens, sigma_prob_dens, ecolor='k', elinewidth=3, capsize=3, marker='o', 
             markersize=14, linewidth=0, markeredgecolor='k', markerfacecolor=colors['e.coli'] + [0.5], 
             barsabove=True, label='$E.coli$ partial')

mu_partial, sigma_partial = fit_log_normal(intens[intens > 0])
x = np.logspace(np.log10(bins[0]), np.log10(bins[-1]), 1000)
px_fit = log_normal_dist(x, mu_partial, sigma_partial)
plt.plot(x, px_fit, 'k-', linewidth=2, label='log-normal fit')

plt.xscale('log')
plt.yscale('log')
plt.xlabel('diptericin-gfp \nfluorescence intensity (a.u.)', fontsize=fontsize)
plt.ylabel('probability density (1/a.u.)', fontsize=fontsize)
#plt.title('partial responders', fontsize=fontsize, fontweight='bold')
plt.legend(fontsize=0.75 * fontsize)
plt.xlim([200, 65000])
plt.yticks([1e-8, 1e-6, 1e-4])
ax = plt.gca()
ax = style_axes(ax)

In [162]:
plt.savefig(r'/home/brandon/Documents/Code/diptericin-paper/figures/Fig1-SpatialPattern/partial_dist_log_normal_fit.pdf')

In [159]:
"""for complete, plot prob dens with error bars and lognormal fit"""
df = df_complete
n_bins = 20 + 1
n_bootstraps = 100
plt.figure(figsize=(6, 6))



# construct array of individual prob dens for error bars
intens = df.dpt_gfp.astype('float32').values
bins = np.logspace(np.min(np.log10(intens)), np.max(np.log10(intens)), n_bins)

eids = df.experiment_id.unique()
prob_dens_bootstrap = np.zeros((n_bootstraps, len(bins) -1))
for n in range(n_bootstraps):
    these_ids = np.random.choice(eids, size=len(eids))
    these_intens = []
    for j in range(len(these_ids)):
        these_intens.extend(df[df.experiment_id == these_ids[j]].dpt_gfp.values.astype('float'))
    these_intens = np.array(sorted(these_intens))
    counts, _ = np.histogram(these_intens, bins)
    prob_dens = counts / np.sum(counts) / np.diff(bins)
    prob_dens_bootstrap[n] = prob_dens
    
sigma_prob_dens = np.nanstd(prob_dens_bootstrap, axis=0)

# now plot a pooled distribution
intens = df.dpt_gfp.astype('float32').values
counts, _ = np.histogram(intens, bins)
prob_dens = counts / np.sum(counts) / np.diff(bins)
bins = bins[:-1]

plt.errorbar(bins, prob_dens, sigma_prob_dens, ecolor='k', elinewidth=3, capsize=3, marker='o', 
             markersize=14, linewidth=0, markeredgecolor='k', markerfacecolor=colors['e.coli'] + [0.5], 
             barsabove=True, label='$E.coli$')

mu_partial, sigma_partial = fit_log_normal(intens[intens > 0])
x = np.logspace(np.log10(bins[0]), np.log10(bins[-1]), 1000)
px_fit = log_normal_dist(x, mu_partial, sigma_partial)
plt.plot(x, px_fit, 'k-', linewidth=2, label='log-normal fit')

plt.xscale('log')
plt.yscale('log')
plt.xlabel('diptericin-gfp \nfluorescence intensity (a.u.)', fontsize=fontsize)
plt.ylabel('probability density (1/a.u.)', fontsize=fontsize)
plt.title('partial responders', fontsize=fontsize, fontweight='bold')
plt.legend(fontsize=0.75 * fontsize)
plt.xlim([200, 65000])
plt.yticks([1e-8, 1e-6, 1e-4])
ax = plt.gca()
ax = style_axes(ax)

In [149]:
"""plot mock, partial, and complete dists on one graph"""
"""for complete, plot prob dens with error bars and lognormal fit"""
n_bootstraps = 100
plt.figure(figsize=(6, 6))




"""complete"""
df = df_complete
n_bins = 10 + 1

# construct array of individual prob dens for error bars
intens = df.dpt_gfp.astype('float32').values
bins = np.floor(np.logspace(np.min(np.log10(intens)), np.max(np.log10(intens)), n_bins))

eids = df.experiment_id.unique()
prob_dens_bootstrap = np.zeros((n_bootstraps, len(bins) -1))
for n in range(n_bootstraps):
    these_ids = np.random.choice(eids, size=len(eids))
    these_intens = []
    for j in range(len(these_ids)):
        these_intens.extend(df[df.experiment_id == these_ids[j]].dpt_gfp.values.astype('float'))
    these_intens = np.array(sorted(these_intens))
    counts, _ = np.histogram(these_intens, bins)
    prob_dens = counts / np.sum(counts) / np.diff(bins)
    prob_dens_bootstrap[n] = prob_dens
    
sigma_prob_dens = np.nanstd(prob_dens_bootstrap, axis=0)

# now plot a pooled distribution
intens = df.dpt_gfp.astype('float32').values
counts, _ = np.histogram(intens, bins)
prob_dens = counts / np.sum(counts) / np.diff(bins)
bins = bins[:-1]

plt.errorbar(bins, prob_dens, sigma_prob_dens, ecolor='k', elinewidth=3, capsize=3, marker='o', 
             markersize=14, color=colors['complete'] + [0.5], linewidth=3, markeredgecolor='k', markerfacecolor=colors['complete'] + [0.5], 
             barsabove=True, label='$E.coli$ complete')

mu_partial, sigma_partial = fit_log_normal(intens[intens > 0])
x = np.logspace(np.log10(bins[0]), np.log10(bins[-1]), 1000)
px_fit = log_normal_dist(x, mu_partial, sigma_partial)
#plt.plot(x, px_fit, 'k-', linewidth=2, label='log-normal fit')

"""partial"""
df = df_partial
n_bins = 10 + 1

# construct array of individual prob dens for error bars
intens = df.dpt_gfp.astype('float32').values
bins = np.logspace(np.min(np.log10(intens)), np.max(np.log10(intens)), n_bins)

eids = df.experiment_id.unique()
prob_dens_bootstrap = np.zeros((n_bootstraps, len(bins) -1))
for n in range(n_bootstraps):
    these_ids = np.random.choice(eids, size=len(eids))
    these_intens = []
    for j in range(len(these_ids)):
        these_intens.extend(df[df.experiment_id == these_ids[j]].dpt_gfp.values.astype('float'))
    these_intens = np.array(sorted(these_intens))
    counts, _ = np.histogram(these_intens, bins)
    prob_dens = counts / np.sum(counts) / np.diff(bins)
    prob_dens_bootstrap[n] = prob_dens
    
sigma_prob_dens = np.nanstd(prob_dens_bootstrap, axis=0)

# now plot a pooled distribution
intens = df.dpt_gfp.astype('float32').values
counts, _ = np.histogram(intens, bins)
prob_dens = counts / np.sum(counts) / np.diff(bins)
bins = bins[:-1]

plt.errorbar(bins, prob_dens, sigma_prob_dens, ecolor='k', elinewidth=3, capsize=3, marker='o', 
             markersize=14, color=colors['e.coli'] + [0.5], linewidth=3, markeredgecolor='k', markerfacecolor=colors['e.coli'] + [0.5], 
             barsabove=True, label='$E.coli$ partial', zorder=10)

mu_partial, sigma_partial = fit_log_normal(intens[intens > 0])
x = np.logspace(np.log10(bins[0]), np.log10(bins[-1]), 1000)
px_fit = log_normal_dist(x, mu_partial, sigma_partial)
#plt.plot(x, px_fit, 'k-', linewidth=2, label='log-normal fit')


"""mock"""
df = df_all_mock
n_bins = 10 + 1

# construct array of individual prob dens for error bars
intens = df.dpt_gfp.astype('float32').values
bins = np.logspace(np.min(np.log10(intens)), np.max(np.log10(intens)), n_bins)

eids = df.experiment_id.unique()
prob_dens_bootstrap = np.zeros((n_bootstraps, len(bins) -1))
for n in range(n_bootstraps):
    these_ids = np.random.choice(eids, size=len(eids))
    these_intens = []
    for j in range(len(these_ids)):
        these_intens.extend(df[df.experiment_id == these_ids[j]].dpt_gfp.values.astype('float'))
    these_intens = np.array(sorted(these_intens))
    counts, _ = np.histogram(these_intens, bins)
    prob_dens = counts / np.sum(counts) / np.diff(bins)
    prob_dens_bootstrap[n] = prob_dens
    
sigma_prob_dens = np.nanstd(prob_dens_bootstrap, axis=0)

# now plot a pooled distribution
intens = df.dpt_gfp.astype('float32').values
counts, _ = np.histogram(intens, bins)
prob_dens = counts / np.sum(counts) / np.diff(bins)
bins = bins[:-1]

plt.errorbar(bins, prob_dens, sigma_prob_dens, ecolor='k', elinewidth=3, capsize=3, marker='o', 
             markersize=14, color=colors['mock'] + [0.5], linewidth=3, markeredgecolor='k', markerfacecolor=colors['mock'] + [0.5], 
             barsabove=True, label='mock injection', zorder=0)

mu_partial, sigma_partial = fit_log_normal(intens[intens > 0])
x = np.logspace(np.log10(bins[0]), np.log10(bins[-1]), 1000)
px_fit = log_normal_dist(x, mu_partial, sigma_partial)
#plt.plot(x, px_fit, 'k-', linewidth=2, label='log-normal fit')


plt.xscale('log')
plt.yscale('log')
plt.xlabel('diptericin-gfp \nfluorescence intensity (a.u.)', fontsize=fontsize)
plt.ylabel('probability density (1/a.u.)', fontsize=fontsize)
#plt.legend(fontsize=0.75 * fontsize)
#plt.legend(fontsize=0.75 * fontsize, loc='upper left', bbox_to_anchor=(-0.5, 1.2), ncol=2, fancybox=True, facecolor=np.array([210, 180, 140]) / 255, framealpha=0.5)
plt.legend(fontsize=0.65 * fontsize, loc='upper left', bbox_to_anchor=(0.5, 1.1), ncol=1, fancybox=True, framealpha=0.5)

plt.xlim([100, 65000])
plt.ylim([1e-9, 5e-3])
plt.yticks([1e-8, 1e-6, 1e-4])
ax = plt.gca()
ax = style_axes(ax)

In [150]:
plt.savefig(r'/home/brandon/Documents/Code/diptericin-paper/figures/Fig1-SpatialPattern/mock_partial_complete_dists_errorbars.pdf')

In [73]:
"""fit double log normal distribution"""
# fit double log normal
n_samples = 100_000
args = intens[intens > 0]
p0 = (3.0, 1.0, 4.0, 1.0)
res = minimize(double_log_normal_neg_log_likelihood, p0, args=args, bounds=((0, np.inf), (0, np.inf), (0, np.inf), (0, np.inf)), method='Nelder-Mead')
mu1, sigma1, mu2, sigma2 = res.x

In [166]:
"""fit log normal to mock distribution"""
intens = df_mock.dpt_gfp.values.astype('float')
mu_mock, sigma_mock = fit_log_normal(intens[intens > 0])

plt.figure()
plt.plot(mock_bins, mock_prob_dens, '--', linewidth=2 * linewidth, color=colors['mock'], label='mock')
x = np.logspace(2, 5, 1000)
px_fit = log_normal_dist(x, mu_mock, sigma_mock)
plt.plot(x, px_fit, 'b-', linewidth=2)
plt.xscale('log')
plt.yscale('log')
plt.xlabel('diptericin-gfp \nfluorescence intensity (a.u.)', fontsize=fontsize)
plt.ylabel('probability density (1/a.u.)', fontsize=fontsize)
plt.title('partial responders', fontsize=fontsize, fontweight='bold')
plt.legend(fontsize=0.75 * fontsize)
plt.ylim([1e-8, 1e-1])
ax = plt.gca()
ax = style_axes(ax)

In [192]:
"""fit lognormal-gamma distribution using mock lognormal params"""
#p0 = (0.8, 300)
p0 = (0.35, 3717)

df = df_partial
intens = df.dpt_gfp.astype('float32').values
args = [intens[intens > 0], mu_mock, sigma_mock, n_samples]

res = minimize(neg_log_likelihood, p0, args=args, bounds=((0, np.inf), (0, np.inf)), method='Nelder-Mead')
k_fit_mock, theta_fit_mock = res.x



In [266]:
theta_fit_mock * 1.1

4200.518717280552

In [282]:
"""fit lognormal-gamma distribution fitting all 4 params"""
#p0 = (0.8, 300, 6.25, 0.39)
#p0 = (0.35, 3717, 6.25, 0.39)
p0 = (0.4, 4201, 1.07*mu_mock, sigma_mock)

df = df_partial
intens = df.dpt_gfp.astype('float32').values
args = intens[intens > 0]

#res = minimize(neg_log_likelihood_fit4, p0, args=args, bounds=((0, np.inf), (0, np.inf), (0, np.inf), (0, np.inf)), method='Nelder-Mead')
minimizer_kwargs={'method': 'Nelder-Mead', 'args': args, 'bounds': ((0, np.inf), (0, np.inf), (0, np.inf), (0, np.inf))}
res = basinhopping(neg_log_likelihood_fit4, p0, minimizer_kwargs=minimizer_kwargs)

k_fit, theta_fit, mu_fit, sigma_fit = res.x

  return self.minimizer(self.func, x0, **self.kwargs)


In [285]:
res

                        fun: 30287.387991411528
 lowest_optimization_result:  final_simplex: (array([[3.45784401e-01, 5.43280024e+03, 6.71540707e+00, 4.93788575e-01],
       [3.45784401e-01, 5.43280024e+03, 6.71540707e+00, 4.93788575e-01],
       [3.45784401e-01, 5.43280024e+03, 6.71540707e+00, 4.93788575e-01],
       [3.45784401e-01, 5.43280024e+03, 6.71540707e+00, 4.93788575e-01],
       [3.45784401e-01, 5.43280024e+03, 6.71540707e+00, 4.93788575e-01]]), array([30287.38799141, 30296.36698367, 30298.82098602, 30299.34274097,
       30302.49493793]))
           fun: 30287.387991411528
       message: 'Maximum number of function evaluations has been exceeded.'
          nfev: 800
           nit: 311
        status: 1
       success: False
             x: array([3.45784401e-01, 5.43280024e+03, 6.71540707e+00, 4.93788575e-01])
                    message: ['requested number of basinhopping iterations completed successfully']
      minimization_failures: 101
                       nfev: 80

In [283]:
res.x

array([3.45784401e-01, 5.43280024e+03, 6.71540707e+00, 4.93788575e-01])

In [271]:
res.x

array([4.03804564e-01, 4.29934631e+03, 6.61335483e+00, 4.10569510e-01])

In [233]:
"""fit lognormal-gamma using mock params on each sample independently"""
df = df_partial
k_fit_mock_arr = np.zeros(len(df.experiment_id.unique()))
theta_fit_mock_arr = np.zeros(len(df.experiment_id.unique()))
n_samples = 100_000
p0 = (0.35, 3717)

counter = 0
for i in tqdm(df.experiment_id.unique()):
    sub_df = df[df.experiment_id == i]
    intens = sub_df.dpt_gfp.astype('float32').values
    intens = intens[intens > 0]
    if len(intens) <= 5:
        continue
    args = [intens[intens > 0], mu_mock, sigma_mock, n_samples]

    res = minimize(neg_log_likelihood, p0, args=args, bounds=((0, np.inf), (0, np.inf)), method='Nelder-Mead')
    k_fit_mock_arr[counter], theta_fit_mock_arr[counter] = res.x
    counter += 1

100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 12/12 [00:50<00:00,  4.22s/it]


In [236]:
"""plot results"""
df = df_partial
counter = 0
for i in tqdm(df.experiment_id.unique()):
    sub_df = df[df.experiment_id == i]
    intens = sub_df.dpt_gfp.astype('float32').values
    intens = intens[intens > 0]
    if len(intens) <= 5:
        continue
    bins = np.logspace(np.min(np.log10(intens)), np.max(np.log10(intens)), 11)
    counts, _ = np.histogram(intens, bins)
    prob_dens = counts / np.sum(counts) / np.diff(bins)
    bins = bins[:-1]

    plt.figure()
    plt.plot(bins, prob_dens, 'ko', markerfacecolor=colors['e.coli'], markeredgewidth=2, markersize=18, alpha=0.75, label='pooled larvae')
    x = np.logspace(2, 5, 100)
    px_fit = likelihood(x, mu_mock, sigma_mock, k_fit_mock_arr[counter], theta_fit_mock_arr[counter], n_samples=10_000_000, n_bins=1000)
    plt.plot(x, px_fit, 'k-', linewidth=2)
    plt.xscale('log')
    plt.yscale('log')
    plt.xlabel('diptericin-gfp \nfluorescence intensity (a.u.)', fontsize=fontsize)
    plt.ylabel('probability density (1/a.u.)', fontsize=fontsize)
    plt.title(f'expID {i}, total inten{np.sum(intens)}', fontsize=fontsize, fontweight='bold')
    plt.legend(fontsize=0.75 * fontsize)
    plt.ylim([1e-8, 1e-1])
    ax = plt.gca()
    ax = style_axes(ax)
    counter += 1


100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 12/12 [00:11<00:00,  1.06it/s]


In [73]:
mu_fit = 6.7154
sigma_fit = 0.49378
k_fit = 0.2458
theta_fit = 5432.8

In [169]:
"""plot probability density along with various fits"""
df = df_partial
plt.figure()
n_bins = 10 + 1

intens = df.dpt_gfp.astype('float32').values
bins = np.logspace(np.min(np.log10(intens)), np.max(np.log10(intens)), n_bins)
counts, _ = np.histogram(intens, bins)
prob_dens = counts / np.sum(counts) / np.diff(bins)
bins = bins[:-1]

plt.plot(bins, prob_dens, 'ko', markerfacecolor=colors['e.coli'], markeredgewidth=2, markersize=18, alpha=0.75, label='pooled larvae')

# plot single log normal fit
#mu, sigma = fit_log_normal(intens[intens > 0])
#x = np.logspace(2, 5, 1000)
#px_fit = log_normal_dist(x, mu, sigma)
#plt.plot(x, px_fit, 'b-', linewidth=2)

# # plot double log normal fit
# n_samples = 100_000
# mu1, sigma1, mu2, sigma2 = np.array([4.15329191, 0., 7.50493143, 0.90792562])
# x = np.logspace(2, 5, 100)
# px_fit = double_log_normal_likelihood(x, mu1, sigma1, mu2, sigma2, n_samples=10_000_000, n_bins=1000)
# plt.plot(x, px_fit, 'm-', linewidth=2)

# plot log normal - gamma fit
#x = np.logspace(2, 5, 100)
#px_fit = likelihood(x, mu_mock, sigma_mock, k_fit_mock, theta_fit_mock, n_samples=10_000_000, n_bins=1000)
#plt.plot(x, px_fit, 'k-', linewidth=2)

# plot log normal - gamma fit with me fudging parameters
#x = np.logspace(2, 5, 100)
#px_fit = likelihood(x, 1.07*mu_mock, sigma_mock, 0.4, 1.1*theta_fit_mock, n_samples=10_000_000, n_bins=1000)
#plt.plot(x, px_fit, 'k--', linewidth=2)

# plot log normal-gamma fit where we fit all 4 parameters
x = np.logspace(2, 5, 100)
px_fit = likelihood(x, mu_fit, sigma_fit, k_fit, theta_fit, n_samples=10_000_000, n_bins=1000)
plt.plot(x, px_fit, 'r-', linewidth=2)

plt.xscale('log')
plt.yscale('log')
plt.xlabel('diptericin-gfp \nfluorescence intensity (a.u.)', fontsize=fontsize)
plt.ylabel('probability density (1/a.u.)', fontsize=fontsize)
plt.title('partial responders', fontsize=fontsize, fontweight='bold')
plt.legend(fontsize=0.75 * fontsize)
plt.ylim([1e-8, 1e-3])
ax = plt.gca()
ax = style_axes(ax)


In [184]:
"""plot probability density along with various fits"""
df = df_partial
plt.figure(figsize=(5.5, 5.5))
n_bins = 10 + 1

# construct array of individual prob dens for error bars
intens = df.dpt_gfp.astype('float32').values
bins = np.logspace(np.min(np.log10(intens)), np.max(np.log10(intens)), n_bins)

eids = df.experiment_id.unique()
prob_dens_bootstrap = np.zeros((n_bootstraps, len(bins) -1))
for n in range(n_bootstraps):
    these_ids = np.random.choice(eids, size=len(eids))
    these_intens = []
    for j in range(len(these_ids)):
        these_intens.extend(df[df.experiment_id == these_ids[j]].dpt_gfp.values.astype('float'))
    these_intens = np.array(sorted(these_intens))
    counts, _ = np.histogram(these_intens, bins)
    prob_dens = counts / np.sum(counts) / np.diff(bins)
    prob_dens_bootstrap[n] = prob_dens
    
sigma_prob_dens = np.nanstd(prob_dens_bootstrap, axis=0)

# now plot a pooled distribution
intens = df.dpt_gfp.astype('float32').values
counts, _ = np.histogram(intens, bins)
prob_dens = counts / np.sum(counts) / np.diff(bins)
bins = bins[:-1]

plt.errorbar(bins, prob_dens, sigma_prob_dens, ecolor='k', elinewidth=3, capsize=3, marker='o', 
             markersize=14, color=colors['e.coli'] + [0.5], linewidth=0, markeredgecolor='k', markerfacecolor=colors['e.coli'] + [0.5], 
             barsabove=True, label='$E.coli$ partial', zorder=0)

# plot lognormal fit
mu_partial, sigma_partial = fit_log_normal(intens[intens > 0])
x = np.logspace(np.log10(bins[0]), np.log10(bins[-1]), 1000)
px_fit = log_normal_dist(x, mu_partial, sigma_partial)
plt.plot(x, px_fit, '--', linewidth=4, color=[0, 0, 0, 0.5], label='log-normal fit')

# plot log normal-gamma fit where we fit all 4 parameters
x = np.logspace(np.log10(bins[0]), np.log10(bins[-1]), 1000)
px_fit = likelihood(x, mu_fit, sigma_fit, k_fit, theta_fit, n_samples=10_000_000, n_bins=1000)
plt.plot(x, px_fit, '-', linewidth=4, color=colors['e.coli'] + [0.5], label='log-normal-gamma fit')

plt.xscale('log')
plt.yscale('log')
plt.xlabel('diptericin-gfp \nfluorescence intensity (a.u.)', fontsize=fontsize)
plt.ylabel('probability density (1/a.u.)', fontsize=fontsize)
plt.legend(fontsize=0.65 * fontsize)
plt.ylim([5e-7, 1e-3])
plt.yticks([1e-6, 1e-4])
ax = plt.gca()
ax = style_axes(ax)


In [185]:
plt.savefig(r'/home/brandon/Documents/Code/diptericin-paper/figures/Fig1-SpatialPattern/partial_dist_lognormal-gamma_vs_lognormal.pdf')

In [181]:
print(k_fit)
print(theta_fit)
print(k_fit_mock)
print(theta_fit_mock)

0.46525940596297577
183.6972320155287
0.5929941077399613
3717.2729051513006


In [275]:
mu_fit

6.613354829216856

In [276]:
mu_mock

6.2503989992276345

In [277]:
sigma_fit

0.41056951013097986

In [278]:
sigma_mock

0.3871225616696109

In [247]:
cum_dist_arr_sim[-1]

0.0

In [252]:
theta_fit_mock

3818.6533793459557

In [344]:
"""compute reverse cumulative distributions"""
df = df_partial
intens = df.dpt_gfp.values.astype('float')
intens = sorted(intens[intens > 0])
n_bootstraps = 20
inten_arr = np.arange(int(np.min(intens)), int(np.max(intens)))
#cum_dist_arr_bootstrap = np.zeros((n_bootstraps, len(intens)))
cum_dist_arr_bootstrap = np.zeros((n_bootstraps, len(inten_arr)))

eids = df.experiment_id.unique()
for n in tqdm(range(n_bootstraps)):
    these_ids = np.random.choice(eids, size=len(eids))
    these_intens = []
    for j in range(len(these_ids)):
        these_intens.extend(df[df.experiment_id == these_ids[j]].dpt_gfp.values.astype('float'))
    these_intens = np.array(sorted(these_intens))
    cum_dist_arr = np.zeros_like(inten_arr, dtype='float')
    for i in range(len(cum_dist_arr)):
        cum_dist_arr[i] = np.sum(these_intens > inten_arr[i]) / len(these_intens)
    #cum_dist_arr_bootstrap.append(cum_dist_arr)
    cum_dist_arr_bootstrap[n] = cum_dist_arr
    
cum_dist_arr_mean = np.mean(cum_dist_arr_bootstrap, axis=0)
cum_dist_uncertainty = np.std(cum_dist_arr_bootstrap, axis=0)
l = cum_dist_arr_mean - cum_dist_uncertainty
u = cum_dist_arr_mean + cum_dist_uncertainty
cum_dist_arr_pooled = np.zeros_like(intens)
for i in range(len(cum_dist_arr)):
    cum_dist_arr_pooled[i] = np.sum(intens > intens[i]) / len(intens)



100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 20/20 [00:04<00:00,  4.46it/s]


IndexError: list index out of range

In [342]:
"""simulated cum dist"""
n_samples = 20_000
#X0 = np.exp(np.random.normal(loc=1.07*mu_mock, scale=sigma_mock, size=n_samples))
#R = np.random.gamma(shape=0.4, scale=1.1*theta_fit_mock, size=n_samples)
X0 = np.exp(np.random.normal(loc=mu_fit, scale=sigma_fit, size=n_samples))
R = np.random.gamma(shape=k_fit, scale=theta_fit, size=n_samples)
Xt = sorted(X0 + R)

cum_dist_arr_sim = np.zeros_like(Xt)
for i in range(len(cum_dist_arr_sim)):
    cum_dist_arr_sim[i] = np.sum(Xt > Xt[i]) / len(Xt)


In [361]:
"""plot reverse cumulative distributions"""
plt.figure(figsize=(5.5,5))
plt.fill_between(inten_arr, l, u, facecolor=colors['e.coli'], alpha=0.5, label='_nolabel_')
plt.plot(intens, cum_dist_arr_pooled, '-', color=colors['e.coli'], linewidth=linewidth, label='pooled data')
plt.plot(Xt, cum_dist_arr_sim, '--', color='k', linewidth=3, label='log-normal \n+ gamma fit')
plt.xscale('log')
plt.yscale('log')
plt.xlabel('$x$ (a.u.)', fontsize=fontsize)
plt.ylabel('$P($diptericin-gfp$ > x)$', fontsize=fontsize)
#plt.title('partial responders', fontsize=fontsize, fontweight='bold')
plt.legend(fontsize=0.75 * fontsize)
#plt.ylim([1e-4, 1e0])
ax = plt.gca()
ax = style_axes(ax)

In [362]:
plt.savefig(r'/home/brandon/Documents/Code/diptericin-paper/figures/Fig1-SpatialPattern/partial_dist_lognormal_gamma_fit.pdf')

In [138]:
#plt.savefig(r'/home/brandon/Documents/Code/diptericin-paper/figures/Fig1-SpatialPattern/partial_dists.pdf')

In [68]:
"""plot examples of individual partial dists"""
df_partial = df_inj[[eid in partial_ids for eid in df_inj.experiment_id]]
df_complete = df_inj[[eid in complete_ids for eid in df_inj.experiment_id]]

plt.figure(figsize=(6.5, 6))

"""partial"""
# plot mock first
plt.plot(mock_bins, mock_prob_dens, '--', linewidth=linewidth, color=colors['mock'], label='mock')

# plot individual partial dists
df = df_partial
#good_eids = [8, 13, 11, 16, 19]#, 1, 18, 6]   # power law
#good_eids = [0, 3]          # log normal
#good_eids = [2, 6, 5]      # bimodal
good_eids = [1, 18]         # in between
for i, eid in enumerate(good_eids):
    sub_df = df[df.experiment_id == eid]
    intens = sub_df.dpt_gfp.astype('float32').values
    bins = np.logspace(np.min(np.log10(intens)), np.max(np.log10(intens)), int(n_bins / 2))
    counts, _ = np.histogram(intens, bins)
    prob_dens = counts / np.sum(counts) / np.diff(bins)
    bins = bins[:-1]
    bins = bins[prob_dens > 1e-7]
    prob_dens = prob_dens[prob_dens > 1e-7]
    plt.plot(bins, prob_dens, '-', linewidth=linewidth, alpha=0.75, label=f'larva {str(eid)}')



plt.xscale('log')
plt.yscale('log')
plt.xlabel('diptericin-gfp \nfluorescence intensity (a.u.)', fontsize=fontsize)
plt.ylabel('probability density (1/a.u.)', fontsize=fontsize)
plt.legend(fontsize=0.75 * fontsize)
plt.xlim([200, 65000])
ax = plt.gca()
ax = style_axes(ax)




In [39]:
plt.close('all')

In [69]:
plt.savefig(r'/home/brandon/Documents/Code/diptericin-paper/figures/Fig1-SpatialPattern/partial_dists_example_in_between.pdf')

In [None]:
"""complete"""
plt.figure(figsize=(6, 6))
# plot mock first
plt.plot(mock_bins, mock_prob_dens, '--', linewidth=2 * linewidth, color=colors['mock'], label='mock')

# plot individual partial dists
df = df_complete
for i, eid in enumerate(df.experiment_id.unique()):
    sub_df = df[df.experiment_id == eid]
    intens = sub_df.dpt_gfp.astype('float32').values
    bins = np.logspace(np.min(np.log10(intens)), np.max(np.log10(intens)), int(n_bins / 2))
    counts, _ = np.histogram(intens, bins)
    prob_dens = counts / np.sum(counts) / np.diff(bins)
    bins = bins[:-1]
    bins = bins[prob_dens > 1e-7]
    prob_dens = prob_dens[prob_dens > 1e-7]
    if i == 0:
        label = 'individual larvae'
    else:
        label = '_nolabel_'
    plt.plot(bins, prob_dens, '-', color=colors['e.coli'], linewidth=linewidth, alpha=0.35, label=label)

# now plot a pooled distribution
intens = df.dpt_gfp.astype('float32').values
bins = np.logspace(np.min(np.log10(intens)), np.max(np.log10(intens)), n_bins)
counts, _ = np.histogram(intens, bins)
prob_dens = counts / np.sum(counts) / np.diff(bins)
bins = bins[:-1]

#plt.plot(bins, prob_dens, '-', color=colors['e.coli'], linewidth=2 * linewidth)
plt.plot(bins, prob_dens, 'ko', markerfacecolor=colors['e.coli'], markeredgewidth=2, markersize=18, alpha=0.75, label='pooled larvae')

plt.xscale('log')
plt.yscale('log')
plt.xlabel('diptericin-gfp \nfluorescence intensity (a.u.)', fontsize=fontsize)
plt.ylabel('probability density (1/a.u.)', fontsize=fontsize)
plt.title('complete responders', fontsize=fontsize, fontweight='bold')
plt.legend(fontsize=0.75 * fontsize)
ax = plt.gca()
ax = style_axes(ax)

In [136]:
#plt.savefig(r'/home/brandon/Documents/Code/diptericin-paper/figures/Fig1-SpatialPattern/complete_dists.pdf')

In [85]:
# testing distribution shape from time series fit
k = 2.28
theta = 1 /1.15
t = np.linspace(1, 5000, 5)
x = np.logspace(2, 5, 50)

plt.figure(figsize=(7,6))
reds = np.linspace(1, 0, len(t))
greens = np.linspace(0, 1, len(t))
blues = np.ones(len(t))
for i in range(len(t)):
    px = likelihood(x, mu_fit, sigma_fit, k, theta * t[i], n_samples=1_000_000, n_bins=100)
    plt.plot(x, px, linewidth=4, color=[reds[i], greens[i], blues[i]])

plt.xscale('log')
plt.yscale('log')
plt.ylim([1e-7, 1e-2])
ax = style_axes(plt.gca())
    

## Distribution of expression levels along anterior-posterior axis

In [17]:
with open(r'/media/brandon/Data2/Brandon/fly_immune/Lightsheet_Z1/emL3_24_line_dist.pkl', 'rb') as f:
    line_dist_arr = pickle.load(f).values

# throw out the very first and last ap-bins, as these often have noise in them
line_dist_arr = line_dist_arr[:, 1:-2]

In [36]:
"""partial"""
plt.figure(figsize=(10,5))
x = np.arange(line_dist_arr.shape[1]) / line_dist_arr.shape[1]
this_line_dist_arr = line_dist_arr[partial_ids]
"""plot individual dists first"""
# for i in range(len(this_line_dist_arr)):
#     if i == 0:
#         label = 'individual larvae'
#     else:
#         label = '_nolabel_'
#     plt.plot(x, this_line_dist_arr[i] / np.max(this_line_dist_arr[i]), '-', linewidth=linewidth, alpha=0.35, label=label)

this_line_dist_arr = this_line_dist_arr / 1e9
m = np.mean(this_line_dist_arr, axis=0)
s = np.std(this_line_dist_arr, axis=0) #/ np.sqrt(len(this_line_dist_arr))
l = m - s
u = m + s
plt.fill_between(x, l, u, facecolor=colors['e.coli'], alpha=0.5)
plt.plot(x, m, '-', linewidth=2*linewidth, color=colors['e.coli'], label='mean')
plt.xlabel('fraction of anterior-posterior axis', fontsize=fontsize)
plt.ylabel('diptericin-gfp \nfluorescence \nintensity (a.u. x $10^9$)', fontsize=fontsize)
plt.title('partial responders', fontsize=fontsize, fontweight='bold')
#plt.legend(fontsize=0.75 * fontsize)
ax = style_axes(plt.gca())



In [146]:
#plt.savefig(r'/home/brandon/Documents/Code/diptericin-paper/figures/Fig1-SpatialPattern/partial_ap_dist.pdf')

In [23]:
"""complete"""
plt.figure(figsize=(10,5))
x = np.arange(line_dist_arr.shape[1]) / line_dist_arr.shape[1]
this_line_dist_arr = line_dist_arr[complete_ids]
this_line_dist_arr = this_line_dist_arr / 1e9

"""plot individual dists first"""
for i in range(len(this_line_dist_arr)):
    if i == 0:
        label = 'individual larvae'
    else:
        label = '_nolabel_'
    plt.plot(x, this_line_dist_arr[i] / np.max(this_line_dist_arr[i]), '-', linewidth=linewidth, alpha=0.35, label=label)

m = np.mean(this_line_dist_arr, axis=0)
s = np.std(this_line_dist_arr, axis=0) / np.sqrt(len(this_line_dist_arr))
l = m - s
u = m + s
#plt.fill_between(x, l, u, facecolor=colors['e.coli'], alpha=0.5)
#plt.plot(x, m, '-', linewidth=2*linewidth, color=colors['e.coli'], label='mean')
plt.xlabel('fraction of anterior-posterior axis', fontsize=fontsize)
plt.ylabel('diptericin-gfp \nfluorescence \nintensity (a.u. x $10^9$)', fontsize=fontsize)
plt.title('complete responders', fontsize=fontsize, fontweight='bold')
#plt.ylim([0, 1.5e9])
#plt.legend(fontsize=0.75 * fontsize)
ax = style_axes(plt.gca())

In [28]:
"""all"""
plt.figure(figsize=(10,5))
x = np.arange(line_dist_arr.shape[1]) / line_dist_arr.shape[1]
this_line_dist_arr = line_dist_arr
this_line_dist_arr = this_line_dist_arr / 1e9

"""plot individual dists first"""
# for i in range(len(this_line_dist_arr)):
#     if i == 0:
#         label = 'individual larvae'
#     else:
#         label = '_nolabel_'
#     plt.plot(x, this_line_dist_arr[i] / np.max(this_line_dist_arr[i]), '-', linewidth=linewidth, alpha=0.35, label=label)

norm_line_dist_arr = np.zeros_like(line_dist_arr)
for i in range(len(line_dist_arr)):
    norm_line_dist_arr[i] = line_dist_arr[i] / np.max(line_dist_arr[i])
    
m = np.mean(norm_line_dist_arr, axis=0)
s = np.std(norm_line_dist_arr, axis=0) #/ np.sqrt(len(this_line_dist_arr))
l = m - s
u = m + s
plt.fill_between(x, l, u, facecolor=colors['e.coli'], alpha=0.5)
plt.plot(x, m, '-', linewidth=2*linewidth, color=colors['e.coli'], label='mean')
plt.xlabel('fraction of anterior-posterior axis', fontsize=fontsize)
plt.ylabel('normalized diptericin-gfp \nfluorescence \nintensity (a.u. x $10^9$)', fontsize=fontsize)
plt.title('all', fontsize=fontsize, fontweight='bold')
#plt.ylim([0, 1.5e9])
#plt.legend(fontsize=0.75 * fontsize)
ax = style_axes(plt.gca())

In [33]:
"""partial normalized"""
plt.figure(figsize=(10,5))
x = np.arange(line_dist_arr.shape[1]) / line_dist_arr.shape[1]
this_line_dist_arr = line_dist_arr[partial_ids]
"""plot individual dists first"""
# for i in range(len(this_line_dist_arr)):
#     if i == 0:
#         label = 'individual larvae'
#     else:
#         label = '_nolabel_'
#     plt.plot(x, this_line_dist_arr[i] / np.max(this_line_dist_arr[i]), '-', linewidth=linewidth, alpha=0.35, label=label)

norm_line_dist_arr = np.zeros_like(this_line_dist_arr)
for i in range(len(this_line_dist_arr)):
    norm_line_dist_arr[i] = this_line_dist_arr[i] / np.max(this_line_dist_arr[i])

m = np.mean(norm_line_dist_arr, axis=0)
s = np.std(norm_line_dist_arr, axis=0) #/ np.sqrt(len(this_line_dist_arr))
l = m - s
u = m + s
plt.fill_between(x, l, u, facecolor=colors['e.coli'], alpha=0.5)
plt.plot(x, m, '-', linewidth=2*linewidth, color=colors['e.coli'], label='mean')
plt.xlabel('fraction of anterior-posterior axis', fontsize=fontsize)
plt.ylabel('normalized diptericin-gfp \nfluorescence \nintensity (a.u. x $10^9$)', fontsize=fontsize)
plt.title('partial responders', fontsize=fontsize, fontweight='bold')
#plt.legend(fontsize=0.75 * fontsize)
ax = style_axes(plt.gca())



In [40]:
"""complete normalized"""
plt.figure(figsize=(10,5))
x = np.arange(line_dist_arr.shape[1]) / line_dist_arr.shape[1]
this_line_dist_arr = line_dist_arr[complete_ids]
"""plot individual dists first"""
# for i in range(len(this_line_dist_arr)):
#     if i == 0:
#         label = 'individual larvae'
#     else:
#         label = '_nolabel_'
#     plt.plot(x, this_line_dist_arr[i] / np.max(this_line_dist_arr[i]), '-', linewidth=linewidth, alpha=0.35, label=label)

norm_line_dist_arr = np.zeros_like(this_line_dist_arr)
for i in range(len(this_line_dist_arr)):
    norm_line_dist_arr[i] = this_line_dist_arr[i] / np.max(this_line_dist_arr[i])

m = np.mean(norm_line_dist_arr, axis=0)
s = np.std(norm_line_dist_arr, axis=0) #/ np.sqrt(len(this_line_dist_arr))
l = m - s
u = m + s
plt.fill_between(x, l, u, facecolor=colors['e.coli'], alpha=0.5)
plt.plot(x, m, '-', linewidth=2*linewidth, color=colors['e.coli'], label='mean')
plt.xlabel('fraction of anterior-posterior axis', fontsize=fontsize)
plt.ylabel('normalized diptericin-gfp \nfluorescence \nintensity (a.u. x $10^9$)', fontsize=fontsize)
plt.title('complete responders', fontsize=fontsize, fontweight='bold')
#plt.legend(fontsize=0.75 * fontsize)
ax = style_axes(plt.gca())



In [148]:
#plt.savefig(r'/home/brandon/Documents/Code/diptericin-paper/figures/Fig1-SpatialPattern/complete_ap_dist.pdf')

In [151]:
from skimage.filters import threshold_multiotsu

In [166]:
im = plt.imread(r'/media/brandon/Data2/Brandon/fly_immune/Lightsheet_Z1/2023_05_03-dpt-gfp_r4-gal4_uas-mcd8-mcherry_ecoli-hs-dtom_early-mid_24hrs_high_dose/larvae_1/mips4_2_crop_gut/mip_crop_gut_channel1_t0.tif')


In [167]:
threshes = threshold_multiotsu(np.log10(im[im > 0]))

In [168]:
threshes

array([3.2674813, 3.572224 ], dtype=float32)

In [169]:
10 ** threshes

array([1851.3193, 3734.4263], dtype=float32)

In [161]:
plt.figure()
plt.hist(np.log10(im[im>0]), bins=100)

(array([5.000e+00, 3.200e+01, 9.500e+01, 1.900e+02, 2.880e+02, 3.270e+02,
        4.430e+02, 5.420e+02, 6.910e+02, 8.460e+02, 8.270e+02, 1.028e+03,
        1.061e+03, 9.740e+02, 1.044e+03, 8.620e+02, 9.140e+02, 7.710e+02,
        8.110e+02, 8.240e+02, 9.260e+02, 1.053e+03, 1.261e+03, 1.349e+03,
        1.301e+03, 1.456e+03, 1.300e+03, 1.564e+03, 1.643e+03, 1.761e+03,
        1.928e+03, 1.986e+03, 2.134e+03, 2.405e+03, 2.372e+03, 2.584e+03,
        2.593e+03, 2.466e+03, 2.201e+03, 2.187e+03, 1.840e+03, 1.815e+03,
        1.674e+03, 1.662e+03, 1.659e+03, 1.529e+03, 1.653e+03, 1.514e+03,
        1.340e+03, 1.283e+03, 1.131e+03, 1.099e+03, 9.610e+02, 8.830e+02,
        9.380e+02, 8.850e+02, 9.400e+02, 8.240e+02, 8.570e+02, 7.850e+02,
        7.490e+02, 7.660e+02, 7.730e+02, 7.180e+02, 7.260e+02, 7.320e+02,
        7.350e+02, 7.010e+02, 6.320e+02, 5.210e+02, 4.530e+02, 4.080e+02,
        3.440e+02, 2.250e+02, 1.840e+02, 1.240e+02, 1.340e+02, 1.250e+02,
        7.800e+01, 7.200e+01, 7.600e+0

In [171]:
from glob import glob

In [189]:
experiment_paths = [r'/media/brandon/Data2/Brandon/fly_immune/Lightsheet_Z1/2023_05_03-dpt-gfp_r4-gal4_uas-mcd8-mcherry_ecoli-hs-dtom_early-mid_24hrs_high_dose',
                            r'/media/brandon/Data2/Brandon/fly_immune/Lightsheet_Z1/2023_06_01-dpt-gfp_r4-gal4_uas-mcd8-mcherry_ecoli-hs-dtom_early-mid_24hrs_high_dose',
                            r'/media/brandon/Data2/Brandon/fly_immune/Lightsheet_Z1/2023_06_07-dpt-gfp_r4-gal4_uas-mcd8-mcherry_ecoli-hs-dtom_early-mid_24hrs_high_dose']

line_dist_list = []
for experiment_path in experiment_paths:
    larvae_dirs = glob(experiment_path + '/larvae*')
    for larvae_dir in larvae_dirs:
        with open(larvae_dir + '/line_dist_raw.pkl', 'rb') as f:
            line_dist_list.append(pickle.load(f))

In [190]:
plt.figure()
for i, ld in enumerate(line_dist_list):
    if i in partial_ids:
        x = np.linspace(0, 1, len(ld))
        plt.plot(x, ld)

In [191]:
plt.figure()
for ld in line_dist_arr[partial_ids]:
    x = np.linspace(0, 1, len(ld))
    plt.plot(x, ld)

In [202]:
experiment_paths = [
    r'/media/brandon/Data2/Brandon/fly_immune/Lightsheet_Z1/2024_04_18_dpt-gfp_NP1029-Gal4-UAS-Mhc-RNAi_ecoli-hs-dtom_6hrs/no_heartbeat',
    r'/media/brandon/Data2/Brandon/fly_immune/Lightsheet_Z1/2024_04_19_dpt-gfp_NP1029-Gal4-UAS-Mhc-RNAi_ecoli-hs-dtom_6hrs/no_heartbeat']

line_dist_list = []
for experiment_path in experiment_paths:
    larvae_dirs = glob(experiment_path + '/larva*')
    for larvae_dir in larvae_dirs:
        try:
            with open(larvae_dir + '/line_dist.pkl', 'rb') as f:
                line_dist_list.append(pickle.load(f))
        except FileNotFoundError as e:
            print(e)
            continue

plt.figure()
for i, ld in enumerate(line_dist_list):
    if i == 0:
        continue
    x = np.linspace(0, 1, len(ld))
    plt.plot(x, ld)

[Errno 2] No such file or directory: '/media/brandon/Data2/Brandon/fly_immune/Lightsheet_Z1/2024_04_18_dpt-gfp_NP1029-Gal4-UAS-Mhc-RNAi_ecoli-hs-dtom_6hrs/no_heartbeat/larva_2/line_dist.pkl'


In [205]:
experiment_paths = [
    r'/media/brandon/Data2/Brandon/fly_immune/Lightsheet_Z1/2024_04_18_dpt-gfp_NP1029-Gal4-UAS-Mhc-RNAi_ecoli-hs-dtom_6hrs/yes_heartbeat',
    r'/media/brandon/Data2/Brandon/fly_immune/Lightsheet_Z1/2024_04_19_dpt-gfp_NP1029-Gal4-UAS-Mhc-RNAi_ecoli-hs-dtom_6hrs/yes_heartbeat']

line_dist_list = []
for experiment_path in experiment_paths:
    larvae_dirs = glob(experiment_path + '/larva*')
    for larvae_dir in larvae_dirs:
        try:
            with open(larvae_dir + '/line_dist.pkl', 'rb') as f:
                line_dist_list.append(pickle.load(f))
        except FileNotFoundError as e:
            print(e)
            continue
            
plt.figure()
for i, ld in enumerate(line_dist_list):
    x = np.linspace(0, 1, len(ld))
    plt.plot(x, ld)

In [37]:
with open(r'/media/brandon/Data2/Brandon/fly_immune/Lightsheet_Z1/2023_05_03-dpt-gfp_r4-gal4_uas-mcd8-mcherry_ecoli-hs-dtom_early-mid_24hrs_high_dose/larvae_2/line_dist_auto_thresh.pkl', 'rb') as file:
    ld = pickle.load(file)

In [39]:
plt.figure()
plt.plot(ld)

[<matplotlib.lines.Line2D at 0x7f1daab18970>]

## Simulations showing the log-normal-gamma distribution

In [363]:
mu_mock

6.2503989992276345

In [364]:
sigma_mock

0.3871225616696109

In [365]:
theta_fit

5432.8002362521365

In [406]:
"""sweep k"""
mu = 6.25
sigma = 0.387
theta = 5433
k_arr = [0.1, 0.5, 1.5]
x = np.logspace(2, 5, 50)
prob_dens_arr = np.zeros((len(k_arr), len(x)))
for i in range(len(prob_dens_arr)):
    prob_dens_arr[i] = likelihood(x, mu, sigma, k_arr[i], theta, n_samples=1_000_000, n_bins=100)

In [407]:
plt.figure(figsize=(7,6))
reds = np.linspace(1, 0, len(prob_dens_arr))
greens = np.linspace(0, 1, len(prob_dens_arr))
blues = np.ones(len(prob_dens_arr))
for i in range(len(prob_dens_arr)):
    this_prob_dens = prob_dens_arr[i]
    plot_x = x[this_prob_dens > 5e-7]
    plot_prob = this_prob_dens[this_prob_dens > 5e-7]
    plt.plot(plot_x, plot_prob, linewidth=4, color=(reds[i], greens[i], blues[i]), label=r'$\alpha=$' + str(k_arr[i]))

plt.xscale('log')
plt.yscale('log')
plt.xlabel('$x$', fontsize=fontsize)
plt.ylabel('$p(x)$', fontsize=fontsize)
plt.legend(fontsize=0.75 * fontsize)
ax = style_axes(plt.gca())

In [408]:
plt.savefig(r'/home/brandon/Documents/Code/diptericin-paper/figures/misc/example_log-normal-gamma_dists_sweep_alpha.pdf')

In [402]:
"""sweep theta"""
mu = 6.25
sigma = 0.387
theta_arr = [50, 500, 5000]
k = 0.5
x = np.logspace(2, 5, 50)
prob_dens_arr = np.zeros((len(theta_arr), len(x)))
for i in range(len(prob_dens_arr)):
    prob_dens_arr[i] = likelihood(x, mu, sigma, k, theta_arr[i], n_samples=1_000_000, n_bins=100)

In [404]:
plt.figure(figsize=(7,6))
reds = np.linspace(1, 0, len(prob_dens_arr))
greens = np.linspace(0, 1, len(prob_dens_arr))
blues = np.ones(len(prob_dens_arr))
for i in range(len(prob_dens_arr)):
    this_prob_dens = prob_dens_arr[i]
    plot_x = x[this_prob_dens > 5e-7]
    plot_prob = this_prob_dens[this_prob_dens > 5e-7]
    plt.plot(plot_x, plot_prob, linewidth=4, color=(reds[i], greens[i], blues[i]), label=r'$\beta=1/$' + str(theta_arr[i]))

plt.xscale('log')
plt.yscale('log')
plt.xlabel('$x$', fontsize=fontsize)
plt.ylabel('$p(x)$', fontsize=fontsize)
plt.legend(fontsize=0.75 * fontsize)
ax = style_axes(plt.gca())

In [405]:
plt.savefig(r'/home/brandon/Documents/Code/diptericin-paper/figures/misc/example_log-normal-gamma_dists_sweep_beta.pdf')

## experimenting with gaussian mixture models

In [1]:
from sklearn.mixture import GaussianMixture

In [2]:
gm = GaussianMixture(n_components=2, covariance_type='spherical')