# Fig. 1 Introducing the spatial pattern 

In [2]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib as mpl
import pickle
from scipy.optimize import minimize, basinhopping
from tqdm import tqdm
from glob import glob
from pathlib import Path

  from pandas.core.computation.check import NUMEXPR_INSTALLED


In [3]:
%matplotlib qt
#%matplotlib inline

In [4]:
"""plot style"""
# Shared sizes used by every figure in this notebook.
linewidth = 4
fontsize = 24

# Global matplotlib defaults: thick axes frame and Arial text.
mpl.rcParams.update({
    'axes.linewidth': linewidth,
    'font.family': 'Arial',
})

# RGB face colors keyed by treatment group. The values stay plain lists because
# later cells append an alpha channel by list concatenation (colors[key] + [0.5]).
colors = {
    'no_inj': [0.8, 0.8, 0.8],
    'mock': [0.4, 0.4, 0.4],
    'e.coli': [0, 0.4, 0],
    'complete': [0, 0.8, 0],
}

def style_axes(ax, fontsize=24):
    """Apply the notebook's house style to ``ax`` and return it.

    Turns off minor ticks, hides the top and right spines, sets the major
    tick-label size on both axes to ``fontsize`` and runs ``tight_layout`` on
    the figure that owns ``ax``.

    Parameters
    ----------
    ax : matplotlib Axes
        Axes to restyle; modified in place.
    fontsize : int or float, optional
        Font size of the tick labels (default 24).

    Returns
    -------
    The same ``ax`` object that was passed in.
    """
    # Operate on ``ax`` and its own figure rather than on pyplot's "current"
    # axes/figure, so the helper styles the axes it was actually handed.
    ax.minorticks_off()
    for side in ('top', 'right'):
        ax.spines[side].set_visible(False)
    # A single tick_params call replaces the old "set labelsize=20, then
    # overwrite every existing tick label with ``fontsize``" sequence, so the
    # requested size is the only one ever applied.
    ax.tick_params(axis='both', which='major', labelsize=fontsize)
    ax.figure.tight_layout()

    return ax


## Scatter plot of total intensities
Uses the older experiments with 10x-diluted injections; each group is summarized by the median and interquartile range of the per-larva totals.

In [61]:
# One pickled DataFrame per treatment group / imaging batch.
df_eL3_24 = pd.read_pickle(r'/media/brandon/Data1/Brandon/fly_immune/diptericin_paper/manual_analysis/df_el3_24.pkl')
df_mL3_24 = pd.read_pickle(r'/media/brandon/Data1/Brandon/fly_immune/diptericin_paper/manual_analysis/df_mL3_24.pkl')
df_emL3_24 = pd.read_pickle(r'/media/brandon/Data1/Brandon/fly_immune/diptericin_paper/manual_analysis/df_mod_eL3_24.pkl')
df_mock_1 = pd.read_pickle(r'/media/brandon/Data2/Brandon/fly_immune/Lightsheet_Z1/df_emL3_mock.pkl')
df_mock_2 = pd.read_pickle(r'/media/brandon/Data1/Brandon/fly_immune/diptericin_paper/manual_analysis/df_eL3_ctl.pkl')
df_no_inj_1 = pd.read_pickle(r'/media/brandon/Data1/Brandon/fly_immune/diptericin_paper/manual_analysis/df_mL3_24_noinj.pkl')
df_no_inj_2 = pd.read_pickle(r'/media/brandon/Data2/Brandon/fly_immune/Lightsheet_Z1/df_emL3_noinj.pkl')

# combine the mocks: shift the second batch's experiment_id values past the
# largest id of the first batch so ids remain distinct after stacking
mock_id_offset = df_mock_1.experiment_id.max() + 1
tmp_df = df_mock_2.assign(experiment_id=df_mock_2.experiment_id + mock_id_offset)
df_mock = pd.concat([df_mock_1, tmp_df], axis=0)

# combine the no injections in the same way
no_inj_id_offset = df_no_inj_1.experiment_id.max() + 1
tmp_df = df_no_inj_2.assign(experiment_id=df_no_inj_2.experiment_id + no_inj_id_offset)
df_no_inj = pd.concat([df_no_inj_1, tmp_df], axis=0)


In [62]:
# Total dpt-gfp intensity per larva, summarized per treatment group as the
# median (marker) with the interquartile range (error bar), on a log y axis.
plt.figure(figsize=(10,5))
sigma_x = 0.02
markersize=24
alpha=1

all_dfs = [df_no_inj, df_mock, df_eL3_24, df_emL3_24, df_mL3_24]
color_strs = ['no_inj', 'mock', 'e.coli', 'e.coli', 'e.coli']
for i, (df, color_str) in enumerate(zip(all_dfs, color_strs)):
    # total intensity of one larva = sum of dpt_gfp over all rows sharing its
    # experiment_id (only that column is aggregated)
    intens = df.groupby(by='experiment_id').dpt_gfp.sum().values

    l, m, u = np.quantile(intens, [0.25, 0.5, 0.75])
    # errorbar() interprets yerr as distances below/above the plotted point,
    # not as absolute positions, so convert the quartiles to offsets from the
    # median. Shape (2, 1): row 0 is the lower error, row 1 the upper error.
    errors = np.array([[m - l], [u - m]])

    plt.errorbar(i, m, yerr=errors, marker='o', markerfacecolor=colors[color_str], markersize=markersize, alpha=alpha,
                linewidth=0, elinewidth=4, ecolor='k', capsize=4, capthick=4, markeredgecolor='k', markeredgewidth=3)
    #plt.plot(i * np.ones(len(intens)) + sigma_x * np.random.normal(size=len(intens)), intens, 'ko',
           # markerfacecolor=colors[color_strs[i]], markersize=markersize, alpha=alpha)


plt.xticks([0, 1, 2, 3, 4], labels=['no inj', 'mock', '$E. coli$ \n3h-25C', '$E.coli$ \n18h-18C', '$E.coli$ \n18h-25C'], fontsize=fontsize)
plt.xlim([-0.5, 4.5])
plt.ylabel('total diptericin-gfp \nfluorescence intensity \nper larva (a.u.)', fontsize=fontsize)
plt.yscale('log')
plt.minorticks_off()
ax = style_axes(plt.gca())

In [63]:
#plt.savefig(r'/media/brandon/Data1/Brandon/fly_immune/diptericin_paper/supplemental_figures/total_dpt-gfp_by_treatment.pdf')

## Scatter plot of median intensities

In [64]:
"""high dose data"""
# Each pickle is loaded into a DataFrame; the df_inj output shown below has
# columns experiment_id, y, x, dpt_gfp and mem_mch.
# 18h-18C 24 hpi E. coli
df_inj = pd.read_pickle(r'/media/brandon/Data2/Brandon/fly_immune/Lightsheet_Z1/df_emL3_24.pkl')
# 18h-18C 24 hpi mock
# NOTE: this rebinds df_mock, which the total-intensity section had set to the
# combined mock table; from here on df_mock is this single file only.
df_mock = pd.read_pickle(r'/media/brandon/Data2/Brandon/fly_immune/Lightsheet_Z1/df_emL3_mock.pkl')
# 18h-18C 24 hpi no injection
df_no = pd.read_pickle(r'/media/brandon/Data2/Brandon/fly_immune/Lightsheet_Z1/df_emL3_noinj.pkl')

"""older 10x diluted data"""
# NOTE(review): the temperatures in the two comments below (22C, 24C) differ
# from the tick labels of the total-intensity figure ('3h-25C', '18h-25C') --
# confirm which values are correct.
# 3h-22C 24 hpi mock
df_e_mock = pd.read_pickle(r'/media/brandon/Data1/Brandon/fly_immune/diptericin_paper/manual_analysis/df_eL3_ctl.pkl')
# 18h-24C 24 hpi no injection
df_m_no = pd.read_pickle(r'/media/brandon/Data1/Brandon/fly_immune/diptericin_paper/manual_analysis/df_mL3_24_noinj.pkl')

# display the injected table as the cell output
df_inj


Unnamed: 0,experiment_id,y,x,dpt_gfp,mem_mch
0,0,240.70486,1269.328656,5927.750994,9018.343675
1,0,396.410604,1258.712355,4821.583134,8898.338107
2,0,592.812167,1260.481739,6446.630867,8355.653142
3,0,713.130241,634.119998,2272.979316,8869.243437
4,0,859.989067,605.809863,2142.789181,9112.609387
...,...,...,...,...,...
6126,19,4787.100318,1505.411505,1899.669849,8088.492442
6127,19,4660.289762,1561.294123,2062.060461,9410.208433
6128,19,4838.684273,1720.344651,930.402546,6819.805887
6129,19,4666.737756,1165.817135,879.618934,7400.588703


In [65]:
"""threshold for dividing samples based on median intensity"""
# Larvae whose median dpt_gfp is below this value are classed as "partial",
# all others as "complete"; the value is in the same units as dpt_gfp.
partial_thresh = 4000

In [66]:
"""scatter plot of median intensities"""
# One marker per larva (median dpt_gfp over its rows), grouped by treatment on
# the x axis with a small random horizontal jitter so markers do not overlap.
plt.figure(figsize=(8,6))
sigma_x = 0.02


def _larva_medians(df):
    """Return the median dpt_gfp of each experiment_id in ``df`` as an array.

    The column is selected before aggregating so that no other column of the
    table has to be reduced.
    """
    return df.groupby(by='experiment_id').dpt_gfp.median().values


def _plot_jittered(x_pos, values, color):
    """Plot ``values`` at ``x_pos`` with Gaussian x-jitter of std ``sigma_x``."""
    jitter = sigma_x * np.random.normal(size=len(values))
    plt.plot(x_pos + jitter, values, 'ko', markerfacecolor=color, markersize=16, alpha=0.75)


# no injection --- pool all samples
no_inj_medians = np.concatenate([_larva_medians(df_no), _larva_medians(df_m_no)])
_plot_jittered(0, no_inj_medians, colors['no_inj'])

# mock injection --- pool all samples
mock_medians = np.concatenate([_larva_medians(df_mock), _larva_medians(df_e_mock)])
_plot_jittered(1, mock_medians, colors['mock'])

# injected group --- color by partial/complete split
# all_medians, partial_ids and complete_ids stay module-level: later cells
# read them. The ids are positions within all_medians.
x = 2
all_medians = _larva_medians(df_inj)
partial_ids = np.where(all_medians < partial_thresh)[0]
complete_ids = np.where(all_medians >= partial_thresh)[0]

# partial
_plot_jittered(x, all_medians[partial_ids], colors['e.coli'])

# complete
_plot_jittered(x, all_medians[complete_ids], colors['complete'])

plt.xticks([0, 1, 2], labels=['no \ninjection', 'mock', '$E. coli$'], fontsize=fontsize)
plt.xlim([-0.5, x + 0.5])
# Switch to the log scale *before* fixing ticks and limits: changing the scale
# installs that scale's default tick locator, which would otherwise replace
# the explicit ticks set here.
plt.yscale('log')
plt.yticks([1e3, 1e4])
plt.ylim([3e2, 2e4])
plt.ylabel('\n \n \n \n \n median diptericin-gfp \nfluorescence intensity \nper cell (a.u.)', fontsize=fontsize)
ax = plt.gca()
ax = style_axes(ax)


In [15]:
#plt.savefig(r'/home/brandon/Documents/Code/diptericin-paper/figures/Fig1-SpatialPattern/median_inten_dot_plot.pdf')

In [8]:
"""divide samples based on median intensity"""
# Positions within all_medians of the larvae below / at-or-above the threshold.
partial_ids = np.flatnonzero(all_medians < partial_thresh)
complete_ids = np.flatnonzero(all_medians >= partial_thresh)

In [9]:
partial_ids

array([ 0,  1,  2,  3,  5,  6,  8, 11, 13, 16, 18, 19])

## Full intensity distributions

In [10]:
# Number of logarithmically spaced bin edges for the pooled mock histogram
# (21 edges -> 20 bins). A later cell rebinds n_bins to a different value.
n_bins = 20 + 1


In [11]:
# Pooled mock-injection distribution: stack dpt_gfp from both mock tables and
# histogram it on n_bins logarithmically spaced edges spanning the data range.
all_mock_intens = np.concatenate([df_mock.dpt_gfp.values, df_e_mock.dpt_gfp.values])

log_mock_intens = np.log10(all_mock_intens)
mock_edges = np.logspace(log_mock_intens.min(), log_mock_intens.max(), n_bins)
counts, _ = np.histogram(all_mock_intens, mock_edges)

# fraction of samples per bin divided by the bin width -> probability density
mock_prob_dens = counts / counts.sum() / np.diff(mock_edges)
# keep the left edge of every bin as its x position for plotting
mock_bins = mock_edges[:-1]


In [12]:
"""combine both mock dataframes into one"""
# Shift the older batch's experiment_id values past the largest id in df_mock
# so that ids stay distinct once the two tables are stacked.
mock_id_offset = df_mock.experiment_id.max() + 1
tmp_df = df_e_mock.assign(experiment_id=df_e_mock.experiment_id + mock_id_offset)
df_all_mock = pd.concat([df_mock, tmp_df], axis=0)

In [16]:
# Split the injected table by responder class with a vectorized membership
# test (replaces a per-row Python loop over experiment_id).
# NOTE(review): partial_ids / complete_ids are positions in the per-larva
# median array but are matched against experiment_id values here; that is only
# equivalent if the ids are exactly 0..N-1 -- confirm.
df_partial = df_inj[df_inj.experiment_id.isin(partial_ids)]
df_complete = df_inj[df_inj.experiment_id.isin(complete_ids)]


In [18]:
"""plot mock, partial, and complete dists on one graph"""
"""for complete, plot prob dens with error bars and lognormal fit"""
# Each group (complete, partial, mock) is drawn as a pooled probability
# density on logarithmic bins, with error bars from a bootstrap that resamples
# larvae (experiment_ids) with replacement.
#
# The log-normal fits that used to follow each group were removed: their
# results were never plotted, and they called fit_log_normal / log_normal_dist,
# which are not defined anywhere in this notebook.
n_bootstraps = 100
# Number of bin edges per histogram. Kept at module level because the
# individual-larva cell further down reads n_bins as well.
n_bins = 10 + 1
plt.figure(figsize=(6, 6))


def _prob_dens(values, edges):
    """Histogram ``values`` on ``edges`` and normalize to a probability density.

    Counts are divided by their sum and by the bin widths.
    """
    counts, _ = np.histogram(values, edges)
    return counts / np.sum(counts) / np.diff(edges)


def _plot_group_dist(df, color, label, floor_edges=False, **errorbar_kwargs):
    """Plot the pooled dpt_gfp density of ``df`` with bootstrap error bars.

    Parameters
    ----------
    df : DataFrame with ``experiment_id`` and ``dpt_gfp`` columns.
    color : list of three RGB values; an alpha of 0.5 is appended for plotting.
    label : legend label for the curve.
    floor_edges : if True, round the bin edges down to whole numbers.
    **errorbar_kwargs : extra keyword arguments for ``plt.errorbar``
        (e.g. ``zorder``).
    """
    intens = df.dpt_gfp.astype('float32').values
    edges = np.logspace(np.min(np.log10(intens)), np.max(np.log10(intens)), n_bins)
    if floor_edges:
        edges = np.floor(edges)

    # Look up each larva's intensities once instead of re-filtering the frame
    # for every bootstrap draw.
    intens_by_larva = {
        eid: group.dpt_gfp.values.astype('float')
        for eid, group in df.groupby('experiment_id')
    }
    eids = df.experiment_id.unique()

    # Bootstrap over larvae: draw len(eids) ids with replacement, pool their
    # intensities and recompute the density on the same edges.
    prob_dens_bootstrap = np.zeros((n_bootstraps, len(edges) - 1))
    for n in range(n_bootstraps):
        drawn_ids = np.random.choice(eids, size=len(eids))
        resampled = np.concatenate([intens_by_larva[eid] for eid in drawn_ids])
        prob_dens_bootstrap[n] = _prob_dens(resampled, edges)
    sigma_prob_dens = np.nanstd(prob_dens_bootstrap, axis=0)

    # Pooled density over all larvae, drawn at the left edge of each bin.
    rgba = color + [0.5]
    plt.errorbar(edges[:-1], _prob_dens(intens, edges), sigma_prob_dens, ecolor='k', elinewidth=3, capsize=3, marker='o',
                 markersize=14, color=rgba, linewidth=3, markeredgecolor='k', markerfacecolor=rgba,
                 barsabove=True, label=label, **errorbar_kwargs)


# complete
# NOTE(review): only this group has ever had its bin edges floored; the flag
# preserves that, but flooring the top edge can leave the largest values
# outside the last bin -- confirm whether it is intended.
_plot_group_dist(df_complete, colors['complete'], '$E.coli$ complete', floor_edges=True)

# partial
_plot_group_dist(df_partial, colors['e.coli'], '$E.coli$ partial', zorder=10)

# mock
_plot_group_dist(df_all_mock, colors['mock'], 'mock injection', zorder=0)


plt.xscale('log')
plt.yscale('log')
plt.xlabel('diptericin-gfp \nfluorescence intensity (a.u.)', fontsize=fontsize)
plt.ylabel('probability density (1/a.u.)', fontsize=fontsize)
plt.legend(fontsize=0.65 * fontsize, loc='upper left', bbox_to_anchor=(0.5, 1.1), ncol=1, fancybox=True, framealpha=0.5)

plt.xlim([100, 65000])
plt.ylim([1e-9, 5e-3])
plt.yticks([1e-8, 1e-6, 1e-4])
ax = plt.gca()
ax = style_axes(ax)

In [150]:
# Write pyplot's current figure to PDF; when cells run in order this is the
# mock / partial / complete distribution figure from the previous cell.
plt.savefig(r'/home/brandon/Documents/Code/diptericin-paper/figures/Fig1-SpatialPattern/mock_partial_complete_dists_errorbars.pdf')

In [21]:
"""plot examples of individual partial dists"""
# Rebuild the responder subsets of the injected table.
df_partial = df_inj[df_inj.experiment_id.isin(partial_ids)]
df_complete = df_inj[df_inj.experiment_id.isin(complete_ids)]

plt.figure(figsize=(7.5, 7))

# pooled mock density as a dashed reference curve
plt.plot(mock_bins, mock_prob_dens, '--', linewidth=linewidth, color=colors['mock'], label='mock')

# one density curve per partial-responder larva
df = df_partial
# alternative hand-picked selections, grouped by the shape of the distribution:
#good_eids = [8, 13, 11, 16, 19]#, 1, 18, 6]   # power law
#good_eids = [0, 3]          # log normal
#good_eids = [2, 6, 5]      # bimodal
#good_eids = [1, 18]         # in between
good_eids = partial_ids
n_edges = int(n_bins / 2)   # coarser binning than the pooled histograms
min_prob_dens = 1e-7        # bins at or below this density are not drawn
for eid in good_eids:
    intens = df.loc[df.experiment_id == eid, 'dpt_gfp'].astype('float32').values
    log_intens = np.log10(intens)
    edges = np.logspace(log_intens.min(), log_intens.max(), n_edges)
    counts, _ = np.histogram(intens, edges)
    dens = counts / counts.sum() / np.diff(edges)
    keep = dens > min_prob_dens
    plt.plot(edges[:-1][keep], dens[keep], '-', linewidth=linewidth, alpha=0.75, label=f'larva {eid}')

plt.xscale('log')
plt.yscale('log')
plt.xlabel('diptericin-gfp \nfluorescence intensity (a.u.)', fontsize=fontsize)
plt.ylabel('probability density (1/a.u.)', fontsize=fontsize)
plt.legend(fontsize=0.75 * fontsize)
plt.xlim([200, 65000])
ax = style_axes(plt.gca())




In [22]:
#plt.savefig(r'/home/brandon/Documents/Code/diptericin-paper/figures/Fig1-SpatialPattern/individual_partial_dists.pdf')

## Distribution of expression levels along anterior-posterior axis

In [5]:
"""assemble line_dist arrays"""
experiment_paths = [r'/media/brandon/Data2/Brandon/fly_immune/Lightsheet_Z1/2023_05_03-dpt-gfp_r4-gal4_uas-mcd8-mcherry_ecoli-hs-dtom_early-mid_24hrs_high_dose', 
                    r'/media/brandon/Data2/Brandon/fly_immune/Lightsheet_Z1/2023_06_01-dpt-gfp_r4-gal4_uas-mcd8-mcherry_ecoli-hs-dtom_early-mid_24hrs_high_dose',
                    r'/media/brandon/Data2/Brandon/fly_immune/Lightsheet_Z1/2023_06_07-dpt-gfp_r4-gal4_uas-mcd8-mcherry_ecoli-hs-dtom_early-mid_24hrs_high_dose']

# Load one pickled line distribution per larva directory. A directory is used
# only if it also contains manual_df.pkl.
# NOTE(review): rows end up in glob() order, and later cells index them with
# partial_ids / complete_ids -- confirm this order matches the experiment_id
# order of the injected table.
line_dist_list = []
for experiment_path in experiment_paths:
    for larva_dir in glob(experiment_path + '/larva*'):
        if not Path(larva_dir + '/manual_df.pkl').is_file():
            continue
        with open(larva_dir + '/line_dist_auto_thresh.pkl', 'rb') as fh:
            line_dist_list.append(pickle.load(fh))

# Stack into a (larvae, positions) float array; every entry must have the
# same length as the first one.
n_larvae = len(line_dist_list)
n_positions = len(line_dist_list[0])
line_dist_arr = np.zeros((n_larvae, n_positions))
for row, line_dist in enumerate(line_dist_list):
    line_dist_arr[row] = line_dist

In [67]:
"""partial normalized"""
plt.figure(figsize=(10,5))
n_positions = line_dist_arr.shape[1]
x = np.arange(n_positions) / n_positions   # position as a fraction of the axis length
this_line_dist_arr = line_dist_arr[partial_ids]

# Scale every larva's profile by its own maximum so each one peaks at 1.
# (Individual larva traces are not drawn; only the mean and spread are.)
row_max = this_line_dist_arr.max(axis=1, keepdims=True)
norm_line_dist_arr = this_line_dist_arr / row_max

# mean profile with a +/- one standard deviation band (std across larvae,
# not the standard error)
m = norm_line_dist_arr.mean(axis=0)
s = norm_line_dist_arr.std(axis=0)
plt.fill_between(x, m - s, m + s, facecolor=colors['e.coli'], alpha=0.5)
plt.plot(x, m, '-', linewidth=2*linewidth, color=colors['e.coli'], label='mean')

plt.xlabel('fraction of anterior-posterior axis', fontsize=fontsize)
plt.ylabel('normalized diptericin-gfp \nfluorescence intensity \n(fraction of max)', fontsize=fontsize)
plt.title('partial responders', fontsize=fontsize, fontweight='bold')
plt.ylim([0, 1])
ax = style_axes(plt.gca())



In [68]:
# Write pyplot's current figure to PDF; when cells run in order this is the
# partial-responder profile from the previous cell.
plt.savefig(r'/home/brandon/Documents/Code/diptericin-paper/figures/Fig1-SpatialPattern/partial_normalized_ap_dist.pdf')

In [69]:
"""complete normalized"""
plt.figure(figsize=(10,5))
n_positions = line_dist_arr.shape[1]
x = np.arange(n_positions) / n_positions   # position as a fraction of the axis length
this_line_dist_arr = line_dist_arr[complete_ids]

# Scale every larva's profile by its own maximum so each one peaks at 1.
# (Individual larva traces are not drawn; only the mean and spread are.)
row_max = this_line_dist_arr.max(axis=1, keepdims=True)
norm_line_dist_arr = this_line_dist_arr / row_max

# mean profile with a +/- one standard deviation band (std across larvae,
# not the standard error)
m = norm_line_dist_arr.mean(axis=0)
s = norm_line_dist_arr.std(axis=0)
plt.fill_between(x, m - s, m + s, facecolor=colors['complete'], alpha=0.5)
plt.plot(x, m, '-', linewidth=2*linewidth, color=colors['complete'], label='mean')

plt.xlabel('fraction of anterior-posterior axis', fontsize=fontsize)
plt.ylabel('normalized diptericin-gfp \nfluorescence intensity \n(fraction of max)', fontsize=fontsize)
plt.title('complete responders', fontsize=fontsize, fontweight='bold')
plt.ylim([0, 1])
ax = style_axes(plt.gca())



In [70]:
# Write pyplot's current figure to PDF; when cells run in order this is the
# complete-responder profile from the previous cell.
plt.savefig(r'/home/brandon/Documents/Code/diptericin-paper/figures/Fig1-SpatialPattern/complete_normalized_ap_dist.pdf')