In [6]:
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [7]:
from tqdm import tqdm
import sys
sys.path.append('/fs/cbsuvlaminck2/workdir/bmg224/hiprfish/image_analysis_code')
import image_functions as imfn
from segmentation import Segmentation


In [8]:
from image import Image

In [9]:
data_dir = '/fs/cbsuvlaminck2/workdir/bmg224/data/2021/devlaminck_widefield/2021_04_05_dnafishmethodchar'
sample_names = imfn.get_sample_names(data_dir=data_dir)
refl = ['gfp', 'cy5']
factors = ['method', 'plasmid','fov']
flat_field_filename = '/fs/cbsuvlaminck2/workdir/bmg224/data/2021/devlaminck_widefield/2021_04_12_flat_field_correction_lens_100x_refl_cy5.czi'
print(len(sample_names))
print(sample_names[0])

50
2021_04_05_dnafishmethodchar_method_e_plasmid_pos_fov_2


In [10]:
keys =[imfn.get_filename_keys(sn, factors) for sn in sample_names]
len(keys)

50

In [11]:
# load csvs
import pandas as pd
output_dir = '../image_processing'
spot_props_filenames = [output_dir + '/' + sn + '_spot_seg_cell_id.csv' for sn in sample_names]
spot_props = [pd.read_csv(spf) for spf in spot_props_filenames]

In [13]:
# Get cell segmentation
import numpy as np
cell_seg_filename = [output_dir + '/' + sn + '_cell_seg.npy' for sn in sample_names]
cell_seg = [np.load(csf) for csf in cell_seg_filename]

In [14]:
# Get Raw spot images (flat field corrected)
from image import Image
raw_corrected = []
for sn in sample_names:
    im = Image(sample_name=sn, illumination_names=refl, data_dir=data_dir, 
               flat_field_filename=flat_field_filename)
    im.pick_channel('cy5')
    im.flat_field_correction()
    raw_corrected.append(im.raw_2D)

In [15]:
# Get bg pixels
bg_pixels = [rc[cs == 0] for rc, cs in zip(raw_corrected, cell_seg)]

In [16]:
# Calculate median background pixel intensity
mean_bg_pix_int = [np.median(bp) for bp in bg_pixels]
print(mean_bg_pix_int[0])

0.18716942


In [17]:
# Reset b pixel intensities as their negative controls
sample_neg_bool = [1 if ('method_b_' in sn) and ('plasmid_neg_' in sn) else 0 for sn in sample_names]
sample_pos_bool = [1 if ('method_b_' in sn) and ('plasmid_pos_' in sn) else 0 for sn in sample_names]
mbpi_neg = np.array(mean_bg_pix_int)[np.where(sample_neg_bool)]
mbpi_neg_mean = np.mean(mbpi_neg)
mbpi_new = np.where(sample_pos_bool, mbpi_neg_mean, mean_bg_pix_int)

print(np.array(mean_bg_pix_int)[np.where(sample_pos_bool)])
print(mbpi_new[np.where(sample_pos_bool)])

[0.19269446 0.19675678 0.18701862]
[0.15022989 0.15022989 0.15022989]


In [18]:
# Normalize spots

In [19]:
# Norm by bg pixel intensity
spot_props_norm = []
mins, maxs = [],[]
for sc, mbpi in zip(spot_props, mbpi_new):
    scc = sc.copy()
    scc['int_norm'] = scc['Intensity']/mbpi
    maxs.append(scc.int_norm.max())
    mins.append(scc.int_norm.min())
    spot_props_norm.append(scc)

In [20]:
# Get cell props
cell_props_filename = [output_dir + '/' + sn + '_cell_seg_props.csv' for sn in sample_names]
cell_props = [pd.read_csv(cpf) for cpf in cell_props_filename]

In [21]:
# Get each cell
cell_ids = [cp.ID.values for cp in cell_props]

In [56]:
# Get all spots in cell
cell_spot_ints = [[sp.loc[sp.cell_id == cell, 'int_norm'].values.tolist()
                   if sp[sp.cell_id == cell].shape[0] > 0 else [0] for cell in c_ids]
                  for sp, c_ids in tqdm(zip(spot_props_norm, cell_ids))] 



0it [00:00, ?it/s][A
1it [00:03,  3.73s/it][A
2it [00:04,  2.89s/it][A
3it [00:06,  2.69s/it][A
4it [00:10,  2.85s/it][A
5it [00:14,  3.19s/it][A
6it [00:17,  3.18s/it][A
7it [00:19,  3.05s/it][A
7it [07:49, 67.00s/it][A

9it [00:27,  3.40s/it][A
10it [00:29,  3.12s/it][A
11it [00:32,  3.00s/it][A
12it [00:34,  2.78s/it][A
13it [00:38,  2.97s/it][A
14it [00:42,  3.36s/it][A
15it [00:46,  3.48s/it][A
16it [00:47,  2.94s/it][A
17it [00:50,  2.76s/it][A
18it [00:51,  2.43s/it][A
19it [00:54,  2.42s/it][A
20it [00:57,  2.70s/it][A
21it [01:00,  2.78s/it][A
22it [01:02,  2.60s/it][A
23it [01:05,  2.52s/it][A
24it [01:07,  2.49s/it][A
25it [01:08,  2.06s/it][A
26it [01:12,  2.58s/it][A
27it [01:16,  3.05s/it][A
28it [01:19,  3.17s/it][A
29it [01:23,  3.32s/it][A
30it [01:27,  3.55s/it][A
31it [01:30,  3.26s/it][A
32it [01:33,  3.29s/it][A
33it [01:35,  2.75s/it][A
34it [01:36,  2.21s/it][A
35it [01:40,  2.77s/it][A
36it [01:41,  2.24s/it][A
37it [01:45, 

In [57]:
# Get brightest spot in cell
max_ints = [[np.max(cell) for cell in csi] for csi in cell_spot_ints]

In [58]:
# Get values into a dictionary
max_int_dict = imfn.get_nested_dict(keys, max_ints, (0,1,2))

In [63]:
# Set up ordered lists of factors
I = ['a','b','c','d','e','f','g','h']
J = ['neg','pos']
K = ['1','2','3']

In [64]:
# Make plot dictionary
plot_dict = {}
for i in I:
    plot_dict[i] = {}
    for j in J:
        max_int_list = []
        for k in K:
            max_int_list += max_int_dict[i][j][k][0][1]
        plot_dict[i][j] = max_int_list


In [80]:
# plot parameters
import image_plots as ip
import numpy as np
import matplotlib.pyplot as plt

fig_dir = '../figures'

# col = 'w'
# dims = (10,5)
# lw = 1
# ft = 20
# jit = 0.15
# ylims = (0,25)
# transparency = 0.8
# dot_factor = 0.5

col = 'k'
lw = 1
ft = 5
dims = (0.9843 ,0.5906)
jit = 0.1
transparency = 0.6
dot_factor = 0.5

colors = plt.rcParams['axes.prop_cycle'].by_key()['color']


xticklabels = ['Control','Positive']
line_colors = [colors[0], colors[1]]

In [82]:
# Plot brightest spot in cell
# Generate PLots

for i in I:
    print('method: ', i)
    fig, ax = ip.general_plot(col=col, dims=dims, ft=ft, lw=lw)
    xticks = ip.get_xticks(J)
    ax.set_xticks(xticks)
    ax.set_xticklabels(xticklabels)
    # Plot one for each control
    max_ints = []
    for j, lc, xt in zip(J, line_colors, xticks):
        max_ints = [plot_dict[i][j]]
        ip.violin_dot_plot(ax, values=max_ints, positions=[xt], 
                           jit=jit, col=col, line_col=lc, bw=0.5,
                          dot_factor=dot_factor, transparency=transparency,
                          ft=ft)
             
#     ax.set_xlim((0.5,3))
    ax.set_ylim(-0.3, 7)
    ax.tick_params(labelleft=False, left=True, direction='in', labelsize=ft, color=col, labelcolor=col, length=lw*2)
    ax.tick_params(labelbottom=False, bottom=True, direction='in', labelsize=ft, color=col, labelcolor=col, length=lw*2)
    if i == 'g':
        ax.tick_params(labelbottom=True, bottom=True, direction='in', labelsize=ft, color=col, labelcolor=col, length=lw*2)
    filename = fig_dir + '/cell_max_int_method_' + i + '.pdf'
#     ip.plt.show()
    ip.plt.savefig(filename, transparent=True)
    ip.plt.close()

method:  a
method:  b
method:  c
method:  d
method:  e
method:  f
method:  g
method:  h


In [None]:
# Plot spot Count per cell

In [83]:
# get spot props csvs
spot_props_filenames = [output_dir + '/' + sn + '_spot_seg_props_filtered.csv' 
                        for sn in sample_names]
spot_props_filtered = [pd.read_csv(spf) for spf in spot_props_filenames]

In [84]:
# Get cell props
cell_props_filename = [output_dir + '/' + sn + '_cell_seg_props.csv' for sn in sample_names]
cell_props = [pd.read_csv(cpf) for cpf in cell_props_filename]

In [85]:
# Get each cell
cell_ids = [cp.ID.values for cp in cell_props]

In [86]:
# Get values
cell_counts = [[sp[sp.cell_id == cell].shape[0]
                for cell in c_ids]
               for sp, c_ids in tqdm(zip(spot_props_filtered, cell_ids))] 

50it [01:43,  2.06s/it]


In [87]:
# Put values in a dictionary
cell_count_dict = imfn.get_nested_dict(keys, cell_counts, (0,1,2))

In [88]:
# Combine FOV values to make a plot dictionary of values
plot_dict_count = {}
for i in I:
    plot_dict_count[i] = {}
    for j in J:
        count_list = []
        for k in K:
            count_list += cell_count_dict[i][j][k][0][1]
        plot_dict_count[i][j] = count_list

In [100]:
# plot parameters
import image_plots as ip
import numpy as np
import matplotlib.pyplot as plt

fig_dir = '../figures'

# col = 'w'
# dims = (10,5)
# lw = 1
# ft = 20
# jit = 0.15
# ylims = (0,25)
# transparency = 0.8
# dot_factor = 0.5
# y_jit=0.1

col = 'k'
lw = 1
ft = 5
dims = (0.9843 ,0.5906)
jit = 0.1
transparency = 0.6
# dot_factor = 0.5
y_jit=0.1

colors = plt.rcParams['axes.prop_cycle'].by_key()['color']


xticklabels = ['Control','Positive']
line_colors = [colors[0], colors[1]]

In [101]:
# Plot figure
# Plot brightest spot in cell
# Generate PLots

for i in I:
    print('method: ', i)
    fig, ax = ip.general_plot(col=col, dims=dims, ft=ft, lw=lw)
    xticks = ip.get_xticks(J)
    ax.set_xticks(xticks)
    ax.set_xticklabels(xticklabels)
    # Plot one for each control
    max_ints = []
    for j, lc, xt in zip(J, line_colors, xticks):
        counts = [plot_dict_count[i][j]]
        ip.violin_dot_plot(ax, values=counts, positions=[xt], 
                           jit=jit, y_jit=y_jit, col=col, line_col=lc, bw=0.5,
                          dot_factor=dot_factor, transparency=transparency,
                          ft=ft)
             
#     ax.set_xlim((0.5,3))
    ax.set_ylim(-0.3, 10)
    ax.tick_params(labelleft=True, left=True, direction='in', labelsize=ft, color=col, labelcolor=col, length=lw*2)
    ax.tick_params(labelbottom=False, bottom=True, direction='in', labelsize=ft, color=col, labelcolor=col, length=lw*2)
    if i == 'g':
        ax.tick_params(labelbottom=True, bottom=True, direction='in', labelsize=ft, color=col, labelcolor=col, length=lw*2)
    filename = fig_dir + '/cell_spot_counts_method_' + i + '.pdf'
#     ip.plt.show()
    ip.plt.savefig(filename, transparent=True)
    ip.plt.close()

method:  a
method:  b
method:  c
method:  d
method:  e
method:  f
method:  g
method:  h


In [None]:
# Plot pixel intensity histograms

In [108]:
from numba import njit

In [120]:
# Get pixels for each cell
@njit
def get_cell_pixels(cell_ids, seg, raw):
    cell_max_pix = []
    for cid in cell_ids:
        mask = seg == cid
        pix = raw*mask
        cell_max_pix.append(np.max(pix))
    return cell_max_pix

In [121]:
cell_max_pixels = []
i=0
for seg, props, raw, mbpi in zip(cell_seg, cell_props, raw_corrected, mbpi_new):
    i+=1
    print(i)    
    cell_ids = props.ID.values
    raw_norm = raw/mbpi
    cell_max_pix = get_cell_pixels(cell_ids=cell_ids, seg=seg, raw=raw_norm)
    cell_max_pixels.append(cell_max_pix)


  0%|          | 0/50 [00:00<?, ?it/s]

a


100%|██████████| 50/50 [28:48<00:00, 34.57s/it]


In [317]:
# Norm pixels to bg pixel intensity
cell_pixels_norm = [{cid:np.array(pix)/mbpi for cid, pix in cell_pix.items()} 
                    for cell_pix, mbpi in zip(cell_pixels, mbpi_new)]

In [None]:
# Get max pixel in each cell
cell_pixels_max = [[np.max(pix) for cid, pix in cell_pix_norm.items()] 
                   for cell_pix_norm in tqdm(cell_pixels_norm)]

In [None]:
# Generate dictionary of histograms
cell_pix_dict = imfn.get_nested_dict(keys, cell_max_pixels, [0,1,2])

In [None]:
# Set up ordered lists of factors
I = ['a','b','c','d','e','f','g','h']
J = ['pos','neg']
K = ['1','2','3']

In [None]:
# Calculate the mean and stdev between fields of view for each set of factors
# Generate a dictionary of mean and stdev values
plot_dict = {}
for i in I:
    plot_dict[i] = {}
    for j in J:
        max_pix = []
        for k in K:
            max_pix += [cell_pix_dict[i][j][k][0][1] for k in K]
        plot_dict[i][j] = max_pix

In [None]:
# plot parameters
import image_plots as ip
import numpy as np
import matplotlib.pyplot as plt

fig_dir = '../figures'

col = 'w'
dims = (10,5)
lw = 1
ft = 20
jit = 0.15
ylims = (0,25)
transparency = 0.8
dot_factor = 0.5

# col = 'k'
# lw = 1
# ft = 5
# dims = (0.9843 ,0.5906)
# jit = 0.1
# transparency = 0.6
# dot_factor = 0.5

colors = plt.rcParams['axes.prop_cycle'].by_key()['color']


xticklabels = ['Control','Positive']
line_colors = [colors[0], colors[1]]

In [None]:
# Plot brightest spot in cell
# Generate PLots

for i in I:
    print('method: ', i)
    fig, ax = ip.general_plot(col=col, dims=dims, ft=ft, lw=lw)
    xticks = ip.get_xticks(J)
    ax.set_xticks(xticks)
    ax.set_xticklabels(xticklabels)
    # Plot one for each control
    max_ints = []
    for j, lc, xt in zip(J, line_colors, xticks):
        max_ints = [plot_dict[i][j]]
        ip.violin_dot_plot(ax, values=max_ints, positions=[xt], 
                           jit=jit, col=col, line_col=lc, bw=0.5,
                          dot_factor=dot_factor, transparency=transparency,
                          ft=ft)
             
#     ax.set_xlim((0.5,3))
    ax.set_ylim(-0.3, 7)
    ax.tick_params(labelleft=False, left=True, direction='in', labelsize=ft, color=col, labelcolor=col, length=lw*2)
    ax.tick_params(labelbottom=False, bottom=True, direction='in', labelsize=ft, color=col, labelcolor=col, length=lw*2)
    if i == 'g':
        ax.tick_params(labelbottom=True, bottom=True, direction='in', labelsize=ft, color=col, labelcolor=col, length=lw*2)
    filename = fig_dir + '/cell_max_pix_method_' + i + '.pdf'
    ip.plt.show()
#     ip.plt.savefig(filename, transparent=True)
    ip.plt.close()