In [132]:
%matplotlib widget
%load_ext autoreload
%autoreload 2

import matplotlib
matplotlib.rcParams['pdf.fonttype'] = 42
matplotlib.rcParams['ps.fonttype'] = 42

import pandas as pd
import matplotlib as mpl
import time
import matplotlib.pyplot as plt
from matplotlib.path import Path as mplPath
import matplotlib.cm as cm
import matplotlib.patches as patches
from scipy import stats
import math
import numpy as np
from scipy import interpolate
from pathlib import Path
import os 

from gating_util import ScatterSelectorGating

from bsccm import BSCCM
from demixing_util import *

bsccm_with_spectra = BSCCM(str(Path.home()) + '/BSCCM_local/BSCCM/')

#containing the data to demix
bsccm_with_data = BSCCM(str(Path.home()) + '/BSCCM_local/BSCCM/')
# bsccm_with_data = BSCCM(str(Path.home()) + '/BSCCM_local/BSCCM-coherent/')


dataframe_saving_fullpath = str(Path.home()) + '/BSCCM_local/BSCCM/BSCCM_surface_markers.csv'
# For exporting figures
export_dir = '/home/henry/leukosight_data/figures/demixing'


single_markers = ['CD123', 'CD3', 'CD19', 'CD56', 'HLA-DR', 'CD45', 'CD14', 'CD16', 'autofluor']

# The names of the fluorescent measurements put into a database
channel_names = ['Fluor_426-446_shading_corrected', 
       'Fluor_500-550_shading_corrected', 
       'Fluor_550-570_shading_corrected', 
       'Fluor_585-625_shading_corrected', 
       'Fluor_627-673_shading_corrected', 
       'Fluor_690-_shading_corrected']
readable_channel_names = ['426-446 nm', 
       '500-550 nm', 
       '550-570 nm', 
       '585-625 nm', 
       '627-673 nm', 
       '>690 nm']

selections = {batch: ['selection_example_{}_positive_cells_batch_{}'.format(m, batch) 
               for m in single_markers] for batch in range(2)}

unmixed_channel_names = ['CD123/HLA-DR/CD14', 'CD3/CD19/CD56', 'CD45', 'CD16', 'autofluor']

    
# Prepare the raw data
mixed_data = bsccm_with_data.surface_marker_dataframe[channel_names].to_numpy()
#Make everything positive
mixed_data -= (np.min(mixed_data, axis=0) - 1e-2)


The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


# Selection of single spectra populations

In [2]:
batch = 0
antibodies = 'CD3'
mixed_ch_index_0 = 0
mixed_ch_index_1 = 1

mask = np.logical_and(bsccm_with_data.index_dataframe.antibodies == 'unstained',
      bsccm_with_data.index_dataframe.batch == batch)

data, marked_mask = load_mixed_data_subset(mixed_data, antibodies, selections, bsccm_with_data, batch=batch)
plt.figure()
plt.scatter(mixed_data[mask][:, mixed_ch_index_0], mixed_data[mask][:, mixed_ch_index_1], 
            color='black', s=15, alpha=0.5, edgecolors='None', rasterized=True)

plt.xlabel(channel_names[mixed_ch_index_0])
plt.ylabel(channel_names[mixed_ch_index_1])
plt.title('Unstained')
plt.xlim([35, 67])
plt.ylim([20, 370])
plt.savefig(export_dir + '/population_selection_unstained.pdf', transparent=True, dpi=300)   


mask = np.logical_and(bsccm_with_data.index_dataframe.antibodies == antibodies,
      bsccm_with_data.index_dataframe.batch == batch)

data, marked_mask = load_mixed_data_subset(mixed_data, antibodies, selections, bsccm_with_data, batch=batch)
plt.figure()
plt.scatter(mixed_data[mask][np.logical_not(marked_mask), mixed_ch_index_0],
            mixed_data[mask][np.logical_not(marked_mask), mixed_ch_index_1], 
            color='black', s=15, alpha=0.4, edgecolors='None', rasterized=True)
plt.scatter(mixed_data[mask][marked_mask, mixed_ch_index_0], mixed_data[mask][marked_mask, mixed_ch_index_1], 
            color='lime', s=15, alpha=0.4, edgecolors='None', rasterized=True)

plt.scatter(np.mean(mixed_data[mask][marked_mask, mixed_ch_index_0]),
            np.mean(mixed_data[mask][marked_mask, mixed_ch_index_1]), 
            c='red')

plt.scatter(np.mean(mixed_data[mask][np.logical_not(marked_mask), mixed_ch_index_0]),
            np.mean(mixed_data[mask][np.logical_not(marked_mask), mixed_ch_index_1]),
            c='red')

plt.xlabel(readable_channel_names[mixed_ch_index_0])
plt.ylabel(readable_channel_names[mixed_ch_index_1])
plt.title('Antibody stained')
plt.xlim([35, 67])
plt.ylim([20, 370])
plt.savefig(export_dir + '/population_selection_CD3_marked.pdf', dpi=300, transparent=True)   

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

# Figuring our demixing matrix

In [147]:
single_marker_unmix_channel_spectra, single_marker_unmix_channel_brightness, unmix_channel_spectra, unmix_channel_brightness = \
    compute_spectra(bsccm_with_spectra, channel_names, unmixed_channel_names, single_markers, batch=batch)


spectra = np.array([single_marker_unmix_channel_spectra[channel] for channel in single_markers])

# spectra /= np.linalg.norm(spectra, axis=0)

fig, ax = plt.subplots()
plt.imshow(spectra, cmap='inferno')
ax.set_yticklabels([''] + single_markers)
ax.set_xticks(range(6))
ax.set_xticklabels(readable_channel_names)
ax.tick_params(axis='x', labelrotation=-45 )
plt.colorbar()

plt.savefig(export_dir + '/single_ab_mixing_matrix.pdf', transparent=True, dpi=300)


fig, ax = plt.subplots()
brightness = np.array([[single_marker_unmix_channel_brightness[channel] for channel in single_markers]]).T
brightness = brightness / np.min(brightness)
plt.imshow(brightness, cmap='inferno')
plt.colorbar()
# ax.set_yticklabels([''] + single_markers)
# ax.set_xticks(range(6))
# ax.set_xticklabels(readable_channel_names)
# ax.tick_params(axis='x', labelrotation=-45 )
plt.savefig(export_dir + '/spectra_brightness.pdf', transparent=True, dpi=300)


Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

  ax.set_yticklabels([''] + single_markers)


Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

# Demixing results with different regularization tuning

In [122]:
single_marker_unmix_channel_spectra, single_marker_unmix_channel_brightness, unmix_channel_spectra, unmix_channel_brightness = \
    compute_spectra(bsccm_with_spectra, channel_names, spectra_names, single_markers, batch=batch)

spectra_names = ['CD3', 'autofluor']
spectra = np.array([single_marker_unmix_channel_spectra[channel] for channel in spectra_names])

fig, ax = plt.subplots()
plt.imshow(spectra, cmap='inferno')
ax.set_yticks(range(2))
ax.set_yticklabels(spectra_names)
ax.set_xticks(range(6))
ax.set_xticklabels(readable_channel_names)
ax.tick_params(axis='x', labelrotation=-45 )

plt.savefig(export_dir + '/two_spectrum_mixing_matrix.pdf', transparent=True)


Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

In [126]:
readable_channel_names

['426-446 nm',
 '500-550 nm',
 '550-570 nm',
 '585-625 nm',
 '627-673 nm',
 '>690 nm']

In [134]:
batch = 0
antibodies = 'CD3'

spectra_names = ['CD3', 'autofluor']

single_marker_unmix_channel_spectra, single_marker_unmix_channel_brightness, unmix_channel_spectra, unmix_channel_brightness = \
    compute_spectra(bsccm_with_spectra, channel_names, spectra_names, single_markers, batch=batch)

print(antibodies, batch, '\t\t\t\t\t\t\t')
mask = np.logical_and(bsccm_with_data.index_dataframe.antibodies == antibodies,
      bsccm_with_data.index_dataframe.batch == batch)
if antibodies == 'unstained':
    spectra_names = ['autofluor'] # single spectrum
    spectra = np.stack([single_marker_unmix_channel_spectra[c] for c in spectra_names], axis=0)
    reweighting = [1]
else:
    spectra_names = [antibodies, 'autofluor']
    spectra = np.stack([single_marker_unmix_channel_spectra[c] for c in spectra_names], axis=0)
    autofluor_mag = single_marker_unmix_channel_brightness['autofluor']
    marker_mag = single_marker_unmix_channel_brightness[antibodies]
    autofluor_vec = single_marker_unmix_channel_spectra['autofluor']
    marker_vec = single_marker_unmix_channel_spectra[antibodies]
    weighted_proj = (autofluor_vec @ marker_vec) / marker_mag

    reweighting = [weighted_proj, 1]

data, marked_mask = load_mixed_data_subset(mixed_data, antibodies, selections, bsccm_with_data, batch=batch)
plt.figure()
plt.scatter(mixed_data[mask][np.logical_not(marked_mask), mixed_ch_index_0], 
            mixed_data[mask][np.logical_not(marked_mask), mixed_ch_index_1], 
            color='black', s=15, alpha=0.5, edgecolors='None', rasterized=True)
plt.scatter(mixed_data[mask][marked_mask, mixed_ch_index_0], 
            mixed_data[mask][marked_mask, mixed_ch_index_1], 
            color='lime', s=15, alpha=0.5, edgecolors='None', rasterized=True)

plt.xlabel(readable_channel_names[mixed_ch_index_0])
plt.ylabel(readable_channel_names[mixed_ch_index_1])
plt.title('Fluorescence')
plt.savefig(export_dir + '/raw_fluor_{}.pdf'.format(antibodies), transparent=True, dpi=300)
    
fig, ax = plt.subplots(3,1, figsize=(4,12))
for index, l1_reg, save_name in zip(range(3), [7e-3, 7e-1, 7e1], ('Under-regularized', 
                                                 'Optimally-regularized',
                                                'Over-regularized')):
    unmixed, background_spectrum = do_factorization(mixed_data[mask], spectra,
            l1_reg = l1_reg,
            momentum=0.9,
            learning_rate = 1e3,
            background_learning_rate=1e-1,
            reweighting=reweighting)


    mixed_ch_index_0 = 0
    mixed_ch_index_1 = 1

    unmixed_ch_index_0 = 1
    unmixed_ch_index_1 = 0

  

    ax[index].scatter(unmixed[np.logical_not(marked_mask), unmixed_ch_index_0], 
                      unmixed[np.logical_not(marked_mask), unmixed_ch_index_1], 
                color='black', s=15, alpha=0.5, edgecolors='None', rasterized=True)
    ax[index].scatter(unmixed[marked_mask, unmixed_ch_index_0],
                      unmixed[marked_mask, unmixed_ch_index_1], 
                color='lime', s=15, alpha=0.5, edgecolors='None', rasterized=True)

    ax[index].set_xlabel('Autofluorescnece')
    ax[index].set_ylabel(antibodies)
    ax[index].set_title('Unmixed ({})'.format(save_name))
plt.savefig(export_dir + '/{}_unmixed.pdf'.format(save_name), transparent=True, dpi=300)

CD3 0 							


Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

112: 	loss: 393.720	rel_error: 0.0005		8.4  -0.0  -0.0  -0.0  -0.0  -0.0  				

# Two spectra vs all spectra model


In [131]:
import matplotlib.gridspec as gridspec

y_ax_antibodies = ['CD123', 'CD3', 'CD45', 'CD16',  'all']
unmix_channels_to_use = ['CD123/HLA-DR/CD14', 'CD3/CD19/CD56', 'CD45', 'CD16']
batch = 0
log_plots=False

fig = plt.figure(figsize=(8,8))
gs = fig.add_gridspec(len(y_ax_antibodies), len(unmix_channels_to_use) + 1,  width_ratios=(7,7,7,7, 4),
                      height_ratios=(1,) * len(y_ax_antibodies),
                  left=0.1, right=0.9, bottom=0.1, top=0.9,
                  wspace=0.05, hspace=0.05)


for i, antibodies in enumerate(y_ax_antibodies):

    single_marker_unmix_channel_spectra, single_marker_unmix_channel_brightness, unmix_channel_spectra, unmix_channel_brightness = \
        compute_spectra(bsccm_with_spectra, channel_names, unmix_channels_to_use, single_markers, batch=batch)

    
    spectra = np.stack([unmix_channel_spectra[c] for c in unmix_channels_to_use], axis=0)

    #Weight regularization based on projection onto first singular vector
    unmix_spectrum = np.array([unmix_channel_spectra[name] * unmix_channel_brightness[name] for name in unmix_channels_to_use])
    u, s, vh = np.linalg.svd(unmix_spectrum, full_matrices=False)
    first_vec = np.abs(vh[0])
    reweighting = [first_vec @ unmix_channel_spectra[name] / unmix_channel_brightness[name]
                    for name in unmix_channels_to_use]


    mask = np.logical_and(bsccm_with_data.index_dataframe.antibodies == antibodies,
          bsccm_with_data.index_dataframe.batch == batch)

    if antibodies != 'all':
        data, marked_mask = load_mixed_data_subset(mixed_data, antibodies, selections, bsccm_with_data, batch=batch)
    else:
        marked_mask = np.zeros(np.sum(mask), np.bool)
        
    l1_reg = 7e-1
    unmixed, background_spectrum = do_factorization(mixed_data[mask], spectra,
            l1_reg = l1_reg,
            momentum=0.9,
            learning_rate = 1e3,
            background_learning_rate=1e-1,
            reweighting=reweighting,
#             stopping_error=0.1
                            )


    show_antibody = antibodies
    unmixed_channel_names = ['CD123/HLA-DR/CD14', 'CD3/CD19/CD56', 'CD45', 'CD16']
    spectra_names = unmix_channels_to_use
    
    if np.sum([show_antibody in m for m in unmixed_channel_names]):
        marker_index = [show_antibody in m for m in unmixed_channel_names].index(True)
        #otherwise just keep it the same for final row with all antibodies
    
    for j in range(len(unmixed_channel_names)):

        if j == 0:
            if i == 0:
                first_col_ax = fig.add_subplot(gs[i, j])
            else:
                first_row_ax = fig.axes[j]
                first_col_ax = fig.add_subplot(gs[i, j], sharex=first_row_ax)
            first_col_ax.set_ylabel(unmixed_channel_names[marker_index])
            ax = first_col_ax
        else:
            if i == 0:
                ax = fig.add_subplot(gs[i, j], sharey=first_col_ax)
            else:
                first_row_ax = fig.axes[j]
                ax = fig.add_subplot(gs[i, j], sharex=first_row_ax, sharey=first_col_ax) 
        if i != len(y_ax_antibodies) - 1:
            ax.axes.xaxis.set_visible(False)
        else:
            ax.set_xlabel(spectra_names[j])

        if j !=  0:  
            ax.axes.yaxis.set_visible(False)

        else:
            ax.set_ylabel(show_antibody)
            
            
        if log_plots:
            log_data_other = np.log(1e-2 + unmixed[:, j])
            log_data_marker = np.log(1e-2 + unmixed[:, marker_index])
            ax.scatter(log_data_other[np.logical_not(marked_mask)], 
                          log_data_marker[np.logical_not(marked_mask)], 
                s=3, c='k', rasterized=True, alpha=0.5, edgecolors='None',)
            ax.scatter(log_data_other[marked_mask], 
                          log_data_marker[marked_mask], 
                s=3, c='lime', rasterized=True, alpha=0.5, edgecolors='None',)
            #Give good axes in spite of one big outlier
#             ax.set_xlim([np.min(log_data_other) -0.5, 1.2 * np.percentile(log_data_other, 99.9)])
#             ax.set_ylim([np.min(log_data_marker) - 0.5, 1.2 * np.percentile(log_data_marker, 99.9)])
        else:
            ax.scatter(
                unmixed[np.logical_not(marked_mask), j],
                unmixed[np.logical_not(marked_mask), marker_index], 
                s=3, c='k', rasterized=True, alpha=0.5, edgecolors='None',)
            ax.scatter(
                unmixed[marked_mask, j], 
                 unmixed[marked_mask, marker_index], 
                s=3, c='lime', rasterized=True, alpha=0.5, edgecolors='None',)
            #Give good axes in spite of one big outlier
#             ax.set_xlim([-1, 1.4 * np.percentile(unmixed[:, j], 99.8)])
#             ax.set_ylim([-1, 1.4 * np.percentile(unmixed[:, marker_index], 99.8)])              

        
                       
            
    if i == 0:
        ax_hist = fig.add_subplot(gs[i, -1], sharey=first_col_ax)
    else:
        first_row_hist_ax = fig.axes[len(unmix_channels_to_use)]
        ax_hist = fig.add_subplot(gs[i, -1], sharey=first_col_ax, sharex=first_row_hist_ax)

    if log_plots:
        ax_hist.hist(np.log(1e-2 + unmixed[np.logical_not(marked_mask), marker_index]),
                           60, color='k', alpha=0.5, log=True, rasterized=True, density=True)
        ax_hist.hist(np.log(1e-2 + unmixed[marked_mask, marker_index]), 
                           60, color='lime', alpha=0.5, log=True, rasterized=True, density=True)
    else:
        ax_hist.hist(unmixed[marked_mask, marker_index], 60, color='lime', 
                            alpha=0.5, log=True, orientation='horizontal', rasterized=True, density=True)
        ax_hist.hist(unmixed[np.logical_not(marked_mask), marker_index], 60, 
                            color='k', alpha=0.5, log=True, orientation='horizontal', rasterized=True, density=True)
        
    ax_hist.axes.yaxis.set_visible(False)
    if i == len(y_ax_antibodies) - 1:        
        ax_hist.set_xlabel('Density')

# make rows and corresponding columns have the same values for limits
for i in range(len(y_ax_antibodies) - 1):
    x_ax = fig.axes[i]
    y_ax = fig.axes[i * len(y_ax_antibodies)]
    
    new_ax_val = [min(x_ax.get_xlim()[0], y_ax.get_ylim()[0]),
                max(x_ax.get_xlim()[1], y_ax.get_ylim()[1])]
    x_ax.set_xlim(new_ax_val)
    y_ax.set_ylim(new_ax_val)
    
    
        
fig.savefig(export_dir + '/multi_marker_unmixed.pdf'.format(antibodies), 
                transparent=True, dpi=300)

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

4683: 	loss: 20.482	rel_error: 0.0005		-0.0  -0.0  1.9  1.1  2.9  23.2  						

  return n/db/n.sum(), bin_edges


In [105]:
y_ax_antibodies

['CD123', 'CD3', 'CD45', 'CD16', 'all']

0
1
2
3


# Show multi-marker unmixing matrix

In [9]:
single_marker_unmix_channel_spectra, single_marker_unmix_channel_brightness, unmix_channel_spectra, unmix_channel_brightness = \
    compute_spectra(bsccm_with_spectra, channel_names, unmixed_channel_names, single_markers, batch=batch)

spectra_names = ['CD123/HLA-DR/CD14', 'CD3/CD19/CD56', 'CD45', 'CD16']

spectra = np.array([unmix_channel_spectra[channel] for channel in spectra_names])

fig, ax = plt.subplots()
plt.imshow(spectra, cmap='inferno')
ax.set_yticks(range(4))
ax.set_yticklabels(spectra_names)
ax.set_xticks(range(6))
ax.set_xticklabels(readable_channel_names)
ax.tick_params(axis='x', labelrotation=-45 )
plt.savefig(export_dir + '/multi_ab_mixing_matrix.pdf', transparent=True)


Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

# Show raw fluorescnece for four different marked populations

In [80]:
batch = 0
mixed_ch_index_0 = 0
mixed_ch_index_1 = 1

fig, ax = plt.subplots(5, 1, figsize=(3, 15))

for j, antibodies in enumerate(['CD123', 'CD3', 'CD45', 'CD16', 'all']):

    mask = np.logical_and(bsccm_with_data.index_dataframe.antibodies == antibodies,
          bsccm_with_data.index_dataframe.batch == batch)
    
    if j == 4:
        marked_mask = np.zeros(np.sum(mask), dtype=np.bool)
    else:
        data, marked_mask = load_mixed_data_subset(mixed_data, antibodies, selections, bsccm_with_data, batch=batch)
    ax[j].scatter(mixed_data[mask][np.logical_not(marked_mask), mixed_ch_index_0],
                mixed_data[mask][np.logical_not(marked_mask), mixed_ch_index_1], 
                color='black', s=15, alpha=0.4, edgecolors='None', rasterized=True)
    ax[j].scatter(mixed_data[mask][marked_mask, mixed_ch_index_0], mixed_data[mask][marked_mask, mixed_ch_index_1], 
                color='lime', s=15, alpha=0.4, edgecolors='None', rasterized=True)

    ax[j].set_xlabel(readable_channel_names[mixed_ch_index_0])
    ax[j].set_ylabel(readable_channel_names[mixed_ch_index_1])
    ax[j].set_title(antibodies)
#     plt.xlim([35, 67])
#     plt.ylim([20, 370])

plt.savefig(export_dir + '/raw_fluor_for_multi_model_unmix.pdf'.format(antibodies), transparent=True, dpi=300)   

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

## Draw some matrices for forward model

In [138]:
image.shape

(14, 1, 6)

In [141]:
image = np.random.rand(1, 6)
image = np.concatenate(14 * [image])

fig, ax = plt.subplots()
plt.imshow(image, cmap='inferno')
# ax.set_yticks(range(2))
# ax.set_yticklabels(spectra_names)
# ax.set_xticks(range(6))
# ax.set_xticklabels(readable_channel_names)
# ax.tick_params(axis='x', labelrotation=-45 )

plt.savefig(export_dir + '/backgrounds.pdf', transparent=True)


Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …