In [None]:
import os
import sys
# Record interpreter version, working directory and interpreter path for reproducibility.
# The original called os.getcwd() in the middle of the cell, where its value is discarded,
# and concatenated the version string without a separator.
print("Python version: " + sys.version)
print("Working directory: " + os.getcwd())
print(sys.executable)

In [2]:
import numpy as np
np.random.seed(123)
import pandas as pd
import scipy
import itertools

import umap
import matplotlib.pyplot as plt
%matplotlib inline
import seaborn as sns

import scanpy as sc
import anndata as ad
import scvelo as scv
from tqdm.notebook import tqdm

from pathlib import Path

In [3]:
import venn

In [4]:
import cellrank as cr

In [None]:
cr.__version__

In [6]:
from cellrank.kernels import PseudotimeKernel

In [7]:
from cellrank.kernels import CytoTRACEKernel

In [8]:
import scanpy.external as sce

In [9]:
from pysankey import sankey

In [10]:
import upsetplot
from upsetplot import from_contents
from upsetplot import UpSet
from upsetplot import plot

In [None]:
# Session-wide scanpy configuration: set the verbosity level to 1, print scanpy's
# logging header, and set figure defaults (dpi=150, white figure face colour).
sc.settings.verbosity = 1
sc.logging.print_header()
sc.settings.set_figure_params(dpi=150, facecolor='white')

In [12]:
# remove weird grid from scvelo
plt.rcParams['axes.grid'] = False

In [111]:
from matplotlib_inline.backend_inline import set_matplotlib_formats
set_matplotlib_formats('svg', dpi = 300)

In [112]:
# Text defaults for exported figures: SVG font type 'none', DejaVu Sans, size 8.
plt.rcParams.update({
    'svg.fonttype': 'none',
    'font.family': 'DejaVu Sans',
    'font.size': '8',
})


In [14]:
# revised from Stefan's cell type signature
# Folder with the signature collection, given relative to the notebook's working directory.
signatures_path_ = '../cell_type_from_stefan/scrnaseq_signature_collection/'
# NOTE(review): the star import hides which names this notebook takes from
# score_and_classify and may shadow names defined above; consider importing the
# required helpers explicitly.
from score_and_classify import *

In [15]:
# Root folder of the preprocessed datasets. Defaults to the original cluster path;
# set the CRC_DATA_DIR environment variable to run the notebook on another machine.
new_data_folder = os.environ.get(
    'CRC_DATA_DIR', '/fast/users/twei_m/work/crc/datasets_new_preprocessing')

In [16]:
adata_epi = sc.read(Path(new_data_folder)/'202306_CB_epi_Numbat_Scitcem_inferCNV_icms_Uhlitz_scanvi.h5')

In [17]:
# Sample IDs with the 't' suffix (p009 contributes two: p009t1 and p009t2).
# Used below to select the cells that go into the per-sample heatmaps.
tsamples = ['p007t', 'p008t', 'p009t1','p009t2', 'p013t', 'p014t', 'p016t', 
           'p020t', 'p021t', 'p026t', 'p035t'] 

In [18]:
# All sample IDs: the 'n'-suffixed IDs followed by every entry of tsamples.
samples = ['p007n', 'p008n', 'p009n1', 'p009n2','p013n', 'p014n', 'p016n', 
           'p020n', 'p021n'] + tsamples

### Numbat and Scitcem

In [None]:
# Count the cells labelled tumour by both the 'numbat' and 'scitcem_call' columns.
# The original wrapped the boolean Series in a list, so the cell displayed a
# one-element list instead of a readable summary.
((adata_epi.obs['numbat'] == 'tumour\n(tumour sample)')
 & (adata_epi.obs['scitcem_call'] == 'tumour\n(tumour sample)')).value_counts()

In [20]:
# Per-cell agreement between the 'numbat' and 'scitcem_call' columns.
#
# Built from boolean masks instead of a Python row loop. The original wrote values with
# adata_epi.obs[col][i] = ..., which is chained assignment (pandas does not guarantee the
# write reaches the frame, and drops it under copy-on-write) and relies on positional
# integer lookup through Series[...], which is deprecated for label-indexed Series.
# It also assigned 'Numbat_Scitcem_tumour' twice with the same expression.
obs = adata_epi.obs
tumour_label = 'tumour\n(tumour sample)'
normal_label = 'normal\n(tumour sample)'

both_tumour = ((obs['numbat'] == tumour_label)
               & (obs['scitcem_call'] == tumour_label)).to_numpy()
both_normal = ((obs['numbat'] == normal_label)
               & (obs['scitcem_call'] == normal_label)).to_numpy()
# NOTE(review): this label has leading/trailing newlines, unlike the 'tumour\nsample'
# label used elsewhere in the notebook - confirm it matches the stored values.
from_normal_sample = (obs['sample_origin'] == '\nnormal\nsample\n').to_numpy()

adata_epi.obs['Numbat_Scitcem_tumour'] = both_tumour

# np.select uses the first condition that matches, mirroring the original if/elif order.
adata_epi.obs['tumour_normal_normal'] = pd.Categorical(np.select(
    [from_normal_sample, both_tumour, both_normal],
    ['normal_sample', 'genomically_tumour', 'genomically_normal'],
    default='no confident assignment'))

In [22]:
# Fix the category order of 'tumour_normal_normal' (all four categories must be present).
tumour_normal_order = ['genomically_tumour', 'no confident assignment',
                       'genomically_normal', 'normal_sample']
tumour_normal_col = adata_epi.obs['tumour_normal_normal']
adata_epi.obs['tumour_normal_normal'] = tumour_normal_col.cat.reorder_categories(tumour_normal_order)

In [23]:
adata_epi.uns['tumour_normal_normal_colors'] = ['#ff7f0e', '#9b1ee3','#1f77b4','#d3d3d3']

In [24]:
# Lookup table patient -> anatomical location.
# The locations are paired by position with the sorted unique patient IDs returned by
# np.unique, so the list must hold exactly one entry per patient, in that sorted order.
# NOTE(review): the pairing is implicit - verify it against the patient IDs whenever the
# input data changes.
anatomical_loc = pd.DataFrame({'patient': list(np.unique(adata_epi.obs['patient'])) ,
                               'anatomical_location':['0_Cecum', '0_Cecum', '7_Sigmoid', '2_Ascending', '2_Ascending',
                                                      '9_Rectum', '9_Rectum', '6_Descending', '0_Cecum', '2_Ascending']})

In [25]:
# Attach the per-patient anatomical location to every cell.
# Dropping an existing 'anatomical_location' column first keeps the cell idempotent:
# without it, a re-run would yield 'anatomical_location_x' / 'anatomical_location_y'.
# validate='many_to_one' fails loudly if the lookup ever holds a patient twice, which
# would otherwise duplicate cells.
obs_with_location = (
    adata_epi.obs
    .drop(columns='anatomical_location', errors='ignore')
    .merge(anatomical_loc, on='patient', how='left', validate='many_to_one')
)
# merge() returns a fresh integer index; put the original cell index back.
adata_epi.obs = obs_with_location.set_axis(adata_epi.obs.index)

### Venn

In [None]:
# Cell names from tumour samples whose 'inferCNV_result' is the CNA label.
infercnv_mask = ((adata_epi.obs['inferCNV_result'] == 'CNA\n(tumour sample)')
                 & (adata_epi.obs['sample_origin'] == 'tumour\nsample'))
set_infercnv = set(adata_epi.obs.index[infercnv_mask.to_numpy()])
len(set_infercnv)

In [None]:
# Cell names from tumour samples whose 'scitcem_call' is the tumour label.
scitcem_mask = ((adata_epi.obs['scitcem_call'] == 'tumour\n(tumour sample)')
                & (adata_epi.obs['sample_origin'] == 'tumour\nsample'))
set_scitcem = set(adata_epi.obs.index[scitcem_mask.to_numpy()])
len(set_scitcem)

In [None]:
# Cell names from tumour samples whose 'numbat' column carries the tumour label.
numbat_mask = ((adata_epi.obs['numbat'] == 'tumour\n(tumour sample)')
               & (adata_epi.obs['sample_origin'] == 'tumour\nsample'))
set_numbat = set(adata_epi.obs.index[numbat_mask.to_numpy()])
len(set_numbat)

In [None]:
# Cell names from tumour samples whose 'iCMS_scANVI' label is anything but the normal label.
# NOTE(review): this is a "not normal" test, whereas the upset_df cells test membership in
# the two iCMS labels - the two definitions differ for any other or missing label.
icms_mask = ((adata_epi.obs['iCMS_scANVI'] != 'normal\n(tumour sample)')
             & (adata_epi.obs['sample_origin'] == 'tumour\nsample'))
set_icms_scANVI = set(adata_epi.obs.index[icms_mask.to_numpy()])
len(set_icms_scANVI)

### upset plot

In [30]:
# Build the upsetplot input from the four caller sets (each passed as a sorted list).
caller_sets = {
    'iCMS': set_icms_scANVI,
    'Scitcem': set_scitcem,
    'Numbat': set_numbat,
    'inferCNV': set_infercnv,
}
upset_dic = from_contents({caller: sorted(cells) for caller, cells in caller_sets.items()})

In [None]:
# Overview UpSet plot of the four caller sets, drawn into an explicitly sized figure.
fig = plt.figure(figsize=(12, 5))
# The trailing semicolon suppresses the repr of the returned value in the cell output.
upsetp = plot(upset_dic, show_counts=True, fig=fig, element_size=None);

In [33]:
# Per-cell table of the tumour samples, re-indexed by four boolean caller flags
# (index levels, in order: iCMS, inferCNV, Numbat, Scitcem).
upset_df = adata_epi[adata_epi.obs['sample_origin'] == 'tumour\nsample'].obs.copy()

caller_flags = {
    'iCMS': upset_df.iCMS_scANVI.isin(['iCMS2\n(tumour sample)', 'iCMS3\n(tumour sample)']),
    'inferCNV': upset_df.inferCNV_result == 'CNA\n(tumour sample)',
    'Numbat': upset_df.numbat == 'tumour\n(tumour sample)',
    'Scitcem': upset_df.scitcem_call == 'tumour\n(tumour sample)',
}
# Replaces the cell-name index; all obs columns stay available for stacking.
upset_df.index = pd.MultiIndex.from_arrays(list(caller_flags.values()),
                                           names=list(caller_flags))

In [None]:
# UpSet plot of the four callers with intersection bars stacked by 'MS_status'.
fig = plt.figure(figsize=(12, 6))

upsetp = UpSet(upset_df, show_counts='%d', show_percentages=False,
               intersection_plot_elements=0, element_size=40)
upsetp.add_stacked_bars(by='MS_status', elements=10)
# Draw into the figure created above explicitly rather than via the implicit current figure.
upsetp.plot(fig=fig)
# The semicolon is attached to the last statement: a lone ';' line is not valid Python
# and is handled by IPython's ';' escape instead of suppressing the output.
plt.legend(bbox_to_anchor=(1.04, 1), loc="upper left");

In [36]:
# Keep every cell that at least one of the four callers does not label as normal.
not_normal_icms = upset_df.iCMS_scANVI.ne('normal\n(tumour sample)')
not_normal_numbat = upset_df.numbat.ne('normal\n(tumour sample)')
not_normal_scitcem = upset_df.scitcem_call.ne('normal\n(tumour sample)')
not_normal_infercnv = ~upset_df.inferCNV_result.isin(['CNN\n(tumour sample)', 'failed_sample'])

upset_df_tumour_cells = upset_df[
    not_normal_icms | not_normal_numbat | not_normal_scitcem | not_normal_infercnv
]

In [None]:
# UpSet plot of the not-all-normal cells, bars stacked by 'anatomical_location'.
fig = plt.figure(figsize=(12, 6))

upsetp = UpSet(upset_df_tumour_cells, show_counts=True, show_percentages=False,
               intersection_plot_elements=0, element_size=40)
upsetp.add_stacked_bars(by='anatomical_location', elements=10)
# Draw into the figure created above explicitly rather than via the implicit current figure.
upsetp.plot(fig=fig)
# The semicolon is attached to the last statement: a lone ';' line is not valid Python
# and is handled by IPython's ';' escape instead of suppressing the output.
plt.legend(bbox_to_anchor=(1.04, 1), loc="upper left");

In [None]:
# UpSet plot of the not-all-normal cells, bars stacked by 'patient'.
fig = plt.figure(figsize=(12, 6))

upsetp = UpSet(upset_df_tumour_cells, show_counts=True, show_percentages=False,
               intersection_plot_elements=0, element_size=40)
upsetp.add_stacked_bars(by='patient', elements=10)
# Draw into the figure created above explicitly rather than via the implicit current figure.
upsetp.plot(fig=fig)
# The semicolon is attached to the last statement: a lone ';' line is not valid Python
# and is handled by IPython's ';' escape instead of suppressing the output.
plt.legend(bbox_to_anchor=(1.04, 1), loc="upper left");

In [None]:
# UpSet plot of the not-all-normal cells, bars stacked by 'Uhlitz_scANVI'.
fig = plt.figure(figsize=(12, 6))

upsetp = UpSet(upset_df_tumour_cells, show_counts=True, show_percentages=False,
               intersection_plot_elements=0, element_size=40)
upsetp.add_stacked_bars(by='Uhlitz_scANVI', elements=10)
# Draw into the figure created above explicitly rather than via the implicit current figure.
upsetp.plot(fig=fig)
# The semicolon is attached to the last statement: a lone ';' line is not valid Python
# and is handled by IPython's ';' escape instead of suppressing the output.
plt.legend(bbox_to_anchor=(1.04, 1), loc="upper left");

### Consensus cell identity

- normal sample
- Numbat == tumour and Scitcem == tumour → tumour; same for normal
- Scitcem == 0.5 & Numbat > 0.5 → tumour
- MSS: Scitcem < 0.5 & Numbat > 0.5 → tumour
- MSI: Scitcem > 0.5 & WGS normal sample Mutect2 alt. allele == 0

- cell_identity_list = pd.read_csv(Path(new_data_folder)/'conflicting_cell_identity_scitcem_high.txt', index_col=0)

In [41]:
# Consensus cell identity per cell.
#
# Built from boolean masks instead of a Python row loop. The original wrote values with
# adata_epi.obs['cell_identity'][i] = ..., which is chained assignment (pandas does not
# guarantee the write reaches the frame, and drops it under copy-on-write) and relies on
# positional integer lookup through Series[...], deprecated for label-indexed Series.
obs = adata_epi.obs
tumour_label = 'tumour\n(tumour sample)'
normal_label = 'normal\n(tumour sample)'

# NOTE(review): this label has leading/trailing newlines, unlike the 'tumour\nsample'
# label used elsewhere in the notebook - confirm it matches the stored values.
from_normal_sample = (obs['sample_origin'] == '\nnormal\nsample\n').to_numpy()
both_tumour = ((obs['numbat'] == tumour_label)
               & (obs['scitcem_call'] == tumour_label)).to_numpy()
both_normal = ((obs['numbat'] == normal_label)
               & (obs['scitcem_call'] == normal_label)).to_numpy()
# A rule on scitcem_p == 0.5 with p_cnv > 0.5 ("no observed alt.") was deliberately left
# out by the author; such cells fall through to the MSS/MSI rules below.
# Numbat is higher in MSS
mss_numbat_tumour = ((obs['MS_status'] == 'MSS') & (obs['p_cnv'] > 0.5)).to_numpy()
# Scitcem is higher in MSI
msi_scitcem_tumour = ((obs['MS_status'] == 'MSI') & (obs['scitcem_p'] > 0.5)).to_numpy()

# np.select uses the first condition that matches, mirroring the original if/elif order.
adata_epi.obs['cell_identity'] = pd.Categorical(np.select(
    [from_normal_sample, both_tumour, both_normal, mss_numbat_tumour, msi_scitcem_tumour],
    ['normal_sample', 'genomically_tumour', 'genomically_normal',
     'genomically_tumour', 'genomically_tumour'],
    default='no confident assignment'))
          

In [44]:
# Reuse the 'tumour_normal_normal' palette for 'cell_identity'.
# Stored as a copy so the two uns entries do not share one mutable object.
adata_epi.uns['cell_identity_colors'] = list(adata_epi.uns['tumour_normal_normal_colors'])

In [45]:
# Fix the category order of 'cell_identity' (all four categories must be present).
cell_identity_order = ['genomically_tumour', 'no confident assignment',
                       'genomically_normal', 'normal_sample']
cell_identity_col = adata_epi.obs['cell_identity']
adata_epi.obs['cell_identity'] = cell_identity_col.cat.reorder_categories(cell_identity_order)

In [46]:
# Apply the same category order to 'tumour_normal_normal' as was set when the column was
# created; this leaves the order unchanged if that earlier cell has already run.
shared_order = ['genomically_tumour', 'no confident assignment',
                'genomically_normal', 'normal_sample']
adata_epi.obs['tumour_normal_normal'] = (
    adata_epi.obs['tumour_normal_normal'].cat.reorder_categories(shared_order)
)

In [47]:
# re-run to include cell_identity col
upset_df = adata_epi[adata_epi.obs['sample_origin'] == 'tumour\nsample'].obs.copy()

upset_df = upset_df.set_index(upset_df.iCMS_scANVI.isin(['iCMS2\n(tumour sample)','iCMS3\n(tumour sample)'])).set_index(
upset_df.inferCNV_result == 'CNA\n(tumour sample)', append=True).set_index(
upset_df.numbat == 'tumour\n(tumour sample)', append=True).set_index(
upset_df.scitcem_call == 'tumour\n(tumour sample)', append=True)

upset_df.index.rename(['iCMS', 'inferCNV', 'Numbat', 'Scitcem'], inplace=True)

In [None]:
# UpSet plot of the four callers with intersection bars stacked by 'cell_identity'.
fig = plt.figure(figsize=(12, 6))

upsetp = UpSet(upset_df, show_counts='%d', show_percentages=False,
               intersection_plot_elements=0, element_size=40)
upsetp.add_stacked_bars(by='cell_identity', elements=10)
# Draw into the figure created above explicitly rather than via the implicit current figure.
upsetp.plot(fig=fig)
# The semicolon is attached to the last statement: a lone ';' line is not valid Python
# and is handled by IPython's ';' escape instead of suppressing the output.
plt.legend(bbox_to_anchor=(1.04, 1), loc="upper left");

In [None]:
# Same plot stacked by 'cell_identity', restricted with max_subset_size=2000.
fig = plt.figure(figsize=(12, 6))

upsetp = UpSet(upset_df, show_counts='%d', show_percentages=False,
               intersection_plot_elements=0, element_size=40, max_subset_size=2000)
upsetp.add_stacked_bars(by='cell_identity', elements=10)
# Draw into the figure created above explicitly rather than via the implicit current figure.
upsetp.plot(fig=fig)
# The semicolon is attached to the last statement: a lone ';' line is not valid Python
# and is handled by IPython's ';' escape instead of suppressing the output.
plt.legend(bbox_to_anchor=(1.04, 1), loc="upper left");

In [None]:
# UpSet plot stacked by 'MS_status', restricted with max_subset_size=2000.
fig = plt.figure(figsize=(12, 6))

upsetp = UpSet(upset_df, show_counts='%d', show_percentages=False,
               intersection_plot_elements=0, element_size=40, max_subset_size=2000)
upsetp.add_stacked_bars(by='MS_status', elements=10)
# Draw into the figure created above explicitly rather than via the implicit current figure.
upsetp.plot(fig=fig)
# The semicolon is attached to the last statement: a lone ';' line is not valid Python
# and is handled by IPython's ';' escape instead of suppressing the output.
plt.legend(bbox_to_anchor=(1.04, 1), loc="upper left");

### p035t 

In [53]:
# Scitcem > 0.5 > 226 cells
p035t_cells = adata_epi[(adata_epi.obs['sample'] == 'p035t') & 
          (adata_epi.obs['tumour_normal_normal'] == 'no confident assignment') &
          (adata_epi.obs['scitcem_p'] > 0.5)].obs.sort_values(['scitcem_p', 'p_cnv'])

In [None]:
# p_cnv of the selected p035t cells (x = cell, in the sorted order), coloured by iCMS_scANVI.
fig, ax = plt.subplots(figsize=(12, 5))
sns.scatterplot(data=p035t_cells.reset_index(), x='index', y='p_cnv',
                hue='iCMS_scANVI', ax=ax)
# Hide the per-cell x ticks. The original rotated the tick labels and then called
# plt.xticks(''), which passes a string as tick locations; an empty list is the
# documented way to remove ticks and makes the rotation unnecessary.
ax.set_xticks([]);

### huge heatmap of cell identity

In [None]:
sns.color_palette("blend:#1f77b4,#d5d5d5,#ff7f0e", as_cmap=True)

In [62]:
# assign tumour to 1 and normal to 0 for heatmap
dummy_ci_df = pd.get_dummies(adata_epi[adata_epi.obs['sample'].isin(tsamples)].obs[['numbat', 'scitcem_call']])[[
    'numbat_tumour\n(tumour sample)', 'scitcem_call_tumour\n(tumour sample)'
]].merge(adata_epi[adata_epi.obs['sample'].isin(tsamples)].obs[['sample','p_cnv','scitcem_p', 
                                                                'filter_alt', 'filter_dp','mean_VAF']], 
         left_index=True, right_index=True)

In [None]:
# One heatmap strip per sample showing the two tumour indicator columns (rows) for every
# cell (columns), with cells sorted by the two indicators.
# The original repeated the same sns.heatmap / label code once per axis (ax1..ax11);
# the loop below draws the same panels from tsamples.
call_cols = ['numbat_tumour\n(tumour sample)', 'scitcem_call_tumour\n(tumour sample)']

with plt.style.context('./plt_style'):
    cmap = sns.color_palette("blend:#1f77b4,#ff7f0e", as_cmap=True)
    fig, axes = plt.subplots(nrows=len(tsamples))
    axes = np.atleast_1d(axes)  # plt.subplots returns a bare Axes when nrows == 1

    for ax, sample in zip(axes, tsamples):
        sample_calls = (dummy_ci_df.loc[dummy_ci_df['sample'] == sample, call_cols]
                        .sort_values(call_cols))
        sns.heatmap(sample_calls.T,
                    xticklabels=False, yticklabels=False,
                    cmap=cmap, ax=ax, cbar=False)
        # Horizontal sample label to the left of each strip.
        ax.set(ylabel=sample)
        ax.yaxis.label.set(rotation=0, ha='right', va='center', size=16)

    axes[0].set_title('Cell identity assignment\n')

In [None]:
# imshow version of the per-sample tumour-indicator strips, with one shared colour bar.
# inset_axes is imported here because the only other import of it sits in a later cell,
# so this cell raised NameError on a fresh Restart & Run All.
from mpl_toolkits.axes_grid1.inset_locator import inset_axes

call_cols = ['numbat_tumour\n(tumour sample)', 'scitcem_call_tumour\n(tumour sample)']

cmap = sns.color_palette("blend:#1f77b4,#ff7f0e", as_cmap=True)
fig, axes = plt.subplots(nrows=len(tsamples), subplot_kw={'yticks': []}, figsize=(10, 10))
axes = np.atleast_1d(axes)  # plt.subplots returns a bare Axes when nrows == 1

# Narrow axes to the right of the first panel that will hold the colour bar.
axins = inset_axes(axes[0], width="100%", height="2000%",
                   bbox_to_anchor=(1.05, -2, .03, .4),
                   bbox_transform=axes[0].transAxes, loc=2, borderpad=0)
axins.tick_params(left=False, right=True, labelleft=False, labelright=True)

images = []
for ax, sample in zip(axes, tsamples):
    sample_calls = (dummy_ci_df.loc[dummy_ci_df['sample'] == sample, call_cols]
                    .sort_values(call_cols))
    data = np.asarray(sample_calls.T)

    ax.set(ylabel=sample)
    ax.yaxis.label.set(rotation=0, ha='right', va='center')

    images.append(ax.imshow(data, cmap=cmap, aspect='auto',
                            interpolation='nearest', interpolation_stage='rgba'))

# The colour bar is taken from the first panel's image.
fig.colorbar(images[0], cax=axins, fraction=.03)

# Semicolon attached to the last statement (a lone ';' line is not valid Python).
fig.subplots_adjust(hspace=0.6);

In [82]:
# 'mod_p_cnv': p_cnv with every value that rounds to 1.00 (two decimals) set to exactly 1.
# Vectorised replacement for the original row loop, which wrote through chained
# assignment (dummy_ci_df['mod_p_cnv'][i] = ...) and positional Series[...] lookup.
p_cnv = dummy_ci_df['p_cnv']
# Series.where keeps values where the condition holds and substitutes 1.0 elsewhere;
# missing values never compare equal to 1, so they are kept as missing.
dummy_ci_df['mod_p_cnv'] = p_cnv.where(p_cnv.round(2) != 1, 1.0).astype('float64')

In [91]:
tsamples_original = tsamples.copy()

In [98]:
# Display labels for the samples, derived from the original IDs: both p009 samples share
# the label 'P09t' (so it appears twice), every other ID has 'p0' replaced by 'P'.
tsamples = ['P09t' if sample_id in ('p009t1', 'p009t2') else sample_id.replace('p0', 'P')
            for sample_id in tsamples_original]

In [102]:
# 'sample_rev': display label per cell. The two p009 tumour (normal) samples collapse to
# 'P09t' ('P09n'); every other sample ID has 'p0' replaced by 'P'.
# Vectorised replacement for the original row loop, which wrote through chained
# assignment (dummy_ci_df['sample_rev'][i] = ...) and positional Series[...] lookup.
sample_ids = dummy_ci_df['sample'].astype(str)
sample_labels = (
    sample_ids.str.replace('p0', 'P', regex=False)
    .mask(sample_ids.isin(['p009t1', 'p009t2']), 'P09t')
    .mask(sample_ids.isin(['p009n1', 'p009n2']), 'P09n')
)
dummy_ci_df['sample_rev'] = sample_labels.astype('category')

In [103]:
dummy_ci_df['sample_original'] = dummy_ci_df['sample'].copy()

In [104]:
dummy_ci_df['sample'] = dummy_ci_df['sample_rev'].copy()

In [None]:
# map the probability
with plt.style.context('./plt_style'):
    cmap = sns.color_palette("blend:#1f77b4,#333333,#ff7f0e", as_cmap=True)
    fig, (ax1, ax2, ax3, ax4, ax5, ax6, ax7, ax8, ax9, ax10, ax11) = plt.subplots(nrows = 11)
    
    from mpl_toolkits.axes_grid1.inset_locator import inset_axes
    #cbar_ax = fig.add_axes([.91, .3, .03, .4])
    
    axins = inset_axes(ax1, width="100%", height="2000%",
                   bbox_to_anchor=(1.05, -2, .03, .4),
                   bbox_transform=ax1.transAxes, loc=2, borderpad=0)
    axins.tick_params(left=False, right=True, labelleft=False, labelright=True)
    
    sns.heatmap(dummy_ci_df[dummy_ci_df['sample'] == tsamples[0]].sort_values(['numbat_tumour\n(tumour sample)',
                                                                               'scitcem_call_tumour\n(tumour sample)',
                                                                               'mod_p_cnv','scitcem_p']
                                                                             )[['mod_p_cnv','scitcem_p']].T,
                xticklabels = False,
                cmap=cmap,ax=ax1, cbar_ax=axins, yticklabels = False)
    
    sns.heatmap(dummy_ci_df[dummy_ci_df['sample'] == tsamples[1]].sort_values(['numbat_tumour\n(tumour sample)',
                                                                               'scitcem_call_tumour\n(tumour sample)',
                                                                                'mod_p_cnv','scitcem_p'
                                                                                  ])[['mod_p_cnv','scitcem_p']].T,
                xticklabels = False,
                cmap=cmap, ax=ax2, cbar=False, yticklabels = False)
    
    sns.heatmap(dummy_ci_df[dummy_ci_df['sample'] == tsamples[2]].sort_values(['numbat_tumour\n(tumour sample)',
                                                                               'scitcem_call_tumour\n(tumour sample)',
                                                                                'mod_p_cnv','scitcem_p'
                                                                                  ])[['mod_p_cnv','scitcem_p']].T,
                xticklabels = False,
                cmap=cmap, ax=ax3, cbar=False, yticklabels = False)
    
    sns.heatmap(dummy_ci_df[dummy_ci_df['sample'] == tsamples[3]].sort_values(['numbat_tumour\n(tumour sample)',
                                                                               'scitcem_call_tumour\n(tumour sample)',
                                                                                'mod_p_cnv','scitcem_p'
                                                                                  ])[['mod_p_cnv','scitcem_p']].T,
                xticklabels = False,
                cmap=cmap, ax=ax4, cbar=False, yticklabels = False)
    
    sns.heatmap(dummy_ci_df[dummy_ci_df['sample'] == tsamples[4]].sort_values(['numbat_tumour\n(tumour sample)',
                                                                               'scitcem_call_tumour\n(tumour sample)',
                                                                                'mod_p_cnv','scitcem_p'
                                                                                  ])[['mod_p_cnv','scitcem_p']].T,
                xticklabels = False,
                cmap=cmap, ax=ax5, cbar=False, yticklabels = False)
    
    sns.heatmap(dummy_ci_df[dummy_ci_df['sample'] == tsamples[5]].sort_values(['numbat_tumour\n(tumour sample)',
                                                                               'scitcem_call_tumour\n(tumour sample)',
                                                                                'mod_p_cnv','scitcem_p'
                                                                                  ])[['mod_p_cnv','scitcem_p']].T,
                xticklabels = False,
                cmap=cmap, ax=ax6, cbar=False, yticklabels = False)
    
    sns.heatmap(dummy_ci_df[dummy_ci_df['sample'] == tsamples[6]].sort_values(['numbat_tumour\n(tumour sample)',
                                                                               'scitcem_call_tumour\n(tumour sample)',
                                                                                'mod_p_cnv','scitcem_p'
                                                                                  ])[['mod_p_cnv','scitcem_p']].T,
                xticklabels = False,
                cmap=cmap, ax=ax7, cbar=False, yticklabels = False)
    
    sns.heatmap(dummy_ci_df[dummy_ci_df['sample'] == tsamples[7]].sort_values(['numbat_tumour\n(tumour sample)',
                                                                               'scitcem_call_tumour\n(tumour sample)',
                                                                                'mod_p_cnv','scitcem_p'
                                                                                  ])[['mod_p_cnv','scitcem_p']].T,
                xticklabels = False,
                cmap=cmap, ax=ax8, cbar=False, yticklabels = False)
    
    sns.heatmap(dummy_ci_df[dummy_ci_df['sample'] == tsamples[8]].sort_values(['numbat_tumour\n(tumour sample)',
                                                                               'scitcem_call_tumour\n(tumour sample)',
                                                                                'mod_p_cnv','scitcem_p'
                                                                                  ])[['mod_p_cnv','scitcem_p']].T,
                xticklabels = False,
                cmap=cmap, ax=ax9, cbar=False, yticklabels = False)
    
    sns.heatmap(dummy_ci_df[dummy_ci_df['sample'] == tsamples[9]].sort_values(['numbat_tumour\n(tumour sample)',
                                                                               'scitcem_call_tumour\n(tumour sample)',
                                                                                'mod_p_cnv','scitcem_p'
                                                                                  ])[['mod_p_cnv','scitcem_p']].T,
                xticklabels = False,
                cmap=cmap, ax=ax10, cbar=False, yticklabels = False)
    
    sns.heatmap(dummy_ci_df[dummy_ci_df['sample'] == tsamples[10]].sort_values(['numbat_tumour\n(tumour sample)',
                                                                               'scitcem_call_tumour\n(tumour sample)',
                                                                                'mod_p_cnv','scitcem_p'
                                                                                  ])[['mod_p_cnv','scitcem_p']].T,
                xticklabels = False,
                cmap=cmap, ax=ax11, cbar=False, yticklabels = False)

    ax1.set(ylabel=tsamples[0])
    ax1.yaxis.label.set(rotation=0, ha='right', va= 'center', size = 16)
    
    ax2.set(ylabel=tsamples[1])
    ax2.yaxis.label.set(rotation=0, ha='right', va= 'center', size = 16)
    
    ax3.set(ylabel=tsamples[2])
    ax3.yaxis.label.set(rotation=0, ha='right', va= 'center', size = 16)
    
    ax4.set(ylabel=tsamples[3])
    ax4.yaxis.label.set(rotation=0, ha='right', va= 'center', size = 16)
    
    ax5.set(ylabel=tsamples[4])
    ax5.yaxis.label.set(rotation=0, ha='right', va= 'center', size = 16)
    
    ax6.set(ylabel=tsamples[5])
    ax6.yaxis.label.set(rotation=0, ha='right', va= 'center', size = 16)
    
    ax7.set(ylabel=tsamples[6])
    ax7.yaxis.label.set(rotation=0, ha='right', va= 'center', size = 16)
    
    ax8.set(ylabel=tsamples[7])
    ax8.yaxis.label.set(rotation=0, ha='right', va= 'center', size = 16)
    
    ax9.set(ylabel=tsamples[8])
    ax9.yaxis.label.set(rotation=0, ha='right', va= 'center', size = 16)
    
    ax10.set(ylabel=tsamples[9])
    ax10.yaxis.label.set(rotation=0, ha='right', va= 'center', size = 16)
    
    ax11.set(ylabel=tsamples[10])
    ax11.yaxis.label.set(rotation=0, ha='right', va= 'center', size = 16)
    
    #ax1.set_title('Cell identity assignment probability\n')
    
    #plt.tight_layout()
    
;

In [320]:
# Display labels with 'P09t' listed once (10 entries), replacing the 11-entry list above.
# NOTE(review): this rebinds `tsamples` again; cells further up that expect the original
# sample IDs will not work if re-run after this cell.
tsamples = ['P07t',
 'P08t',
 'P09t',
 'P13t',
 'P14t',
 'P16t',
 'P20t',
 'P21t',
 'P26t',
 'P35t']

In [None]:
# imshow version of the per-sample probability strips ('mod_p_cnv', 'scitcem_p'),
# sized in centimetres (7 x 10) and sharing one colour bar.
# The panel count follows tsamples instead of a hard-coded 10 / axes[9], and inset_axes
# is imported locally so the cell does not depend on an import made inside another cell.
from mpl_toolkits.axes_grid1.inset_locator import inset_axes

sort_cols = ['numbat_tumour\n(tumour sample)', 'scitcem_call_tumour\n(tumour sample)',
             'mod_p_cnv', 'scitcem_p']
prob_cols = ['mod_p_cnv', 'scitcem_p']

cmap = sns.color_palette("blend:#1f77b4,#333333,#ff7f0e", as_cmap=True)

fig, axes = plt.subplots(nrows=len(tsamples), subplot_kw={'xticks': [], 'yticks': []},
                         figsize=(7/2.54, 10/2.54))
axes = np.atleast_1d(axes)  # plt.subplots returns a bare Axes when nrows == 1

# Narrow axes to the right of the first panel that will hold the colour bar.
axins = inset_axes(axes[0], width="100%", height="2000%",
                   bbox_to_anchor=(1.05, -1, .03, .4),
                   bbox_transform=axes[0].transAxes, loc=2, borderpad=0)
axins.tick_params(left=False, right=True, labelleft=False, labelright=True)

images = []
for ax, sample in zip(axes, tsamples):
    data = np.asarray(dummy_ci_df[dummy_ci_df['sample'] == sample]
                      .sort_values(sort_cols)[prob_cols].T)

    ax.set(ylabel=sample)
    ax.yaxis.label.set(rotation=0, ha='right', va='center')
    ax.set_frame_on(False)

    images.append(ax.imshow(data, cmap=cmap, aspect='auto',
                            interpolation='nearest', interpolation_stage='rgba'))

# Ticks on the bottom panel only; positions are taken from the last panel's cell count.
axes[-1].set_xticks([0, data.shape[1]/2, data.shape[1]-1], ['0.0', '0.5', '1.0'])
axes[-1].set(xlabel='Proportion')

# The colour bar is taken from the first panel's image.
cbar = fig.colorbar(images[0], cax=axins, fraction=.03)
cbar.solids.set_edgecolor('face')

# Semicolon attached to the last statement (a lone ';' line is not valid Python).
fig.subplots_adjust(hspace=0.1);

In [107]:
# Keep only the rows where the Numbat and Scitcem tumour-indicator columns differ.
dummy_ci_df_conflicting = dummy_ci_df.loc[
    dummy_ci_df['numbat_tumour\n(tumour sample)'].ne(
        dummy_ci_df['scitcem_call_tumour\n(tumour sample)']
    )
]

In [None]:
# proportion of conflicting cells
# Per sample: (# conflicting cells with scitcem_p == 0.5) / (# conflicting cells).
# fill_value=0 makes a sample that has conflicting cells but none at 0.5 report
# 0.0 instead of NaN (it would otherwise be missing from the numerator).
(
    dummy_ci_df_conflicting.loc[dummy_ci_df_conflicting['scitcem_p'] == 0.5, 'sample']
    .value_counts(dropna=False)
    .sort_index()
    .div(
        dummy_ci_df_conflicting['sample'].value_counts(dropna=False).sort_index(),
        fill_value=0,
    )
)

In [None]:
# proportion of all cells
# Per sample: (# conflicting cells with scitcem_p == 0.5) / (# cells in dummy_ci_df).
# fill_value=0 makes a sample without any such cell report 0.0 instead of NaN
# (it would otherwise be missing from the numerator).
(
    dummy_ci_df_conflicting.loc[dummy_ci_df_conflicting['scitcem_p'] == 0.5, 'sample']
    .value_counts(dropna=False)
    .sort_index()
    .div(
        dummy_ci_df['sample'].value_counts(dropna=False).sort_index(),
        fill_value=0,
    )
)

In [None]:
# One figure per tumour sample: conflicting cells sorted by scitcem_p (ties
# broken by p_cnv); p_cnv on the left y axis, scitcem_p (firebrick) on the right.
for i in tsamples:
    ax1 = plt.figure(figsize=(6, 2)).add_subplot()
    ax2 = ax1.twinx()

    # reset_index() exposes the original row labels as the 'index' column that
    # is used as the x variable below.
    df = (
        dummy_ci_df_conflicting[dummy_ci_df_conflicting['sample'] == i]
        .reset_index()
        .sort_values(['scitcem_p', 'p_cnv'])
    )

    sns.scatterplot(data=df, x='index', y='p_cnv', ax=ax1, s=10)
    sns.scatterplot(data=df, x='index', y='scitcem_p', ax=ax2, color='firebrick', s=10)

    ax1.set_title(f'{i} ({df.shape[0]} cells)')

    # Same fixed range on both y axes so the two series are directly comparable.
    ax1.set_ylim(-0.1, 1.1)
    ax2.set_ylim(-0.1, 1.1)

    # Remove the x ticks altogether (as the other per-sample scatter cells do);
    # blanking only the labels would still leave one tick mark per plotted cell.
    ax1.set_xticks([])

In [None]:
# number of alt
# One figure per tumour sample: conflicting cells sorted by scitcem_p, then p_cnv.
# ax1 carries p_cnv, ax2 scitcem_p (firebrick) and ax3 filter_alt (orange); ax2 and
# ax3 are twin axes of ax1, and ax3 keeps its automatic y range.
for i in tsamples:
    ax1 = plt.figure(figsize=(10 / 2.54, 5 / 2.54)).add_subplot()
    ax2, ax3 = ax1.twinx(), ax1.twinx()

    in_sample = dummy_ci_df_conflicting['sample'] == i
    df = dummy_ci_df_conflicting[in_sample].reset_index().sort_values(['scitcem_p', 'p_cnv'])

    # (column, target axes, marker options) for each overlaid series.
    for column, axis, marker_kw in (
        ('p_cnv', ax1, {'s': 10}),
        ('scitcem_p', ax2, {'color': 'firebrick', 's': 10}),
        ('filter_alt', ax3, {'color': 'orange', 's': 8}),
    ):
        sns.scatterplot(data=df, x='index', y=column, ax=axis, **marker_kw)

    plt.gca().set_title(f'{i} ({df.shape[0]} cells)')

    # Both probability axes share the same fixed range.
    for prob_axis in (ax1, ax2):
        prob_axis.set_ylim(-0.1, 1.1)

    # The left axis stands for both probabilities; the x axis shows no ticks.
    ax1.set_ylabel('probability')
    ax1.set_xticklabels('')
    ax1.set_xticks([])

    # Hide ax2's own y axis decorations.
    ax2.set_ylabel('')
    ax2.set_yticklabels('')
    ax2.set_yticks([])

In [None]:
# mean_VAF
# One figure per tumour sample: conflicting cells sorted by scitcem_p, then p_cnv.
# ax1 carries p_cnv, ax2 scitcem_p (firebrick) and ax3 mean_VAF (orange); all three
# y axes use the same fixed range.
for i in tsamples:
    ax1 = plt.figure(figsize=(10 / 2.54, 5 / 2.54)).add_subplot()
    ax2, ax3 = ax1.twinx(), ax1.twinx()

    in_sample = dummy_ci_df_conflicting['sample'] == i
    df = dummy_ci_df_conflicting[in_sample].reset_index().sort_values(['scitcem_p', 'p_cnv'])

    # (column, target axes, marker options) for each overlaid series.
    for column, axis, marker_kw in (
        ('p_cnv', ax1, {'s': 10}),
        ('scitcem_p', ax2, {'color': 'firebrick', 's': 10}),
        ('mean_VAF', ax3, {'color': 'orange', 's': 8}),
    ):
        sns.scatterplot(data=df, x='index', y=column, ax=axis, **marker_kw)

    plt.gca().set_title(f'{i} ({df.shape[0]} cells)')

    for shared_axis in (ax1, ax2, ax3):
        shared_axis.set_ylim(-0.1, 1.1)

    # The left axis stands for both probabilities; the x axis shows no ticks.
    ax1.set_ylabel('probability')
    ax1.set_xticklabels('')
    ax1.set_xticks([])

    # Hide ax2's own y axis decorations.
    ax2.set_ylabel('')
    ax2.set_yticklabels('')
    ax2.set_yticks([])

In [81]:
# Conflicting cells whose scitcem_p is exactly 0.5.
dummy_ci_df_conflicting_NA = dummy_ci_df_conflicting.loc[
    dummy_ci_df_conflicting['scitcem_p'].eq(0.5)
]

In [82]:
# Complement of the 0.5 subset: conflicting cells whose scitcem_p differs from 0.5.
dummy_ci_df_conflicting_NA_solved = dummy_ci_df_conflicting.loc[
    dummy_ci_df_conflicting['scitcem_p'].ne(0.5)
]

In [None]:
# Histogram of p_cnv for the cells with scitcem_p below 0.5, x axis fixed to [0, 1].
dummy_ci_df_conflicting_NA_solved.loc[
    dummy_ci_df_conflicting_NA_solved['scitcem_p'].lt(0.5), 'p_cnv'
].hist()
plt.xlim(0, 1)

In [90]:
# Subset of the non-0.5 conflicting cells with scitcem_p above 0.5.
dummy_ci_df_conflicting_NA_solved_high_scitcem = dummy_ci_df_conflicting_NA_solved.loc[
    dummy_ci_df_conflicting_NA_solved['scitcem_p'].gt(0.5)
]

### After removing the Scitcem == 0.5 cells

In [None]:
# number of alt
# One figure per tumour sample, restricted to conflicting cells with scitcem_p != 0.5,
# sorted by scitcem_p, then p_cnv. ax1 carries p_cnv, ax2 scitcem_p (firebrick) and
# ax3 filter_alt (orange); ax3 keeps its automatic y range.
for i in tsamples:
    ax1 = plt.figure(figsize=(6, 2)).add_subplot()
    ax2, ax3 = ax1.twinx(), ax1.twinx()

    in_sample = dummy_ci_df_conflicting_NA_solved['sample'] == i
    df = dummy_ci_df_conflicting_NA_solved[in_sample].reset_index().sort_values(['scitcem_p', 'p_cnv'])

    # (column, target axes, marker options) for each overlaid series.
    for column, axis, marker_kw in (
        ('p_cnv', ax1, {'s': 10}),
        ('scitcem_p', ax2, {'color': 'firebrick', 's': 10}),
        ('filter_alt', ax3, {'color': 'orange', 's': 8}),
    ):
        sns.scatterplot(data=df, x='index', y=column, ax=axis, **marker_kw)

    plt.gca().set_title(f'{i} ({df.shape[0]} cells)')

    # Both probability axes share the same fixed range.
    for prob_axis in (ax1, ax2):
        prob_axis.set_ylim(-0.1, 1.1)

    # The left axis stands for both probabilities; the x axis shows no ticks.
    ax1.set_ylabel('probability')
    ax1.set_xticklabels('')
    ax1.set_xticks([])

    # Hide ax2's own y axis decorations.
    ax2.set_ylabel('')
    ax2.set_yticklabels('')
    ax2.set_yticks([])

In [None]:
# number of dp
# One figure per tumour sample, restricted to conflicting cells with scitcem_p != 0.5,
# sorted by scitcem_p, then p_cnv. ax1 carries p_cnv, ax2 scitcem_p (firebrick) and
# ax3 filter_dp (orange); ax3 keeps its automatic y range.
for i in tsamples:
    ax1 = plt.figure(figsize=(6, 2)).add_subplot()
    ax2, ax3 = ax1.twinx(), ax1.twinx()

    in_sample = dummy_ci_df_conflicting_NA_solved['sample'] == i
    df = dummy_ci_df_conflicting_NA_solved[in_sample].reset_index().sort_values(['scitcem_p', 'p_cnv'])

    # (column, target axes, marker options) for each overlaid series.
    for column, axis, marker_kw in (
        ('p_cnv', ax1, {'s': 10}),
        ('scitcem_p', ax2, {'color': 'firebrick', 's': 10}),
        ('filter_dp', ax3, {'color': 'orange', 's': 8}),
    ):
        sns.scatterplot(data=df, x='index', y=column, ax=axis, **marker_kw)

    plt.gca().set_title(f'{i} ({df.shape[0]} cells)')

    # Both probability axes share the same fixed range.
    for prob_axis in (ax1, ax2):
        prob_axis.set_ylim(-0.1, 1.1)

    # The left axis stands for both probabilities; the x axis shows no ticks.
    ax1.set_ylabel('probability')
    ax1.set_xticklabels('')
    ax1.set_xticks([])

    # Hide ax2's own y axis decorations.
    ax2.set_ylabel('')
    ax2.set_yticklabels('')
    ax2.set_yticks([])

In [None]:
# mean_VAF
# One figure per tumour sample, restricted to conflicting cells with scitcem_p != 0.5,
# sorted by scitcem_p, then p_cnv. ax1 carries p_cnv, ax2 scitcem_p (firebrick) and
# ax3 mean_VAF (orange); all three y axes use the same fixed range.
for i in tsamples:
    ax1 = plt.figure(figsize=(6, 2)).add_subplot()
    ax2, ax3 = ax1.twinx(), ax1.twinx()

    in_sample = dummy_ci_df_conflicting_NA_solved['sample'] == i
    df = dummy_ci_df_conflicting_NA_solved[in_sample].reset_index().sort_values(['scitcem_p', 'p_cnv'])

    # (column, target axes, marker options) for each overlaid series.
    for column, axis, marker_kw in (
        ('p_cnv', ax1, {'s': 10}),
        ('scitcem_p', ax2, {'color': 'firebrick', 's': 10}),
        ('mean_VAF', ax3, {'color': 'orange', 's': 8}),
    ):
        sns.scatterplot(data=df, x='index', y=column, ax=axis, **marker_kw)

    plt.gca().set_title(f'{i} ({df.shape[0]} cells)')

    for shared_axis in (ax1, ax2, ax3):
        shared_axis.set_ylim(-0.1, 1.1)

    # The left axis stands for both probabilities; the x axis shows no ticks.
    ax1.set_ylabel('probability')
    ax1.set_xticklabels('')
    ax1.set_xticks([])

    # Hide ax2's own y axis decorations.
    ax2.set_ylabel('')
    ax2.set_yticklabels('')
    ax2.set_yticks([])

In [97]:
# New categorical obs column: cells listed in dummy_ci_df_conflicting_NA (the
# conflicting cells with scitcem_p == 0.5) are labelled 'no observed variant';
# every other cell keeps its 'tumour_normal_normal' value.
adata_epi.obs['cell_identity_NA_resolved'] = pd.Categorical(
    np.where(
        adata_epi.obs.index.isin(dummy_ci_df_conflicting_NA.index),
        'no observed variant',
        adata_epi.obs['tumour_normal_normal'],
    )
)