In [None]:
import numpy as np
import pandas as pd
import scanpy as sc
import matplotlib.pyplot as plt
import seaborn as sns
import anndata 
import os
import matplotlib
import mudata
matplotlib.rcParams['lines.linewidth'] = 0.5
import quiche as qu
import pertpy as pt
from supplementary_plot_helpers import *
from sketchKH import *
%reload_ext autoreload
%load_ext autoreload
%autoreload 2
%matplotlib inline

In [2]:
# Folder that receives every panel of Supplementary Figure 13.
save_directory = os.path.join('publications', 'supplementary_figures', 'supplementary_figure13')
qu.pp.make_directory(save_directory)


def _patient_outcome_map(obs, outcome_col):
    """Return a {Patient_ID: outcome} dict from the unique (Patient_ID, outcome) rows of `obs`."""
    unique_pairs = obs[['Patient_ID', outcome_col]].drop_duplicates()
    return dict(zip(unique_pairs['Patient_ID'], unique_pairs[outcome_col]))


# Spain cohort: patient -> Relapse.
adata_spain = anndata.read_h5ad(os.path.join('data', 'Zenodo', 'spain_preprocessed.h5ad'))
relapse_dict = _patient_outcome_map(adata_spain.obs, 'Relapse')

# Stanford cohort: patient -> RECURRENCE_LABEL.
adata_stanford = anndata.read_h5ad(os.path.join('data', 'Zenodo', 'stanford_preprocessed.h5ad'))
recurrence_dict = _patient_outcome_map(adata_stanford.obs, 'RECURRENCE_LABEL')

# NT cohort: patient -> pCR.
adata_ntpublic = anndata.read_h5ad(os.path.join('data', 'Zenodo', 'nt_preprocessed.h5ad'))
pcr_dict = _patient_outcome_map(adata_ntpublic.obs, 'pCR')

### Boxplots of cell type abundance: Supplementary Figure 13a, d, g

In [None]:
# Spain cohort: cell type abundance per patient, comparing the two Relapse groups
# passed as id1 / id2, then plotted as Supplementary Figure 13a.
norm_counts_spain, p_values_df = compute_abundance(adata_spain, 'Patient_ID', 'cell_cluster', 'Relapse', relapse_dict, id1 = 1.0, id2 = 0.0)
plot_cell_type_abundance_grid(norm_counts_spain, p_values_df, 'Relapse', 'cell_cluster', fdr_column='FDR_p_value', save_directory=save_directory,order = [0.0, 1.0], filename_save = 'supplementary_figure13a')

# DataFrame.to_csv does not create missing parent folders, so make sure the
# output folder exists before writing the tables.
output_directory = os.path.join('data', 'output_files')
os.makedirs(output_directory, exist_ok = True)
norm_counts_spain.to_csv(os.path.join(output_directory, 'normalized_cell_abundances_spain.csv'))
p_values_df.to_csv(os.path.join(output_directory, 'normalized_cell_abundances_statistics_spain.csv'))

In [None]:
# Stanford cohort: cell type abundance per patient, comparing the POSITIVE and
# NEGATIVE recurrence groups, then plotted as Supplementary Figure 13d.
norm_counts_stanford, p_values_df = compute_abundance(adata_stanford, 'Patient_ID', 'cell_cluster', 'Recurrence', recurrence_dict, id1 = 'POSITIVE', id2 = 'NEGATIVE')
plot_cell_type_abundance_grid(norm_counts_stanford, p_values_df, 'Recurrence', 'cell_cluster', fdr_column='FDR_p_value', order = ['NEGATIVE', 'POSITIVE'], save_directory=save_directory,filename_save = 'supplementary_figure13d')

# DataFrame.to_csv does not create missing parent folders, so make sure the
# output folder exists before writing the tables.
output_directory = os.path.join('data', 'output_files')
os.makedirs(output_directory, exist_ok = True)
norm_counts_stanford.to_csv(os.path.join(output_directory, 'normalized_cell_abundances_stanford.csv'))
p_values_df.to_csv(os.path.join(output_directory, 'normalized_cell_abundances_statistics_stanford.csv'))

In [None]:
# NT cohort: cell type abundance per patient, comparing the RD and pCR groups,
# then plotted as Supplementary Figure 13g.
norm_counts_nt, p_values_df = compute_abundance(adata_ntpublic, 'Patient_ID', 'cell_cluster', 'pCR', pcr_dict, id1 = 'RD', id2 = 'pCR')
plot_cell_type_abundance_grid(norm_counts_nt, p_values_df, 'pCR', 'cell_cluster', fdr_column='FDR_p_value', save_directory=save_directory,order = ['pCR', 'RD'], filename_save = 'supplementary_figure13g')

# DataFrame.to_csv does not create missing parent folders, so make sure the
# output folder exists before writing the tables.
output_directory = os.path.join('data', 'output_files')
os.makedirs(output_directory, exist_ok = True)
norm_counts_nt.to_csv(os.path.join(output_directory, 'normalized_cell_abundances_nt.csv'))
p_values_df.to_csv(os.path.join(output_directory, 'normalized_cell_abundances_statistics_nt.csv'))

### Plot prediction performance: Supplementary Figure 13c, f, i. For the classification scripts, see abundance_prediction.R

In [None]:
# Per-cohort AUC tables (presumably written by abundance_prediction.R -- confirm).
auc_directory = os.path.join('data', 'output_files')
auc_score_spain = pd.read_csv(os.path.join(auc_directory, 'auc_data_spain.csv'))
auc_score_stanford = pd.read_csv(os.path.join(auc_directory, 'auc_data_stanford.csv'))
auc_score_nt = pd.read_csv(os.path.join(auc_directory, 'auc_data_nt.csv'))

In [None]:
# One AUC panel per cohort. `save_directory` is passed by keyword in all three
# calls so they agree with each other and do not depend on plot_auc's
# positional parameter order.
plot_auc(auc_score_spain, save_directory = save_directory, filename_save = 'supplementary_figure13c')
plot_auc(auc_score_stanford, save_directory = save_directory, filename_save = 'supplementary_figure13f')
plot_auc(auc_score_nt, save_directory = save_directory, filename_save = 'supplementary_figure13i')

### Differential abundance (DA) cell type testing: Supplementary Figure 13b, e, h

In [None]:
## spain
# Markers used to subset the data before sketching and graph construction.
phenotypic_markers = ['ECAD', 'CK17', 'CD45', 'CD3', 'CD4', 'CD8', 'FOXP3', 'CD20', 'CD56', 'CD14', 'CD68',
                    'CD163', 'CD11c', 'HLADR', 'ChyTr', 'Calprotectin', 'FAP', 'SMA', 'Vim', 'Fibronectin',
                    'Collagen1', 'CD31']

sketch_size = 1000
design = '~Relapse'
model_contrasts = 'Relapse1-Relapse0'

# Load the DA copy under its own name instead of rebinding `adata_spain`:
# the abundance cell matches Relapse against 1.0 / 0.0, so overwriting that
# object with a string-typed Relapse column would make re-running it unreliable.
adata_spain_da = anndata.read_h5ad(os.path.join('data', 'Zenodo', 'spain_preprocessed.h5ad'))
# Cast Relapse to the strings '0' / '1' so the levels line up with `model_contrasts`.
adata_spain_da.obs['Relapse'] = adata_spain_da.obs['Relapse'].astype('int').astype('str')

# Drop patients with fewer than `sketch_size` cells (group sizes computed once).
patient_sizes = adata_spain_da.obs.groupby('Patient_ID').size()
small_patients = list(patient_sizes[patient_sizes < sketch_size].index)
adata_relapse = adata_spain_da[~np.isin(adata_spain_da.obs.Patient_ID, small_patients)].copy()

#standardize
X = qu.pp.standardize(x = adata_relapse.X.copy())
adata_relapse.X = X.copy()
# Subsample `sketch_size` cells per patient on the phenotypic markers only.
_, adata_relapse_subsample = sketch(adata =adata_relapse[:, phenotypic_markers], num_subsamples = sketch_size, gamma = 1, frequency_seed = 0, sample_set_key='Patient_ID', n_jobs = 8)

# Differential abundance testing with Milo (pertpy) on a 50-nearest-neighbor PCA graph.
milo = pt.tl.Milo()
mdata = milo.load(adata_relapse_subsample, feature_key = 'expression')
sc.tl.pca(mdata['expression'])
sc.pp.neighbors(mdata['expression'], n_neighbors = 50, use_rep = 'X_pca')
mdata['expression'].uns["nhood_neighbors_key"] = None
mdata = qu.tl.build_milo_graph(mdata, feature_key = 'expression')
mdata = milo.count_nhoods(mdata, sample_col = 'Patient_ID', feature_key = 'expression')
milo.da_nhoods(mdata, design = design, model_contrasts = model_contrasts, feature_key = 'expression')
milo.annotate_nhoods(mdata, anno_col = 'cell_cluster', feature_key = 'expression')
# Rebuild the MuData from its two modalities, then copy patient and outcome
# labels onto the neighborhood table.
# NOTE(review): these assignments need one neighborhood per cell (equal lengths);
# presumably that is what build_milo_graph sets up -- confirm.
mdata = mudata.MuData({'expression': mdata['expression'], 'milo': mdata['milo']})
mdata['milo'].var['Patient_ID'] = mdata['expression'].obs['Patient_ID'].values
mdata['milo'].var[design.split('~')[1]] = mdata['expression'].obs[design.split('~')[1]].values

In [None]:
# Cell types passed to the Spain boxplot as `niches`.
celltypes = [
    'APC', 'B', 'CAF', 'CD4T', 'CD8T', 'CD68_Mac', 'CD163_Mac',
    'Cancer_1', 'Cancer_2', 'Cancer_3',
    'Endothelium', 'Fibroblast', 'Mac_Other', 'Mast', 'Monocyte', 'NK', 'Neutrophil',
    'Smooth_Muscle', 'T_Other',
]

# Cell type -> hex color.
colors_dict_cells = {
    'APC': '#700548',
    'B': '#005377',
    'CAF': '#f2cc8f',
    'CD4T': '#ebb3a9',
    'CD8T': '#ff5666',
    'CD68_Mac': '#ffa52f',
    'CD163_Mac': '#788AA3',
    'Cancer_1': '#66cdaa',
    'Cancer_2': '#3d405b',
    'Cancer_3': '#b49ab8',
    'Endothelium': '#f78e69',
    'Fibroblast': '#2d9bd5',
    'Immune_Other': '#366962',
    'Mac_Other': '#c7d66d',
    'Mast': '#E36414',
    'Monocyte': '#CC6690',
    'NK': '#9ee2ff',
    'Neutrophil': '#4a7c59',
    'Other': '#FFBF69',
    'Smooth_Muscle': '#f5ebe0',
    'T_Other': '#901C14',
    'Treg': '#9e8576',
}

# Supplementary Figure 13b: boxplot of the Milo results by annotated cell type.
boxplot(
    mdata,
    alpha = 0.05,
    xlim = [-3.5, 3.5],
    annot_key = 'nhood_annotation',
    niches = celltypes,
    feature_key = "milo",
    figsize = (3, 5),
    fontsize = 10,
    colors_dict = colors_dict_cells,
    save_directory = save_directory,
    filename_save = 'supplementary_figure13b',
)

In [None]:
## stanford
directory = os.path.join('data', 'tnbc_stanford', 'adata')

# Markers used to subset the data before sketching and graph construction.
phenotypic_markers = [
    'ECAD', 'CK17', 'CD45', 'CD3', 'CD4', 'CD8', 'FOXP3', 'CD20', 'CD56', 'CD14', 'CD68',
    'CD163', 'CD11c', 'HLADR', 'ChyTr', 'Calprotectin', 'FAP', 'SMA', 'Vim', 'Fibronectin',
    'Collagen1', 'CD31',
]

adata = anndata.read_h5ad(os.path.join('data','Zenodo', 'stanford_preprocessed.h5ad'))

sc.set_figure_params(dpi = 400, dpi_save = 400, fontsize = 14)

# Cell type -> hex color.
colors_dict_cells = {
    'APC': '#700548',
    'B': '#005377',
    'CAF': '#f2cc8f',
    'CD4T': '#ebb3a9',
    'CD8T': '#ff5666',
    'CD68_Mac': '#ffa52f',
    'CD163_Mac': '#788AA3',
    'Cancer_1': '#66cdaa',
    'Cancer_2': '#3d405b',
    'Cancer_3': '#b49ab8',
    'Endothelium': '#f78e69',
    'Fibroblast': '#2d9bd5',
    'Immune_Other': '#366962',
    'Mac_Other': '#c7d66d',
    'Mast': '#E36414',
    'Monocyte': '#CC6690',
    'NK': '#9ee2ff',
    'Neutrophil': '#4a7c59',
    'Other': '#FFBF69',
    'Smooth_Muscle': '#f5ebe0',
    'T_Other': '#901C14',
    'Treg': '#9e8576',
}

# Lineage -> hex color.
colors_dict = {
    'myeloid': '#4DCCBD',
    'lymphoid': '#279AF1',
    'tumor': '#FF8484',
    'structural': '#F9DC5C',
}

# Cell type -> lineage.
lineage_dict = {
    'APC': 'myeloid',
    'B': 'lymphoid',
    'CAF': 'structural',
    'CD4T': 'lymphoid',
    'CD8T': 'lymphoid',
    'CD68_Mac': 'myeloid',
    'CD163_Mac': 'myeloid',
    'Cancer_1': 'tumor',
    'Cancer_2': 'tumor',
    'Cancer_3': 'tumor',
    'Endothelium': 'structural',
    'Fibroblast': 'structural',
    'Mac_Other': 'myeloid',
    'Mast': 'myeloid',
    'Monocyte': 'myeloid',
    'NK': 'lymphoid',
    'Neutrophil': 'myeloid',
    'Smooth_Muscle': 'structural',
    'T_Other': 'lymphoid',
    'Treg': 'lymphoid',
}

# NOTE(review): here the data are standardized before small patients are removed,
# whereas the Spain cell standardizes after filtering -- confirm this is intended.
adata.X = qu.pp.standardize(adata.X)

sketch_size = 1000
design = '~RECURRENCE_LABEL'
model_contrasts = 'RECURRENCE_LABELPOSITIVE-RECURRENCE_LABELNEGATIVE'

# Drop patients with fewer than `sketch_size` cells, then subsample
# `sketch_size` cells per patient on the phenotypic markers only.
patient_sizes = adata.obs.groupby('Patient_ID').size()
small_patients = list(patient_sizes[patient_sizes < sketch_size].index)
adata_relapse = adata[~np.isin(adata.obs.Patient_ID, small_patients)].copy()
_, adata_relapse_subsample = sketch(
    adata = adata_relapse[:, phenotypic_markers],
    num_subsamples = sketch_size,
    gamma = 1,
    frequency_seed = 0,
    sample_set_key = 'Patient_ID',
    n_jobs = 8,
)

# Differential abundance testing with Milo (pertpy) on a 50-nearest-neighbor PCA graph.
milo = pt.tl.Milo()
mdata = milo.load(adata_relapse_subsample, feature_key = 'expression')
sc.tl.pca(mdata['expression'])
sc.pp.neighbors(mdata['expression'], n_neighbors = 50, use_rep = 'X_pca')
mdata['expression'].uns["nhood_neighbors_key"] = None
mdata = qu.tl.build_milo_graph(mdata, feature_key = 'expression')
mdata = milo.count_nhoods(mdata, sample_col = 'Patient_ID', feature_key = 'expression')
milo.da_nhoods(mdata, design = design, model_contrasts = model_contrasts, feature_key = 'expression')
milo.annotate_nhoods(mdata, anno_col = 'cell_cluster', feature_key = 'expression')

# Rebuild the MuData from its two modalities, then copy patient and outcome
# labels onto the neighborhood table.
mdata = mudata.MuData({'expression': mdata['expression'], 'milo': mdata['milo']})
outcome_col = design.split('~')[1]
mdata['milo'].var['Patient_ID'] = mdata['expression'].obs['Patient_ID'].values
mdata['milo'].var[outcome_col] = mdata['expression'].obs[outcome_col].values

In [None]:
# Cell types passed to the Stanford boxplot as `niches`.
celltypes = [
    'APC', 'B', 'CAF', 'CD4T', 'CD8T', 'CD68_Mac', 'CD163_Mac',
    'Cancer_1', 'Cancer_2', 'Cancer_3',
    'Endothelium', 'Fibroblast', 'Mac_Other', 'Mast', 'Monocyte', 'NK', 'Neutrophil',
    'Smooth_Muscle', 'T_Other', 'Treg',
]

# Supplementary Figure 13e: boxplot of the Milo results by annotated cell type.
boxplot(
    mdata,
    alpha = 0.05,
    xlim = [-4, 4],
    annot_key = 'nhood_annotation',
    niches = celltypes,
    circle_x = -4.35,
    feature_key = "milo",
    figsize = (3, 4.5),
    fontsize = 10,
    colors_dict = colors_dict_cells,
    save_directory = save_directory,
    filename_save = 'supplementary_figure13e',
)

In [None]:
## nt
# Markers used to subset the data before sketching and graph construction.
phenotypic_markers = ['CK5/14', 'CK8/18', 'panCK', 'AR','CD45', 'CD3', 'CD4', 'CD8', 'FOXP3', 'CD20','CD79a', 'CD56', 'CD68', 'CD163', 'CD11c', 'HLA-DR',  'CD15', 'MPO', 'Calponin', 'SMA', 'Vimentin', 'PDGFRB','PDPN', 'CD31']

functional_markers = ['PD-L1 (SP142)', 'PD-L1 (73-10)', 'IDO', 'PD-1', 'OX40', 'ICOS', 'CA9', 'c-PARP', 'Ki67', 'pH2AX', 'Helios', 'GATA3', 'T-bet', 'TCF1', 'TOX', 'GZMB', 'HLA-ABC']

var_names = phenotypic_markers+functional_markers

cell_ordering = ['Cancer_4', 'CD4T', 'CD8T', 'Treg', 'B', 'Plasma',
                 'NK', 'CD163_Mac', 'APC','DC', 'Neutrophil',
                 'Fibroblast', 'PDPN', 'Endothelium']

adata = anndata.read_h5ad(os.path.join('data','Zenodo', 'nt_preprocessed.h5ad'))
sc.set_figure_params(dpi = 400, dpi_save = 400, fontsize = 14)

# Cell type -> hex color.
colors_dict_cells = {'APC': '#700548',
 'B': '#005377',
 'Plasma': '#f2cc8f',
 'CD4T': '#ebb3a9',
 'CD8T': '#ff5666',
 'DC': '#ffa52f',
 'CD163_Mac': '#788AA3',
 'Cancer_4': '#66cdaa',
 'Endothelium': '#f78e69',
 'Fibroblast': '#2d9bd5',
 'PDPN': '#CC6690',
 'NK': '#9ee2ff',
 'Neutrophil': '#4a7c59',
 'Treg': '#9e8576'}

# Lineage -> hex color.
colors_dict = {'myeloid':'#4DCCBD',
               'lymphoid':'#279AF1',
               'tumor':'#FF8484',
               'structural':'#F9DC5C'}

# Cell type -> lineage. 'CD163_Mac' is the name used in `cell_ordering` and
# `colors_dict_cells`, so it gets an entry here; the original 'Mac' key is
# kept so any existing lookup on it still works.
lineage_dict = {'APC':'myeloid',
                'DC':'myeloid',
                'B':'lymphoid',
                'PDPN': 'structural',
                'CD4T': 'lymphoid',
                'CD8T': 'lymphoid',
                'Mac': 'myeloid',
                'CD163_Mac': 'myeloid',
                'Cancer_4': 'tumor',
                'Endothelium':'structural',
                'Fibroblast': 'structural',
                'Plasma':'lymphoid',
                'NK':'lymphoid',
                'Neutrophil':'myeloid',
                'Treg':'lymphoid'}

# NOTE(review): the data are standardized here and again after filtering below,
# unlike the other two cohorts, which standardize once. Left unchanged to keep
# the figure reproducible -- confirm whether the first pass is needed.
adata.X = qu.pp.standardize(adata.X)

sketch_size = 1000
design = '~pCR'
model_contrasts = 'pCRRD-pCRpCR'

# Drop patients with fewer than `sketch_size` cells (group sizes computed once).
patient_sizes = adata.obs.groupby('Patient_ID').size()
small_patients = list(patient_sizes[patient_sizes < sketch_size].index)
adata_relapse = adata[~np.isin(adata.obs.Patient_ID, small_patients)].copy()

#standardize
X = qu.pp.standardize(x = adata_relapse.X.copy())
adata_relapse.X = X.copy()
# Subsample `sketch_size` cells per patient on the phenotypic markers only.
_, adata_relapse_subsample = sketch(adata = adata_relapse[:, phenotypic_markers], num_subsamples = sketch_size, gamma = 1, frequency_seed = 0, sample_set_key='Patient_ID', n_jobs = 8)

# Differential abundance testing with Milo (pertpy) on a 50-nearest-neighbor PCA graph.
milo = pt.tl.Milo()
mdata = milo.load(adata_relapse_subsample, feature_key = 'expression')
sc.tl.pca(mdata['expression'])
sc.pp.neighbors(mdata['expression'], n_neighbors = 50, use_rep = 'X_pca')
mdata['expression'].uns["nhood_neighbors_key"] = None
mdata = qu.tl.build_milo_graph(mdata, feature_key = 'expression')
mdata = milo.count_nhoods(mdata, sample_col = 'Patient_ID', feature_key = 'expression')
milo.da_nhoods(mdata, design = design, model_contrasts = model_contrasts, feature_key = 'expression')
milo.annotate_nhoods(mdata, anno_col = 'cell_cluster', feature_key = 'expression')
# Rebuild the MuData from its two modalities, then copy patient and outcome
# labels onto the neighborhood table.
mdata = mudata.MuData({'expression': mdata['expression'], 'milo': mdata['milo']})
mdata['milo'].var['Patient_ID'] = mdata['expression'].obs['Patient_ID'].values
mdata['milo'].var[design.split('~')[1]] = mdata['expression'].obs[design.split('~')[1]].values

In [None]:
# Cell types passed to the NT boxplot as `niches`.
celltypes = [
    'Cancer_4', 'CD4T', 'CD8T', 'Treg', 'B', 'Plasma',
    'NK', 'CD163_Mac', 'APC', 'DC', 'Neutrophil',
    'Fibroblast', 'PDPN', 'Endothelium',
]

# Supplementary Figure 13h: boxplot of the Milo results by annotated cell type.
boxplot(
    mdata,
    alpha = 0.05,
    xlim = [-5, 5],
    annot_key = 'nhood_annotation',
    niches = celltypes,
    feature_key = "milo",
    figsize = (3, 3.5),
    circle_x = -5.4,
    circle_width = 0.3,
    fontsize = 10,
    colors_dict = colors_dict_cells,
    save_directory = save_directory,
    filename_save = 'supplementary_figure13h',
)