In [None]:
import os
import pandas as pd
import numpy as np
import quiche as qu
import matplotlib.pyplot as plt
import seaborn as sns
import anndata
from sketchKH import *
import scanpy as sc
import mudata
import matplotlib.cm as cm
import shutil
from scipy.stats import ranksums

sns.set_style('ticks')
%reload_ext autoreload
%load_ext autoreload
%autoreload 2
%matplotlib inline

In [None]:
# Output folder for the panels of Supplementary Figure 18.
save_directory = os.path.join('publications', 'supplementary_figures', 'supplementary_figure18')
qu.pp.make_directory(save_directory)
functional_markers = ['PDL1','Ki67','GLUT1','CD45RO','CD69', 'PD1','CD57','TBET', 'TCF1',
                        'CD45RB', 'TIM3','IDO', 'LAG3', 'CD38', 'HLADR']

mdata = mudata.read_h5mu(os.path.join('data', 'tnbc_spain', 'mdata', 'mdata_ecm_alignment.h5mu'))

# The differential-abundance statistics are cast to float before aggregating.
stat_columns = ['logFC', 'SpatialFDR', 'PValue']
mdata['quiche'].var[stat_columns] = mdata['quiche'].var[stat_columns].astype('float')

# Per-niche summary: median SpatialFDR (stored as 'pval') and mean logFC.
niche_groups = mdata['quiche'].var.groupby('quiche_niche')
scores_df = pd.DataFrame({'pval': niche_groups['SpatialFDR'].median(),
                          'logFC': niche_groups['logFC'].mean()})

# Keep niches that (1) have median SpatialFDR < 0.05, (2) label at least 5 rows of
# mdata['quiche'].var and (3) have |mean logFC| > 0.5.
# Boolean filters are used instead of a set intersection so that the order of
# `scores_df` / `niches` is the same on every run (set ordering of strings changes
# between interpreter sessions).
niche_sizes = mdata['quiche'].var['quiche_niche'].value_counts()
abundant_niches = niche_sizes.index[niche_sizes >= 5]
scores_df = scores_df[scores_df['pval'] < 0.05]
scores_df = scores_df[scores_df.index.isin(abundant_niches)]
scores_df = scores_df[scores_df['logFC'].abs() > 0.5]
niches = list(scores_df.index)

cov_count_df = qu.tl.compute_patient_proportion(mdata,
                                niches = niches,
                                feature_key = 'quiche',
                                annot_key = 'quiche_niche',
                                patient_key = 'Patient_ID',
                                design_key = 'compartment',
                                patient_niche_threshold = 5)

# Split by sign of the mean logFC, requiring patient_count >= 2 and the matching
# compartment label ('unaligned' for negative, 'aligned' for positive logFC).
in_multiple_patients = cov_count_df['patient_count'] >= 2

cov_count_df_neg = cov_count_df[(cov_count_df['mean_logFC'] < 0)
                                & in_multiple_patients
                                & (cov_count_df['compartment'] == 'unaligned')]

cov_count_df_pos = cov_count_df[(cov_count_df['mean_logFC'] > 0)
                                & in_multiple_patients
                                & (cov_count_df['compartment'] == 'aligned')]

In [None]:
# Keep an untouched copy of the expression modality, then overwrite its matrix with
# the output of qu.pp.standardize (presumably a per-marker scaling -- confirm in quiche).
expression_modality = mdata['expression']
adata_expression_og = expression_modality.copy()
expression_modality.X = qu.pp.standardize(expression_modality.X)

In [None]:
# Arguments shared by both calls; only the list of significant niches differs.
functional_expression_kwargs = dict(
    mdata = mdata,
    labels_key = 'cell_cluster',
    annot_key = 'quiche_niche',
    fov_key = 'fov',
    segmentation_label_key = 'label',
    patient_key = 'fovcompartment',
    min_cell_count = 3,
    foldchange_key = 'logFC',
    markers = mdata['expression'].var_names,
)

# Niches with positive mean logFC (selected from the 'aligned' rows of cov_count_df).
adata_func_pos = qu.tl.compute_functional_expression(
    sig_niches = list(cov_count_df_pos['quiche_niche'].values),
    **functional_expression_kwargs)

# Niches with negative mean logFC (selected from the 'unaligned' rows of cov_count_df).
adata_func_neg = qu.tl.compute_functional_expression(
    sig_niches = list(cov_count_df_neg['quiche_niche'].values),
    **functional_expression_kwargs)

## Supplementary Figure 18a-b

In [None]:
# Reference table: expression of every cell in the 'expression' modality.
total_df = mdata['expression'].to_df().assign(cell_cluster = mdata['expression'].obs['cell_cluster'])

# Expression of the cells returned for the positive / negative logFC niches.
pos_df = adata_func_pos.to_df().assign(cell_cluster = adata_func_pos.obs['cell_cluster'])
neg_df = adata_func_neg.to_df().assign(cell_cluster = adata_func_neg.obs['cell_cluster'])

# Panel a uses the positive-niche cells, panel b the negative-niche cells. The plot is
# expected under figures/matrixplot_<name>.pdf and is then moved into save_directory.
for panel_letter, niche_df in (('a', pos_df), ('b', neg_df)):
    panel_name = f'supplementary_figure18{panel_letter}'
    qu.pl.plot_diff_func(niche_df, total_df, 'cell_cluster', functional_markers, (5.5, 3.5),
                         cmap = 'PuOr_r', filename_save = panel_name)
    shutil.move(os.path.join('figures', f'matrixplot_{panel_name}.pdf'),
                os.path.join(save_directory, f'{panel_name}.pdf'))

## Supplementary Figure 18c

In [None]:
# Restrict the spatial neighbourhood modality to the three cancer clusters.
cancer_cell_mask = np.isin(mdata['spatial_nhood'].obs['cell_cluster'],
                           ['Cancer_1', 'Cancer_2', 'Cancer_3'])
adata_sub = mdata['spatial_nhood'][cancer_cell_mask].copy()

# Morphology columns read from .obs, plus elongation = major / minor axis length.
morph_raw = adata_sub.obs[['nc_ratio', 'area', 'eccentricity',
                           'major_axis_length', 'minor_axis_length']].copy()
morph_raw['elongation'] = morph_raw['major_axis_length'] / morph_raw['minor_axis_length']

# Scale with qu.pp.standardize, keeping the original row and column labels.
morph_measurements = pd.DataFrame(qu.pp.standardize(morph_raw),
                                  index = morph_raw.index,
                                  columns = morph_raw.columns)
for obs_key in ('compartment', 'Patient_ID'):
    morph_measurements[obs_key] = adata_sub.obs[obs_key]

# Mean per (compartment, patient), reshaped to long format: the measurement name ends
# up in 'level_0' and its value in the column named 0 (both are used when plotting).
per_patient_mean = morph_measurements.groupby(['compartment', 'Patient_ID']).mean()
morph_measurementsv2 = per_patient_mean.unstack().unstack().reset_index()

In [None]:
# One panel per morphology measurement, comparing per-patient means between compartments.
fig, axes = plt.subplots(1, 4, figsize=(12, 3.5), sharey=False)
for ax, morph in zip(axes, ['nc_ratio', 'area', 'eccentricity', 'elongation']):
    melted_data = morph_measurementsv2[morph_measurementsv2['level_0'] == morph]

    # Drop values outside the 1.5 * IQR fences before testing and plotting.
    q1 = melted_data[0].quantile(0.25)
    q3 = melted_data[0].quantile(0.75)
    fence = 1.5 * (q3 - q1)
    filtered_data = melted_data[melted_data[0].between(q1 - fence, q3 + fence)]

    # Rank-sum test, only when exactly two compartments remain after filtering.
    groups = [filtered_data.loc[filtered_data['compartment'] == level, 0]
              for level in filtered_data['compartment'].unique()]
    p_value = None
    if len(groups) == 2:
        _, p_value = ranksums(*groups)

    sns.boxplot(x='compartment', y=0, data=filtered_data, width=0.5, fliersize=0, color='white', ax=ax)
    sns.stripplot(x='compartment', y=0, data=filtered_data, color='gray', alpha=0.8, ax=ax)
    ax.tick_params(labelsize=10)

    if p_value is None:
        panel_title = f"{morph} (p-value not computed)"
    elif p_value < 0.001:
        panel_title = f"{morph} (p < 0.001)"
    else:
        panel_title = f"{morph} (p = {p_value:.3f})"
    ax.set_title(panel_title)

    ax.set_xlabel("ECM region", fontsize = 12)
    ax.set_ylabel('')

plt.tight_layout()
plt.savefig(os.path.join(save_directory,'supplementary_figure18c.pdf'))

## Supplementary Figure 18d

In [None]:
# Cancer cells (three cancer clusters) located in the 'aligned' compartment.
cancer_cell_mask = np.isin(mdata['spatial_nhood'].obs['cell_cluster'],
                           ['Cancer_1', 'Cancer_2', 'Cancer_3'])
adata_sub = mdata['spatial_nhood'][cancer_cell_mask].copy()
adata_sub = adata_sub[adata_sub.obs['compartment'] == 'aligned'].copy()

# adata_sub is already restricted to 'aligned' cells, so no second filter is needed.
# Collapse the per-cluster columns into totals and drop the originals; `assign`
# builds a new frame, which avoids writing into a filtered slice.
nhood_df = adata_sub.to_df()
prop_df = (
    nhood_df
    .drop(columns = ['Cancer_1', 'Cancer_2', 'Cancer_3', 'Fibroblast', 'CAF'])
    .assign(Cancer_total = nhood_df['Cancer_1'] + nhood_df['Cancer_2'] + nhood_df['Cancer_3'],
            Fibroblast_total = nhood_df['Fibroblast'] + nhood_df['CAF'])
)

fig, ax = plt.subplots(figsize=(6, 4))
sns.histplot(prop_df['Cancer_total'], bins=50, kde=False, color='cornflowerblue', ax=ax)
ax.tick_params(labelsize = 12)

# Thresholds to annotate (the same values are used for panels e and f).
thresholds = [0.6, 0.8, 0.95]
total_cells = len(prop_df)

for thresh in thresholds:
    # Share of aligned cancer cells whose Cancer_total is at or above the threshold.
    count_above = (prop_df['Cancer_total'] >= thresh).sum()
    percent = (count_above / total_cells) * 100

    # Vertical marker plus a rotated label near the top of the axes.
    ax.axvline(thresh, color='k', linestyle='--')
    ax.text(thresh + 0.01, ax.get_ylim()[1]*0.95,
            f'{thresh:.2f} ({percent:.1f}%)',
            rotation=90, color='k', va='top', ha='left', fontsize=10)

ax.set_xlabel('Proportion of neighboring cancer cells', fontsize = 12)
ax.set_ylabel('Frequency', fontsize = 12)
fig.tight_layout()
fig.savefig(os.path.join(save_directory, 'supplementary_figure18d.pdf'), bbox_inches = 'tight')


## Supplementary Figure 18e

In [None]:
threshold_list = [0.6, 0.8, 0.95]

# ---- threshold-independent preparation (computed once, not once per threshold) ----
# Cancer cells and the summed cancer columns of their spatial_nhood rows.
adata_sub = mdata['spatial_nhood'][np.isin(mdata['spatial_nhood'].obs['cell_cluster'], ['Cancer_1', 'Cancer_2', 'Cancer_3'])].copy()
nhood_df = adata_sub.to_df()
prop_df_total = nhood_df['Cancer_1'] + nhood_df['Cancer_2'] + nhood_df['Cancer_3']

# Morphology columns plus elongation = major / minor axis length, scaled with qu.pp.standardize.
morph_raw = adata_sub.obs[['nc_ratio', 'area', 'eccentricity', 'major_axis_length', 'minor_axis_length']].copy()
morph_raw['elongation'] = morph_raw['major_axis_length'] / morph_raw['minor_axis_length']
morph_scaled = pd.DataFrame(qu.pp.standardize(morph_raw), index = morph_raw.index, columns = morph_raw.columns)

is_aligned = adata_sub.obs['compartment'] == 'aligned'

for threshold in threshold_list:
    # Integer percentage used in the output file name (np.int no longer exists in NumPy >= 1.24).
    idval = int(round(threshold * 100))

    # Relabel aligned cells whose cancer total reaches the threshold, then drop the
    # remaining 'aligned' cells. A boolean mask is used instead of chained positional
    # assignment, which is not guaranteed to write through to the DataFrame.
    dense_aligned = (is_aligned & (prop_df_total >= threshold)).to_numpy()
    morph_measurements = morph_scaled.copy()
    morph_measurements['compartment_total'] = np.where(dense_aligned,
                                                       f'aligned cancer >= {threshold}',
                                                       adata_sub.obs['compartment'].astype(object))
    morph_measurements = morph_measurements[morph_measurements['compartment_total'] != 'aligned'].copy()
    morph_measurements['Patient_ID'] = adata_sub.obs['Patient_ID']

    # Mean per (group, patient) in long format: measurement name in 'level_0', value in column 0.
    morph_measurementsv2 = morph_measurements.groupby(['compartment_total', 'Patient_ID']).mean().unstack().unstack().reset_index()

    fig, axes = plt.subplots(1, 4, figsize=(12, 3.5), sharey=False)
    for idx, morph in enumerate(['nc_ratio', 'area', 'eccentricity', 'elongation']):
        melted_data = morph_measurementsv2[morph_measurementsv2['level_0'] == morph]

        # Drop values outside the 1.5 * IQR fences before testing and plotting.
        Q1 = melted_data[0].quantile(0.25)
        Q3 = melted_data[0].quantile(0.75)
        IQR = Q3 - Q1
        lower_bound = Q1 - 1.5 * IQR
        upper_bound = Q3 + 1.5 * IQR
        filtered_data = melted_data[(melted_data[0] >= lower_bound) & (melted_data[0] <= upper_bound)]

        # Rank-sum test, only when exactly two groups remain after filtering.
        groups = [filtered_data[filtered_data['compartment_total'] == c][0] for c in filtered_data['compartment_total'].unique()]
        if len(groups) == 2:
            stat, p_value = ranksums(groups[0], groups[1])
        else:
            p_value = None

        sns.boxplot(x='compartment_total', y=0, data=filtered_data, width=0.5, fliersize=0, color='white', ax=axes[idx])
        sns.stripplot(x='compartment_total', y=0, data=filtered_data, color='gray', alpha=0.8, ax=axes[idx])
        axes[idx].tick_params(labelsize=10)

        if p_value is None:
            axes[idx].set_title(f"{morph} (p-value not computed)")
        elif p_value < 0.001:
            axes[idx].set_title(f"{morph} (p < 0.001)")
        else:
            axes[idx].set_title(f"{morph} (p = {p_value:.3f})")

        axes[idx].set_xlabel("ECM region", fontsize = 12)
        axes[idx].set_ylabel('')

    plt.tight_layout()
    plt.savefig(os.path.join(save_directory, f'supplementary_figure18e{idval}.pdf'))

## Supplementary Figure 18f

In [None]:
# Folder holding the ECM alignment annotations. The default is the original
# network-share location; set the QUICHE_ALIGN_DIR environment variable to override it.
align_dir = os.environ.get('QUICHE_ALIGN_DIR',
                           '/Volumes/Shared/Noah Greenwald/TNBC_Cohorts/SPAIN/intermediate_files/mask_dir/alignment')
phenotypic_markers = ['ECAD', 'CK17', 'CD45', 'CD3', 'CD4', 'CD8', 'FOXP3', 'CD20', 'CD56', 'CD14', 'CD68',
                    'CD163', 'CD11c', 'HLADR', 'ChyTr', 'Calprotectin', 'FAP', 'SMA', 'Vim', 'Fibronectin',
                    'Collagen1', 'CD31']

functional_markers = ['PDL1','Ki67','GLUT1','CD45RO','CD69', 'PD1','CD57','TBET', 'TCF1',
                        'CD45RB', 'TIM3','IDO', 'LAG3', 'CD38', 'HLADR']

var_names = phenotypic_markers+functional_markers

cell_ordering = ['Cancer_1', 'Cancer_2', 'Cancer_3', 'CD4T', 'CD8T', 'Treg', 'T_Other', 'B', 
                 'NK', 'CD68_Mac', 'CD163_Mac', 'Mac_Other', 'Monocyte', 'APC','Mast', 'Neutrophil',
                 'CAF', 'Fibroblast', 'Smooth_Muscle', 'Endothelium']

sc.set_figure_params(dpi = 400, dpi_save = 400, fontsize = 14)

# NOTE: save_directory is deliberately NOT re-pointed at publications/figures/figure4
# here. The panels produced below are named supplementary_figure18f*.pdf and belong in
# the supplementary_figure18 folder created at the top of this notebook; the figure4
# folder is never created by this notebook.
adata = anndata.read_h5ad(os.path.join('data', 'Zenodo', 'spain_preprocessed.h5ad'))
adata.obs['Relapse'] = adata.obs['Relapse'].astype('int').astype('str')

# Attach the compartment annotation of every cell, matched on (fov, label).
# A validated left merge keeps one output row per cell in the original order, so the
# positional assignment below is safe; unmatched cells raise instead of misaligning.
cell_table_clusters = pd.read_csv(os.path.join(align_dir, 'ecm_cell_annotations.csv'))
merged_df = pd.merge(adata.obs[['fov', 'label']], cell_table_clusters, on =['fov', 'label'],
                     how = 'left', validate = 'many_to_one', indicator = True)
n_unmatched = int((merged_df['_merge'] != 'both').sum())
if n_unmatched > 0:
    raise ValueError(f'{n_unmatched} cells have no entry in ecm_cell_annotations.csv')
merged_df = merged_df.drop(columns = '_merge')
adata.obs['compartment'] = merged_df['compartment'].values
adata.obs['Patientcompartment'] = adata.obs['Patient_ID'].astype('str') + adata.obs['compartment'].astype('str')
adata.obs['fovcompartment'] = adata.obs['fov'].astype('str') + adata.obs['compartment'].astype('str')

# Keep only fov/compartment combinations with at least `sketch_size` cells.
# Boolean selection replaces positional indexing of a string-indexed Series.
sketch_size = 500
fovcompartment_sizes = adata.obs.groupby(['fovcompartment']).size()
many_fovs = fovcompartment_sizes[fovcompartment_sizes >= sketch_size].index
adata = adata[np.isin(adata.obs['fovcompartment'], many_fovs)]

In [None]:
threshold_list = [0.6, 0.8, 0.95]
cancer_clusters = ['Cancer_1', 'Cancer_2', 'Cancer_3']

# ---- threshold-independent preparation (computed once, not once per threshold) ----
# Cells of `adata` that also occur in mdata['spatial_nhood'], matched on (fov, label).
# The observation name is carried through the merge as a column: pd.merge resets the
# index, so using the merged index would select the first n rows by position rather
# than the matched cells.
cell_keys = adata.obs[['fov', 'label']].copy()
cell_keys['obs_name'] = adata.obs_names
cell_keys = pd.merge(cell_keys,
                     mdata['spatial_nhood'].obs[['fov', 'label']].drop_duplicates(),
                     on = ['fov', 'label'])
index_names = list(cell_keys['obs_name'])
adata_red = adata[index_names, :].copy()

# Neighbourhood columns joined with the expression table on their shared columns
# (the cell identifier ends up in the 'index' column), restricted to cancer cells.
spatial_nhood_df = mdata['spatial_nhood'].to_df()
spatial_nhood_df['cell_cluster'] = mdata['spatial_nhood'].obs['cell_cluster']
spatial_nhood_df['compartment'] = mdata['spatial_nhood'].obs['compartment']
spatial_nhood_df = spatial_nhood_df.rename_axis('index').reset_index()
joint_df = pd.merge(spatial_nhood_df, mdata['expression'].to_df().rename_axis('index').reset_index())
joint_df = joint_df[np.isin(joint_df['cell_cluster'], cancer_clusters)]
cancer_total = joint_df['Cancer_1'] + joint_df['Cancer_2'] + joint_df['Cancer_3']

# Expression of the cancer cells in adata_red, scaled with qu.pp.standardize.
adata_cancer = adata_red[np.isin(adata_red.obs['cell_cluster'], cancer_clusters)]
df = adata_cancer.to_df()
df = pd.DataFrame(qu.pp.standardize(df), index = df.index, columns = df.columns)
cancer_compartment = adata_cancer.obs['compartment'].values

for threshold in threshold_list:
    # Integer percentage used in the output file name (np.int no longer exists in NumPy >= 1.24).
    idval = int(round(threshold * 100))

    # Cancer cells whose summed cancer columns reach the threshold.
    cell_ids = joint_df.loc[cancer_total >= threshold, 'index'].values

    adata_run = anndata.AnnData(df.copy())
    adata_run.obs['compartment'] = pd.Categorical(cancer_compartment)
    adata_run = adata_run[adata_run.obs['compartment'] != 'no_collagen'].copy()

    # Relabel aligned cells that reach the threshold, then drop the remaining 'aligned'
    # cells. Working on a real copy with a boolean mask guarantees the labels are
    # written (chained assignment on a view's .obs may silently do nothing).
    dense_aligned = (adata_run.obs['compartment'] == 'aligned').to_numpy() & np.isin(adata_run.obs_names, cell_ids)
    adata_run.obs['compartment_total'] = np.where(dense_aligned,
                                                  f'aligned cancer >= {threshold}',
                                                  adata_run.obs['compartment'].astype(object))
    adata_run = adata_run[adata_run.obs['compartment_total'] != 'aligned'].copy()
    adata_run.obs_names = [f'c_{i}' for i in range(adata_run.n_obs)]

    # Grouping stays on 'compartment': after the filter above its 'aligned' group
    # contains only the cells that reached the threshold.
    sc.pl.dotplot(adata_run, 
                var_names=['Collagen1', 'FAP', 'Fibronectin', 'Vim', 'SMA', 'GLUT1', 'Ki67'], 
                standard_scale = None, vmin = -1, vmax = 1, cmap = 'PuOr_r',dot_min = 0, dot_max = 0.8,
                groupby='compartment', save = f'cancer{idval}.pdf')

    # The saved plot is expected at figures/dotplot_cancer<idval>.pdf; move it next to the other panels.
    shutil.move(os.path.join('figures',f'dotplot_cancer{idval}.pdf'), os.path.join(save_directory, f'supplementary_figure18f{idval}.pdf'))