In [None]:
import os
import pandas as pd
import numpy as np
import quiche as qu
import matplotlib.pyplot as plt
import seaborn as sns
import anndata
from sketchKH import *
import scanpy as sc
import mudata
import matplotlib.cm as cm
import shutil
from scipy.stats import ranksums

sns.set_style('ticks')
%reload_ext autoreload
%load_ext autoreload
%autoreload 2
%matplotlib inline

In [None]:
# Output folder for all panels of Supplementary Figure 16.
save_directory = os.path.join('publications','supplementary_figures', 'supplementary_figure16')
qu.pp.make_directory(save_directory)
# Markers displayed in the differential-expression matrix plots (panels a-b).
functional_markers = ['PDL1','Ki67','GLUT1','CD45RO','CD69', 'PD1','CD57','TBET', 'TCF1',
                        'CD45RB', 'TIM3','IDO', 'LAG3', 'CD38', 'HLADR']

mdata = mudata.read_h5mu(os.path.join('data', 'tnbc_spain', 'mdata', 'mdata_ecm_alignment.h5mu'))
# The statistics columns are cast to float before any aggregation.
mdata['quiche'].var[['logFC', 'SpatialFDR', 'PValue']] = mdata['quiche'].var[['logFC', 'SpatialFDR', 'PValue']].astype('float')
quiche_var = mdata['quiche'].var

# Per-niche summary: median SpatialFDR (stored as 'pval') and mean logFC.
niche_groups = quiche_var.groupby('quiche_niche')
scores_df = niche_groups['SpatialFDR'].median().to_frame('pval')
scores_df['logFC'] = niche_groups['logFC'].mean()

# Niches annotated on at least 5 rows of the quiche `var` table.
niche_sizes = quiche_var['quiche_niche'].value_counts()
abundant_niches = niche_sizes[niche_sizes >= 5].index

# Keep niches that satisfy all three criteria at once. A boolean mask (rather
# than a set intersection followed by .loc) keeps the row order of scores_df,
# so `niches` comes out in the same order on every kernel session.
is_selected = (
    (scores_df['pval'] < 0.05)
    & scores_df.index.isin(abundant_niches)
    & ((scores_df['logFC'] > 0.5) | (scores_df['logFC'] < -0.5))
)
scores_df = scores_df[is_selected]
niches = list(scores_df.index)

cov_count_df = qu.tl.compute_patient_proportion(mdata,
                                niches = niches,
                                feature_key = 'quiche',
                                annot_key = 'quiche_niche',
                                patient_key = 'Patient_ID',
                                design_key = 'compartment',
                                patient_niche_threshold = 5)

# Both subsets require the niche to be counted in at least 2 patients.
enough_patients = cov_count_df['patient_count'] >= 2

# Negative mean logFC, restricted to rows of the 'unaligned' compartment.
cov_count_df_neg = cov_count_df[
    (cov_count_df['mean_logFC'] < 0)
    & enough_patients
    & (cov_count_df['compartment'] == 'unaligned')
]

# Positive mean logFC, restricted to rows of the 'aligned' compartment.
cov_count_df_pos = cov_count_df[
    (cov_count_df['mean_logFC'] > 0)
    & enough_patients
    & (cov_count_df['compartment'] == 'aligned')
]

In [None]:
# Keep an untouched copy of the expression modality, then overwrite its matrix
# with the standardized values.
# NOTE: the copy is taken from the current state of `mdata`, so re-running this
# cell without reloading `mdata` backs up (and re-standardizes) the already
# standardized matrix.
expression_adata = mdata['expression']
adata_expression_og = expression_adata.copy()
expression_adata.X = qu.pp.standardize(expression_adata.X)

In [None]:
# Arguments shared by both calls; only the list of niches differs between them.
functional_expression_kwargs = dict(
    mdata = mdata,
    labels_key = 'cell_cluster',
    annot_key = 'quiche_niche',
    fov_key = 'fov',
    segmentation_label_key = 'label',
    patient_key = 'fovcompartment',
    min_cell_count = 3,
    foldchange_key = 'logFC',
    markers = mdata['expression'].var_names,
)

# Functional expression for the niches retained in cov_count_df_pos.
adata_func_pos = qu.tl.compute_functional_expression(
    sig_niches = list(cov_count_df_pos['quiche_niche'].values),
    **functional_expression_kwargs,
)

# Functional expression for the niches retained in cov_count_df_neg.
adata_func_neg = qu.tl.compute_functional_expression(
    sig_niches = list(cov_count_df_neg['quiche_niche'].values),
    **functional_expression_kwargs,
)

## Supplementary Figure 16a-b

In [None]:
def _expression_with_cluster(adata):
    """Return `adata` as a DataFrame with its 'cell_cluster' annotation appended as a column."""
    expression_df = adata.to_df()
    expression_df['cell_cluster'] = adata.obs['cell_cluster']
    return expression_df


pos_df = _expression_with_cluster(adata_func_pos)
neg_df = _expression_with_cluster(adata_func_neg)
total_df = _expression_with_cluster(mdata['expression'])

# Panel a uses the positive-niche table, panel b the negative-niche table; both
# are plotted against the table built from the full expression modality.
# After each plot the PDF is moved from the 'figures' folder (file name prefixed
# with 'matrixplot_') into the save directory.
for figure_name, niche_df in (('supplementary_figure16a', pos_df),
                              ('supplementary_figure16b', neg_df)):
    qu.pl.plot_diff_func(niche_df, total_df, 'cell_cluster', functional_markers, (5.5, 3.5),
                         cmap = 'PuOr_r', filename_save = figure_name)
    shutil.move(os.path.join('figures', f'matrixplot_{figure_name}.pdf'),
                os.path.join(save_directory, f'{figure_name}.pdf'))

## Supplementary Figure 16c

In [None]:
cancer_clusters = ['Cancer_1', 'Cancer_2', 'Cancer_3']
morph_columns = ['nc_ratio', 'area', 'eccentricity', 'major_axis_length', 'minor_axis_length']

# Restrict the spatial neighbourhood modality to cells of the three cancer clusters.
spatial_adata = mdata['spatial_nhood']
is_cancer_cell = spatial_adata.obs['cell_cluster'].isin(cancer_clusters).to_numpy()
adata_sub = spatial_adata[is_cancer_cell].copy()

# Raw morphology measurements plus a derived elongation (major / minor axis length).
morph_raw = adata_sub.obs[morph_columns].copy()
morph_raw['elongation'] = morph_raw['major_axis_length'] / morph_raw['minor_axis_length']

# Standardize all measurements, keeping the original row and column labels.
morph_measurements = pd.DataFrame(qu.pp.standardize(morph_raw),
                                  index = morph_raw.index,
                                  columns = morph_raw.columns)
morph_measurements['compartment'] = adata_sub.obs['compartment']
morph_measurements['Patient_ID'] = adata_sub.obs['Patient_ID']

# Mean per (compartment, patient), reshaped to long form: after the double
# unstack and reset_index the columns are 'level_0' (measurement name),
# 'Patient_ID', 'compartment' and 0 (the mean value).
patient_means = morph_measurements.groupby(['compartment', 'Patient_ID']).mean()
morph_measurementsv2 = patient_means.unstack().unstack().reset_index()

In [None]:
# One panel per morphology measurement, each comparing the compartments.
morph_panels = ['nc_ratio', 'area', 'eccentricity', 'elongation']
fig, axes = plt.subplots(1, 4, figsize=(12, 3.5), sharey=False)

for ax, morph in zip(axes, morph_panels):
    panel_data = morph_measurementsv2[morph_measurementsv2['level_0'] == morph]
    panel_values = panel_data[0]

    # Keep only values within 1.5 * IQR of the quartiles (bounds inclusive);
    # the quartiles are computed over all rows of this measurement together.
    q1 = panel_values.quantile(0.25)
    q3 = panel_values.quantile(0.75)
    whisker = 1.5 * (q3 - q1)
    filtered_data = panel_data[panel_values.between(q1 - whisker, q3 + whisker)]

    # Wilcoxon rank-sum test, run only when exactly two compartments remain.
    compartment_values = [
        filtered_data.loc[filtered_data['compartment'] == compartment, 0]
        for compartment in filtered_data['compartment'].unique()
    ]
    p_value = None
    if len(compartment_values) == 2:
        _, p_value = ranksums(compartment_values[0], compartment_values[1])

    # White boxes with hidden fliers, overlaid with the individual points.
    sns.boxplot(x='compartment', y=0, data=filtered_data, width=0.5, fliersize=0, color='white', ax=ax)
    sns.stripplot(x='compartment', y=0, data=filtered_data, color='gray', alpha=0.8, ax=ax)
    ax.tick_params(labelsize=10)

    if p_value is None:
        panel_title = f"{morph} (p-value not computed)"
    elif p_value < 0.001:
        panel_title = f"{morph} (p < 0.001)"
    else:
        panel_title = f"{morph} (p = {p_value:.3f})"
    ax.set_title(panel_title)

    ax.set_xlabel("ECM region", fontsize = 12)
    ax.set_ylabel('')

plt.tight_layout()
plt.savefig(os.path.join(save_directory,'supplementary_figure16c.pdf'))