In [None]:
import os
import pandas as pd
import numpy as np
import quiche as qu
import matplotlib.pyplot as plt
import seaborn as sns
import anndata
import scanpy as sc
from sketchKH import sketch
import mudata
import imageio as io
import shutil
from scipy.stats import ranksums
from matplotlib import cm
from ark.utils.plot_utils import color_segmentation_by_stat
from alpineer import image_utils, io_utils, load_utils, misc_utils
import matplotlib.patheffects as patheffects
from matplotlib.patches import Rectangle

sns.set_style('ticks')

%reload_ext autoreload
%load_ext autoreload
%autoreload 2
%matplotlib inline

## Tumor border analysis

In [None]:
# Marker panels, cell-type ordering and colour palettes shared by the figure 4 cells below.

# Lineage-defining markers.
phenotypic_markers = ['ECAD', 'CK17', 'CD45', 'CD3', 'CD4', 'CD8', 'FOXP3', 'CD20', 'CD56', 'CD14', 'CD68',
                    'CD163', 'CD11c', 'HLADR', 'ChyTr', 'Calprotectin', 'FAP', 'SMA', 'Vim', 'Fibronectin',
                    'Collagen1', 'CD31']

# Functional-state markers (used as the marker list for the differential expression plots).
# NOTE(review): 'HLADR' is listed in both panels, so var_names below contains it twice — confirm this is intended.
functional_markers = ['PDL1','Ki67','GLUT1','CD45RO','CD69', 'PD1','CD57','TBET', 'TCF1',
                        'CD45RB', 'TIM3','IDO', 'LAG3', 'CD38', 'HLADR']

var_names = phenotypic_markers+functional_markers

# Display order of cell types (passed as node_order to the niche network plots).
cell_ordering = ['Cancer_1', 'Cancer_2', 'Cancer_3', 'CD4T', 'CD8T', 'Treg', 'T_Other', 'B', 
                 'NK', 'CD68_Mac', 'CD163_Mac', 'Mac_Other', 'Monocyte', 'APC','Mast', 'Neutrophil',
                 'CAF', 'Fibroblast', 'Smooth_Muscle', 'Endothelium']

# Hex colour per cell type.
# NOTE(review): 'Immune_Other' and 'Other' have colours here but no entry in cell_ordering or lineage_dict.
colors_dict_cells = {'APC': '#700548',
 'B': '#005377',
 'CAF': '#f2cc8f',
 'CD4T': '#ebb3a9',
 'CD8T': '#ff5666',
 'CD68_Mac': '#ffa52f',
 'CD163_Mac': '#788AA3',
 'Cancer_1': '#66cdaa',
 'Cancer_2': '#3d405b',
 'Cancer_3': '#b49ab8',
 'Endothelium': '#f78e69',
 'Fibroblast': '#2d9bd5',
 'Immune_Other': '#366962',
 'Mac_Other': '#c7d66d',
 'Mast': '#E36414',
 'Monocyte': '#CC6690',
 'NK': '#9ee2ff',
 'Neutrophil': '#4a7c59',
 'Other': '#FFBF69',
 'Smooth_Muscle': '#f5ebe0',
 'T_Other': '#901C14',
 'Treg': '#9e8576'}

# Hex colour per lineage (the values of lineage_dict).
colors_dict = {'myeloid':'#4DCCBD',
               'lymphoid':'#279AF1',
               'tumor':'#FF8484',
               'structural':'#F9DC5C'}

# Cell type -> lineage lookup.
lineage_dict = {'APC':'myeloid',
 'B':'lymphoid',
 'CAF': 'structural',
 'CD4T': 'lymphoid',
 'CD8T': 'lymphoid',
 'CD68_Mac': 'myeloid',
 'CD163_Mac': 'myeloid',
 'Cancer_1': 'tumor',
 'Cancer_2': 'tumor',
 'Cancer_3': 'tumor',
 'Endothelium':'structural',
 'Fibroblast': 'structural',
 'Mac_Other': 'myeloid',
 'Mast':'myeloid',
 'Monocyte':'myeloid',
 'NK':'lymphoid',
 'Neutrophil':'myeloid',
 'Smooth_Muscle':'structural',
 'T_Other':'lymphoid',
 'Treg':'lymphoid'}

# High-resolution defaults for every scanpy figure produced below.
sc.set_figure_params(dpi=400, dpi_save=400, fontsize=14)

# Output folder for the figure 4 panels.
save_directory = os.path.join('publications', 'figures', 'figure4')

# Preprocessed cohort; Relapse is cast int -> str so it is stored as a string label ('0'/'1'-style).
preprocessed_path = os.path.join('data', 'Zenodo', 'spain_preprocessed.h5ad')
adata = anndata.read_h5ad(preprocessed_path)
relapse_as_int = adata.obs['Relapse'].astype('int')
adata.obs['Relapse'] = relapse_as_int.astype('str')

In [None]:
# sketch_size is passed to qu.pp.filter_fovs together with the 'Patient_ID' key.
# NOTE(review): this cell overwrites `adata` with the filtered result, so it is not idempotent —
# re-run the loading cell first if the unfiltered object is needed again.
sketch_size = 1000
adata  = qu.pp.filter_fovs(adata, 'Patient_ID', sketch_size)

In [None]:
# Attach the per-cell tumor-region annotation ('mask_name') to adata.obs as 'compartment'.
# NOTE(review): absolute path on a mounted network share — consider a configurable data root.
cell_table_clusters = pd.read_csv('/Volumes/Shared/Noah Greenwald/TNBC_Cohorts/SPAIN/intermediate_files/mask_dir/individual_masks-no_tagg_tls/cell_annotation_mask.csv')

# The annotation is written back positionally (.values), so the merged frame must have exactly
# one row per cell, in adata.obs order. A left join guarantees that; `validate` fails fast if the
# annotation table repeats a (fov, label) key, and `indicator` lets us detect unannotated cells.
merged_df = pd.merge(adata.obs[['fov', 'label']], cell_table_clusters, on=['fov', 'label'],
                     how='left', validate='many_to_one', indicator=True)
n_unannotated = int((merged_df['_merge'] == 'left_only').sum())
if n_unannotated > 0:
    raise ValueError(f'{n_unannotated} cells in adata.obs have no row in the compartment annotation table')

adata.obs['compartment'] = merged_df['mask_name'].values
# Patient x compartment identifier (used as the test key by the commented-out QUICHE run).
adata.obs['Patientcompartment'] = adata.obs['Patient_ID'].astype('str') + adata.obs['compartment'].astype('str')

In [None]:
# Cached step: the QUICHE run below is intentionally left commented out; the following cell reads a saved result from disk.
# NOTE(review): this block would write 'mdata_region_100.h5mu', whereas the following cell loads
# 'mdata_tumor_region.h5mu' — confirm which file corresponds to these parameters.
# design = '~compartment'
# model_contrasts = 'compartmentcancer_border-compartmentstroma_core'

# mdata, sig_niches_spain = qu.tl.run_quiche(adata, radius = 100, labels_key = 'cell_cluster', spatial_key = 'spatial',
#                                     fov_key = 'fov', sketch_key = 'Patient_ID', n_neighbors = 30, merge = False, test_key= 'Patientcompartment',
#                                     delaunay = False, min_cells = 3, k_sim = 100, design = design, khop = None, label_scheme='normal',
#                                     model_contrasts = model_contrasts, sketch_size = sketch_size, nlargest = 3, annotation_key = 'quiche_niche', n_jobs = 8)
# mdata['quiche'].var = mdata['quiche'].var.astype('str')
# mdata.write_h5mu(os.path.join('data', 'tnbc_spain', 'mdata', 'mdata_region_100.h5mu'))

In [None]:
#load in data to save runtime
# NOTE(review): the commented-out run in the previous cell writes 'mdata_region_100.h5mu', not the file read here — confirm.
mdata = mudata.read_h5mu(os.path.join('data', 'tnbc_spain', 'mdata', 'mdata_tumor_region.h5mu'))
sig_threshold = 0.05
annotation_key = 'quiche_niche'
sig_key = 'SpatialFDR'
# The statistics are cast back to float before aggregating (the commented-out run stores var as strings).
mdata['quiche'].var[['logFC', 'SpatialFDR']] = mdata['quiche'].var[['logFC', 'SpatialFDR']].astype('float')
quiche_var = mdata['quiche'].var

# Per-niche summary: median SpatialFDR and mean logFC over all neighbourhoods carrying that label.
scores_df = pd.DataFrame(quiche_var.groupby(annotation_key)[sig_key].median())
scores_df.columns = ['pval']
scores_df['logFC'] = quiche_var.groupby(annotation_key)['logFC'].mean()

# Keep significant niches with |mean logFC| >= 1 ...
scores_df = scores_df[scores_df['pval'] <= sig_threshold]
scores_df = scores_df[(scores_df['logFC'] <= -1) | (scores_df['logFC'] >= 1)]
# ... that label at least 5 neighbourhoods. A boolean mask (rather than a set intersection) keeps
# the row order deterministic; set iteration order over strings varies between interpreter runs.
niche_sizes = quiche_var[annotation_key].value_counts()
well_supported = niche_sizes[niche_sizes >= 5].index
scores_df = scores_df[scores_df.index.isin(well_supported)]
ids = list(scores_df.index)

cov_count_df = qu.tl.compute_patient_proportion(mdata,
                                niches = scores_df.index,
                                feature_key = 'quiche',
                                annot_key = 'quiche_niche',
                                patient_key = 'Patient_ID',
                                design_key = 'compartment',
                                patient_niche_threshold = 5)

# Negative mean logFC, found in >= 2 patients, rows for the stroma_core compartment.
cov_count_df_neg = cov_count_df[cov_count_df['mean_logFC'] < 0]
cov_count_df_neg = cov_count_df_neg[cov_count_df_neg['patient_count'] >= 2]
cov_count_df_neg = cov_count_df_neg[cov_count_df_neg['compartment'] == 'stroma_core']

# Positive mean logFC, found in >= 2 patients, rows for the cancer_border compartment.
cov_count_df_pos = cov_count_df[cov_count_df['mean_logFC'] > 0]
cov_count_df_pos = cov_count_df_pos[cov_count_df_pos['patient_count'] >= 2]
cov_count_df_pos = cov_count_df_pos[cov_count_df_pos['compartment'] == 'cancer_border']

In [None]:
# Recompute niche compositions for every cell and give each a 'retro' niche label.
adata = qu.tl.compute_spatial_neighbors(adata, radius = 100, n_neighbors = 30, spatial_key = 'spatial', delaunay = False, fov_key = 'fov', coord_type = 'generic')
adata_niche, cells_nonn = qu.tl.compute_niche_composition(adata, labels_key = 'cell_cluster', min_cells = 3)
# Drop cells whose niche vector is all zeros.
adata_niche = adata_niche[np.where(pd.DataFrame(adata_niche.X).sum(1) != 0)[0], :].copy()
annotations_niche = qu.tl.compute_niche_abundance_fov(adata_niche.to_df(), nlargest = 3, min_perc = 0.1)
adata_niche.obs['retro'] = annotations_niche.values

# For cells that are index cells in the QUICHE result, overwrite the label with the QUICHE niche.
index_df = adata_niche.obs.reset_index()
# Take the cell ids from obs_names (same row order as reset_index) rather than assuming the
# reset column is literally called 'index', which only holds when the obs index is unnamed.
index_df['index_cell'] = adata_niche.obs_names.values
annotation_df = mdata['quiche'].var[['index_cell', 'quiche_niche', 'SpatialFDR']]
# Merge once and reuse it for both the row selector and the labels so the two stay aligned.
matched_df = pd.merge(index_df, annotation_df, on = 'index_cell')
index_cells = matched_df['index_cell'].values
adata_niche.obs.loc[index_cells, 'retro'] = list(matched_df['quiche_niche'])

## Figure 4a

In [None]:
# Input locations for the image overlays and the output sub-folder for this figure.
# NOTE(review): data_dir and seg_dir are absolute paths on a mounted network share, so this cell
# only runs where that volume is available.
data_dir = r'/Volumes/Shared/Noah Greenwald/TNBC_Cohorts/SPAIN/image_data/samples'
seg_dir = r'/Volumes/Shared/Noah Greenwald/TNBC_Cohorts/SPAIN/segmentation/samples/deepcell_output'
save_directory_ = os.path.join(save_directory, 'overlay')
qu.pp.make_directory(save_directory_)

# FOVs shown in the tumor-border panels; the niche plotting loop pairs them by position with niche_list.
fov_list = ['TMA32_R2C8', 'TMA39_R10C4']

In [None]:
##plot expression
# RGB triplets (one row per colour) handed to qu.pl.plot_overlay.
channel_to_rgb = np.array([
    [0.0, 0.0, 1.0],  # Blue
    [1.0, 0.0, 1.0],  # Magenta
    [1.0, 1.0, 0.0],  # Yellow
    [1.0, 0.0, 0.0],  # Red
    [0.0, 1.0, 0.0]   # Green
])
# One overlay per FOV, saved into save_directory_ as '<fov>_expression_overlay'.
for fov in fov_list:
    qu.pl.plot_overlay(seg_dir, data_dir, fov, ['ECAD', 'CD45'], ["H3K27me3", "H3K9ac"], channel_to_rgb, save_directory_, f'{fov}_expression_overlay')

## Figure 4b

In [None]:
##plot tumor regions
save_directory_ = os.path.join(save_directory, 'overlay', 'region')
qu.pp.make_directory(save_directory_)

# Colour per compartment; both stroma compartments share one colour.
compartment_names = ['cancer_core', 'cancer_border', 'stroma_core', 'stroma_border']
compartment_colors = ['blue', 'deepskyblue', '#8E6E96', '#8E6E96']
compartment_colormap = pd.DataFrame({'compartment': compartment_names, 'color': compartment_colors})

# Colour each segmented cell of the selected FOVs by its compartment.
region_plot_kwargs = dict(
    fovs=fov_list,
    save_dir=save_directory_,
    cell_data=adata.obs.loc[:, ['fov', 'compartment', 'label']],
    erode=True,
    seg_dir=seg_dir,
    fov_col='fov',
    label_col='label',
    cluster_col='compartment',
    seg_suffix="_whole_cell.tiff",
    unassigned_color=np.array([0, 0, 0, 1]),
    cmap=compartment_colormap,
    display_fig=False,
)
qu.pl.cohort_cluster_plot(**region_plot_kwargs)

## Figure 4c

In [None]:
# Recolour Cancer_1 and Fibroblast to the colours used for cancer_border / stroma in the region plot.
# NOTE: this mutates the shared colors_dict_cells in place, so every later cell sees the new colours.
colors_dict_cells['Cancer_1'] = 'deepskyblue'
colors_dict_cells['Fibroblast'] = '#8E6E96'
# Niches to visualise; paired by position with fov_list in the plotting loop.
niche_list = ['CD8T__Cancer_1', 'CD4T__Fibroblast']

In [None]:
##plot niches
save_directory_ = os.path.join(save_directory, 'overlay', 'region', 'niches')
qu.pp.make_directory(save_directory_)

for i, niche in enumerate(niche_list):
    # Niches and FOVs are paired by position.
    fov = fov_list[i]
    cell_list = niche.split('__')

    # Panel 1: logFC score of the niche on this FOV.
    qu.pl.plot_niche_score(mdata, niche, fov, seg_dir,
                           metric = 'logFC', vmin = -2, vmax = 2, fontsize = 12, cmap = 'vlag',
                           background = [0.3, 0.3, 0.3, 1], figsize = (6, 6),
                           save_directory = save_directory_, filename_save = niche+'_'+fov)

    # Panel 2: cells labelled with this niche, restricted to the cell types named in the niche.
    subset_mdata = adata_niche[adata_niche.obs['retro'] == niche]
    df_cells = subset_mdata.to_df()
    for obs_col in ('cell_cluster', 'label', 'fov'):
        df_cells[obs_col] = subset_mdata.obs[obs_col]
    keep_rows = np.isin(df_cells.cell_cluster, cell_list)
    df_cells = df_cells[keep_rows]

    # Two-column palette table (cell type, colour) built from the shared colour dictionary.
    colormap = pd.DataFrame({'cell_cluster': list(colors_dict_cells.keys()),
                             'color': list(colors_dict_cells.values())})

    cluster_plot_kwargs = dict(
        fovs=[fov],
        seg_dir=seg_dir,
        save_dir=save_directory_,
        cell_data=df_cells,
        erode=True,
        fov_col='fov',
        label_col='label',
        cluster_col='cell_cluster',
        seg_suffix="_whole_cell.tiff",
        cmap=colormap,
        unassigned_color=np.array([0.3, 0.3, 0.3, 1]),
        fig_file_type='pdf',
        display_fig=False,
    )
    qu.pl.cohort_cluster_plot(**cluster_plot_kwargs)

## Figure 4d-e

In [None]:
# Styling shared by both donut network plots; only the graph and the output name differ.
donut_style = dict(
    figsize=(6, 6), node_order=cell_ordering, buffer=1.5, weightscale=0.05, edge_color='#1D265E',
    centrality_measure='eigenvector', colors_dict=colors_dict, curvature=0.2, font_size=12,
    save_directory=save_directory, min_node_size=20, max_node_size=850, lineage_dict=lineage_dict,
    donut_radius_inner=1.15, donut_radius_outer=1.25, vmin=-10, vmax=180,
    edge_cmap=cm.bone_r, edge_label='Patients',
)

# Figure 4d: network built from the positive-logFC (cancer_border) niches.
G2 = qu.tl.compute_niche_network(cov_count_df=cov_count_df_pos, colors_dict=colors_dict,
                                 lineage_dict=lineage_dict, annot_key='quiche_niche')
qu.pl.plot_niche_network_donut(G=G2, filename_save='figure4d', **donut_style)

# Figure 4e: network built from the negative-logFC (stroma_core) niches.
G1 = qu.tl.compute_niche_network(cov_count_df=cov_count_df_neg, colors_dict=colors_dict,
                                 lineage_dict=lineage_dict, annot_key='quiche_niche')
qu.pl.plot_niche_network_donut(G=G1, filename_save='figure4e', **donut_style)

## Figure 4f-g

In [None]:
# Keep an untouched copy of the expression modality, then overwrite X in place with the
# output of qu.pp.standardize.
# NOTE(review): re-running this cell overwrites adata_expression_og with the already-standardized
# data and standardizes again — reload mdata first if the raw values are needed.
adata_expression_og = mdata['expression'].copy()
mdata['expression'].X = qu.pp.standardize(mdata['expression'].X)

In [None]:
# Cached step: the functional-expression computation below is left commented out; the two .h5ad
# files it writes are read back in the following cell.
# adata_func_pos = qu.tl.compute_functional_expression(mdata = mdata,
#                                 sig_niches = list(cov_count_df_pos['quiche_niche'].values),
#                                 labels_key = 'cell_cluster',
#                                 annot_key = 'quiche_niche',
#                                 fov_key = 'fov',
#                                 segmentation_label_key = 'label',
#                                 patient_key = 'Patient_ID',
#                                 min_cell_count = 3,
#                                 foldchange_key = 'logFC',
#                                 markers = mdata['expression'].var_names)

# adata_func_neg = qu.tl.compute_functional_expression(mdata = mdata,
#                                 sig_niches = list(cov_count_df_neg['quiche_niche'].values),
#                                 labels_key = 'cell_cluster',
#                                 annot_key = 'quiche_niche',
#                                 fov_key = 'fov',
#                                 segmentation_label_key = 'label',
#                                 patient_key = 'Patient_ID',
#                                 min_cell_count = 3,
#                                 foldchange_key = 'logFC',
#                                 markers = mdata['expression'].var_names)

# adata_func_pos.write_h5ad(os.path.join('data', 'tnbc_spain', 'mdata', 'adata_func_pos_region.h5ad'))
# adata_func_neg.write_h5ad(os.path.join('data', 'tnbc_spain', 'mdata','adata_func_neg_region.h5ad'))

In [None]:
## load in to save on runtime 
# Both cached objects live in the same folder; build that path once.
func_cache_dir = os.path.join('data', 'tnbc_spain', 'mdata')
adata_func_pos = anndata.read_h5ad(os.path.join(func_cache_dir, 'adata_func_pos_region.h5ad'))
adata_func_neg = anndata.read_h5ad(os.path.join(func_cache_dir, 'adata_func_neg_region.h5ad'))

In [None]:
def _expression_with_cluster(adata_obj):
    """Return adata_obj's expression table with its obs 'cell_cluster' column appended."""
    frame = adata_obj.to_df()
    frame['cell_cluster'] = adata_obj.obs['cell_cluster']
    return frame


# Expression tables for the positive niches, the negative niches, and the full cohort (reference).
pos_df = _expression_with_cluster(adata_func_pos)
neg_df = _expression_with_cluster(adata_func_neg)
total_df = _expression_with_cluster(mdata['expression'])

# Figure 4f: positive niches vs. cohort. The plot is written under 'figures/' with a
# 'matrixplot_' prefix, so it is moved into the figure folder afterwards.
qu.pl.plot_diff_func(pos_df, total_df, 'cell_cluster', functional_markers, (5.5, 4.25),
                     cmap = 'PuOr_r', filename_save = 'figure4f.pdf')
shutil.move(os.path.join('figures', 'matrixplot_figure4f.pdf'),
            os.path.join(save_directory, 'figure4f.pdf'))

# Figure 4g: negative niches vs. cohort.
qu.pl.plot_diff_func(neg_df, total_df, 'cell_cluster', functional_markers, (5.5, 4.75),
                     cmap = 'PuOr_r', filename_save = 'figure4g.pdf')
shutil.move(os.path.join('figures', 'matrixplot_figure4g.pdf'),
            os.path.join(save_directory, 'figure4g.pdf'))

## ECM analysis

### Niche analysis

In [None]:
# Input locations for the ECM (fiber) analysis.
# NOTE(review): absolute paths on a mounted network share; seg_dir and data_dir repeat the values set for the tumor-border overlays.
directory = '/Volumes/Shared/Noah Greenwald/TNBC_Cohorts/SPAIN/intermediate_files/fiber_segmentation_processed_data'
seg_dir = '/Volumes/Shared/Noah Greenwald/TNBC_Cohorts/SPAIN/segmentation/samples/deepcell_output'
align_dir = '/Volumes/Shared/Noah Greenwald/TNBC_Cohorts/SPAIN/intermediate_files/mask_dir/alignment'
data_dir = '/Volumes/Shared/Noah Greenwald/TNBC_Cohorts/SPAIN/image_data/samples'
sc.set_figure_params(dpi = 400, dpi_save = 400, fontsize = 14)

# Per-fiber measurements, including the 'alignment_score' column used below.
fiber_table = pd.read_csv(os.path.join(directory, 'fiber_object_table.csv'))

save_directory = os.path.join('publications', 'figures', 'figure4')
qu.pp.make_directory(save_directory)

# FOVs and niches shown in the ECM panels; paired by position in the plotting loop.
fov_list = ['TMA44_R3C3', 'TMA38_R3C8']
niche_list = ['Fibroblast__Neutrophil','B__Cancer_1']

# NOTE(review): alignment_scores is not referenced anywhere else in the visible notebook.
alignment_scores = fiber_table['alignment_score'].dropna()
# Two-level alignment bin: 0.01 for scores <= 0.5, 1 for scores > 0.5. The trailing `else 0`
# is reached only when both comparisons are False (e.g. a NaN score).
fiber_table['alignment_bin_2'] = fiber_table['alignment_score'].apply(lambda x: 0.01 if x <= 0.5 else 1 if x > 0.5 else 0)

In [None]:
# Marker panels and cell ordering (same values as defined for the tumor-border section).
phenotypic_markers = ['ECAD', 'CK17', 'CD45', 'CD3', 'CD4', 'CD8', 'FOXP3', 'CD20', 'CD56', 'CD14', 'CD68',
                    'CD163', 'CD11c', 'HLADR', 'ChyTr', 'Calprotectin', 'FAP', 'SMA', 'Vim', 'Fibronectin',
                    'Collagen1', 'CD31']

functional_markers = ['PDL1','Ki67','GLUT1','CD45RO','CD69', 'PD1','CD57','TBET', 'TCF1',
                        'CD45RB', 'TIM3','IDO', 'LAG3', 'CD38', 'HLADR']

var_names = phenotypic_markers+functional_markers

cell_ordering = ['Cancer_1', 'Cancer_2', 'Cancer_3', 'CD4T', 'CD8T', 'Treg', 'T_Other', 'B', 
                 'NK', 'CD68_Mac', 'CD163_Mac', 'Mac_Other', 'Monocyte', 'APC','Mast', 'Neutrophil',
                 'CAF', 'Fibroblast', 'Smooth_Muscle', 'Endothelium']

sc.set_figure_params(dpi = 400, dpi_save = 400, fontsize = 14)

# Reload the unfiltered cohort for the ECM analysis.
save_directory = os.path.join('publications', 'figures', 'figure4')
adata = anndata.read_h5ad(os.path.join('data', 'Zenodo', 'spain_preprocessed.h5ad'))
adata.obs['Relapse'] = adata.obs['Relapse'].astype('int').astype('str')

# Attach the per-cell ECM compartment. The column is assigned positionally (.values), so the
# merged frame must have exactly one row per cell in adata.obs order: use a validated left join
# and fail with a clear message if any cell has no annotation.
cell_table_clusters = pd.read_csv(os.path.join(align_dir, 'ecm_cell_annotations.csv'))
merged_df = pd.merge(adata.obs[['fov', 'label']], cell_table_clusters, on =['fov', 'label'],
                     how = 'left', validate = 'many_to_one', indicator = True)
n_unannotated = int((merged_df['_merge'] == 'left_only').sum())
if n_unannotated > 0:
    raise ValueError(f'{n_unannotated} cells in adata.obs have no row in ecm_cell_annotations.csv')
adata.obs['compartment'] = merged_df['compartment'].values
adata.obs['Patientcompartment'] = adata.obs['Patient_ID'].astype('str') + adata.obs['compartment'].astype('str')
adata.obs['fovcompartment'] = adata.obs['fov'].astype('str') + adata.obs['compartment'].astype('str')

# Keep only FOV x compartment groups with at least sketch_size cells. A boolean mask replaces
# positional integer indexing of a label-indexed Series, which pandas has deprecated.
sketch_size = 500
fovcompartment_sizes = adata.obs.groupby(['fovcompartment']).size()
many_fovs = fovcompartment_sizes[fovcompartment_sizes >= sketch_size].index
# .copy() materializes the subset so later cells modify a real AnnData rather than a view.
adata = adata[np.isin(adata.obs['fovcompartment'], many_fovs)].copy()

In [None]:
# Cached step: the step-by-step QUICHE run for the ECM alignment contrast is left commented out;
# the saved 'mdata_ecm_alignment.h5mu' it writes is read back in the following cell.
# NOTE(review) before re-enabling:
#   - construct_niche_similarity_graph is called twice (before and after removing 'no_collagen');
#   - mdata['quiche'].var.astype('str') appears twice in a row;
#   - the bare `except:` would hide any error raised while assigning the annotations.
# khop = None
# radius = 50
# n_neighbors = 10
# labels_key = 'cell_cluster'
# spatial_key = 'spatial'
# coord_type = 'generic'
# fov_key = 'fov'
# n_jobs = -1
# min_cells = 3
# delaunay = False
# sketch_key = 'fovcompartment'
# test_key = 'fovcompartment'
# gamma = 1
# frequency_seed = 0
# k_sim = 100

# if khop is not None:
#     niche_df, _ = qu.tl.spatial_niches_khop(adata, radius = radius, p = 2, k = n_neighbors, khop = khop, min_cell_threshold = 0, labels_key = labels_key, spatial_key = spatial_key, fov_key = fov_key, n_jobs = n_jobs)
#     adata_niche = anndata.AnnData(niche_df)
#     adata_niche.obs = adata.obs.loc[niche_df.index, :]
# else:
#     adata = qu.tl.compute_spatial_neighbors(adata, radius = radius, n_neighbors = n_neighbors, spatial_key = spatial_key, delaunay = delaunay, fov_key = fov_key, coord_type = coord_type)
#     adata_niche, cells_nonn = qu.tl.compute_niche_composition(adata, labels_key = labels_key, min_cells = min_cells)

# adata_niche = adata_niche[np.where(pd.DataFrame(adata_niche.X).sum(1) != 0)[0], :].copy()
# adata = adata[np.where(pd.DataFrame(adata_niche.X).sum(1) != 0)[0], :].copy()

# if sketch_size is None:
#     adata_niche_subsample = adata_niche.copy()
# else:
#     _, adata_niche_subsample = sketch(adata_niche, sample_set_key = sketch_key, gamma = gamma, num_subsamples = sketch_size, frequency_seed = frequency_seed, n_jobs = n_jobs)
# adata_niche_subsample = qu.tl.construct_niche_similarity_graph(adata_niche_subsample, k = k_sim, n_jobs = n_jobs)

# annotation_key = 'quiche_niche'
# feature_key = 'spatial_nhood'
# design = '~compartment'
# model_contrasts = 'compartmentaligned-compartmentunaligned'
# adata_niche_subsample = adata_niche_subsample[~adata_niche_subsample.obs['fovcompartment'].str.contains('no_collagen')].copy()
# adata_niche_subsample = qu.tl.construct_niche_similarity_graph(adata_niche_subsample, k = k_sim, n_jobs = n_jobs)
# mdata = qu.tl.quicheDA(adata_niche_subsample, design = design, model_contrasts=model_contrasts, patient_key = test_key)
# annotations = qu.tl.label_niches(mdata, nlargest = 3, min_perc = 0.1)
# try:
#     mdata['milo'].var[annotation_key] = annotations.values
# except:
#     mdata['milo'].var[annotation_key] = annotations
# mdata = mudata.MuData({'expression': adata, feature_key: mdata['spatial_nhood'], 'quiche': mdata['milo']})
# mdata['quiche'].var[mdata['spatial_nhood'].obs.columns] = mdata['spatial_nhood'].obs.values
# mdata[feature_key].obs[annotation_key] = mdata['quiche'].var[annotation_key].values
# mdata['quiche'].var = mdata['quiche'].var.astype('str')

# mdata['quiche'].var = mdata['quiche'].var.astype('str')
# mdata.write_h5mu(os.path.join('data', 'tnbc_spain', 'mdata', 'mdata_ecm_alignment.h5mu'))

In [None]:
## load in data to save on runtime
mdata = mudata.read_h5mu(os.path.join('data', 'tnbc_spain', 'mdata', 'mdata_ecm_alignment.h5mu'))
# The statistics are cast back to float before aggregating (the commented-out run stores var as strings).
mdata['quiche'].var[['logFC', 'SpatialFDR', 'PValue']] = mdata['quiche'].var[['logFC', 'SpatialFDR', 'PValue']].astype('float')
quiche_var = mdata['quiche'].var

# Per-niche summary: median SpatialFDR and mean logFC over all neighbourhoods carrying that label.
scores_df = pd.DataFrame(quiche_var.groupby('quiche_niche')['SpatialFDR'].median())
scores_df.columns = ['pval']
scores_df['logFC'] = quiche_var.groupby('quiche_niche')['logFC'].mean()
scores_df = scores_df[scores_df['pval'] < 0.05]

# Keep niches that label at least 5 neighbourhoods. A boolean mask (rather than a set
# intersection) keeps the row order — and therefore `niches` — deterministic between runs.
niche_sizes = quiche_var['quiche_niche'].value_counts()
well_supported = niche_sizes[niche_sizes >= 5].index
scores_df = scores_df[scores_df.index.isin(well_supported)]
ids = list(scores_df.index)

# Effect-size filter: |mean logFC| > 0.5.
scores_df = scores_df[(scores_df.logFC > 0.5) | (scores_df.logFC < -0.5)]
niches = list(scores_df.index)

cov_count_df = qu.tl.compute_patient_proportion(mdata,
                                niches = niches,
                                feature_key = 'quiche',
                                annot_key = 'quiche_niche',
                                patient_key = 'Patient_ID',
                                design_key = 'compartment',
                                patient_niche_threshold = 5)
# Negative mean logFC, found in >= 2 patients, rows for the unaligned compartment.
cov_count_df_neg = cov_count_df[cov_count_df['mean_logFC'] < 0]
cov_count_df_neg = cov_count_df_neg[cov_count_df_neg['patient_count'] >= 2]
cov_count_df_neg = cov_count_df_neg[cov_count_df_neg['compartment'] == 'unaligned']

# Positive mean logFC, found in >= 2 patients, rows for the aligned compartment.
cov_count_df_pos = cov_count_df[cov_count_df['mean_logFC'] > 0]
cov_count_df_pos = cov_count_df_pos[cov_count_df_pos['patient_count'] >= 2]
cov_count_df_pos = cov_count_df_pos[cov_count_df_pos['compartment'] == 'aligned']

## Figure 4k

In [None]:
sns.set_style('ticks')

# Plot the unaligned-enriched niches first, then the aligned-enriched ones.
beeswarm_niches = list(cov_count_df_neg.quiche_niche) + list(cov_count_df_pos.quiche_niche)

qu.pl.beeswarm(
    mdata,
    feature_key="quiche",
    alpha=0.05,
    niches=beeswarm_niches,
    figsize=(4.5, 6),
    annot_key='quiche_niche',
    design_key='compartment',
    patient_key='Patient_ID',
    xlim=[-3, 3],
    save_directory=save_directory,
    filename_save='figure4k',
)

## Figure 4l

In [None]:
# Compare cancer-cell elongation between ECM compartments, one point per patient and compartment.

# Restrict to cancer cells and collect their morphology measurements.
adata_sub = mdata['spatial_nhood'][np.isin(mdata['spatial_nhood'].obs['cell_cluster'], ['Cancer_1', 'Cancer_2', 'Cancer_3'])].copy()
morph_measurements = adata_sub.obs[['nc_ratio', 'area', 'eccentricity', 'major_axis_length', 'minor_axis_length']].copy()
# Elongation = major / minor axis length, computed before standardization.
morph_measurements['elongation'] = morph_measurements['major_axis_length'] / morph_measurements['minor_axis_length']
morph_measurements = pd.DataFrame(qu.pp.standardize(morph_measurements), index = morph_measurements.index, columns = morph_measurements.columns)
morph_measurements['compartment'] = adata_sub.obs['compartment']
morph_measurements['Patient_ID'] = adata_sub.obs['Patient_ID']
# Mean per (compartment, patient), reshaped to long form. The code below relies on the resulting
# columns 'level_0' (measurement name), 'compartment' and 0 (the mean value).
morph_measurementsv2 = morph_measurements.groupby(['compartment', 'Patient_ID']).mean().unstack().unstack().reset_index()

fig, axes = plt.subplots(1, 1, figsize=(4, 3.5), sharey=False)
melted_data = morph_measurementsv2[morph_measurementsv2['level_0'] =='elongation']
# Drop outliers outside 1.5 x IQR of the per-patient means before testing and plotting.
Q1 = melted_data[0].quantile(0.25)
Q3 = melted_data[0].quantile(0.75)
IQR = Q3 - Q1
lower_bound = Q1 - 1.5 * IQR
upper_bound = Q3 + 1.5 * IQR
filtered_data = melted_data[(melted_data[0] >= lower_bound) & (melted_data[0] <= upper_bound)]
# One group of values per compartment; the rank-sum test is only run when exactly two groups remain.
groups = [filtered_data[filtered_data['compartment'] == c][0] for c in filtered_data['compartment'].unique()]
if len(groups) == 2:
    stat, p_value = ranksums(groups[0], groups[1])
else:
    p_value = None
# Box plot with the individual patient means overlaid.
g = sns.boxplot(x='compartment', y=0, data=filtered_data, width=0.5, fliersize=0, color='white', ax=axes)
g = sns.stripplot( x='compartment', y=0, data=filtered_data, color='gray', alpha=0.8, ax=axes)
g.tick_params(labelsize=10)
# Report the p-value in the title.
if p_value is not None:
    if p_value < 0.001:
        axes.set_title(f"elongation (p < 0.001)")
    else:
        axes.set_title(f"elongation (p = {p_value:.3f})")
else:
    axes.set_title(f"elongation (p-value not computed)")

axes.set_xlabel("ECM region", fontsize = 12)
axes.set_ylabel('')

plt.tight_layout()
plt.savefig(os.path.join(save_directory, 'figure4l.pdf'), bbox_inches = 'tight')

## Figure 4m

In [None]:
# Figure 4m: marker expression of cancer cells per ECM compartment, restricted to the cells
# present in the QUICHE 'spatial_nhood' modality.
df_red = adata.to_df()
df_red[['fov', 'label']]= adata.obs[['fov', 'label']].values
# pd.merge discards the left index and returns a fresh 0..n-1 RangeIndex, so indexing adata with
# the merged index would select the first n rows by position rather than the matched cells.
# Carry the cell ids through the merge as a column and select by name instead.
df_red['obs_name'] = adata.obs_names.values
df_red = pd.merge(df_red, mdata['spatial_nhood'].obs[['fov', 'label']], on = ['fov', 'label'])
index_names = list(df_red['obs_name'])
adata_red = adata[index_names, :].copy()

# Standardize the expression of cancer cells and attach their compartment as the last column.
cancer_mask = np.isin(adata_red.obs['cell_cluster'], ['Cancer_1', 'Cancer_2', 'Cancer_3'])
adata_cancer = adata_red[cancer_mask]
df = adata_cancer.to_df()
df = pd.DataFrame(qu.pp.standardize(df), index = df.index, columns = df.columns)
df['compartment'] = adata_cancer.obs['compartment']

# Rebuild an AnnData from the standardized values (all columns but the last) for scanpy plotting.
adata_run = anndata.AnnData(df.iloc[:, :-1])
adata_run.obs['compartment'] = pd.Categorical(df.iloc[:, -1])
# .copy() so that obs_names is set on a real object rather than on a view.
adata_run = adata_run[adata_run.obs['compartment'] != 'no_collagen'].copy()
adata_run.obs_names = [f'c_{i}' for i in range(0, len(adata_run.obs_names))]

sc.pl.dotplot(adata_run, 
            var_names=['Collagen1', 'FAP', 'Fibronectin', 'Vim', 'SMA', 'GLUT1', 'Ki67'], 
            standard_scale = None, vmin = -1, vmax = 1, cmap = 'PuOr_r',dot_min = 0, dot_max = 0.8,
            groupby='compartment', save = 'figure4m.pdf')

# The plot is written under 'figures/' with a 'dotplot_' prefix; move it into the figure folder.
shutil.move(os.path.join('figures','dotplot_figure4m.pdf'), os.path.join(save_directory, 'figure4m.pdf'))

## ECM visualization

In [None]:
# FOVs and niches for the ECM visualizations (same values as set in the ECM setup cell);
# paired by position in the niche plotting loop.
fov_list = ['TMA44_R3C3', 'TMA38_R3C8']
niche_list = ['Fibroblast__Neutrophil','B__Cancer_1']
# Ensure logFC is numeric before it is used as the plotting metric.
mdata['quiche'].var['logFC'] = mdata['quiche'].var['logFC'].astype('float')

## Figure 4h Collagen1 expression

In [None]:
# Figure 4h: save the Collagen1 channel of each FOV as an image with a 100 µm scale bar.
fiber_channel = 'Collagen1'
img_sub_folder = ''
img_cmap_name = "bone"
labels_cmap_name = "cool"
save_directory_ = os.path.join(save_directory, 'overlay', 'ecm')
qu.pp.make_directory(save_directory_)

# Loop-invariant work is done once up front. plt.get_cmap replaces matplotlib.cm.get_cmap,
# which was deprecated in matplotlib 3.7 and removed in 3.9.
io_utils.validate_paths(data_dir)
img_cmap = plt.get_cmap(img_cmap_name)

for fov_name in fov_list:
    # Fail early if the channel image is missing for this FOV.
    misc_utils.verify_in_list(fiber_channel=[fiber_channel],
                              all_channels=io_utils.remove_file_extensions(io_utils.list_files(os.path.join(data_dir, fov_name, img_sub_folder))))

    data_xr = load_utils.load_imgs_from_tree(data_dir, img_sub_folder, fovs=[fov_name], channels=[fiber_channel])

    # Scale by the 99th percentile of the image.
    channel_data = data_xr.loc[fov_name, :, :, fiber_channel].values
    channel_data = channel_data / np.quantile(channel_data, 0.99)

    # Keep the figure handle in a named variable; `_` is IPython's last-output variable.
    fig, axes = plt.subplots(1, 1)

    axes.imshow(channel_data, cmap=img_cmap)
    axes.axis("off")
    scale_bar_length = int((channel_data.shape[0]*100)/800)  # 2048 pixels is 800 uM so 256 pixels corresponds to 100 microns
    scale_bar_height = 20  # Height of the scale bar rectangle
    scale_bar_color = 'white'
    text_y_offset = 30  # Offset of the text from the scale bar

    # Scale bar in the lower-right corner, with its label centred above it.
    axes.add_patch(Rectangle((channel_data.shape[1] - scale_bar_length - 80, channel_data.shape[0] - scale_bar_height - 20),
                            scale_bar_length, scale_bar_height, linewidth=0, edgecolor=None, facecolor=scale_bar_color))

    axes.text(channel_data.shape[1] - scale_bar_length / 2 - 80, channel_data.shape[0] - scale_bar_height - text_y_offset,
            '100 µm', color=scale_bar_color, fontweight='regular', fontsize=12, ha='center', va='bottom', path_effects=[patheffects.withStroke(linewidth=0.5, foreground='black')])

    fig.savefig(os.path.join(save_directory_, f'{fiber_channel}_{fov_name}.tiff'), bbox_inches = 'tight')

## Figure 4i ECM alignment

In [None]:
# Figure 4i: colour the fiber segmentation of the selected FOVs by the binned alignment score.
# Relies on save_directory_ as set by the Collagen1 cell above.
feature_fovs = {'alignment_bin_2': fov_list}

for feature in feature_fovs:
    fov_list = feature_fovs[feature]
    # .copy() so the new column is added to an independent frame, not to a filtered slice of
    # fiber_table (avoids pandas' SettingWithCopyWarning and any ambiguity about what is modified).
    fiber_table_sub = fiber_table[fiber_table.fov.isin(fov_list)].copy()
    # NOTE(review): a bin value of 0 yields -inf here; this '_norm' column is not the stat_name
    # passed below, so it does not select what gets plotted.
    fiber_table_sub[feature + '_norm'] = np.log(fiber_table_sub[feature])

    # NOTE(review): feature_dir is computed but not used; output goes to save_directory_.
    feature_dir = os.path.join(directory, f'colored_{feature}')
    color_segmentation_by_stat(fovs=fiber_table_sub.fov.unique(), data_table=fiber_table_sub, seg_dir=directory, save_dir=save_directory_, stat_name=feature, cmap="RdBu", seg_suffix="_fiber_labels.tiff", erode=True)

In [None]:
# Recompute niche compositions for every cell and give each a 'retro' niche label (ECM cohort).
adata = qu.tl.compute_spatial_neighbors(adata, radius = 100, n_neighbors = 30, spatial_key = 'spatial', delaunay = False, fov_key = 'fov', coord_type = 'generic')
adata_niche, cells_nonn = qu.tl.compute_niche_composition(adata, labels_key = 'cell_cluster', min_cells = 3)
# Drop cells whose niche vector is all zeros.
adata_niche = adata_niche[np.where(pd.DataFrame(adata_niche.X).sum(1) != 0)[0], :].copy()
annotations_niche = qu.tl.compute_niche_abundance_fov(adata_niche.to_df(), nlargest = 3, min_perc = 0.1)
adata_niche.obs['retro'] = annotations_niche.values

# For cells that are index cells in the QUICHE result, overwrite the label with the QUICHE niche.
index_df = adata_niche.obs.reset_index()
# Take the cell ids from obs_names (same row order as reset_index) rather than assuming the
# reset column is literally called 'index', which only holds when the obs index is unnamed.
index_df['index_cell'] = adata_niche.obs_names.values
annotation_df = mdata['quiche'].var[['index_cell', 'quiche_niche', 'SpatialFDR']]
# Merge once and reuse it for both the row selector and the labels so the two stay aligned.
matched_df = pd.merge(index_df, annotation_df, on = 'index_cell')
index_cells = matched_df['index_cell'].values
adata_niche.obs.loc[index_cells, 'retro'] = list(matched_df['quiche_niche'])

## Figure 4j Niche

In [None]:
##plot niches
save_directory_ = os.path.join(save_directory, 'overlay', 'ecm', 'niches')
qu.pp.make_directory(save_directory_)

for i, niche in enumerate(niche_list):
    # Niches and FOVs are paired by position.
    fov = fov_list[i]
    cell_list = niche.split('__')

    # Neighbourhoods annotated with this niche, minus four hand-picked cells that are left out of the score plot.
    excluded_cells = ['TMA44_R9C4_cell3131166', 'TMA44_R9C4_cell3131174', 'TMA44_R9C4_cell3131188', 'TMA44_R9C5_cell3134587']
    niche_indices = list(mdata['spatial_nhood'][mdata['quiche'].var['quiche_niche'] == niche].obs_names)
    niche_indices = [cell_id for cell_id in niche_indices if cell_id not in excluded_cells]

    # Panel 1: logFC score of the niche on this FOV.
    qu.pl.plot_niche_score(mdata, niche, fov, seg_dir,
                           metric = 'logFC', vmin = -2, vmax = 2, fontsize = 12, cmap = 'vlag',
                           background = [0.3, 0.3, 0.3, 1], figsize = (6, 6),
                           save_directory = save_directory_, filename_save = niche+'_'+fov,
                           niche_indices = niche_indices)

    # Panel 2: cells labelled with this niche, restricted to the cell types named in the niche.
    subset_mdata = adata_niche[adata_niche.obs['retro'] == niche]
    df_cells = subset_mdata.to_df()
    for obs_col in ('cell_cluster', 'label', 'fov'):
        df_cells[obs_col] = subset_mdata.obs[obs_col]
    keep_rows = np.isin(df_cells.cell_cluster, cell_list)
    df_cells = df_cells[keep_rows]

    # Two-column palette table (cell type, colour) built from the shared colour dictionary.
    colormap = pd.DataFrame({'cell_cluster': list(colors_dict_cells.keys()),
                             'color': list(colors_dict_cells.values())})

    cluster_plot_kwargs = dict(
        fovs=[fov],
        seg_dir=seg_dir,
        save_dir=save_directory_,
        cell_data=df_cells,
        erode=True,
        fov_col='fov',
        label_col='label',
        cluster_col='cell_cluster',
        seg_suffix="_whole_cell.tiff",
        cmap=colormap,
        fig_file_type='pdf',
        unassigned_color=np.array([0.3, 0.3, 0.3, 1]),
        display_fig=False,
    )
    qu.pl.cohort_cluster_plot(**cluster_plot_kwargs)