In [None]:
import scanpy as sc
import decoupler as dc
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import os
import anndata as ad
import scanorama

from wrapper_functions import *

In [None]:
%load_ext autoreload
%autoreload 2

In [None]:
analysis_name='Default'

root_path = os.getcwd()
results_folder = os.path.join(root_path, 'results')
basepath=root_path+'/analyzed/'+analysis_name+"/"

In [None]:
global_clustering_folder = os.path.join(results_folder, 'global_clustering') 
adata=sc.read(os.path.join(global_clustering_folder , 'clustering_results_harmony_deconv_bbknn.h5ad'))

In [None]:
adata.obs['Sample_Layer'] = adata.obs['readout_id'].astype(str) + '-' + adata.obs['leiden'].astype(str)

In [None]:
# Load every QC-filtered sample (.h5ad) from results/qc_filtered as an AnnData object.
# The directory listing is sorted so that the sample order (and therefore the
# concatenation order downstream) does not depend on the filesystem's arbitrary order.
qc_filtered_folder = os.path.join(results_folder, 'qc_filtered')
file_names = sorted(
    f for f in os.listdir(qc_filtered_folder)
    if os.path.isfile(os.path.join(qc_filtered_folder, f))
)
# `ad.read` is only a deprecated alias of `ad.read_h5ad`; call the explicit reader.
adata_list = [
    ad.read_h5ad(os.path.join(qc_filtered_folder, file))
    for file in file_names
    if file.endswith('.h5ad')
]

In [None]:
# One key per loaded sample, taken from the library id(s) stored under uns["spatial"].
# NOTE(review): this yields as many keys as there are spatial libraries in total, so it
# presumably relies on each AnnData holding exactly one library — confirm.
library_keys = [
    library_id
    for sample in adata_list
    for library_id in sample.uns["spatial"]
]

# Outer-join all samples; the key of each sample is recorded in obs["library_id"]
# and appended to the obs names with "-".
adata_concat = sc.concat(
    adata_list,
    join='outer',
    label="library_id",
    keys=library_keys,
    index_unique="-",
    uns_merge="unique",
)

In [None]:
# Store the batch label as a categorical column.
# NOTE(review): assumes the loaded samples already carry a 'batch' column in .obs;
# the concatenation itself only adds 'library_id' — confirm.
adata_concat.obs['batch']=adata_concat.obs['batch'].astype('category')
# Keep a full copy of the concatenated object in .raw (the pseudobulk calls read it via use_raw=True).
adata_concat.raw = adata_concat.copy()


In [None]:
# Copy the Leiden cluster labels from the clustered object onto the concatenated one.
# `adata` is indexed by the obs names of `adata_concat`, so those names have to exist in `adata`.
adata_concat.obs['leiden']=adata[adata_concat.obs.index].obs['leiden'].copy()

In [None]:
# Derive a per-observation Tumor / Stroma / TumorStroma / NotDetermined call
# ('annoTumorStroma') from the cluster annotation 'annov2' combined with the
# 'Malignantcat' and 'Fibrocat' flags.

# Both working columns start from the same cluster annotation.
adata.obs['annoStroma']=adata.obs['annov2'].copy()
adata.obs['annoTumor']=adata.obs['annov2'].copy()

# Cluster labels that are collapsed to "Stroma" in the stroma view.
stroma=['Epi_HEV','Fibro_Immune_HEV','Macrophage_Mmp9','Muscle_Vessel','Tumor_Fibro','Tumor_Fibro_Necro']

for i in stroma:
    adata.obs["annoStroma"]=adata.obs["annoStroma"].replace(i, "Stroma")
adata.obs["annoStroma"]=adata.obs["annoStroma"].replace("Stroma_Necro", "Stroma")

# Cluster labels that are collapsed to "Tumor" in the tumor view
# ('Tumor_Fibro' and 'Tumor_Fibro_Necro' appear in both lists).
tumor=[ 'Tumor','Tumor_Cytotox','Tumor_Fibro','Tumor_Fibro_Necro','Tumor_Necro']

for i in tumor:
    adata.obs["annoTumor"]=adata.obs["annoTumor"].replace(i, "Tumor")    

# Work on a copy of .obs. The upper-case categories are added so they can be assigned below.
tmp=adata.obs.copy()
tmp['annoTumor']=tmp['annoTumor'].cat.add_categories(['STROMA','TUMOR','FIBRO'])
tmp['annoStroma']=tmp['annoStroma'].cat.add_categories(['STROMA','TUMOR','FIBRO'])
# "Stroma" with Malignantcat 'neg' becomes 'STROMA'; Fibrocat 'pos' then turns
# 'Stroma'/'STROMA' into 'FIBRO'; "Tumor" with Malignantcat 'pos' becomes 'TUMOR'.
tmp.loc[tmp['Malignantcat'].isin([ 'neg'])&tmp['annoStroma'].isin([ 'Stroma']),'annoStroma']='STROMA'
tmp.loc[tmp['Fibrocat'].isin([ 'pos'])&tmp['annoStroma'].isin([ 'Stroma','STROMA']),'annoStroma']='FIBRO'
tmp.loc[tmp['Malignantcat'].isin([ 'pos'])&tmp['annoTumor'].isin([ 'Tumor']),'annoTumor']='TUMOR'

# Concatenate the two views into one string key (stroma view first), e.g. 'FIBROTUMOR'.
tmp['annoTumorStroma']=tmp['annoStroma'].astype('str')+tmp['annoTumor'].astype('str')
# Not the last expression of the cell, so this table is computed but not displayed.
tmp['annoTumorStroma'].value_counts()

# Combined keys grouped by their final call. The groups are applied in the order
# both -> tumor -> stroma -> other, so 'FIBROTUMOR' (listed in `stroma` and in `both`)
# ends up as "TumorStroma".
tumor=['StromaTUMOR','Tumor_NecroTUMOR','TumorTUMOR','Tumor_CytotoxTUMOR']
stroma=['FIBROTumor','FIBROFibro_Immune_HEV','FIBROMacrophage_Mmp9','FIBROMuscle_Vessel','FIBROEpi_HEV',
        'STROMAEpi_HEV','STROMATumor','STROMAFibro_Immune_HEV','STROMAMuscle_Vessel',
        'STROMAMacrophage_Mmp9','FIBROTUMOR']
other=['StromaEpi_HEV','StromaTumor','Tumor_CytotoxTumor','StromaFibro_Immune_HEV','StromaMuscle_Vessel',
       'StromaMacrophage_Mmp9','TumorTumor','Tumor_NecroTumor']
both=['FIBROTUMOR']

for i in both:
    tmp["annoTumorStroma"]=tmp["annoTumorStroma"].replace(i, "TumorStroma")
for i in tumor:
    tmp["annoTumorStroma"]=tmp["annoTumorStroma"].replace(i, "Tumor")
for i in stroma:
    tmp["annoTumorStroma"]=tmp["annoTumorStroma"].replace(i, "Stroma")
for i in other:
    tmp["annoTumorStroma"]=tmp["annoTumorStroma"].replace(i, "NotDetermined")

# Write the final call back to the AnnData object and show it on the UMAP.
adata.obs['annoTumorStroma']=tmp["annoTumorStroma"].copy()

sc.pl.umap(adata, color='annoTumorStroma')

In [None]:
# Per-sample summary of the tumor/stroma calls: number of observations per label
# and the same numbers as a percentage of each sample's total.
sample_col = "readout_id"
label_col  = "annoTumorStroma"

obs = adata.obs.loc[:, [sample_col, label_col]].copy()

# rows = samples, columns = labels, values = number of observations
counts = obs.groupby([sample_col, label_col]).size().unstack(fill_value=0)
# divide every row by its own total
perc_all = counts.div(counts.sum(axis=1), axis=0).mul(100)

summary_all = counts.add_prefix("n_").join(perc_all.add_prefix("pct_")).reset_index()

summary_all.to_csv(results_folder+'/StromavsTumor_Table.tsv', sep='\t')

In [None]:
# Compartment analysed in the rest of the notebook: it selects which observations are
# kept below and is used in the names of the output files.
myfract='Stroma'
#myfract='Tumor' ### Switch here for Tumor analysis

In [None]:
# Keep only the observations called as the selected compartment.
# Note: this overwrites `adata`; to switch `myfract`, `adata` has to be reloaded and
# re-annotated first, because the other compartment is no longer in it.
adata=adata[adata.obs['annoTumorStroma']==myfract].copy()



In [None]:
# Restrict the concatenated object to the same observations (matched by obs name) as `adata`.
adata_concat=adata_concat[adata.obs.index].copy()

In [None]:
# Get pseudo-bulk profile: values are summed per (readout_id, leiden) combination,
# read from .raw (use_raw=True). No filtering yet (min_cells=0, min_counts=0) so that
# the QC plot below shows every profile.
pdata = dc.get_pseudobulk(
    adata_concat,
    sample_col='readout_id',
    groups_col='leiden',
    # layer='counts',
    mode='sum',
    min_cells=0,
    min_counts=0,
    use_raw=True
)
pdata

In [None]:
dc.plot_psbulk_samples(pdata, groupby=['readout_id','leiden','batch'], figsize=(10, 8))

In [None]:
# Get filtered pseudo-bulk profile: same aggregation as above, but with the thresholds
# min_cells=5 and min_counts=500. This replaces the unfiltered `pdata`.
pdata = dc.get_pseudobulk(
    adata_concat,
    sample_col='readout_id',
    groups_col='leiden',
    # layer='counts',
    mode='sum',
    min_cells=5,
    min_counts=500,
    use_raw=True
)
pdata

In [None]:
# Normalised copy of the pseudobulk profiles used only for exploration (PCA);
# `pdata` itself is left untouched for the differential expression below.
pp_pdata = pdata.copy()
sc.pp.normalize_total(pp_pdata, target_sum=1e6)  # scale every profile to a total of 1e6
sc.pp.log1p(pp_pdata)
sc.pp.scale(pp_pdata, max_value=10)  # scaled values are capped at 10
sc.tl.pca(pp_pdata, n_comps=10)

In [None]:
sc.pl.pca(pp_pdata, color=['readout_id'], ncols=1, show=True, size=300)

In [None]:
sc.pl.pca(pp_pdata, color=['treatment_id'], ncols=1, show=True, size=300)

In [None]:
sc.pl.pca(pp_pdata, color=['leiden'], ncols=1, show=True, size=300)

In [None]:
dc.get_metadata_associations(
    pp_pdata,
    obs_keys = ['readout_id', 'individual_id','treatment_id', 'leiden', 'psbulk_n_cells', 'psbulk_counts'], #metadata columns to associate to PCs
    obsm_key='X_pca',  
    uns_key='pca_anova', 
    inplace=True
)

In [None]:
# Plot the associations between metadata columns and principal components that were
# stored under uns['pca_anova'] by dc.get_metadata_associations.
plt.figure(figsize=(7,10))
ax, legend_axes = dc.plot_associations(
    pp_pdata,
    uns_key='pca_anova', 
    obsm_key='X_pca', 
    stat_col='p_adj',  
    obs_annotation_cols = ['treatment_id', 'leiden'], 
    # second panel title: "Principal" (was misspelled "Principle")
    titles=['Adjusted p-values from ANOVA', 'Principal component scores']
)
plt.show()

#### Are there genes only expressed in one condition? 

In [None]:
# Split the filtered pseudobulk profiles by treatment (T = treated, C = untreated control).
# NOTE(review): the treated level is matched here as 'FAP_LTBR' (underscore), while the
# DESeq2 contrast further down spells it 'FAP-LTBR' (hyphen) — confirm which spelling is
# stored in obs['treatment_id']; with the wrong one, conditionT would be empty.
conditionT = pdata[pdata.obs['treatment_id'] == 'FAP_LTBR', :]
conditionC = pdata[pdata.obs['treatment_id'] == 'Untreated', :]

In [None]:
# Mean pseudobulk value of every gene (mean over the profiles, axis=0) within each condition.
# assumes .X is a dense array so that the result is one value per gene — TODO confirm
expr_conditionT = np.mean(conditionT.X, axis=0)
expr_conditionC = np.mean(conditionC.X, axis=0)

In [None]:
# Calculate the absolute difference in expression between the two conditions
expression_difference = np.abs(expr_conditionT - expr_conditionC)

In [None]:
# Identify genes that are exclusively expressed
exclusive_conditionT = (expr_conditionT > 0) & (expr_conditionC == 0)
exclusive_conditionC = (expr_conditionC > 0) & (expr_conditionT == 0)

In [None]:
# Create a DataFrame for sorting and filtering (one row per gene).
# 'Expr_Condition1' holds the conditionT (treated) means and
# 'Expr_Condition2' the conditionC (untreated) means.
genes_data = pd.DataFrame({
    'Gene': pdata.var_names,
    'Expr_Condition1': expr_conditionT,
    'Expr_Condition2': expr_conditionC,
    'Expression_Difference': expression_difference
})

In [None]:
# Filter and sort genes exclusively expressed in Condition 1
exclusive_genes_conditionT = genes_data[exclusive_conditionT]
exclusive_genes_conditionT_sorted = exclusive_genes_conditionT.sort_values(by='Expression_Difference', ascending=False)

In [None]:
# Filter and sort genes exclusively expressed in Condition 2
exclusive_genes_conditionC = genes_data[exclusive_conditionC]
exclusive_genes_conditionC_sorted = exclusive_genes_conditionC.sort_values(by='Expression_Difference', ascending=False)

In [None]:
exclusive_genes_conditionT_sorted.head()

In [None]:
exclusive_genes_conditionT_sorted.to_csv(results_folder+'/DGE_'+myfract+'_exclusive_genes_conditionT.tsv',sep='\t')

In [None]:
exclusive_genes_conditionC_sorted.head()

In [None]:
exclusive_genes_conditionC_sorted.to_csv(results_folder+'/DGE_'+myfract+'_exclusive_genes_conditionC.tsv',sep='\t')

In [None]:
mysub=myfract #'Tumor' or 'Stroma'
pseudobulk_folder = os.path.join(results_folder, 'pseudo_bulkResults') 
# Create the output folder right away: the next cells already write files into it,
# which fails on a fresh run if the folder is only created further down.
os.makedirs(pseudobulk_folder, exist_ok=True)

In [None]:
exclusive_genes_conditionT_sorted.head(200)['Gene'].to_csv(os.path.join(pseudobulk_folder , mysub+'-FAP-LTBR_vs_Untreated_top200_FAP-LTBRgenes.csv'))

In [None]:
# Marker genes of interest (this list is reassigned again further down in the notebook).
# NOTE(review): 'CD8b' is the only symbol written in upper case; the others use
# mouse-style capitalisation — confirm the intended symbol.
genestoadd=['Cd3d','Cd8a','CD8b','Pdcd1','Ifng','Gzmb','Gzma','Cd19','Cxcl9','Cd274','Ccl22','Ms4a1','Glycam1',
            'Madcam1','Selp','Sele','Cxcl13','H2-Aa', 'H2-Ab1', 'H2-DMb1', 'H2-Eb1','Chst4','Il9r','Il13','Fcer2a']

In [None]:
list(exclusive_genes_conditionT_sorted.head(200)['Gene'])

In [None]:
exclusive_genes_conditionC_sorted.head()

In [None]:
# Print the sample id and the treatment of every readout in the current subset.
# The per-sample object is called `adata_library` instead of `ad`, because `ad` is the
# alias of the imported `anndata` module: rebinding it would break any later (or
# re-run) `ad.<function>` call in this kernel.
for library in adata.obs["readout_id"].unique().tolist():
    adata_library = adata[adata.obs["readout_id"] == library, :].copy()
    print(adata_library.obs["readout_id"].unique()[0])
    print(adata_library.obs["treatment_id"].unique()[0])
    #sc.pl.spatial(
    #    adata_library,
    #    img_key="hires",
    #    library_id=library,
    #    color=["Chst4", 'Ptchd1', 'leiden'],
    #    size=1.5, color_map = 'RdBu_r', use_raw=False)
    
#sc.pl.violin(adata, keys = ["Chst4", 'Chst4'], groupby='Sample_Layer', rotation=90, use_raw=False)

### T versus C across all data

In [None]:
set(list(pdata.var.index)).intersection(set(['Glycam1']))

In [None]:
exclude=['V43J19-319_A1_B09','V43J24-078_D1_A06', 'V43J11-302_A1_B08', 'V42D20-025_A1_B16', 'V42D20-002_D1_A02', 'V42D20-025_D1_A05']

In [None]:
set(pdata.obs.readout_id)

In [None]:
adata_T_C =  pdata[~pdata.obs['readout_id'].isin(exclude)].copy() #[(pdata.obs['leiden'] == 'White Matter')].copy()

In [None]:
adata_T_C

In [None]:
set(adata_T_C.obs.readout_id)

In [None]:
#dc.plot_filter_by_expr(adata_T_C, group='treatment_id', min_count=3, min_total_count=20)
dc.plot_filter_by_expr(adata_T_C, group='treatment_id', min_count=0.2, min_total_count=2)


In [None]:
# Select the genes to keep with dc.filter_by_expr, grouped by treatment.
# The commented line holds the stricter thresholds (min_count=3, min_total_count=20);
# the active call uses min_count=0.2 and min_total_count=2.
#genes = dc.filter_by_expr(adata_T_C, group='treatment_id', min_count=3, min_total_count=20)
genes = dc.filter_by_expr(adata_T_C, group='treatment_id', min_count=0.2, min_total_count=2)

# Filter by these genes
adata_T_C = adata_T_C[:, genes].copy()
adata_T_C

In [None]:
#set(list(pdata.var.index)).intersection(set(['M']))

### Contrast between conditions

In [None]:
# Import DESeq2
from pydeseq2.dds import DeseqDataSet
from pydeseq2.ds import DeseqStats

In [None]:
# Build DESeq2 object: single-factor design on 'treatment_id' with 'Untreated' as the
# reference level.
# NOTE(review): n_cpus=16 is hard-coded — adjust to the machine this runs on.
dds = DeseqDataSet(
    adata=adata_T_C,
    design_factors="treatment_id",
    ref_level=['treatment_id', 'Untreated'],
    refit_cooks=True,
    n_cpus=16,
)

In [None]:
adata_T_C.obs

In [None]:
dds.deseq2()

In [None]:
dds.obsm['design_matrix']

In [None]:
# Extract contrast between treated vs control (FAP-LTBR relative to Untreated).
# NOTE(review): factor and level are spelled with hyphens here ('treatment-id',
# 'FAP-LTBR'), whereas the obs column is 'treatment_id' and the level is compared as
# 'FAP_LTBR' earlier in the notebook. Presumably this relies on pydeseq2 replacing
# underscores with hyphens — confirm for the installed pydeseq2 version.
stat_res = DeseqStats(dds, contrast=["treatment-id", 'FAP-LTBR', "Untreated"], n_cpus=16)
stat_res.summary()


In [None]:
stat_res.lfc_shrink(coeff='treatment-id_FAP-LTBR_vs_Untreated')

In [None]:
# Extract results
results_df = stat_res.results_df
results_df.sort_values('stat')

In [None]:

    
## Make sure the output folder is there and report whether it had to be created
if os.path.exists(pseudobulk_folder):
    print(f"Folder '{pseudobulk_folder}' already exists.")
else:
    os.makedirs(pseudobulk_folder)
    print(f"Folder '{pseudobulk_folder}' created.")

## Export the full DESeq2 result table for the current compartment
deseq_csv_name = mysub+'-FAP-LTBR_vs_Untreated_0605.csv'
results_df.to_csv(os.path.join(pseudobulk_folder, deseq_csv_name))

In [None]:
dc.plot_volcano_df(results_df, x='log2FoldChange', y='padj', top=20, 
                   save=os.path.join(pseudobulk_folder , mysub+'-FAP-LTBR_vs_Untreated_volcano.pdf'))


In [None]:
# One-row matrix holding the 'stat' column of the DESeq2 results (genes as columns).
# The row label 'WM_FAP-LTBR_vs_Untreated' is the contrast name that the decoupler
# calls and plots below refer to, so it must stay identical everywhere.
mat = results_df[['stat']].T.rename(index={'stat': 'WM_FAP-LTBR_vs_Untreated'})
mat

### Transcription factor activity inference

In [None]:
# Retrieve CollecTRI gene regulatory network
collectri = dc.get_collectri(organism='mouse', split_complexes=False)
collectri

In [None]:
# Infer pathway activities with ulm
tf_acts, tf_pvals = dc.run_ulm(mat=mat, net=collectri)
tf_acts

In [None]:
tf_pvals

In [None]:
pd.concat([tf_acts, tf_pvals]).transpose().to_csv(os.path.join(pseudobulk_folder , mysub+'-FAP-LTBR_vs_Untreated_TFact.tsv'),sep='\t')

In [None]:
dc.plot_barplot(tf_acts, 'WM_FAP-LTBR_vs_Untreated', top=25, vertical=True, 
                save=os.path.join(pseudobulk_folder , mysub+'-FAP-LTBR_vs_Untreated_TFact.pdf'))

In [None]:
# Extract logFCs and pvals
logFCs = results_df[['log2FoldChange']].T.rename(index={'log2FoldChange': 'WM_FAP-LTBR_vs_Untreated'})
pvals = results_df[['padj']].T.rename(index={'padj': 'WM_FAP-LTBR_vs_Untreated'})
dc.plot_volcano(logFCs, pvals, 'WM_FAP-LTBR_vs_Untreated', name='Irf1', net=collectri, top=10, sign_thr=0.05, lFCs_thr=0.5)

In [None]:
# Extract logFCs and pvals
logFCs = results_df[['log2FoldChange']].T.rename(index={'log2FoldChange': 'WM_FAP-LTBR_vs_Untreated'})
pvals = results_df[['padj']].T.rename(index={'padj': 'WM_FAP-LTBR_vs_Untreated'})
dc.plot_volcano(logFCs, pvals, 'WM_FAP-LTBR_vs_Untreated', name='Yap1', net=collectri, top=10, sign_thr=0.05, lFCs_thr=0.5)

In [None]:
# Extract logFCs and pvals
logFCs = results_df[['log2FoldChange']].T.rename(index={'log2FoldChange': 'WM_FAP-LTBR_vs_Untreated'})
pvals = results_df[['padj']].T.rename(index={'padj': 'WM_FAP-LTBR_vs_Untreated'})
dc.plot_volcano(logFCs, pvals, 'WM_FAP-LTBR_vs_Untreated', name='Plag1', net=collectri, top=10, sign_thr=0.05, lFCs_thr=0.5)

In [None]:
dc.plot_volcano(logFCs, pvals, 'WM_FAP-LTBR_vs_Untreated', name='Rfxap', net=collectri, top=10, sign_thr=0.05, lFCs_thr=0.5)

In [None]:
dc.plot_volcano(logFCs, pvals, 'WM_FAP-LTBR_vs_Untreated', name='Nfkb1', net=collectri, top=10, sign_thr=0.05, lFCs_thr=0.5)

In [None]:
dc.plot_volcano(logFCs, pvals, 'WM_FAP-LTBR_vs_Untreated', name='Rela', net=collectri, top=10, sign_thr=0.05, lFCs_thr=0.5)

In [None]:
dc.plot_volcano(logFCs, pvals, 'WM_FAP-LTBR_vs_Untreated', name='Srf', net=collectri, top=10, sign_thr=0.05, lFCs_thr=0.5)

In [None]:
### Publication version of the TF activity bar plot
# Note: saved under the same PDF name as the dc.plot_barplot output of the earlier
# cell, so that file is replaced by this figure.
pathway_acts_t = tf_acts.T
pathway_acts_t.columns = ['Activity']

# the 25 TFs with the largest absolute activity, ordered from most negative to most positive
top_25 = pathway_acts_t.loc[pathway_acts_t["Activity"].abs().nlargest(25).index]
top_25_sorted = top_25.sort_values(by="Activity", ascending=True)

# horizontal bars: red for positive activity, blue otherwise
fig, ax = plt.subplots(figsize=(5, 5))
colors = np.where(top_25_sorted["Activity"] > 0, "red", "blue").tolist()
ax.barh(top_25_sorted.index, top_25_sorted["Activity"], color=colors)
ax.axvline(0, color="black", linestyle="--", linewidth=0.8)  # reference line at Activity = 0
ax.set_title("Top 25 TFs by Activity", fontsize=10)
ax.set_xlabel("Activity", fontsize=12)
ax.set_ylabel("TF", fontsize=12)
plt.tight_layout()

tf_pdf_name = mysub+'-FAP-LTBR_vs_Untreated_TFact.pdf'
plt.savefig(os.path.join(pseudobulk_folder, tf_pdf_name), format="pdf")
plt.show()

### Pathway activity inference

In [None]:
progeny = pd.read_csv("external_files/model_progeny500_mouse_decoupleR.csv")

In [None]:
pathway_acts, pathway_pvals = dc.run_mlm(mat=mat, net=progeny)

In [None]:
pd.concat([pathway_acts,pathway_pvals]).transpose().to_csv(os.path.join(pseudobulk_folder , mysub+'-FAP-LTBR_vs_Untreated_Pathways.tsv'),sep='\t')

In [None]:
dc.plot_barplot(pathway_acts, 'WM_FAP-LTBR_vs_Untreated', top=25, vertical=True, 
               save=os.path.join(pseudobulk_folder , mysub+'-FAP-LTBR_vs_Untreated_Pathways.pdf'))

In [None]:
dc.plot_targets(results_df, stat='stat', source_name='JAK-STAT', net=progeny, top=15)

In [None]:
dc.plot_targets(results_df, stat='stat', source_name='NFkB', net=progeny, top=15)

In [None]:
dc.plot_targets(results_df, stat='stat', source_name='PI3K', net=progeny, top=15)

In [None]:
dc.plot_targets(results_df, stat='stat', source_name='TGFb', net=progeny, top=15)

In [None]:
dc.plot_targets(results_df, stat='stat', source_name='p53', net=progeny, top=15)

In [None]:
### Publication version of the pathway activity bar plot
# Note: saved under the same PDF name as the dc.plot_barplot output of the earlier
# cell, so that file is replaced by this figure.
pathway_acts_t = pathway_acts.T
pathway_acts_t.columns = ['Activity']

# up to 25 pathways with the largest absolute activity, ordered from most negative to most positive
top_25 = pathway_acts_t.loc[pathway_acts_t["Activity"].abs().nlargest(25).index]
top_25_sorted = top_25.sort_values(by="Activity", ascending=True)

# horizontal bars: red for positive activity, blue otherwise
fig, ax = plt.subplots(figsize=(5, 4))
colors = np.where(top_25_sorted["Activity"] > 0, "red", "blue").tolist()
ax.barh(top_25_sorted.index, top_25_sorted["Activity"], color=colors)
ax.axvline(0, color="black", linestyle="--", linewidth=0.8)  # reference line at Activity = 0
ax.set_title("Top 25 Pathways by Activity", fontsize=10)
ax.set_xlabel("Activity", fontsize=12)
ax.set_ylabel("Pathway", fontsize=12)
plt.tight_layout()

pathway_pdf_name = mysub+'-FAP-LTBR_vs_Untreated_Pathways.pdf'
plt.savefig(os.path.join(pseudobulk_folder, pathway_pdf_name), format="pdf")
plt.show()

In [None]:
dc.plot_targets(results_df, stat='stat', source_name='Estrogen', net=progeny, top=15)

### Functional enrichment of biological terms

In [None]:
#msigdb = dc.get_resource('MSigDB', organism ='mouse')
msigdb = pd.read_csv("external_files/msigdb_mouse_hallmark.csv")
msigdb

In [None]:
# Remove duplicated (gene, gene set) entries. `.copy()` makes the filtered result an
# independent frame, so adding a column below does not write into a slice of the
# originally loaded table (avoids pandas' SettingWithCopyWarning).
msigdb = msigdb[~msigdb.duplicated(['gene_symbol', 'gs_name'])].copy()
# Short gene-set label: the part of gs_name that follows the 'HALLMARK_' prefix.
msigdb['geneset'] = [name.split('HALLMARK_')[1] for name in msigdb['gs_name']]

In [None]:
msigdb

In [None]:
# Infer enrichment with ora using significant deg
top_genes_TvsC = results_df[(results_df['padj'] < 0.05) & (results_df['stat'] > 0)]

# Run ora
enr_pvals = dc.get_ora_df(
    df=top_genes_TvsC,
    net=msigdb,
    source='geneset',
    target='gene_symbol',
    verbose= True
)

enr_pvals.head()

In [None]:
top_genes_TvsC

In [None]:
enr_pvals_filtered = enr_pvals[enr_pvals['FDR p-value'] < 0.001]

In [None]:
enr_pvals_filtered

In [None]:
dc.plot_dotplot(enr_pvals_filtered, x='Combined score', y = 'Term', 
                s='Odds ratio', c = 'FDR p-value', scale = 0.3, 
                figsize=(5,3))

In [None]:
# Infer enrichment with ora using significant deg
top_genes_upCvsT = results_df[(results_df['padj'] < 0.05) & (results_df['stat'] < 0)]

# Run ora
enr_pvals = dc.get_ora_df(
    df=top_genes_upCvsT,
    net=msigdb,
    source='geneset',
    target='gene_symbol',
    verbose= True
)

enr_pvals.head()

In [None]:
enr_pvals_filtered = enr_pvals[enr_pvals['FDR p-value'] < 0.05]

In [None]:
enr_pvals_filtered

In [None]:
dc.plot_dotplot(enr_pvals_filtered, x='Combined score', y = 'Term', s='Odds ratio', c = 'FDR p-value', 
                scale = 0.3, figsize=(4,2))

In [None]:
# Run GSEA (dc.get_gsea_df) on the full results set, ranking genes by the 'stat' column,
# against the hallmark gene sets
enr_pvals = dc.get_gsea_df(
    df=results_df,
    stat = 'stat',
    net=msigdb,
    source='geneset',
    target='gene_symbol',
    verbose= True
)

enr_pvals.sort_values('NES', ascending=False)

In [None]:
enr_pvals_filtered = enr_pvals[enr_pvals['FDR p-value'] < 0.01]

In [None]:
enr_pvals_filtered_color = enr_pvals_filtered.sort_values(by='NES', key=abs, ascending=False).head(25)

In [None]:
enr_pvals_filtered_color['color'] = np.where(enr_pvals_filtered_color['NES'] < 0, 'blue', 'red')

In [None]:
enr_pvals_filtered_color=enr_pvals_filtered_color.sort_values(by='NES', ascending=True)

In [None]:
enr_pvals_filtered_color

In [None]:
dc.plot_barplot_df(enr_pvals_filtered_color, x='NES', y= 'Term', figsize=(5, 4), 
                   color=enr_pvals_filtered_color.color.tolist(),  
                   save=os.path.join(pseudobulk_folder , mysub+'-FAP-LTBR_vs_Untreated_HallmarkGSEA.pdf'))

In [None]:
enr_pvals_filtered_color.to_csv(os.path.join(pseudobulk_folder , mysub+'-FAP-LTBR_vs_Untreated_HallmarkGSEA_sig.tsv'),sep='\t')

In [None]:
enr_pvals.to_csv(os.path.join(pseudobulk_folder , mysub+'-FAP-LTBR_vs_Untreated_HallmarkGSEA.tsv'),sep='\t')

In [None]:
### alternative plot for publication

# Step 1: Filter the top 25 rows based on absolute NES values
df_filtered = enr_pvals_filtered_color.loc[enr_pvals_filtered_color["NES"].abs().nlargest(25).index]

# Step 2: Sort the filtered DataFrame by the actual NES values (not absolute values)
df_sorted = df_filtered.sort_values(by="NES", ascending=False)


In [None]:

fig, ax = plt.subplots(figsize=(7, 4))
ax.barh(df_sorted["Term"], df_sorted["NES"], color=df_sorted["color"])
ax.set_xlabel("NES (Normalized Enrichment Score)", fontsize=10)
ax.set_ylabel("Term", fontsize=12)
ax.set_title("Top 25 Terms by NES (Absolute Value)", fontsize=10)
ax.invert_yaxis()  
plt.tight_layout()

plt.savefig(os.path.join(pseudobulk_folder , mysub+'-FAP-LTBR_vs_Untreated_HallmarkGSEA.pdf'), format="pdf")
plt.show()

### Custom signatures

In [None]:
import besca as bc

In [None]:
# Locate the gene-set (.gmt) files shipped inside the installed besca package.
bescapath_full = os.path.dirname(bc.__file__)
bescapath = os.path.split(bescapath_full)[0]

# "mouse" selects the '.mouse' variants of the signature files; with any other value
# the file names without that suffix are used.
species = "mouse"  ## or mouse for now
conversion = None  # only referenced by the commented-out conversion code further down
sigsuffix = ""
if species == "mouse":
    sigsuffix = ".mouse"

## Provided with besca; change this for own gmt file
gmt_file_anno = (
    bescapath + "/besca/datasets/genesets/CellNames_scseqCMs6_sigs" + sigsuffix + ".gmt"
)
## An extra set of signatures (less specific but informative) is also provided
gmt_file_anno_extra = (
    bescapath
    + "/besca/datasets/genesets/CellNames_scseqCMs6_Extrasigs"
    + sigsuffix
    + ".gmt"
)

In [None]:
from itertools import repeat

mymarkers = bc.tl.sig.read_GMT_sign(gmt_file_anno, directed=False)
mymarkers_extra = bc.tl.sig.read_GMT_sign(gmt_file_anno_extra, directed=False)


In [None]:
mymarkers['HEVEndothelial']=['Glycam1','Selp','Sele','Ackr1','Enpp6','Madcam1','Lipg','Enpp2','Cxcl1','Lifr','Serpina1b','Vwf','Syt15','Chst4','Fut7']

In [None]:
mymarkers['necrosis']=['Pou3f1','Edn2',
 'Cxcl3',
 'Ccl3',
 'Arg1',
 'Pgf',
 'Rsad2',
 'Hilpda',
 'Rnf144b',
 'A530064D06Rik',
 'Clec4d',
 'Ifit3',
 'Cmpk2',
 'Hcar2',
 'Atf3',
 'Klk10']
mymarkers['vessels']=['Ren1',
 'Myh11',
 'Vwf',
 'Gdf10',
 'Bcam',
 'Pgm5',
 'Lmcd1',
 'Ccdc3',
 'Fam174b',
 'Sncg',
 'Cpxm2',
 'Lims2',
 'Ccm2l']
mymarkers['epimus']=['Fgf23',
 'Pi16',
 'Has1',
 'Cys1']
mymarkers['mamfib']=['Foxi1',
 'Fam241b',
 'Irf6',
 'Tacstd2',
 'Cldn3',
 'Marveld3',
 'Clic6',
 'Sox10',
 'Plch2',
 'Pdlim3',
 'Vtcn1',
 'Fcgbp',
 'Prr15l',
 'Nrtn',
 'Capn8',
 'Tmprss13',
 'Tmem30b',
 'Aldh1a3',
 'Edar',
 'Wfdc18',
 'Trp73',
 'Tfap2c',
 'Tnk1',
 'Foxa1',
 'Lmx1b',
 'Kcnk1',
 'Papln',
 'Epn3']

mymarkers['fibtuimm']=[
 'Chst4',
 'Sspo',
 'Auts2',
 'Itga11',
 'Mfap4',
 'Ccdc149',
 'Hoxc4',
 'Meox1',
 'Sh3rf3',
 'Sorcs2',
 'Mtcl1']

mymarkers['tuadipo']=[
 'Mrgpre',
 'Fcrls',
 'Cacna2d2',
 'Nol3',
 'Cidec',
 'Nptxr',
 'Klhdc8a',
 'Prdx3',
 'Rbpms2',
 'Rab11b',
 'H3f3a',
 'Cisd1',
 'Gng2']

mymarkers['tumor']=[
 'Lmo7',
 'Cdhr1',
 'Gm2115',
 'Hsd17b7',
 'Kif20a',
 'Trim59',
 'Ncmap',
 'Rspo2',
 'Psrc1',
 'Cx3cr1',
 'Sqle',
 'Artn',
 'Itgb8',
 'C130026I21Rik',
 'Scrn1',
 'Abcb9',
 'Cyp51',
 'Stra6']

mymarkers['fibtu']=['Reln',
 'Prox1',
 'Pla2g2d',
 'Ly6h',
 'Sh3gl3',
 'Msc',
 'Col6a5',
 'Adra1a',
 'Tenm3',
 'Gpr162',
 'Wdr86']
mymarkers['fibadipo']=['Slc7a10',
 'Adrb3',
 'Lgals12',
 'Car3',
 'Plin1',
 'Cfd',
 'Ankdd1a',
 'Fabp4',
 'Cd209g',
 'Nnmt']

mymarkers['granulo']=["S100a8",  'G0s2','S100a9','Hdc']

In [None]:

# optional conversion - if human-based signatures are read
# if species=='mouse':
#    for signature in mymarkers.keys():
#        mymarkers[signature] = [i for i in map(bc.tl.sig._helper._to_geneid, repeat(conversion), mymarkers[signature]) if i is not None]

mymarkers = bc.tl.sig.filter_siggenes(
    adata_concat, mymarkers
)  ### remove genes not present in dataset or empty signatures
mymarkers_extra = bc.tl.sig.filter_siggenes(adata_concat, mymarkers_extra)


In [None]:
# Flatten the signature dictionary into two parallel lists with one entry per
# (signature, gene) pair: `genes` holds the gene, `sets` the signature it belongs to.
genes = [gene for key in mymarkers for gene in mymarkers[key]]
sets = [key for key in mymarkers for _ in mymarkers[key]]

In [None]:
mysigs=pd.DataFrame()

In [None]:
mysigs['gene_symbol']=genes
mysigs['geneset']=sets
mysigs['gs_name']=sets


In [None]:
mysigs.loc[mysigs.geneset.isin(['HEVEndothelial']),:]

In [None]:
# Run GSEA (dc.get_gsea_df) on the full results, ranking genes by the 'stat' column,
# against the custom signature table `mysigs`
enr_pvals = dc.get_gsea_df(
    df=results_df,
    stat = 'stat',
    net=mysigs,
    source='geneset',
    target='gene_symbol',
    verbose= True
)

enr_pvals.sort_values('NES', ascending=False)

In [None]:
enr_pvals

In [None]:
enr_pvals_filtered = enr_pvals[enr_pvals['FDR p-value'] < 0.1]

enr_pvals_filtered_color = enr_pvals_filtered.sort_values(by='NES', key=abs, ascending=False).head(25)

enr_pvals_filtered_color['color'] = np.where(enr_pvals_filtered_color['NES'] < 0, 'blue', 'red')

enr_pvals_filtered_color=enr_pvals_filtered_color.sort_values(by='NES', ascending=True)

enr_pvals_filtered_color

dc.plot_barplot_df(enr_pvals_filtered_color, x='NES', y= 'Term', figsize=(4, 3), 
                   color=enr_pvals_filtered_color.color.tolist(),
                   save=os.path.join(pseudobulk_folder , mysub+'-FAP-LTBR_vs_Untreated_BescaSigGSEA.pdf'))

In [None]:
# Infer enrichment with ora using significant deg
top_genes_TvsC = results_df[(results_df['padj'] < 0.1) & (results_df['stat'] > 0)]

# Run ora
enr_pvals = dc.get_ora_df(
    df=top_genes_TvsC,
    net=mysigs,
    source='geneset',
    target='gene_symbol',
    verbose= True
)

enr_pvals.head()

top_genes_TvsC

enr_pvals_filtered = enr_pvals[enr_pvals['FDR p-value'] < 0.05]

enr_pvals_filtered

dc.plot_dotplot(enr_pvals_filtered, x='Combined score', y = 'Term', s='Odds ratio', c = 'FDR p-value', scale = 0.3, 
                figsize=(5,3))

In [None]:
# Infer enrichment with ora using significant deg
top_genes_upCvsT = results_df[(results_df['padj'] < 0.1) & (results_df['stat'] < 0)]

# Run ora
enr_pvals = dc.get_ora_df(
    df=top_genes_upCvsT,
    net=mysigs,
    source='geneset',
    target='gene_symbol',
    verbose= True
)

enr_pvals.head()

top_genes_upCvsT

enr_pvals_filtered = enr_pvals[enr_pvals['FDR p-value'] < 0.1]

enr_pvals_filtered

dc.plot_dotplot(enr_pvals_filtered, x='Combined score', y = 'Term', s='Odds ratio', c = 'FDR p-value', 
                scale = 0.3, figsize=(4,2))

In [None]:
gomarkers = bc.tl.sig.read_GMT_sign('external_files/m5.go.bp.v2023.2.Mm.symbols.gmt', directed=False)
cellmarkers = bc.tl.sig.read_GMT_sign('external_files/m8.all.v2023.2.Mm.symbols.gmt', directed=False)



In [None]:

### Remove genes not present in the dataset (and empty signatures) from both collections
gomarkers = bc.tl.sig.filter_siggenes(adata_concat, gomarkers)
cellmarkers = bc.tl.sig.filter_siggenes(adata_concat, cellmarkers)

# Long-format table of the GO signatures: one row per (gene set, gene) pair.
genes = [gene for key in gomarkers for gene in gomarkers[key]]
sets = [key for key in gomarkers for _ in gomarkers[key]]

gosigs = pd.DataFrame({
    'gene_symbol': genes,
    'geneset': sets,
    'gs_name': sets,
})


In [None]:
# Long-format table of the cell-type signatures: one row per (gene set, gene) pair.
genes = [gene for key in cellmarkers for gene in cellmarkers[key]]
sets = [key for key in cellmarkers for _ in cellmarkers[key]]

cellsigs = pd.DataFrame({
    'gene_symbol': genes,
    'geneset': sets,
    'gs_name': sets,
})


In [None]:
gosigs

In [None]:
# Run GSEA (dc.get_gsea_df) on the full results, ranking genes by the 'stat' column,
# against the cell-type signature table `cellsigs`
enr_pvals = dc.get_gsea_df(
    df=results_df,
    stat = 'stat',
    net=cellsigs,
    source='geneset',
    target='gene_symbol',
    verbose= True
)

enr_pvals.sort_values('NES', ascending=False)

enr_pvals

# Keep the terms with FDR < 0.01 and, of those, the 25 with the largest |NES|
enr_pvals_filtered = enr_pvals[enr_pvals['FDR p-value'] < 0.01]

enr_pvals_filtered_color = enr_pvals_filtered.sort_values(by='NES', key=abs, ascending=False).head(25)

# Bar colour by direction: blue for negative NES, red otherwise
enr_pvals_filtered_color['color'] = np.where(enr_pvals_filtered_color['NES'] < 0, 'blue', 'red')

enr_pvals_filtered_color=enr_pvals_filtered_color.sort_values(by='NES', ascending=True)

enr_pvals_filtered_color


In [None]:

dc.plot_barplot_df(enr_pvals_filtered_color, x='NES', y= 'Term', figsize=(6, 8), 
                   color=enr_pvals_filtered_color.color.tolist())


In [None]:

# Infer enrichment with ORA using the significant genes that are up in treated
# (padj < 0.01 and positive 'stat')
top_genes_TvsC = results_df[(results_df['padj'] < 0.01) & (results_df['stat'] > 0)]

# Run ora against the cell-type signatures
enr_pvals = dc.get_ora_df(
    df=top_genes_TvsC,
    net=cellsigs,
    source='geneset',
    target='gene_symbol',
    verbose= True
)

enr_pvals.head()

top_genes_TvsC

# Keep terms that pass BOTH thresholds. Previously the second assignment filtered
# `enr_pvals` again and thereby discarded the FDR filter of the line before it.
enr_pvals_filtered = enr_pvals[
    (enr_pvals['FDR p-value'] < 0.001) & (enr_pvals['Combined score'] > 75)
]
enr_pvals_filtered



In [None]:
dc.plot_dotplot(enr_pvals_filtered, x='Combined score', y = 'Term', s='Odds ratio', c = 'FDR p-value', scale = 0.3, 
                figsize=(6,8))


In [None]:

# Infer enrichment with ORA using the significant genes that are up in treated
# (padj < 0.01 and positive 'stat')
top_genes_upTvsC = results_df[(results_df['padj'] < 0.01) & (results_df['stat'] > 0)]

# Run ora against the GO signatures. The gene table defined in this cell is used, so
# the cell no longer depends on `top_genes_TvsC` left over from an earlier cell.
enr_pvals = dc.get_ora_df(
    df=top_genes_upTvsC,
    net=gosigs,
    source='geneset',
    target='gene_symbol',
    verbose= True
)

enr_pvals.head()

top_genes_upTvsC

# Keep terms that pass BOTH thresholds. Previously the second assignment filtered
# `enr_pvals` again and thereby discarded the FDR filter of the line before it.
enr_pvals_filtered = enr_pvals[
    (enr_pvals['FDR p-value'] < 0.001) & (enr_pvals['Combined score'] > 300)
]

enr_pvals_filtered


In [None]:

dc.plot_dotplot(enr_pvals_filtered, x='Combined score', y = 'Term', s='Odds ratio', c = 'FDR p-value', 
                scale = 0.3, figsize=(6,8))

In [None]:
### Top 50 induced genes: significant (padj <= 0.01), ranked by descending log2 fold change
top50 = (
    results_df[results_df['padj'] <= 0.01]
    .sort_values('log2FoldChange', ascending=False)
    .head(50)
)
#results_df.sort_values('stat')
top50.head(50)

In [None]:
# Remove gene-named columns from .obs.
# NOTE(review): presumably these would otherwise clash with the identically named genes
# in var_names when plotting — confirm.
# errors='ignore' keeps the cell re-runnable: without it the drop raises a KeyError as
# soon as one of the columns is absent (e.g. on the second execution of this cell).
adata.obs = adata.obs.drop(
    columns=['Cxcl9','Cxcl10','Cxcl13','Cd274','Glycam1','Cd19'],  #,'CD19'
    errors='ignore',
)


In [None]:
indcat=['A2','A3','A4','A5','A8','B6','B7','B8','B16','B19']
adata_sub=adata[adata.obs.individual_id.isin(indcat)]


In [None]:

sc.pl.matrixplot(adata_sub, var_names=list(top50.index), 
                                                  groupby='individual_id', standard_scale='var', vmax=0.6, 
                 categories_order=indcat, 
                 save= mysub+'-FAP-LTBR_vs_Untreated_TopGenesHeatmap.pdf')


In [None]:
list(top50.index)

In [None]:
oi=set(list(results_df.loc[results_df['padj']<=0.01,:].index)).intersection(set(['H2-Aa','Cxcl13','Cd274',
                                                                              'Gzmb','Cxcl9','Ccl8','Trem2','Cxcl10','Cxcl9',
                                                                              'Gzme','Prf1','H2-Eb1',
 'H2-Ab1',
 'H2-DMb1','Glycam1','Fap','Ltbr']))

In [None]:
oi=['Ccl8', 'Cd274', 'Cxcl10', 'Glycam1', 'H2-Aa', 'H2-Ab1', 'H2-DMb1', 'H2-Eb1']

In [None]:
#adata_sub=adata.copy()
#adata_sub.obs=adata_sub.obs.drop(columns=['Cxcl9','Cxcl10','Cxcl13','Cd274','Glycam1','Ifng'])
#adata_sub.obs=adata_sub.obs.drop(columns=['Cxcl10'])
#adata_sub.obs=adata_sub.obs.drop(columns=['Ifng','Cxcl9','Cxcl10','Cxcl13','Cd274','Glycam1'])

In [None]:
adata_sub=adata_sub[adata_sub.obs.individual_id.isin(indcat)].copy()

In [None]:
# Matrix plot of the genes of interest plus two extra markers, per individual.
# dict.fromkeys removes repeated gene names while keeping the first-seen order, so a
# gene that is already part of `oi` (e.g. 'Cd274') is shown once instead of twice.
sc.pl.matrixplot(adata_sub, var_names=list(dict.fromkeys(list(oi)+['Cxcl13','Cd274'])),
                 use_raw=False, groupby='individual_id',
                 standard_scale='var', vmax=0.6, 
                 categories_order=indcat)


In [None]:
adata.obs["readout_id"].unique().tolist()

In [None]:
oi

In [None]:
sc.settings.set_figure_params()

In [None]:
genestoadd=['Cd3d','Cd8a','Cd4','Pdcd1','Cd274','Ifng','Cxcl9','Gzmb','Gzma','Ms4a1','Cd79a','Igkc','Glycam1',
            'Madcam1','Meox1','Cxcl13','H2-Aa', 'H2-Ab1', 'H2-DMb1', 'Chst4','Il9r','Il2rg','Pla2g2d','Stat1',
            'Cxcl10','Fcer2a']
indcat=['A2','A3','A4','A5','A8','B6','B8','B16','B19']

In [None]:
genestoadd=['Cd3d','Cd8a','Cd4','Pdcd1','Cd274','Ifng','Cxcl9','Gzmb','Gzma','Prf1','Cxcl13',
            'H2-Aa', 'H2-Ab1', 'H2-DMb1', 'Chst4','Ctsk','Ccl8','Stat1','Cxcl10','Igkc','Glycam1',
            'Madcam1','Meox1','Fcer2a']
indcat=['A2','A3','A4','A5','A8','B6','B8','B16','B19']

In [None]:
plotgoi=['Glycam1', 'Meox1', 'Selp',  'Cd19', 'Cd79a', 'Igkc', 'Bcl2l14',
         'Fcer2a',  'H2-Ab1', 'H2-DMb1', 'H2-DMa', 'Cd274', 'Cd3d', 'Cd8a', 'Cd4', 
         'Sell','Tcf7', 'Slamf7', 'Pdcd1',  'Gzmb', 'Gzma', 'Cxcl9', 'Cxcl10', 
          'Cxcl13',   'Il6', 'Il1b']
indcat=['A2','A3','A4','A5','A8','B6','B7','B8','B16','B19']
#adata_sub=adata[adata.obs.individual_id.isin(indcat)]
adata_sub=adata[~adata.obs['readout_id'].isin(exclude)].copy()

In [None]:
sc.pl.matrixplot(adata_sub, var_names=plotgoi, use_raw=False,groupby='individual_id', 
                 standard_scale='var', vmax=0.6, 
                 categories_order=indcat, save= mysub+'-FAP-LTBR_vs_Untreated_GOIHeatmap.pdf')


In [None]:
genestoadd=["Glycam1",'Meox1','Madcam1','Cxcl13','Chst4',
               'Cd274','Cd3e','Cd4','Cd8a','Tcf7','Slamf7', 'Fcer2a','Ms4a1',
               'Cxcl9','Cxcl10']

In [None]:
# Plot the marker genes in `genestoadd` on the tissue image of every readout and save
# one PDF per readout.
# The per-sample object is called `adata_library` instead of `ad`, because `ad` is the
# alias of the imported `anndata` module: rebinding it would break any later (or
# re-run) `ad.<function>` call in this kernel.
for library in adata_sub.obs["readout_id"].unique().tolist():
    adata_library = adata_sub[adata_sub.obs["readout_id"] == library, :].copy()
    sample_id = adata_library.obs["readout_id"].unique()[0]
    print(sample_id)
    print(adata_library.obs["treatment_id"].unique()[0])
    sc.pl.spatial(
        adata_library,
        img_key="hires",
        library_id=library,
        color=genestoadd,
        size=1.5, color_map = 'viridis',  use_raw=False, 
        save=sample_id+mysub+'-MultipleMarkerexpression.pdf')
    
#sc.pl.violin(adata, keys = ["Glycam1"], groupby='treatment_id', rotation=90,  use_raw=False)

##### After running Tumor and stroma separately, do some joint plots

In [None]:
#strodata=adata.copy()

In [None]:
# Per-region result tables for FAP-LTBR vs Untreated, indexed by their first column.
# Their padj / log2FoldChange / baseMean columns are used by the cells below.
stroma_de_csv = os.path.join(pseudobulk_folder, 'Stroma-FAP-LTBR_vs_Untreated_0605.csv')
tumor_de_csv = os.path.join(pseudobulk_folder, 'Tumor-FAP-LTBR_vs_Untreated_0605.csv')
strores = pd.read_csv(stroma_de_csv, sep=',', index_col=0)
tures = pd.read_csv(tumor_de_csv, sep=',', index_col=0)

In [None]:
def _top_upregulated(res, n=15, alpha=0.01):
    """Return the index labels of the `n` largest-log2FoldChange rows with padj < alpha."""
    significant = res.loc[res['padj'] < alpha, :]
    ranked = significant.sort_values('log2FoldChange', ascending=False)
    return set(ranked.iloc[0:n, :].index)


# Union of the 15 strongest up-regulated significant entries of each region.
# NOTE(review): the variable name says 25, but 15 rows are taken per table.
top25plot = _top_upregulated(strores).union(_top_upregulated(tures))


In [None]:
# Outer-join the stroma and tumor tables on their index; column names present in
# both tables get the suffix "S" (stroma) or "T" (tumor).
merged_df = strores.merge(
    tures,
    how="outer",
    left_index=True,
    right_index=True,
    suffixes=("S", "T"),
)


In [None]:
# Curated genes of interest for the joint stroma/tumor heatmaps.
plotgoi = [
    'Glycam1', 'Meox1', 'Selp', 'Cd79a', 'Igkc', 'H2-Ab1',
    'H2-DMb1', 'H2-DMa', 'Cd274', 'Cd3d', 'Cd8a', 'Cd4',
    'Sell', 'Tcf7', 'Slamf7', 'Pdcd1', 'Gzmb', 'Gzma',
    'Cxcl10', 'Cxcl13', 'Il6', 'Il1b',
]

In [None]:
# Extend the curated genes with the top up-regulated genes of both regions (result is a set).
plotgoi = set(plotgoi) | set(top25plot)

In [None]:
# `plotgoi` is a set at this point. pandas >= 2.0 raises TypeError for set indexers
# in .loc (deprecated since 1.5), so index with a list instead; sorting makes the
# row order deterministic instead of depending on set iteration order.
plot_df = merged_df.loc[sorted(plotgoi), :]

In [None]:
#plot_df

In [None]:
# Reset plotting rc parameters to their defaults before drawing the heatmaps below.
# NOTE(review): `sns` is not among the imports visible at the top of the notebook -
# confirm seaborn is imported (or provided by `wrapper_functions`) before this cell.
sns.reset_defaults()

In [None]:
# Rows are ordered by decreasing stroma log2FC; the padj matrix follows the same row order
# so that array position (row, col) matches the heatmap cell.
fc_columns = ["log2FoldChangeS", "log2FoldChangeT"]
heatmap_data = plot_df[fc_columns].sort_values("log2FoldChangeS", ascending=False)
padj_values = plot_df.loc[heatmap_data.index, ["padjS", "padjT"]].to_numpy()


In [None]:
# Heatmap of log2 fold changes per region, colour scale fixed to [0, 5].
fig, ax = plt.subplots(figsize=(2.5, 8))
sns.heatmap(
    heatmap_data,
    ax=ax,
    annot=False,
    cmap="Reds",
    vmin=0,
    vmax=5,
    cbar_kws={'label': 'Log2FC'},
    linewidths=0.2,
    linecolor="grey",
)

# Significance markers: '**' for padj <= 0.01, '*' for padj <= 0.05.
# NaN compares False in both tests, so cells without a padj get no marker.
for (row_pos, col_pos), padj in np.ndenumerate(padj_values):
    if padj <= 0.01:
        marker = '**'
    elif padj <= 0.05:
        marker = '*'
    else:
        continue
    ax.text(
        col_pos + 0.5, row_pos + 0.5, marker,
        ha='center', va='center', color='black', fontsize=10,
    )

ax.set_xticklabels(["log2FC Stroma", "log2FC Tumor"], fontsize=10)
plt.title("Heatmap of log2FC (Capped at 5) with Stars for Significant padj", fontsize=14)
plt.tight_layout()

fc_heatmap_pdf = os.path.join(pseudobulk_folder , 'Heatmap_FCperRegion-FAP-LTBR_vs_Untreated.pdf')
plt.savefig(fc_heatmap_pdf, format="pdf")
plt.show()


In [None]:
# Heatmap of the natural-log base mean per region, in the same row order as the
# log2FC heatmap (the index of the previous `heatmap_data` is reused).
heatmap_data = np.log(plot_df[["baseMeanS", "baseMeanT"]].loc[heatmap_data.index,:])
padj_values = plot_df[["padjS", "padjT"]].loc[heatmap_data.index,:].to_numpy()

plt.figure(figsize=(2.5, 8))
ax = sns.heatmap(
    heatmap_data,
    annot=False,
    cmap="YlGnBu",
    # The plotted values are log-transformed, so the colour bar is labelled accordingly.
    cbar_kws={'label': 'log(baseMean)'},
    vmin=0,  # values below 0 (baseMean < 1) share the lowest colour
    linewidths=0.2, linecolor="grey"
)

# Significance markers: '**' for padj <= 0.01, '*' for padj <= 0.05 (NaN gets none).
for y in range(padj_values.shape[0]):
    for x in range(padj_values.shape[1]):
        if padj_values[y, x] <= 0.01:
            ax.text(
                x + 0.5, y + 0.5, '**',
                ha='center', va='center', color='black', fontsize=10
            )
        elif padj_values[y, x] <= 0.05:
            ax.text(
                x + 0.5, y + 0.5, '*',
                ha='center', va='center', color='black', fontsize=10
            )


ax.set_xticklabels(["Mean Stroma", "Mean Tumor"], fontsize=10)
# No upper cap (vmax) is applied to this heatmap, so the title must not claim one.
plt.title("Heatmap of log(baseMean) with Stars for Significant padj", fontsize=14)
plt.tight_layout()

plt.savefig(os.path.join(pseudobulk_folder , 'Heatmap_MeanperRegion-FAP-LTBR_vs_Untreated.pdf'), format="pdf")
plt.show()


In [None]:

# Per-region TF activity tables, transposed after loading. The cells below read
# row 0 as the activity values and row 1 as the matching p-values.
stroma_tf_tsv = os.path.join(pseudobulk_folder , 'Stroma-FAP-LTBR_vs_Untreated_TFact.tsv')
tumor_tf_tsv = os.path.join(pseudobulk_folder , 'Tumor-FAP-LTBR_vs_Untreated_TFact.tsv')
stf = pd.read_csv(stroma_tf_tsv, sep='\t', index_col=0).transpose()
ttf = pd.read_csv(tumor_tf_tsv, sep='\t', index_col=0).transpose()

In [None]:
# Show the first row of the transposed stroma TF table.
stf.iloc[0]

In [None]:
# Outer-join the first row (a Series) of each transposed TF table on its index;
# suffixes "S"/"T" are applied if the two Series carry the same name.
stroma_tf_row = stf.iloc[0, :]
tumor_tf_row = ttf.iloc[0, :]
merged_df = pd.merge(
    stroma_tf_row,
    tumor_tf_row,
    how="outer",
    left_index=True,
    right_index=True,
    suffixes=("S", "T"),
)


In [None]:
# Same outer join for the second row (a Series) of each TF table, which the
# heatmap cell below treats as p-values.
stroma_tf_pval = stf.iloc[1, :]
tumor_tf_pval = ttf.iloc[1, :]
merged_pval = pd.merge(
    stroma_tf_pval,
    tumor_tf_pval,
    how="outer",
    left_index=True,
    right_index=True,
    suffixes=("S", "T"),
)


In [None]:
# Transcription factors of interest, in the row order used for the TF heatmap.
tfoi = [
    'Irf1', 'Rfxap', 'Rfxank', 'Nfkb1', 'Tbx21', 'Ciita', 'Rel', 'Klf13',
    'Irf5', 'Irf8', 'Irf2', 'Irf3', 'Lmo2', 'Rela', 'Spi1', 'Stat4',
    'Pou2f2', 'Nr4a3', 'E2f4', 'Prrx1', 'Myc', 'Srf', 'Yap1', 'Plag1',
]

In [None]:
# Select the TFs of interest (in `tfoi` order) from the activity and p-value tables
# and give both the same column labels so they can be addressed cell by cell.
tf_columns = ['TFactS', 'TFactT']
plot_df = merged_df.loc[tfoi, :].set_axis(tf_columns, axis=1)
plot_pval = merged_pval.loc[tfoi, :].set_axis(tf_columns, axis=1)

In [None]:
# Diverging heatmap of TF activities per region (centred at 0, upper limit 5).
heatmap_data = plot_df
fig, ax = plt.subplots(figsize=(2.25, 6))
sns.heatmap(
    heatmap_data,
    ax=ax,
    annot=False,
    cmap="RdBu_r",
    center=0,
    vmax=5,
    cbar_kws={'label': 'TFact'},
    linewidths=0.2,
    linecolor="grey",
)

# Significance markers from `plot_pval`: '**' for p <= 0.01, '*' for p <= 0.05.
for y, row in enumerate(plot_pval.index):
    for x, col in enumerate(plot_pval.columns):
        p_value = plot_pval.loc[row, col]
        star = "**" if p_value <= 0.01 else ("*" if p_value <= 0.05 else None)
        if not star:
            continue
        ax.text(x + 0.5, y + 0.5, star, ha='center', va='center', color='black', fontsize=8)

ax.set_xticklabels(["S", "T"], fontsize=10)
plt.title("Heatmap of TF act, capped at 5", fontsize=10)
plt.tight_layout()

tf_heatmap_pdf = os.path.join(pseudobulk_folder , 'Heatmap_TFactRegion-FAP-LTBR_vs_Untreated.pdf')
plt.savefig(tf_heatmap_pdf, format="pdf")
plt.show()


In [None]:
# Per-region pathway activity tables, transposed after loading. The cells below
# read row 0 as the activity values and row 1 as the matching p-values.
stroma_pathway_tsv = os.path.join(pseudobulk_folder , 'Stroma-FAP-LTBR_vs_Untreated_Pathways.tsv')
tumor_pathway_tsv = os.path.join(pseudobulk_folder , 'Tumor-FAP-LTBR_vs_Untreated_Pathways.tsv')
spat = pd.read_csv(stroma_pathway_tsv, sep='\t', index_col=0).transpose()
tpat = pd.read_csv(tumor_pathway_tsv, sep='\t', index_col=0).transpose()

In [None]:
# Outer-join the first row (a Series) of each transposed pathway table on its index.
stroma_pathway_row = spat.iloc[0, :]
tumor_pathway_row = tpat.iloc[0, :]
merged_df = pd.merge(
    stroma_pathway_row,
    tumor_pathway_row,
    how="outer",
    left_index=True,
    right_index=True,
    suffixes=("S", "T"),
)


In [None]:
# Outer-join the second row (a Series) of each pathway table, used below as p-values.
stroma_pathway_pval = spat.iloc[1, :]
tumor_pathway_pval = tpat.iloc[1, :]
merged_pval = pd.merge(
    stroma_pathway_pval,
    tumor_pathway_pval,
    how="outer",
    left_index=True,
    right_index=True,
    suffixes=("S", "T"),
)


In [None]:
# `plot_df` is an alias of `merged_df` (no copy), so the new column labels
# apply to both names.
merged_df.columns = ['PathactS', 'PathactT']
plot_df = merged_df

In [None]:
# `plot_pval` is an alias of `merged_pval` (no copy); it gets the same column labels
# as `plot_df` so both tables can be addressed with identical (row, column) keys.
merged_pval.columns = ['PathactS', 'PathactT']
plot_pval = merged_pval

In [None]:
# Display the pathway p-value table for visual inspection.
plot_pval

In [None]:
# Diverging heatmap of pathway activities per region, rows ordered by decreasing
# stroma activity; the p-value table is put in the same row order.
heatmap_data = plot_df.sort_values(by='PathactS', ascending=False)
plot_pval = plot_pval.loc[heatmap_data.index, :]

fig, ax = plt.subplots(figsize=(2.25, 4))
sns.heatmap(
    heatmap_data,
    ax=ax,
    annot=False,
    cmap="RdBu_r",  # diverging blue-white-red map, centred at 0 below
    center=0,
    vmax=5,
    cbar_kws={'label': 'Pathact'},
    linewidths=0.2,
    linecolor="grey",
)

# Significance markers from `plot_pval`: '**' for p <= 0.01, '*' for p <= 0.05.
for y, row in enumerate(plot_pval.index):
    for x, col in enumerate(plot_pval.columns):
        p_value = plot_pval.loc[row, col]
        star = "**" if p_value <= 0.01 else ("*" if p_value <= 0.05 else None)
        if not star:
            continue
        ax.text(x + 0.5, y + 0.5, star, ha='center', va='center', color='black', fontsize=8)

ax.set_xticklabels(["S", "T"], fontsize=10)
plt.title("Heatmap of Pathway act, capped at 5", fontsize=10)
plt.tight_layout()

pathway_heatmap_pdf = os.path.join(pseudobulk_folder , 'Heatmap_PathwayactRegion-FAP-LTBR_vs_Untreated.pdf')
plt.savefig(pathway_heatmap_pdf, format="pdf")
plt.show()


In [None]:
# Pre-filtered ("_sig") Hallmark GSEA tables per region, indexed by their second column.
stroma_sig_tsv = os.path.join(pseudobulk_folder , 'Stroma-FAP-LTBR_vs_Untreated_HallmarkGSEA_sig.tsv')
tumor_sig_tsv = os.path.join(pseudobulk_folder , 'Tumor-FAP-LTBR_vs_Untreated_HallmarkGSEA_sig.tsv')
spath = pd.read_csv(stroma_sig_tsv, sep='\t', index_col=1)
tpath = pd.read_csv(tumor_sig_tsv, sep='\t', index_col=1)

In [None]:
poi=list(set(spath.index).union(set(tpath.index)))
lpoi=list(merged_df.loc[merged_df.loc[:,['FDR p-valueS','FDR p-valueT']].min(axis=1)<=0.05,:].index).copy() ## more lenient version

In [None]:
spath=pd.read_csv(os.path.join(pseudobulk_folder , 'Stroma-FAP-LTBR_vs_Untreated_HallmarkGSEA.tsv'),sep='\t', index_col=1)
tpath=pd.read_csv(os.path.join(pseudobulk_folder , 'Tumor-FAP-LTBR_vs_Untreated_HallmarkGSEA.tsv'),sep='\t', index_col=1)

In [None]:
merged_df = pd.merge(spath, tpath, left_index=True, right_index=True, how="outer", suffixes=("S", "T"))
#plot_df=merged_df
#plot_df.columns=['PathactS','PathactT']

In [None]:
# Display the merged stroma/tumor Hallmark GSEA table for visual inspection.
merged_df

In [None]:
#poi=['TNFA_SIGNALING_VIA_NFKB','ALLOGRAFT_REJECTION','PI3K_AKT_MTOR_SIGNALING','TGF_BETA_SIGNALING',
#    'OXIDATIVE_PHOSPHORYLATION','P53_PATHWAY','INTERFERON_ALPHA_RESPONSE',
#     'INTERFERON_GAMMA_RESPONSE','IL6_JAK_STAT3_SIGNALING','INFLAMMATORY_RESPONSE','G2M_CHECKPOINT','E2F_TARGETS']

In [None]:
# Restrict the merged GSEA table to the lenient pathway list (`lpoi`; use `poi` for the
# strict list) and order rows by decreasing stroma NES.
plot_df = merged_df.loc[lpoi, :]
nes_columns = ["NESS", "NEST"]
heatmap_data = plot_df[nes_columns].sort_values(by='NESS', ascending=False)
# NOTE: despite its name, `padj_values` holds the nominal ('NOM p-value') columns here.
padj_values = plot_df.loc[heatmap_data.index, ["NOM p-valueS", "NOM p-valueT"]].to_numpy()


In [None]:

# Diverging heatmap of Hallmark GSEA normalised enrichment scores (NES) per region.
plt.figure(figsize=(1.5, 6))
ax = sns.heatmap(
    heatmap_data,
    annot=False,
    cmap="RdBu_r",  # diverging blue-white-red map, centred at 0
    cbar_kws={'label': 'NES'},
    center=0,
    linewidths=0.2, linecolor="grey"
)

# Significance markers: '**' for p <= 0.01, '*' for p <= 0.05 (NaN gets none).
# `padj_values` is built from the 'NOM p-value' columns in the preceding cell,
# i.e. these are nominal, not adjusted, p-values.
for y in range(padj_values.shape[0]):
    for x in range(padj_values.shape[1]):
        if padj_values[y, x] <= 0.01:
            ax.text(
                x + 0.5, y + 0.5, '**',
                ha='center', va='center', color='black', fontsize=10
            )
        elif padj_values[y, x] <= 0.05:
            ax.text(
                x + 0.5, y + 0.5, '*',
                ha='center', va='center', color='black', fontsize=10
            )

ax.set_xticklabels(["S", "T"], fontsize=10)
# The title previously announced "padj" (and "NESS", a column-suffix artefact),
# which misdescribed the nominal p-values the stars are based on.
plt.title("Heatmap of NES with Stars for Significant nominal p-value")
plt.tight_layout()


plt.savefig(os.path.join(pseudobulk_folder , 'Heatmap_HallmarkperRegion-FAP-LTBR_vs_Untreated_large.pdf'), format="pdf")
plt.show()


In [None]:
# Shell escape: export the named notebook (presumably this one - confirm the filename) to HTML.
! jupyter nbconvert --to html 07_Pseudobulk_analysis-perRegion.ipynb