In [1]:
# Repository root as a relative path (resolved against the kernel's working directory).
REPO = '../..'
# Folder holding the manuscript tables used by the paths in this notebook.
# The value keeps its trailing '/', and the cells below append '/<file>' to it.
Manuscript_RESULT = REPO + '/data/result/manuscript_table/'
import pandas as pd
import decoupler as dc
import warnings
# Install a blanket "ignore" filter: no category or module is given, so every
# warning raised after this line is suppressed for the rest of the session.
warnings.filterwarnings("ignore")
# IPython autoreload extension in mode 2 (reload imported modules before each
# cell execution), so edits to local modules are picked up without a restart.
%load_ext autoreload
%autoreload 2
# Show matplotlib figures inline in the notebook output.
%matplotlib inline

# Load the cell type/clinical annotation table (`GEX_OBS.csv`)

In [2]:
# Annotation table; the first CSV column is used as the row index.
# Later cells read its `Celltype` and `Cellstate` columns.
obs_path = Manuscript_RESULT + "/GEX_OBS.csv"
obs = pd.read_csv(filepath_or_buffer=obs_path, index_col=0)

# Build a GMT gene-set file from the DEGs of each Tumor cell state

In [6]:
# Build one GMT line per Tumor cell state:
#   <cell state> \t All_cells \t <gene 1> \t <gene 2> ...
# A gene is kept when padj < 0.05 and log2FoldChange > 1; the kept genes are
# ranked by descending `stat` and capped at the first 100.
gmt_lines = []
for cellstate in obs.loc[obs.Celltype=='Tumor','Cellstate'].unique():
    print(cellstate)
    # The workbook sheets are looked up with '/' replaced by '_'.
    sheet_name = cellstate.replace('/','_')
    deg_table = pd.read_excel(f"{Manuscript_RESULT}/DEGs_Cellstate.xlsx",sheet_name=sheet_name,index_col=0)
    # Filter BEFORE sorting. Indexing the index of the *sorted* frame with a
    # boolean mask built from the *unsorted* frame is applied by position, not
    # by gene label, so it picks the wrong genes whenever the sheet is not
    # already ordered by descending `stat`.
    significant = deg_table[(deg_table.padj<0.05) & (deg_table.log2FoldChange>1)]
    degs = significant.sort_values('stat',ascending=False).index[:100].tolist()
    gmt_lines.append('\t'.join([cellstate,'All_cells']+degs)+'\n')

# Open the output only after every sheet was read and processed, so a failure
# above does not leave a truncated GMT file behind.
with open(f'{Manuscript_RESULT}/Tumor_Cellstate.gmt','w') as f:
    f.writelines(gmt_lines)

Tumor.EMT-III
Tumor.EMT-II
Tumor.ER-II
Tumor.ER-I
Tumor.Cell_Cycle
Tumor.Interferon/MHCII(I)


# Activity inference with Multivariate Linear Model (MLM)

In [3]:
# PROGENy pathway/target-gene network obtained through decoupler for human;
# `top=500` is the per-pathway gene cutoff described in decoupler's
# `get_progeny` documentation.
# NOTE(review): `dc.get_progeny` is the decoupler 1.x entry point - confirm the
# pinned decoupler version before re-running.
progeny = dc.get_progeny(top=500, organism='human')

In [4]:
# Save the PROGENy table alongside the other manuscript tables (presumably as
# a record of the network used for the inference); the row index is not written.
progeny.to_csv(path_or_buf=f"{Manuscript_RESULT}/progeny_reference.csv", index=False)

In [7]:
def _mlm_from_deg_sheet(workbook, sheet_name, label):
    """Run decoupler's MLM on the `stat` column of one DEG sheet.

    Parameters
    ----------
    workbook : str
        Path of the Excel workbook holding the DEG tables.
    sheet_name : str
        Sheet to read; its first column is used as the row index.
    label : str
        Row label given to the resulting one-row `stat` matrix.

    Returns
    -------
    tuple
        The two objects returned by `dc.run_mlm` (unpacked below as pathway
        activities and p-values), computed against the `progeny` network
        defined in an earlier cell.
    """
    # One row (named `label`), one column per entry of the sheet's index.
    stat_df = (
        pd.read_excel(workbook, sheet_name=sheet_name, index_col=0)[['stat']]
        .T
        .rename(index={'stat': label})
    )
    # Infer pathway activities with mlm
    return dc.run_mlm(mat=stat_df, net=progeny)


mlm_results = []

# Per-cell-state results: one workbook per cell type, one sheet per cell state.
for celltype in ['CD8T','Macs','Tumor']:
    workbook = f"{Manuscript_RESULT}/DEGs_{celltype}.xlsx"
    for cellstate in obs.loc[obs.Celltype==celltype,'Cellstate'].unique():
        # The workbook sheets are looked up with '/' replaced by '_';
        # the original state name is kept as the row label.
        mlm_results.append(_mlm_from_deg_sheet(workbook, cellstate.replace('/','_'), cellstate))

# Tumor aggregated states: sheet name and row label are the same.
for cellstate in ['EMT','ER','Interferon','Cell_Cycle']:
    mlm_results.append(
        _mlm_from_deg_sheet(f"{Manuscript_RESULT}/DEGs_Tumor_Aggr.xlsx", cellstate, cellstate)
    )

# Stack the one-row results in the order they were computed.
pathway_acts = pd.concat([act for act, pval in mlm_results],axis=0)
pathway_pvals = pd.concat([pval for act, pval in mlm_results],axis=0)

# Store result: one workbook with an 'Activity' sheet and a 'Pvalue' sheet.
with pd.ExcelWriter(f"{Manuscript_RESULT}/Progeny.xlsx") as writer:
    pathway_acts.to_excel(writer,sheet_name='Activity',index=True)
    pathway_pvals.to_excel(writer,sheet_name='Pvalue',index=True)