## Functional Enrichment Analysis

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib import rcParams
from matplotlib.pyplot import rc_context
import scanpy as sc
import scvelo as scv
import scipy.io
import os
import math
import gseapy as gp
from gseapy.plot import barplot, dotplot, gseaplot
from gseapy.scipalette import SciPalette
from pylab import *
from matplotlib.colors import ListedColormap,LinearSegmentedColormap 
import seaborn as sns
from pathlib import Path
%matplotlib inline

In [3]:
import pandas as pd
data = pd.read_excel("DEG.xlsx", header=None) 
gene_list = data[0].tolist()
print(gene_list)

['gene', 'ARHGAP15', 'DOCK10', 'SKAP1', 'FYN', 'FNBP1', 'PRKCB', 'REV1', 'ENSSSCG00000011121', 'CHST11', 'CAMK4', 'SRGN', 'PIP4K2A', 'ETS1', 'PTPRC', 'CD3E', 'STK17B', 'PDE3B', 'ENSSSCG00000001097', 'LCP1', 'ENSSSCG00000005095', 'CD2', 'CTSC', 'ENSSSCG00000033909', 'CD53', 'KLF12', 'TRAT1', 'ANKRD44', 'ITGA4', 'CD247', 'ITK', 'IKZF3', 'ARHGDIB', 'INPP4B', 'CYTIP', 'RDX', 'STK17A', 'RHOH', 'ENSSSCG00000029596', 'LCP2', 'PTPN22', 'ARHGAP25', 'LAPTM5', 'OGT', 'RGS1', 'EVL', 'ITGB7', 'CORO1A', 'SEPTIN6', 'RUNX3', 'PTPRCAP', 'CD96', 'RFTN1', 'SFMBT2', 'CD69', 'LCK', 'CD3D', 'BTBD11', 'NLRC3', 'ENSSSCG00000040535', 'ZEB1', 'INPP5D', 'CST7', 'TSPAN5', 'ENSSSCG00000036618', 'APBB1IP', 'THEMIS', 'TRPS1', 'ENSSSCG00000000576', 'PTK2B', 'TNFAIP8', 'PAG1', 'STK4', 'SH3KBP1', 'DOCK2', 'SLC9A9', 'CRLF3', 'STAT4', 'IKZF2', 'ENSSSCG00000016475', 'LNPEP', 'ITGAE', 'CCND3', 'TGFBR2', 'CD37', 'PARP8', 'SNTB1', 'PRKCQ', 'IKZF1', 'ENSSSCG00000025499', 'TRERF1', 'PREX1', 'GPR171', 'MAPRE2', 'FLI1', 'SCML4',

In [4]:
enr = gp.enrichr(gene_list=gene_list,
                gene_sets='GO_Biological_Process_2021',  #['GO_Cellular_Component_2021','GO_Molecular_Function_2021',''KEGG_2021_Human']
                organism='Human',
                outdir='./',
                cutoff=0.05,
                no_plot=True
                )
file_path = f'immune_DEG_GO.csv'
enr.results = enr.results[enr.results['Adjusted P-value'] < 0.05]
enr.results['CellType'] = "Immune cell"    
enr.results['-Log10 P-value'] = -enr.results['Adjusted P-value'].apply(math.log10)
enr.results['Count'] = enr.results['Overlap'].map(lambda x:x.split('/')[0])
enr.results['Gene Count'] = len(gene_list)
enr.results['Background Count'] = enr.results['Overlap'].map(lambda x:x.split('/')[1])
enr.results['Count'] = pd.to_numeric(enr.results['Count'],errors='coerce')
enr.results['Gene Count'] = pd.to_numeric(enr.results['Gene Count'],errors='coerce')
enr.results['Background Count'] = pd.to_numeric(enr.results['Background Count'],errors='coerce')
enr.results['Fold Enrichment'] = (enr.results['Count']/enr.results['Gene Count'])/(enr.results['Background Count']/14937)
enr.results['Method'] = "GO Enrichment"  
enr.results=enr.results.sort_values(by='Fold Enrichment', ascending=False)
enr.results=enr.results.reset_index(drop = True)
    
enr.results.to_csv(file_path)

In [None]:
files = os.listdir('./')

def file_filter(f):
    if f[-9:] in ['_padj.csv']:
        return True
    else:
        return False

files = list(filter(file_filter, files))
print(files)

In [None]:
for i in files:
    filename=i.replace("_padj.csv", "")
    data = pd.read_csv(i, index_col=0)
    data_up = data[data.log2FC > 0]
    data_dw = data[data.log2FC < 0]
    enr_up = gp.enrichr(gene_list=data_up.gene,
                        gene_sets='GO_Biological_Process_2021',  #['GO_Cellular_Component_2021','GO_Molecular_Function_2021']
                        organism='Human',
                        outdir='./',
                        cutoff=0.05,  
                        no_plot=True

                        )
    enr_dw = gp.enrichr(gene_list=data_dw.gene,
                        gene_sets='GO_Biological_Process_2021',  #['GO_Cellular_Component_2021','GO_Molecular_Function_2021']
                        organism='Human',
                        outdir='./',
                        cutoff=0.05,  
                        no_plot=True
                        )
    enr_up.results.to_csv(filename+'_up_GO.csv')
    enr_dw.results.to_csv(filename+'_dw_GO.csv')
    enr_up.res2d['UP_DW'] = "UP"
    enr_dw.res2d['UP_DW'] = "DOWN"
    enr_up.res2d.Term = enr_up.res2d.Term.str.split(" \(GO").str[0]
    enr_dw.res2d.Term = enr_dw.res2d.Term.str.split(" \(GO").str[0]
    enr_res = pd.concat([enr_up.res2d.head(), enr_dw.res2d.head()])
    enr_res.to_csv(filename+'_GO.csv')