In [79]:
import pandas as pd
import numpy as np
import gseapy as gp

# Widen text columns so long GO term names are not cut off when displayed.
pd.set_option('display.max_colwidth', 100)

# Enrichr library names for the three GO ontologies
# (gp.get_library_name() lists every library that can be requested).
gobp = 'GO_Biological_Process_2021'
gomf = 'GO_Molecular_Function_2021'
gocc = 'GO_Cellular_Component_2021'

# Per-gene table; the first CSV column becomes the index and is later used as
# the gene list, so it is expected to hold gene symbols.
d = pd.read_csv(r'fdr5.csv', index_col=0)

# Keep rows with PPDE above 0.95, retain the 1000 rows with the highest PPDE,
# then put them back into index order.
# NOTE(review): PPDE is presumably a posterior probability of differential
# expression -- confirm against the tool that produced fdr5.csv.
g = (
    d[d['PPDE'].gt(0.95)]
    .sort_values('PPDE', ascending=False)
    .head(1000)
    .sort_index()
)

# Run Enrichr on the selected genes against all three GO libraries and keep
# only the combined results table.
r = gp.enrichr(
    gene_list=g.index.to_list(),
    gene_sets=[gocc, gomf, gobp],
    organism='Human',
    cutoff=0.05,
).results

# I used the data from part 1 of homework 3 that uses only PBMC data. 

### 1. Top 3 GO Bio Processes

In [82]:
# First three Biological Process rows of the Enrichr table, showing only the
# term and its adjusted p-value. Columns are selected by label instead of by
# position so the cell does not silently show the wrong columns if the layout
# of the results table changes.
# NOTE(review): "top 3" relies on `r` being ordered by significance within
# each library, as the displayed outputs suggest -- confirm.
r.loc[r['Gene_set'] == 'GO_Biological_Process_2021', ['Term', 'Adjusted P-value']].head(3)

Unnamed: 0,Term,Adjusted P-value
933,aerobic electron transport chain (GO:0019646),1.884799e-16
934,mitochondrial ATP synthesis coupled electron transport (GO:0042775),1.884799e-16
935,"mitochondrial electron transport, NADH to ubiquinone (GO:0006120)",1.313649e-09


### 2. Top 3 GO Molecular Functions

In [89]:
# First three Molecular Function rows of the Enrichr table, showing only the
# term and its adjusted p-value. Columns are selected by label instead of by
# position so the cell does not silently show the wrong columns if the layout
# of the results table changes.
# NOTE(review): "top 3" relies on `r` being ordered by significance within
# each library, as the displayed outputs suggest -- confirm.
r.loc[r['Gene_set'] == 'GO_Molecular_Function_2021', ['Term', 'Adjusted P-value']].head(3)

Unnamed: 0,Term,Adjusted P-value
301,oxidoreduction-driven active transmembrane transporter activity (GO:0015453),5.046719e-15
302,NADH dehydrogenase (quinone) activity (GO:0050136),4.727279e-10
303,NADH dehydrogenase (ubiquinone) activity (GO:0008137),4.727279e-10


### 3. Top 3 GO Cellular Components

In [90]:
# First three Cellular Component rows of the Enrichr table, showing only the
# term and its adjusted p-value. Columns are selected by label instead of by
# position so the cell does not silently show the wrong columns if the layout
# of the results table changes.
# NOTE(review): "top 3" relies on `r` being ordered by significance within
# each library, as the displayed outputs suggest -- confirm.
r.loc[r['Gene_set'] == 'GO_Cellular_Component_2021', ['Term', 'Adjusted P-value']].head(3)

Unnamed: 0,Term,Adjusted P-value
0,mitochondrial membrane (GO:0031966),4.2886970000000005e-23
1,mitochondrial inner membrane (GO:0005743),3.53477e-21
2,organelle inner membrane (GO:0019866),2.588383e-19


## 4. Does the selected GO term make sense given the list of DE genes?



### Look at GO term aerobic electron transport chain (GO:0019646)

- This makes sense based on the name. COVID-19 patients often have impaired oxygen uptake, so it is plausible that transcripts involved in aerobic respiration through the electron transport chain (ETC) are differentially expressed (DE). My initial hypothesis is that these genes will be strongly up-regulated, because the cells are trying to boost these metabolic pathways to compensate for the reduced oxygen supply.
<br>  
- A Google Scholar search for `"covid-19"  aerobic "electron transport chain"`, filtered to publications since 2021, yields 601 results. A hit count is not evidence of an effect by itself, but it does suggest that a connection between the electron transport chain and COVID-19 has been widely studied.
<br>  

- This paper https://www.ncbi.nlm.nih.gov/pmc/articles/PMC7934846/ states the following: 
>Our observations demonstrate that, within 7 days of infection, morbidity, wasting, and failure to thrive in these animals is associated with significant decrease in expression of genes related to key cellular metabolic processes regulating aerobic cellular respiration and energy production.
<br> 

However, their finding that expression of these genes decreases contradicts my hypothesis above. Rather than being up-regulated, these genes appear to be expressed at a much lower level after SARS-CoV-2 infection, which suggests that the virus suppresses these important metabolic pathways.

In [65]:
# Split the combined Enrichr table into one frame per GO library:
# cc = Cellular Component, bp = Biological Process, mf = Molecular Function.
cc, bp, mf = (
    r[r['Gene_set'] == library]
    for library in (
        'GO_Cellular_Component_2021',
        'GO_Biological_Process_2021',
        'GO_Molecular_Function_2021',
    )
)

['GO_Cellular_Component_2021' 'GO_Molecular_Function_2021'
 'GO_Biological_Process_2021']


In [66]:
# Display the Cellular Component subset of the Enrichr results; pandas elides
# the middle rows of a long frame, so only the first and last rows are shown.
cc

Unnamed: 0,Gene_set,Term,Overlap,P-value,Adjusted P-value,Old P-value,Old Adjusted P-value,Odds Ratio,Combined Score,Genes
0,GO_Cellular_Component_2021,mitochondrial membrane (GO:0031966),85/469,1.424816e-25,4.288697e-23,0,0,4.503529,257.649535,COX7B;MRPS15;NDUFA13;MPV17L2;MRPL36;MRPL37;COX...
1,GO_Cellular_Component_2021,mitochondrial inner membrane (GO:0005743),67/328,2.348684e-23,3.534770e-21,0,0,5.155836,268.647918,COX7B;MRPS15;NDUFA13;MPV17L2;MRPL36;MRPL37;COX...
2,GO_Cellular_Component_2021,organelle inner membrane (GO:0019866),66/346,2.579784e-21,2.588383e-19,0,0,4.724381,223.966731,COX7B;MRPS15;NDUFA13;MPV17L2;MRPL36;MRPL37;COX...
3,GO_Cellular_Component_2021,mitochondrial matrix (GO:0005759),57/348,1.905222e-15,1.433680e-13,0,0,3.886161,131.718220,GFM1;ACAA2;MRPL37;TXN2;CA5B;ME2;IDH3A;BCKDHA;M...
4,GO_Cellular_Component_2021,mitochondrial respiratory chain complex I (GO:...,17/42,5.200591e-12,2.608963e-10,0,0,13.126144,341.046752,NDUFA9;NDUFA13;NDUFA7;NDUFA6;NDUFB5;NDUFA4;NDU...
...,...,...,...,...,...,...,...,...,...,...
296,GO_Cellular_Component_2021,platelet alpha granule (GO:0031091),1/90,9.902133e-01,9.953697e-01,0,0,0.212696,0.002092,HGF
297,GO_Cellular_Component_2021,integral component of plasma membrane (GO:0005...,55/1454,9.906198e-01,9.953697e-01,0,0,0.732235,0.006901,CNTNAP1;GPR68;PTGDR2;KCNC4;PTPRO;ASGR1;MILR1;F...
298,GO_Cellular_Component_2021,cell-cell junction (GO:0005911),6/271,9.938035e-01,9.953697e-01,0,0,0.426749,0.002653,ARVCF;CTNND1;NECTIN3;FGFRL1;AJAP1;ACTG1
299,GO_Cellular_Component_2021,Golgi lumen (GO:0005796),1/100,9.941539e-01,9.953697e-01,0,0,0.191110,0.001121,DEFA1


In [22]:
# Load the Ad5_seroneg.csv table from GitHub; the first column becomes the
# index, which is later used as the gene list.
df = pd.read_csv(
    'https://raw.githubusercontent.com/kuanrongchan/vaccine-studies/main/Ad5_seroneg.csv',
    index_col=0,
)

# Genes passing both thresholds on the `_1d` columns: q-value below 0.05 and
# fc_1d above 1.5 (up) or below -1.5 (down).
# NOTE(review): `_1d` presumably denotes the day-1 time point -- confirm.
DEGs_up_1d = df[df['fc_1d'].gt(1.5) & df['qval_1d'].lt(0.05)].index.tolist()
DEGs_down_1d = df[df['fc_1d'].lt(-1.5) & df['qval_1d'].lt(0.05)].index.tolist()

In [44]:
# Enrichr query for the up-regulated gene list against the GO Biological
# Process library; result files are written under `outdir`.
# NOTE(review): the `description` keyword appears to have been dropped from
# gp.enrichr in newer gseapy releases -- confirm against the installed version.
enr_GOBP_up = gp.enrichr(
    gene_list=DEGs_up_1d,
    gene_sets=['GO_Biological_Process_2021'],
    organism='Human',
    description='DEGs_up_1d',
    outdir='test/enr_DEGs_GOBP_up',
    cutoff=0.05,
)

In [45]:
# Display the results table of the Biological Process enrichment run on the
# up-regulated gene list.
enr_GOBP_up.results

Unnamed: 0,Gene_set,Term,Overlap,P-value,Adjusted P-value,Old P-value,Old Adjusted P-value,Odds Ratio,Combined Score,Genes
0,GO_Biological_Process_2021,cytokine-mediated signaling pathway (GO:0019221),102/621,4.038333e-34,1.491357e-30,0,0,5.100533,392.190496,CD86;IL1RN;CSF1;CD80;IFIT5;IL1RAP;IFI35;CXCL13...
1,GO_Biological_Process_2021,defense response to symbiont (GO:0140546),45/124,7.986735e-31,1.474751e-27,0,0,14.020292,971.639286,RTP4;CD40;RNASEL;IFIT5;IFI6;IFIT1;IFI44L;IFIT3...
2,GO_Biological_Process_2021,defense response to virus (GO:0051607),46/133,2.078736e-30,2.558924e-27,0,0,13.025317,890.225617,RTP4;CD40;RNASEL;IFIT5;IFI6;IFIT1;IFI44L;IFIT3...
3,GO_Biological_Process_2021,cellular response to interferon-gamma (GO:0071...,39/121,1.174254e-24,1.084130e-21,0,0,11.614731,639.988034,CCL13;SP100;PTAFR;ICAM1;OASL;CCL8;CCL7;TRIM5;S...
4,GO_Biological_Process_2021,cellular response to type I interferon (GO:007...,29/65,2.317999e-23,1.426729e-20,0,0,19.470644,1014.785675,RNASEL;SP100;IFIT5;IFI6;ADAR;IFI35;IFIT1;IFIT3...
...,...,...,...,...,...,...,...,...,...,...
3688,GO_Biological_Process_2021,rRNA metabolic process (GO:0016072),1/162,9.989039e-01,9.997476e-01,0,0,0.144054,0.000158,XRN1
3689,GO_Biological_Process_2021,gene expression (GO:0010467),5/356,9.990978e-01,9.997476e-01,0,0,0.328683,0.000297,RNASEL;NUP62;ADAR;NUP43;NUP58
3690,GO_Biological_Process_2021,"mRNA splicing, via spliceosome (GO:0000398)",3/274,9.992062e-01,9.997476e-01,0,0,0.255885,0.000203,SMU1;METTL14;CSTF3
3691,GO_Biological_Process_2021,ribosome biogenesis (GO:0042254),1/192,9.996896e-01,9.998758e-01,0,0,0.121236,0.000038,GLUL
