In [64]:
library(fgsea)
library(jsonlite)


In [65]:
# Build a ranked gene-score vector from a differential-expression table.
#
# x: table with columns `logFC`, `P.Value` and `genes`.
# Returns a numeric vector named by `x$genes`, sorted from the highest to the
# lowest score, where score = sign(logFC) * -log10(P.Value).
# Note: a P.Value of exactly 0 produces an infinite score.
get_scores <- function(x) {
  direction <- sign(x$logFC)
  significance <- -log10(x$P.Value)
  ranked <- sort(setNames(direction * significance, x$genes), decreasing = TRUE)

  # Diagnostics: total number of scores vs. number of distinct gene names,
  # so that duplicated gene names are visible to the caller.
  print(length(ranked))
  print(length(unique(names(ranked))))

  ranked
}

# Collect the union of leading-edge genes over all significant pathways.
#
# x:           enrichment result table with a numeric `padj` column and a list
#              column `leadingEdge` (one character vector per pathway).
# padj_cutoff: pathways with padj strictly below this value are kept
#              (default 0.05).
# Returns a character vector of unique gene names, or NULL when no pathway
# passes the cut-off.
get_geneset <- function(x, padj_cutoff = 0.05) {
  # Build the row mask explicitly: rows with a missing padj are dropped, and
  # indexing the column (rather than the table) behaves the same for a
  # data.table and a plain data.frame.
  keep <- !is.na(x$padj) & x$padj < padj_cutoff
  unique(unlist(x$leadingEdge[keep]))
}


In [66]:
# Read each differential-expression table and convert it to a ranked score
# vector (get_scores also prints the total / unique gene counts per table).
data1 <- read.csv('../../bulkRNAseq/y622_degs.csv', row.names = 1)
data1_scores <- get_scores(data1)
data2 <- read.csv('../../bulkRNAseq/choline_degs.csv', row.names = 1)
data2_scores <- get_scores(data2)
data3 <- read.csv('../../bulkRNAseq/g2_degs.csv', row.names = 1)
data3_scores <- get_scores(data3)
data4 <- read.csv('../../bulkRNAseq/choline_batch2_degs.csv', row.names = 1)
data4_scores <- get_scores(data4)

# Build the pathway -> genes list handed to fgsea.
# Each row of all_paths (row names taken from column 'X') becomes one list
# element after the transpose; '' cells (presumably padding for shorter
# pathways) are removed from every gene vector.
all_paths <- read.csv('../../processed_data/all_paths.csv', row.names = 'X')
pathways <- as.list(as.data.frame(t(all_paths)))
temp <- lapply(pathways, function(genes) genes[genes != ''])

[1] 13170
[1] 13169
[1] 13170
[1] 13169
[1] 13170
[1] 13169
[1] 13170
[1] 13169


In [67]:
# Run fgsea for every ranked score vector.
#
# fgsea estimates p-values by random permutation, so the RNG is seeded before
# each run to make the results reproducible from one execution to the next.
#
# pathways: named list of gene vectors (one element per pathway).
# scores:   named numeric vector of gene scores; when a gene name occurs more
#           than once only its first entry is kept, because fgsea requires
#           unique names in `stats`.
# seed:     RNG seed set immediately before the fgsea call.
# Returns the fgsea result table for `scores`.
run_fgsea <- function(pathways, scores, seed = 42) {
  set.seed(seed)
  fgsea(
    pathways = pathways,
    stats = scores[!duplicated(names(scores))],
    minSize = 5,
    maxSize = 1000,
    nproc = 1,
    nPermSimple = 10000
  )
}

data1_out <- run_fgsea(temp, data1_scores)
data2_out <- run_fgsea(temp, data2_scores)
data3_out <- run_fgsea(temp, data3_scores)
data4_out <- run_fgsea(temp, data4_scores)









In [69]:
# Display the enrichment table for data2 (choline_degs.csv), ordered by
# ascending nominal p-value.
data2_out[order(data2_out[["pval"]]), ]

pathway,pval,padj,log2err,ES,NES,size,leadingEdge
<chr>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<int>,<list>
Electron Transport Chain (OXPHOS system in mitochondria) WP111,1.719702e-14,7.549490e-12,0.9759947,-0.6879996,-2.681772,86,"UQCRFS1 , COX7A2 , NDUFB8 , UQCRC1 , UQCR10 , NDUFA12 , COX6A1 , NDUFA6 , NDUFA1 , ATP5F1C , SDHB , ATP5IF1 , COX7A2L , NDUFS4 , NDUFA10 , UQCRH , NDUFB3 , SDHD , ATP5MG , COX5B , ATP5PB , NDUFV1 , ATP5F1A , NDUFA4 , SDHC , SURF1 , NDUFB6 , NDUFS2 , ATP5PF , SLC25A14, SLC25A4 , NDUFB7 , UQCR11 , NDUFS3 , NDUFB9 , COX4I1 , NDUFB2 , COX7B , ATP5PO , NDUFC2 , COX5A , SDHA , NDUFAB1"
Oxidative phosphorylation WP623,1.019968e-08,2.238829e-06,0.7477397,-0.6830303,-2.405102,51,"NDUFB8 , NDUFA6 , NDUFS4 , NDUFA10, ATP5MG , ATP6AP2, ATP5PB , NDUFV1 , ATP5F1A, NDUFA4 , NDUFB6 , NDUFS2 , ATP5PF , NDUFB7 , NDUFS3 , NDUFB9 , NDUFB2 , ATP5PO , NDUFC2 , NDUFAB1, NDUFA8 , NDUFB10, NDUFA5 , ATP5MC1, NDUFS5 , NDUFC1 , ATP5MF , NDUFA2 , NDUFA11, ATP5PD , NDUFB5 , NDUFS6 , ATP5ME , NDUFB4"
Mitochondrial complex I assembly model OXPHOS system WP4324,4.141639e-07,6.060599e-05,0.6749629,-0.6519385,-2.289151,50,"NDUFB8 , NDUFA12 , FOXRED1 , NDUFA6 , NDUFA1 , NDUFAF6 , NDUFS4 , NDUFA10 , TIMMDC1 , NDUFB3 , NDUFV1 , NDUFAF1 , TMEM70 , NDUFB6 , NDUFS2 , NDUFAF3 , NDUFB7 , NDUFS3 , NDUFB9 , TMEM186 , NDUFB2 , ECSIT , NDUFA13 , NDUFC2 , NDUFAB1 , NDUFA8 , DMAC2 , TMEM126B, COA1 , NDUFB10 , NDUFB11 , NDUFA5 , NDUFS5 , NDUFC1 , NDUFAF4 , NDUFA2 , NDUFB5 , NDUFS6 , NDUFAF2"
Nonalcoholic fatty liver disease WP4396,8.188166e-07,8.986512e-05,0.6594444,-0.4610508,-1.901386,126,"UQCRFS1, CASP3 , COX7A2 , JUN , BID , PIK3R1 , NDUFB8 , UQCRC1 , UQCR10 , NDUFA12, COX6A1 , CYC1 , INSR , NDUFA6 , NDUFA1 , SDHB , COX7A2L, NDUFS4 , NDUFA10, NR1H3 , UQCRH , DDIT3 , NDUFB3 , SDHD , COX5B , XBP1 , NDUFV1 , PRKAB1 , NDUFA4 , SDHC , NDUFB6 , NDUFS2 , NDUFB7 , UQCR11 , NDUFS3 , NDUFB9 , COX4I1 , NDUFB2 , COX7B , UQCRHL , NDUFA13, PIK3CA , NDUFC2 , COX5A , SDHA"
GABA receptor Signaling WP4159,2.529995e-06,2.221336e-04,0.6272567,0.7601539,2.027275,26,"GABRE , GABRA3 , GABRB1 , GABRG3 , GABRA2 , GABRG2 , GABRG1 , ABAT , GABRQ , GABRA5 , GAD1 , SLC6A11, GPHN , GABRB3 , ALDH9A1, AP2A2 , AP2A1 , GABBR1 , SLC6A1 , GABRB2"
Cytoplasmic Ribosomal Proteins WP477,6.262438e-06,4.582017e-04,0.6105269,-0.5027162,-1.959551,86,"RPS4X , RPL7 , RPL18A , RPS5 , RPS8 , RPL41 , RPL4 , RPL29 , RPL26 , RPL38 , RPL34 , RPL17 , RPS6 , MRPL19 , RPS6KB2, RPS19 , RPS17 , RPS18 , RPL6 , RPL5 , RPL19 , RPS6KA3, RPL36A , RPL9 , RPS14 , RPL21 , RPL35 , RPL27 , RPL11 , RPL10A , RPS24 , RPL23A , RPS15 , RPL3 , FAU , RPS9 , RPS3 , RPS16 , RPL31 , RPS25"
Proteasome Degradation WP183,2.554684e-05,1.602152e-03,0.5756103,-0.5709146,-2.024302,52,"PSMB5 , PSMC2 , PSMB6 , PSMB1 , PSMB3 , PSMD7 , PSMB7 , PSMA5 , PSMC1 , PSMB4 , PSMA4 , UBE2B , PSMA6 , PSMD4 , PSMD6 , PSMC3 , PSMD11, PSMC4 , PSMB2 , PSMC5 , PSMD10, PSMA1 , PSMA3 , PSMD9 , PSMD13, PSMB10, PSMD8"
DNA Replication WP466,2.926677e-04,1.549961e-02,0.4984931,-0.5736753,-1.915380,39,"GMNN , RPA2 , ORC3 , RFC4 , ORC4 , RFC2 , MCM7 , POLD3, ORC5 , PRIM1, POLD2, CDK2 , RFC3 , CDT1 , PCNA , ORC6"
Integrin-mediated Cell Adhesion WP185,3.177597e-04,1.549961e-02,0.4984931,0.5340789,1.723734,75,"ITGA6 , MAP2K5 , VCL , ITGAV , CAPN1 , PAK4 , PAK1 , CAV2 , GIT2 , ITGA9 , SRC , SOS1 , MAPK6 , CRK , VAV2 , CAPN10 , CAPN2 , MAPK1 , ITGB1 , RAP1A , RAPGEF1, MAP2K6 , ITGB8 , AKT3 , ARHGEF7, MAP2K1 , BRAF , SHC3 , MAPK4 , ZYX , CAPNS1 , ITGA1 , GRB2 , MAP2K3 , MAPK12 , AKT1 , ITGA2B , MAPK10 , PTK2 , TNS1 , RAF1 , HRAS , PAK3"
Eukaryotic Transcription Initiation WP405,4.300510e-04,1.887924e-02,0.4984931,-0.5472788,-1.846161,41,"TAF9 , POLR1E, POLR3K, CDK7 , TAF12 , POLR2K, CCNH , POLR3H, POLR3D, POLR2C, TBP , POLR2G, GTF2E2, POLR2I, POLR2J, POLR2H, GTF2H1, GTF2A2, GTF2H2, ERCC3 , POLR2E"


In [63]:
# Gather the leading-edge genes of the significant pathways for each
# comparison and write them to a JSON file keyed by comparison name.
# NOTE(review): data4_out (choline_batch2) is computed earlier but not
# exported here - confirm that this omission is intentional.
out_dict <- lapply(
  list(
    wt_y622 = data1_out,
    y622_choline = data2_out,
    wt_g2 = data3_out
  ),
  get_geneset
)

write_json(
  out_dict,
  path = "../../processed_data/bulkRNAseq_fgsea_leading_edge.json",
  pretty = TRUE
)
