In [1]:
library(fgsea)
library(jsonlite)


In [2]:
# Build a ranked, named score vector from a differential-expression table.
#
# Each score is sign(logFC) * -log10(P.Value), named by the `genes` column and
# sorted from highest to lowest.
#
# Args:
#   x: data.frame-like object with columns `logFC`, `P.Value` and `genes`.
#
# Returns:
#   Named numeric vector sorted in decreasing order. sort() drops NA scores by
#   default, so the result may be shorter than nrow(x).
#
# Side effects:
#   Prints the number of scores and the number of unique gene names; a
#   difference between the two means duplicated gene names are present.
#
# NOTE(review): a P.Value of exactly 0 yields an infinite score, which
# downstream tools may reject -- confirm the inputs never contain 0.
get_scores <- function(x) {
  # Fail loudly instead of silently returning an unnamed or empty vector when a
  # column is absent (`$` returns NULL for a missing column).
  required_cols <- c("logFC", "P.Value", "genes")
  missing_cols <- setdiff(required_cols, names(x))
  if (length(missing_cols) > 0) {
    stop(
      "get_scores(): missing required column(s): ",
      paste(missing_cols, collapse = ", "),
      call. = FALSE
    )
  }

  # `[[` matches column names exactly; `$` would allow partial matching.
  scores <- sign(x[["logFC"]]) * -log10(x[["P.Value"]])
  names(scores) <- x[["genes"]]
  sorted_scores <- sort(scores, decreasing = TRUE)

  print(length(sorted_scores))
  print(length(unique(names(sorted_scores))))
  sorted_scores
}

# Collect the unique leading-edge genes of all significant pathways.
#
# Args:
#   x: table with a numeric `padj` column and a list column `leadingEdge`
#      (one character vector of genes per row), e.g. the result of fgsea().
#   alpha: adjusted p-value cutoff; rows with padj < alpha are kept.
#
# Returns:
#   Character vector of unique gene names in order of first appearance, or
#   NULL when no row passes the cutoff.
get_geneset <- function(x, alpha = 0.05) {
  missing_cols <- setdiff(c("padj", "leadingEdge"), names(x))
  if (length(missing_cols) > 0) {
    stop(
      "get_geneset(): missing required column(s): ",
      paste(missing_cols, collapse = ", "),
      call. = FALSE
    )
  }

  padj <- x[["padj"]]
  # Explicit row mask: rows with NA padj are excluded. Indexing the list column
  # directly (rather than x[mask]) behaves the same for data.table and plain
  # data.frame inputs; x[mask] would select *columns* on a data.frame.
  keep <- !is.na(padj) & padj < alpha
  unique(unlist(x[["leadingEdge"]][keep]))
}


In [3]:
# Read each differential-expression table (first CSV column becomes the row
# names) and convert it to a ranked score vector. Every get_scores() call
# prints the number of scores and the number of unique gene names.
data1 <- read.csv('../../bulkRNAseq/y622_degs.csv', row.names = 1)
data1_scores <- get_scores(data1)

data2 <- read.csv('../../bulkRNAseq/choline_degs.csv', row.names = 1)
data2_scores <- get_scores(data2)

data3 <- read.csv('../../bulkRNAseq/g2_degs.csv', row.names = 1)
data3_scores <- get_scores(data3)

data4 <- read.csv('../../bulkRNAseq/choline_batch2_degs.csv', row.names = 1)
data4_scores <- get_scores(data4)

# Load the pathway table (rows named by column 'X') and turn it into a named
# list with one vector per row. Empty-string entries are dropped from each
# vector (presumably padding for rows of unequal length -- verify against the
# file that produced all_paths.csv).
all_paths <- read.csv('../../processed_data/all_paths.csv', row.names = 'X')
pathways <- as.list(as.data.frame(t(all_paths)))
temp <- lapply(pathways, function(members) members[members != ''])

[1] 13170
[1] 13169
[1] 13170
[1] 13169
[1] 13170
[1] 13169
[1] 13170
[1] 13169


In [38]:
# Fix the RNG state before running fgsea so the results are reproducible.
set.seed(5)

# Run fgsea for one ranked score vector against the gene sets in `temp`.
# Only the first occurrence of each gene name is kept; since the score vectors
# are sorted in decreasing order, that is the highest-scoring duplicate.
run_fgsea <- function(scores) {
  deduplicated <- scores[!duplicated(names(scores))]
  fgsea(
    pathways = temp,
    stats = deduplicated,
    minSize = 5,
    maxSize = 1000,
    nproc = 1,
    nPermSimple = 10000
  )
}

# The call order is kept so the random number stream is consumed identically.
data1_out <- run_fgsea(data1_scores)
data2_out <- run_fgsea(data2_scores)
data3_out <- run_fgsea(data3_scores)
data4_out <- run_fgsea(data4_scores)









In [39]:
# Report how many pathways have an adjusted p-value below 0.05 in the first
# and third fgsea results (one printed count per result).
for (fgsea_result in list(data1_out, data3_out)) {
  print(sum(fgsea_result$padj < 0.05))
}

[1] 15
[1] 15


In [51]:
# Display the rows of the second fgsea result with adjusted p-value below 0.05.
is_significant <- data2_out$padj < 0.05
data2_out[is_significant, ]

pathway,pval,padj,log2err,ES,NES,size,leadingEdge
<chr>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<int>,<list>
Cholesterol Biosynthesis Pathway WP197,0.001512156,0.04425577,0.4550599,0.7368725,1.732358,15,"SQLE , HMGCR , LSS , IDI1 , DHCR7 , MVD , SC5D , CYP51A1"
Cytoplasmic Ribosomal Proteins WP477,5.319857e-06,0.0003892362,0.6105269,-0.5027162,-1.973062,86,"RPS4X , RPL7 , RPL18A , RPS5 , RPS8 , RPL41 , RPL4 , RPL29 , RPL26 , RPL38 , RPL34 , RPL17 , RPS6 , MRPL19 , RPS6KB2, RPS19 , RPS17 , RPS18 , RPL6 , RPL5 , RPL19 , RPS6KA3, RPL36A , RPL9 , RPS14 , RPL21 , RPL35 , RPL27 , RPL11 , RPL10A , RPS24 , RPL23A , RPS15 , RPL3 , FAU , RPS9 , RPS3 , RPS16 , RPL31 , RPS25"
DNA Replication WP466,0.0002726657,0.01355195,0.4984931,-0.5736753,-1.922234,39,"GMNN , RPA2 , ORC3 , RFC4 , ORC4 , RFC2 , MCM7 , POLD3, ORC5 , PRIM1, POLD2, CDK2 , RFC3 , CDT1 , PCNA , ORC6"
Electron Transport Chain (OXPHOS system in mitochondria) WP111,1.035193e-14,4.544495e-12,0.9865463,-0.6879996,-2.700263,86,"UQCRFS1 , COX7A2 , NDUFB8 , UQCRC1 , UQCR10 , NDUFA12 , COX6A1 , NDUFA6 , NDUFA1 , ATP5F1C , SDHB , ATP5IF1 , COX7A2L , NDUFS4 , NDUFA10 , UQCRH , NDUFB3 , SDHD , ATP5MG , COX5B , ATP5PB , NDUFV1 , ATP5F1A , NDUFA4 , SDHC , SURF1 , NDUFB6 , NDUFS2 , ATP5PF , SLC25A14, SLC25A4 , NDUFB7 , UQCR11 , NDUFS3 , NDUFB9 , COX4I1 , NDUFB2 , COX7B , ATP5PO , NDUFC2 , COX5A , SDHA , NDUFAB1"
Eukaryotic Transcription Initiation WP405,0.0005591711,0.02454761,0.4772708,-0.5472788,-1.859332,41,"TAF9 , POLR1E, POLR3K, CDK7 , TAF12 , POLR2K, CCNH , POLR3H, POLR3D, POLR2C, TBP , POLR2G, GTF2E2, POLR2I, POLR2J, POLR2H, GTF2H1, GTF2A2, GTF2H2, ERCC3 , POLR2E"
GABA receptor Signaling WP4159,1.853151e-06,0.0001627067,0.6435518,0.7601539,2.017639,26,"GABRE , GABRA3 , GABRB1 , GABRG3 , GABRA2 , GABRG2 , GABRG1 , ABAT , GABRQ , GABRA5 , GAD1 , SLC6A11, GPHN , GABRB3 , ALDH9A1, AP2A2 , AP2A1 , GABBR1 , SLC6A1 , GABRB2"
"GPCRs, Class A Rhodopsin-like WP455",0.001243314,0.04425577,0.4550599,0.5087786,1.642741,77,"PRLHR , SSTR2 , NPY1R , OPRK1 , OPN3 , CXCR4 , GPR83 , GPR27 , NPY5R , HTR7 , GPR45 , PTGER3, ADORA1, HTR2C , DRD5 , F2R , HCRTR1, GPR68 , HCRTR2, HTR6 , DRD2 , ADRA1D, GPR3"
Integrin-mediated Cell Adhesion WP185,0.0002778304,0.01355195,0.4984931,0.5340789,1.718618,75,"ITGA6 , MAP2K5 , VCL , ITGAV , CAPN1 , PAK4 , PAK1 , CAV2 , GIT2 , ITGA9 , SRC , SOS1 , MAPK6 , CRK , VAV2 , CAPN10 , CAPN2 , MAPK1 , ITGB1 , RAP1A , RAPGEF1, MAP2K6 , ITGB8 , AKT3 , ARHGEF7, MAP2K1 , BRAF , SHC3 , MAPK4 , ZYX , CAPNS1 , ITGA1 , GRB2 , MAP2K3 , MAPK12 , AKT1 , ITGA2B , MAPK10 , PTK2 , TNS1 , RAF1 , HRAS , PAK3"
Mitochondrial complex I assembly model OXPHOS system WP4324,3.934015e-07,5.756775e-05,0.6749629,-0.6519385,-2.308023,50,"NDUFB8 , NDUFA12 , FOXRED1 , NDUFA6 , NDUFA1 , NDUFAF6 , NDUFS4 , NDUFA10 , TIMMDC1 , NDUFB3 , NDUFV1 , NDUFAF1 , TMEM70 , NDUFB6 , NDUFS2 , NDUFAF3 , NDUFB7 , NDUFS3 , NDUFB9 , TMEM186 , NDUFB2 , ECSIT , NDUFA13 , NDUFC2 , NDUFAB1 , NDUFA8 , DMAC2 , TMEM126B, COA1 , NDUFB10 , NDUFB11 , NDUFA5 , NDUFS5 , NDUFC1 , NDUFAF4 , NDUFA2 , NDUFB5 , NDUFS6 , NDUFAF2"
Nanoparticle-mediated activation of receptor signaling WP2643,0.001472291,0.04425577,0.4550599,0.6440455,1.709458,26,"MAPK8 , FN1 , MAPK9 , SRC , SOS1 , MAPK1 , ITGB1 , KRAS , MAPK14, AKT3 , MAP2K1, NRAS , MAPK13, ITGA1 , GRB2 , MAPK12, MAPK10, PTK2 , RAF1 , HRAS , MAPK11"


In [46]:
# Leading-edge gene set per comparison, keyed by comparison name. lapply()
# keeps the list names and retains an element even when get_geneset() returns
# NULL for it.
out_dict <- lapply(
  list(
    wt_y622 = data1_out,
    y622_choline = data2_out,
    wt_g2 = data3_out
  ),
  get_geneset
)

In [49]:
# Print the number of leading-edge genes per comparison, in the order
# wt_g2, wt_y622, y622_choline.
for (comparison in c("wt_g2", "wt_y622", "y622_choline")) {
  print(length(out_dict[[comparison]]))
}


[1] 356


[1] 334
[1] 313


In [48]:
# Rebuild the per-comparison leading-edge gene sets (same construction as the
# earlier cell, so this cell can be run on its own) and save them as
# pretty-printed JSON.
fgsea_by_comparison <- list(
  wt_y622 = data1_out,
  y622_choline = data2_out,
  wt_g2 = data3_out
)
out_dict <- lapply(fgsea_by_comparison, get_geneset)

write_json(
  out_dict,
  path = "../../processed_data/bulkRNAseq_fgsea_leading_edge2.json",
  pretty = TRUE
)
