In [52]:
library(fgsea)
library(jsonlite)


In [53]:
# Build a ranked, signed significance score per gene from a
# differential-expression table.
#
# x: table exposing the columns `logFC`, `P.Value` and `genes` via `$`.
#
# Returns a numeric vector named by `x$genes` whose values are
# sign(logFC) * -log10(P.Value), ordered from largest to smallest.
#
# Side effect: prints the number of scores followed by the number of distinct
# gene names, so repeated gene names show up as a mismatch between the two.
get_scores <- function(x) {
  direction <- sign(x$logFC)
  strength <- -log10(x$P.Value)
  ranked <- sort(setNames(direction * strength, x$genes), decreasing = TRUE)
  print(length(ranked))
  print(length(unique(names(ranked))))
  ranked
}

# Collect the leading-edge genes of all significantly enriched pathways.
#
# x:     enrichment result table with a numeric `padj` column and a list
#        column `leadingEdge` (one vector of gene names per pathway row).
# alpha: adjusted p-value cutoff; rows with padj < alpha are kept
#        (defaults to 0.05, the previously hard-coded threshold).
#
# Returns the unique gene names found in the leading edges of the kept rows,
# in order of first appearance, or NULL when no row passes the cutoff.
get_geneset <- function(x, alpha = 0.05) {
  # Index the list column directly rather than using `x[mask]`: a one-argument
  # logical subset filters rows only for a data.table, while on a plain
  # data.frame it would select columns. which() also drops rows whose padj is
  # NA, so they never count as significant.
  keep <- which(x$padj < alpha)
  unique(unlist(x$leadingEdge[keep]))
}


In [38]:
# Load each differential-expression table and turn it into a ranked score
# vector (get_scores prints the total and unique gene counts for each table).
data1 <- read.csv("../../bulkRNAseq/y622_degs.csv", row.names = 1)
data1_scores <- get_scores(data1)

data2 <- read.csv("../../bulkRNAseq/choline_degs.csv", row.names = 1)
data2_scores <- get_scores(data2)

data3 <- read.csv("../../bulkRNAseq/g2_degs.csv", row.names = 1)
data3_scores <- get_scores(data3)

data4 <- read.csv("../../bulkRNAseq/choline_batch2_degs.csv", row.names = 1)
data4_scores <- get_scores(data4)

# Load the pathway table: after transposing, each row of the CSV becomes one
# named list element holding that row's entries.
all_paths <- read.csv("../../processed_data/all_paths.csv", row.names = "X")
pathways <- as.list(as.data.frame(t(all_paths)))

# Drop the empty-string entries from every pathway; lapply keeps the pathway
# names on the result.
temp <- lapply(pathways, function(members) members[members != ""])

[1] 13170
[1] 13169
[1] 13170
[1] 13169
[1] 13170
[1] 13169
[1] 13170
[1] 13169


In [39]:
# Run fgsea for every comparison against the shared pathway list `temp`.
# NOTE(review): no RNG seed is set before these calls; if fgsea's p-value
# estimation is randomised, results may differ between runs — confirm whether
# reproducibility is required.
run_fgsea <- function(scores) {
  fgsea(
    pathways = temp,
    # Keep only the first entry for each gene name before ranking.
    stats = scores[!duplicated(names(scores))],
    minSize = 5,
    maxSize = 1000,
    nproc = 1,
    nPermSimple = 10000
  )
}

data1_out <- run_fgsea(data1_scores)
data2_out <- run_fgsea(data2_scores)
data3_out <- run_fgsea(data3_scores)
data4_out <- run_fgsea(data4_scores)









In [63]:
# Leading-edge genes of the significant pathways, keyed by comparison.
# NOTE(review): data4_out is not exported here — confirm that is intentional.
out_dict <- lapply(
  list(
    wt_y622 = data1_out,
    y622_choline = data2_out,
    wt_g2 = data3_out
  ),
  get_geneset
)

# Write the gene sets to disk as pretty-printed JSON.
write_json(
  out_dict,
  path = "../../processed_data/bulkRNAseq_fgsea_leading_edge.json",
  pretty = TRUE
)
