In [3]:
library(fgsea)
library(jsonlite)

# Build a ranked, named score vector from a differential-expression table.
#
# x: table with columns `logFC`, `P.Value` and `genes`.
# Returns a numeric vector named by `x$genes`, holding
# sign(logFC) * -log10(P.Value), sorted from largest to smallest.
# As a sanity check it prints the number of scores followed by the number of
# distinct gene names; the two differ when gene names are duplicated.
get_scores <- function(x) {
  direction <- sign(x$logFC)
  significance <- -log10(x$P.Value)

  ranked <- direction * significance
  names(ranked) <- x$genes
  ranked <- sort(ranked, decreasing = TRUE)

  n_total <- length(ranked)
  n_distinct <- length(unique(names(ranked)))
  print(n_total)
  print(n_distinct)

  ranked
}

# Collect the unique leading-edge genes of all significant pathways.
#
# x: result table with a numeric `padj` column and a list column
#    `leadingEdge` (one character vector of genes per pathway).
# Returns a character vector of the distinct genes found in the leading edges
# of rows with padj < 0.05, or NULL when no row qualifies.
#
# The row mask is built explicitly and applied to the `leadingEdge` column, so
# the function behaves the same for a data.table and a plain data.frame
# (where `x[mask]` would index columns instead of rows). Rows whose padj is NA
# are treated as not significant.
get_geneset <- function(x) {
  significant <- !is.na(x$padj) & x$padj < 0.05
  unique(unlist(x$leadingEdge[significant]))
}


In [68]:
# load pathways
# Read the pathway/cluster table; the first CSV column supplies the row names.
paths <- read.csv('../../processed_data/pm_kl_frame.csv', row.names = 1)

# Keep only the rows flagged as genes. read.csv may parse a column holding
# only True/False strings as logical, in which case a comparison against the
# string 'True' never matches; comparing the character form against both
# spellings covers either parse. %in% also never yields NA, so rows with a
# missing flag are dropped instead of turning into all-NA rows.
is_gene_row <- as.character(paths$is_gene) %in% c('True', 'TRUE')
paths <- paths[is_gene_row, ]

# P: named list with one element per cluster ("PM.<cluster>"), each holding
# the `description` values of that cluster's rows. Clusters are kept in order
# of first appearance.
cluster_ids <- unique(paths$cluster)
P <- lapply(cluster_ids, function(k) paths[paths$cluster == k, 'description'])
names(P) <- paste0('PM.', as.character(cluster_ids))


In [23]:
# # compute fgsea results
# all_paths = read.csv('../../processed_data/all_paths.csv', row.names = 'X')
# pathways = as.list(as.data.frame(t(all_paths)))
# temp = lapply(names(pathways), function(x) pathways[[x]][!(pathways[[x]]=='')])
# names(temp) = names(pathways)

# P = temp

In [69]:
# load DEGS - all Ex
# Read the DEG table (row names come from the first CSV column) and copy the
# row names into a `genes` column, which get_scores() uses to name the scores.
data1 <- read.csv('../../common_variant_data/degs_rs3752246.csv', row.names = 1)
data1$genes <- rownames(data1)
data1_scores <- get_scores(data1)

# Enrichment of the pathway list P against the ranked scores.
# NOTE(review): no RNG seed is set in this cell, so the permutation-based
# p-values may differ slightly between runs -- confirm this is acceptable.
o <- fgsea(pathways = P, stats = data1_scores, minSize = 5, maxSize = 1000,
           nproc = 1, nPermSimple = 10000)

# Order rows from largest to smallest p-value and freeze that order in the
# factor levels of `pathway`. TRUE is spelled out because T can be reassigned.
o <- o[order(o$pval, decreasing = TRUE), ]
o$score <- sign(o$NES) * -log10(o$pval)
o$pathway <- factor(o$pathway, levels = o$pathway)

# Remove the leadingEdge column before export
# (presumably because it is a list column -- TODO confirm).
o$leadingEdge <- NULL

# The CSV is written with the most significant pathway first.
write.csv(as.data.frame(o[order(o$pval), ]), '../../processed_data/fgsea_rs3752246.csv')


[1] 10226
[1] 10226


In [76]:
# load DEGS - set1
# Read the DEG table (row names come from the first CSV column) and copy the
# row names into a `genes` column, which get_scores() uses to name the scores.
data1 <- read.csv('../../common_variant_data/degs_rs3752246_set1.csv', row.names = 1)
data1$genes <- rownames(data1)
data1_scores <- get_scores(data1)

# Enrichment of the pathway list P against the ranked scores.
# NOTE(review): no RNG seed is set in this cell, so the permutation-based
# p-values may differ slightly between runs -- confirm this is acceptable.
o <- fgsea(pathways = P, stats = data1_scores, minSize = 5, maxSize = 1000,
           nproc = 1, nPermSimple = 10000)

# Order rows from largest to smallest p-value and freeze that order in the
# factor levels of `pathway`. TRUE is spelled out because T can be reassigned.
o <- o[order(o$pval, decreasing = TRUE), ]
o$score <- sign(o$NES) * -log10(o$pval)
o$pathway <- factor(o$pathway, levels = o$pathway)

# Remove the leadingEdge column before export
# (presumably because it is a list column -- TODO confirm).
o$leadingEdge <- NULL

# The CSV is written with the most significant pathway first.
write.csv(as.data.frame(o[order(o$pval), ]), '../../processed_data/fgsea_rs3752246_set1.csv')


[1] 10214
[1] 10214


In [75]:
# load DEGS - set2
# Read the DEG table (row names come from the first CSV column) and copy the
# row names into a `genes` column, which get_scores() uses to name the scores.
data1 <- read.csv('../../common_variant_data/degs_rs3752246_set2.csv', row.names = 1)
data1$genes <- rownames(data1)
data1_scores <- get_scores(data1)

# Enrichment of the pathway list P against the ranked scores.
# NOTE(review): no RNG seed is set in this cell, so the permutation-based
# p-values may differ slightly between runs -- confirm this is acceptable.
o <- fgsea(pathways = P, stats = data1_scores, minSize = 5, maxSize = 1000,
           nproc = 1, nPermSimple = 10000)

# Order rows from largest to smallest p-value and freeze that order in the
# factor levels of `pathway`. TRUE is spelled out because T can be reassigned.
o <- o[order(o$pval, decreasing = TRUE), ]
o$score <- sign(o$NES) * -log10(o$pval)
o$pathway <- factor(o$pathway, levels = o$pathway)

# Remove the leadingEdge column before export
# (presumably because it is a list column -- TODO confirm).
o$leadingEdge <- NULL

# The CSV is written with the most significant pathway first.
write.csv(as.data.frame(o[order(o$pval), ]), '../../processed_data/fgsea_rs3752246_set2.csv')


[1] 10221
[1] 10221



In [74]:
# load DEGS - set3
# Read the DEG table (row names come from the first CSV column) and copy the
# row names into a `genes` column, which get_scores() uses to name the scores.
data1 <- read.csv('../../common_variant_data/degs_rs3752246_set3.csv', row.names = 1)
data1$genes <- rownames(data1)
data1_scores <- get_scores(data1)

# Enrichment of the pathway list P against the ranked scores.
# NOTE(review): no RNG seed is set in this cell, so the permutation-based
# p-values may differ slightly between runs -- confirm this is acceptable.
o <- fgsea(pathways = P, stats = data1_scores, minSize = 5, maxSize = 1000,
           nproc = 1, nPermSimple = 10000)

# Order rows from largest to smallest p-value and freeze that order in the
# factor levels of `pathway`. TRUE is spelled out because T can be reassigned.
o <- o[order(o$pval, decreasing = TRUE), ]
o$score <- sign(o$NES) * -log10(o$pval)
o$pathway <- factor(o$pathway, levels = o$pathway)

# Remove the leadingEdge column before export
# (presumably because it is a list column -- TODO confirm).
o$leadingEdge <- NULL

# The CSV is written with the most significant pathway first.
write.csv(as.data.frame(o[order(o$pval), ]), '../../processed_data/fgsea_rs3752246_set3.csv')


[1] 10217
[1] 10217


In [65]:
# Display the current contents of `o`, i.e. the fgsea result table left by
# whichever analysis cell was executed last (leadingEdge already removed,
# rows ordered by decreasing p-value).
o

pathway,pval,padj,log2err,ES,NES,size,score
<fct>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<int>,<dbl>
PM.5,0.90185558,0.9018556,0.01754691,-0.2527692,-0.6898488,18,-0.044863
PM.7,0.50251955,0.5743081,0.02262072,0.3133622,0.9694944,38,0.298847
PM.6,0.46498697,0.5743081,0.02387069,0.3207149,0.9922426,38,0.3325592
PM.2,0.2942128,0.4707405,0.03261465,0.4199168,1.1287203,20,0.5313384
PM.3,0.21007819,0.4201564,0.03891206,0.3821275,1.1822435,38,0.677619
PM.1,0.14011084,0.3736289,0.04864393,0.397889,1.255804,42,0.8535283
PM.4,0.05300414,0.2120166,0.08113341,0.4397525,1.3958068,43,1.2756902
PM.0,0.03055947,0.2120166,0.12595293,-0.4915937,-1.519191,31,-1.5148541
