In [1]:
library(readr)
library(fgsea)
library(readr)

In [2]:
# Load the CRISPR screen table; the cells below use its gene_name and rank columns.
all_genes_ranked <- read_csv(
    file = "/home/miltondp/projects/labs/greenelab/phenoplier/base/data/crispr_screen/lipid_DEG.csv"
)


[36m──[39m [1m[1mColumn specification[1m[22m [36m─────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────[39m
cols(
  gene_name = [31mcol_character()[39m,
  GFPLow_vs_UnSorted.log2FC = [32mcol_double()[39m,
  GFPLow_vs_UnSorted.FDR = [32mcol_double()[39m,
  GFPLow_vs_UnSorted.DEG = [33mcol_logical()[39m,
  GFPHigh_vs_UnSorted.log2FC = [32mcol_double()[39m,
  GFPHigh_vs_UnSorted.FDR = [32mcol_double()[39m,
  GFPHigh_vs_UnSorted.DEG = [33mcol_logical()[39m,
  GFPHigh_vs_GFPLow.log2FC = [32mcol_double()[39m,
  GFPHigh_vs_GFPLow.FDR = [32mcol_double()[39m,
  GFPHigh_vs_GFPLow.DEG = [33mcol_logical()[39m,
  `lipid effect` = [31mcol_character()[39m,
  rank = [32mcol_double()[39m
)




In [3]:
# Build one gene set per non-zero rank value: "gene_set_<rank>" maps to the
# names of all genes that carry that rank. Genes with rank 0 are left out.
deg_gene_sets <- list()

for (rank_value in unique(all_genes_ranked$rank)) {
    if (rank_value != 0) {
        has_rank <- all_genes_ranked$rank == rank_value
        set_name <- paste0("gene_set_", rank_value)
        deg_gene_sets[[set_name]] <- all_genes_ranked$gene_name[has_rank]
    }
}

In [4]:
# MultiPLIER LVs
# Load the Z matrix from the saved PLIER model; each column is treated as one LV.
# NOTE(review): presumably rows are genes and row names are gene symbols (the
# per-LV vectors are later passed to fgsea as `stats`) - confirm.
multiplier_z <- readRDS("/media/miltondp/Elements1/projects/multiplier/recount2_PLIER_data/recount_PLIER_model.RDS")$Z

# For every column of Z keep only the entries strictly above the threshold q,
# and store them under the name "LV<column index>".
lvs <- list()
# seq_len() instead of 1:ncol(): a zero-column matrix then yields no iterations
# rather than the sequence c(1, 0) and an out-of-bounds subscript.
for (cidx in seq_len(ncol(multiplier_z))) {
    data <- multiplier_z[, cidx]
    # q <- quantile(data, 0.75, names=FALSE)
    q <- 0.0

    lvs[[paste0("LV", cidx)]] <- data[data > q]
}

# Compute enrichment on all LVs

In [5]:
# Empty container; the enrichment loop below fills it with one result table per LV.
results <- list()

In [6]:
# Fix the RNG seed right before the enrichment loop so reruns are reproducible
# (presumably because fgsea's p-value estimation is sampling-based - confirm).
set.seed(42)

In [7]:
# Run fgsea once per LV: the rank-derived gene sets are the pathways and the
# LV's retained (above-threshold) values are the gene-level statistic.
#   scoreType = "pos"  one-tailed test for positive enrichment only
#   eps = 0.0          no lower bound on the estimated p-values
# Each result table is sorted by p-value, tagged with the LV name, and stored
# in `results` under that name.
# NOTE(review): padj comes straight from each fgsea call, so it appears to be
# adjusted only across the gene sets tested for one LV, not across all LVs -
# confirm this is intended before thresholding on padj downstream.
for (lv in names(lvs)) {
    res <- fgsea(pathways = deg_gene_sets, stats = lvs[[lv]], scoreType = "pos", eps = 0.0)
    res <- res[order(pval), ]
    # Collapse each leading-edge gene vector into one comma-separated string so
    # the table can be written as flat text. vapply() always returns a character
    # vector, whereas sapply() returns an empty list when there are no rows.
    res[, "leadingEdge"] <- vapply(res$leadingEdge, paste, character(1), collapse = ",")
    res[, "lv"] <- lv
    results[[lv]] <- res
}

In [8]:
# Number of per-LV result tables collected in `results`.
length(results)

In [9]:
# Stack all per-LV result tables row-wise into one combined table.
df <- do.call(rbind, results)

In [10]:
# Rows and columns of the combined results table.
dim(df)

In [11]:
# Preview the first rows of the combined results table.
head(df)

pathway,pval,padj,log2err,ES,NES,size,leadingEdge,lv
<chr>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<int>,<chr>,<chr>
gene_set_-3,0.1078921,0.386014,0.13145761,0.8275148,1.435243,3,"PCYT2,UBE2J2",LV1
gene_set_2,0.2047952,0.386014,0.08998608,0.5427601,1.092387,61,"CHERP,RANGAP1,HNRNPL,RPS2,E4F1,TAF1C,GATAD2A,SAFB,TAF6,FBL,LSM4,SUPT5H,CHD4,PFDN6,SLC35B2,USP39,POLR3E,POLR2C,RPLP0,SREBF2",LV1
gene_set_-2,0.2517483,0.386014,0.07871138,0.5667829,1.102627,32,"PTBP1,KEAP1,PEX14,DLST,MAD2L2,GLRX5,OGDH",LV1
gene_set_1,0.2877123,0.386014,0.07182763,0.5578354,1.082663,31,"MYBBP1A,ESPL1,PLOD3,CDKN2A,BRF1,SF3B3,ACO2,GMPS,XRCC6,NUP85",LV1
gene_set_3,0.3216783,0.386014,0.06628422,0.7130726,1.292976,2,"ACACA,MBTPS1",LV1
gene_set_-1,0.6073926,0.6073926,0.03668504,0.5223079,0.960734,12,"SREBF1,ATP5B,ABCA2,MAML1,GNA15,TTC38",LV1


In [12]:
# Save the combined enrichment results as a tab-separated file.
# NOTE(review): the file name spells "fsgea" rather than "fgsea"; it is left
# unchanged because other code may read this exact path - confirm before renaming.
write_tsv(df, "/home/miltondp/projects/labs/greenelab/phenoplier/base/data/crispr_screen/fsgea-all_lvs.tsv")

# Quick analyses

In [13]:
# Keep only the rows whose adjusted p-value is below 0.05 (rows with a missing
# padj are not selected).
significant_rows <- which(df$padj < 0.05)
df_signif <- df[significant_rows, ]

In [14]:
# Rows and columns of the significant subset.
dim(df_signif)

In [15]:
# Show the significant results ordered by increasing adjusted p-value.
df_signif[order(df_signif$padj), ]

pathway,pval,padj,log2err,ES,NES,size,leadingEdge,lv
<chr>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<int>,<chr>,<chr>
gene_set_2,1.317321e-07,6.586606e-07,0.6901325,0.9057343,1.548676,49,"RPS14,RPL31,RPS19,RPS11,RPS6,RPL37,RPSA,RPL18,RPL35A,RPL34,RPL6,RPS2,RPL7,RPS13,RPS28,RPLP0,RPL19,RPS27A,RPL13,RPS16,FBL,UXT",LV707
gene_set_-2,1.380551e-07,8.283308e-07,0.6901325,0.8658895,1.643107,39,"NDUFA4,COX6A1,ATP5O,NDUFB10,COX5A,NDUFS3,NDUFB7,NDUFS2,NDUFV2,NDUFB4,NDUFB3,NDUFB9,COX17,COX7C,NDUFS6,MAD2L2,NAA38",LV678
gene_set_-3,4.942689e-06,2.965613e-05,0.6105269,0.9992990,1.816439,2,"PTEN,FBXW7",LV612
gene_set_2,9.656318e-06,4.828159e-05,0.5933255,0.7361145,1.503917,57,"RPS6,RPL6,RPLP0,RPL19,RPL31,RPL7,RPS13,RPS2,RPSA,RPS16,RPS14,FBL,RPS11,RPL37,RPL35A,RPL13,LUC7L3,RPL18,SRP72",LV905
gene_set_-1,4.293645e-05,1.288094e-04,0.5573322,0.9180109,1.691415,14,"NDUFB8,NDUFAB1,ATP5B,ATP6AP1",LV678
gene_set_2,2.192123e-05,1.315274e-04,0.5756103,0.6324956,1.463891,81,"SAFB,LUC7L3,HSP90B1,CHD4,SNRPD3,ISY1,DKC1,PFDN6,USP39,HNRNPL,HNRNPH1,SUPT5H,RPL6,CHERP,FBL,UBA2,EIF5,RPL37,RPL7,SRP72,XPO1,RPL19,SNRPD1,LSM4,CSTF3,RPS6,POLR2K,PFDN2,RPL31,RANGAP1,POLR2C,OSBP,POLR2F,RPS11,GTF2B,GATAD2A,POLR2L,GTF2H1,RPL35A,TAF1C,OTUD5,RPL34,EXOC3,ERCC3,PCBP1,SRP54",LV915
gene_set_1,6.035192e-05,1.810558e-04,0.5573322,0.6800128,1.535791,51,"TPR,DDX21,SRSF7,XRCC6,CTNNBL1,SMC2,MYBBP1A,SF3B3,SMNDC1,PNN,RBM28,NUP85,GARS,UXS1,GMPS,DDX20,ESPL1,TCP1,PPIL1,RPS23,RPL4,NPEPPS,EIF2B4,HARS,FAU",LV915
gene_set_1,1.417049e-04,8.502296e-04,0.5188481,0.8363970,1.643409,30,"ABCG2,DDIT3,BMPR2,SSH1,UXS1,SERPINB6,CHKA,SRSF7",LV821
gene_set_2,3.121505e-04,1.872903e-03,0.4984931,0.8042415,1.288432,60,"RPL34,RPL35A,RPL31,RPS13,RPS6,RPL6,RPL7,UBL5,RPS16,POLR2K,RPS28,RPS19,RPL19,RPL37,RPS27A,RPLP0,SNRPD3,RPS11,RPS14,SEC61G,SRP19",LV750
gene_set_2,3.577192e-04,2.146315e-03,0.4984931,0.9719958,1.959109,32,"ZNF3,MDM2,RPS6",LV341
