In [18]:
# Notebook setup.
# Load project-local helper functions and the package set from the user's
# notebook_assist directory (contents of these two files are not shown here).
source("~/software/notebook_assist/functions.R")
source("~/software/notebook_assist/packages.R")
# All relative paths used by later cells are resolved against this directory.
setwd("/projects/CARDIPS/analysis/epigenome_resource/")
# igraph is attached quietly so its startup messages do not clutter the output.
suppressPackageStartupMessages(library(igraph))

In [19]:
# Shared plot theme: theme_bw() with every text element set to size 10 and the
# legend placed above the panel.
theme_bw2 <- local({
    # One text element reused for all labelled components.
    label_text <- element_text(size = 10)

    theme_bw() +
        theme(
            axis.text       = label_text,
            strip.text      = label_text,
            axis.title      = label_text,
            legend.text     = label_text,
            legend.title    = label_text,
            legend.position = "top"
        )
})

In [20]:
### can get from supplemental table
# Module membership table: one row per qElement, read as a plain data.frame.
all_modules = fread("analyses/tim/ld_modules/modules/all_modules_091124.txt", sep = "\t", data.table = FALSE)


# Number of qElements (rows) in each module of each tissue.
# `.groups = "drop_last"` is exactly what summarize() does by default here (the
# result stays grouped by Tissue); stating it explicitly removes the
# "`summarise()` has grouped output by 'Tissue'" message.
cluster_hist = all_modules %>%
    group_by(Tissue, Cluster_ID) %>%
    summarize(n_qtls = n(), .groups = "drop_last")

# Bin module sizes: sizes of 10 or more share the ">=10" label, smaller sizes
# keep their own value. The count is converted to character explicitly instead
# of relying on ifelse() coercing its two branches to a common type.
cluster_hist$NoQTLs = ifelse(cluster_hist$n_qtls >= 10, ">=10", as.character(cluster_hist$n_qtls))

# Tissue x size-bin contingency table in long form; modules of size 1 are dropped.
# Var2 is a factor after table(), so it is compared against the label "1".
cluster_hist2 = as.data.frame(table(cluster_hist$Tissue, cluster_hist$NoQTLs)) %>%
                    rename(Tissue = Var1, qElements_inQTL = Var2, No_ComplexQTLs = Freq) %>%
                    filter(qElements_inQTL != "1")

# Fix the factor level order of tissues and size bins (both reversed).
cluster_hist2$Tissue = factor(cluster_hist2$Tissue, levels = rev(c("iPSC","CVPC","PPC")))
cluster_hist2$qElements_inQTL = factor(cluster_hist2$qElements_inQTL, levels = rev(c(1:9,">=10")))

# Source data for Figure 4A.
fwrite(cluster_hist2,"~/projects/Resource/notebooks/toGitHub/sourcedata/SOURCEDATA.FIGURE4A.txt",
       sep = "\t", row.names = FALSE, quote = FALSE)



[1m[22m`summarise()` has grouped output by 'Tissue'. You can override using the
`.groups` argument.


In [6]:
# Numeric module index: the first run of digits found in each Cluster_ID.
all_modules[["cluster_number"]] <- as.numeric(str_extract(all_modules[["Cluster_ID"]], "[0-9]+"))

# One module table per tissue.
ipsc_clusters3 <- filter(all_modules, Tissue == "iPSC")
cvpc_clusters3 <- filter(all_modules, Tissue == "CVPC")
ppc_clusters3  <- filter(all_modules, Tissue == "PPC")


# Summarise the qElement composition of one module.
#
# Args:
#   x:  module number to extract; compared against df$cluster_number.
#   df: module table with columns cluster_number, Cluster_ID and qElement_Type.
#
# Returns:
#   The distinct (Cluster_ID, qElement_Type, n_qelement) rows for module x,
#   where n_qelement is the number of selected rows sharing that qElement_Type,
#   plus a qElement factor column with levels eGene, caPeak, haPeak. The result
#   is still grouped by qElement_Type. A module number with no rows gives an
#   empty table.
cluster_composition =  function(x, df){
    # which() discards rows whose cluster_number is NA. Plain logical indexing
    # would instead turn each NA into an all-NA row in every module's result.
    in_cluster = which(df$cluster_number == x)

    cluster = unique(df[in_cluster, ] %>% group_by(qElement_Type) %>%
                mutate(n_qelement = n()) %>% select(Cluster_ID, qElement_Type, n_qelement))
    # Types outside these three levels become NA in the factor column.
    cluster$qElement = factor(cluster$qElement_Type, levels = c("eGene","caPeak","haPeak"))

    return(cluster)
}

# Composition of every iPSC module numbered 1..N, where N is the largest module
# number among rows with qElements_inQTL > 1.
# seq_len() replaces 1:max(...): it gives an empty sequence when N is 0, where
# 1:0 would silently iterate over c(1, 0).
# NOTE(review): qElements_inQTL is not created in the visible cells; presumably
# it is a column of the input table -- confirm, otherwise max() sees no values.
ipsc_clustcomp = rbindlist(lapply(seq_len(max(ipsc_clusters3$cluster_number[ ipsc_clusters3$qElements_inQTL > 1])),
                                  cluster_composition,
                                  df = ipsc_clusters3))

In [7]:
# Composition of every CVPC / PPC module numbered 1..N, where N is the largest
# module number among rows with qElements_inQTL > 1 in that tissue.
# seq_len() replaces 1:max(...): it gives an empty sequence when N is 0, where
# 1:0 would silently iterate over c(1, 0).
# NOTE(review): qElements_inQTL is not created in the visible cells; presumably
# it is a column of the input table -- confirm, otherwise max() sees no values.
cvpc_clustcomp = rbindlist(lapply(seq_len(max(cvpc_clusters3$cluster_number[cvpc_clusters3$qElements_inQTL > 1])),
                                  cluster_composition,
                                  df = cvpc_clusters3))

ppc_clustcomp = rbindlist(lapply(seq_len(max(ppc_clusters3$cluster_number[ppc_clusters3$qElements_inQTL > 1])),
                                  cluster_composition,
                                  df = ppc_clusters3))

In [13]:
# Label each module with the combination of qElement types it contains.
#
# Args:
#   clustcomp: long table with columns Cluster_ID, qElement_Type and n_qelement.
#   tissue:    value written to the Tissue column. For "PPC" only eGene and
#              caPeak are considered; for any other tissue haPeak is considered
#              as well.
#
# Returns:
#   The wide dcast() table (one row per Cluster_ID, one summed n_qelement column
#   per qElement_Type) with two added columns:
#     Annotation - comma-separated list of the considered types whose count is
#                  > 0, always in the order eGene, caPeak, haPeak
#                  (e.g. "eGene,haPeak"). NA when the module contains none of
#                  the considered types.
#     Tissue     - the `tissue` argument.
annotate_modules = function(clustcomp, tissue) {
    cast = dcast(Cluster_ID ~ qElement_Type, data = clustcomp, value.var = "n_qelement", fun.aggregate = sum)

    element_types = if (tissue != "PPC") c("eGene", "caPeak", "haPeak") else c("eGene", "caPeak")

    annotation = rep("", nrow(cast))
    for (element_type in element_types) {
        # A type that occurs in no module has no column after dcast(); that is
        # the same as a count of zero everywhere, so there is nothing to append.
        if (!(element_type %in% names(cast))) next

        has_type = which(cast[[element_type]] > 0)
        if (length(has_type) == 0) next

        # Only put a comma in front when a previous type was already recorded.
        separator = ifelse(annotation[has_type] == "", "", ",")
        annotation[has_type] = paste0(annotation[has_type], separator, element_type)
    }
    # Modules with none of the considered types get NA rather than the name of
    # a type they do not contain.
    annotation[annotation == ""] = NA_character_

    cast$Annotation = annotation
    cast$Tissue = tissue
    return(cast)

}

# Annotate the modules of each tissue.
ipsc_annot <- annotate_modules(ipsc_clustcomp, "iPSC")
cvpc_annot <- annotate_modules(cvpc_clustcomp, "CVPC")
ppc_annot  <- annotate_modules(ppc_clustcomp, "PPC")

# Stack the three tissues (iPSC, CVPC, PPC order), keeping only the tissue,
# module ID and annotation columns, and return a plain data.frame.
all_annot <- as.data.frame(
    rbind(
        ipsc_annot[, c("Tissue", "Cluster_ID", "Annotation")],
        cvpc_annot[, c("Tissue", "Cluster_ID", "Annotation")],
        ppc_annot[, c("Tissue", "Cluster_ID", "Annotation")]
    )
)

In [16]:
# Source data for Figure 4B-D: tab-separated, unquoted, without row names.
fwrite(
    all_annot,
    "~/projects/Resource/notebooks/toGitHub/sourcedata/SOURCEDATA.FIGURE4BCD.txt",
    sep = "\t",
    row.names = FALSE,
    quote = FALSE
)