In [1]:

source("~/software/notebook_assist/packages.R")
source("~/software/notebook_assist/functions.R")


setwd("/projects/CARDIPS/analysis/epigenome_resource/")


ipscore_gtex_leadvar_path=#path to all GTEx and iPSCORE eQTL lead variants
leads = fread(ipscore_gtex_leadvar_path,sep="\t",data.table = F)

In [2]:

mashr_result_path=## Path to mashr results - found on Figshare
mashr = fread(mashr_result_path,sep="\t",data.table=F)

mashr$gene_id = str_extract(mashr$gene_variant, "^ENSG[0-9]+") 
mashr$variant_id = str_extract(mashr$gene_variant, "VAR_[0-9]+_[0-9]+_[A-Z]_[A-Z]") 

### calculate minimum lfsr for iPSCORE and GTEX tissues and across all tissues
mashr$min_ipscore_lfsr = rowMins(data.matrix(mashr[,6:8]))
mashr$min_adult_lfsr    = rowMins(data.matrix(mashr[,c(9:55)]))
mashr$min_lfsr         = rowMins(data.matrix(mashr[,c(6:55)]))

### filter SNP-eGene pairs that are not significant and not tested in both tissues
mashr2 = mashr[ mashr$min_lfsr <= 0.05 & mashr$ngtex_tested > 0 & mashr$nipscore_tested > 0 , ]

### assign temporal annotations to mashr output 
mashr2$Category = ifelse(mashr2$min_adult_lfsr <= 0.05 & mashr2$min_ipscore_lfsr <= 0.05, "Shared",
                              ifelse(mashr2$min_adult_lfsr >= 0.05 & mashr2$min_ipscore_lfsr <= 0.05, "EDev",
                                    ifelse(mashr2$min_adult_lfsr <= 0.05 & mashr2$min_ipscore_lfsr >= 0.05,
                                           "Adult", "Not Significant")))

mashr2$variant_id = gsub("VAR_","",str_extract(mashr2$gene_variant, "VAR_.*"))
table(mashr2$Category)


 Adult   EDev Shared 
 27881   2299  72195 

In [6]:
### Calculate correlation of EDev and Adult eQTLs across iPSCORE and GTEx tissues

assigned_leads = leads[ leads$gene_variant %in% mashr2$gene_variant, ]

assigned_leads$Study = ifelse(assigned_leads$tissue %in% c("iPSC","CVPC","PPC"),
                             "iPSCORE","GTEx")
agged_leads = assigned_leads %>% group_by(gene_id, Study) %>% top_n(1,-log10(pval)) %>% sample_n(1)
assigned_leads2 = assigned_leads[ assigned_leads$gene_variant %in% agged_leads$gene_variant,]

lead_cast = dcast(gene_variant~ tissue, data = assigned_leads, fun.aggregate = sum,value.var = "beta")
rownames(lead_cast) = lead_cast$gene_variant
lead_cast$gene_variant = NULL
edev_cor = cor(lead_cast[ rownames(lead_cast) %in% mashr2$gene_variant[ mashr2$Category == "EDev"],])

shar_cor = cor(lead_cast[ rownames(lead_cast) %in% mashr2$gene_variant[ mashr2$Category == "Shared"],])
adult_cor = cor(lead_cast[ rownames(lead_cast) %in% mashr2$gene_variant[ mashr2$Category == "Adult"],])


edev_melt = melt(edev_cor)
edev_melt$Type = "EDev"
shar_melt = melt(shar_cor)
shar_melt$Type = "Shared"
adult_melt = melt(adult_cor)
adult_melt$Type = "Adult"

melted = rbind(rbind(edev_melt,shar_melt),adult_melt) %>% filter(Var1 != Var2)
melted$Tissue1 = as.character(melted$Var1)
melted$Tissue2 = as.character(melted$Var2)
melted$r2 = melted$value**2
melted$Var1 = NULL
melted$Var2 = NULL
melted2 = melted[!duplicated(apply(melted,1,function(x) paste(sort(x),collapse=''))),]
melted3 = melted2[ melted2$Tissue1 %in% c("iPSC","CVPC","PPC") |  melted2$Tissue2 %in% c("iPSC","CVPC","PPC"),]
melted4 = melted3 [ !(melted3$Tissue1 == "iPSC" &  melted3$Tissue2 == "CVPC") &
                     !(melted3$Tissue1 == "PPC" &  melted3$Tissue2 == "iPSC") &
                     !(melted3$Tissue1 == "PPC" &  melted3$Tissue2 == "CVPC"),]
melted4$`iPSCORE Tissue` = ifelse(melted4$Tissue1 %in% c("iPSC","CVPC","PPC"), melted4$Tissue1,
                                  ifelse(melted4$Tissue2 %in% c("iPSC","CVPC","PPC"), melted4$Tissue2, NA))
    
fwrite(melted4, "~/projects/Resource/iPSCORE_Multi-QTL_Resource/SourceData/SOURCEDATA.FIGURE3A.txt",sep="\t",row.names=F,quote=F)
    




In [11]:
## Annotate and summarize iPSCORE eQTLs by temporal specificity 
library(readxl)
eqtls = read_xlsx("~/projects/Resource/RevisionTables/TableS4.xlsx") %>% 
            filter(QTL_Order == "Primary" & QTL_Type == "eQTL") %>% select(Tissue, Element_ID,Cluster_ID,SNP_ID, 
                                                                           SNP_Chromosome,SNP_Position,P_value,Effect_Size )
eqtls$tissue_element = paste(eqtls$Tissue, eqtls$Element_ID,sep="_")
eqtls$gene_variant = paste(str_extract(eqtls$Element_ID, "ENSG[0-9]+"),eqtls$SNP_ID)

eqtls$Category = ifelse(eqtls$gene_variant %in% mashr2$gene_variant[ mashr2$Category == "EDev"], "EDev",
                       ifelse(eqtls$gene_variant %in% mashr2$gene_variant[ mashr2$Category == "Shared"], "Shared","No Association"))

eqtls_temporal = eqtls[ eqtls$Category != "No Association",]

cat_dups = unique(eqtls_temporal[,c("Cluster_ID","Category")])
cat_dups = eqtls_temporal[ eqtls_temporal$Cluster_ID %in% names(table(eqtls_temporal$Cluster_ID ))[ 
                    table(eqtls_temporal$Cluster_ID ) > 1],]


no_assoc = eqtls$Cluster_ID[ eqtls$Category == "No Association"]
table(eqtls$Category)
eqtls$Stage_Specificity = ifelse(eqtls$Cluster_ID %in% no_assoc, "No Association",
                           ifelse(eqtls$Cluster_ID %in% cat_dups$Cluster_ID, "Shared",eqtls$Category)) 
table(eqtls$Stage_Specificity)
table(eqtls$Tissue,eqtls$Stage_Specificity)



eqtls$Tissue = factor(eqtls$Tissue, levels = c("iPSC","CVPC","PPC"))




          EDev No Association         Shared 
          2269           6038          10998 


          EDev No Association         Shared 
          2046           6542          10717 

      
       EDev No Association Shared
  CVPC  855           1158   2824
  iPSC  951           3224   4837
  PPC   240           2160   3056

In [20]:
summarize_stage_eqtls = as.data.frame(table(eqtls$Stage_Specificity, eqtls$Tissue))
summarize_stage_eqtls$Tissue = factor(summarize_stage_eqtls$Var2, levels = c("iPSC","CVPC","PPC"))
summarize_stage_eqtls$Stage = factor(summarize_stage_eqtls$Var1, levels = c("EDev","Shared","No Association"))
summarize_stage_eqtls$Var1 = NULL
summarize_stage_eqtls$Var2 = NULL
fwrite(summarize_stage_eqtls %>% group_by(Tissue) %>% mutate(Total_eGenes = sum(Freq), Percent = (Freq/Total_eGenes)*100),
       "~/projects/Resource/iPSCORE_Multi-QTL_Resource/SourceData/SOURCEDATA.FIGURE3B.txt",sep="\t",row.names=F,quote=F)


In [23]:
fwrite(eqtls[ eqtls$Stage_Specificity != "No Association",],
       "~/projects/Resource/iPSCORE_Multi-QTL_Resource/SourceData/SOURCEDATA.FIGURE3C.txt",sep="\t",row.names=F,quote=F)

In [26]:
### Calculate EDev-specific GWAS
supptable5_path=# path to Results tab in Supplemental Table 5
gwas = fread(supptable5_path,sep="\t",data.table=F)

gwas$Stage_specificity = ifelse(gwas$Cluster_ID %in% eqtls$Cluster_ID[ eqtls$Stage_Specificity == "EDev"],"EDev",
                   ifelse(gwas$Cluster_ID %in% eqtls$Cluster_ID[ eqtls$Stage_Specificity == "Shared"],"Shared","No association"))

gwas_index = unique(gwas[ grepl("eQTL",gwas$qtl_combo_collapse) ,c("Stage_specificity","index_trait") ])
shared_index = unique(gwas_index$index_trait[ gwas_index$Stage_specificity == "Shared"])
edev_index = unique(gwas_index$index_trait[ gwas_index$Stage_specificity == "EDev"])
gwas_index$Specificity_collapse = ifelse(gwas_index$index_trait %in% shared_index, "Shared",
                                        ifelse(gwas_index$index_trait %in% edev_index, "EDev","No association"))
gwas_index2 = unique(gwas_index[, c("Specificity_collapse","index_trait")])
sum(table(gwas_index2$Specificity_collapse))
table(gwas_index2$Specificity_collapse)