In [2]:
# Work from the project root: the source() call below and the output path
# written later in this notebook are relative to this directory.
setwd("/projects/CARDIPS/analysis/epigenome_resource")
# Shared notebook helpers. NOTE(review): the cells below call fread, %>%,
# str_to_title and %like% without any library() call, so this file presumably
# attaches data.table / dplyr / stringr -- confirm.
source("analyses/jennifer/notebooks/functions.R")

# Fix the RNG seed for the session.
set.seed(5366)
library(coloc)

This is coloc version 5.2.3



# **Table S12: GWAS Colocalization**

## **read input**

In [5]:
# Source data for Figure 5A, loaded as a plain data.frame.
indep4 <- fread(
  "~/projects/Resource/notebooks/toGitHub/sourcedata/SOURCEDATA.FIGURE5A.txt",
  data.table = FALSE
)
# Keep only the rows that carry a non-empty QTL combination label.
indep4 <- indep4[indep4$QTL_Combination != "", ]


In [6]:
# GWAS colocalization summary, loaded as a plain data.frame.
gwas_coloc <- fread(
  "~/projects/Resource/Figshare/all.gwas_summary.2024_0925.txt",
  data.table = FALSE
)
# Restrict to rows flagged as nominated.
gwas_coloc <- filter(gwas_coloc, Nominated == TRUE)

nrow(gwas_coloc)

In [37]:
# Report how many distinct trait descriptions remain after filtering.
message("# traits: ", length(unique(gwas_coloc$description)))

# traits: 15



In [7]:
# Columns carried into the supplemental table, grouped by theme.
table <- select(
  gwas_coloc,
  # QTL element identity and module membership
  tissue, element_id, tissue_element, type, Cluster_ID, Complexity, Nominated,
  # element annotation
  element_name, element_chr, element_start, element_end,
  # GWAS trait
  full_trait_id, description,
  # coloc posteriors
  nsnps, PP.H0.abf, PP.H1.abf, PP.H2.abf, PP.H3.abf, PP.H4.abf,
  max_model_pp, likely_model, topsnp, topsnp_pp,
  # summary statistics and credible set size
  beta.eqtl, se.eqtl, p.eqtl, beta.gwas, se.gwas, p.gwas, cs_size,
  # colocalization call and QTL combination label
  coloc_gwas, qtl_combo
)

In [8]:
# Rows that are both GWAS-colocalized and nominated, and how many distinct
# tissue/element pairs they cover.
a <- filter(table, coloc_gwas == TRUE, Nominated == TRUE)
nrow(a)
length(unique(a$tissue_element))

In [10]:
# Reformat trait descriptions into display labels.
#
# Every replacement below assigns into the `description` vector through a
# mask, so a label that is absent from the data (or an NA description) is
# simply skipped. The previous form, `table[cond, ]$description = value`,
# aborts with "replacement has 1 row, data has 0" when no row matches.

# Step 1: exact matches on the raw descriptions (before title-casing).
raw_labels <- c(
  "birth weight (eur)" = "Birth Weight",
  "childhood obesity (eur)" = "Childhood Obesity",
  # "type_1_diabetes" = "Type 1 Diabetes",
  "fasting glucose" = "Fasting Glucose",
  "type 2 diabetes" = "Type 2 Diabetes",
  "I25 Chronic ischaemic heart disease" = "Ischemic Heart Disease"
)
raw_idx <- match(table$description, names(raw_labels))
raw_hit <- !is.na(raw_idx)
table$description[raw_hit] <- unname(raw_labels)[raw_idx[raw_hit]]

# Step 2: title-case everything.
table$description <- str_to_title(table$description)

# Step 3: pattern matches on the title-cased text, applied in this order.
# These mainly restore acronyms and strip ICD-style prefixes.
pattern_labels <- c(
  "Body Mass" = "Body Mass Index",
  "Qrs Duration" = "QRS Duration",
  "Ldl Direct" = "LDL Direct",
  "Hdl Cholesterol" = "HDL Cholesterol",
  "I20 Angina Pectoris" = "Angina Pectoris",
  "I21 Acute Myocardial Infarction" = "Myocardial Infarction"
)
for (pattern in names(pattern_labels)) {
  # which() drops any NA so the assignment index is always valid.
  matched <- which(table$description %like% pattern)
  table$description[matched] <- pattern_labels[[pattern]]
}

# Step 4: exact matches on the title-cased text.
title_labels <- c(
  "Multivariate Longevity" = "Aging",
  "I48 Atrial Fibrillation And Flutter" = "Atrial Fibrillation"
)
title_idx <- match(table$description, names(title_labels))
title_hit <- !is.na(title_idx)
table$description[title_hit] <- unname(title_labels)[title_idx[title_hit]]

In [11]:
# Reformat column names to the published table headers (new = old).
table <- table %>%
  dplyr::rename(
    Tissue = tissue,
    Element_ID = element_id,
    Condition = type,
    Element_Name = element_name,
    Element_Chrom = element_chr,
    Element_Start = element_start,
    Element_End = element_end,
    Trait_ID = full_trait_id,
    Trait_Description = description,
    Nsnps = nsnps,
    Max_Hypothesis_PP = max_model_pp,
    Likely_Colocalization_Hypothesis = likely_model,
    Top_SNP_ID = topsnp,
    Top_SNP_PP = topsnp_pp,
    # Proportion_Module_Colocalized = prop_cluster_coloc,
    Beta.QTL = beta.eqtl,
    SE.QTL = se.eqtl,
    Pvalue.QTL = p.eqtl,
    Beta.GWAS = beta.gwas,
    SE.GWAS = se.gwas,
    Pvalue.GWAS = p.gwas,
    Colocalized = coloc_gwas,
    # EDev_Unique_QTL = fetal_unique,
    QTL_Combination = qtl_combo
  )

# Helper key column; not part of the published table.
table$tissue_element <- NULL

# Drop the ".abf" suffix from the coloc posterior columns (PP.H0.abf -> PP.H0).
# fixed = TRUE matches the literal ".abf"; as a regex the "." would match any
# character and could strip unrelated text ending in "abf".
colnames(table) <- gsub(".abf", "", colnames(table), fixed = TRUE)

colnames(table)[colnames(table) == "cs_size"] <- "99Credible_Set_Size"

In [13]:
# Attach the GWAS locus ID (aka GWAS pruned variant) and the collapsed QTL
# combination from indep4. This is a left join: every row of `table` is kept,
# and rows without a match in indep4 get NA in the added columns.
table2 <- merge(
  x = table,
  y = indep4[, c("Tissue", "Trait_Description", "Trait_ID", "GWAS_Index", "Cluster_ID", "QTL_Combination_Collapse")],
  by = c("Tissue", "Trait_Description", "Trait_ID", "Cluster_ID"),
  all.x = TRUE
)
table2 <- dplyr::relocate(table2, GWAS_Index, .before = QTL_Combination)


In [25]:
# Sanity check: every GWAS-colocalized QTL should have been assigned a GWAS
# locus, so this should print zero rows.
filter(table2, Colocalized == TRUE, is.na(GWAS_Index))
# table2 %>% filter(Colocalized == T) 

“number of rows of result is not a multiple of vector length (arg 2)”
“number of rows of result is not a multiple of vector length (arg 2)”
“number of rows of result is not a multiple of vector length (arg 2)”
“number of rows of result is not a multiple of vector length (arg 2)”


Tissue,Trait_Description,Trait_ID,Cluster_ID,QTL_Combination,Complexity,Element_ID,Condition,Nominated,Element_Name,⋯,Pvalue.QTL,Beta.GWAS,SE.GWAS,Pvalue.GWAS,99Credible_Set_Size,Colocalized,GWAS_Index,Element_Cond,GWAS_QTL_Combination_Collapse,GWAS_ID
<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<int>,<lgl>,<chr>,⋯,<dbl>,<dbl>,<dbl>,<dbl>,<int>,<lgl>,<chr>,<chr>,<chr>,<chr>


In [17]:
# Locus identifier that is unique per trait: "<GWAS_Index> <Trait_Description>".
table2$GWAS_ID <- paste(table2$GWAS_Index, table2$Trait_Description, sep = " ")
# a = table2 %>% filter(Colocalized == T & Fetal_Unique == F) 
# table2$Fetal_Unique = ifelse(!table2$GWAS_ID %in% a$GWAS_ID, T, F)

In [18]:
# Final column order and naming for the published table.
table2 <- table2 %>%
  dplyr::relocate(Complexity, .after = Cluster_ID) %>%
  dplyr::rename(GWAS_QTL_Combination_Collapse = QTL_Combination_Collapse) %>%
  dplyr::relocate(QTL_Combination, .after = Cluster_ID)
#     dplyr::relocate(Has_TFBS, .before = GWAS_Index) %>%
#     select(-EDev_Unique_QTL) %>%
#     dplyr::rename(EDev_Unique_GWAS_Locus = Fetal_Unique_GWAS_Locus)

# Rows with no match in the locus table get an explicit label instead of NA.
# Assigning into the column vector is a no-op when nothing is NA, whereas
# `table2[is.na(x), ]$x = value` aborts with "replacement has 1 row, data has 0".
table2$GWAS_Index[is.na(table2$GWAS_Index)] <- "No Colocalization"
table2$GWAS_QTL_Combination_Collapse[is.na(table2$GWAS_QTL_Combination_Collapse)] <- "No Colocalization"
# table2[is.na(table2$EDev_Unique_GWAS_Locus),]$EDev_Unique_GWAS_Locus = "No Colocalization"

In [20]:
# Cross-tabulate the colocalization flag against the collapsed QTL combination.
# base:: is spelled out because `table` is also the name of a data frame in
# this notebook.
base::table(table2$Colocalized, table2$GWAS_QTL_Combination_Collapse)

       
         caQTL caQTL-eQTL caQTL-eQTL-haQTL caQTL-haQTL   eQTL eQTL-haQTL  haQTL
  FALSE      0          0                0           0      0          0      0
  TRUE     307         91               46          72    261         35     51
       
        No Colocalization
  FALSE            453525
  TRUE                  0

In [33]:
### GWAS results in supplemental table 5 and on Figshare
# NOTE(review): the notebook header calls this Table S12 -- confirm which
# table number is current.
# Drop the helper columns, de-duplicate rows, and write tab-separated text.
table2 %>%
  select(-GWAS_ID, -Nominated) %>%
  distinct() %>%
  fwrite(
    "analyses/tim/gwas_coloc/tables/Table_SX_GWAS_Colocalizations_2024_0911.txt",
    row.names = FALSE,
    sep = "\t"
  )

In [26]:
# Display copy of the table: helper columns dropped, underscores in the
# headers replaced by spaces.
table3 <- select(table2, -GWAS_ID, -Nominated)
colnames(table3) <- gsub("_", " ", colnames(table3), fixed = TRUE)

# fwrite(table3 %>% distinct(), "analyses/tim/gwas_coloc/tables/Table_SX_GWAS_Colocalizations.txt", row.names = F, sep = "\t")

In [27]:
# Check that the headline numbers are correct: distinct clusters and distinct
# GWAS loci among the colocalized rows.
a <- filter(table2, Colocalized == TRUE)
message("# clusters that colocalized: ", length(unique(a$Cluster_ID)))
message("# GWAS loci that colocalized: ", length(unique(a$GWAS_ID)))

# clusters that colocalized: 695

# GWAS loci that colocalized: 540

