# Colocalization of clinical traits with csaQTLs, LocusZoom plots

In [1]:
library(coloc)
library(scales)
fig_dir = "/data/srlab/lrumker/MCSC_Project/cna-qtl/figs/"
lz_folder="/data/srlab/lrumker/MCSC_Project/cna-qtl/locus_zoom_fns/"
source(paste0(lz_folder,"locus_zoom.R"))
Unique.genes <- read.delim(paste0(lz_folder, "Gencode_GRCh37_Genes_UniqueList2021.txt"), 
                           stringsAsFactors = FALSE, header = TRUE)

This is a new update to coloc.



## Helper functions

In [2]:
compute_regional_LD<-function(geno, ref_gwas, ref_leadsnp){
    ref_leadpos=as.numeric(strsplit(ref_leadsnp,":")[[1]][2])
    lead_geno = as.numeric(geno[match(ref_leadsnp,row.names(geno)),])
    get_LD <-function(i_snp, geno, lead_geno){
        return(cor(lead_geno, as.numeric(geno[i_snp,]))**2)
    }
    all_LD = apply(as.matrix(c(1:nrow(geno)), ncol=1), 1, get_LD, geno, lead_geno)

    locus_LD=data.frame("CHR_A"= ref_gwas$CHR,
                       "BP_A"= rep(ref_leadpos,nrow(ref_gwas)),
                       "SNP_A"= rep(ref_gwas$SNP[ref_gwas$BP==ref_leadpos],nrow(ref_gwas)),
                       "CHR_B"= ref_gwas$CHR,
                       "BP_B"= ref_gwas$BP,
                       "SNP_B"= ref_gwas$SNP,
                       "R2"=all_LD)
    row.names(locus_LD) = c(1:nrow(locus_LD))
    return(locus_LD)
}

In [3]:
region_plot_our_gwas<-function(celltype, lead_snp, window_sz, 
                               secondary_snps=c(), secondary_names=c(), plot_type="jpg"){
    lead_pos = strsplit(lead_snp, ":")[[1]][2]
    chr = strsplit(lead_snp, ":")[[1]][1]

    # Import sumstats from our csaQTL GWAS
    gwas_res = read.table(paste0("/data/srlab/lrumker/MCSC_Project/cna-qtl/results/gwas_",
                                 celltype, "/gwas_res_",lead_snp,"_cis.tsv"), 
                          stringsAsFactors = FALSE, comment.char = "", header = TRUE)
    gwas_res$CHR = gwas_res$X.CHROM
    gwas_res$BP = gwas_res$POS

    # Import allele doses for computing LD, note: includes all 973 donors
    geno = read.delim(paste0("/data/srlab/lrumker/MCSC_Project/cna-qtl/results/geno_munge/cis_snps/", celltype,
                      "_",lead_snp,"_cis.DS.vcf.gz"), stringsAsFactors = FALSE, header = FALSE, row.names=1)
    rownames(geno) = trimws(rownames(geno))

    # Import rsIDs
    cis_annovar=read.delim(paste0("/data/srlab/lrumker/MCSC_Project/cna-qtl/results/annovar/", celltype, 
                                 "_",lead_snp,"_cis.hg19_multianno.txt"), stringsAsFactors = FALSE, header = TRUE)
    keep_snps = which(rownames(geno) %in% gwas_res$ID)
    geno = geno[keep_snps,]
    cis_annovar = cis_annovar[keep_snps,]
    
    gwas_res$SNP = cis_annovar$avsnp147
    if(length(secondary_snps)>0){
        for(i in c(1:length(secondary_snps))){
            gwas_res$SNP[which(gwas_res$SNP==secondary_snps[i])] = secondary_names[i]
        }
    }

    # Compute LD for region to lead SNP
    locus_LD = compute_regional_LD(geno, gwas_res, lead_snp)
    
    # Plot LocusZoom
    if(length(secondary_snps)>0){
        locus.zoom(data = gwas_res[,c("CHR", "SNP", "BP", "P")], rsid.check=FALSE, 
            region = c(as.numeric(chr), as.numeric(lead_pos)-(window_sz/2), as.numeric(lead_pos)+(window_sz/2)),                            
           offset_bp = 0, ld.file = locus_LD, genes.data = Unique.genes, 
           plot.title = paste0("csaQTL associations in ",celltype, " cells"),
           file.name = paste0(fig_dir,celltype,"_chr",chr,"_regional.",plot_type), plot.type = plot_type,
           secondary.snp = secondary_names, secondary.label = TRUE) 
    }else{
        locus.zoom(data = gwas_res[,c("CHR", "SNP", "BP", "P")], rsid.check=FALSE, 
           region = c(as.numeric(chr), as.numeric(lead_pos)-(window_sz/2), as.numeric(lead_pos)+(window_sz/2)),             
           offset_bp = 0, ld.file = locus_LD, genes.data = Unique.genes, 
           plot.title = paste0("csaQTL associations in ",celltype, " cells"), plot.type = plot_type,
           file.name = paste0(fig_dir,celltype,"_chr",chr,"_regional.",plot_type)) 
    }
    return(locus_LD)
}

In [4]:
keep_shared_snps<-function(ref_gwas, celltype, lead_snp){
    lead_pos = strsplit(lead_snp, ":")[[1]][2]
    
    # Import allele doses for computing LD, note: includes all 973 donors
    geno = read.delim(paste0("/data/srlab/lrumker/MCSC_Project/cna-qtl/results/geno_munge/cis_snps/", celltype,
                      "_",lead_snp,"_cis.DS.vcf.gz"), stringsAsFactors = FALSE, header = FALSE, row.names=1)
    rownames(geno) = trimws(rownames(geno))
    
    # add rsIDs
    cis_annovar=read.delim(paste0("/data/srlab/lrumker/MCSC_Project/cna-qtl/results/annovar/", celltype, 
                                 "_",lead_snp,"_cis.hg19_multianno.txt"), stringsAsFactors = FALSE, header = TRUE)
    rsIDs = cis_annovar$avsnp147

    # retain alleles from our GWAS in ref GWAS
    ref_gwas['ID'] = paste0(ref_gwas$CHR,":",ref_gwas$BP,":", ref_gwas$other_allele, ":", ref_gwas$effect_allele)
    ref_gwas['ID_flip'] = paste0(ref_gwas$CHR,":",ref_gwas$BP,":", ref_gwas$effect_allele, ":", ref_gwas$other_allele)
    ref_gwas['found'] = ref_gwas$ID %in% rownames(geno)
    ref_gwas['found_flip'] = ref_gwas$ID_flip %in% rownames(geno)
    ref_gwas['found_any'] = as.numeric(ref_gwas['found'] | ref_gwas['found_flip'])
    ref_gwas = ref_gwas[ref_gwas$found_any==1,]
    ref_gwas['ID_merged'] = ref_gwas$ID
    ref_gwas[ref_gwas$found_flip,'ID_merged']=ref_gwas$ID_flip[ref_gwas$found_flip]
    
    # If assocs to same SNP reflected in two rows, keep first row only
    rownames(ref_gwas) = c(1:nrow(ref_gwas))
    ref_gwas = ref_gwas[match(unique(ref_gwas$ID_merged), ref_gwas$ID_merged),]
    rownames(ref_gwas) = c(1:nrow(ref_gwas))
    
    # retain same alleles our GWAS
    keep_snp = match(ref_gwas$ID_merged, row.names(geno))
    geno = geno[keep_snp,]
    ref_gwas$SNP = rsIDs[keep_snp]
    
    return(list(ref_gwas, geno))
}

## NK csaQTL at 11q24.3

In [5]:
celltype="NK"
set.seed(0)
locus_LD = region_plot_our_gwas(celltype="NK", lead_snp='11:128070535:A:G', window_sz=1000000, 
                                secondary_snps=c('rs479122'), secondary_names = c("ETS1 eQTL"))

## NK csaQTL at 12p13.2

In [6]:
set.seed(0)
# eQTL KLRC1 lead snp is 12:10594848:C:A // rs10909609; A is assoc. with decreased KLRC1
# psoriasis meta lead snp is rs11053802-T, 4.17 x 10-9 (OR>1)
lead_snp='12:10583611:C:T'
lead_snp_rsID='rs3003'
locus_LD = region_plot_our_gwas(celltype="NK", lead_snp='12:10583611:C:T', window_sz=400000, 
                                secondary_snps=c('rs11053802',  # Psoriasis GWAS hit
                                                          'rs10909609'), # KLRC1 eQTL
                                secondary_names = c("Psoriasis", "KLRC1 eQTL"),
                               plot_type="svg")

In [7]:
# csaQTL rs3003
# LD between Tsoi et al Psoriasis GWAS lead association (p=4.17e-9) and our lead snp
locus_LD[locus_LD$BP_B=='10597207',] # No sumstats for Tsoi et al available

Unnamed: 0_level_0,CHR_A,BP_A,SNP_A,CHR_B,BP_B,SNP_B,R2
Unnamed: 0_level_1,<int>,<dbl>,<chr>,<int>,<int>,<chr>,<dbl>
3147,12,10583611,rs3003,12,10597207,Psoriasis,0.4955531


### Psoriasis

In [8]:
# cat GCST90019016_buildGRCh37.tsv | awk 'NR==1 || $1==12' | awk 'NR==1 || $2<20583611' | awk 'NR==1 || $2>583611' > psoriasis.munged.chr12.txt
psoriasis_gwas = read.table("/data/srlab/lrumker/MCSC_Project/cna-qtl/ref_gwas/sumstats/psoriasis.munged.chr12.txt",
                     header = TRUE, stringsAsFactors = FALSE)
rownames(psoriasis_gwas)=c(1:nrow(psoriasis_gwas))
psoriasis_gwas$CHR = psoriasis_gwas$chromosome
psoriasis_gwas$BP = psoriasis_gwas$base_pair_location
psoriasis_gwas$P = psoriasis_gwas$p_value
psoriasis_gwas$SE = psoriasis_gwas$standard_error
psoriasis_gwas$BETA = psoriasis_gwas$beta
psoriasis_gwas$N = rep(44161, nrow(psoriasis_gwas))
psoriasis_gwas$effect_allele = toupper(psoriasis_gwas$effect_allele)
psoriasis_gwas$other_allele = toupper(psoriasis_gwas$other_allele)

# Import allele doses for computing LD
out_obj = keep_shared_snps(ref_gwas=psoriasis_gwas, celltype=celltype, lead_snp=lead_snp)
psoriasis_gwas=out_obj[[1]]
geno=out_obj[[2]]

# MAF not reported, use from our cohort
G_info = read.delim(paste0("/data/srlab/lrumker/MCSC_Project/cna-qtl/results/geno_munge/cis_snps/", celltype,
                      "_",lead_snp,"_cis.vcf.gz"), stringsAsFactors = FALSE, header = TRUE, skip=15)
rownames(G_info) = G_info$ID
G_info = G_info[psoriasis_gwas$ID_merged,]
G_info = G_info[,c(1:9)]
get_MAF <-function(info_str){
    maf_str = strsplit(info_str,";")[[1]][2]
    return(as.numeric(strsplit(maf_str,"=")[[1]][2]))
}
G_info$MAF = apply(as.matrix(G_info$INFO, ncol=1),1,get_MAF)
psoriasis_gwas$MAF = G_info$MAF

# coloc
gwas_res = read.table(paste0("/data/srlab/lrumker/MCSC_Project/cna-qtl/results/coloc/gwas_local_",
                             celltype,"_" ,lead_snp,"_cis.qassoc"), 
                      header=TRUE, stringsAsFactors=FALSE)
gwas_res = gwas_res[gwas_res$SNP %in% psoriasis_gwas$ID_merged,]
all_phenos = read.table(paste0("/data/srlab/lrumker/MCSC_Project/cna-qtl/results/gwas_", 
                        celltype,"/spheno.tsv"), header = TRUE, row.names=1)
lead_pheno_trait_sd = sd(all_phenos[paste0("X", paste0(strsplit(lead_snp, ":")[[1]], collapse="."))][,1])
our_gwas_coloc = list(beta = gwas_res$BETA, varbeta = gwas_res$SE**2,
                    sdY = lead_pheno_trait_sd,type = "quant", snp = gwas_res$SNP)
ref_gwas_coloc = list(beta = psoriasis_gwas$BETA, varbeta = psoriasis_gwas$SE**2,
                    N=psoriasis_gwas$N, MAF=psoriasis_gwas$MAF, type = "quant", snp = psoriasis_gwas$ID_merged)
coloc_res <- coloc.abf(dataset1=our_gwas_coloc, dataset2=ref_gwas_coloc)

“minimum p value is: 4.3646e-06
If this is what you expected, this is not a problem.
If this is not as small as you expected, please check the 02_data vignette.”
“estimating sdY from maf and varbeta, please directly supply sdY if known”


PP.H0.abf PP.H1.abf PP.H2.abf PP.H3.abf PP.H4.abf 
 2.65e-15  9.93e-03  2.18e-14  8.09e-02  9.09e-01 
[1] "PP abf for shared variant: 90.9%"


### Flow-based cell state abundance traits

In [9]:
lead_snp='12:10583611:C:T'
lead_snp_rsID='rs3003'
celltype="NK"

Sumstats sourcing and preprocessing:

Monocyte count Vuckovic
- wget https://ftp.ebi.ac.uk/pub/databases/gwas/summary_statistics/GCST90002001-GCST90003000/GCST90002393/GCST90002393_buildGRCh37.tsv
- cat GCST90002393_buildGRCh37.tsv | awk 'NR==1 || $3==12' | awk 'NR==1 || $5<20583611' | awk 'NR==1 || $5>583611' > mono_count.munged.chr12.txt

Neutrophil percentage of white cells Vuckovic 
- wget  https://ftp.ebi.ac.uk/pub/databases/gwas/summary_statistics/GCST90002001-GCST90003000/GCST90002399/GCST90002399_buildGRCh37.tsv
- cat GCST90002399_buildGRCh37.tsv | awk 'NR==1 || $3==12' | awk 'NR==1 || $5<20583611' | awk 'NR==1 || $5>583611' > neut_pct_leuk.munged.chr12.txt

Monocyte percentage of white cells vuckovic
- wget https://ftp.ebi.ac.uk/pub/databases/gwas/summary_statistics/GCST90002001-GCST90003000/GCST90002394/GCST90002394_buildGRCh37.tsv
- cat GCST90002394_buildGRCh37.tsv | awk 'NR==1 || $3==12' | awk 'NR==1 || $5<20583611' | awk 'NR==1 || $5>583611' > mono_pct_leuk.munged.chr12.txt

Lymphocyte count vuckovic 
- wget https://ftp.ebi.ac.uk/pub/databases/gwas/summary_statistics/GCST90002001-GCST90003000/GCST90002388/GCST90002388_buildGRCh37.tsv
- cat GCST90002388_buildGRCh37.tsv | awk 'NR==1 || $3==12' | awk 'NR==1 || $5<20583611' | awk 'NR==1 || $5>583611' > lymph_count.munged.chr12.txt

In [10]:
cohort_size = 563085 # for Vuckovic et al 
trait_name="mono_count"
for(trait_name in c("mono_count", "lymph_count", "neut_pct_leuk", "mono_pct_leuk")){
    print(trait_name)
    csa_trait_gwas = read.table(paste0("/data/srlab/lrumker/MCSC_Project/cna-qtl/ref_gwas/sumstats/",
                                       trait_name,".munged.chr12.txt"),
                         header = TRUE, stringsAsFactors = FALSE)

    rownames(csa_trait_gwas)=c(1:nrow(csa_trait_gwas))
    csa_trait_gwas$CHR = csa_trait_gwas$chromosome
    csa_trait_gwas$BP = csa_trait_gwas$base_pair_location
    csa_trait_gwas$P = csa_trait_gwas$p_value
    csa_trait_gwas$SE = csa_trait_gwas$standard_error
    csa_trait_gwas$BETA = csa_trait_gwas$beta
    csa_trait_gwas$N = rep(cohort_size, nrow(csa_trait_gwas))
    csa_trait_gwas$effect_allele = toupper(csa_trait_gwas$effect_allele)
    csa_trait_gwas$other_allele = toupper(csa_trait_gwas$other_allele)
    csa_trait_gwas$MAF = csa_trait_gwas$MA_FREQ

    # Import allele doses for computing LD
    out_obj = keep_shared_snps(ref_gwas=csa_trait_gwas, celltype=celltype, lead_snp=lead_snp)
    csa_trait_gwas=out_obj[[1]]
    geno=out_obj[[2]]

    # coloc
    gwas_res = read.table(paste0("/data/srlab/lrumker/MCSC_Project/cna-qtl/results/coloc/gwas_local_",
                                 celltype,"_" ,lead_snp,"_cis.qassoc"), 
                          header=TRUE, stringsAsFactors=FALSE)
    gwas_res = gwas_res[gwas_res$SNP %in% csa_trait_gwas$ID_merged,]
    all_phenos = read.table(paste0("/data/srlab/lrumker/MCSC_Project/cna-qtl/results/gwas_", 
                            celltype,"/spheno.tsv"), header = TRUE, row.names=1)
    lead_pheno_trait_sd = sd(all_phenos[paste0("X", paste0(strsplit(lead_snp, ":")[[1]], collapse="."))][,1])
    our_gwas_coloc = list(beta = gwas_res$BETA, varbeta = gwas_res$SE**2,
                        sdY = lead_pheno_trait_sd,type = "quant", snp = gwas_res$SNP)
    ref_gwas_coloc = list(beta = csa_trait_gwas$BETA, varbeta = csa_trait_gwas$SE**2,
                        N=csa_trait_gwas$N, MAF=csa_trait_gwas$MAF, type = "quant", snp = csa_trait_gwas$ID_merged)
    coloc_res <- coloc.abf(dataset1=our_gwas_coloc, dataset2=ref_gwas_coloc)
}

[1] "mono_count"


“estimating sdY from maf and varbeta, please directly supply sdY if known”


PP.H0.abf PP.H1.abf PP.H2.abf PP.H3.abf PP.H4.abf 
 5.22e-53  1.96e-40  1.17e-13  4.37e-01  5.63e-01 
[1] "PP abf for shared variant: 56.3%"
[1] "lymph_count"


“estimating sdY from maf and varbeta, please directly supply sdY if known”


PP.H0.abf PP.H1.abf PP.H2.abf PP.H3.abf PP.H4.abf 
2.38e-231 8.94e-219  2.67e-13  1.00e+00  9.77e-15 
[1] "PP abf for shared variant: 9.77e-13%"
[1] "neut_pct_leuk"


“estimating sdY from maf and varbeta, please directly supply sdY if known”


PP.H0.abf PP.H1.abf PP.H2.abf PP.H3.abf PP.H4.abf 
 2.69e-75  1.01e-62  2.67e-13  1.00e+00  8.98e-15 
[1] "PP abf for shared variant: 8.98e-13%"
[1] "mono_pct_leuk"


“estimating sdY from maf and varbeta, please directly supply sdY if known”


PP.H0.abf PP.H1.abf PP.H2.abf PP.H3.abf PP.H4.abf 
 1.74e-53  6.52e-41  1.16e-14  4.24e-02  9.58e-01 
[1] "PP abf for shared variant: 95.8%"


Sumstats sourcing and preprocessing:

Effector Memory CD4+ T cell %T cell	(Orru et al.) 
- wget https://ftp.ebi.ac.uk/pub/databases/gwas/summary_statistics/GCST90001001-GCST90002000/GCST90001544/GCST90001544_buildGRCh37.tsv.gz
- zcat GCST90001544_buildGRCh37.tsv.gz | awk 'NR==1||$1==12' > TEM_CD4_pct_T.munged.chr12.txt

In [11]:
orru_gwas = read.delim("/data/srlab/lrumker/MCSC_Project/cna-qtl/ref_gwas/sumstats/TEM_CD4_pct_T.munged.chr12.txt",
                     header = TRUE, stringsAsFactors = FALSE)
rownames(orru_gwas)=c(1:nrow(orru_gwas))
orru_gwas$CHR = orru_gwas$chromosome
orru_gwas$BP = orru_gwas$base_pair_location
orru_gwas$P = orru_gwas$p_value
orru_gwas$SE = orru_gwas$standard_error
orru_gwas$BETA = orru_gwas$beta

In [12]:
# Import allele doses for computing LD
out_obj = keep_shared_snps(ref_gwas=orru_gwas, celltype=celltype, lead_snp=lead_snp)
orru_gwas=out_obj[[1]]
geno=out_obj[[2]]
orru_gwas = orru_gwas[orru_gwas$MAF !=0,]

In [13]:
orru_gwas[order(orru_gwas$P),][1,] # lead snp

Unnamed: 0_level_0,chromosome,base_pair_location,effect_allele,other_allele,N,MAF,effect_allele_frequency,beta,standard_error,p_value,⋯,P,SE,BETA,ID,ID_flip,found,found_flip,found_any,ID_merged,SNP
Unnamed: 0_level_1,<int>,<int>,<chr>,<chr>,<int>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,⋯,<dbl>,<dbl>,<dbl>,<chr>,<chr>,<lgl>,<lgl>,<dbl>,<chr>,<chr>
3115,12,10583297,A,G,3427,0.1936,0.8064,-0.1686,0.02956,1.257e-08,⋯,1.257e-08,0.02956,-0.1686,12:10583297:G:A,12:10583297:A:G,True,False,1,12:10583297:G:A,rs2981595


In [14]:
# coloc
gwas_res = read.table(paste0("/data/srlab/lrumker/MCSC_Project/cna-qtl/results/coloc/gwas_local_",
                             celltype,"_" ,lead_snp,"_cis.qassoc"), 
                      header=TRUE, stringsAsFactors=FALSE)
gwas_res = gwas_res[gwas_res$SNP %in% orru_gwas$ID_merged,]
all_phenos = read.table(paste0("/data/srlab/lrumker/MCSC_Project/cna-qtl/results/gwas_", 
                        celltype,"/spheno.tsv"), header = TRUE, row.names=1)
lead_pheno_trait_sd = sd(all_phenos[paste0("X", paste0(strsplit(lead_snp, ":")[[1]], collapse="."))][,1])
our_gwas_coloc = list(beta = gwas_res$BETA, varbeta = gwas_res$SE**2,
                    sdY = lead_pheno_trait_sd,type = "quant", snp = gwas_res$SNP)
ref_gwas_coloc = list(beta = orru_gwas$BETA, varbeta = orru_gwas$SE**2,
                    N=orru_gwas$N, MAF=orru_gwas$MAF, type = "quant", snp = orru_gwas$ID_merged)
coloc_res <- coloc.abf(dataset1=our_gwas_coloc, dataset2=ref_gwas_coloc)

“estimating sdY from maf and varbeta, please directly supply sdY if known”


PP.H0.abf PP.H1.abf PP.H2.abf PP.H3.abf PP.H4.abf 
 1.02e-17  3.54e-05  6.20e-15  2.06e-02  9.79e-01 
[1] "PP abf for shared variant: 97.9%"


Sumstats sourcing and preprocessing:

Granulocyte % myeloid white cells (Astle et al.)
- wget https://ftp.ebi.ac.uk/pub/databases/gwas/summary_statistics/GCST004001-GCST005000/GCST004608/gran_p_myeloid_wbc_original_files/gran_p_myeloid_wbc_build37_169545_20161212.tsv.gz
- zcat gran_p_myeloid_wbc_build37_169545_20161212.tsv.gz | awk 'NR==1 || $3==12' | awk 'NR==1||
 $4<20583611' | awk 'NR==1 || $4>583611' > gran_pct_myeloid.munged.chr12.txt


In [15]:
astle_gwas = read.delim("/data/srlab/lrumker/MCSC_Project/cna-qtl/ref_gwas/sumstats/gran_pct_myeloid.munged.chr12.txt",
                     header = TRUE, stringsAsFactors = FALSE)
rownames(astle_gwas)=c(1:nrow(astle_gwas))
cohort_size=169545
astle_gwas$BETA = astle_gwas$EFFECT
astle_gwas$effect_allele = astle_gwas$ALT
astle_gwas$other_allele = astle_gwas$REF
astle_gwas$MAF = astle_gwas$MA_FREQ
astle_gwas$N = rep(cohort_size, nrow(astle_gwas))

In [16]:
# Import allele doses for computing LD
out_obj = keep_shared_snps(ref_gwas=astle_gwas, celltype=celltype, lead_snp=lead_snp)
astle_gwas=out_obj[[1]]
geno=out_obj[[2]]

In [17]:
astle_gwas[order(astle_gwas$P),][1,] # lead snp

Unnamed: 0_level_0,VARIANT,ID,CHR,BP,REF,ALT,ALT_MINOR,DIRECTION,EFFECT,SE,⋯,effect_allele,other_allele,MAF,N,ID_flip,found,found_flip,found_any,ID_merged,SNP
Unnamed: 0_level_1,<chr>,<chr>,<int>,<int>,<chr>,<chr>,<lgl>,<chr>,<dbl>,<dbl>,⋯,<chr>,<chr>,<dbl>,<dbl>,<chr>,<lgl>,<lgl>,<dbl>,<chr>,<chr>
3803,12:10581591_A_G,12:10581591:A:G,12,10581591,A,G,False,+,0.04744101,0.005392098,⋯,G,A,0.1278,169545,12:10581591:G:A,True,False,1,12:10581591:A:G,rs2682509


In [18]:
# coloc
gwas_res = read.table(paste0("/data/srlab/lrumker/MCSC_Project/cna-qtl/results/coloc/gwas_local_",
                             celltype,"_" ,lead_snp,"_cis.qassoc"), 
                      header=TRUE, stringsAsFactors=FALSE)
gwas_res = gwas_res[gwas_res$SNP %in% astle_gwas$ID_merged,]
all_phenos = read.table(paste0("/data/srlab/lrumker/MCSC_Project/cna-qtl/results/gwas_", 
                        celltype,"/spheno.tsv"), header = TRUE, row.names=1)
lead_pheno_trait_sd = sd(all_phenos[paste0("X", paste0(strsplit(lead_snp, ":")[[1]], collapse="."))][,1])
our_gwas_coloc = list(beta = gwas_res$BETA, varbeta = gwas_res$SE**2,
                    sdY = lead_pheno_trait_sd,type = "quant", snp = gwas_res$SNP)
ref_gwas_coloc = list(beta = astle_gwas$BETA, varbeta = astle_gwas$SE**2,
                    N=astle_gwas$N, MAF=astle_gwas$MAF, type = "quant", snp = astle_gwas$ID_merged)
coloc_res <- coloc.abf(dataset1=our_gwas_coloc, dataset2=ref_gwas_coloc)

“estimating sdY from maf and varbeta, please directly supply sdY if known”


PP.H0.abf PP.H1.abf PP.H2.abf PP.H3.abf PP.H4.abf 
 4.38e-26  1.64e-13  1.25e-13  4.69e-01  5.31e-01 
[1] "PP abf for shared variant: 53.1%"


In [19]:
# Export table with these results
library(xtable)

In [20]:
export = data.frame("Trait"=c("Monocyte count",
                            "Neutrophil \\% leukocytes",
                            "Monocyte \\% leukocytes",
                            "Lymphocyte count",
                            "Effector Memory CD4+ T cell \\% T cell",
                            "Granulocyte \\% myeloid white cells"),
                   "Publications" = c("Kachuri et al. (2021), Chen et al. (2020), Astle et al. (2016), \\textbf{Vuckovic et al. (2020)}, Sakaue et al. (2021)",
                                        "Astle et al. (2016), \\textbf{Vuckovic et al. (2020)}",
                                        "Astle et al. (2016), \\textbf{Vuckovic et al. (2020)}",
                                        "Vuckovic et al. (2020)",
                                        "Orrù et al. (2020)",
                                        "Astle et al. (2016)"),
                   "Colocalization" = c("0.56", "$<$0.001","0.96", "$<$0.001", "0.98", "0.53"))

In [21]:
export

Trait,Publications,Colocalization
<chr>,<chr>,<chr>
Monocyte count,"Kachuri et al. (2021), Chen et al. (2020), Astle et al. (2016), \textbf{Vuckovic et al. (2020)}, Sakaue et al. (2021)",0.56
Neutrophil \% leukocytes,"Astle et al. (2016), \textbf{Vuckovic et al. (2020)}",$<$0.001
Monocyte \% leukocytes,"Astle et al. (2016), \textbf{Vuckovic et al. (2020)}",0.96
Lymphocyte count,Vuckovic et al. (2020),$<$0.001
Effector Memory CD4+ T cell \% T cell,Orrù et al. (2020),0.98
Granulocyte \% myeloid white cells,Astle et al. (2016),0.53


In [22]:
table_dir="/data/srlab/lrumker/MCSC_Project/cna-qtl/tables/"
print(xtable(export, align= c("p{0.05\\textwidth}|", 
                              "p{0.37\\textwidth}|",  
                              "p{0.4\\textwidth}|",
                             "p{0.13\\textwidth}")), 
      file=paste0(table_dir,"supptable.csaQTL_pub_csaQTL_covariates.tex"), 
      include.rownames=FALSE, sanitize.text.function = function(x) {x})

## NK csaQTL at 19p13.11

In [23]:
lead_snp='19:16441973:G:A'
lead_snp_rsID = 'rs56133626'
set.seed(0)
locus_LD = region_plot_our_gwas(celltype=celltype, lead_snp=lead_snp, window_sz=200000, 
                                secondary_snps=c('rs56152581', # T1D 
                                                'rs34006614'), # Asthma risk locus
                               secondary_names = c("Type 1 Diabetes", "Asthma")) 

### Asthma

In [24]:
#zcat HanY_prePMID_asthma_UKBB.txt.gz | awk 'NR==1 || $2==19' | awk 'NR==1 || $3<17441973' | awk 'NR==1 || $3>15441973' > asthma.munged.chr19.txt
# hg19 build
asthma_gwas = read.table("/data/srlab/lrumker/MCSC_Project/cna-qtl/ref_gwas/sumstats/asthma.munged.chr19.txt",
                     header = TRUE, stringsAsFactors = FALSE)
rownames(asthma_gwas)=c(1:nrow(asthma_gwas))
asthma_gwas$CHR = asthma_gwas$CHR
asthma_gwas$BP = asthma_gwas$BP
asthma_gwas$P = asthma_gwas$P
asthma_gwas$SE = (asthma_gwas$OR_95U - asthma_gwas$OR_95L) / 3.92
asthma_gwas$BETA = log(asthma_gwas$OR)
asthma_gwas$effect_allele = asthma_gwas$EA
asthma_gwas$other_allele = asthma_gwas$NEA
asthma_gwas$MAF = asthma_gwas$EAF
asthma_gwas$MAF[which(asthma_gwas$MAF>0.5)] = 1-asthma_gwas$MAF[which(asthma_gwas$MAF>0.5)]

# Import allele doses for computing LD
out_obj = keep_shared_snps(ref_gwas=asthma_gwas, celltype=celltype, lead_snp=lead_snp)
asthma_gwas=out_obj[[1]]
geno=out_obj[[2]]

# Lead SNP obtained from GWAS catalog, with liftover to convert to hg19 
# '19:16442782:C:T' rs34006614 GWAS meta p=1e-8 allele T increases risk

In [25]:
# Our csaQTL phenotype directionally correpsonds to increased risk of asthma
cor(t(geno[c(lead_snp, '19:16442782:C:T'),]))

Unnamed: 0,19:16441973:G:A,19:16442782:C:T
19:16441973:G:A,1.0,0.9969297
19:16442782:C:T,0.9969297,1.0


In [26]:
# coloc
gwas_res = read.table(paste0("/data/srlab/lrumker/MCSC_Project/cna-qtl/results/coloc/gwas_local_",
                             celltype,"_" ,lead_snp,"_cis.qassoc"), 
                      header=TRUE, stringsAsFactors=FALSE)
gwas_res = gwas_res[gwas_res$SNP %in% asthma_gwas$ID_merged,]
all_phenos = read.table(paste0("/data/srlab/lrumker/MCSC_Project/cna-qtl/results/gwas_", 
                        celltype,"/spheno.tsv"), header = TRUE, row.names=1)
lead_pheno_trait_sd = sd(all_phenos[paste0("X", paste0(strsplit(lead_snp, ":")[[1]], collapse="."))][,1])
our_gwas_coloc = list(beta = gwas_res$BETA, varbeta = gwas_res$SE**2,
                    sdY = lead_pheno_trait_sd,type = "quant", snp = gwas_res$SNP)
ref_gwas_coloc = list(beta = asthma_gwas$BETA, varbeta = asthma_gwas$SE**2,
                    N=asthma_gwas$N, MAF=asthma_gwas$MAF, type = "quant", snp = asthma_gwas$ID_merged)
coloc_res <- coloc.abf(dataset1=our_gwas_coloc, dataset2=ref_gwas_coloc)

“estimating sdY from maf and varbeta, please directly supply sdY if known”


PP.H0.abf PP.H1.abf PP.H2.abf PP.H3.abf PP.H4.abf 
 1.08e-22  3.60e-04  4.68e-21  1.46e-02  9.85e-01 
[1] "PP abf for shared variant: 98.5%"


### Diabetes mellitus type 1

In [27]:
# hg19 build
t1d_gwas = read.csv("/data/srlab/lrumker/MCSC_Project/cna-qtl/ref_gwas/sumstats/NK_19:16441973:G:A_T1D_cis.hg19.renamed.csv",
                     header = TRUE, stringsAsFactors = FALSE)
rownames(t1d_gwas)=c(1:nrow(t1d_gwas))

# Import allele doses for computing LD
out_obj = keep_shared_snps(ref_gwas=t1d_gwas, celltype=celltype, lead_snp=lead_snp)
t1d_gwas=out_obj[[1]]
geno=out_obj[[2]]

# MAF not reported, use from our cohort
G_info = read.delim(paste0("/data/srlab/lrumker/MCSC_Project/cna-qtl/results/geno_munge/cis_snps/", celltype,
                      "_",lead_snp,"_cis.vcf.gz"), stringsAsFactors = FALSE, header = TRUE, skip=15)
rownames(G_info) = G_info$ID
G_info = G_info[t1d_gwas$ID_merged,]
G_info = G_info[,c(1:9)]
get_MAF <-function(info_str){
    maf_str = strsplit(info_str,";")[[1]][2]
    return(as.numeric(strsplit(maf_str,"=")[[1]][2]))
}
G_info$MAF = apply(as.matrix(G_info$INFO, ncol=1),1,get_MAF)
t1d_gwas$MAF = G_info$MAF

t1d_gwas[order(t1d_gwas$P),][1,] # lead snp

Unnamed: 0_level_0,CHR,BP,other_allele,effect_allele,sample_size,variant_id,P,BETA,SE,ID,ID_flip,found,found_flip,found_any,ID_merged,SNP,MAF
Unnamed: 0_level_1,<int>,<int>,<chr>,<chr>,<int>,<chr>,<dbl>,<dbl>,<dbl>,<chr>,<chr>,<lgl>,<lgl>,<dbl>,<chr>,<chr>,<dbl>
4316,19,16442196,C,T,520580,rs56152581,7.01e-07,0.083137,0.016758,19:16442196:C:T,19:16442196:T:C,True,False,1,19:16442196:C:T,rs56152581,0.32851


In [28]:
# coloc
gwas_res = read.table(paste0("/data/srlab/lrumker/MCSC_Project/cna-qtl/results/coloc/gwas_local_",
                             celltype,"_" ,lead_snp,"_cis.qassoc"), 
                      header=TRUE, stringsAsFactors=FALSE)
gwas_res = gwas_res[gwas_res$SNP %in% t1d_gwas$ID_merged,]
all_phenos = read.table(paste0("/data/srlab/lrumker/MCSC_Project/cna-qtl/results/gwas_", 
                        celltype,"/spheno.tsv"), header = TRUE, row.names=1)
lead_pheno_trait_sd = sd(all_phenos[paste0("X", paste0(strsplit(lead_snp, ":")[[1]], collapse="."))][,1])
our_gwas_coloc = list(beta = gwas_res$BETA, varbeta = gwas_res$SE**2,
                    sdY = lead_pheno_trait_sd,type = "quant", snp = gwas_res$SNP)
ref_gwas_coloc = list(beta = t1d_gwas$BETA, varbeta = t1d_gwas$SE**2,
                   N=t1d_gwas$sample_size, MAF=t1d_gwas$MAF, type = "quant", snp = t1d_gwas$ID_merged)
coloc_res <- coloc.abf(dataset1=our_gwas_coloc, dataset2=ref_gwas_coloc)

“estimating sdY from maf and varbeta, please directly supply sdY if known”


PP.H0.abf PP.H1.abf PP.H2.abf PP.H3.abf PP.H4.abf 
 1.54e-21  5.11e-03  2.32e-21  6.75e-03  9.88e-01 
[1] "PP abf for shared variant: 98.8%"


## NK csaQTL at 2q13

In [29]:
celltype="NK"
lead_snp='2:111851212:C:T'
lead_snp_rsID = "rs13025330"
set.seed(0)
locus_LD = region_plot_our_gwas(celltype=celltype, lead_snp=lead_snp, window_sz=1200000, 
                                secondary_snps=c('rs2165109', # CXCL16 locus, epithelial ovarian cancer
                                    'rs12986962'), # Patin et al 
                               secondary_names = c("CXCL16;\nOvarian Cancer", "Flow cytometry")) 

### Similar trait quantified with flow cytometry

In [30]:
# Import allele doses for computing LD, note: includes all 973 donors
geno = read.delim(paste0("/data/srlab/lrumker/MCSC_Project/cna-qtl/results/geno_munge/cis_snps/", celltype,
                  "_",lead_snp,"_cis.DS.vcf.gz"), stringsAsFactors = FALSE, header = FALSE, row.names=1)
rownames(geno) = trimws(rownames(geno))

# add rsIDs
cis_annovar=read.delim(paste0("/data/srlab/lrumker/MCSC_Project/cna-qtl/results/annovar/", celltype, 
                             "_",lead_snp,"_cis.hg19_multianno.txt"), stringsAsFactors = FALSE, header = TRUE)
rsIDs = cis_annovar$avsnp147

cor(t(geno['2:111808558:A:G',]), t(geno[lead_snp,]))**2 # from Patin et al., rs12986962

Unnamed: 0,2:111851212:C:T
2:111808558:A:G,0.3154349


In [31]:
cor(t(geno['2:111808558:A:G',]), t(geno[lead_snp,])) # Their rs12986962-G correpsonds to our rs13025330-T

Unnamed: 0,2:111851212:C:T
2:111808558:A:G,0.561636


### Abundance of CXCl16 chemokine protein in serum

- CXCL16 lead snp rs2165109 allele C (also effect allele in their GWAS) corresponds to effect allele T for our csaQTL
- Beta in their lead assocition is positive so increased CXCL16 associated with our effect allele T

In [32]:
# load published summary statistics, rename columns as needed
cxcl16_gwas = read.delim("/data/srlab/lrumker/MCSC_Project/cna-qtl/ref_gwas/sumstats/GCST90087940_buildGRCh37_chr2region.tsv",
                     header = TRUE, stringsAsFactors = FALSE)
rownames(cxcl16_gwas)=c(1:nrow(cxcl16_gwas))
cxcl16_gwas$CHR = cxcl16_gwas$chromosome
cxcl16_gwas$BP = cxcl16_gwas$base_pair_location
cxcl16_gwas$P = cxcl16_gwas$p_value
cxcl16_gwas$SNP = cxcl16_gwas$variant_id
cxcl16_gwas$SE = cxcl16_gwas$standard_error
cxcl16_gwas$BETA = cxcl16_gwas$beta

# Import allele doses for computing LD
out_obj = keep_shared_snps(ref_gwas=cxcl16_gwas, celltype=celltype, lead_snp=lead_snp)
cxcl16_gwas=out_obj[[1]]
geno=out_obj[[2]]

cxcl16_gwas[order(cxcl16_gwas$P),][1,] # lead snp

Unnamed: 0_level_0,variant_id,p_value,chromosome,base_pair_location,effect_allele,other_allele,effect_allele_frequency,beta,standard_error,CHR,⋯,P,SNP,SE,BETA,ID,ID_flip,found,found_flip,found_any,ID_merged
Unnamed: 0_level_1,<chr>,<dbl>,<int>,<int>,<chr>,<chr>,<dbl>,<dbl>,<dbl>,<int>,⋯,<dbl>,<chr>,<dbl>,<dbl>,<chr>,<chr>,<lgl>,<lgl>,<dbl>,<chr>
1108,rs2165109,4.577201e-18,2,111818658,C,A,0.2586,0.181687,0.0208972,2,⋯,4.577201e-18,rs2165109,0.0208972,0.181687,2:111818658:A:C,2:111818658:C:A,True,False,1,2:111818658:A:C


In [33]:
# coloc
gwas_res = read.table(paste0("/data/srlab/lrumker/MCSC_Project/cna-qtl/results/coloc/gwas_local_",
                             celltype,"_" ,lead_snp,"_cis.qassoc"), 
                      header=TRUE, stringsAsFactors=FALSE)
gwas_res = gwas_res[gwas_res$SNP %in% cxcl16_gwas$ID_merged,]
all_phenos = read.table(paste0("/data/srlab/lrumker/MCSC_Project/cna-qtl/results/gwas_", 
                        celltype,"/spheno.tsv"), header = TRUE, row.names=1)
lead_pheno_trait_sd = sd(all_phenos[paste0("X", paste0(strsplit(lead_snp, ":")[[1]], collapse="."))][,1])
our_gwas_coloc = list(beta = gwas_res$BETA, varbeta = gwas_res$SE**2,
                    sdY = lead_pheno_trait_sd,type = "quant", snp = gwas_res$SNP)
ref_gwas_coloc = list(beta = cxcl16_gwas$BETA, varbeta = cxcl16_gwas$SE**2,
                     type = "quant", N = rep(5361, nrow(cxcl16_gwas)), MAF=cxcl16_gwas$effect_allele_frequency,
                      snp = cxcl16_gwas$ID_merged) # case-control
coloc_res <- coloc.abf(dataset1=our_gwas_coloc, dataset2=ref_gwas_coloc)

“estimating sdY from maf and varbeta, please directly supply sdY if known”


PP.H0.abf PP.H1.abf PP.H2.abf PP.H3.abf PP.H4.abf 
 6.91e-25  2.39e-13  4.63e-13  1.59e-01  8.41e-01 
[1] "PP abf for shared variant: 84.1%"


### Epithelial ovarian cancer
- These summary statistics reflect the "OCAC" cohort from Phelan et al, all European ancestry
- "The analyses included 66,450 samples from seven genotyping projects: 40,941 controls, 22,406 invasive cases and 3,103 borderline cases. The number of cases by histotype was as follows: serous borderline (1,954), mucinous borderline (1,149), LGSOC (1,012), HGSOC (13,037), ENOC (2,810), CCOC (1,366) and other EOC (2,764)."
- Phelan et al : Identification of 12 new susceptibility loci for different histotypes of epithelial ovarian cancer.
- The "overall" sumstats used for colocalization here are from the OCAC cohort with cases defined as "all invasive" histotypes, whereas in the main text we report the p-value for the lead SNP from the meta analysis (4.2e-8) of OCAC and CIMBA cohorts

In [34]:
ovca_gwas = read.csv("/data/srlab/lrumker/MCSC_Project/cna-qtl/ref_gwas/sumstats/ovarian_ca/chr2_region.csv",
                     header = TRUE, stringsAsFactors = FALSE)
ovca_gwas = ovca_gwas[ovca_gwas$overall_OR!=0,] # drops one snp only
rownames(ovca_gwas)=c(1:nrow(ovca_gwas))
ovca_gwas$CHR = ovca_gwas$Chromosome
ovca_gwas$BP = ovca_gwas$Position
ovca_gwas$P = ovca_gwas$overall_pvalue
ovca_gwas$SNP = ovca_gwas$OrigSNPname
ovca_gwas$effect_allele = ovca_gwas$Effect
ovca_gwas$other_allele = ovca_gwas$Baseline
ovca_gwas$BETA = ovca_gwas$overall_OR # per description doc accompanying sumstats, provided vals are log(OR)
ovca_gwas$SE = ovca_gwas$overall_SE
ovca_gwas$MAF = ovca_gwas$EAF
ovca_gwas$MAF[which(ovca_gwas$MAF>0.5)] = 1-ovca_gwas$MAF[which(ovca_gwas$MAF>0.5)]

# Import allele doses for computing LD
out_obj = keep_shared_snps(ref_gwas=ovca_gwas, celltype=celltype, lead_snp=lead_snp)
ovca_gwas=out_obj[[1]]
geno=out_obj[[2]]

ovca_gwas[order(ovca_gwas$P),][1,c("ID", "P", "BETA")] # lead snp

Unnamed: 0_level_0,ID,P,BETA
Unnamed: 0_level_1,<chr>,<dbl>,<dbl>
1440,2:111818658:A:C,7.718e-07,0.07511


In [35]:
# coloc
gwas_res = read.table(paste0("/data/srlab/lrumker/MCSC_Project/cna-qtl/results/coloc/gwas_local_",
                             celltype,"_" ,lead_snp,"_cis.qassoc"), 
                      header=TRUE, stringsAsFactors=FALSE)
gwas_res = gwas_res[gwas_res$SNP %in% ovca_gwas$ID_merged,]
all_phenos = read.table(paste0("/data/srlab/lrumker/MCSC_Project/cna-qtl/results/gwas_", 
                        celltype,"/spheno.tsv"), header = TRUE, row.names=1)
lead_pheno_trait_sd = sd(all_phenos[paste0("X", paste0(strsplit(lead_snp, ":")[[1]], collapse="."))][,1])
our_gwas_coloc = list(beta = gwas_res$BETA, varbeta = gwas_res$SE**2,
                    sdY = lead_pheno_trait_sd,type = "quant", snp = gwas_res$SNP)
ref_gwas_coloc = list(beta = ovca_gwas$BETA, varbeta = ovca_gwas$SE**2,
                     type = "quant", N = rep(85426, nrow(ovca_gwas)), MAF=ovca_gwas$MAF,
                      snp = ovca_gwas$ID_merged) # case-control
coloc_res <- coloc.abf(dataset1=our_gwas_coloc, dataset2=ref_gwas_coloc)

“estimating sdY from maf and varbeta, please directly supply sdY if known”


PP.H0.abf PP.H1.abf PP.H2.abf PP.H3.abf PP.H4.abf 
 1.22e-13  4.24e-02  4.67e-13  1.61e-01  7.96e-01 
[1] "PP abf for shared variant: 79.6%"


## Myeloid csaQTL at 15q25.1 

- BCL2A1 eQTL lead snp from eQTLgen rs8025805 
- Lead variant from flow cytometry is identical rs3826007 to OneK1K csaQTL

In [36]:
celltype="Myeloid"
lead_snp='15:80263217:C:T'
lead_snp_rsID='rs3826007'
set.seed(0)
locus_LD = region_plot_our_gwas(celltype=celltype, lead_snp=lead_snp, window_sz=100000, 
                                secondary_snps=c("rs8025805", "rs11632488"),
                               secondary_names= c("BCL2A1 eQTL", "PSC risk"), plot_type="svg")

### Similar trait quantified with flow cytometry

In [37]:
# zcat GCST90001584_buildGRCh37.tsv.gz | awk 'NR==1||$1==15' > GCST90001584_buildGRCh37.chr15.tsv
# Import reported sumstats, relabel columns as needed
orru_gwas = read.delim("/data/srlab/lrumker/MCSC_Project/cna-qtl/ref_gwas/sumstats/GCST90001584_buildGRCh37.chr15.tsv",
                     header = TRUE, stringsAsFactors = FALSE)
rownames(orru_gwas)=c(1:nrow(orru_gwas))
orru_gwas$CHR = orru_gwas$chromosome
orru_gwas$BP = orru_gwas$base_pair_location
orru_gwas$P = orru_gwas$p_value
orru_gwas$SE = orru_gwas$standard_error
orru_gwas$BETA = orru_gwas$beta

# Import allele doses for computing LD
out_obj = keep_shared_snps(ref_gwas=orru_gwas, celltype=celltype, lead_snp=lead_snp)
orru_gwas=out_obj[[1]]
geno=out_obj[[2]]
orru_gwas = orru_gwas[orru_gwas$MAF !=0,]

orru_gwas[order(orru_gwas$P),][1,] # lead snp

Unnamed: 0_level_0,chromosome,base_pair_location,effect_allele,other_allele,N,MAF,effect_allele_frequency,beta,standard_error,p_value,⋯,P,SE,BETA,ID,ID_flip,found,found_flip,found_any,ID_merged,SNP
Unnamed: 0_level_1,<int>,<int>,<chr>,<chr>,<int>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,⋯,<dbl>,<dbl>,<dbl>,<chr>,<chr>,<lgl>,<lgl>,<dbl>,<chr>,<chr>
3197,15,80263217,T,C,3629,0.2136,0.2136,-0.1739,0.02973,5.41e-09,⋯,5.41e-09,0.02973,-0.1739,15:80263217:C:T,15:80263217:T:C,True,False,1,15:80263217:C:T,rs3826007


In [38]:
# coloc
gwas_res = read.table(paste0("/data/srlab/lrumker/MCSC_Project/cna-qtl/results/coloc/gwas_local_",
                             celltype,"_" ,lead_snp,"_cis.qassoc"), 
                      header=TRUE, stringsAsFactors=FALSE)
gwas_res = gwas_res[gwas_res$SNP %in% orru_gwas$ID_merged,]
all_phenos = read.table(paste0("/data/srlab/lrumker/MCSC_Project/cna-qtl/results/gwas_", 
                        celltype,"/spheno.tsv"), header = TRUE, row.names=1)
lead_pheno_trait_sd = sd(all_phenos[paste0("X", paste0(strsplit(lead_snp, ":")[[1]], collapse="."))][,1])
our_gwas_coloc = list(beta = gwas_res$BETA, varbeta = gwas_res$SE**2,
                    sdY = lead_pheno_trait_sd,type = "quant", snp = gwas_res$SNP)
ref_gwas_coloc = list(beta = orru_gwas$BETA, varbeta = orru_gwas$SE**2,
                    N=orru_gwas$N, MAF=orru_gwas$MAF, type = "quant", snp = orru_gwas$ID_merged)
coloc_res <- coloc.abf(dataset1=our_gwas_coloc, dataset2=ref_gwas_coloc)

“estimating sdY from maf and varbeta, please directly supply sdY if known”


PP.H0.abf PP.H1.abf PP.H2.abf PP.H3.abf PP.H4.abf 
 7.95e-10  8.23e-06  1.48e-06  1.44e-02  9.86e-01 
[1] "PP abf for shared variant: 98.6%"


### Primary sclerosing cholangitis

- https://www.ebi.ac.uk/gwas/studies/GCST004030
- 2,871 European ancestry cases, 12,019 European ancestry controls
- Genome-wide association study of primary sclerosing cholangitis identifies new risk loci and quantifies the genetic relationship with inflammatory bowel disease.
Ji et al

In [39]:
psc_gwas = read.delim("/data/srlab/lrumker/MCSC_Project/cna-qtl/ref_gwas/sumstats/ipsc.munged.chr15.txt",
                     header = TRUE, stringsAsFactors = FALSE, sep=" ")
rownames(psc_gwas)=c(1:nrow(psc_gwas))
psc_gwas$CHR = psc_gwas$chr
psc_gwas$BP = psc_gwas$pos
psc_gwas$P = psc_gwas$p
psc_gwas$SE = psc_gwas$se
psc_gwas$BETA = log(psc_gwas$or)
psc_gwas$effect_allele = psc_gwas$allele_1 # allele_1 is the risk allele, per header info
psc_gwas$other_allele = psc_gwas$allele_0

# Import allele doses for computing LD
out_obj = keep_shared_snps(ref_gwas=psc_gwas, celltype=celltype, lead_snp=lead_snp)
psc_gwas=out_obj[[1]]
geno=out_obj[[2]]

psc_gwas[order(psc_gwas$P),][1:2,c("SNP", "ID", "BETA", "P")] # lead snp

Unnamed: 0_level_0,SNP,ID,BETA,P
Unnamed: 0_level_1,<chr>,<chr>,<dbl>,<dbl>
3124,rs146807490,15:80259955:TGTAA:T,0.1655144,1.310619e-06
3142,rs11632488,15:80267501:A:G,0.1646666,1.471368e-06


In [40]:
# coloc
gwas_res = read.table(paste0("/data/srlab/lrumker/MCSC_Project/cna-qtl/results/coloc/gwas_local_",
                             celltype,"_" ,lead_snp,"_cis.qassoc"), 
                      header=TRUE, stringsAsFactors=FALSE)
gwas_res = gwas_res[gwas_res$SNP %in% psc_gwas$ID_merged,]
all_phenos = read.table(paste0("/data/srlab/lrumker/MCSC_Project/cna-qtl/results/gwas_", 
                        celltype,"/spheno.tsv"), header = TRUE, row.names=1)
lead_pheno_trait_sd = sd(all_phenos[paste0("X", paste0(strsplit(lead_snp, ":")[[1]], collapse="."))][,1])
our_gwas_coloc = list(beta = gwas_res$BETA, varbeta = gwas_res$SE**2,
                    sdY = lead_pheno_trait_sd,type = "quant", snp = gwas_res$SNP)
ref_gwas_coloc = list(beta = psc_gwas$BETA, varbeta = psc_gwas$SE**2,
                     type = "cc", snp = psc_gwas$ID_merged) # case-control
coloc_res <- coloc.abf(dataset1=our_gwas_coloc, dataset2=ref_gwas_coloc)

“minimum p value is: 2.2565e-06
If this is what you expected, this is not a problem.
If this is not as small as you expected, please check the 02_data vignette.”


PP.H0.abf PP.H1.abf PP.H2.abf PP.H3.abf PP.H4.abf 
 2.36e-07  2.31e-03  2.22e-06  2.08e-02  9.77e-01 
[1] "PP abf for shared variant: 97.7%"
