# PAINTOR multi-ethnic fine-mapping

In [1]:
library(data.table)
library(dplyr)


Attaching package: ‘dplyr’


The following objects are masked from ‘package:data.table’:

    between, first, last


The following objects are masked from ‘package:stats’:

    filter, lag


The following objects are masked from ‘package:base’:

    intersect, setdiff, setequal, union




In [3]:
# All relative paths used below (results/, paintor/, ../genotype/) are resolved
# against this project directory.
setwd("~/project-gandalm/isoform_twas/eqtl_new/")

In [6]:
# Load the three per-ancestry `all_assoc_perm_info.txt` tables (EUR, AMR, AFR)
# as plain data.frames rather than data.tables.
eur <- fread(
  "results/eur_perm_50HCP/all_assoc_perm_info.txt",
  data.table = FALSE
)
amr <- fread(
  "results/amr_perm_15HCP/all_assoc_perm_info.txt",
  data.table = FALSE
)
afr <- fread(
  "results/afr_perm_25HCP/all_assoc_perm_info.txt",
  data.table = FALSE
)

In [7]:
# Keep only the rows whose q-value is below 0.05 in each ancestry table.
eur <- filter(eur, qval < 0.05)
amr <- filter(amr, qval < 0.05)
afr <- filter(afr, qval < 0.05)

In [6]:
# Row and column counts of the three ancestry tables.
dim(eur)
dim(amr)
dim(afr)

In [8]:
# Intersect the three ancestry tables on `pid`: only ids present in EUR, AMR
# and AFR survive. The joins are folded left to right (EUR, then AMR, then AFR).
shared <- Reduce(
  function(acc, nxt) inner_join(acc, nxt, by = "pid"),
  list(eur, amr, afr)
)
dim(shared)

**Use ENSG00000001460 as an example locus for a PAINTOR test run; the full analysis should ultimately have 986 locus files. This locus has 3018 cis variants shared between EUR, AMR, and AFR.**

In [4]:
# Load the header-less nominal association tables for the example gene, one
# per ancestry, as plain data.frames (columns are auto-named V1, V2, ...).
eur_nominal <- fread(
  "paintor/eur_nominal_50HCP_ENSG00000001460.txt",
  header = FALSE,
  data.table = FALSE
)
amr_nominal <- fread(
  "paintor/amr_nominal_15HCP_ENSG00000001460.txt",
  header = FALSE,
  data.table = FALSE
)
afr_nominal <- fread(
  "paintor/afr_nominal_25HCP_ENSG00000001460.txt",
  header = FALSE,
  data.table = FALSE
)

In [5]:
# Keep only variants (column V2) present in all three nominal tables.
# Joining EUR, then AMR, then AFR gives the suffixes .x (EUR), .y (AMR) and
# no suffix (AFR) on the remaining columns.
shared <- Reduce(
  function(acc, nxt) inner_join(acc, nxt, by = "V2"),
  list(eur_nominal, amr_nominal, afr_nominal)
)
dim(shared)

**Calculate ZSCORE from pval**

In [7]:
# Create the three z-score columns as NA placeholders (added in the order
# z, z.y, z.x) so they can be filled in afterwards.
shared$z <- NA
shared$z.y <- NA
shared$z.x <- NA

In [9]:
# Convert two-sided p-values to signed z-scores for each population.
# V4* is used as the p-value and V5* supplies the sign (presumably the
# nominal-pass p-value and slope columns -- confirm against the input files).
#
# Changes from the previous row-by-row loop:
#   * |z| is taken from the upper tail with lower.tail = FALSE. The previous
#     qnorm(1 - p / 2) returns Inf once p / 2 drops below double precision
#     (about 1e-16), because 1 - p / 2 rounds to exactly 1.
#   * The computation is vectorised, so a missing effect or p-value yields NA
#     instead of stopping with "missing value where TRUE/FALSE needed", and a
#     zero-row table no longer breaks on 1:nrow().
#
# pval   : numeric vector of two-sided p-values.
# effect : numeric vector of signed effects, same length as `pval`.
# Returns a numeric vector of z-scores carrying the sign of `effect`.
pval_to_z <- function(pval, effect) {
  z_abs <- qnorm(pval / 2, lower.tail = FALSE)
  # An effect of exactly 0 keeps the positive branch, as before.
  ifelse(effect < 0, -z_abs, z_abs)
}

shared$z.x <- pval_to_z(shared[["V4.x"]], shared[["V5.x"]])
shared$z.y <- pval_to_z(shared[["V4.y"]], shared[["V5.y"]])
shared$z <- pval_to_z(shared[["V4"]], shared[["V5"]])

In [10]:
# Reduce the table to the variant id plus the three z-score columns
# (EUR = z.x, AMR = z.y, AFR = z), then preview the first rows.
shared <- shared[, c("V2", "z.x", "z.y", "z")]
head(shared)

Unnamed: 0_level_0,V2,z.x,z.y,z
Unnamed: 0_level_1,<chr>,<dbl>,<dbl>,<dbl>
1,rs3795294,-0.8081991,1.1167201,-1.3232869
2,rs3795295,-0.5947135,-1.4185337,0.2471101
3,rs55972836,1.226233,0.5759041,-0.6346787
4,rs3737646,-0.4035998,1.7074195,-0.5700895
5,rs72882989,-0.3738687,0.3768151,0.2466695
6,rs12122936,1.4035295,-0.8613124,0.5201328


In [11]:
# Label the columns and write the locus file: space-delimited, header row
# included, no row names and no quoting.
names(shared) <- c("RSID", "ZSCORE.P1", "ZSCORE.P2", "ZSCORE.P3")
write.table(
  shared,
  file = "paintor/Locus1",
  sep = " ",
  quote = FALSE,
  row.names = FALSE,
  col.names = TRUE
)

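**Use plink to estimate LD of the shared variants in all 3 populations.** Note: `--r2` reports squared correlations, whereas PAINTOR's LD input is documented as signed Pearson correlations, so check whether `--r bin` should be used instead. Also confirm that the variant order in the plink output matches the row order of `Locus1`.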

`plink --r2 bin \
--vcf ${file} \
--extract shared_variants.txt \
--out eur`

In [13]:
# One-column data.frame holding the shared variant ids.
id <- shared["RSID"]
# Export of the id list (the file passed to plink --extract); kept disabled.
# write.table(id, "paintor/shared_variants.txt", col.names = F, row.names = F, quote = F, sep = "\t")

In [23]:
# Open the help page for readBin (interactive lookup only).
?readBin

In [29]:
# Read plink's binary LD output for EUR as a flat vector of 8-byte doubles.
# The number of values is taken from the file size rather than a hard-coded
# 3018 * 3018, so the same cell works for loci with a different number of
# shared variants.
eur_ld_path <- "paintor/eur.ld.bin"
stopifnot(file.exists(eur_ld_path))
eur_ld_n <- file.size(eur_ld_path) / 8
# Passing the path lets readBin open and close the file itself, so no
# connection is left open if the read fails.
eur_ld <- readBin(eur_ld_path, what = "numeric", n = eur_ld_n, size = 8)

In [30]:
# eur_ld is still a flat vector here, so dim() returns NULL.
dim(eur_ld)

NULL

In [31]:
# Number of values actually read from the binary file.
length(eur_ld)

In [28]:
# Expected value count for a 3018 x 3018 matrix, to compare with length(eur_ld).
3018^2

In [32]:
# Peek at the first few LD values.
head(eur_ld)

In [34]:
# First few positions whose value is exactly 1 (presumably the matrix
# diagonal -- confirm).
head(which(eur_ld == 1))

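**The binary file does not say whether the values are stored row by row (row1, row2, ...) or column by column (col1, col2, ...), but it does not matter: an LD matrix is symmetric, so both readings give the same matrix.**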

In [35]:
# Reshape the flat LD vector into a square matrix. The side length is derived
# from the vector length instead of being hard-coded to 3018, and the check
# stops early if the vector does not hold a perfect square number of values.
eur_ld_side <- sqrt(length(eur_ld))
stopifnot(eur_ld_side == round(eur_ld_side))
eur_matrix <- matrix(eur_ld, nrow = eur_ld_side)

In [36]:
# Confirm the reshaped LD matrix is square.
dim(eur_matrix)

In [37]:
# Write the EUR LD matrix as plain space-delimited text, with no header,
# no row names and no quoting.
write.table(
  eur_matrix,
  file = "paintor/Locus1.LD1",
  sep = " ",
  quote = FALSE,
  row.names = FALSE,
  col.names = FALSE
)

In [3]:
# AMR: read plink's binary LD output (8-byte doubles), reshape it into a
# square matrix and write it as space-delimited text.
# The value count and matrix side are derived from the file itself rather than
# hard-coded to 3018, so the cell also works for loci of a different size.
amr_ld_path <- "paintor/amr.ld.bin"
stopifnot(file.exists(amr_ld_path))
# Passing the path lets readBin open and close the file itself, so no
# connection is left open if the read fails.
amr_ld <- readBin(
  amr_ld_path,
  what = "numeric",
  n = file.size(amr_ld_path) / 8,
  size = 8
)
length(amr_ld)
amr_ld_side <- sqrt(length(amr_ld))
# Stop early if the file does not hold a perfect square number of values.
stopifnot(amr_ld_side == round(amr_ld_side))
amr_matrix <- matrix(amr_ld, nrow = amr_ld_side)
write.table(
  amr_matrix,
  "paintor/Locus1.LD2",
  col.names = FALSE,
  row.names = FALSE,
  quote = FALSE,
  sep = " "
)

In [4]:
# AFR: read plink's binary LD output (8-byte doubles), reshape it into a
# square matrix and write it as space-delimited text.
# The value count and matrix side are derived from the file itself rather than
# hard-coded to 3018, so the cell also works for loci of a different size.
afr_ld_path <- "paintor/afr.ld.bin"
stopifnot(file.exists(afr_ld_path))
# Passing the path lets readBin open and close the file itself, so no
# connection is left open if the read fails.
afr_ld <- readBin(
  afr_ld_path,
  what = "numeric",
  n = file.size(afr_ld_path) / 8,
  size = 8
)
length(afr_ld)
afr_ld_side <- sqrt(length(afr_ld))
# Stop early if the file does not hold a perfect square number of values.
stopifnot(afr_ld_side == round(afr_ld_side))
afr_matrix <- matrix(afr_ld, nrow = afr_ld_side)
write.table(
  afr_matrix,
  "paintor/Locus1.LD3",
  col.names = FALSE,
  row.names = FALSE,
  quote = FALSE,
  sep = " "
)

**Generate annotation file for the shared cis-variants**
- Cannot use the annotation file generated for torus: it was built for the variants in the ALL genotype set and does not cover all variants shared between EUR, AMR, and AFR.
- Generate annot from ENCODE Regulatory Build and EUR variant coord

In [19]:
# Preview the shared variant ids before annotating them.
head(id)

Unnamed: 0_level_0,RSID
Unnamed: 0_level_1,<chr>
1,rs3795294
2,rs3795295
3,rs55972836
4,rs3737646
5,rs72882989
6,rs12122936


In [20]:
# Load the EUR variant coordinate table as a plain data.frame and preview it.
# Its `GENE` column holds the variant rsID.
coord <- fread(
  "../genotype/all_data/isec_R2_greater_than_3/ancestry/annot/eur_variant_coord.tsv",
  data.table = FALSE
)
head(coord)

Unnamed: 0_level_0,GENE,CHR,START,END
Unnamed: 0_level_1,<chr>,<int>,<int>,<dbl>
1,rs61769339,1,662622,662623
2,rs12238997,1,693731,693732
3,rs61769351,1,693823,693824
4,rs142559957,1,704637,704638
5,rs58276399,1,731718,731719
6,rs61770163,1,732032,732033


In [21]:
# Attach coordinates to every shared variant, matching RSID against the
# coordinate table's GENE column. Variants without a match keep NA.
id <- left_join(id, coord, by = c(RSID = "GENE"))
# The row count should be unchanged unless the coordinate table has
# duplicate ids.
dim(id)

In [23]:
# Load the EUR per-variant annotation table (one indicator column per
# regulatory feature class) as a plain data.frame and preview it.
annot <- fread(
  "../genotype/all_data/isec_R2_greater_than_3/ancestry/annot/eur_variant_annot.txt.gz",
  data.table = FALSE
)
head(annot)

Unnamed: 0_level_0,SNP,TF_binding_site_d,promoter_flanking_region_d,promoter_d,open_chromatin_region_d,enhancer_d,CTCF_binding_site_d
Unnamed: 0_level_1,<chr>,<int>,<int>,<int>,<int>,<int>,<int>
1,rs61769339,0,1,0,0,0,0
2,rs12238997,0,0,0,0,0,0
3,rs61769351,0,0,0,0,0,0
4,rs142559957,0,0,0,0,0,0
5,rs58276399,0,0,0,0,0,0
6,rs61770163,0,0,0,0,0,0


In [25]:
# Attach the annotation indicators to every shared variant (RSID matched to
# SNP). NOTE(review): variants missing from `annot` would get NA here --
# confirm none are missing before writing the annotation file.
id <- left_join(id, annot, by = c(RSID = "SNP"))

In [27]:
# Preview the variants with their coordinates and annotation indicators.
head(id)

Unnamed: 0_level_0,RSID,CHR,START,END,TF_binding_site_d,promoter_flanking_region_d,promoter_d,open_chromatin_region_d,enhancer_d,CTCF_binding_site_d
Unnamed: 0_level_1,<chr>,<int>,<int>,<dbl>,<int>,<int>,<int>,<int>,<int>,<int>
1,rs3795294,1,23744156,23744157,0,1,0,0,0,0
2,rs3795295,1,23744169,23744170,0,1,0,0,0,0
3,rs55972836,1,23744691,23744692,0,1,0,0,0,0
4,rs3737646,1,23745228,23745229,0,0,0,0,0,0
5,rs72882989,1,23745559,23745560,0,1,0,0,0,0
6,rs12122936,1,23745885,23745886,0,1,0,0,0,0


In [28]:
# Drop the first four columns (RSID, CHR, START, END) by position, leaving
# only the annotation indicator columns.
locus.id <- id[-seq_len(4)]

In [30]:
# Write the annotation matrix for the locus: space-delimited, header row
# included, no row names and no quoting.
write.table(
  locus.id,
  file = "paintor/Locus1.annotations",
  sep = " ",
  quote = FALSE,
  row.names = FALSE,
  col.names = TRUE
)

In [31]:
# List the annotation names written to the annotation file's header row.
colnames(locus.id)

**Running software**
