### Prepare tables to run fGWAS fine-mapping with chromatin annotations using Bill's pipeline

In [1]:
suppressPackageStartupMessages(library("tidyr"))
# Strongly bias number formatting towards fixed (non-scientific) notation.
# NOTE(review): presumably so that genomic positions written out below are
# not rendered as e.g. 1e+06 -- confirm.
options(scipen=999)

In [2]:
suppressPackageStartupMessages(library("stringr"))

In [33]:
# Directory against which all relative paths below are resolved.
home <- "/home/paola/Family1070/private_output/fgwas_analysis/A_fib/preprocessing/"
setwd(dir = home)

In [4]:
# Load the GWAS summary statistics (tab-delimited, first row is the header).
# stringsAsFactors = FALSE keeps text columns (SNP IDs, alleles, chromosome
# labels) as plain character vectors on R < 4.0 as well, so later merges,
# %in% tests and NA replacement do not have to deal with factor levels.
gwas <- read.table(
    "/publicdata/gwas_summary_stats_20180124/AF_Christophersen2017/AF_GWAS_ALLv31_maf0.01.txt",
    header = TRUE, sep = "\t", stringsAsFactors = FALSE
)

In [6]:
# Number of rows read from the summary-statistics file (sanity check).
nrow(gwas)

In [7]:
# Rename the columns positionally to the names used in the rest of the notebook.
# The rename assumes exactly eight columns in this order; fail loudly if the
# input layout differs instead of silently mislabelling columns.
stopifnot(ncol(gwas) == 8L)
colnames(gwas) <- c("SNPID", "A1", "A2", "CHR", "POS", "Beta", "SE", "P_VALUE")

In [8]:
# Columns that are selected first when building the annotated table (`mano`).
required <- c(
    "SNPID", "CHR", "POS",
    "SE", "Z", "F", "N"
)

In [9]:
# Z-score from effect size and standard error.
gwas[["Z"]] <- gwas[["Beta"]] / gwas[["SE"]]
# N and F are filled with the placeholder "." for every row.
gwas[["N"]] <- "."
gwas[["F"]] <- "."

In [10]:
# Start coordinate (POS - 1), used as the second column of the BED export below.
gwas[["POS_1"]] <- gwas[["POS"]] - 1

In [11]:
# Count rows whose SNP identifier is missing (sanity check).
sum(is.na(gwas$SNPID))

Annotate with regulatory regions and ASE

In [12]:
# Export the SNPs as a headerless, tab-separated 4-column file
# (CHR, POS - 1, POS, SNPID) for the bedtools intersection.
write.table(
    gwas[, c("CHR", "POS_1", "POS", "SNPID")],
    file = "AF.bed",
    sep = "\t",
    quote = FALSE,
    row.names = FALSE,
    col.names = FALSE
)

In [13]:
# Intersect a SNP BED file with an annotation BED file (via `bedtools
# intersect -wo`) and return one row per SNP with one column per annotation.
#
# Args:
#   regiofile: path to the SNP file passed to bedtools as `-a`. The column
#              indices used below assume it has 4 columns
#              (chromosome, start, end, SNP id).
#   annofile:  path to the annotation file passed to bedtools as `-b`.
#   outfile:   path of the intermediate bedtools output. Defaults to the
#              file name this function has always written in the working
#              directory.
#
# Returns:
#   A data frame with columns CHR, START, POS, SNPID followed by one column
#   per distinct value of output column 8 (the annotation label), holding the
#   value of output column 9, with 0 where a SNP does not overlap that
#   annotation. If nothing overlaps, a zero-row data frame with only the four
#   key columns is returned.
#   NOTE(review): column 9 is presumably the overlap length reported by `-wo`
#   (1 for single-base SNP intervals) -- confirm against the number of columns
#   in the annotation file.
#
# Requires `bedtools` on the PATH and tidyr (for `spread`) to be attached.
annotate_gwas <- function(regiofile, annofile, outfile = "intesect_results") {
    # Quote every path so spaces or shell metacharacters cannot break the command.
    cmd <- paste("bedtools intersect -a", shQuote(regiofile),
                 "-b", shQuote(annofile),
                 "-wo >", shQuote(outfile))
    status <- system(cmd)
    if (status != 0) {
        stop("bedtools intersect failed with exit status ", status,
             call. = FALSE)
    }

    # read.table() errors on an empty file; report "no overlaps" explicitly.
    if (isTRUE(file.size(outfile) == 0)) {
        return(data.frame(CHR = character(0), START = integer(0),
                          POS = integer(0), SNPID = character(0),
                          stringsAsFactors = FALSE))
    }

    annot <- read.table(outfile, header = FALSE, sep = "\t")
    if (ncol(annot) < 9) {
        stop("Expected at least 9 columns in bedtools output, found ",
             ncol(annot), call. = FALSE)
    }

    # Keep the SNP columns plus annotation label (V8) and value (V9), then
    # de-duplicate on SNP + label. De-duplicating before dropping the
    # annotation interval columns would leave two rows when a SNP overlaps
    # two intervals of the same annotation, which makes spread() fail.
    annot <- annot[, c(1:4, 8, 9)]
    annot <- annot[!duplicated(annot[, 1:5]), ]

    annot_wide <- spread(annot, V8, V9)
    annot_wide[is.na(annot_wide)] <- 0
    colnames(annot_wide)[1:4] <- c("CHR", "START", "POS", "SNPID")
    annot_wide
}

In [14]:
# Annotate the exported SNPs with the annotation intervals.
anno <- annotate_gwas(
    regiofile = 'AF.bed',
    annofile = 'frazer_lab_cm_annotations.bed'
)

In [15]:
# Preview the first rows of the wide annotation table.
head(anno)

CHR,START,POS,SNPID,ATAC,H3K27AC,NKX25
1,714018,714019,rs114983708,1,1,1
1,714426,714427,rs12028261,0,1,0
1,715264,715265,rs12184267,0,1,0
1,760997,760998,rs148828841,0,1,0
1,761146,761147,rs3115850,0,1,0
1,761955,761956,1:761956:i:r,0,1,0


In [16]:
# Left join: keep every GWAS row, attaching annotation columns where the
# SNP (matched on chromosome, position and ID) has an annotation row.
m <- merge(
    x = gwas,
    y = anno,
    by = c("CHR", "POS", "SNPID"),
    all.x = TRUE
)

In [17]:
# Rows x columns of the annotation table.
dim(anno)

In [18]:
# SNPs without an annotation row get NA in the annotation columns after the
# left join; set those to 0 ("not annotated").
# Only the annotation columns are touched: replacing every NA in `m` would
# also turn missing GWAS statistics (e.g. P_VALUE, Z, SE) into 0, which is a
# valid-looking but wrong value. Missing statistics stay NA so they can be
# inspected or filtered explicitly.
for (anno_col in setdiff(intersect(colnames(anno), colnames(m)),
                         c("CHR", "START", "POS", "SNPID"))) {
    m[[anno_col]][is.na(m[[anno_col]])] <- 0
}

In [19]:
# Rows x columns of the merged table.
dim(m)

In [20]:
# SNP identifiers, one per line, read from the two ASE list files.
nkx_ase <- readLines(con = "NKX25_ASE_rsids.txt")
h3k_ase <- readLines(con = "H3K27AC_ASE_rsids.txt")

In [21]:
# 1 if the SNP ID appears in the corresponding ASE list, 0 otherwise.
m[["NKX25_ASE"]] <- as.numeric(is.element(m[["SNPID"]], nkx_ase))
m[["H3K27AC_ASE"]] <- as.numeric(is.element(m[["SNPID"]], h3k_ase))

In [22]:
# Drop rows that are exact duplicates across all columns (first one is kept).
m <- unique(m)

In [23]:
# Count GWAS rows that share a chromosome AND position with an earlier row.
# Checking POS alone would also count unrelated SNPs that merely have the same
# coordinate on different chromosomes; the de-duplication further down keys
# on CHR + POS, so this diagnostic uses the same key.
sum(duplicated(gwas[, c("CHR", "POS")]))

In [24]:
# Rows x columns after removing exact duplicate rows.
dim(m)

Remove duplicated SNPs (merged / renamed SNPs)

In [25]:
# Sort by ascending SE so that, among rows sharing CHR/POS, the one with the
# smallest standard error comes first and is the one kept.
m <- m[order(m$SE), ]
position_seen <- duplicated(m[, c("CHR", "POS")])
m <- m[!position_seen, ]
rm(position_seen)
# Restore genomic order.
m <- m[order(m$CHR, m$POS), ]

remove chr Y and M

In [26]:
# Keep only rows whose chromosome label is neither "M" nor "Y".
m <- m[!is.element(m$CHR, c("M", "Y")), ]

In [27]:
# Minimal table: SNP identifier, coordinates and p-value.
mori <- m[, c("SNPID", "CHR", "POS", "P_VALUE"), drop = FALSE]

In [28]:
# Write the minimal table one directory up, tab-separated with a header row.
write.table(
    mori,
    file = "../Original_input",
    sep = "\t",
    quote = FALSE,
    row.names = FALSE,
    col.names = TRUE
)

In [29]:
# Annotated table: the `required` columns followed by the five annotation flags.
mano <- m[, c(required,
              'ATAC', 'H3K27AC', 'NKX25',
              'NKX25_ASE', 'H3K27AC_ASE'),
          drop = FALSE]

In [30]:
# Rows x columns of the annotated table about to be written.
dim(mano)

In [31]:
# Write the annotated table, space-separated with a header row.
write.table(
    mano,
    file = "Annotated_input",
    sep = " ",
    quote = FALSE,
    row.names = FALSE,
    col.names = TRUE
)

In [35]:
# Evaluates to 4000.
# NOTE(review): presumably the fgwas chunk size (-k) referred to in the note
# that follows -- confirm.
5000/1.25

Remember to run the fgwas command to determine the size of the chunks for the analysis (-k).
For this analysis, fgwas k = 4000 (5000 / 1.25 Mb).

### Read Results

There are only two loci for atrial fibrillation associated with ASE (however, many have the peak; maybe the variants were not tested in the family):
1. CAV1 (strong signal that is present in many atrial phenotypes: PR interval, P-wave and A-fib)
2. SCN10A/SCN5A

In [70]:
# Load the per-row (.bfs) and per-chunk (.segbfs) result tables.
res <- read.table('../pipeline_out/Final_Model/Final_Model.bfs', header = TRUE)
seg <- read.table('../pipeline_out/Final_Model/Final_Model.segbfs', header = TRUE)
# Attach each chunk's PPA to its rows; after the merge PPA.x comes from the
# .bfs table and PPA.y from the .segbfs table.
res <- merge(x = res, y = seg[, c('chunk', 'PPA')], by = "chunk")
# Combined PPA = product of the two, then sort from highest to lowest.
res[["PPA"]] <- res[["PPA.x"]] * res[["PPA.y"]]
res <- res[order(res[["PPA"]], decreasing = TRUE), ]

In [72]:
# Result rows flagged NKX25_ASE == 1 (rows where the flag is NA are dropped).
nkx <- res[which(res$NKX25_ASE == 1), ]