-
Notifications
You must be signed in to change notification settings - Fork 2
/
pQTLandeQTLoverlap.Rmd
122 lines (85 loc) · 5.66 KB
/
pQTLandeQTLoverlap.Rmd
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
---
title: "Protein and Expression specific QTL overlaps"
author: "Briana Mittleman"
date: "6/14/2019"
output: html_document
---
```{r setup, include=FALSE}
knitr::opts_chunk$set(echo = TRUE)
```
```{r}
library(tidyverse)
library(workflowr)
```
This analysis will be similar to the explained and unexplained eQTL analysis available [here](exvunexpeQTL.html). I downloaded the protein and expression specific QTLs from Battle et al. 2015. This information is in supplimentary table s2.
First I need to convert the ENSG ids.
```{r}
geneNames=read.table("../../genome_anotation_data/ensemble_to_genename.txt", sep="\t", col.names = c('gene_id', 'GeneName', 'source' ),stringsAsFactors = F, header = T)
psQTL=read.table("../data/Battle_pQTL/psQTLGenes.txt", header = T, stringsAsFactors = F, col.names="gene_id") %>% inner_join(geneNames, by="gene_id") %>% select(GeneName)
write.table(psQTL, file="../data/Battle_pQTL/psQTLGeneNames.txt", row.names = F, col.names = F, quote = F, sep="\t")
esQTL=read.table("../data/Battle_pQTL/esQTLGenes.txt", header = T, stringsAsFactors = F, col.names="gene_id") %>% inner_join(geneNames, by="gene_id") %>% select(GeneName)
write.table(esQTL, file="../data/Battle_pQTL/esQTLGeneNames.txt", row.names = F, col.names = F, quote = F, sep="\t")
```
Now I cat use the subsetpermAPAwithGenelist.py code to subset my results:
```{bash, eval=F}
mkdir ../data/ApaByPgene
python subsetpermAPAwithGenelist.py ../data/Battle_pQTL/psQTLGeneNames.txt Total ../data/ApaByPgene/TotalApaPSGenes.txt
python subsetpermAPAwithGenelist.py ../data/Battle_pQTL/esQTLGenes.txt Total ../data/ApaByPgene/TotalApaESGenes.txt
python subsetpermAPAwithGenelist.py ../data/Battle_pQTL/psQTLGeneNames.txt Nuclear ../data/ApaByPgene/NuclearApaPSGenes.txt
python subsetpermAPAwithGenelist.py ../data/Battle_pQTL/esQTLGenes.txt Nuclear ../data/ApaByPgene/NuclearApaESGenes.txt
```
I also need those not in eQTL or pQTL:
```{bash,eval=F}
python subsetAPAnotEorPgene.py Total ../data/ApaByPgene/TotalApaNOTPorEGenes.txt
python subsetAPAnotEorPgene.py Nuclear ../data/ApaByPgene/NuclearApaNOTPorEGenes.txt
```
Total:
```{r}
tot.notEorP=read.table("../data/ApaByPgene/TotalApaNOTPorEGenes.txt",stringsAsFactors = F,col.names = c("pid", "nvar", "shape1", "shape2", "dummy", "sid", "dist", "npval", "slope", "ppval", "bpval"))
tot.PS=read.table("../data/ApaByPgene/TotalApaPSGenes.txt",stringsAsFactors = F,col.names = c("pid", "nvar", "shape1", "shape2", "dummy", "sid", "dist", "npval", "slope", "ppval", "bpval"))
tot.ES=read.table("../data/ApaByPgene/TotalApaESGenes.txt",stringsAsFactors = F, col.names = c("pid", "nvar", "shape1", "shape2", "dummy", "sid", "dist", "npval", "slope", "ppval", "bpval") )
tot.ex=read.table("../data/ApaByEgene/TotalApaexplainedeGenes.txt",stringsAsFactors = F,col.names = c("pid", "nvar", "shape1", "shape2", "dummy", "sid", "dist", "npval", "slope", "ppval", "bpval"))
tot.un=read.table("../data/ApaByEgene/TotalApaUnexplainedeGenes.txt",stringsAsFactors = F, col.names = c("pid", "nvar", "shape1", "shape2", "dummy", "sid", "dist", "npval", "slope", "ppval", "bpval") )
tot_allE=as.data.frame(rbind(tot.ex,tot.un))
tot.PS=na.omit(tot.PS)
tot.notEorP=na.omit(tot.notEorP)
tot.ES=na.omit(tot.ES)
tot.un=na.omit(tot.un)
```
```{r}
qqplot(-log10(runif(nrow(tot.notEorP))), -log10(tot.notEorP$bpval), xlab="-log10(Uniform)", ylab="-log10(beta pval)", main="Total Apa")
points(sort(-log10(runif(nrow(tot.PS)))), sort(-log10(tot.PS$bpval)),col= alpha("Red"))
points(sort(-log10(runif(nrow(tot_allE)))), sort(-log10(tot_allE$bpval)),col= alpha("blue"))
abline(0,1)
legend("topleft", legend=c("Neither eGenes nor pGenes", "pGenes", "eGenes"),col=c("black", "red","blue"), pch=16,bty = 'n')
```
```{r}
wilcox.test(tot.PS$bpval,tot.notEorP$bpval,alternative="less")
wilcox.test(tot_allE$bpval,tot.notEorP$bpval,alternative="less")
wilcox.test(tot.PS$bpval,tot_allE$bpval, alternative="less")
```
Nuclear:
```{r}
nuc.notEorP=read.table("../data/ApaByPgene/NuclearApaNOTPorEGenes.txt",stringsAsFactors = F,col.names = c("pid", "nvar", "shape1", "shape2", "dummy", "sid", "dist", "npval", "slope", "ppval", "bpval"))
nuc.PS=read.table("../data/ApaByPgene/NuclearApaPSGenes.txt",stringsAsFactors = F,col.names = c("pid", "nvar", "shape1", "shape2", "dummy", "sid", "dist", "npval", "slope", "ppval", "bpval"))
nuc.ES=read.table("../data/ApaByPgene/NuclearApaESGenes.txt",stringsAsFactors = F, col.names = c("pid", "nvar", "shape1", "shape2", "dummy", "sid", "dist", "npval", "slope", "ppval", "bpval") )
nuc.ex=read.table("../data/ApaByEgene/NuclearApaexplainedeGenes.txt",stringsAsFactors = F,col.names = c("pid", "nvar", "shape1", "shape2", "dummy", "sid", "dist", "npval", "slope", "ppval", "bpval"))
nuc.un=read.table("../data/ApaByEgene/NuclearApaUnexplainedeGenes.txt",stringsAsFactors = F, col.names = c("pid", "nvar", "shape1", "shape2", "dummy", "sid", "dist", "npval", "slope", "ppval", "bpval") )
nuc.un=na.omit(nuc.un)
nuc.PS=na.omit(nuc.PS)
nuc.notEorP=na.omit(nuc.notEorP)
nuc.ES=na.omit(nuc.ES)
nuc_allE=as.data.frame(rbind(nuc.ex,nuc.un))
```
```{r}
qqplot(-log10(runif(nrow(nuc.notEorP))), -log10(nuc.notEorP$bpval), xlab="-log10(Uniform)", ylab="-log10(beta pval)", main="Nuclear Apa")
points(sort(-log10(runif(nrow(nuc.PS)))), sort(-log10(nuc.PS$bpval)),col= alpha("Red"))
points(sort(-log10(runif(nrow(nuc_allE)))), sort(-log10(nuc_allE$bpval)),col= alpha("blue"))
abline(0,1)
legend("topleft", legend=c("Neither eGenes nor pGenes", "pGenes", "eGenes"),col=c("black", "red","blue","green"), pch=16,bty = 'n')
```
```{r}
wilcox.test(nuc.PS$bpval,nuc.notEorP$bpval,alternative="less")
wilcox.test(nuc_allE$bpval,nuc.notEorP$bpval,alternative="less")
wilcox.test(nuc.PS$bpval,nuc_allE$bpval,alternative="less")
```