-
Notifications
You must be signed in to change notification settings - Fork 2
/
fastman.R
195 lines (185 loc) · 7.89 KB
/
fastman.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
#' fastman_v2
#'
#' Creates Manhattan plots from GWAS summaries.
#' @param data A GWAS summary with columns of chromosome number (X and Y chromosomes need to be replaced by 23 and 24), position (base pair) information, and P-value. ALL values have to be numeric.
#' @param chr A string for the header of the column of chromosome number in data.
#' @param ps A string for the header of the column of position (base pair) information in data.
#' @param p A string for the header for the column of P-value in data.
#' @param main A string for the Title of the plot.
#' @param suggest_line Set FALSE to remove the suggestive threshold line.
#' @param gws_line Set FALSE to remove the genome-wide significant line.
#' @param color A string vector containing the colors to be used.
#' @param chr_build A string describing what Genetic Reference Consortium build is used. It has to be either "GRCh37" or "CRCh38".
#' @param yscale A numeric value for y scale.
#' @param xlab_all Set TRUE to show labels for all chromosomes.
#' @param turbo Set TRUE to remove all rows with P <= 0.1.
#' @param color2 A string vector containing the colors to be used to highlight SNPs.
#' @param snp_list A string vector containing SNPs in rsID to be highlighted.
#' @importFrom graphics abline axis mtext plot legend
#' @export
fastman2 <- function(data, snp="SNP", chr="CHR", ps="BP", p="P", main="Manhattan plot",
suggest_line=-log10(1e-5), gws_line=-log10(5e-8), color=c("grey","black"),
chr_build="GRCh37", yscale=NA, xlab_all=F, turbo=F,
color2="red", snp_list=c()){
# Check for sensible dataset ** these steps are adapted from qqman **
## Make sure you have chr, bp and p columns.
if (!(chr %in% names(data))) stop(paste("Column", chr, "not found!"))
if (!(ps %in% names(data))) stop(paste("Column", ps, "not found!"))
if (!(p %in% names(data))) stop(paste("Column", p, "not found!"))
## make sure chr, bp, and p columns are numeric.
if (!is.numeric(data[[chr]])) stop(paste(chr, "column should be numeric. Do you have 'X', 'Y', 'MT', etc? If so change to numbers and try again."))
if (!is.numeric(data[[ps]])) stop(paste(ps, "column should be numeric."))
if (!is.numeric(data[[p]])) stop(paste(p, "column should be numeric."))
# When turbo is TRUE, remove rows with P > 0.1, otherwise restrict the number of SNPs with P > 0.1 to ~20000.
if(turbo){
data <- data[which(data[,p]<=0.1),]
}else{
nrow_0.1 <- nrow(data[which(data[,p]>0.1),])
i1 <- 1
n_0.1 <- 1
while(i1<10000){
if(nrow_0.1/i1 <= 200000){
n_0.1 <- i1
break()
}
i1 <- i1+1
}
temp1 <- rep(c(1:n_0.1), nrow(data)/n_0.1)
temp1 <- temp1[1:nrow(data)]
data$gp_0.1 <- temp1
data <- data[which(data[,p]<=0.1 | data[,"gp_0.1"]==1),]
}
# Restrict the number of SNPs with P <=0.1 and P > 0.01 to ~20000.
nrow_0.01 <- nrow(data[which(data[,p]<=0.1 & data[,p]>0.01),])
if(nrow_0.01 > 200000){
i2 <- 1
n_0.01 <- 1
while(i2<10000){
if(nrow_0.01/i2 <= 200000){
n_0.01 <- i2
break()
}
i2 <- i2+1
}
temp2 <- rep(c(1:n_0.01), nrow(data)/n_0.01)
temp2 <- temp2[1:nrow(data)]
data$gp_0.01 <- temp2
data <- data[which(data[,p]>0.1 | data[,p]<=0.01 | data[,"gp_0.01"]==1),]
}
# Restrict the number of SNPs with P <=0.01 and P > 0.001 to ~20000.
nrow_0.001 <- nrow(data[which(data[,p]<=0.01 & data[,p]>0.001),])
if(nrow_0.001 > 200000){
i3 <- 1
n_0.001 <- 1
while(i2<10000){
if(nrow_0.001/i3 <= 200000){
n_0.001 <- i3
break()
}
i3 <- i3+1
}
temp3 <- rep(c(1:n_0.001), nrow(data)/n_0.001)
temp3 <- temp3[1:nrow(data)]
data$gp_0.001 <- temp3
data <- data[which(data[,p]>0.01 | data[,p]<=0.001 | data[,"gp_0.001"]==1),]
}
# Select whether the SNPs position info is based on GRCh38 or GRCh38
if(chr_build=="GRCh37"){
chr_base <- c(0, 249250621, 492449994, 690472424, 881626700, 1062541960, 1233657027, 1392795690, 1539159712, 1680373143, 1815907890, 1950914406, 2084766301, 2199936179, 2307285719, 2409817111, 2500171864, 2581367074, 2659444322, 2718573305, 2781598825, 2829728720, 2881033286, 3036303846, 3095677412)
chr_label <- c(124625310.5, 370850307.5, 591461209, 786049562, 972084330, 1148099494, 1313226359, 1465977701, 1609766428, 1748140517, 1883411148, 2017840354, 2142351240, 2253610949, 2358551415, 2454994488, 2540769469, 2620405698, 2689008814, 2750086065, 2815663773, 2865381003, 2958668566, 3065990629)
}
if(chr_build=="GRCh38"){
chr_base <- c(0, 248956422, 491149951, 689445510, 879660065, 1061198324, 1232004303, 1391350276, 1536488912, 1674883629, 1808681051, 1943767673, 2077042982, 2191407310, 2298451028, 2400442217, 2490780562, 2574038003, 2654411288, 2713028904, 2777473071, 2824183054, 2875001522, 3031042417, 3088269832)
chr_label <- c(124478211, 370053186.5, 590297730.5, 784552787.5, 970429194.5, 1146601314, 1311677290, 1463919594, 1605686271, 1741782340, 1876224362, 2010405328, 2134225146, 2244929169, 2349446623, 2445611390, 2532409283, 2614224646, 2683720096, 2745250988, 2800828063, 2849592288, 2953021970, 3059656125)
}
# Assign chromosome labels and their positions
chr_n <- c(1:22)
xlabel <- c(1:22)
x_at <- chr_label[1:22]
# Check if any SNP is on sex chromosomes.
if(24 %in% data[,chr]){
chr_n <- c(chr_n,23,24)
xlabel <- c(xlabel,"X","Y")
x_at <- chr_label[1:24]
} else if(23 %in% data[,chr]){
chr_n <- c(chr_n,23)
xlabel <- c(xlabel,"X")
x_at <- chr_label[1:23]
}
# Calculate the positions to plot for SNPs on chromosome 2 and after.
data$ps2 <- NA
for (i in chr_n){
data[which(data[,chr]==i),"ps2"] <- data[which(data[,chr]==i),ps] + chr_base[i]
}
# Set colors
data$colors <- NA
for (i in chr_n){
temp_n <- i%%length(color)
if(temp_n==0){
col<-color[length(color)]
}else col<-color[temp_n]
data[which(data[,chr]==i),"colors"] <- col
}
# If yscale is not provided, set the yscale to default of 10 when the strongest association < 1e-10, otherwise set it to match the strongest p.
if(!is.na(yscale)){
y_max <- yscale
if(y_max<=10){
y_at <- c(0:10)
}else if(y_max<=20){
y_at <- c(seq(0,y_max,by=2))
}else if(y_max<=50){
y_at <- c(seq(0,y_max,by=5))
}else if(y_max<=100){
y_at <- c(seq(0,y_max,by=10))
}else {y_at <- c(seq(0,y_max,by=20))}
}else if(is.na(yscale) & -log10(min(data[,p])) > 10){
y_max <- ceiling(-log10(min(data[,p])))
if(y_max<=10){
y_at <- c(0:10)
}else if(y_max<=20){
y_at <- c(seq(0,y_max,by=2))
}else if(y_max<=50){
y_at <- c(seq(0,y_max,by=5))
}else if(y_max<=100){
y_at <- c(seq(0,y_max,by=10))
}else {y_at <- c(seq(0,y_max,by=50))}
}else {
y_max <- 10
y_at <- c(0:10)
}
if(y_at[length(y_at)]!=y_max){
y_at <- c(y_at, y_max)
}
# Assign y scale labels based on turbo option
y_min <- 0
if(turbo){
y_min <- 1
y_at <- y_at[-1]
}
# Calculate the -logP and plot.
data$minus_log10_p <- -log10(as.numeric(as.character(data[,p])))
plot(minus_log10_p~ps2, data, axes = F, ann = F, col=data$colors, pch = 16, cex = 0.3,
xlim=c(0,chr_base[length(chr_n+1)]),
ylim=c(y_min,y_max))
axis(side = 2, at=y_at, labels=y_at)
mtext(expression(-log[10]~"p-value"), side = 2, line = 2)
# SNPs to highlight
if(length(snp_list)>0){
data[data[,snp] %in% snp_list, "colors"] <- color2
points(data[data[,snp] %in% snp_list, "ps2"],data[data[,snp] %in% snp_list, "minus_log10_p"], col=color2, pch = 16, cex = 0.4)
}
if(xlab_all){
xlas=2
}else{xlas=1}
axis(side=1, line = -1, at=x_at, labels=xlabel, tick=F, las=xlas)
mtext("Chromosome", side = 1, line = 1)
mtext(main, side = 3, line = 1)
# Plot genome-wide significant line
if(gws_line){
abline(h=gws_line, col="red",lty=2)
}
# Plot suggestive significant line
if(suggest_line){
abline(h=suggest_line, col="blue", lty=2)
}
}