-
Notifications
You must be signed in to change notification settings - Fork 1
/
10-Characterizing-CpG-Methylation.Rmd
executable file
·124 lines (96 loc) · 5.31 KB
/
10-Characterizing-CpG-Methylation.Rmd
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
---
title: "Chacterizing CpG Methylation"
output: html_document
---
```{r setup, include=FALSE}
knitr::opts_chunk$set(echo = TRUE)
```
In this script, I'll create figures to characterize CpG methylation in my *C. virginica* gonad samples. I will use a list of CpGs with at least 5x coverage in any samples. I'll also calculate relevant statistics for methylation islands.
**MOVE CODE TO ANALYSIS DIRECTORY `2019-03-18-Characterizing-CpG-Methylation` BEFORE PROCEEDING.**
# Session information
```{r}
sessionInfo()
```
# All 5x CpGs
## Import data
```{r}
cpgMethylation <- read.csv("../analyses/2019-03-18-Characterizing-CpG-Methylation/2019-04-09-All-5x-CpGs.csv", header = FALSE) #Import file with CpG methylation for all loci with 5x coverage
colnames(cpgMethylation) <- c("chromosome", "start", "end", "methylation") #Add column names
head(cpgMethylation) #Confirm import
```
## Create figure
```{r}
RColorBrewer::display.brewer.all() #Show all RColorBrewer palettes. I will choose greens.
plotColors <- rev(RColorBrewer::brewer.pal(5, "GnBu")) #Create a color palette for the barplots. Use 5 green-blue shades from RColorBrewer. Reverse theorder so the darkest shade is used first.
barplot(t(t(proportionData)),
col = plotColors) #See what plot looks like with new scheme
barplot(t(t(proportionData)),
col = dichromat(plotColors)) #Check that the plot colors will be easy to interpret for those with color blindess
```
```{r}
#pdf("2019-04-10-5x-CpG-Frequency-Distribution.pdf", width = 11, height = 8.5)
hist(x = cpgMethylation$methylation, axes = FALSE, xlab = "", ylab = "", main = "", col = plotColors[2], xaxs = "i", yaxs = "i") #Create base plot
axis(side = 1, col = "grey80", at = seq(from = 0, to = 100, by = 10), cex.axis = 1.2) #Add x-axis
mtext(side = 1, text = "Methylation (%)", line = 3, cex = 1.5) #Add x-axis label
axis(side = 2, col = "grey80", las = 2, labels = c("0", "2", "4", "6"), at = c(0, 2e+05, 4e+05, 6e+05), cex.axis = 1.2) #add y-axis
mtext(side = 2, text = "Frequency (x100,000)", line = 2.5, cex = 1.5) #Add y-axis label
#dev.off()
```
# Methylated 5x CpGs
## Import data
```{r}
allMethLoci <- read.csv("../analyses/2019-03-18-Characterizing-CpG-Methylation/2019-04-09-All-5x-CpG-Loci-Methylated.csv", header = FALSE) #Import file with positions of all methylated loci with 5x coverage
colnames(allMethLoci) <- c("chromosome", "start", "end", "methylation") #Add column names
head(allMethLoci) #Confirm import
```
```{r}
methLociGeneOverlaps <- read.delim("../analyses/2019-03-18-Characterizing-CpG-Methylation/2019-05-29-All5xCpGs-Genes.txt", header = FALSE, sep = "\t") #Import file with overlaps for methylated loci and genes
methLociGeneOverlaps <- methLociGeneOverlaps[,-c(4)] #Remove extra column
colnames(methLociGeneOverlaps) <- c("chromosome", "start", "end", "gene-start", "gene-end") #Add column names
head(methLociGeneOverlaps) #Confirm import
```
## Scaled methylated CpG distribution
Scaling each gene from 0% to 100%, I want to see where methylated CpG are located. This is useful to see if methylation is occuring in any consistent location for each gene.
```{r}
methLociGeneOverlaps$geneLength <- methLociGeneOverlaps$`gene-end` - methLociGeneOverlaps$`gene-start` #Calculate gene length
methLociGeneOverlaps$absPosition <- methLociGeneOverlaps$start - methLociGeneOverlaps$`gene-start` #Calculate the absolute position of the methylated CpG in the gene
methLociGeneOverlaps$scaledPosition <- methLociGeneOverlaps$absPosition / methLociGeneOverlaps$geneLength #Calculate the scaled position of the methylated CpG in the gene
head(methLociGeneOverlaps) #Confirm calculations
```
```{r}
#pdf("2019-10-10-Scaled-Gene-Methylated-Loci.pdf", height = 8.5, width = 11)
par(mar = c(5, 6, 2, 2)) #Change figure dimensions
hist(methLociGeneOverlaps$scaledPosition,
breaks = 100,
axes = FALSE,
xlab = "", ylab = "", main = "",
ylim = c(0,40000),
col = "grey80",
xaxs = "i", yaxs = "i") #Create base plot with no axes or labels. Include breaks at each percent.
axis(side = 1, at = seq(from = 0, to = 1, by = 0.1), col = "grey80", cex = 1.2) #Add x-axis
mtext(side = 1, "Scaled Position on Gene", line = 3, cex = 1.5) #Add x-axis label
axis(side = 2, at = seq(from = 0, to = 40000, by = 10000), col = "grey80", las = 2, cex = 1.2) #Add y-axis
mtext(side = 2, "Number of Methylated CpG", line = 4, cex = 1.5) #Add y-axis label
#dev.off()
```
# Methylation island characteristics
## Import data
```{r}
methylationIslands <- read.delim("2020-02-06-Methylation-Islands-500_0.02_25-filtered.tab", header = FALSE) #Import methylation island list
colnames(methylationIslands) <- c("chr", "MI.start", "MI.end", "mCpGs", "MI.length") #Add column names
head(methylationIslands) #Confirm file format
```
## Calculate statistics
```{r}
range(methylationIslands$mCpGs) #Calculate the range of the number of mCpGs
median(methylationIslands$mCpGs) #Calculate the median number of mCpGs in an island
```
```{r}
range(methylationIslands$MI.length) #Calculate the range MI length
median(methylationIslands$MI.length) #Calculate the median MI length
```
```{r}
methIslandLengthDistr <- hist(methylationIslands$MI.length) #Create histogram and save output to characterize length distribution
methIslandLengthDistr$breaks #Look at bins for methylation island length
methIslandLengthDistr$counts #Look at counts for number of islands in bins
```