This repository has been archived by the owner on Jul 30, 2019. It is now read-only.
forked from jpverta/verta_jones_elife_2019
-
Notifications
You must be signed in to change notification settings - Fork 0
/
normalizeInfCounts.r
executable file
·100 lines (66 loc) · 3.71 KB
/
normalizeInfCounts.r
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
########
# c172 #
########
library(DESeq2)
# all F1s w/ RNAseq masked ref
cov = read.table('aseReadCounts_c172_PallF1_infSites_totalCov_STAR_duprem.txt',row.names='pos')
# verify parent lane correlation
cor(na.omit(cov[,c('c172_P_533_M_lane3_TOTCOV','c172_P_533_M_lane8_TOTCOV')]))
cor(na.omit(cov[,c('c172_P_532_F_lane3_TOTCOV','c172_P_532_F_lane8_TOTCOV')]))
# exclude chrXIX
#cov = cov[grepl('chrXIX',rownames(cov))==F,]
cov[is.na(cov)] = 0
cov = cov[,2:ncol(cov)]
cov = as.numeric(cov)
colSums(cov)
# add parent lanes together
#cov$c172_P_533_M_TOTCOV = cov$c172_P_533_M_lane3_TOTCOV + cov$c172_P_533_M_lane8_TOTCOV
#cov$c172_P_532_F_TOTCOV = cov$c172_P_532_F_lane3_TOTCOV + cov$c172_P_532_F_lane8_TOTCOV
# remove parent lane columns
#cov = cov[,which(colnames(cov) %in% c('c172_P_532_F_lane3_TOTCOV','c172_P_532_F_lane8_TOTCOV','c172_P_533_M_lane3_TOTCOV','c172_P_533_M_lane8_TOTCOV') == F)]
design = data.frame(row.names=colnames(cov),
sex=rep(c('F','M'),8),
generation=c(rep('F1',12),rep('P',4)))
icounts = DESeqDataSetFromMatrix(countData=cov, colData=design, design=~1)
icounts = estimateSizeFactors(icounts)
# plot
plot(colSums(counts(icounts)),sizeFactors(icounts),type='n')
text(colSums(counts(icounts)),sizeFactors(icounts),colnames(counts(icounts)),cex=.75)
abline(lm(sizeFactors(icounts) ~ colSums(counts(icounts))))
write.table(counts(icounts,normalized=T),'aseReadCounts_c172_PallF1_infSites_totalCov_DESeqNormalized_STAR_duprem.txt')
# allele-specific counts
alleleCov = read.table('aseReadCounts_c172_PallF1_infSites_STAR_duprem.txt',row.names='pos')
# exclude chrXIX
#alleleCov = alleleCov[grepl('chrXIX',rownames(alleleCov))==F,]
alleleCov[is.na(alleleCov)] = 0
alleleCov = alleleCov[,2:ncol(alleleCov)]
alleleCov = as.numeric(alleleCov)
#########
# for F1s
# remove parent lane columns
alleleCov = alleleCov[,c('c172_F1_20_F_REFCOV','c172_F1_20_F_ALTCOV','c172_F1_20_M_REFCOV','c172_F1_20_M_ALTCOV','c172_F1_04_F_REFCOV','c172_F1_04_F_ALTCOV','c172_F1_04_M_REFCOV','c172_F1_04_M_ALTCOV')]
aDesign = data.frame(row.names=colnames(alleleCov),sex=rep(c('F','F','M','M'),2),generation=c(rep('F1',8)))
acounts = DESeqDataSetFromMatrix(countData=alleleCov, colData=aDesign, design=~1)
acounts = estimateSizeFactors(acounts)
plot(colSums(counts(acounts)),sizeFactors(acounts),type='n')
text(colSums(counts(acounts)),sizeFactors(acounts),colnames(counts(acounts)),cex=.75)
abline(lm(sizeFactors(acounts) ~ colSums(counts(acounts))))
colSums(counts(acounts,normalized=T))
write.table(counts(acounts,normalized=T),'aseReadCounts_c172_F1_infSites_alleleCov_DESeqNormalized_STAR_duprem.txt')
###################
# for salinity F1s
alleleCov = read.table('aseReadCounts_c172_PallF1_infSites_STAR_duprem.txt',row.names='pos')
# remove parent lane columns
alleleCov = alleleCov[,which(colnames(alleleCov) %in% c('c172_P_532_F_lane3_ALTCOV','c172_P_532_F_lane8_ALTCOV','c172_P_533_M_lane3_ALTCOV','c172_P_533_M_lane8_ALTCOV','c172_P_532_F_lane3_REFCOV','c172_P_532_F_lane8_REFCOV','c172_P_533_M_lane3_REFCOV','c172_P_533_M_lane8_REFCOV') == F)]
alleleCov[is.na(alleleCov)] = 0
alleleCov = alleleCov[,2:ncol(alleleCov)]
alleleCov = as.numeric(alleleCov)
# for F1s
aDesign = data.frame(row.names=colnames(alleleCov),sex=rep(c('F','F','M','M'),6),generation=c(rep('F1',24)))
acounts = DESeqDataSetFromMatrix(countData=alleleCov, colData=aDesign, design=~1)
acounts = estimateSizeFactors(acounts)
plot(colSums(counts(acounts)),sizeFactors(acounts),type='n')
text(colSums(counts(acounts)),sizeFactors(acounts),colnames(counts(acounts)),cex=.75)
abline(lm(sizeFactors(acounts) ~ colSums(counts(acounts))))
colSums(counts(acounts,normalized=T))
write.table(counts(acounts,normalized=T),'.../aseReadCounts_c172_allF1_infSites_alleleCov_DESeqNormalized_STAR_duprem.txt')