Remove editing sites that overlap known dbSNP positions, then rerun the multiple testing correction

In [1]:
# Load the two tab-separated input tables (both have a header row):
# the dbSNP position list and the combined SPRINT editing-site results.
dbsnp_path <- '/mnt/vast/hpc/csg/hcs2152/ZFR_RNA_Editing/References/Danio_rerio_dbSNP.txt'
editing_path <- '/mnt/vast/hpc/csg/hcs2152/ZFR_RNA_Editing/SPRINT/Output/A2I_Editing/SPRINT_zfr_combined_final.tsv'

dbsnp_file <- read.table(file = dbsnp_path, sep = '\t', header = TRUE)
editing_sites <- read.table(file = editing_path, sep = '\t', header = TRUE)

In [2]:
# Give every dbSNP record a "<genomic.region>_<coordinate>" key so it uses
# the same ID format as the editing-site table.
dbsnp_file[['ID']] <- paste(
  dbsnp_file[['genomic.region']],
  dbsnp_file[['coordinate']],
  sep = '_'
)

# Reorder the columns so that 'ID' comes first, followed by all the others
# in their existing order.
non_id_cols <- names(dbsnp_file)[names(dbsnp_file) != 'ID']
dbsnp_file <- dbsnp_file[, c('ID', non_id_cols)]

In [3]:
# Display the dbSNP table to check the new ID column and the column order
dbsnp_file

ID,genomic.region,coordinate,strand
<chr>,<chr>,<int>,<chr>
KZ116042.1_2135,KZ116042.1,2135,+
KZ116042.1_2686,KZ116042.1,2686,-
KZ116042.1_2738,KZ116042.1,2738,-
KZ116042.1_2785,KZ116042.1,2785,+
KZ116042.1_2839,KZ116042.1,2839,+
KZ116042.1_2851,KZ116042.1,2851,-
KZ116042.1_2943,KZ116042.1,2943,+
KZ116042.1_2988,KZ116042.1,2988,+
KZ116042.1_2989,KZ116042.1,2989,+
KZ116042.1_3122,KZ116042.1,3122,+


In [4]:
library(dplyr)


Attaching package: ‘dplyr’


The following objects are masked from ‘package:stats’:

    filter, lag


The following objects are masked from ‘package:base’:

    intersect, setdiff, setequal, union




In [5]:
# Record the table size (rows, columns) before the dbSNP positions are removed
c(nrow(editing_sites), ncol(editing_sites))

In [6]:
# Keep only the editing sites whose ID has no match in the dbSNP table.
# NOTE(review): the dbSNP ID is built from genomic.region and coordinate
# only, so strand is not part of the match - confirm that is intended.
editing_sites <- editing_sites %>%
  anti_join(dbsnp_file, by = 'ID')

In [7]:
# Table size (rows, columns) after filtering; compare the row count with the
# earlier value to see whether, and how many, sites were removed
c(nrow(editing_sites), ncol(editing_sites))

In [8]:
# The number of data points has changed, so the multiple testing correction
# has to be redone. Start by removing the old p_adj_BH column.
# drop = FALSE guarantees the result is still a data frame even if only a
# single column remains after the removal.
editing_sites <- editing_sites[, !names(editing_sites) %in% 'p_adj_BH', drop = FALSE]

In [9]:
# Recompute the BH-adjusted p-values from p_value for the current rows and
# store them in a p_adj_BH column
editing_sites[['p_adj_BH']] <- p.adjust(editing_sites[['p_value']], method = "BH")

In [10]:
# Display the filtered table, including the recomputed p_adj_BH column
editing_sites

ID,editing.type.x,read.type.x,strand.x,Ctrl.01.coverage,Ctrl.02.coverage,Ctrl.03.coverage,NO.01.coverage,NO.02.coverage,NO.03.coverage,⋯,NO.05.coverage,NO.06.coverage,Ctrl.04.coverage_editing_percentage,Ctrl.05.coverage_editing_percentage,Ctrl.06.coverage_editing_percentage,NO.04.coverage_editing_percentage,NO.05.coverage_editing_percentage,NO.06.coverage_editing_percentage,p_value,p_adj_BH
<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,⋯,<chr>,<chr>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>
1_10033434,AG,hyper,+,6:12,7:10,4:12,2:8,3:10,3:13,⋯,9:22,11:14,60.00000,57.14286,64.51613,71.87500,70.96774,56.00000,0.138631078,0.8762125
1_10033438,AG,hyper,+,12:12,9:9,12:12,7:7,9:9,12:12,⋯,21:21,13:14,50.00000,50.00000,50.00000,50.00000,50.00000,51.85185,1.000000000,1.0000000
1_11784543,TC,regular_and_hyper,-,8:75,11:115,10:80,8:59,13:66,10:42,⋯,14:163,17:171,89.04110,86.66667,94.30380,82.70677,92.09040,90.95745,0.430952169,1.0000000
1_11784563,TC,regular_and_hyper,-,20:77,22:115,14:88,9:60,4:65,10:45,⋯,28:169,46:177,77.10843,83.75000,81.48148,83.52490,85.78680,79.37220,0.071173904,0.7001188
1_11784567,TC,regular_and_hyper,-,36:77,78:124,49:94,36:63,34:65,30:48,⋯,115:184,102:185,59.37500,60.17699,64.40000,59.41645,61.53846,64.45993,0.227277609,0.9897161
1_11784568,TC,regular_and_hyper,-,69:76,119:124,80:93,57:63,63:65,46:48,⋯,177:184,175:185,52.36220,51.12782,51.77994,51.02506,50.96953,51.38889,1.000000000,1.0000000
1_11784569,TC,regular_and_hyper,-,43:77,65:125,51:93,45:63,33:65,23:48,⋯,96:184,97:183,67.17172,62.96296,64.48980,62.92135,65.71429,65.35714,0.883242627,1.0000000
1_11784571,TC,regular_and_hyper,-,21:79,48:128,28:94,19:63,27:64,24:44,⋯,67:190,82:188,68.50000,67.47573,67.91667,67.46988,73.92996,69.62963,0.030138082,0.4816341
1_11784572,TC,regular_and_hyper,-,20:79,42:127,16:94,15:63,34:64,23:44,⋯,49:190,67:188,72.10526,70.55838,71.49123,74.34211,79.49791,73.72549,0.449845118,1.0000000
1_11784577,TC,regular_and_hyper,-,36:77,66:122,57:91,41:58,40:63,22:45,⋯,131:187,94:183,71.50538,61.08597,65.98361,64.60177,58.80503,66.06498,0.553671045,1.0000000


In [13]:
# IDs of the sites with an adjusted p-value below 0.05.
# which() keeps only the rows where the comparison is TRUE; plain logical
# indexing would turn every NA adjusted p-value into an NA entry in the result.
significant_ids <- editing_sites$ID[which(editing_sites$p_adj_BH < 0.05)]

In [14]:
# Display the IDs that pass the adjusted p-value cutoff
significant_ids

In [15]:
# Write the current editing_sites table to disk as a tab-separated file,
# without quoting and without row names.
p_file <- '/mnt/vast/hpc/csg/hcs2152/ZFR_RNA_Editing/SPRINT/Output/A2I_Editing/SPRINT_ZFR_combined_dbSNP_removed_final.tsv'
write.table(
  x = editing_sites,
  file = p_file,
  quote = FALSE,
  sep = "\t",
  row.names = FALSE
)