In [1]:
# Paths to the SPRINT editing tables for the 2dpf and 5dpf samples and to the
# combined p-value table.
file_2dpf <- "/mnt/vast/hpc/csg/hcs2152/ZFR_RNA_Editing/SPRINT/Output/A2I_Editing/2dpf/SPRINT_ZFR_editing_2dpf.tsv"
file_5dpf <- "/mnt/vast/hpc/csg/hcs2152/ZFR_RNA_Editing/SPRINT/Output/A2I_Editing/5dpf/SPRINT_ZFR_editing_5dpf.tsv"
file_p_vals <- "/mnt/vast/hpc/csg/hcs2152/ZFR_RNA_Editing/SPRINT/Output/A2I_Editing/SPRINT_combined_p_values.tsv"

# Load each tab-separated file into a data frame; the first line of every file
# supplies the column names.
data_2dpf <- read.table(file = file_2dpf, sep = "\t", header = TRUE)
data_5dpf <- read.table(file = file_5dpf, sep = "\t", header = TRUE)
data_p_vals <- read.table(file = file_p_vals, sep = "\t", header = TRUE)

In [2]:
# Keep only rows whose chr value is exactly a whole number from 1 to 25;
# rows on any other contig are dropped.
pattern <- "^(1[0-9]|2[0-5]|[1-9])$"
data_2dpf <- data_2dpf[grepl(pattern, data_2dpf[["chr"]]), , drop = FALSE]
data_5dpf <- data_5dpf[grepl(pattern, data_5dpf[["chr"]]), , drop = FALSE]

In [3]:
# The editing site is given by the stop column, so the start column is
# discarded from both tables.
data_2dpf <- data_2dpf[, names(data_2dpf) != "start", drop = FALSE]
data_5dpf <- data_5dpf[, names(data_5dpf) != "start", drop = FALSE]


In [4]:
# Build a site identifier of the form "<chr>_<stop>" in each table.
data_2dpf$ID <- paste(data_2dpf[["chr"]], data_2dpf[["stop"]], sep = "_")
data_5dpf$ID <- paste(data_5dpf[["chr"]], data_5dpf[["stop"]], sep = "_")

# Put ID first and keep every remaining column except chr and stop, which are
# now encoded in ID.
other_cols_2dpf <- setdiff(names(data_2dpf), c("ID", "chr", "stop"))
data_2dpf <- data_2dpf[, c("ID", other_cols_2dpf)]
other_cols_5dpf <- setdiff(names(data_5dpf), c("ID", "chr", "stop"))
data_5dpf <- data_5dpf[, c("ID", other_cols_5dpf)]

In [5]:
# Join the 2dpf and 5dpf tables on ID. merge() defaults to an inner join, so
# only IDs present in both tables are kept; columns sharing a name in both
# inputs receive the default ".x" (2dpf) and ".y" (5dpf) suffixes.
merged_df <- merge(x = data_2dpf, y = data_5dpf, by = "ID")

In [6]:
# Attach the p-value columns to the merged table by ID. This is again an inner
# join, so IDs missing from the p-value table are dropped.
final_df <- merge(x = merged_df, y = data_p_vals, by = "ID")

In [7]:
# Display the merged table for a visual check before exporting it.
final_df

ID,editing.type.x,read.type.x,strand.x,Ctrl.01.coverage,Ctrl.02.coverage,Ctrl.03.coverage,NO.01.coverage,NO.02.coverage,NO.03.coverage,⋯,NO.05.coverage,NO.06.coverage,Ctrl.04.coverage_editing_percentage,Ctrl.05.coverage_editing_percentage,Ctrl.06.coverage_editing_percentage,NO.04.coverage_editing_percentage,NO.05.coverage_editing_percentage,NO.06.coverage_editing_percentage,p_value,p_adj_BH
<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,⋯,<chr>,<chr>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>
1_10033434,AG,hyper,+,6:12,7:10,4:12,2:8,3:10,3:13,⋯,9:22,11:14,60.00000,57.14286,64.51613,71.87500,70.96774,56.00000,0.138631078,0.8823425
1_10033438,AG,hyper,+,12:12,9:9,12:12,7:7,9:9,12:12,⋯,21:21,13:14,50.00000,50.00000,50.00000,50.00000,50.00000,51.85185,1.000000000,1.0000000
1_11784543,TC,regular_and_hyper,-,8:75,11:115,10:80,8:59,13:66,10:42,⋯,14:163,17:171,89.04110,86.66667,94.30380,82.70677,92.09040,90.95745,0.430952169,1.0000000
1_11784563,TC,regular_and_hyper,-,20:77,22:115,14:88,9:60,4:65,10:45,⋯,28:169,46:177,77.10843,83.75000,81.48148,83.52490,85.78680,79.37220,0.071173904,0.7055186
1_11784567,TC,regular_and_hyper,-,36:77,78:124,49:94,36:63,34:65,30:48,⋯,115:184,102:185,59.37500,60.17699,64.40000,59.41645,61.53846,64.45993,0.227277609,0.9969175
1_11784568,TC,regular_and_hyper,-,69:76,119:124,80:93,57:63,63:65,46:48,⋯,177:184,175:185,52.36220,51.12782,51.77994,51.02506,50.96953,51.38889,1.000000000,1.0000000
1_11784569,TC,regular_and_hyper,-,43:77,65:125,51:93,45:63,33:65,23:48,⋯,96:184,97:183,67.17172,62.96296,64.48980,62.92135,65.71429,65.35714,0.883242627,1.0000000
1_11784571,TC,regular_and_hyper,-,21:79,48:128,28:94,19:63,27:64,24:44,⋯,67:190,82:188,68.50000,67.47573,67.91667,67.46988,73.92996,69.62963,0.030138082,0.4873298
1_11784572,TC,regular_and_hyper,-,20:79,42:127,16:94,15:63,34:64,23:44,⋯,49:190,67:188,72.10526,70.55838,71.49123,74.34211,79.49791,73.72549,0.449845118,1.0000000
1_11784577,TC,regular_and_hyper,-,36:77,66:122,57:91,41:58,40:63,22:45,⋯,131:187,94:183,71.50538,61.08597,65.98361,64.60177,58.80503,66.06498,0.553671045,1.0000000


In [8]:
# Destination of the combined output table.
file_path <- "/mnt/vast/hpc/csg/hcs2152/ZFR_RNA_Editing/SPRINT/Output/A2I_Editing/SPRINT_zfr_combined_final.tsv"

In [9]:
# Export the merged data frame to a TSV file.
# quote = FALSE is set explicitly: write.table() otherwise wraps the header and
# every character field (e.g. 6:12, +, AG) in literal double quotes, which
# tools that split on tabs would read as part of the value.
# row.names = FALSE omits the row-name column.
write.table(
  final_df,
  file = file_path,
  sep = "\t",
  quote = FALSE,
  row.names = FALSE
)