# MPRAnalyze Prepare Annotation Data

In [1]:
library(tidyverse)

── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
✔ dplyr     1.1.4     ✔ readr     2.1.5
✔ forcats   1.0.0     ✔ stringr   1.5.1
✔ ggplot2   3.5.1     ✔ tibble    3.2.1
✔ lubridate 1.9.3     ✔ tidyr     1.3.1
✔ purrr     1.0.2     
── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag()    masks stats::lag()
ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors


In [2]:
# annotations for DNA data
# One row per (version, batch, barcode): 2 versions x 3 batches x 50 barcodes
# = 300 rows. Batches 21-23 fall under V1 and batches 24-26 under V2.
# Row names are "version:batch:barcode" (e.g. "V1:21:1"); the three factor
# columns are kept alongside them.
dna_annot <- data.frame(
  version = factor(rep(c("V1", "V2"), each = 150)),
  batch = factor(rep(21:26, each = 50)),
  # One full 1..50 barcode run per batch (6 batches), so every column has
  # 300 entries and nothing relies on data.frame() recycling a short vector.
  barcode = factor(rep(1:50, times = 6))
) %>%
  unite("label", version, batch, barcode, sep = ":", remove = FALSE) %>%
  column_to_rownames("label")

In [3]:
# annotations for RNA data
# One row per (version, batch, barcode): 2 versions x 10 batches x 50 barcodes
# = 1000 rows. Batches 1-10 fall under V1 and batches 11-20 under V2.
# Row names are "version:batch:barcode" (e.g. "V1:1:1"); the three factor
# columns are kept alongside them.
rna_annot_pool <- data.frame(
  version = factor(rep(c("V1", "V2"), each = 500)),
  batch = factor(rep(1:20, each = 50)),
  # One full 1..50 barcode run per batch (20 batches), so every column has
  # 1000 entries and nothing relies on data.frame() recycling a short vector.
  barcode = factor(rep(1:50, times = 20))
) %>%
  unite("label", version, batch, barcode, sep = ":", remove = FALSE) %>%
  column_to_rownames("label")

In [4]:
# Write the DNA annotation table to disk. write.table() defaults apply:
# space-separated fields, quoted strings/factors, row names and header kept.
write.table(x = dna_annot, file = "../data/dna_annot.txt")

In [5]:
# Write the RNA annotation table to disk as a tab-separated file (row names
# and header kept, strings/factors quoted).
# NOTE(review): the other annotation tables in this notebook are written with
# the default space separator -- confirm the reader of this file expects tabs.
write.table(
  x = rna_annot_pool,
  file = "../data/rna_annot_pool.txt",
  sep = "\t"
)

In [6]:
# Total row counts used when building the allelic annotation tables,
# spelled out as versions x batches x barcodes x alleles.
# NOTE(review): presumably these equal the number of columns in the DNA/RNA
# count matrices -- confirm against the count data.
size_dna <- 2 * 3 * 50 * 2   # 600
size_rna <- 2 * 10 * 50 * 2  # 2000

In [7]:
# make DNA annotations for allelic comparison
# Layout, outermost to innermost: version (2) > batch (3) > barcode (50) >
# alleletype (2), i.e. 600 rows. Batch labels 11-13 are reused under both
# versions. Row names are "version:batch:barcode:alleletype"
# (e.g. "V1:11:1:ref"); the four factor columns are kept alongside them.

# The batch/barcode layout below is fixed, so fail early with a clear error
# if size_dna no longer matches it instead of letting data.frame() recycle
# or reject mismatched columns.
stopifnot(size_dna == 2 * 3 * 50 * 2)

dna_annot_allelic <- data.frame(
  version = factor(rep(c("V1", "V2"), each = size_dna / 2)),
  # 100 rows per batch (50 barcodes x 2 alleles); the 3-batch run is repeated
  # once per version explicitly rather than through implicit recycling.
  batch = factor(rep(11:13, each = 100, times = 2)),
  barcode = factor(rep(1:50, each = 2, times = size_dna / 100)),
  alleletype = factor(rep(c("ref", "alt"), times = size_dna / 2))
) %>%
  unite("label", version, batch, barcode, alleletype,
        sep = ":", remove = FALSE) %>%
  column_to_rownames("label")

In [8]:
# Add two derived factors to the DNA allelic annotations:
#   barcode_allelic         - barcode x alleletype
#   barcode_allelic_version - version x (barcode x alleletype)
dna_annot_allelic[["barcode_allelic"]] <- interaction(
  dna_annot_allelic[["barcode"]],
  dna_annot_allelic[["alleletype"]]
)
dna_annot_allelic[["barcode_allelic_version"]] <- interaction(
  dna_annot_allelic[["version"]],
  dna_annot_allelic[["barcode_allelic"]]
)

In [9]:
# make RNA annotations for allelic comparison
# Layout, outermost to innermost: version (2) > batch (10) > barcode (50) >
# alleletype (2), i.e. 2000 rows. Batch labels 1-10 are reused under both
# versions. Row names are "version:batch:barcode:alleletype"
# (e.g. "V1:1:1:ref"); the four factor columns are kept alongside them.

# The batch/barcode layout below is fixed, so fail early with a clear error
# if size_rna no longer matches it instead of letting data.frame() recycle
# or reject mismatched columns.
stopifnot(size_rna == 2 * 10 * 50 * 2)

rna_annot_allelic_pool <- data.frame(
  version = factor(rep(c("V1", "V2"), each = size_rna / 2)),
  # 100 rows per batch (50 barcodes x 2 alleles); the 10-batch run is
  # repeated once per version explicitly rather than through implicit
  # recycling.
  batch = factor(rep(1:10, each = 100, times = 2)),
  barcode = factor(rep(1:50, each = 2, times = size_rna / 100)),
  alleletype = factor(rep(c("ref", "alt"), times = size_rna / 2))
) %>%
  unite("label", version, batch, barcode, alleletype,
        sep = ":", remove = FALSE) %>%
  column_to_rownames("label")

In [10]:
# Add two derived factors to the RNA allelic annotations:
#   barcode_allelic         - barcode x alleletype
#   barcode_allelic_version - version x (barcode x alleletype)
rna_annot_allelic_pool[["barcode_allelic"]] <- interaction(
  rna_annot_allelic_pool[["barcode"]],
  rna_annot_allelic_pool[["alleletype"]]
)
rna_annot_allelic_pool[["barcode_allelic_version"]] <- interaction(
  rna_annot_allelic_pool[["version"]],
  rna_annot_allelic_pool[["barcode_allelic"]]
)

In [11]:
# Write the DNA allelic annotation table to disk. write.table() defaults
# apply: space-separated fields, quoted strings/factors, row names and
# header kept.
write.table(
  x = dna_annot_allelic,
  file = "../data/merged_dna_annot_allelic.txt"
)

In [12]:
# Write the RNA allelic annotation table to disk. write.table() defaults
# apply: space-separated fields, quoted strings/factors, row names and
# header kept.
write.table(
  x = rna_annot_allelic_pool,
  file = "../data/merged_rna_annot_pool_allelic.txt"
)