In [None]:
# This script defines functions to build a Translational Efficiency Hypothesis (TEH) - Accuracy model that predicts protein expression in E. coli
# This energy-based notion of cost is readily extended to include the effects of inaccuracy
# by focusing on the probability for a codon to produce a nonsense error.


# Author: 
# Alejandra Lopez Sosa, May 2023, Zurich University of Applied Sciences,
# Institute for Chemistry and Biotechnology

# Data description

# Nonsense error rates of Escherichia coli
# Nonsense error rates of E. coli are taken from
# estimates obtained by using the FONSE model from the AnaCoDa package in R:
# https://cran.r-project.org/web/packages/AnaCoDa/vignettes/anacoda.html
# The FONSE (First order approximation On NonSense Error) model analyzes gene data for selection on codon usage against nonsense errors.


# Protein expression levels measurements (empirical data)
# These data were generated by Marco Gees, Zrinka Raguz Nakic and Christin Peters 
# from the Institute of Biotechnology, ZHAW Wädenswil, during the years 2020-2022. 
# They are stored under the name Testing_the_TEH/data/data_gfp_zrinka.csv

### Install and load packages

In [None]:
# First we set the corresponding working directory

# setting working path
# NOTE(review): machine-specific absolute path. Later cells resolve
# "sequence_conversions.R" and "../data/sequence_names_mRNA.csv" relative to it.
setwd("/Users/ale/Documents/thesis_codon_bias/Testing_the_TEH/Rscripts")

# Installing all necessary packages.
# Only packages that are not installed yet are fetched, so re-running this cell
# does not reinstall everything.
cran_packages <- c(
  "RColorBrewer", "seqinr", "VGAM", "doSNOW", "coda", "EMCluster",
  "bioseq", "AnaCoDa"
)
# Biostrings, GeneGA and sscu are distributed through Bioconductor, not CRAN,
# so they are installed with BiocManager below instead of install.packages().
bioc_packages <- c("Biostrings", "GeneGA", "sscu")

missing_cran <- setdiff(cran_packages, rownames(installed.packages()))
if (length(missing_cran) > 0) {
  install.packages(missing_cran)
}

if (!requireNamespace("BiocManager", quietly = TRUE)) {
  install.packages("BiocManager")
}
# Need 3.14 with R version 4.1
BiocManager::install(version = "3.14")

# Re-query the library: the CRAN step above may have pulled in dependencies
missing_bioc <- setdiff(bioc_packages, rownames(installed.packages()))
if (length(missing_bioc) > 0) {
  BiocManager::install(missing_bioc)
}

# load necessary packages
library("RColorBrewer")
library("seqinr")
library("VGAM")
library("doSNOW")
library("coda")
library("EMCluster")
library("Biostrings")
library("bioseq")
library("GeneGA")
library("AnaCoDa")

In [2]:
# Source file containing sequence conversion functions
# (resolved relative to the working directory; presumably provides
# seq.string.to.cod.string(), which is called later in this script - confirm)
source("sequence_conversions.R")

### Obtain data from data_gfp_zrinka

In [3]:
# Read file and assign to variable
# NOTE(review): absolute, machine-specific path; the constructs file further
# down is read with a path relative to the working directory instead.
data_gfp_zrinka <- read.csv("/Users/ale/Documents/thesis_codon_bias/Testing_the_TEH/data/data_gfp_zrinka.csv", header = TRUE, sep = ",")

# Keep only the columns of interest: 'X...sequence_name...', 'strain...', 'fluorescence.value..AU.' and 'outlier_status..logica.'
# They are selected by name, so a change in the CSV column order cannot
# silently pick the wrong columns.
data_gfp_zrinka <- data_gfp_zrinka[, c(
  "X...sequence_name...",
  "strain...",
  "fluorescence.value..AU.",
  "outlier_status..logica."
)]
data_gfp_zrinka

X...sequence_name...,strain...,fluorescence.value..AU.,outlier_status..logica.
<chr>,<chr>,<dbl>,<chr>
pET28b_empty,K12,7.5,no
pET28b_empty,K12,-5.5,no
pET28b_empty,K12,-17.5,no
pET28b_empty,K12,15.5,no
pET28b_empty,K12,39.0,no
pET28b_empty,K12,17.0,no
pET28b_empty,K12,-7.0,no
pET28b_empty,K12,-49.0,yes
V015-wildtype,K12,14853.5,no
V015-wildtype,K12,14101.5,no


Remove outliers before continuing

In [4]:
# Keep only the rows where 'outlier_status..logica.' == "no".
# which() discards rows whose status is NA; a bare logical index would turn
# each of them into a row filled with NA.
data_gfp_zrinka_no_outliers <- data_gfp_zrinka[which(data_gfp_zrinka$outlier_status..logica. == "no"), ]

In [5]:
# Adds the mean fluorescence of each (sequence, strain) group to every
# measurement of that group and removes the control constructs.
#
# Arguments:
#   data          data frame with the raw measurements
#   seq_col       name of the column holding the construct (sequence) name
#   strain_col    name of the column holding the strain
#   fluo_col      name of the column holding the fluorescence value
#   control_names construct name(s) to discard from the result; the default is
#                 the empty-vector control. Pass character(0) to keep all rows.
#
# Returns a data frame with the columns sequence_name, strain, value and
# mean_fluorescence. All other input columns are dropped.
add.mean.fluo.col <- function(data,
                              seq_col = "X...sequence_name...",
                              strain_col = "strain...",
                              fluo_col = "fluorescence.value..AU.",
                              control_names = "pET28b_empty") {
  # Fail early with a readable message when a requested column is absent
  wanted_cols <- c(seq_col, strain_col, fluo_col)
  absent_cols <- setdiff(wanted_cols, names(data))
  if (length(absent_cols) > 0) {
    stop(
      "Column(s) not found in data: ", paste(absent_cols, collapse = ", "),
      call. = FALSE
    )
  }

  # keep only columns seq_col, strain_col and fluo_col
  data <- data[, wanted_cols]

  # Compute mean fluorescence value for each (sequence, strain) group
  mean_fluo <- aggregate(data[[fluo_col]] ~ data[[seq_col]] + data[[strain_col]], data = data, FUN = mean)
  names(mean_fluo) <- c(seq_col, strain_col, "mean_fluorescence")

  # Add the mean fluorescence value for each row
  # (merge() puts the key columns first, then the value, then the mean)
  data <- merge(data, mean_fluo, by = c(seq_col, strain_col))

  # Rename columns in dataframe for better understanding
  names(data) <- c("sequence_name", "strain", "value", "mean_fluorescence")

  # Remove the control rows (they're controls and carry no info).
  # %in% never yields NA, so no NA-filled rows can be produced here.
  data <- data[!(data$sequence_name %in% control_names), ]

  data
}

# Add the mean fluorescence column to both data frames (with and without outliers).
# Note: each variable is overwritten with the reshaped frame (columns renamed,
# outlier column dropped), so this cell cannot be re-run on its own output
# with the default column names.
data_gfp_zrinka <- add.mean.fluo.col(data_gfp_zrinka)
data_gfp_zrinka_no_outliers <- add.mean.fluo.col(data_gfp_zrinka_no_outliers)

#### Retrieve sequences and add sequence id column for mapping

In [6]:
# Read sequence constructs file
sequence_constructs <- read.csv("../data/sequence_names_mRNA.csv")

# Add sequence_id column by construct name to the sequence_constructs dataframe:
# the id is the part of Construct.name before the first "_".
# vapply() guarantees a character vector, also for an empty table.
sequence_constructs$sequence_id <- vapply(
  strsplit(sequence_constructs$Construct.name, "_"),
  function(name_parts) name_parts[1],
  character(1)
)

#### Add sequence column to the dataframe via sequence id column

In [7]:
# Add sequence_id column to dataframe: the id is the part of sequence_name
# before the first "-". vapply() guarantees a character vector.
data_gfp_zrinka$sequence_id <- vapply(
  strsplit(data_gfp_zrinka$sequence_name, "-"),
  function(name_parts) name_parts[1],
  character(1)
)
# dataframe with no outliers
data_gfp_zrinka_no_outliers$sequence_id <- vapply(
  strsplit(data_gfp_zrinka_no_outliers$sequence_name, "-"),
  function(name_parts) name_parts[1],
  character(1)
)

# Add sequence column from sequence_constructs to dataframe mapping via sequence_id.
# The two columns are picked by name instead of by position (4 and 5), so the
# join does not depend on the column layout of the constructs file.
data_gfp_zrinka <- merge(data_gfp_zrinka, sequence_constructs[, c("Sequence", "sequence_id")], by = "sequence_id")
# dataframe with no outliers
data_gfp_zrinka_no_outliers <- merge(data_gfp_zrinka_no_outliers, sequence_constructs[, c("Sequence", "sequence_id")], by = "sequence_id")

#### Retrieve nonsense error rates for each codon and assign to a vector

In [8]:
# Load the nonsense error rates estimated with the FONSE model.
# The per-codon estimates live in the "Mean" column of
# "nonsense_error_rates_fonse.csv".
ecoli_nonsense_error_data <- read.csv(
  file = "/Users/ale/Documents/thesis_codon_bias/Testing_the_TEH/Rscripts/nonsense_error_rates_fonse.csv",
  header = TRUE
)

In [9]:
# Build one vector of nonsense error rates (column Mean) in which every value
# is named after its codon (column Codon)
nonsense_error_rates_ecoli <- setNames(
  ecoli_nonsense_error_data$Mean,
  ecoli_nonsense_error_data$Codon
)

In [10]:
# Display the named vector of nonsense error rates (values from $Mean, names from $Codon)
nonsense_error_rates_ecoli

#### Compute and add translation success rates for each sequence

In [10]:
translation.success.rate.of.mRNA <- function(seq_cand, nonsense_error_rates_ecoli, verbose = FALSE) {
  # This function outputs the total translation success rate of an mRNA sequence,
  # i.e. the product of (1 - nonsense error rate) over its codons.
  #
  # Arguments:
  #   seq_cand                   the sequence; it is split into codons by
  #                              seq.string.to.cod.string() (not defined in this cell)
  #   nonsense_error_rates_ecoli numeric vector of nonsense error rates, named by codon
  #                              (computed by the FONSE model (Gilchrist, 2015),
  #                              file: "nonsense_error_rates_fonse.csv")
  #   verbose                    if TRUE, print the codon count and the looked-up rates
  #
  # The last codon is always dropped (it is assumed to be the stop codon); no
  # other codon is filtered out. A codon without an entry in the rate vector
  # makes the result NA and triggers a warning naming that codon.
  codons_of_sequence <- seq.string.to.cod.string(seq_cand)
  L <- length(codons_of_sequence)

  if (verbose) {
    cat("length candidate sequence is:", L, '\n')
  }

  # Exclude the last codon (stop codon).
  # seq_len() yields an empty index when L <= 1, whereas 1:(L-1) would count
  # backwards (c(1, 0) or c(1, 0, -1)).
  codons <- codons_of_sequence[seq_len(max(L - 1, 0))]

  # Identify positions of codons in the nonsense error rate table
  positions_in_table <- match(codons, names(nonsense_error_rates_ecoli))

  # Make missing table entries visible instead of silently returning NA
  unmatched_codons <- unique(codons[is.na(positions_in_table)])
  if (length(unmatched_codons) > 0) {
    warning(
      "No nonsense error rate for codon(s): ",
      paste(unmatched_codons, collapse = ", "),
      "; the success rate will be NA",
      call. = FALSE
    )
  }

  # Retrieve nonsense error rates for codons
  error_rate_mRNA <- nonsense_error_rates_ecoli[positions_in_table]

  if (verbose) {
    cat("Nonsense error rates of the codons:", error_rate_mRNA, '\n')
  }

  # Calculate total translation success rate of the mRNA sequence
  # (the product over zero codons is 1)
  prod(1 - error_rate_mRNA)
}

# Test the function translation.success.rate.of.mRNA on the first construct sequence
# (the returned success rate is displayed as the cell output)
seq_cand <- sequence_constructs$Sequence[1]
translation.success.rate.of.mRNA(seq_cand, nonsense_error_rates_ecoli)

In [11]:
# This function adds the total success rate column to dataframe
#
# Arguments:
#   data                       data frame with a Sequence column
#   nonsense_error_rates_ecoli nonsense error rates named by codon, forwarded to
#                              translation.success.rate.of.mRNA()
#
# Returns data with the extra numeric column total_success_rate.
# The rate is computed once per distinct sequence and then mapped back to the
# rows, because replicate measurements repeat the same sequence many times.
add.success.rate.col <- function(data, nonsense_error_rates_ecoli){
  distinct_sequences <- unique(data$Sequence)

  # vapply() always yields a plain numeric vector; sapply() would name each
  # result after its (very long) input sequence and return a list for no rows
  rate_per_sequence <- vapply(
    distinct_sequences,
    translation.success.rate.of.mRNA,
    numeric(1),
    nonsense_error_rates_ecoli = nonsense_error_rates_ecoli,
    USE.NAMES = FALSE
  )

  data$total_success_rate <- rate_per_sequence[match(data$Sequence, distinct_sequences)]
  data
}

# Call the function add.success.rate.col to add a success rate column to dataframe
# and display the result
data_gfp_zrinka <- add.success.rate.col(data_gfp_zrinka, nonsense_error_rates_ecoli)
data_gfp_zrinka

# Call the same function on dataframe with no outliers and display the result
data_gfp_zrinka_no_outliers <- add.success.rate.col(data_gfp_zrinka_no_outliers, nonsense_error_rates_ecoli)
data_gfp_zrinka_no_outliers

sequence_id,sequence_name,strain,value,mean_fluorescence,Sequence,total_success_rate
<chr>,<chr>,<chr>,<dbl>,<dbl>,<chr>,<dbl>
V015,V015-wildtype,BL21DE3,16486.75,17098.38,ATGGGAATAATGGAGGCAGAGAGGAAAACAACAGGCTGGGCTGCCAGAGACCCATCTGGCATCCTCTCTCCTTACACTTACACTCTTAGAGAGACTGGACCAGAGGATGTGAACATAAGAATCATTTGCTGTGGAATCTGCCACACCGATCTTCATCAAACTAAAAATGATCTTGGCATGTCTAATTACCCCATGGTTCCTGGGCATGAAGTGGTAGGGGAAGTAGTGGAGGTGGGATCAGATGTGAGCAAGTTCACCGTAGGGGACATAGTTGGAGTTGGTTGCCTCGTTGGATGTTGCGGAGGTTGTAGCCCCTGCGAGAGAGATCTGGAACAGTATTGTCCAAAGAAGATTTGGAGCTACAATGATGTTTACATCAATGGTCAACCTACACAAGGCGGCTTCGCTAAAGCCACCGTCGTTCACCAAAAGTTTGTGGTCAAGATTCCAGAAGGAATGGCGGTTGAGCAGGCTGCGCCGCTACTGTGCGCTGGTGTGACTGTGTACAGTCCACTGAGCCACTTTGGTCTGAAACAACCAGGCCTAAGAGGAGGTATACTAGGGTTAGGTGGAGTCGGTCACATGGGTGTGAAAATAGCCAAAGCAATGGGTCACCATGTGACTGTCATAAGCTCATCAAACAAGAAGAGAGAAGAGGCATTGCAAGATCTTGGAGCTGATGATTACGTGATCGGTTCCGACCAAGCGAAGATGAGCGAATTGGCTGATTCGTTGGATTACGTAATTGACACGGTGCCTGTTCATCATGCACTTGAGCCATATTTGTCTCTGCTTAAGCTTGATGGTAAACTCATTCTCATGGGAGTTATCAACAATCCATTACAGTTTCTCACTCCTCTGCTTATGCTTGGGAGGAAAGTGATAACGGGGAGCTTCATAGGGAGCATGAAGGAGACAGAGGAGATGCTTGAGTTCTGTAAAGAAAAGGGTTTGAGTTCGATTATCGAAGTTGTGAAGATGGATTATGTTAACACTGCGTTTGAGAGACTTGAGAAGAACGATGTGCGTTATAGGTTCGTCGTTGATGTCGAAGGAAGCAATCTCGACGCTTTAATTGGCTCCGATGGAGGGTCTGGTGGCGGATCAACAAGTCGTGACCACATGGTCCTTCATGAGTACGTAAATGCTGCTGGGATTACATGA,0.9082692
V015,V015-wildtype,BL21DE3,16621.75,17098.38,ATGGGAATAATGGAGGCAGAGAGGAAAACAACAGGCTGGGCTGCCAGAGACCCATCTGGCATCCTCTCTCCTTACACTTACACTCTTAGAGAGACTGGACCAGAGGATGTGAACATAAGAATCATTTGCTGTGGAATCTGCCACACCGATCTTCATCAAACTAAAAATGATCTTGGCATGTCTAATTACCCCATGGTTCCTGGGCATGAAGTGGTAGGGGAAGTAGTGGAGGTGGGATCAGATGTGAGCAAGTTCACCGTAGGGGACATAGTTGGAGTTGGTTGCCTCGTTGGATGTTGCGGAGGTTGTAGCCCCTGCGAGAGAGATCTGGAACAGTATTGTCCAAAGAAGATTTGGAGCTACAATGATGTTTACATCAATGGTCAACCTACACAAGGCGGCTTCGCTAAAGCCACCGTCGTTCACCAAAAGTTTGTGGTCAAGATTCCAGAAGGAATGGCGGTTGAGCAGGCTGCGCCGCTACTGTGCGCTGGTGTGACTGTGTACAGTCCACTGAGCCACTTTGGTCTGAAACAACCAGGCCTAAGAGGAGGTATACTAGGGTTAGGTGGAGTCGGTCACATGGGTGTGAAAATAGCCAAAGCAATGGGTCACCATGTGACTGTCATAAGCTCATCAAACAAGAAGAGAGAAGAGGCATTGCAAGATCTTGGAGCTGATGATTACGTGATCGGTTCCGACCAAGCGAAGATGAGCGAATTGGCTGATTCGTTGGATTACGTAATTGACACGGTGCCTGTTCATCATGCACTTGAGCCATATTTGTCTCTGCTTAAGCTTGATGGTAAACTCATTCTCATGGGAGTTATCAACAATCCATTACAGTTTCTCACTCCTCTGCTTATGCTTGGGAGGAAAGTGATAACGGGGAGCTTCATAGGGAGCATGAAGGAGACAGAGGAGATGCTTGAGTTCTGTAAAGAAAAGGGTTTGAGTTCGATTATCGAAGTTGTGAAGATGGATTATGTTAACACTGCGTTTGAGAGACTTGAGAAGAACGATGTGCGTTATAGGTTCGTCGTTGATGTCGAAGGAAGCAATCTCGACGCTTTAATTGGCTCCGATGGAGGGTCTGGTGGCGGATCAACAAGTCGTGACCACATGGTCCTTCATGAGTACGTAAATGCTGCTGGGATTACATGA,0.9082692
V015,V015-wildtype,BL21DE3,17863.75,17098.38,ATGGGAATAATGGAGGCAGAGAGGAAAACAACAGGCTGGGCTGCCAGAGACCCATCTGGCATCCTCTCTCCTTACACTTACACTCTTAGAGAGACTGGACCAGAGGATGTGAACATAAGAATCATTTGCTGTGGAATCTGCCACACCGATCTTCATCAAACTAAAAATGATCTTGGCATGTCTAATTACCCCATGGTTCCTGGGCATGAAGTGGTAGGGGAAGTAGTGGAGGTGGGATCAGATGTGAGCAAGTTCACCGTAGGGGACATAGTTGGAGTTGGTTGCCTCGTTGGATGTTGCGGAGGTTGTAGCCCCTGCGAGAGAGATCTGGAACAGTATTGTCCAAAGAAGATTTGGAGCTACAATGATGTTTACATCAATGGTCAACCTACACAAGGCGGCTTCGCTAAAGCCACCGTCGTTCACCAAAAGTTTGTGGTCAAGATTCCAGAAGGAATGGCGGTTGAGCAGGCTGCGCCGCTACTGTGCGCTGGTGTGACTGTGTACAGTCCACTGAGCCACTTTGGTCTGAAACAACCAGGCCTAAGAGGAGGTATACTAGGGTTAGGTGGAGTCGGTCACATGGGTGTGAAAATAGCCAAAGCAATGGGTCACCATGTGACTGTCATAAGCTCATCAAACAAGAAGAGAGAAGAGGCATTGCAAGATCTTGGAGCTGATGATTACGTGATCGGTTCCGACCAAGCGAAGATGAGCGAATTGGCTGATTCGTTGGATTACGTAATTGACACGGTGCCTGTTCATCATGCACTTGAGCCATATTTGTCTCTGCTTAAGCTTGATGGTAAACTCATTCTCATGGGAGTTATCAACAATCCATTACAGTTTCTCACTCCTCTGCTTATGCTTGGGAGGAAAGTGATAACGGGGAGCTTCATAGGGAGCATGAAGGAGACAGAGGAGATGCTTGAGTTCTGTAAAGAAAAGGGTTTGAGTTCGATTATCGAAGTTGTGAAGATGGATTATGTTAACACTGCGTTTGAGAGACTTGAGAAGAACGATGTGCGTTATAGGTTCGTCGTTGATGTCGAAGGAAGCAATCTCGACGCTTTAATTGGCTCCGATGGAGGGTCTGGTGGCGGATCAACAAGTCGTGACCACATGGTCCTTCATGAGTACGTAAATGCTGCTGGGATTACATGA,0.9082692
V015,V015-wildtype,BL21DE3,13851.75,17098.38,ATGGGAATAATGGAGGCAGAGAGGAAAACAACAGGCTGGGCTGCCAGAGACCCATCTGGCATCCTCTCTCCTTACACTTACACTCTTAGAGAGACTGGACCAGAGGATGTGAACATAAGAATCATTTGCTGTGGAATCTGCCACACCGATCTTCATCAAACTAAAAATGATCTTGGCATGTCTAATTACCCCATGGTTCCTGGGCATGAAGTGGTAGGGGAAGTAGTGGAGGTGGGATCAGATGTGAGCAAGTTCACCGTAGGGGACATAGTTGGAGTTGGTTGCCTCGTTGGATGTTGCGGAGGTTGTAGCCCCTGCGAGAGAGATCTGGAACAGTATTGTCCAAAGAAGATTTGGAGCTACAATGATGTTTACATCAATGGTCAACCTACACAAGGCGGCTTCGCTAAAGCCACCGTCGTTCACCAAAAGTTTGTGGTCAAGATTCCAGAAGGAATGGCGGTTGAGCAGGCTGCGCCGCTACTGTGCGCTGGTGTGACTGTGTACAGTCCACTGAGCCACTTTGGTCTGAAACAACCAGGCCTAAGAGGAGGTATACTAGGGTTAGGTGGAGTCGGTCACATGGGTGTGAAAATAGCCAAAGCAATGGGTCACCATGTGACTGTCATAAGCTCATCAAACAAGAAGAGAGAAGAGGCATTGCAAGATCTTGGAGCTGATGATTACGTGATCGGTTCCGACCAAGCGAAGATGAGCGAATTGGCTGATTCGTTGGATTACGTAATTGACACGGTGCCTGTTCATCATGCACTTGAGCCATATTTGTCTCTGCTTAAGCTTGATGGTAAACTCATTCTCATGGGAGTTATCAACAATCCATTACAGTTTCTCACTCCTCTGCTTATGCTTGGGAGGAAAGTGATAACGGGGAGCTTCATAGGGAGCATGAAGGAGACAGAGGAGATGCTTGAGTTCTGTAAAGAAAAGGGTTTGAGTTCGATTATCGAAGTTGTGAAGATGGATTATGTTAACACTGCGTTTGAGAGACTTGAGAAGAACGATGTGCGTTATAGGTTCGTCGTTGATGTCGAAGGAAGCAATCTCGACGCTTTAATTGGCTCCGATGGAGGGTCTGGTGGCGGATCAACAAGTCGTGACCACATGGTCCTTCATGAGTACGTAAATGCTGCTGGGATTACATGA,0.9082692
V015,V015-wildtype,BL21DE3,19477.00,17098.38,ATGGGAATAATGGAGGCAGAGAGGAAAACAACAGGCTGGGCTGCCAGAGACCCATCTGGCATCCTCTCTCCTTACACTTACACTCTTAGAGAGACTGGACCAGAGGATGTGAACATAAGAATCATTTGCTGTGGAATCTGCCACACCGATCTTCATCAAACTAAAAATGATCTTGGCATGTCTAATTACCCCATGGTTCCTGGGCATGAAGTGGTAGGGGAAGTAGTGGAGGTGGGATCAGATGTGAGCAAGTTCACCGTAGGGGACATAGTTGGAGTTGGTTGCCTCGTTGGATGTTGCGGAGGTTGTAGCCCCTGCGAGAGAGATCTGGAACAGTATTGTCCAAAGAAGATTTGGAGCTACAATGATGTTTACATCAATGGTCAACCTACACAAGGCGGCTTCGCTAAAGCCACCGTCGTTCACCAAAAGTTTGTGGTCAAGATTCCAGAAGGAATGGCGGTTGAGCAGGCTGCGCCGCTACTGTGCGCTGGTGTGACTGTGTACAGTCCACTGAGCCACTTTGGTCTGAAACAACCAGGCCTAAGAGGAGGTATACTAGGGTTAGGTGGAGTCGGTCACATGGGTGTGAAAATAGCCAAAGCAATGGGTCACCATGTGACTGTCATAAGCTCATCAAACAAGAAGAGAGAAGAGGCATTGCAAGATCTTGGAGCTGATGATTACGTGATCGGTTCCGACCAAGCGAAGATGAGCGAATTGGCTGATTCGTTGGATTACGTAATTGACACGGTGCCTGTTCATCATGCACTTGAGCCATATTTGTCTCTGCTTAAGCTTGATGGTAAACTCATTCTCATGGGAGTTATCAACAATCCATTACAGTTTCTCACTCCTCTGCTTATGCTTGGGAGGAAAGTGATAACGGGGAGCTTCATAGGGAGCATGAAGGAGACAGAGGAGATGCTTGAGTTCTGTAAAGAAAAGGGTTTGAGTTCGATTATCGAAGTTGTGAAGATGGATTATGTTAACACTGCGTTTGAGAGACTTGAGAAGAACGATGTGCGTTATAGGTTCGTCGTTGATGTCGAAGGAAGCAATCTCGACGCTTTAATTGGCTCCGATGGAGGGTCTGGTGGCGGATCAACAAGTCGTGACCACATGGTCCTTCATGAGTACGTAAATGCTGCTGGGATTACATGA,0.9082692
V015,V015-wildtype,BL21DE3,19002.00,17098.38,ATGGGAATAATGGAGGCAGAGAGGAAAACAACAGGCTGGGCTGCCAGAGACCCATCTGGCATCCTCTCTCCTTACACTTACACTCTTAGAGAGACTGGACCAGAGGATGTGAACATAAGAATCATTTGCTGTGGAATCTGCCACACCGATCTTCATCAAACTAAAAATGATCTTGGCATGTCTAATTACCCCATGGTTCCTGGGCATGAAGTGGTAGGGGAAGTAGTGGAGGTGGGATCAGATGTGAGCAAGTTCACCGTAGGGGACATAGTTGGAGTTGGTTGCCTCGTTGGATGTTGCGGAGGTTGTAGCCCCTGCGAGAGAGATCTGGAACAGTATTGTCCAAAGAAGATTTGGAGCTACAATGATGTTTACATCAATGGTCAACCTACACAAGGCGGCTTCGCTAAAGCCACCGTCGTTCACCAAAAGTTTGTGGTCAAGATTCCAGAAGGAATGGCGGTTGAGCAGGCTGCGCCGCTACTGTGCGCTGGTGTGACTGTGTACAGTCCACTGAGCCACTTTGGTCTGAAACAACCAGGCCTAAGAGGAGGTATACTAGGGTTAGGTGGAGTCGGTCACATGGGTGTGAAAATAGCCAAAGCAATGGGTCACCATGTGACTGTCATAAGCTCATCAAACAAGAAGAGAGAAGAGGCATTGCAAGATCTTGGAGCTGATGATTACGTGATCGGTTCCGACCAAGCGAAGATGAGCGAATTGGCTGATTCGTTGGATTACGTAATTGACACGGTGCCTGTTCATCATGCACTTGAGCCATATTTGTCTCTGCTTAAGCTTGATGGTAAACTCATTCTCATGGGAGTTATCAACAATCCATTACAGTTTCTCACTCCTCTGCTTATGCTTGGGAGGAAAGTGATAACGGGGAGCTTCATAGGGAGCATGAAGGAGACAGAGGAGATGCTTGAGTTCTGTAAAGAAAAGGGTTTGAGTTCGATTATCGAAGTTGTGAAGATGGATTATGTTAACACTGCGTTTGAGAGACTTGAGAAGAACGATGTGCGTTATAGGTTCGTCGTTGATGTCGAAGGAAGCAATCTCGACGCTTTAATTGGCTCCGATGGAGGGTCTGGTGGCGGATCAACAAGTCGTGACCACATGGTCCTTCATGAGTACGTAAATGCTGCTGGGATTACATGA,0.9082692
V015,V015-wildtype,BL21DE3,19599.00,17098.38,ATGGGAATAATGGAGGCAGAGAGGAAAACAACAGGCTGGGCTGCCAGAGACCCATCTGGCATCCTCTCTCCTTACACTTACACTCTTAGAGAGACTGGACCAGAGGATGTGAACATAAGAATCATTTGCTGTGGAATCTGCCACACCGATCTTCATCAAACTAAAAATGATCTTGGCATGTCTAATTACCCCATGGTTCCTGGGCATGAAGTGGTAGGGGAAGTAGTGGAGGTGGGATCAGATGTGAGCAAGTTCACCGTAGGGGACATAGTTGGAGTTGGTTGCCTCGTTGGATGTTGCGGAGGTTGTAGCCCCTGCGAGAGAGATCTGGAACAGTATTGTCCAAAGAAGATTTGGAGCTACAATGATGTTTACATCAATGGTCAACCTACACAAGGCGGCTTCGCTAAAGCCACCGTCGTTCACCAAAAGTTTGTGGTCAAGATTCCAGAAGGAATGGCGGTTGAGCAGGCTGCGCCGCTACTGTGCGCTGGTGTGACTGTGTACAGTCCACTGAGCCACTTTGGTCTGAAACAACCAGGCCTAAGAGGAGGTATACTAGGGTTAGGTGGAGTCGGTCACATGGGTGTGAAAATAGCCAAAGCAATGGGTCACCATGTGACTGTCATAAGCTCATCAAACAAGAAGAGAGAAGAGGCATTGCAAGATCTTGGAGCTGATGATTACGTGATCGGTTCCGACCAAGCGAAGATGAGCGAATTGGCTGATTCGTTGGATTACGTAATTGACACGGTGCCTGTTCATCATGCACTTGAGCCATATTTGTCTCTGCTTAAGCTTGATGGTAAACTCATTCTCATGGGAGTTATCAACAATCCATTACAGTTTCTCACTCCTCTGCTTATGCTTGGGAGGAAAGTGATAACGGGGAGCTTCATAGGGAGCATGAAGGAGACAGAGGAGATGCTTGAGTTCTGTAAAGAAAAGGGTTTGAGTTCGATTATCGAAGTTGTGAAGATGGATTATGTTAACACTGCGTTTGAGAGACTTGAGAAGAACGATGTGCGTTATAGGTTCGTCGTTGATGTCGAAGGAAGCAATCTCGACGCTTTAATTGGCTCCGATGGAGGGTCTGGTGGCGGATCAACAAGTCGTGACCACATGGTCCTTCATGAGTACGTAAATGCTGCTGGGATTACATGA,0.9082692
V015,V015-wildtype,BL21DE3,13885.00,17098.38,ATGGGAATAATGGAGGCAGAGAGGAAAACAACAGGCTGGGCTGCCAGAGACCCATCTGGCATCCTCTCTCCTTACACTTACACTCTTAGAGAGACTGGACCAGAGGATGTGAACATAAGAATCATTTGCTGTGGAATCTGCCACACCGATCTTCATCAAACTAAAAATGATCTTGGCATGTCTAATTACCCCATGGTTCCTGGGCATGAAGTGGTAGGGGAAGTAGTGGAGGTGGGATCAGATGTGAGCAAGTTCACCGTAGGGGACATAGTTGGAGTTGGTTGCCTCGTTGGATGTTGCGGAGGTTGTAGCCCCTGCGAGAGAGATCTGGAACAGTATTGTCCAAAGAAGATTTGGAGCTACAATGATGTTTACATCAATGGTCAACCTACACAAGGCGGCTTCGCTAAAGCCACCGTCGTTCACCAAAAGTTTGTGGTCAAGATTCCAGAAGGAATGGCGGTTGAGCAGGCTGCGCCGCTACTGTGCGCTGGTGTGACTGTGTACAGTCCACTGAGCCACTTTGGTCTGAAACAACCAGGCCTAAGAGGAGGTATACTAGGGTTAGGTGGAGTCGGTCACATGGGTGTGAAAATAGCCAAAGCAATGGGTCACCATGTGACTGTCATAAGCTCATCAAACAAGAAGAGAGAAGAGGCATTGCAAGATCTTGGAGCTGATGATTACGTGATCGGTTCCGACCAAGCGAAGATGAGCGAATTGGCTGATTCGTTGGATTACGTAATTGACACGGTGCCTGTTCATCATGCACTTGAGCCATATTTGTCTCTGCTTAAGCTTGATGGTAAACTCATTCTCATGGGAGTTATCAACAATCCATTACAGTTTCTCACTCCTCTGCTTATGCTTGGGAGGAAAGTGATAACGGGGAGCTTCATAGGGAGCATGAAGGAGACAGAGGAGATGCTTGAGTTCTGTAAAGAAAAGGGTTTGAGTTCGATTATCGAAGTTGTGAAGATGGATTATGTTAACACTGCGTTTGAGAGACTTGAGAAGAACGATGTGCGTTATAGGTTCGTCGTTGATGTCGAAGGAAGCAATCTCGACGCTTTAATTGGCTCCGATGGAGGGTCTGGTGGCGGATCAACAAGTCGTGACCACATGGTCCTTCATGAGTACGTAAATGCTGCTGGGATTACATGA,0.9082692
V015,V015-wildtype,K12,14853.50,14075.00,ATGGGAATAATGGAGGCAGAGAGGAAAACAACAGGCTGGGCTGCCAGAGACCCATCTGGCATCCTCTCTCCTTACACTTACACTCTTAGAGAGACTGGACCAGAGGATGTGAACATAAGAATCATTTGCTGTGGAATCTGCCACACCGATCTTCATCAAACTAAAAATGATCTTGGCATGTCTAATTACCCCATGGTTCCTGGGCATGAAGTGGTAGGGGAAGTAGTGGAGGTGGGATCAGATGTGAGCAAGTTCACCGTAGGGGACATAGTTGGAGTTGGTTGCCTCGTTGGATGTTGCGGAGGTTGTAGCCCCTGCGAGAGAGATCTGGAACAGTATTGTCCAAAGAAGATTTGGAGCTACAATGATGTTTACATCAATGGTCAACCTACACAAGGCGGCTTCGCTAAAGCCACCGTCGTTCACCAAAAGTTTGTGGTCAAGATTCCAGAAGGAATGGCGGTTGAGCAGGCTGCGCCGCTACTGTGCGCTGGTGTGACTGTGTACAGTCCACTGAGCCACTTTGGTCTGAAACAACCAGGCCTAAGAGGAGGTATACTAGGGTTAGGTGGAGTCGGTCACATGGGTGTGAAAATAGCCAAAGCAATGGGTCACCATGTGACTGTCATAAGCTCATCAAACAAGAAGAGAGAAGAGGCATTGCAAGATCTTGGAGCTGATGATTACGTGATCGGTTCCGACCAAGCGAAGATGAGCGAATTGGCTGATTCGTTGGATTACGTAATTGACACGGTGCCTGTTCATCATGCACTTGAGCCATATTTGTCTCTGCTTAAGCTTGATGGTAAACTCATTCTCATGGGAGTTATCAACAATCCATTACAGTTTCTCACTCCTCTGCTTATGCTTGGGAGGAAAGTGATAACGGGGAGCTTCATAGGGAGCATGAAGGAGACAGAGGAGATGCTTGAGTTCTGTAAAGAAAAGGGTTTGAGTTCGATTATCGAAGTTGTGAAGATGGATTATGTTAACACTGCGTTTGAGAGACTTGAGAAGAACGATGTGCGTTATAGGTTCGTCGTTGATGTCGAAGGAAGCAATCTCGACGCTTTAATTGGCTCCGATGGAGGGTCTGGTGGCGGATCAACAAGTCGTGACCACATGGTCCTTCATGAGTACGTAAATGCTGCTGGGATTACATGA,0.9082692
V015,V015-wildtype,K12,14101.50,14075.00,ATGGGAATAATGGAGGCAGAGAGGAAAACAACAGGCTGGGCTGCCAGAGACCCATCTGGCATCCTCTCTCCTTACACTTACACTCTTAGAGAGACTGGACCAGAGGATGTGAACATAAGAATCATTTGCTGTGGAATCTGCCACACCGATCTTCATCAAACTAAAAATGATCTTGGCATGTCTAATTACCCCATGGTTCCTGGGCATGAAGTGGTAGGGGAAGTAGTGGAGGTGGGATCAGATGTGAGCAAGTTCACCGTAGGGGACATAGTTGGAGTTGGTTGCCTCGTTGGATGTTGCGGAGGTTGTAGCCCCTGCGAGAGAGATCTGGAACAGTATTGTCCAAAGAAGATTTGGAGCTACAATGATGTTTACATCAATGGTCAACCTACACAAGGCGGCTTCGCTAAAGCCACCGTCGTTCACCAAAAGTTTGTGGTCAAGATTCCAGAAGGAATGGCGGTTGAGCAGGCTGCGCCGCTACTGTGCGCTGGTGTGACTGTGTACAGTCCACTGAGCCACTTTGGTCTGAAACAACCAGGCCTAAGAGGAGGTATACTAGGGTTAGGTGGAGTCGGTCACATGGGTGTGAAAATAGCCAAAGCAATGGGTCACCATGTGACTGTCATAAGCTCATCAAACAAGAAGAGAGAAGAGGCATTGCAAGATCTTGGAGCTGATGATTACGTGATCGGTTCCGACCAAGCGAAGATGAGCGAATTGGCTGATTCGTTGGATTACGTAATTGACACGGTGCCTGTTCATCATGCACTTGAGCCATATTTGTCTCTGCTTAAGCTTGATGGTAAACTCATTCTCATGGGAGTTATCAACAATCCATTACAGTTTCTCACTCCTCTGCTTATGCTTGGGAGGAAAGTGATAACGGGGAGCTTCATAGGGAGCATGAAGGAGACAGAGGAGATGCTTGAGTTCTGTAAAGAAAAGGGTTTGAGTTCGATTATCGAAGTTGTGAAGATGGATTATGTTAACACTGCGTTTGAGAGACTTGAGAAGAACGATGTGCGTTATAGGTTCGTCGTTGATGTCGAAGGAAGCAATCTCGACGCTTTAATTGGCTCCGATGGAGGGTCTGGTGGCGGATCAACAAGTCGTGACCACATGGTCCTTCATGAGTACGTAAATGCTGCTGGGATTACATGA,0.9082692


sequence_id,sequence_name,strain,value,mean_fluorescence,Sequence,total_success_rate
<chr>,<chr>,<chr>,<dbl>,<dbl>,<chr>,<dbl>
V015,V015-wildtype,BL21DE3,16486.75,17098.38,ATGGGAATAATGGAGGCAGAGAGGAAAACAACAGGCTGGGCTGCCAGAGACCCATCTGGCATCCTCTCTCCTTACACTTACACTCTTAGAGAGACTGGACCAGAGGATGTGAACATAAGAATCATTTGCTGTGGAATCTGCCACACCGATCTTCATCAAACTAAAAATGATCTTGGCATGTCTAATTACCCCATGGTTCCTGGGCATGAAGTGGTAGGGGAAGTAGTGGAGGTGGGATCAGATGTGAGCAAGTTCACCGTAGGGGACATAGTTGGAGTTGGTTGCCTCGTTGGATGTTGCGGAGGTTGTAGCCCCTGCGAGAGAGATCTGGAACAGTATTGTCCAAAGAAGATTTGGAGCTACAATGATGTTTACATCAATGGTCAACCTACACAAGGCGGCTTCGCTAAAGCCACCGTCGTTCACCAAAAGTTTGTGGTCAAGATTCCAGAAGGAATGGCGGTTGAGCAGGCTGCGCCGCTACTGTGCGCTGGTGTGACTGTGTACAGTCCACTGAGCCACTTTGGTCTGAAACAACCAGGCCTAAGAGGAGGTATACTAGGGTTAGGTGGAGTCGGTCACATGGGTGTGAAAATAGCCAAAGCAATGGGTCACCATGTGACTGTCATAAGCTCATCAAACAAGAAGAGAGAAGAGGCATTGCAAGATCTTGGAGCTGATGATTACGTGATCGGTTCCGACCAAGCGAAGATGAGCGAATTGGCTGATTCGTTGGATTACGTAATTGACACGGTGCCTGTTCATCATGCACTTGAGCCATATTTGTCTCTGCTTAAGCTTGATGGTAAACTCATTCTCATGGGAGTTATCAACAATCCATTACAGTTTCTCACTCCTCTGCTTATGCTTGGGAGGAAAGTGATAACGGGGAGCTTCATAGGGAGCATGAAGGAGACAGAGGAGATGCTTGAGTTCTGTAAAGAAAAGGGTTTGAGTTCGATTATCGAAGTTGTGAAGATGGATTATGTTAACACTGCGTTTGAGAGACTTGAGAAGAACGATGTGCGTTATAGGTTCGTCGTTGATGTCGAAGGAAGCAATCTCGACGCTTTAATTGGCTCCGATGGAGGGTCTGGTGGCGGATCAACAAGTCGTGACCACATGGTCCTTCATGAGTACGTAAATGCTGCTGGGATTACATGA,0.9082692
V015,V015-wildtype,BL21DE3,16621.75,17098.38,ATGGGAATAATGGAGGCAGAGAGGAAAACAACAGGCTGGGCTGCCAGAGACCCATCTGGCATCCTCTCTCCTTACACTTACACTCTTAGAGAGACTGGACCAGAGGATGTGAACATAAGAATCATTTGCTGTGGAATCTGCCACACCGATCTTCATCAAACTAAAAATGATCTTGGCATGTCTAATTACCCCATGGTTCCTGGGCATGAAGTGGTAGGGGAAGTAGTGGAGGTGGGATCAGATGTGAGCAAGTTCACCGTAGGGGACATAGTTGGAGTTGGTTGCCTCGTTGGATGTTGCGGAGGTTGTAGCCCCTGCGAGAGAGATCTGGAACAGTATTGTCCAAAGAAGATTTGGAGCTACAATGATGTTTACATCAATGGTCAACCTACACAAGGCGGCTTCGCTAAAGCCACCGTCGTTCACCAAAAGTTTGTGGTCAAGATTCCAGAAGGAATGGCGGTTGAGCAGGCTGCGCCGCTACTGTGCGCTGGTGTGACTGTGTACAGTCCACTGAGCCACTTTGGTCTGAAACAACCAGGCCTAAGAGGAGGTATACTAGGGTTAGGTGGAGTCGGTCACATGGGTGTGAAAATAGCCAAAGCAATGGGTCACCATGTGACTGTCATAAGCTCATCAAACAAGAAGAGAGAAGAGGCATTGCAAGATCTTGGAGCTGATGATTACGTGATCGGTTCCGACCAAGCGAAGATGAGCGAATTGGCTGATTCGTTGGATTACGTAATTGACACGGTGCCTGTTCATCATGCACTTGAGCCATATTTGTCTCTGCTTAAGCTTGATGGTAAACTCATTCTCATGGGAGTTATCAACAATCCATTACAGTTTCTCACTCCTCTGCTTATGCTTGGGAGGAAAGTGATAACGGGGAGCTTCATAGGGAGCATGAAGGAGACAGAGGAGATGCTTGAGTTCTGTAAAGAAAAGGGTTTGAGTTCGATTATCGAAGTTGTGAAGATGGATTATGTTAACACTGCGTTTGAGAGACTTGAGAAGAACGATGTGCGTTATAGGTTCGTCGTTGATGTCGAAGGAAGCAATCTCGACGCTTTAATTGGCTCCGATGGAGGGTCTGGTGGCGGATCAACAAGTCGTGACCACATGGTCCTTCATGAGTACGTAAATGCTGCTGGGATTACATGA,0.9082692
V015,V015-wildtype,BL21DE3,17863.75,17098.38,ATGGGAATAATGGAGGCAGAGAGGAAAACAACAGGCTGGGCTGCCAGAGACCCATCTGGCATCCTCTCTCCTTACACTTACACTCTTAGAGAGACTGGACCAGAGGATGTGAACATAAGAATCATTTGCTGTGGAATCTGCCACACCGATCTTCATCAAACTAAAAATGATCTTGGCATGTCTAATTACCCCATGGTTCCTGGGCATGAAGTGGTAGGGGAAGTAGTGGAGGTGGGATCAGATGTGAGCAAGTTCACCGTAGGGGACATAGTTGGAGTTGGTTGCCTCGTTGGATGTTGCGGAGGTTGTAGCCCCTGCGAGAGAGATCTGGAACAGTATTGTCCAAAGAAGATTTGGAGCTACAATGATGTTTACATCAATGGTCAACCTACACAAGGCGGCTTCGCTAAAGCCACCGTCGTTCACCAAAAGTTTGTGGTCAAGATTCCAGAAGGAATGGCGGTTGAGCAGGCTGCGCCGCTACTGTGCGCTGGTGTGACTGTGTACAGTCCACTGAGCCACTTTGGTCTGAAACAACCAGGCCTAAGAGGAGGTATACTAGGGTTAGGTGGAGTCGGTCACATGGGTGTGAAAATAGCCAAAGCAATGGGTCACCATGTGACTGTCATAAGCTCATCAAACAAGAAGAGAGAAGAGGCATTGCAAGATCTTGGAGCTGATGATTACGTGATCGGTTCCGACCAAGCGAAGATGAGCGAATTGGCTGATTCGTTGGATTACGTAATTGACACGGTGCCTGTTCATCATGCACTTGAGCCATATTTGTCTCTGCTTAAGCTTGATGGTAAACTCATTCTCATGGGAGTTATCAACAATCCATTACAGTTTCTCACTCCTCTGCTTATGCTTGGGAGGAAAGTGATAACGGGGAGCTTCATAGGGAGCATGAAGGAGACAGAGGAGATGCTTGAGTTCTGTAAAGAAAAGGGTTTGAGTTCGATTATCGAAGTTGTGAAGATGGATTATGTTAACACTGCGTTTGAGAGACTTGAGAAGAACGATGTGCGTTATAGGTTCGTCGTTGATGTCGAAGGAAGCAATCTCGACGCTTTAATTGGCTCCGATGGAGGGTCTGGTGGCGGATCAACAAGTCGTGACCACATGGTCCTTCATGAGTACGTAAATGCTGCTGGGATTACATGA,0.9082692
V015,V015-wildtype,BL21DE3,13851.75,17098.38,ATGGGAATAATGGAGGCAGAGAGGAAAACAACAGGCTGGGCTGCCAGAGACCCATCTGGCATCCTCTCTCCTTACACTTACACTCTTAGAGAGACTGGACCAGAGGATGTGAACATAAGAATCATTTGCTGTGGAATCTGCCACACCGATCTTCATCAAACTAAAAATGATCTTGGCATGTCTAATTACCCCATGGTTCCTGGGCATGAAGTGGTAGGGGAAGTAGTGGAGGTGGGATCAGATGTGAGCAAGTTCACCGTAGGGGACATAGTTGGAGTTGGTTGCCTCGTTGGATGTTGCGGAGGTTGTAGCCCCTGCGAGAGAGATCTGGAACAGTATTGTCCAAAGAAGATTTGGAGCTACAATGATGTTTACATCAATGGTCAACCTACACAAGGCGGCTTCGCTAAAGCCACCGTCGTTCACCAAAAGTTTGTGGTCAAGATTCCAGAAGGAATGGCGGTTGAGCAGGCTGCGCCGCTACTGTGCGCTGGTGTGACTGTGTACAGTCCACTGAGCCACTTTGGTCTGAAACAACCAGGCCTAAGAGGAGGTATACTAGGGTTAGGTGGAGTCGGTCACATGGGTGTGAAAATAGCCAAAGCAATGGGTCACCATGTGACTGTCATAAGCTCATCAAACAAGAAGAGAGAAGAGGCATTGCAAGATCTTGGAGCTGATGATTACGTGATCGGTTCCGACCAAGCGAAGATGAGCGAATTGGCTGATTCGTTGGATTACGTAATTGACACGGTGCCTGTTCATCATGCACTTGAGCCATATTTGTCTCTGCTTAAGCTTGATGGTAAACTCATTCTCATGGGAGTTATCAACAATCCATTACAGTTTCTCACTCCTCTGCTTATGCTTGGGAGGAAAGTGATAACGGGGAGCTTCATAGGGAGCATGAAGGAGACAGAGGAGATGCTTGAGTTCTGTAAAGAAAAGGGTTTGAGTTCGATTATCGAAGTTGTGAAGATGGATTATGTTAACACTGCGTTTGAGAGACTTGAGAAGAACGATGTGCGTTATAGGTTCGTCGTTGATGTCGAAGGAAGCAATCTCGACGCTTTAATTGGCTCCGATGGAGGGTCTGGTGGCGGATCAACAAGTCGTGACCACATGGTCCTTCATGAGTACGTAAATGCTGCTGGGATTACATGA,0.9082692
V015,V015-wildtype,BL21DE3,19477.00,17098.38,ATGGGAATAATGGAGGCAGAGAGGAAAACAACAGGCTGGGCTGCCAGAGACCCATCTGGCATCCTCTCTCCTTACACTTACACTCTTAGAGAGACTGGACCAGAGGATGTGAACATAAGAATCATTTGCTGTGGAATCTGCCACACCGATCTTCATCAAACTAAAAATGATCTTGGCATGTCTAATTACCCCATGGTTCCTGGGCATGAAGTGGTAGGGGAAGTAGTGGAGGTGGGATCAGATGTGAGCAAGTTCACCGTAGGGGACATAGTTGGAGTTGGTTGCCTCGTTGGATGTTGCGGAGGTTGTAGCCCCTGCGAGAGAGATCTGGAACAGTATTGTCCAAAGAAGATTTGGAGCTACAATGATGTTTACATCAATGGTCAACCTACACAAGGCGGCTTCGCTAAAGCCACCGTCGTTCACCAAAAGTTTGTGGTCAAGATTCCAGAAGGAATGGCGGTTGAGCAGGCTGCGCCGCTACTGTGCGCTGGTGTGACTGTGTACAGTCCACTGAGCCACTTTGGTCTGAAACAACCAGGCCTAAGAGGAGGTATACTAGGGTTAGGTGGAGTCGGTCACATGGGTGTGAAAATAGCCAAAGCAATGGGTCACCATGTGACTGTCATAAGCTCATCAAACAAGAAGAGAGAAGAGGCATTGCAAGATCTTGGAGCTGATGATTACGTGATCGGTTCCGACCAAGCGAAGATGAGCGAATTGGCTGATTCGTTGGATTACGTAATTGACACGGTGCCTGTTCATCATGCACTTGAGCCATATTTGTCTCTGCTTAAGCTTGATGGTAAACTCATTCTCATGGGAGTTATCAACAATCCATTACAGTTTCTCACTCCTCTGCTTATGCTTGGGAGGAAAGTGATAACGGGGAGCTTCATAGGGAGCATGAAGGAGACAGAGGAGATGCTTGAGTTCTGTAAAGAAAAGGGTTTGAGTTCGATTATCGAAGTTGTGAAGATGGATTATGTTAACACTGCGTTTGAGAGACTTGAGAAGAACGATGTGCGTTATAGGTTCGTCGTTGATGTCGAAGGAAGCAATCTCGACGCTTTAATTGGCTCCGATGGAGGGTCTGGTGGCGGATCAACAAGTCGTGACCACATGGTCCTTCATGAGTACGTAAATGCTGCTGGGATTACATGA,0.9082692
V015,V015-wildtype,BL21DE3,19002.00,17098.38,ATGGGAATAATGGAGGCAGAGAGGAAAACAACAGGCTGGGCTGCCAGAGACCCATCTGGCATCCTCTCTCCTTACACTTACACTCTTAGAGAGACTGGACCAGAGGATGTGAACATAAGAATCATTTGCTGTGGAATCTGCCACACCGATCTTCATCAAACTAAAAATGATCTTGGCATGTCTAATTACCCCATGGTTCCTGGGCATGAAGTGGTAGGGGAAGTAGTGGAGGTGGGATCAGATGTGAGCAAGTTCACCGTAGGGGACATAGTTGGAGTTGGTTGCCTCGTTGGATGTTGCGGAGGTTGTAGCCCCTGCGAGAGAGATCTGGAACAGTATTGTCCAAAGAAGATTTGGAGCTACAATGATGTTTACATCAATGGTCAACCTACACAAGGCGGCTTCGCTAAAGCCACCGTCGTTCACCAAAAGTTTGTGGTCAAGATTCCAGAAGGAATGGCGGTTGAGCAGGCTGCGCCGCTACTGTGCGCTGGTGTGACTGTGTACAGTCCACTGAGCCACTTTGGTCTGAAACAACCAGGCCTAAGAGGAGGTATACTAGGGTTAGGTGGAGTCGGTCACATGGGTGTGAAAATAGCCAAAGCAATGGGTCACCATGTGACTGTCATAAGCTCATCAAACAAGAAGAGAGAAGAGGCATTGCAAGATCTTGGAGCTGATGATTACGTGATCGGTTCCGACCAAGCGAAGATGAGCGAATTGGCTGATTCGTTGGATTACGTAATTGACACGGTGCCTGTTCATCATGCACTTGAGCCATATTTGTCTCTGCTTAAGCTTGATGGTAAACTCATTCTCATGGGAGTTATCAACAATCCATTACAGTTTCTCACTCCTCTGCTTATGCTTGGGAGGAAAGTGATAACGGGGAGCTTCATAGGGAGCATGAAGGAGACAGAGGAGATGCTTGAGTTCTGTAAAGAAAAGGGTTTGAGTTCGATTATCGAAGTTGTGAAGATGGATTATGTTAACACTGCGTTTGAGAGACTTGAGAAGAACGATGTGCGTTATAGGTTCGTCGTTGATGTCGAAGGAAGCAATCTCGACGCTTTAATTGGCTCCGATGGAGGGTCTGGTGGCGGATCAACAAGTCGTGACCACATGGTCCTTCATGAGTACGTAAATGCTGCTGGGATTACATGA,0.9082692
V015,V015-wildtype,BL21DE3,19599.00,17098.38,ATGGGAATAATGGAGGCAGAGAGGAAAACAACAGGCTGGGCTGCCAGAGACCCATCTGGCATCCTCTCTCCTTACACTTACACTCTTAGAGAGACTGGACCAGAGGATGTGAACATAAGAATCATTTGCTGTGGAATCTGCCACACCGATCTTCATCAAACTAAAAATGATCTTGGCATGTCTAATTACCCCATGGTTCCTGGGCATGAAGTGGTAGGGGAAGTAGTGGAGGTGGGATCAGATGTGAGCAAGTTCACCGTAGGGGACATAGTTGGAGTTGGTTGCCTCGTTGGATGTTGCGGAGGTTGTAGCCCCTGCGAGAGAGATCTGGAACAGTATTGTCCAAAGAAGATTTGGAGCTACAATGATGTTTACATCAATGGTCAACCTACACAAGGCGGCTTCGCTAAAGCCACCGTCGTTCACCAAAAGTTTGTGGTCAAGATTCCAGAAGGAATGGCGGTTGAGCAGGCTGCGCCGCTACTGTGCGCTGGTGTGACTGTGTACAGTCCACTGAGCCACTTTGGTCTGAAACAACCAGGCCTAAGAGGAGGTATACTAGGGTTAGGTGGAGTCGGTCACATGGGTGTGAAAATAGCCAAAGCAATGGGTCACCATGTGACTGTCATAAGCTCATCAAACAAGAAGAGAGAAGAGGCATTGCAAGATCTTGGAGCTGATGATTACGTGATCGGTTCCGACCAAGCGAAGATGAGCGAATTGGCTGATTCGTTGGATTACGTAATTGACACGGTGCCTGTTCATCATGCACTTGAGCCATATTTGTCTCTGCTTAAGCTTGATGGTAAACTCATTCTCATGGGAGTTATCAACAATCCATTACAGTTTCTCACTCCTCTGCTTATGCTTGGGAGGAAAGTGATAACGGGGAGCTTCATAGGGAGCATGAAGGAGACAGAGGAGATGCTTGAGTTCTGTAAAGAAAAGGGTTTGAGTTCGATTATCGAAGTTGTGAAGATGGATTATGTTAACACTGCGTTTGAGAGACTTGAGAAGAACGATGTGCGTTATAGGTTCGTCGTTGATGTCGAAGGAAGCAATCTCGACGCTTTAATTGGCTCCGATGGAGGGTCTGGTGGCGGATCAACAAGTCGTGACCACATGGTCCTTCATGAGTACGTAAATGCTGCTGGGATTACATGA,0.9082692
V015,V015-wildtype,BL21DE3,13885.00,17098.38,ATGGGAATAATGGAGGCAGAGAGGAAAACAACAGGCTGGGCTGCCAGAGACCCATCTGGCATCCTCTCTCCTTACACTTACACTCTTAGAGAGACTGGACCAGAGGATGTGAACATAAGAATCATTTGCTGTGGAATCTGCCACACCGATCTTCATCAAACTAAAAATGATCTTGGCATGTCTAATTACCCCATGGTTCCTGGGCATGAAGTGGTAGGGGAAGTAGTGGAGGTGGGATCAGATGTGAGCAAGTTCACCGTAGGGGACATAGTTGGAGTTGGTTGCCTCGTTGGATGTTGCGGAGGTTGTAGCCCCTGCGAGAGAGATCTGGAACAGTATTGTCCAAAGAAGATTTGGAGCTACAATGATGTTTACATCAATGGTCAACCTACACAAGGCGGCTTCGCTAAAGCCACCGTCGTTCACCAAAAGTTTGTGGTCAAGATTCCAGAAGGAATGGCGGTTGAGCAGGCTGCGCCGCTACTGTGCGCTGGTGTGACTGTGTACAGTCCACTGAGCCACTTTGGTCTGAAACAACCAGGCCTAAGAGGAGGTATACTAGGGTTAGGTGGAGTCGGTCACATGGGTGTGAAAATAGCCAAAGCAATGGGTCACCATGTGACTGTCATAAGCTCATCAAACAAGAAGAGAGAAGAGGCATTGCAAGATCTTGGAGCTGATGATTACGTGATCGGTTCCGACCAAGCGAAGATGAGCGAATTGGCTGATTCGTTGGATTACGTAATTGACACGGTGCCTGTTCATCATGCACTTGAGCCATATTTGTCTCTGCTTAAGCTTGATGGTAAACTCATTCTCATGGGAGTTATCAACAATCCATTACAGTTTCTCACTCCTCTGCTTATGCTTGGGAGGAAAGTGATAACGGGGAGCTTCATAGGGAGCATGAAGGAGACAGAGGAGATGCTTGAGTTCTGTAAAGAAAAGGGTTTGAGTTCGATTATCGAAGTTGTGAAGATGGATTATGTTAACACTGCGTTTGAGAGACTTGAGAAGAACGATGTGCGTTATAGGTTCGTCGTTGATGTCGAAGGAAGCAATCTCGACGCTTTAATTGGCTCCGATGGAGGGTCTGGTGGCGGATCAACAAGTCGTGACCACATGGTCCTTCATGAGTACGTAAATGCTGCTGGGATTACATGA,0.9082692
V015,V015-wildtype,K12,14853.50,14427.00,ATGGGAATAATGGAGGCAGAGAGGAAAACAACAGGCTGGGCTGCCAGAGACCCATCTGGCATCCTCTCTCCTTACACTTACACTCTTAGAGAGACTGGACCAGAGGATGTGAACATAAGAATCATTTGCTGTGGAATCTGCCACACCGATCTTCATCAAACTAAAAATGATCTTGGCATGTCTAATTACCCCATGGTTCCTGGGCATGAAGTGGTAGGGGAAGTAGTGGAGGTGGGATCAGATGTGAGCAAGTTCACCGTAGGGGACATAGTTGGAGTTGGTTGCCTCGTTGGATGTTGCGGAGGTTGTAGCCCCTGCGAGAGAGATCTGGAACAGTATTGTCCAAAGAAGATTTGGAGCTACAATGATGTTTACATCAATGGTCAACCTACACAAGGCGGCTTCGCTAAAGCCACCGTCGTTCACCAAAAGTTTGTGGTCAAGATTCCAGAAGGAATGGCGGTTGAGCAGGCTGCGCCGCTACTGTGCGCTGGTGTGACTGTGTACAGTCCACTGAGCCACTTTGGTCTGAAACAACCAGGCCTAAGAGGAGGTATACTAGGGTTAGGTGGAGTCGGTCACATGGGTGTGAAAATAGCCAAAGCAATGGGTCACCATGTGACTGTCATAAGCTCATCAAACAAGAAGAGAGAAGAGGCATTGCAAGATCTTGGAGCTGATGATTACGTGATCGGTTCCGACCAAGCGAAGATGAGCGAATTGGCTGATTCGTTGGATTACGTAATTGACACGGTGCCTGTTCATCATGCACTTGAGCCATATTTGTCTCTGCTTAAGCTTGATGGTAAACTCATTCTCATGGGAGTTATCAACAATCCATTACAGTTTCTCACTCCTCTGCTTATGCTTGGGAGGAAAGTGATAACGGGGAGCTTCATAGGGAGCATGAAGGAGACAGAGGAGATGCTTGAGTTCTGTAAAGAAAAGGGTTTGAGTTCGATTATCGAAGTTGTGAAGATGGATTATGTTAACACTGCGTTTGAGAGACTTGAGAAGAACGATGTGCGTTATAGGTTCGTCGTTGATGTCGAAGGAAGCAATCTCGACGCTTTAATTGGCTCCGATGGAGGGTCTGGTGGCGGATCAACAAGTCGTGACCACATGGTCCTTCATGAGTACGTAAATGCTGCTGGGATTACATGA,0.9082692
V015,V015-wildtype,K12,14101.50,14427.00,ATGGGAATAATGGAGGCAGAGAGGAAAACAACAGGCTGGGCTGCCAGAGACCCATCTGGCATCCTCTCTCCTTACACTTACACTCTTAGAGAGACTGGACCAGAGGATGTGAACATAAGAATCATTTGCTGTGGAATCTGCCACACCGATCTTCATCAAACTAAAAATGATCTTGGCATGTCTAATTACCCCATGGTTCCTGGGCATGAAGTGGTAGGGGAAGTAGTGGAGGTGGGATCAGATGTGAGCAAGTTCACCGTAGGGGACATAGTTGGAGTTGGTTGCCTCGTTGGATGTTGCGGAGGTTGTAGCCCCTGCGAGAGAGATCTGGAACAGTATTGTCCAAAGAAGATTTGGAGCTACAATGATGTTTACATCAATGGTCAACCTACACAAGGCGGCTTCGCTAAAGCCACCGTCGTTCACCAAAAGTTTGTGGTCAAGATTCCAGAAGGAATGGCGGTTGAGCAGGCTGCGCCGCTACTGTGCGCTGGTGTGACTGTGTACAGTCCACTGAGCCACTTTGGTCTGAAACAACCAGGCCTAAGAGGAGGTATACTAGGGTTAGGTGGAGTCGGTCACATGGGTGTGAAAATAGCCAAAGCAATGGGTCACCATGTGACTGTCATAAGCTCATCAAACAAGAAGAGAGAAGAGGCATTGCAAGATCTTGGAGCTGATGATTACGTGATCGGTTCCGACCAAGCGAAGATGAGCGAATTGGCTGATTCGTTGGATTACGTAATTGACACGGTGCCTGTTCATCATGCACTTGAGCCATATTTGTCTCTGCTTAAGCTTGATGGTAAACTCATTCTCATGGGAGTTATCAACAATCCATTACAGTTTCTCACTCCTCTGCTTATGCTTGGGAGGAAAGTGATAACGGGGAGCTTCATAGGGAGCATGAAGGAGACAGAGGAGATGCTTGAGTTCTGTAAAGAAAAGGGTTTGAGTTCGATTATCGAAGTTGTGAAGATGGATTATGTTAACACTGCGTTTGAGAGACTTGAGAAGAACGATGTGCGTTATAGGTTCGTCGTTGATGTCGAAGGAAGCAATCTCGACGCTTTAATTGGCTCCGATGGAGGGTCTGGTGGCGGATCAACAAGTCGTGACCACATGGTCCTTCATGAGTACGTAAATGCTGCTGGGATTACATGA,0.9082692


#### Compute r for K12 and BL21DE3 strains via wildtype. Add r column to dataframe

In [12]:
# Translation success rate of the wildtype construct (sequence_id "V015"),
# computed with translation.success.rate.of.mRNA and stored for the r
# calculation below. local() keeps the helper variables out of the workspace.
success_rate_wt <- local({
  is_wildtype <- sequence_constructs$sequence_id == "V015"
  wildtype_sequence <- sequence_constructs[is_wildtype, "Sequence"]
  translation.success.rate.of.mRNA(wildtype_sequence, nonsense_error_rates_ecoli)
})

# Compute the r value of one strain from its wildtype construct:
#   r = (translation success rate of the wildtype) *
#       (mean fluorescence of the wildtype measured in that strain)
#
# Arguments:
#   strain          - strain label to look up in data$strain (e.g. "K12").
#   data            - data frame with the columns sequence_name, strain and
#                     mean_fluorescence.
#   success_rate_wt - translation success rate of the wildtype sequence.
#
# Returns r as a single number. If `data` holds no row with sequence_name
# "V015-wildtype" for `strain`, a warning is raised and NA is returned.
#
# NOTE(review): predicted_level is later computed as r * total_success_rate,
# so the wildtype itself is predicted at mean_fluo_wt * success_rate_wt^2 and
# not at its measured mean. Please confirm against the paper that r is meant
# to be this product rather than mean_fluo_wt / success_rate_wt.
calculate.r <- function(strain, data, success_rate_wt) {
  # which() discards comparisons that evaluate to NA, so rows with a missing
  # name or strain can neither be selected nor shadow the real wildtype rows.
  wt_rows <- which(
    data$sequence_name == "V015-wildtype" & data$strain == strain
  )
  if (length(wt_rows) == 0) {
    warning(
      "No 'V015-wildtype' measurement found for strain '", strain,
      "'; r is NA.",
      call. = FALSE
    )
    return(NA_real_)
  }
  # Only the first matching row is read (presumably mean_fluorescence is the
  # same in every wildtype row of a strain -- verify against the input data).
  # [[ ]] yields a plain numeric vector for data frames and tibbles alike.
  mean_fluo_wt <- data[["mean_fluorescence"]][wt_rows[1]]
  r <- success_rate_wt * mean_fluo_wt
  return(r)
}

# Add a column `r` to `data` that holds, for every row, the r value of the
# row's strain as returned by calculate.r().
#
# Arguments:
#   data            - data frame with the columns sequence_name, strain and
#                     mean_fluorescence (the columns calculate.r() reads).
#   success_rate_wt - translation success rate of the wildtype sequence.
#
# Returns `data` with the added (or overwritten) column `r`. Rows whose strain
# is NA get r = NA.
#
# r is computed once per strain present in `data`, so a strain other than
# "K12" or "BL21DE3" gets its own value (NA when calculate.r() finds no
# wildtype measurement for it) instead of silently inheriting the BL21DE3 one.
add.r.column <- function(data, success_rate_wt) {
  row_strains <- as.character(data$strain)
  strains <- unique(row_strains[!is.na(row_strains)])

  r_by_strain <- vapply(
    strains,
    function(strain) {
      # unlist() flattens the result to a plain numeric value in case
      # calculate.r() hands back a one-cell table rather than a number.
      as.numeric(unlist(calculate.r(strain, data, success_rate_wt)))[1]
    },
    numeric(1)
  )

  # match() maps each row to the position of its strain; NA strains stay NA.
  data$r <- unname(r_by_strain[match(row_strains, strains)])
  return(data)
}

# Attach the strain-specific r value to the outlier-free dataset and to the
# full dataset (the two calls are independent of each other).
data_gfp_zrinka_no_outliers <- add.r.column(
  data = data_gfp_zrinka_no_outliers,
  success_rate_wt = success_rate_wt
)
data_gfp_zrinka <- add.r.column(
  data = data_gfp_zrinka,
  success_rate_wt = success_rate_wt
)

#### Compute equation from paper

In [13]:
# Model prediction (equation taken from the paper):
#   predicted_level = r * total_success_rate
# The bare data frame names display the updated tables in the notebook.
data_gfp_zrinka[["predicted_level"]] <- with(
  data_gfp_zrinka,
  r * total_success_rate
)
data_gfp_zrinka
data_gfp_zrinka_no_outliers[["predicted_level"]] <- with(
  data_gfp_zrinka_no_outliers,
  r * total_success_rate
)
data_gfp_zrinka_no_outliers

sequence_id,sequence_name,strain,value,mean_fluorescence,Sequence,total_success_rate,r,predicted_level
<chr>,<chr>,<chr>,<dbl>,<dbl>,<chr>,<dbl>,<dbl>,<dbl>
V015,V015-wildtype,BL21DE3,16486.75,17098.38,ATGGGAATAATGGAGGCAGAGAGGAAAACAACAGGCTGGGCTGCCAGAGACCCATCTGGCATCCTCTCTCCTTACACTTACACTCTTAGAGAGACTGGACCAGAGGATGTGAACATAAGAATCATTTGCTGTGGAATCTGCCACACCGATCTTCATCAAACTAAAAATGATCTTGGCATGTCTAATTACCCCATGGTTCCTGGGCATGAAGTGGTAGGGGAAGTAGTGGAGGTGGGATCAGATGTGAGCAAGTTCACCGTAGGGGACATAGTTGGAGTTGGTTGCCTCGTTGGATGTTGCGGAGGTTGTAGCCCCTGCGAGAGAGATCTGGAACAGTATTGTCCAAAGAAGATTTGGAGCTACAATGATGTTTACATCAATGGTCAACCTACACAAGGCGGCTTCGCTAAAGCCACCGTCGTTCACCAAAAGTTTGTGGTCAAGATTCCAGAAGGAATGGCGGTTGAGCAGGCTGCGCCGCTACTGTGCGCTGGTGTGACTGTGTACAGTCCACTGAGCCACTTTGGTCTGAAACAACCAGGCCTAAGAGGAGGTATACTAGGGTTAGGTGGAGTCGGTCACATGGGTGTGAAAATAGCCAAAGCAATGGGTCACCATGTGACTGTCATAAGCTCATCAAACAAGAAGAGAGAAGAGGCATTGCAAGATCTTGGAGCTGATGATTACGTGATCGGTTCCGACCAAGCGAAGATGAGCGAATTGGCTGATTCGTTGGATTACGTAATTGACACGGTGCCTGTTCATCATGCACTTGAGCCATATTTGTCTCTGCTTAAGCTTGATGGTAAACTCATTCTCATGGGAGTTATCAACAATCCATTACAGTTTCTCACTCCTCTGCTTATGCTTGGGAGGAAAGTGATAACGGGGAGCTTCATAGGGAGCATGAAGGAGACAGAGGAGATGCTTGAGTTCTGTAAAGAAAAGGGTTTGAGTTCGATTATCGAAGTTGTGAAGATGGATTATGTTAACACTGCGTTTGAGAGACTTGAGAAGAACGATGTGCGTTATAGGTTCGTCGTTGATGTCGAAGGAAGCAATCTCGACGCTTTAATTGGCTCCGATGGAGGGTCTGGTGGCGGATCAACAAGTCGTGACCACATGGTCCTTCATGAGTACGTAAATGCTGCTGGGATTACATGA,0.9082692,15529.93,14105.36
V015,V015-wildtype,BL21DE3,16621.75,17098.38,ATGGGAATAATGGAGGCAGAGAGGAAAACAACAGGCTGGGCTGCCAGAGACCCATCTGGCATCCTCTCTCCTTACACTTACACTCTTAGAGAGACTGGACCAGAGGATGTGAACATAAGAATCATTTGCTGTGGAATCTGCCACACCGATCTTCATCAAACTAAAAATGATCTTGGCATGTCTAATTACCCCATGGTTCCTGGGCATGAAGTGGTAGGGGAAGTAGTGGAGGTGGGATCAGATGTGAGCAAGTTCACCGTAGGGGACATAGTTGGAGTTGGTTGCCTCGTTGGATGTTGCGGAGGTTGTAGCCCCTGCGAGAGAGATCTGGAACAGTATTGTCCAAAGAAGATTTGGAGCTACAATGATGTTTACATCAATGGTCAACCTACACAAGGCGGCTTCGCTAAAGCCACCGTCGTTCACCAAAAGTTTGTGGTCAAGATTCCAGAAGGAATGGCGGTTGAGCAGGCTGCGCCGCTACTGTGCGCTGGTGTGACTGTGTACAGTCCACTGAGCCACTTTGGTCTGAAACAACCAGGCCTAAGAGGAGGTATACTAGGGTTAGGTGGAGTCGGTCACATGGGTGTGAAAATAGCCAAAGCAATGGGTCACCATGTGACTGTCATAAGCTCATCAAACAAGAAGAGAGAAGAGGCATTGCAAGATCTTGGAGCTGATGATTACGTGATCGGTTCCGACCAAGCGAAGATGAGCGAATTGGCTGATTCGTTGGATTACGTAATTGACACGGTGCCTGTTCATCATGCACTTGAGCCATATTTGTCTCTGCTTAAGCTTGATGGTAAACTCATTCTCATGGGAGTTATCAACAATCCATTACAGTTTCTCACTCCTCTGCTTATGCTTGGGAGGAAAGTGATAACGGGGAGCTTCATAGGGAGCATGAAGGAGACAGAGGAGATGCTTGAGTTCTGTAAAGAAAAGGGTTTGAGTTCGATTATCGAAGTTGTGAAGATGGATTATGTTAACACTGCGTTTGAGAGACTTGAGAAGAACGATGTGCGTTATAGGTTCGTCGTTGATGTCGAAGGAAGCAATCTCGACGCTTTAATTGGCTCCGATGGAGGGTCTGGTGGCGGATCAACAAGTCGTGACCACATGGTCCTTCATGAGTACGTAAATGCTGCTGGGATTACATGA,0.9082692,15529.93,14105.36
V015,V015-wildtype,BL21DE3,17863.75,17098.38,ATGGGAATAATGGAGGCAGAGAGGAAAACAACAGGCTGGGCTGCCAGAGACCCATCTGGCATCCTCTCTCCTTACACTTACACTCTTAGAGAGACTGGACCAGAGGATGTGAACATAAGAATCATTTGCTGTGGAATCTGCCACACCGATCTTCATCAAACTAAAAATGATCTTGGCATGTCTAATTACCCCATGGTTCCTGGGCATGAAGTGGTAGGGGAAGTAGTGGAGGTGGGATCAGATGTGAGCAAGTTCACCGTAGGGGACATAGTTGGAGTTGGTTGCCTCGTTGGATGTTGCGGAGGTTGTAGCCCCTGCGAGAGAGATCTGGAACAGTATTGTCCAAAGAAGATTTGGAGCTACAATGATGTTTACATCAATGGTCAACCTACACAAGGCGGCTTCGCTAAAGCCACCGTCGTTCACCAAAAGTTTGTGGTCAAGATTCCAGAAGGAATGGCGGTTGAGCAGGCTGCGCCGCTACTGTGCGCTGGTGTGACTGTGTACAGTCCACTGAGCCACTTTGGTCTGAAACAACCAGGCCTAAGAGGAGGTATACTAGGGTTAGGTGGAGTCGGTCACATGGGTGTGAAAATAGCCAAAGCAATGGGTCACCATGTGACTGTCATAAGCTCATCAAACAAGAAGAGAGAAGAGGCATTGCAAGATCTTGGAGCTGATGATTACGTGATCGGTTCCGACCAAGCGAAGATGAGCGAATTGGCTGATTCGTTGGATTACGTAATTGACACGGTGCCTGTTCATCATGCACTTGAGCCATATTTGTCTCTGCTTAAGCTTGATGGTAAACTCATTCTCATGGGAGTTATCAACAATCCATTACAGTTTCTCACTCCTCTGCTTATGCTTGGGAGGAAAGTGATAACGGGGAGCTTCATAGGGAGCATGAAGGAGACAGAGGAGATGCTTGAGTTCTGTAAAGAAAAGGGTTTGAGTTCGATTATCGAAGTTGTGAAGATGGATTATGTTAACACTGCGTTTGAGAGACTTGAGAAGAACGATGTGCGTTATAGGTTCGTCGTTGATGTCGAAGGAAGCAATCTCGACGCTTTAATTGGCTCCGATGGAGGGTCTGGTGGCGGATCAACAAGTCGTGACCACATGGTCCTTCATGAGTACGTAAATGCTGCTGGGATTACATGA,0.9082692,15529.93,14105.36
V015,V015-wildtype,BL21DE3,13851.75,17098.38,ATGGGAATAATGGAGGCAGAGAGGAAAACAACAGGCTGGGCTGCCAGAGACCCATCTGGCATCCTCTCTCCTTACACTTACACTCTTAGAGAGACTGGACCAGAGGATGTGAACATAAGAATCATTTGCTGTGGAATCTGCCACACCGATCTTCATCAAACTAAAAATGATCTTGGCATGTCTAATTACCCCATGGTTCCTGGGCATGAAGTGGTAGGGGAAGTAGTGGAGGTGGGATCAGATGTGAGCAAGTTCACCGTAGGGGACATAGTTGGAGTTGGTTGCCTCGTTGGATGTTGCGGAGGTTGTAGCCCCTGCGAGAGAGATCTGGAACAGTATTGTCCAAAGAAGATTTGGAGCTACAATGATGTTTACATCAATGGTCAACCTACACAAGGCGGCTTCGCTAAAGCCACCGTCGTTCACCAAAAGTTTGTGGTCAAGATTCCAGAAGGAATGGCGGTTGAGCAGGCTGCGCCGCTACTGTGCGCTGGTGTGACTGTGTACAGTCCACTGAGCCACTTTGGTCTGAAACAACCAGGCCTAAGAGGAGGTATACTAGGGTTAGGTGGAGTCGGTCACATGGGTGTGAAAATAGCCAAAGCAATGGGTCACCATGTGACTGTCATAAGCTCATCAAACAAGAAGAGAGAAGAGGCATTGCAAGATCTTGGAGCTGATGATTACGTGATCGGTTCCGACCAAGCGAAGATGAGCGAATTGGCTGATTCGTTGGATTACGTAATTGACACGGTGCCTGTTCATCATGCACTTGAGCCATATTTGTCTCTGCTTAAGCTTGATGGTAAACTCATTCTCATGGGAGTTATCAACAATCCATTACAGTTTCTCACTCCTCTGCTTATGCTTGGGAGGAAAGTGATAACGGGGAGCTTCATAGGGAGCATGAAGGAGACAGAGGAGATGCTTGAGTTCTGTAAAGAAAAGGGTTTGAGTTCGATTATCGAAGTTGTGAAGATGGATTATGTTAACACTGCGTTTGAGAGACTTGAGAAGAACGATGTGCGTTATAGGTTCGTCGTTGATGTCGAAGGAAGCAATCTCGACGCTTTAATTGGCTCCGATGGAGGGTCTGGTGGCGGATCAACAAGTCGTGACCACATGGTCCTTCATGAGTACGTAAATGCTGCTGGGATTACATGA,0.9082692,15529.93,14105.36
V015,V015-wildtype,BL21DE3,19477.00,17098.38,ATGGGAATAATGGAGGCAGAGAGGAAAACAACAGGCTGGGCTGCCAGAGACCCATCTGGCATCCTCTCTCCTTACACTTACACTCTTAGAGAGACTGGACCAGAGGATGTGAACATAAGAATCATTTGCTGTGGAATCTGCCACACCGATCTTCATCAAACTAAAAATGATCTTGGCATGTCTAATTACCCCATGGTTCCTGGGCATGAAGTGGTAGGGGAAGTAGTGGAGGTGGGATCAGATGTGAGCAAGTTCACCGTAGGGGACATAGTTGGAGTTGGTTGCCTCGTTGGATGTTGCGGAGGTTGTAGCCCCTGCGAGAGAGATCTGGAACAGTATTGTCCAAAGAAGATTTGGAGCTACAATGATGTTTACATCAATGGTCAACCTACACAAGGCGGCTTCGCTAAAGCCACCGTCGTTCACCAAAAGTTTGTGGTCAAGATTCCAGAAGGAATGGCGGTTGAGCAGGCTGCGCCGCTACTGTGCGCTGGTGTGACTGTGTACAGTCCACTGAGCCACTTTGGTCTGAAACAACCAGGCCTAAGAGGAGGTATACTAGGGTTAGGTGGAGTCGGTCACATGGGTGTGAAAATAGCCAAAGCAATGGGTCACCATGTGACTGTCATAAGCTCATCAAACAAGAAGAGAGAAGAGGCATTGCAAGATCTTGGAGCTGATGATTACGTGATCGGTTCCGACCAAGCGAAGATGAGCGAATTGGCTGATTCGTTGGATTACGTAATTGACACGGTGCCTGTTCATCATGCACTTGAGCCATATTTGTCTCTGCTTAAGCTTGATGGTAAACTCATTCTCATGGGAGTTATCAACAATCCATTACAGTTTCTCACTCCTCTGCTTATGCTTGGGAGGAAAGTGATAACGGGGAGCTTCATAGGGAGCATGAAGGAGACAGAGGAGATGCTTGAGTTCTGTAAAGAAAAGGGTTTGAGTTCGATTATCGAAGTTGTGAAGATGGATTATGTTAACACTGCGTTTGAGAGACTTGAGAAGAACGATGTGCGTTATAGGTTCGTCGTTGATGTCGAAGGAAGCAATCTCGACGCTTTAATTGGCTCCGATGGAGGGTCTGGTGGCGGATCAACAAGTCGTGACCACATGGTCCTTCATGAGTACGTAAATGCTGCTGGGATTACATGA,0.9082692,15529.93,14105.36
V015,V015-wildtype,BL21DE3,19002.00,17098.38,ATGGGAATAATGGAGGCAGAGAGGAAAACAACAGGCTGGGCTGCCAGAGACCCATCTGGCATCCTCTCTCCTTACACTTACACTCTTAGAGAGACTGGACCAGAGGATGTGAACATAAGAATCATTTGCTGTGGAATCTGCCACACCGATCTTCATCAAACTAAAAATGATCTTGGCATGTCTAATTACCCCATGGTTCCTGGGCATGAAGTGGTAGGGGAAGTAGTGGAGGTGGGATCAGATGTGAGCAAGTTCACCGTAGGGGACATAGTTGGAGTTGGTTGCCTCGTTGGATGTTGCGGAGGTTGTAGCCCCTGCGAGAGAGATCTGGAACAGTATTGTCCAAAGAAGATTTGGAGCTACAATGATGTTTACATCAATGGTCAACCTACACAAGGCGGCTTCGCTAAAGCCACCGTCGTTCACCAAAAGTTTGTGGTCAAGATTCCAGAAGGAATGGCGGTTGAGCAGGCTGCGCCGCTACTGTGCGCTGGTGTGACTGTGTACAGTCCACTGAGCCACTTTGGTCTGAAACAACCAGGCCTAAGAGGAGGTATACTAGGGTTAGGTGGAGTCGGTCACATGGGTGTGAAAATAGCCAAAGCAATGGGTCACCATGTGACTGTCATAAGCTCATCAAACAAGAAGAGAGAAGAGGCATTGCAAGATCTTGGAGCTGATGATTACGTGATCGGTTCCGACCAAGCGAAGATGAGCGAATTGGCTGATTCGTTGGATTACGTAATTGACACGGTGCCTGTTCATCATGCACTTGAGCCATATTTGTCTCTGCTTAAGCTTGATGGTAAACTCATTCTCATGGGAGTTATCAACAATCCATTACAGTTTCTCACTCCTCTGCTTATGCTTGGGAGGAAAGTGATAACGGGGAGCTTCATAGGGAGCATGAAGGAGACAGAGGAGATGCTTGAGTTCTGTAAAGAAAAGGGTTTGAGTTCGATTATCGAAGTTGTGAAGATGGATTATGTTAACACTGCGTTTGAGAGACTTGAGAAGAACGATGTGCGTTATAGGTTCGTCGTTGATGTCGAAGGAAGCAATCTCGACGCTTTAATTGGCTCCGATGGAGGGTCTGGTGGCGGATCAACAAGTCGTGACCACATGGTCCTTCATGAGTACGTAAATGCTGCTGGGATTACATGA,0.9082692,15529.93,14105.36
V015,V015-wildtype,BL21DE3,19599.00,17098.38,ATGGGAATAATGGAGGCAGAGAGGAAAACAACAGGCTGGGCTGCCAGAGACCCATCTGGCATCCTCTCTCCTTACACTTACACTCTTAGAGAGACTGGACCAGAGGATGTGAACATAAGAATCATTTGCTGTGGAATCTGCCACACCGATCTTCATCAAACTAAAAATGATCTTGGCATGTCTAATTACCCCATGGTTCCTGGGCATGAAGTGGTAGGGGAAGTAGTGGAGGTGGGATCAGATGTGAGCAAGTTCACCGTAGGGGACATAGTTGGAGTTGGTTGCCTCGTTGGATGTTGCGGAGGTTGTAGCCCCTGCGAGAGAGATCTGGAACAGTATTGTCCAAAGAAGATTTGGAGCTACAATGATGTTTACATCAATGGTCAACCTACACAAGGCGGCTTCGCTAAAGCCACCGTCGTTCACCAAAAGTTTGTGGTCAAGATTCCAGAAGGAATGGCGGTTGAGCAGGCTGCGCCGCTACTGTGCGCTGGTGTGACTGTGTACAGTCCACTGAGCCACTTTGGTCTGAAACAACCAGGCCTAAGAGGAGGTATACTAGGGTTAGGTGGAGTCGGTCACATGGGTGTGAAAATAGCCAAAGCAATGGGTCACCATGTGACTGTCATAAGCTCATCAAACAAGAAGAGAGAAGAGGCATTGCAAGATCTTGGAGCTGATGATTACGTGATCGGTTCCGACCAAGCGAAGATGAGCGAATTGGCTGATTCGTTGGATTACGTAATTGACACGGTGCCTGTTCATCATGCACTTGAGCCATATTTGTCTCTGCTTAAGCTTGATGGTAAACTCATTCTCATGGGAGTTATCAACAATCCATTACAGTTTCTCACTCCTCTGCTTATGCTTGGGAGGAAAGTGATAACGGGGAGCTTCATAGGGAGCATGAAGGAGACAGAGGAGATGCTTGAGTTCTGTAAAGAAAAGGGTTTGAGTTCGATTATCGAAGTTGTGAAGATGGATTATGTTAACACTGCGTTTGAGAGACTTGAGAAGAACGATGTGCGTTATAGGTTCGTCGTTGATGTCGAAGGAAGCAATCTCGACGCTTTAATTGGCTCCGATGGAGGGTCTGGTGGCGGATCAACAAGTCGTGACCACATGGTCCTTCATGAGTACGTAAATGCTGCTGGGATTACATGA,0.9082692,15529.93,14105.36
V015,V015-wildtype,BL21DE3,13885.00,17098.38,ATGGGAATAATGGAGGCAGAGAGGAAAACAACAGGCTGGGCTGCCAGAGACCCATCTGGCATCCTCTCTCCTTACACTTACACTCTTAGAGAGACTGGACCAGAGGATGTGAACATAAGAATCATTTGCTGTGGAATCTGCCACACCGATCTTCATCAAACTAAAAATGATCTTGGCATGTCTAATTACCCCATGGTTCCTGGGCATGAAGTGGTAGGGGAAGTAGTGGAGGTGGGATCAGATGTGAGCAAGTTCACCGTAGGGGACATAGTTGGAGTTGGTTGCCTCGTTGGATGTTGCGGAGGTTGTAGCCCCTGCGAGAGAGATCTGGAACAGTATTGTCCAAAGAAGATTTGGAGCTACAATGATGTTTACATCAATGGTCAACCTACACAAGGCGGCTTCGCTAAAGCCACCGTCGTTCACCAAAAGTTTGTGGTCAAGATTCCAGAAGGAATGGCGGTTGAGCAGGCTGCGCCGCTACTGTGCGCTGGTGTGACTGTGTACAGTCCACTGAGCCACTTTGGTCTGAAACAACCAGGCCTAAGAGGAGGTATACTAGGGTTAGGTGGAGTCGGTCACATGGGTGTGAAAATAGCCAAAGCAATGGGTCACCATGTGACTGTCATAAGCTCATCAAACAAGAAGAGAGAAGAGGCATTGCAAGATCTTGGAGCTGATGATTACGTGATCGGTTCCGACCAAGCGAAGATGAGCGAATTGGCTGATTCGTTGGATTACGTAATTGACACGGTGCCTGTTCATCATGCACTTGAGCCATATTTGTCTCTGCTTAAGCTTGATGGTAAACTCATTCTCATGGGAGTTATCAACAATCCATTACAGTTTCTCACTCCTCTGCTTATGCTTGGGAGGAAAGTGATAACGGGGAGCTTCATAGGGAGCATGAAGGAGACAGAGGAGATGCTTGAGTTCTGTAAAGAAAAGGGTTTGAGTTCGATTATCGAAGTTGTGAAGATGGATTATGTTAACACTGCGTTTGAGAGACTTGAGAAGAACGATGTGCGTTATAGGTTCGTCGTTGATGTCGAAGGAAGCAATCTCGACGCTTTAATTGGCTCCGATGGAGGGTCTGGTGGCGGATCAACAAGTCGTGACCACATGGTCCTTCATGAGTACGTAAATGCTGCTGGGATTACATGA,0.9082692,15529.93,14105.36
V015,V015-wildtype,K12,14853.50,14075.00,ATGGGAATAATGGAGGCAGAGAGGAAAACAACAGGCTGGGCTGCCAGAGACCCATCTGGCATCCTCTCTCCTTACACTTACACTCTTAGAGAGACTGGACCAGAGGATGTGAACATAAGAATCATTTGCTGTGGAATCTGCCACACCGATCTTCATCAAACTAAAAATGATCTTGGCATGTCTAATTACCCCATGGTTCCTGGGCATGAAGTGGTAGGGGAAGTAGTGGAGGTGGGATCAGATGTGAGCAAGTTCACCGTAGGGGACATAGTTGGAGTTGGTTGCCTCGTTGGATGTTGCGGAGGTTGTAGCCCCTGCGAGAGAGATCTGGAACAGTATTGTCCAAAGAAGATTTGGAGCTACAATGATGTTTACATCAATGGTCAACCTACACAAGGCGGCTTCGCTAAAGCCACCGTCGTTCACCAAAAGTTTGTGGTCAAGATTCCAGAAGGAATGGCGGTTGAGCAGGCTGCGCCGCTACTGTGCGCTGGTGTGACTGTGTACAGTCCACTGAGCCACTTTGGTCTGAAACAACCAGGCCTAAGAGGAGGTATACTAGGGTTAGGTGGAGTCGGTCACATGGGTGTGAAAATAGCCAAAGCAATGGGTCACCATGTGACTGTCATAAGCTCATCAAACAAGAAGAGAGAAGAGGCATTGCAAGATCTTGGAGCTGATGATTACGTGATCGGTTCCGACCAAGCGAAGATGAGCGAATTGGCTGATTCGTTGGATTACGTAATTGACACGGTGCCTGTTCATCATGCACTTGAGCCATATTTGTCTCTGCTTAAGCTTGATGGTAAACTCATTCTCATGGGAGTTATCAACAATCCATTACAGTTTCTCACTCCTCTGCTTATGCTTGGGAGGAAAGTGATAACGGGGAGCTTCATAGGGAGCATGAAGGAGACAGAGGAGATGCTTGAGTTCTGTAAAGAAAAGGGTTTGAGTTCGATTATCGAAGTTGTGAAGATGGATTATGTTAACACTGCGTTTGAGAGACTTGAGAAGAACGATGTGCGTTATAGGTTCGTCGTTGATGTCGAAGGAAGCAATCTCGACGCTTTAATTGGCTCCGATGGAGGGTCTGGTGGCGGATCAACAAGTCGTGACCACATGGTCCTTCATGAGTACGTAAATGCTGCTGGGATTACATGA,0.9082692,12783.89,11611.21
V015,V015-wildtype,K12,14101.50,14075.00,ATGGGAATAATGGAGGCAGAGAGGAAAACAACAGGCTGGGCTGCCAGAGACCCATCTGGCATCCTCTCTCCTTACACTTACACTCTTAGAGAGACTGGACCAGAGGATGTGAACATAAGAATCATTTGCTGTGGAATCTGCCACACCGATCTTCATCAAACTAAAAATGATCTTGGCATGTCTAATTACCCCATGGTTCCTGGGCATGAAGTGGTAGGGGAAGTAGTGGAGGTGGGATCAGATGTGAGCAAGTTCACCGTAGGGGACATAGTTGGAGTTGGTTGCCTCGTTGGATGTTGCGGAGGTTGTAGCCCCTGCGAGAGAGATCTGGAACAGTATTGTCCAAAGAAGATTTGGAGCTACAATGATGTTTACATCAATGGTCAACCTACACAAGGCGGCTTCGCTAAAGCCACCGTCGTTCACCAAAAGTTTGTGGTCAAGATTCCAGAAGGAATGGCGGTTGAGCAGGCTGCGCCGCTACTGTGCGCTGGTGTGACTGTGTACAGTCCACTGAGCCACTTTGGTCTGAAACAACCAGGCCTAAGAGGAGGTATACTAGGGTTAGGTGGAGTCGGTCACATGGGTGTGAAAATAGCCAAAGCAATGGGTCACCATGTGACTGTCATAAGCTCATCAAACAAGAAGAGAGAAGAGGCATTGCAAGATCTTGGAGCTGATGATTACGTGATCGGTTCCGACCAAGCGAAGATGAGCGAATTGGCTGATTCGTTGGATTACGTAATTGACACGGTGCCTGTTCATCATGCACTTGAGCCATATTTGTCTCTGCTTAAGCTTGATGGTAAACTCATTCTCATGGGAGTTATCAACAATCCATTACAGTTTCTCACTCCTCTGCTTATGCTTGGGAGGAAAGTGATAACGGGGAGCTTCATAGGGAGCATGAAGGAGACAGAGGAGATGCTTGAGTTCTGTAAAGAAAAGGGTTTGAGTTCGATTATCGAAGTTGTGAAGATGGATTATGTTAACACTGCGTTTGAGAGACTTGAGAAGAACGATGTGCGTTATAGGTTCGTCGTTGATGTCGAAGGAAGCAATCTCGACGCTTTAATTGGCTCCGATGGAGGGTCTGGTGGCGGATCAACAAGTCGTGACCACATGGTCCTTCATGAGTACGTAAATGCTGCTGGGATTACATGA,0.9082692,12783.89,11611.21


sequence_id,sequence_name,strain,value,mean_fluorescence,Sequence,total_success_rate,r,predicted_level
<chr>,<chr>,<chr>,<dbl>,<dbl>,<chr>,<dbl>,<dbl>,<dbl>
V015,V015-wildtype,BL21DE3,16486.75,17098.38,ATGGGAATAATGGAGGCAGAGAGGAAAACAACAGGCTGGGCTGCCAGAGACCCATCTGGCATCCTCTCTCCTTACACTTACACTCTTAGAGAGACTGGACCAGAGGATGTGAACATAAGAATCATTTGCTGTGGAATCTGCCACACCGATCTTCATCAAACTAAAAATGATCTTGGCATGTCTAATTACCCCATGGTTCCTGGGCATGAAGTGGTAGGGGAAGTAGTGGAGGTGGGATCAGATGTGAGCAAGTTCACCGTAGGGGACATAGTTGGAGTTGGTTGCCTCGTTGGATGTTGCGGAGGTTGTAGCCCCTGCGAGAGAGATCTGGAACAGTATTGTCCAAAGAAGATTTGGAGCTACAATGATGTTTACATCAATGGTCAACCTACACAAGGCGGCTTCGCTAAAGCCACCGTCGTTCACCAAAAGTTTGTGGTCAAGATTCCAGAAGGAATGGCGGTTGAGCAGGCTGCGCCGCTACTGTGCGCTGGTGTGACTGTGTACAGTCCACTGAGCCACTTTGGTCTGAAACAACCAGGCCTAAGAGGAGGTATACTAGGGTTAGGTGGAGTCGGTCACATGGGTGTGAAAATAGCCAAAGCAATGGGTCACCATGTGACTGTCATAAGCTCATCAAACAAGAAGAGAGAAGAGGCATTGCAAGATCTTGGAGCTGATGATTACGTGATCGGTTCCGACCAAGCGAAGATGAGCGAATTGGCTGATTCGTTGGATTACGTAATTGACACGGTGCCTGTTCATCATGCACTTGAGCCATATTTGTCTCTGCTTAAGCTTGATGGTAAACTCATTCTCATGGGAGTTATCAACAATCCATTACAGTTTCTCACTCCTCTGCTTATGCTTGGGAGGAAAGTGATAACGGGGAGCTTCATAGGGAGCATGAAGGAGACAGAGGAGATGCTTGAGTTCTGTAAAGAAAAGGGTTTGAGTTCGATTATCGAAGTTGTGAAGATGGATTATGTTAACACTGCGTTTGAGAGACTTGAGAAGAACGATGTGCGTTATAGGTTCGTCGTTGATGTCGAAGGAAGCAATCTCGACGCTTTAATTGGCTCCGATGGAGGGTCTGGTGGCGGATCAACAAGTCGTGACCACATGGTCCTTCATGAGTACGTAAATGCTGCTGGGATTACATGA,0.9082692,15529.93,14105.36
V015,V015-wildtype,BL21DE3,16621.75,17098.38,ATGGGAATAATGGAGGCAGAGAGGAAAACAACAGGCTGGGCTGCCAGAGACCCATCTGGCATCCTCTCTCCTTACACTTACACTCTTAGAGAGACTGGACCAGAGGATGTGAACATAAGAATCATTTGCTGTGGAATCTGCCACACCGATCTTCATCAAACTAAAAATGATCTTGGCATGTCTAATTACCCCATGGTTCCTGGGCATGAAGTGGTAGGGGAAGTAGTGGAGGTGGGATCAGATGTGAGCAAGTTCACCGTAGGGGACATAGTTGGAGTTGGTTGCCTCGTTGGATGTTGCGGAGGTTGTAGCCCCTGCGAGAGAGATCTGGAACAGTATTGTCCAAAGAAGATTTGGAGCTACAATGATGTTTACATCAATGGTCAACCTACACAAGGCGGCTTCGCTAAAGCCACCGTCGTTCACCAAAAGTTTGTGGTCAAGATTCCAGAAGGAATGGCGGTTGAGCAGGCTGCGCCGCTACTGTGCGCTGGTGTGACTGTGTACAGTCCACTGAGCCACTTTGGTCTGAAACAACCAGGCCTAAGAGGAGGTATACTAGGGTTAGGTGGAGTCGGTCACATGGGTGTGAAAATAGCCAAAGCAATGGGTCACCATGTGACTGTCATAAGCTCATCAAACAAGAAGAGAGAAGAGGCATTGCAAGATCTTGGAGCTGATGATTACGTGATCGGTTCCGACCAAGCGAAGATGAGCGAATTGGCTGATTCGTTGGATTACGTAATTGACACGGTGCCTGTTCATCATGCACTTGAGCCATATTTGTCTCTGCTTAAGCTTGATGGTAAACTCATTCTCATGGGAGTTATCAACAATCCATTACAGTTTCTCACTCCTCTGCTTATGCTTGGGAGGAAAGTGATAACGGGGAGCTTCATAGGGAGCATGAAGGAGACAGAGGAGATGCTTGAGTTCTGTAAAGAAAAGGGTTTGAGTTCGATTATCGAAGTTGTGAAGATGGATTATGTTAACACTGCGTTTGAGAGACTTGAGAAGAACGATGTGCGTTATAGGTTCGTCGTTGATGTCGAAGGAAGCAATCTCGACGCTTTAATTGGCTCCGATGGAGGGTCTGGTGGCGGATCAACAAGTCGTGACCACATGGTCCTTCATGAGTACGTAAATGCTGCTGGGATTACATGA,0.9082692,15529.93,14105.36
V015,V015-wildtype,BL21DE3,17863.75,17098.38,ATGGGAATAATGGAGGCAGAGAGGAAAACAACAGGCTGGGCTGCCAGAGACCCATCTGGCATCCTCTCTCCTTACACTTACACTCTTAGAGAGACTGGACCAGAGGATGTGAACATAAGAATCATTTGCTGTGGAATCTGCCACACCGATCTTCATCAAACTAAAAATGATCTTGGCATGTCTAATTACCCCATGGTTCCTGGGCATGAAGTGGTAGGGGAAGTAGTGGAGGTGGGATCAGATGTGAGCAAGTTCACCGTAGGGGACATAGTTGGAGTTGGTTGCCTCGTTGGATGTTGCGGAGGTTGTAGCCCCTGCGAGAGAGATCTGGAACAGTATTGTCCAAAGAAGATTTGGAGCTACAATGATGTTTACATCAATGGTCAACCTACACAAGGCGGCTTCGCTAAAGCCACCGTCGTTCACCAAAAGTTTGTGGTCAAGATTCCAGAAGGAATGGCGGTTGAGCAGGCTGCGCCGCTACTGTGCGCTGGTGTGACTGTGTACAGTCCACTGAGCCACTTTGGTCTGAAACAACCAGGCCTAAGAGGAGGTATACTAGGGTTAGGTGGAGTCGGTCACATGGGTGTGAAAATAGCCAAAGCAATGGGTCACCATGTGACTGTCATAAGCTCATCAAACAAGAAGAGAGAAGAGGCATTGCAAGATCTTGGAGCTGATGATTACGTGATCGGTTCCGACCAAGCGAAGATGAGCGAATTGGCTGATTCGTTGGATTACGTAATTGACACGGTGCCTGTTCATCATGCACTTGAGCCATATTTGTCTCTGCTTAAGCTTGATGGTAAACTCATTCTCATGGGAGTTATCAACAATCCATTACAGTTTCTCACTCCTCTGCTTATGCTTGGGAGGAAAGTGATAACGGGGAGCTTCATAGGGAGCATGAAGGAGACAGAGGAGATGCTTGAGTTCTGTAAAGAAAAGGGTTTGAGTTCGATTATCGAAGTTGTGAAGATGGATTATGTTAACACTGCGTTTGAGAGACTTGAGAAGAACGATGTGCGTTATAGGTTCGTCGTTGATGTCGAAGGAAGCAATCTCGACGCTTTAATTGGCTCCGATGGAGGGTCTGGTGGCGGATCAACAAGTCGTGACCACATGGTCCTTCATGAGTACGTAAATGCTGCTGGGATTACATGA,0.9082692,15529.93,14105.36
V015,V015-wildtype,BL21DE3,13851.75,17098.38,ATGGGAATAATGGAGGCAGAGAGGAAAACAACAGGCTGGGCTGCCAGAGACCCATCTGGCATCCTCTCTCCTTACACTTACACTCTTAGAGAGACTGGACCAGAGGATGTGAACATAAGAATCATTTGCTGTGGAATCTGCCACACCGATCTTCATCAAACTAAAAATGATCTTGGCATGTCTAATTACCCCATGGTTCCTGGGCATGAAGTGGTAGGGGAAGTAGTGGAGGTGGGATCAGATGTGAGCAAGTTCACCGTAGGGGACATAGTTGGAGTTGGTTGCCTCGTTGGATGTTGCGGAGGTTGTAGCCCCTGCGAGAGAGATCTGGAACAGTATTGTCCAAAGAAGATTTGGAGCTACAATGATGTTTACATCAATGGTCAACCTACACAAGGCGGCTTCGCTAAAGCCACCGTCGTTCACCAAAAGTTTGTGGTCAAGATTCCAGAAGGAATGGCGGTTGAGCAGGCTGCGCCGCTACTGTGCGCTGGTGTGACTGTGTACAGTCCACTGAGCCACTTTGGTCTGAAACAACCAGGCCTAAGAGGAGGTATACTAGGGTTAGGTGGAGTCGGTCACATGGGTGTGAAAATAGCCAAAGCAATGGGTCACCATGTGACTGTCATAAGCTCATCAAACAAGAAGAGAGAAGAGGCATTGCAAGATCTTGGAGCTGATGATTACGTGATCGGTTCCGACCAAGCGAAGATGAGCGAATTGGCTGATTCGTTGGATTACGTAATTGACACGGTGCCTGTTCATCATGCACTTGAGCCATATTTGTCTCTGCTTAAGCTTGATGGTAAACTCATTCTCATGGGAGTTATCAACAATCCATTACAGTTTCTCACTCCTCTGCTTATGCTTGGGAGGAAAGTGATAACGGGGAGCTTCATAGGGAGCATGAAGGAGACAGAGGAGATGCTTGAGTTCTGTAAAGAAAAGGGTTTGAGTTCGATTATCGAAGTTGTGAAGATGGATTATGTTAACACTGCGTTTGAGAGACTTGAGAAGAACGATGTGCGTTATAGGTTCGTCGTTGATGTCGAAGGAAGCAATCTCGACGCTTTAATTGGCTCCGATGGAGGGTCTGGTGGCGGATCAACAAGTCGTGACCACATGGTCCTTCATGAGTACGTAAATGCTGCTGGGATTACATGA,0.9082692,15529.93,14105.36
V015,V015-wildtype,BL21DE3,19477.00,17098.38,ATGGGAATAATGGAGGCAGAGAGGAAAACAACAGGCTGGGCTGCCAGAGACCCATCTGGCATCCTCTCTCCTTACACTTACACTCTTAGAGAGACTGGACCAGAGGATGTGAACATAAGAATCATTTGCTGTGGAATCTGCCACACCGATCTTCATCAAACTAAAAATGATCTTGGCATGTCTAATTACCCCATGGTTCCTGGGCATGAAGTGGTAGGGGAAGTAGTGGAGGTGGGATCAGATGTGAGCAAGTTCACCGTAGGGGACATAGTTGGAGTTGGTTGCCTCGTTGGATGTTGCGGAGGTTGTAGCCCCTGCGAGAGAGATCTGGAACAGTATTGTCCAAAGAAGATTTGGAGCTACAATGATGTTTACATCAATGGTCAACCTACACAAGGCGGCTTCGCTAAAGCCACCGTCGTTCACCAAAAGTTTGTGGTCAAGATTCCAGAAGGAATGGCGGTTGAGCAGGCTGCGCCGCTACTGTGCGCTGGTGTGACTGTGTACAGTCCACTGAGCCACTTTGGTCTGAAACAACCAGGCCTAAGAGGAGGTATACTAGGGTTAGGTGGAGTCGGTCACATGGGTGTGAAAATAGCCAAAGCAATGGGTCACCATGTGACTGTCATAAGCTCATCAAACAAGAAGAGAGAAGAGGCATTGCAAGATCTTGGAGCTGATGATTACGTGATCGGTTCCGACCAAGCGAAGATGAGCGAATTGGCTGATTCGTTGGATTACGTAATTGACACGGTGCCTGTTCATCATGCACTTGAGCCATATTTGTCTCTGCTTAAGCTTGATGGTAAACTCATTCTCATGGGAGTTATCAACAATCCATTACAGTTTCTCACTCCTCTGCTTATGCTTGGGAGGAAAGTGATAACGGGGAGCTTCATAGGGAGCATGAAGGAGACAGAGGAGATGCTTGAGTTCTGTAAAGAAAAGGGTTTGAGTTCGATTATCGAAGTTGTGAAGATGGATTATGTTAACACTGCGTTTGAGAGACTTGAGAAGAACGATGTGCGTTATAGGTTCGTCGTTGATGTCGAAGGAAGCAATCTCGACGCTTTAATTGGCTCCGATGGAGGGTCTGGTGGCGGATCAACAAGTCGTGACCACATGGTCCTTCATGAGTACGTAAATGCTGCTGGGATTACATGA,0.9082692,15529.93,14105.36
V015,V015-wildtype,BL21DE3,19002.00,17098.38,ATGGGAATAATGGAGGCAGAGAGGAAAACAACAGGCTGGGCTGCCAGAGACCCATCTGGCATCCTCTCTCCTTACACTTACACTCTTAGAGAGACTGGACCAGAGGATGTGAACATAAGAATCATTTGCTGTGGAATCTGCCACACCGATCTTCATCAAACTAAAAATGATCTTGGCATGTCTAATTACCCCATGGTTCCTGGGCATGAAGTGGTAGGGGAAGTAGTGGAGGTGGGATCAGATGTGAGCAAGTTCACCGTAGGGGACATAGTTGGAGTTGGTTGCCTCGTTGGATGTTGCGGAGGTTGTAGCCCCTGCGAGAGAGATCTGGAACAGTATTGTCCAAAGAAGATTTGGAGCTACAATGATGTTTACATCAATGGTCAACCTACACAAGGCGGCTTCGCTAAAGCCACCGTCGTTCACCAAAAGTTTGTGGTCAAGATTCCAGAAGGAATGGCGGTTGAGCAGGCTGCGCCGCTACTGTGCGCTGGTGTGACTGTGTACAGTCCACTGAGCCACTTTGGTCTGAAACAACCAGGCCTAAGAGGAGGTATACTAGGGTTAGGTGGAGTCGGTCACATGGGTGTGAAAATAGCCAAAGCAATGGGTCACCATGTGACTGTCATAAGCTCATCAAACAAGAAGAGAGAAGAGGCATTGCAAGATCTTGGAGCTGATGATTACGTGATCGGTTCCGACCAAGCGAAGATGAGCGAATTGGCTGATTCGTTGGATTACGTAATTGACACGGTGCCTGTTCATCATGCACTTGAGCCATATTTGTCTCTGCTTAAGCTTGATGGTAAACTCATTCTCATGGGAGTTATCAACAATCCATTACAGTTTCTCACTCCTCTGCTTATGCTTGGGAGGAAAGTGATAACGGGGAGCTTCATAGGGAGCATGAAGGAGACAGAGGAGATGCTTGAGTTCTGTAAAGAAAAGGGTTTGAGTTCGATTATCGAAGTTGTGAAGATGGATTATGTTAACACTGCGTTTGAGAGACTTGAGAAGAACGATGTGCGTTATAGGTTCGTCGTTGATGTCGAAGGAAGCAATCTCGACGCTTTAATTGGCTCCGATGGAGGGTCTGGTGGCGGATCAACAAGTCGTGACCACATGGTCCTTCATGAGTACGTAAATGCTGCTGGGATTACATGA,0.9082692,15529.93,14105.36
V015,V015-wildtype,BL21DE3,19599.00,17098.38,ATGGGAATAATGGAGGCAGAGAGGAAAACAACAGGCTGGGCTGCCAGAGACCCATCTGGCATCCTCTCTCCTTACACTTACACTCTTAGAGAGACTGGACCAGAGGATGTGAACATAAGAATCATTTGCTGTGGAATCTGCCACACCGATCTTCATCAAACTAAAAATGATCTTGGCATGTCTAATTACCCCATGGTTCCTGGGCATGAAGTGGTAGGGGAAGTAGTGGAGGTGGGATCAGATGTGAGCAAGTTCACCGTAGGGGACATAGTTGGAGTTGGTTGCCTCGTTGGATGTTGCGGAGGTTGTAGCCCCTGCGAGAGAGATCTGGAACAGTATTGTCCAAAGAAGATTTGGAGCTACAATGATGTTTACATCAATGGTCAACCTACACAAGGCGGCTTCGCTAAAGCCACCGTCGTTCACCAAAAGTTTGTGGTCAAGATTCCAGAAGGAATGGCGGTTGAGCAGGCTGCGCCGCTACTGTGCGCTGGTGTGACTGTGTACAGTCCACTGAGCCACTTTGGTCTGAAACAACCAGGCCTAAGAGGAGGTATACTAGGGTTAGGTGGAGTCGGTCACATGGGTGTGAAAATAGCCAAAGCAATGGGTCACCATGTGACTGTCATAAGCTCATCAAACAAGAAGAGAGAAGAGGCATTGCAAGATCTTGGAGCTGATGATTACGTGATCGGTTCCGACCAAGCGAAGATGAGCGAATTGGCTGATTCGTTGGATTACGTAATTGACACGGTGCCTGTTCATCATGCACTTGAGCCATATTTGTCTCTGCTTAAGCTTGATGGTAAACTCATTCTCATGGGAGTTATCAACAATCCATTACAGTTTCTCACTCCTCTGCTTATGCTTGGGAGGAAAGTGATAACGGGGAGCTTCATAGGGAGCATGAAGGAGACAGAGGAGATGCTTGAGTTCTGTAAAGAAAAGGGTTTGAGTTCGATTATCGAAGTTGTGAAGATGGATTATGTTAACACTGCGTTTGAGAGACTTGAGAAGAACGATGTGCGTTATAGGTTCGTCGTTGATGTCGAAGGAAGCAATCTCGACGCTTTAATTGGCTCCGATGGAGGGTCTGGTGGCGGATCAACAAGTCGTGACCACATGGTCCTTCATGAGTACGTAAATGCTGCTGGGATTACATGA,0.9082692,15529.93,14105.36
V015,V015-wildtype,BL21DE3,13885.00,17098.38,ATGGGAATAATGGAGGCAGAGAGGAAAACAACAGGCTGGGCTGCCAGAGACCCATCTGGCATCCTCTCTCCTTACACTTACACTCTTAGAGAGACTGGACCAGAGGATGTGAACATAAGAATCATTTGCTGTGGAATCTGCCACACCGATCTTCATCAAACTAAAAATGATCTTGGCATGTCTAATTACCCCATGGTTCCTGGGCATGAAGTGGTAGGGGAAGTAGTGGAGGTGGGATCAGATGTGAGCAAGTTCACCGTAGGGGACATAGTTGGAGTTGGTTGCCTCGTTGGATGTTGCGGAGGTTGTAGCCCCTGCGAGAGAGATCTGGAACAGTATTGTCCAAAGAAGATTTGGAGCTACAATGATGTTTACATCAATGGTCAACCTACACAAGGCGGCTTCGCTAAAGCCACCGTCGTTCACCAAAAGTTTGTGGTCAAGATTCCAGAAGGAATGGCGGTTGAGCAGGCTGCGCCGCTACTGTGCGCTGGTGTGACTGTGTACAGTCCACTGAGCCACTTTGGTCTGAAACAACCAGGCCTAAGAGGAGGTATACTAGGGTTAGGTGGAGTCGGTCACATGGGTGTGAAAATAGCCAAAGCAATGGGTCACCATGTGACTGTCATAAGCTCATCAAACAAGAAGAGAGAAGAGGCATTGCAAGATCTTGGAGCTGATGATTACGTGATCGGTTCCGACCAAGCGAAGATGAGCGAATTGGCTGATTCGTTGGATTACGTAATTGACACGGTGCCTGTTCATCATGCACTTGAGCCATATTTGTCTCTGCTTAAGCTTGATGGTAAACTCATTCTCATGGGAGTTATCAACAATCCATTACAGTTTCTCACTCCTCTGCTTATGCTTGGGAGGAAAGTGATAACGGGGAGCTTCATAGGGAGCATGAAGGAGACAGAGGAGATGCTTGAGTTCTGTAAAGAAAAGGGTTTGAGTTCGATTATCGAAGTTGTGAAGATGGATTATGTTAACACTGCGTTTGAGAGACTTGAGAAGAACGATGTGCGTTATAGGTTCGTCGTTGATGTCGAAGGAAGCAATCTCGACGCTTTAATTGGCTCCGATGGAGGGTCTGGTGGCGGATCAACAAGTCGTGACCACATGGTCCTTCATGAGTACGTAAATGCTGCTGGGATTACATGA,0.9082692,15529.93,14105.36
V015,V015-wildtype,K12,14853.50,14427.00,ATGGGAATAATGGAGGCAGAGAGGAAAACAACAGGCTGGGCTGCCAGAGACCCATCTGGCATCCTCTCTCCTTACACTTACACTCTTAGAGAGACTGGACCAGAGGATGTGAACATAAGAATCATTTGCTGTGGAATCTGCCACACCGATCTTCATCAAACTAAAAATGATCTTGGCATGTCTAATTACCCCATGGTTCCTGGGCATGAAGTGGTAGGGGAAGTAGTGGAGGTGGGATCAGATGTGAGCAAGTTCACCGTAGGGGACATAGTTGGAGTTGGTTGCCTCGTTGGATGTTGCGGAGGTTGTAGCCCCTGCGAGAGAGATCTGGAACAGTATTGTCCAAAGAAGATTTGGAGCTACAATGATGTTTACATCAATGGTCAACCTACACAAGGCGGCTTCGCTAAAGCCACCGTCGTTCACCAAAAGTTTGTGGTCAAGATTCCAGAAGGAATGGCGGTTGAGCAGGCTGCGCCGCTACTGTGCGCTGGTGTGACTGTGTACAGTCCACTGAGCCACTTTGGTCTGAAACAACCAGGCCTAAGAGGAGGTATACTAGGGTTAGGTGGAGTCGGTCACATGGGTGTGAAAATAGCCAAAGCAATGGGTCACCATGTGACTGTCATAAGCTCATCAAACAAGAAGAGAGAAGAGGCATTGCAAGATCTTGGAGCTGATGATTACGTGATCGGTTCCGACCAAGCGAAGATGAGCGAATTGGCTGATTCGTTGGATTACGTAATTGACACGGTGCCTGTTCATCATGCACTTGAGCCATATTTGTCTCTGCTTAAGCTTGATGGTAAACTCATTCTCATGGGAGTTATCAACAATCCATTACAGTTTCTCACTCCTCTGCTTATGCTTGGGAGGAAAGTGATAACGGGGAGCTTCATAGGGAGCATGAAGGAGACAGAGGAGATGCTTGAGTTCTGTAAAGAAAAGGGTTTGAGTTCGATTATCGAAGTTGTGAAGATGGATTATGTTAACACTGCGTTTGAGAGACTTGAGAAGAACGATGTGCGTTATAGGTTCGTCGTTGATGTCGAAGGAAGCAATCTCGACGCTTTAATTGGCTCCGATGGAGGGTCTGGTGGCGGATCAACAAGTCGTGACCACATGGTCCTTCATGAGTACGTAAATGCTGCTGGGATTACATGA,0.9082692,13103.60,11901.60
V015,V015-wildtype,K12,14101.50,14427.00,ATGGGAATAATGGAGGCAGAGAGGAAAACAACAGGCTGGGCTGCCAGAGACCCATCTGGCATCCTCTCTCCTTACACTTACACTCTTAGAGAGACTGGACCAGAGGATGTGAACATAAGAATCATTTGCTGTGGAATCTGCCACACCGATCTTCATCAAACTAAAAATGATCTTGGCATGTCTAATTACCCCATGGTTCCTGGGCATGAAGTGGTAGGGGAAGTAGTGGAGGTGGGATCAGATGTGAGCAAGTTCACCGTAGGGGACATAGTTGGAGTTGGTTGCCTCGTTGGATGTTGCGGAGGTTGTAGCCCCTGCGAGAGAGATCTGGAACAGTATTGTCCAAAGAAGATTTGGAGCTACAATGATGTTTACATCAATGGTCAACCTACACAAGGCGGCTTCGCTAAAGCCACCGTCGTTCACCAAAAGTTTGTGGTCAAGATTCCAGAAGGAATGGCGGTTGAGCAGGCTGCGCCGCTACTGTGCGCTGGTGTGACTGTGTACAGTCCACTGAGCCACTTTGGTCTGAAACAACCAGGCCTAAGAGGAGGTATACTAGGGTTAGGTGGAGTCGGTCACATGGGTGTGAAAATAGCCAAAGCAATGGGTCACCATGTGACTGTCATAAGCTCATCAAACAAGAAGAGAGAAGAGGCATTGCAAGATCTTGGAGCTGATGATTACGTGATCGGTTCCGACCAAGCGAAGATGAGCGAATTGGCTGATTCGTTGGATTACGTAATTGACACGGTGCCTGTTCATCATGCACTTGAGCCATATTTGTCTCTGCTTAAGCTTGATGGTAAACTCATTCTCATGGGAGTTATCAACAATCCATTACAGTTTCTCACTCCTCTGCTTATGCTTGGGAGGAAAGTGATAACGGGGAGCTTCATAGGGAGCATGAAGGAGACAGAGGAGATGCTTGAGTTCTGTAAAGAAAAGGGTTTGAGTTCGATTATCGAAGTTGTGAAGATGGATTATGTTAACACTGCGTTTGAGAGACTTGAGAAGAACGATGTGCGTTATAGGTTCGTCGTTGATGTCGAAGGAAGCAATCTCGACGCTTTAATTGGCTCCGATGGAGGGTCTGGTGGCGGATCAACAAGTCGTGACCACATGGTCCTTCATGAGTACGTAAATGCTGCTGGGATTACATGA,0.9082692,13103.60,11901.60


### Create dataframes and write .csv files with measured values for plotting and model evaluation

In [14]:
# Per-replicate measurements ("value", not averaged) next to the predicted
# levels of the accuracy model, exported for all strains together and for each
# strain separately.
measured_columns <- c("sequence_name", "strain", "value", "predicted_level")
accuracy_output_dir <- "dataframes/accuracy"
# write.csv() cannot create missing folders; make sure the target (relative to
# the working directory) exists instead of failing on a fresh checkout.
dir.create(accuracy_output_dir, recursive = TRUE, showWarnings = FALSE)

# All strains
accuracy_model_df <- data_gfp_zrinka[, measured_columns]
write.csv(
  accuracy_model_df,
  file = file.path(accuracy_output_dir, "accuracy_model_predictions.csv"),
  row.names = FALSE
)

# K12 only. %in% never returns NA, so rows with a missing strain are left out
# instead of being exported as all-NA rows.
accuracy_model_df_k12 <- accuracy_model_df[
  accuracy_model_df$strain %in% "K12",
]
write.csv(
  accuracy_model_df_k12,
  file = file.path(accuracy_output_dir, "accuracy_model_predictions_k12.csv"),
  row.names = FALSE
)

# BL21DE3 only
accuracy_model_df_bl21 <- accuracy_model_df[
  accuracy_model_df$strain %in% "BL21DE3",
]
write.csv(
  accuracy_model_df_bl21,
  file = file.path(accuracy_output_dir, "accuracy_model_predictions_bl21.csv"),
  row.names = FALSE
)

Dataframes with no outliers

In [15]:
# Same export as for the full dataset, but based on the outlier-free data:
# per-replicate measurements ("value", not averaged) next to the predicted
# levels, for all strains together and for each strain separately.
measured_columns <- c("sequence_name", "strain", "value", "predicted_level")
accuracy_output_dir <- "dataframes/accuracy"
# write.csv() cannot create missing folders; make sure the target (relative to
# the working directory) exists instead of failing on a fresh checkout.
dir.create(accuracy_output_dir, recursive = TRUE, showWarnings = FALSE)

# All strains, no outliers
accuracy_model_df_no_outliers <- data_gfp_zrinka_no_outliers[, measured_columns]
write.csv(
  accuracy_model_df_no_outliers,
  file = file.path(
    accuracy_output_dir,
    "accuracy_model_predictions_no_outliers.csv"
  ),
  row.names = FALSE
)

# K12 only, no outliers. %in% never returns NA, so rows with a missing strain
# are left out instead of being exported as all-NA rows.
accuracy_model_df_no_outliers_k12 <- accuracy_model_df_no_outliers[
  accuracy_model_df_no_outliers$strain %in% "K12",
]
write.csv(
  accuracy_model_df_no_outliers_k12,
  file = file.path(
    accuracy_output_dir,
    "accuracy_model_predictions_no_outliers_k12.csv"
  ),
  row.names = FALSE
)

# BL21DE3 only, no outliers
accuracy_model_df_no_outliers_bl21 <- accuracy_model_df_no_outliers[
  accuracy_model_df_no_outliers$strain %in% "BL21DE3",
]
write.csv(
  accuracy_model_df_no_outliers_bl21,
  file = file.path(
    accuracy_output_dir,
    "accuracy_model_predictions_no_outliers_bl21.csv"
  ),
  row.names = FALSE
)

### Create dataframe and write .csv file with mean fluorescence values for model evaluation

In [16]:
# Collapse the replicates: one row per construct (sequence_name) and strain,
# holding the mean of mean_fluorescence and the mean of predicted_level.

# --- full dataset ---
data_gfp_zrinka_grouped <- aggregate(
  cbind(mean_fluorescence, predicted_level) ~ sequence_name + strain,
  data = data_gfp_zrinka,
  FUN = mean
)
# per-strain subsets of the averaged table
data_gfp_zrinka_grouped_k12 <- data_gfp_zrinka_grouped[
  data_gfp_zrinka_grouped[["strain"]] == "K12",
]
data_gfp_zrinka_grouped_bl21 <- data_gfp_zrinka_grouped[
  data_gfp_zrinka_grouped[["strain"]] == "BL21DE3",
]

# --- dataset without outliers ---
data_gfp_zrinka_grouped_no_outliers <- aggregate(
  cbind(mean_fluorescence, predicted_level) ~ sequence_name + strain,
  data = data_gfp_zrinka_no_outliers,
  FUN = mean
)
# per-strain subsets of the averaged, outlier-free table
data_gfp_zrinka_grouped_no_outliers_k12 <- data_gfp_zrinka_grouped_no_outliers[
  data_gfp_zrinka_grouped_no_outliers[["strain"]] == "K12",
]
data_gfp_zrinka_grouped_no_outliers_bl21 <- data_gfp_zrinka_grouped_no_outliers[
  data_gfp_zrinka_grouped_no_outliers[["strain"]] == "BL21DE3",
]

In [17]:
# Export the averaged accuracy-model tables (all strains, K12 only, BL21DE3
# only) as .csv files for model evaluation.
averaged_columns <- c(
  "sequence_name", "strain", "mean_fluorescence", "predicted_level"
)
accuracy_output_dir <- "dataframes/accuracy"
# write.csv() cannot create missing folders; make sure the target (relative to
# the working directory) exists instead of failing on a fresh checkout.
dir.create(accuracy_output_dir, recursive = TRUE, showWarnings = FALSE)

# Output file name -> table written to it
averaged_exports <- list(
  "accuracy_model_predictions_averaged.csv" = data_gfp_zrinka_grouped,
  "accuracy_model_predictions_averaged_k12.csv" = data_gfp_zrinka_grouped_k12,
  "accuracy_model_predictions_averaged_bl21.csv" = data_gfp_zrinka_grouped_bl21
)
for (file_name in names(averaged_exports)) {
  write.csv(
    averaged_exports[[file_name]][, averaged_columns],
    file = file.path(accuracy_output_dir, file_name),
    row.names = FALSE
  )
}

Dataframe with no outliers

In [None]:
# Export the averaged, outlier-free accuracy-model tables (all strains, K12
# only, BL21DE3 only) as .csv files for model evaluation.
averaged_columns <- c(
  "sequence_name", "strain", "mean_fluorescence", "predicted_level"
)
accuracy_output_dir <- "dataframes/accuracy"
# write.csv() cannot create missing folders; make sure the target (relative to
# the working directory) exists instead of failing on a fresh checkout.
dir.create(accuracy_output_dir, recursive = TRUE, showWarnings = FALSE)

# Output file name -> table written to it
averaged_exports_no_outliers <- list(
  "accuracy_model_predictions_averaged_no_outliers.csv" =
    data_gfp_zrinka_grouped_no_outliers,
  "accuracy_model_predictions_averaged_no_outliers_k12.csv" =
    data_gfp_zrinka_grouped_no_outliers_k12,
  "accuracy_model_predictions_averaged_no_outliers_bl21.csv" =
    data_gfp_zrinka_grouped_no_outliers_bl21
)
for (file_name in names(averaged_exports_no_outliers)) {
  write.csv(
    averaged_exports_no_outliers[[file_name]][, averaged_columns],
    file = file.path(accuracy_output_dir, file_name),
    row.names = FALSE
  )
}