In [1]:
from trnasimtools.serialize import SerializeTwoCodonMultiTranscript
import os

In [2]:
from IPython.display import display, HTML
import numpy as np
import pandas as pd
import seaborn as sns
from matplotlib import rcParams
import matplotlib.pyplot as plt
from matplotlib.colors import LogNorm
%matplotlib inline

First attempt to model a system where we are gradually increasing the expression of a transgene. Here, we have two populations of transcripts - one representing all cellular transcripts, and the other representing a population of transgene transcripts (note that in this toy system, the two kinds of transcripts are identical, aside from their copy numbers).

The idea is that as transgene expression increases, cellular transcript expression should decrease, since there is a fixed amount of cellular resources (i.e. tRNAs and ribosomes). To simulate IPTG induction of transgene expression, we run a series of simulations where transgene transcript copy number is increased from 10 to 50 copies against a fixed 100 cellular transcripts, i.e. from roughly 9% to 33% of total transcripts in the simulation. Here, codon usage is the same for both transcript types, and there is an order of magnitude difference between transcripts/ribosomes and tRNAs (tRNAs outnumber each of them roughly tenfold).

**Results**: We see very little, if any, difference in cellular transcript expression as we increase transgene transcript copy number. The reason seems to be that free ribosomes and charged tRNAs are nowhere near depletion, even when the ribosome binding rate is raised to two orders of magnitude above the tRNA charging rate. When we instead set the tRNA, transcript, and ribosome copy numbers equal to each other, the picture changes: free ribosomes and charged tRNAs are depleted as transgene transcript copy number increases.

- Should probably try increasing the binding rate further (in principle, there should be some limit where we do see ribosome use hit saturation, although it may or may not be unrealistically high with this set of parameters)
- Could also try initializing simulations with partially depleted tRNA pools.


In [3]:
#!mkdir ../yaml/august-4-2022
#!mkdir ../output/august-4-2022

In [5]:
# Simulation parameters shared by the cells below: the serializer config, the
# generated command lines, and the output file names are all built from them.
# Units are not stated anywhere in this notebook.
time_limit = 500  # passed to the serializer as time_limit
time_step = 5  # passed to the serializer as time_step
transcript_lens = [100, 100]  # presumably in the same order as transcript_names -- confirm
cellular_transcript_copy_number = 100
# Values swept over, one batch of simulations per entry.
transgene_transcript_copy_numbers = [10, 20, 30, 40, 50]
ribosome_copy_number = 100
total_trna = 1000
# Entries [0] and [1] are passed to the simulation script as separate arguments.
ribosome_binding_rates = [1000.0, 1000.0]
# Passed to the simulation script twice in a row (same value in both slots).
trna_charging_rate = 1000.0
transcript_names = ["cellularProtein", "GFP"]
trna_props = (0.5, 0.5)
codon_comps_1 = (0.5, 0.5)
codon_comps_2 = (0.5, 0.5)

# Tag used as the sub-directory / file name under ../yaml, ../output and
# ../scripts/cmd_files.
date = "august-4-2022"

In [6]:
# Build the simulation config from the shared parameters and serialize it
# under ../yaml/{date}. `conf` holds the name reported by the serializer and
# is interpolated into every generated command line (../yaml/{date}/{conf}).
serializer = SerializeTwoCodonMultiTranscript(
    transcript_lens=transcript_lens,
    codon_comps=[codon_comps_1, codon_comps_2],
    trna_proportion=trna_props,
    transcript_names=transcript_names,
    time_limit=time_limit,
    time_step=time_step,
)
serializer.serialize(f"../yaml/{date}")
conf = serializer.filename()

In [7]:
# Emit one command line per (transgene copy number, seed) pair; seeds are 1..3.
with open(f"../scripts/cmd_files/{date}.txt", "w") as stream:
    for transgene_copy in transgene_transcript_copy_numbers:
        for seed in range(1, 4):
            cmd = (
                f"python3 twocodonmultitranscript.py ../yaml/{date}/{conf} {seed} {cellular_transcript_copy_number} {transgene_copy} "
                f"{ribosome_copy_number} {total_trna} {ribosome_binding_rates[0]} {ribosome_binding_rates[1]} {trna_charging_rate} {trna_charging_rate} ../output/{date}"
            )
            stream.write(cmd + "\n")

In [8]:
def read_sim(path_pref, seed_start=1, seed_end=3, seed_incr=1, time_limit=None):
    """
    Read the output of several simulation trials (one file per seed) and
    concatenate them into a single DataFrame.

    Files are expected at ``{path_pref}_{seed}.tsv`` (tab-separated).

    Parameters
    ----------
    path_pref : str
        Path prefix shared by all trial files.
    seed_start : int
        First seed; this file is always read.
    seed_end : int
        Last seed (inclusive).
    seed_incr : int
        Step between consecutive seeds. Must be non-zero (``range`` raises
        ``ValueError`` otherwise).
    time_limit : number or None
        If given, only rows with ``time < time_limit`` are kept.

    Returns
    -------
    pandas.DataFrame
        All trials stacked with a fresh 0..n-1 index and an extra string
        column ``seed`` identifying the trial each row came from. When
        ``time_limit`` filters rows, the surviving rows keep their index labels.
    """
    # seed_start is read unconditionally, matching the historical behaviour
    # even when seed_end < seed_start.
    seeds = [seed_start, *range(seed_start + seed_incr, seed_end + 1, seed_incr)]

    trials = []
    for seed in seeds:
        trial = pd.read_csv(f"{path_pref}_{seed}.tsv", sep="\t")
        trial["seed"] = str(seed)
        trials.append(trial)

    # DataFrame.append was removed in pandas 2.0; a single concat is the
    # supported replacement and avoids re-copying the frame per seed.
    df_master = pd.concat(trials, ignore_index=True)
    if time_limit is not None:
        df_master = df_master[df_master.time < time_limit]
    return df_master

from bisect import bisect_left
def take_closest(myList, myNumber):
    """
    Return the element of ``myList`` nearest to ``myNumber``.

    ``myList`` must already be sorted in ascending order. When ``myNumber``
    lies exactly halfway between two neighbours, the smaller neighbour wins.
    """
    idx = bisect_left(myList, myNumber)
    # Insertion point at either end: only one candidate exists.
    if idx == 0:
        return myList[0]
    if idx == len(myList):
        return myList[-1]
    lower, upper = myList[idx - 1], myList[idx]
    # Strict comparison so that ties resolve to the lower neighbour.
    return upper if upper - myNumber < myNumber - lower else lower
    
def get_average_protein(path, perc_good, time, max_seed):
    """
    Average the ``protein`` column per species at one time point across seeds.

    Reads ``{path}_1.tsv`` through ``{path}_{max_seed}.tsv`` (tab-separated),
    rounds every time stamp up with ``np.ceil``, and averages ``protein`` over
    all rows whose rounded time equals ``time``, grouped by ``species``. If no
    row has that rounded time, the closest available rounded time is used
    instead; on a tie the earlier time is chosen.

    Parameters
    ----------
    path : str
        Path prefix shared by the per-seed files.
    perc_good : any
        Unused; kept so existing calls keep working.
    time : number
        Time point to report.
    max_seed : int
        Highest seed to read. Seed 1 is always read.

    Returns
    -------
    pandas.DataFrame
        Columns ``species`` and ``protein`` (the mean across matching rows).
    """
    per_seed = []
    for seed in [1, *range(2, max_seed + 1)]:
        frame = pd.read_csv(f"{path}_{seed}.tsv", sep="\t")
        frame["seed"] = str(seed)
        per_seed.append(frame)
    # DataFrame.append was removed in pandas 2.0; concatenate once instead.
    df_all = pd.concat(per_seed, ignore_index=True)
    df_all["time"] = np.ceil(df_all["time"])

    def _mean_at(time_point):
        # Mean protein per species over every row recorded at `time_point`.
        at_time = df_all.loc[df_all["time"] == time_point]
        return at_time.groupby(["species"])["protein"].mean().reset_index()

    averaged = _mean_at(time)
    if averaged.empty:
        # Different seeds can record different time stamps, so the stacked
        # time column is not globally sorted. Sort the unique values before
        # searching; argmin returns the first minimum, so ties go to the
        # earlier time.
        available = np.sort(df_all["time"].dropna().unique())
        nearest = available[np.abs(available - time).argmin()]
        averaged = _mean_at(nearest)
    return averaged

In [9]:
# Collect the mean species counts at t = 500 (averaged over seeds 1..3) for
# each transgene copy number, then stack them into one table.
# DataFrame.append was removed in pandas 2.0, so the per-sweep frames are
# gathered in a list and concatenated once.
summaries = []
for transgene_copy in transgene_transcript_copy_numbers:
    path = f"../output/{date}/two_codon_multi_transcript_0.5_0.5_0.5_0.5_0.5_0.5" + \
           f"_{cellular_transcript_copy_number}_{transgene_copy}_{ribosome_copy_number}_{total_trna}" + \
           f"_{ribosome_binding_rates[0]}_{ribosome_binding_rates[1]}_{trna_charging_rate}_{trna_charging_rate}"
    summary = get_average_protein(path, 0.5, 500, 3)
    summary["transgene_copy"] = transgene_copy
    summaries.append(summary)

df_master = pd.concat(summaries, ignore_index=True)
df_master["species"] = df_master["species"].replace({"__ribosome": "free ribosome"})
df_master

Unnamed: 0,species,protein,transgene_copy
0,ATA_charged,500.0,10
1,ATA_uncharged,0.0,10
2,GFP,11.0,10
3,TTT_charged,500.0,10
4,TTT_uncharged,0.0,10
5,__GFP_rbs,10.0,10
6,__cellularProtein_rbs,100.0,10
7,free ribosome,99.0,10
8,cellularProtein,1037.0,10
9,ATA_charged,500.0,20


In [10]:
# Second sweep: lower the tRNA charging rate. This rebinds the notebook-level
# value, so every later cell that builds a command line or output path from
# trna_charging_rate sees 100.0.
trna_charging_rate = 100.0
with open(f"../scripts/cmd_files/{date}-2.txt", "w") as stream:
    for transgene_copy in transgene_transcript_copy_numbers:
        for seed in range(1, 4):
            cmd = (
                f"python3 twocodonmultitranscript.py ../yaml/{date}/{conf} {seed} {cellular_transcript_copy_number} {transgene_copy} "
                f"{ribosome_copy_number} {total_trna} {ribosome_binding_rates[0]} {ribosome_binding_rates[1]} {trna_charging_rate} {trna_charging_rate} ../output/{date}"
            )
            stream.write(cmd + "\n")

In [11]:
# Same aggregation as for the first sweep, using the parameter values that are
# current when this cell runs: mean species counts at t = 500 over seeds 1..3,
# one batch per transgene copy number.
# DataFrame.append was removed in pandas 2.0, so the per-sweep frames are
# gathered in a list and concatenated once.
summaries = []
for transgene_copy in transgene_transcript_copy_numbers:
    path = f"../output/{date}/two_codon_multi_transcript_0.5_0.5_0.5_0.5_0.5_0.5" + \
           f"_{cellular_transcript_copy_number}_{transgene_copy}_{ribosome_copy_number}_{total_trna}" + \
           f"_{ribosome_binding_rates[0]}_{ribosome_binding_rates[1]}_{trna_charging_rate}_{trna_charging_rate}"
    summary = get_average_protein(path, 0.5, 500, 3)
    summary["transgene_copy"] = transgene_copy
    summaries.append(summary)

df_100 = pd.concat(summaries, ignore_index=True)
df_100["species"] = df_100["species"].replace({"__ribosome": "free ribosome"})
df_100

Unnamed: 0,species,protein,transgene_copy
0,ATA_charged,496.0,10
1,ATA_uncharged,4.0,10
2,GFP,10.5,10
3,TTT_charged,494.5,10
4,TTT_uncharged,5.5,10
5,__GFP_rbs,10.0,10
6,__cellularProtein_rbs,100.0,10
7,free ribosome,98.0,10
8,cellularProtein,1009.5,10
9,ATA_charged,497.0,20


In [14]:
# Third sweep: tenfold higher ribosome binding rates and twice as many
# ribosomes, with the tRNA charging rate at 100.0. These assignments rebind
# the notebook-level values used by later cells.
trna_charging_rate = 100.0
ribosome_binding_rates = [10000.0, 10000.0]
ribosome_copy_number = 200
with open(f"../scripts/cmd_files/{date}-4.txt", "w") as stream:
    for transgene_copy in transgene_transcript_copy_numbers:
        for seed in range(1, 4):
            cmd = (
                f"python3 twocodonmultitranscript.py ../yaml/{date}/{conf} {seed} {cellular_transcript_copy_number} {transgene_copy} "
                f"{ribosome_copy_number} {total_trna} {ribosome_binding_rates[0]} {ribosome_binding_rates[1]} {trna_charging_rate} {trna_charging_rate} ../output/{date}"
            )
            stream.write(cmd + "\n")

In [15]:
# Aggregate the sweep for the parameter values that are current when this cell
# runs: mean species counts at t = 500 over seeds 1..3, one batch per
# transgene copy number.
# DataFrame.append was removed in pandas 2.0, so the per-sweep frames are
# gathered in a list and concatenated once.
summaries = []
for transgene_copy in transgene_transcript_copy_numbers:
    path = f"../output/{date}/two_codon_multi_transcript_0.5_0.5_0.5_0.5_0.5_0.5" + \
           f"_{cellular_transcript_copy_number}_{transgene_copy}_{ribosome_copy_number}_{total_trna}" + \
           f"_{ribosome_binding_rates[0]}_{ribosome_binding_rates[1]}_{trna_charging_rate}_{trna_charging_rate}"
    summary = get_average_protein(path, 0.5, 500, 3)
    summary["transgene_copy"] = transgene_copy
    summaries.append(summary)

df_3 = pd.concat(summaries, ignore_index=True)
df_3["species"] = df_3["species"].replace({"__ribosome": "free ribosome"})
df_3

Unnamed: 0,species,protein,transgene_copy
0,ATA_charged,480.666667,10
1,ATA_uncharged,19.333333,10
2,GFP,201.0,10
3,TTT_charged,479.0,10
4,TTT_uncharged,21.0,10
5,__GFP_rbs,10.0,10
6,__cellularProtein_rbs,99.666667,10
7,free ribosome,191.666667,10
8,cellularProtein,19702.333333,10
9,ATA_charged,480.0,20
