### Mini Optimization Script

In this script, I will optimize one parameter (promoter A binding strength), first using the hill climbing algorithm.

Overarching steps in this script:
1. Define a function that accepts initial parameter estimate for promoter strength (initial pA strength) 
2. Define an error function that calculates RMSE between experimental value & pinetree output 
3. Apply a hill climbing algorithm that uses error function 

#### Import relevant packages
insert details on versions

In [2]:
import pandas as pd
import numpy as np
import pinetree as pt

#### Global variables (KEY)

Here is a list of my global variables
1. gen = the generation number, ie trial 1 optimization, trial 2...etc
2. pA = strength of promoter A
3. report_df = dataframe with columns gen | pA | error

In [3]:
# Starting values for the optimization run
base_dir = "/Users/tanviingle/Documents/Wilke/phix174/"  # project root; outputs live in base_dir + "output/"
gen, pA = 0, 2e5  # current generation number and promoter A strength
total_gen = 100  # number of rows pre-allocated in the report table

# One row per generation, all cells NaN until filled in
report_df = pd.DataFrame(
    data=np.nan,
    index=np.arange(total_gen),
    columns=["gen", "pA", "error"],
)

#### FUNCTION run_pt(gen, pA)
This function runs the pinetree simulation

Steps in this script: 
1. Read genomic coordinates from file & add genes 
2. Add promoters (pA will be passed into the function) 
3. Add terminators from genomic coordinates & define interactions (borrow from preoptimized_model code) 
4. Run simulation & output to file named gen_#.tsv


In [16]:
def run_pt(gen, pA):
    """Build the PhiX-174 model and run one pinetree simulation.

    Parameters
    ----------
    gen
        Generation number; printed and used in the output file name.
    pA
        Value passed as promoter A's "ecolipol" interaction strength.

    Reads ``genomic_coords.csv`` from ``base_dir + "output/"`` and writes the
    simulation output to ``base_dir + "output/gen_<gen>.tsv"``. Returns nothing.
    """
    print(gen, pA)
    print("Defining PhiX-174 genome")

    # Host cell and phage genome
    CELL_VOLUME = 1.1e-15  # from T7
    model = pt.Model(cell_volume=CELL_VOLUME)
    phage = pt.Genome(name="phix_174", length=5386)

    # Genomic coordinates table (columns used: type, name, new_start, new_end)
    genomic_coords = pd.read_csv(base_dir + "output/" + "genomic_coords.csv")
    print(genomic_coords.at[0, "type"])

    # Polymerase binding strength per promoter name; only pA is tunable,
    # the other strengths are hardcoded.
    promoter_strengths = {"A": pA, "B1": 5e7, "D": 2e6}

    # Walk the table row by row and add each gene / known promoter
    for row in range(len(genomic_coords)):
        kind = genomic_coords.at[row, "type"]

        if kind == "gene":
            gene_start = genomic_coords.at[row, "new_start"]
            phage.add_gene(
                name=kind + "_" + genomic_coords.at[row, "name"],
                start=gene_start,
                stop=genomic_coords.at[row, "new_end"],
                rbs_start=gene_start,
                rbs_stop=gene_start + 15,
                rbs_strength=1e7,
            )
        elif kind == "promoter" and genomic_coords.at[row, "name"] in promoter_strengths:
            label = genomic_coords.at[row, "name"]
            phage.add_promoter(
                name=kind + "_" + label,
                start=genomic_coords.at[row, "new_start"],
                stop=genomic_coords.at[row, "new_end"],
                interactions={"ecolipol": promoter_strengths[label]},
            )
        else:
            # Any row that is neither a gene nor one of the promoters above
            print("ignoring pB2")

    print("all genes and promoters added")

    # Terminators are added manually: (name, start, stop, efficiency)
    terminators = (
        ("terminator_J", 2402, 2403, 0.7),  # right before gene F (2404-3687)
        ("terminator_F", 3796, 3797, 0.8),  # right before gene G (3798-4325)
        ("terminator_G", 4332, 4333, 0.6),  # right before gene H (4334-5320)
        ("terminator_H", 5321, 5322, 0.3),  # right after gene H
    )
    for term_name, term_start, term_stop, term_eff in terminators:
        phage.add_terminator(
            name=term_name,
            start=term_start,
            stop=term_stop,
            efficiency={"ecolipol": term_eff},
        )

    print("all terminators added")

    # Register genome after promoters/terminators are added
    model.register_genome(phage)
    print("genome is registered")

    # Polymerases, species and reactions
    print("Defining Polymerases & Interactions")
    model.add_polymerase(name="ecolipol", speed=35, footprint=35, copy_number=0)
    model.add_species("bound_ecolipol", 1800)  # initialization
    model.add_species("ecoli_genome", 0)
    model.add_species("ecoli_transcript", 0)
    model.add_reaction(1e6, ["ecolipol", "ecoli_genome"], ["bound_ecolipol"])  # 1e7
    model.add_reaction(
        0.04,
        ["bound_ecolipol"],
        ["ecolipol", "ecoli_genome", "ecoli_transcript"],
    )
    model.add_ribosome(10, 30, 100)
    model.add_species("bound_ribosome", 100)
    model.seed(34)

    # Run simulation; one output file per generation
    print("running simulation")
    output_path = base_dir + "output/gen_" + str(gen) + ".tsv"
    model.simulate(time_limit=1200, time_step=5, output=output_path)  # TODO change limit
    print("Simulation successful!")

In [17]:
# Run one simulation with the current global gen and pA values
run_pt(gen, pA)

0 200000.0
Defining PhiX-174 genome
gene
ignoring pB2
all genes and promoters added
all terminators added
genome is registered
Defining Polymerases & Interactions
running simulation
Simulation successful!


In [28]:
# SCRATCH BANK
    #genomic_coords = pd.read_csv(base_dir + "output/" + "genomic_coords.csv")
    #print(len(genomic_coords))
    #n=0
    #while(n < 5):
     #   if genomic_coords.at[n, "type"] == "gene":
      #      print(genomic_coords.at[n, "type"])
       # n = n+1

15


#### FUNCTION get_error()
This function calculates the RMSE between experimental & pinetree transcript values 

Steps in this script: 
1. Read in output file & extract final timepoint only
2. Hardcode experimental values 
3. Normalize simulated values 
4. RMSE calculation


Read in pinetree output file
Filter only gene data
Calculate normalized version 
Add column for ideal ratios:
A, A* = 1
B, K, C = 6
D, E, J = 17
F = 11
G = 5
H = 1

Calculate the error as RMSE = $\sqrt{\frac{1}{N}\sum_{i}(\mathrm{norm}_i - \mathrm{ideal}_i)^2}$, where the sum runs over the $N$ genes
Save error value to a vector

In [None]:
def get_error(file):
    """Return the RMSE between simulated and ideal transcript ratios.

    Parameters
    ----------
    file
        Path to a tab-separated simulation output file containing at least
        the columns ``time``, ``species`` and ``transcript``.

    Returns
    -------
    float
        Root-mean-square error between the gene transcript counts at the
        last time point (normalized to gene_A) and the hardcoded ideal ratios.

    Raises
    ------
    ValueError
        If gene_A is absent at the last time point or has zero transcripts,
        so that no normalization is possible.
    """
    # Hardcoded ideal transcript ratios, relative to gene A
    ideal_ratios = {
        "gene_A": 1, "gene_A*": 1,
        "gene_B": 6, "gene_K": 6, "gene_C": 6,
        "gene_D": 17, "gene_E": 17, "gene_J": 17,
        "gene_F": 11,
        "gene_G": 5,
        "gene_H": 1,
    }

    # Simulation output is tab-separated
    sim_values = pd.read_csv(file, sep="\t")

    # Keep only the final time point (times are rounded to whole units first)
    sim_values = sim_values.round({"time": 0})
    final = sim_values[sim_values["time"] == sim_values["time"].max()]

    # Keep only genes that have an ideal ratio to compare against; genes
    # listed above but absent from the file are simply not scored.
    genes = final[final["species"].isin(list(ideal_ratios))]

    reference = genes.loc[genes["species"] == "gene_A", "transcript"]
    if reference.empty or reference.iloc[0] == 0:
        raise ValueError(
            "cannot normalize: gene_A is missing or has zero transcripts "
            "at the final time point"
        )

    norm = genes["transcript"] / reference.iloc[0]
    ideal = genes["species"].map(ideal_ratios)
    return float(np.sqrt(np.mean((norm - ideal) ** 2)))

In [38]:
# Load the generation-0 simulation output (tab-separated)
sim = pd.read_csv(base_dir + "output/gen_" + str(0) + ".tsv", sep="\t")
sim = sim.round({"time": 0})

# Keep only gene rows at time 1200; .copy() gives an independent frame so
# adding the "norm" column below does not write into a slice of the original.
sim = sim[sim["time"] == 1200.0]
sim = sim[sim["species"].str.match("gene_")].copy()

# Normalize transcript counts to the first remaining row
sim["norm"] = sim["transcript"] / sim.iloc[0]["transcript"]
display(sim)
#sim.iloc[0]["transcript"]


#sub_df.iloc[0]['A']
#print(sim.at[0, "transcript"])
#sim = sim[sim['norm'] = sim['transcript']/]

#df[df.name.str.match(regex)]
#display(sim)
#display(test)
#sim_values['time' == 0]

Unnamed: 0,time,species,protein,transcript,ribo_density,norm
7339,1200.0,gene_A,21.0,65.0,0.0,1.0
7340,1200.0,gene_A*,17.0,64.0,0.0,0.984615
7341,1200.0,gene_B,248.0,831.0,0.0,12.784615
7342,1200.0,gene_C,160.0,822.0,0.0,12.646154
7343,1200.0,gene_D,233.0,828.0,0.0,12.738462
7344,1200.0,gene_E,192.0,824.0,0.0,12.676923
7345,1200.0,gene_F,44.0,240.0,0.0,3.692308
7346,1200.0,gene_G,0.0,39.0,0.102564,0.6
7347,1200.0,gene_H,0.0,15.0,0.0,0.230769
7348,1200.0,gene_J,53.0,817.0,0.117503,12.569231
