In [None]:
import eee

%matplotlib inline
from matplotlib import pyplot as plt

import numpy as np

### Specify ensemble

This ensemble has three species:

hdna + 2IPTG <--> h + 2IPTG <--> l2e

+ `hdna` (our observable, with dG0 = 0 kcal/mol)
+ `h` (unobserved structure that does not bind to anything; dG0 = 5 kcal/mol)
+ `l2e` (unobserved structure that binds to 2 iptg molecules; dG0 = 5 kcal/mol)

At low IPTG chemical potential (low concentration), `hdna` will be highly favored. At high IPTG chemical potential (high concentration), `l2e` is favored. 


In [None]:
# Assemble the three-species ensemble. `hdna` is the only observable species,
# and `l2e` is the only species given an IPTG stoichiometry (iptg=2).
ens = eee.core.Ensemble()
ens.add_species("hdna", dG0=0, observable=True)
ens.add_species("h", dG0=5)
ens.add_species("l2e", dG0=5, observable=False, iptg=2)

# Evaluate the observable over IPTG values running from 0 to 5.
df = ens.get_obs(ligand_dict={"iptg": np.linspace(0, 5)})

fig, ax = plt.subplots(1, figsize=(6, 6))
ax.plot(df.iptg, df.fx_obs)

# Dashed vertical guides at iptg = 1 and iptg = 4, the two IPTG values used
# as selection conditions in the simulation cell later in this notebook.
for iptg_marker in (1, 4):
    ax.plot((iptg_marker, iptg_marker), (0, 1), "--", color="gray")

ax.set(
    xlabel="ln([iptg]) chemical potential",
    ylabel="fraction hdna",
    title="increasing iptg disfavors hdna",
)

# A trailing None keeps the cell from echoing a matplotlib object repr.
None

### Load in $\Delta \Delta G$

Load in a spreadsheet with the energetic effects of mutations on all species in the ensemble. 

In [None]:
# Read the ddG spreadsheet from the test data shipped with the repository
# (the path is relative to this notebook's directory), then display it.
ddg_csv = "../tests/data_for_tests/test_ddg/ddg.csv"
ddg_df = eee.io.read_ddg(ddg_csv)
ddg_df

### Simulate evolution

For this, we are going to use the `eee.calcs.WrightFisherTreeSimulation` class. First, see what its arguments are.


In [None]:
# Uncomment the line below to print the documentation (including the argument
# list) of the simulation class.
# NOTE(review): this line uses the path `eee.simulation.calcs`, whereas the
# simulation cell further down calls `eee.calcs.WrightFisherTreeSimulation`;
# confirm which path is valid for the installed version of eee.
#help(eee.simulation.calcs.WrightFisherTreeSimulation)

### Run evolutionary simulation

In this run, we simulate evolution along a four-taxon tree with `num_generations=10000` (plus 10 burn-in generations), selecting for the protein to be `on` at $\mu _{iptg} = 1$ and `off` at $\mu _{iptg} = 4$. Our population size is 1000 and our mutation rate is 0.01.

In [None]:
import os
import shutil

from eee.analysis.wf.on_tree import extract_alignment, load_wf_tree_sim

# Tree (Newick string, four tips A-D) passed to the simulation.
newick = '((A:0.1,B:0.5):0.5,(C:0.3,D:0.7):0.3);'

# Selection conditions. Presumably entries are paired by position, i.e.
# "on" at iptg = 1 and "off" at iptg = 4 -- TODO confirm against eee docs.
conditions = {
    "fitness_fcn": ["on", "off"],
    "iptg": [1, 4],
}

wf = eee.calcs.WrightFisherTreeSimulation(
    ens=ens,
    ddg_df=ddg_df,
    conditions=conditions,
)

# Remove output left over from an earlier run so this cell can be re-run
# from a clean state.
output_directory = "eee_wf-tree"
if os.path.exists(output_directory):
    shutil.rmtree(output_directory)

# Collect the run settings in one place, then launch the simulation.
run_settings = {
    "tree": newick,
    "population_size": 1000,
    "mutation_rate": 0.01,
    "num_generations": 10000,
    "burn_in_generations": 10,
    "output_directory": output_directory,
}
wf.run(**run_settings)

# Pull the alignment (with ancestors included) out of the simulation output
# directory and print the FASTA text.
out, fasta = extract_alignment(output_directory, include_ancestors=True)
print(fasta)


In [None]:
from eee.analysis.wf.on_tree import load_wf_tree_sim

# Reload the results written to the simulation output directory.
# NOTE(review): judging by the variable names, the four returned values are
# the simulation container, its keyword arguments, the tree, and the
# genotypes -- confirm against the load_wf_tree_sim documentation.
wf_tree_directory = "eee_wf-tree"
sc, sc_kwargs, tree, genotypes = load_wf_tree_sim(wf_tree_directory)

# Display the loaded tree.
tree.show()