In [None]:
import json
import os
import pickle
import shutil

%matplotlib inline
from matplotlib import pyplot as plt

import numpy as np
import pandas as pd

import eee

### Specify ensemble

This ensemble has three species:

hdna + 2IPTG <--> h + 2IPTG <--> l2e

+ `hdna` (our observable, with dG0 = 0 kcal/mol)
+ `h` (unobserved structure that does not bind to anything; dG0 = 5 kcal/mol)
+ `l2e` (unobserved structure that binds to 2 iptg molecules; dG0 = 5 kcal/mol)

At low IPTG chemical potential (low concentration), `hdna` will be highly favored. At high IPTG chemical potential (high concentration), `l2e` is favored. 


In [None]:
# Build the three-species ensemble: `hdna` is the only observable species
# and `l2e` is the only species given iptg (stoichiometry 2).
ens = eee.core.Ensemble()
ens.add_species("hdna", dG0=0, observable=True)
ens.add_species("h", dG0=5)
ens.add_species("l2e", dG0=5, observable=False, iptg=2)

# Evaluate the ensemble observable over a sweep of iptg values from 0 to 5.
df = ens.get_obs(ligand_dict={"iptg": np.linspace(0, 5)})

fig, ax = plt.subplots(1, figsize=(6, 6))
ax.plot(df.iptg, df.fx_obs)
ax.set(xlabel="ln([iptg]) chemical potential",
       ylabel="fraction hdna",
       title="increasing iptg disfavors hdna")

# Dashed vertical guides at iptg = 1 and iptg = 4.
for iptg_value in (1, 4):
    ax.plot((iptg_value, iptg_value), (0, 1), '--', color='gray')

### Load in $\Delta \Delta G$

Load in a spreadsheet with the energetic effects of mutations on all species in the ensemble. 

In [None]:
# Relative path to the ddG spreadsheet used for this example.
ddg_csv = "../tests/data_for_tests/test_ddg/ddg.csv"
ddg_df = eee.io.read_ddg(ddg_csv)

# Display the loaded table as the cell output.
ddg_df

### Simulate evolution

For this, we are going to use the `eee.calcs.WrightFisherSimulation` class. First, see what its arguments are.


### Run evolutionary simulation

In this run, we select over 1000 generations for the protein to be `off` for $\mu _{iptg} = 1$ and `on` for $\mu _{iptg} = 4$. Our population size is 1000 and our mutation rate 0.01.

In [None]:
# Selection conditions: fitness function "off" goes with iptg = 1 and "on"
# goes with iptg = 4; selection acts on the `fx_obs` observable.
conditions = {"fitness_fcn":["off","on"],
              "iptg":[1,4],
              "select_on":"fx_obs",
              "select_on_folded":True}

sc = eee.calcs.WrightFisherSimulation(ens=ens,
                                      ddg_df=ddg_df,
                                      conditions=conditions)

# Name the output directory once so the cleanup and the run cannot drift
# apart.
output_directory = "test_run"

# Delete output directory if it already exists
if os.path.exists(output_directory):
    shutil.rmtree(output_directory)

# NOTE(review): no random seed is passed here, so results may differ between
# runs -- confirm whether the simulation accepts a seed.
sc.run(output_directory=output_directory,
       population_size=1000,
       mutation_rate=0.01,
       num_generations=1000)

In [None]:
# List what the run wrote into the output directory.
print(os.listdir("test_run/"))

# Load the JSON file stored under input/ (presumably the settings recorded
# for this run -- verify against the eee documentation).
with open("test_run/input/simulation.json") as f:
    run_info = json.load(f)
print(run_info)

In [None]:
# Read the genotypes CSV found in the simulation output directory and
# display it as the cell output.
genotypes = pd.read_csv("test_run/eee_wf-sim_genotypes.csv")
genotypes

In [None]:
# NOTE: pickle.load can execute arbitrary code, so only load files you trust.
# This file lives in the output directory of the simulation run above.
with open("test_run/eee_wf-sim_generations_0.pickle","rb") as f:
    generations = pickle.load(f)

# First three generations
generations[:3]

### Extract genotype frequencies from the simulation

In [None]:
# Compute genotype frequencies from the loaded generations; each column of
# the result is drawn as its own line below.
df = eee.analysis.get_genotype_frequencies(generations)
gen = np.arange(len(generations))

# Use explicit axes and label them so the figure stands on its own.
freq_fig, freq_ax = plt.subplots()
for x in df.columns:
    freq_ax.plot(gen,df[x],'-')
freq_ax.set_xlabel("generation")
freq_ax.set_ylabel("genotype frequency")
freq_ax.set_title("genotype frequencies over the simulation")
None