# 20200619 Escape enrichment modeling
### Goals of notebook
* use modeling framework to show how escapes will enrich given their library proportion and the other members in the library

### Parameters of interest
* relative REU values (mapped to relative growth rates)
* different Cm concentrations

$f_{+}(t=0) : \{0.5,10^{-1},10^{-2},10^{-3}\}$

# Validating relative growth rates and plotting growth curves
Need to show that simulating the system with relative growth rates is the same as simulating the system with absolute growth rates. Hard to do this analytically because I can't find a general solution to the competitive Lotka-Volterra equations...

Test case: Two species, $x_1$ and $x_2$, with absolute growth rates, $r_1 = 0.25, r_2 = 0.125$, and relative growth rates of $\bar{r_1} = 1, \bar{r_2} = 0.5$.

How does the system evolve over time for each?

In [None]:
import importlib
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd

import matplotlib
# Figure defaults for the whole notebook: 125 dpi, seaborn 'muted' palette, 'ticks' style.
matplotlib.rc('figure', dpi = 125)
sns.set_palette('muted')
sns.set_style('ticks')

import sys
# Extend the import search path before importing `enrichments`
# (presumably the module lives in ../modules relative to the notebook -- confirm).
sys.path.append('../modules')
import enrichments

In [None]:
# Absolute path of the ../data directory (resolved against the working directory).
# `os` is imported here because the notebook's import cell does not provide it;
# without this import the cell raises NameError.
import os
data_dir = os.path.realpath('../data')

In [None]:
# Re-import `enrichments` so edits to the module take effect without restarting the kernel.
importlib.reload(enrichments)

In [None]:
# Starting composition of a 1e8-member library: one dominant species plus three
# rare ones (10, 1 and 1 members), expressed as fractions of the total.
lib_sz = 1e8
x_i0 = np.array([lib_sz - 12, 10, 1, 1])
x_i0_norm = x_i0 / x_i0.sum()

# Absolute growth rates, then the same rates rescaled so the fastest grower is 1.
r_i = np.array([0, 0.2, 0.5, 0.8])
r_i_norm = r_i / r_i.max()

# Third argument of Growth_tube (presumably the dilution factor -- confirm).
dil = 100

# Simulate growth from the normalized abundances using the relative rates.
sys_1 = enrichments.Growth_tube(x_i0_norm, r_i_norm, dil)
sys_1.sim_growth()

In [None]:
# Draw the simulated trajectories of sys_1 on a small square figure.
fig, ax = plt.subplots(figsize=(4, 4))
sys_1.plot_x_t(ax)
ax.set(xlabel='Time', ylabel='Normalized abundance')
plt.show()

In [None]:
# Print the last column of x_t for every species (presumably the final time point -- confirm).
# NOTE(review): the label says "Enrichment vals", but round_grow_sim reads enrichments
# from `.enrichs` and uses x_t[:, -1] as abundances -- confirm the label is intended.
print('Enrichment vals:', sys_1.x_t[:,-1])

### Hypothetical library

* $10^8$ members
* 10000 weak hits
* 1 perfect hit
* 3000 escapes (or anticrispr hits, these are the same)

In [None]:
# Import df w/ growth rates for each library member eyeballed from plot above.
# Later cells read its 'cm', 'strain' and 'rel_growth' columns.
df_growth= pd.read_excel(data_dir + '/anticrispr_growths.xlsx')
# Display the table as the cell output.
df_growth

In [None]:
# Function to simulate one round of selection at various Cm values (and dcas9 types)
def round_grow_sim(procas9, df_growth, x_i0, df_enrich, dil=100):
    """Simulate one selection round at every Cm value and record the outcome.

    For each unique value in ``df_growth['cm']`` the relative growth rates of
    the four library classes are looked up, an ``enrichments.Growth_tube`` is
    simulated, and one result row per class is produced.

    Parameters
    ----------
    procas9 : str
        Label of the real hit as it appears in ``df_growth['strain']``; it is
        also stored in the 'procas9_type' column of every new row.
    df_growth : pandas.DataFrame
        Growth table with 'cm', 'strain' and 'rel_growth' columns.
    x_i0 : array-like
        Initial abundances ordered as
        ['no_hit', 'weak_hit', procas9, 'anticrispr_hit']; they are divided by
        their sum before being passed to the simulation.
    df_enrich : pandas.DataFrame
        Results accumulated so far. It is not modified in place.
    dil : number, optional
        Third argument handed to ``Growth_tube`` (default 100, the value that
        used to be hard-coded).

    Returns
    -------
    pandas.DataFrame
        The rows of ``df_enrich`` followed by the new rows, with columns
        'strain', 'cm', 'enrich', 'frac' and 'procas9_type'.

    Raises
    ------
    IndexError
        If ``df_growth`` has no row for some (cm, strain) combination.
    """
    lib_types = ['no_hit', 'weak_hit', procas9, 'anticrispr_hit']

    # Rows are collected in a list and joined once at the end:
    # DataFrame.append was removed in pandas 2.0 and copied the frame per row.
    rows = []
    for cm in df_growth['cm'].unique():
        # Relative growth rate of every library class at this Cm value
        at_cm = df_growth[df_growth['cm'] == cm]
        r_i = np.array(
            [at_cm[at_cm['strain'] == lib_memb]['rel_growth'].values[0]
             for lib_memb in lib_types],
            dtype=float,
        )

        # Recomputed per Cm value so every simulation receives its own array
        x_i0_norm = x_i0 / np.sum(x_i0)

        # Simulate the system
        tube = enrichments.Growth_tube(x_i0_norm, r_i, dil)
        tube.sim_growth()

        # Final abundances and their total, used for the library fractions
        final = tube.x_t[:, -1]
        total = np.sum(final)

        for i, strain in enumerate(lib_types):
            rows.append({
                'strain': strain,
                'cm': cm,
                'enrich': tube.enrichs[i],
                'frac': final[i] / total,
                'procas9_type': procas9,
            })

    if not rows:
        return df_enrich

    new_rows = pd.DataFrame(rows)
    if len(df_enrich) == 0:
        # Nothing to concatenate with: keep the caller's column order and
        # avoid concatenating an empty frame (deprecated dtype inference).
        extra = [col for col in new_rows.columns if col not in df_enrich.columns]
        return new_rows.reindex(columns=list(df_enrich.columns) + extra)

    return pd.concat([df_enrich, new_rows], ignore_index=True)


# Function to plot fractions of the library
def plot_lib_frac(df_enrich, procas9s):
    """Plot the library fraction of every strain versus Cm, one panel per pro-dCas9.

    Parameters
    ----------
    df_enrich : pandas.DataFrame
        Results table with 'cm', 'frac', 'strain' and 'procas9_type' columns.
    procas9s : iterable of str
        pro-dCas9 labels to plot; each one gets its own panel, left to right.
        The panel for 'VKLQ-0_hit' is titled 'Previous pro-dCas9' and shown
        without a legend; every other panel is titled
        'Tightened linker pro-dCas9' and keeps its legend.

    Returns
    -------
    None
        The figure is displayed with ``plt.show()``.
    """
    procas9s = list(procas9s)
    sns.set_style('ticks')

    # One 4x4 panel per pro-dCas9 (two entries give the original 8x4 figure).
    # The panel count used to be fixed at two, so zip() silently dropped extras.
    n_panels = max(len(procas9s), 1)
    fig, axs = plt.subplots(1, n_panels, figsize = (4 * n_panels, 4), squeeze = False)
    plt.tight_layout(pad = 3)

    for procas9, ax in zip(procas9s, axs[0]):

        # subset df to only include enrichment results for given procas9
        sub_df = df_enrich[df_enrich['procas9_type'] == procas9]

        sns.lineplot(x = 'cm', y = 'frac', hue = 'strain', data = sub_df, ax = ax)

        ax.set_yscale('log')
        ax.set_ylim([1e-9,10])
        ax.set_xlabel('Chloramphenicol (uM)')
        ax.set_ylabel('Fractions after one round')

        if procas9 == 'VKLQ-0_hit':
            ax.set_title('Previous pro-dCas9')
            # Drop the legend seaborn created, if any
            legend = ax.get_legend()
            if legend is not None:
                legend.remove()
        else:
            ax.set_title('Tightened linker pro-dCas9')
            # Rename legend entries by strain label instead of by position, so
            # the names stay correct whatever order the artists were drawn in.
            display_names = {
                'no_hit': 'Non hits',
                'weak_hit': 'Weak hits',
                procas9: 'Real hit',
                'anticrispr_hit': 'Escapes',
            }
            handles, labels = ax.get_legend_handles_labels()
            ax.legend(
                handles,
                [display_names.get(label, label) for label in labels],
                bbox_to_anchor = (1.05, 1),
            )

    plt.show()

In [None]:
# Simulate two different sets of libraries: one w/ the old proCas9, one with the new proCas9.
# These labels are matched against df_growth['strain'] in round_grow_sim; plot_lib_frac
# titles the 'VKLQ-0_hit' panel as the previous pro-dCas9 and any other as the tightened linker.
procas9s = ['VKLQ-0_hit', 'VKLQ-6_hit']

In [None]:
# Create df to store enrichment values for this round
df_enrich_1 = pd.DataFrame(columns = ['strain', 'cm', 'enrich', 'frac', 'procas9_type'])

# Instantiate library
lib_sz = 1e8

# Set library proportions here ['no_hit', 'weak_hit', procas9, 'anticrispr_hit']
# (1e8 members in total: 10000 weak hits, 1 real hit, 3000 escapes, the rest non-hits)
x_i0 = np.array([lib_sz-13001,10000, 1,3000])

# Run simulations over various Cm values
# round_grow_sim returns the accumulated table, so the name is rebound on every pass.
for procas9 in procas9s:
    df_enrich_1 = round_grow_sim(procas9, df_growth, x_i0, df_enrich_1)

In [None]:
# Show the first rows of the round-1 results as the cell output.
df_enrich_1.head()

In [None]:
# Plot the round-1 library fractions versus Cm, one panel per pro-dCas9.
plot_lib_frac(df_enrich_1, procas9s)

## Simulate enrichments over another round of selection (use the 500uM Cm value to input into the next round)

In [None]:
# Cm value whose round-1 results seed the following rounds.
cm_val = 500
# Display the round-1 rows recorded at that Cm value.
df_enrich_1[df_enrich_1['cm'] == cm_val]

In [None]:
# Library fraction reached by the real hit after round 1 at cm_val,
# keyed by pro-dCas9 type.
new_hit_f = {
    procas9: df_enrich_1.loc[
        (df_enrich_1['cm'] == cm_val) & (df_enrich_1['strain'] == procas9),
        'frac',
    ].values[0]
    for procas9 in procas9s
}
new_hit_f

## Round 2

In [None]:
# Create df to store enrichment values for this round
df_enrich_2 = pd.DataFrame(columns = ['strain', 'cm', 'enrich', 'frac', 'procas9_type'])


for procas9 in procas9s:
    # Set library proportions here ['no_hit', 'weak_hit', procas9, 'anticrispr_hit']
    # The real hit starts at the fraction it reached in round 1; non-hits fill the
    # remainder, clamped at 0 so a hit fraction close to 1 cannot yield a negative
    # abundance.
    # NOTE(review): weak hits are reset to 1 and escapes to 3000 instead of being
    # carried over from round 1 -- confirm this is the intended model.
    x_i0 = np.array([max(lib_sz-3000-(lib_sz*new_hit_f[procas9]), 0),1, lib_sz*new_hit_f[procas9],3000])

    # Run simulations over various Cm values and procas9 type
    df_enrich_2 = round_grow_sim(procas9, df_growth, x_i0, df_enrich_2)

In [None]:
# Plot the round-2 library fractions versus Cm, one panel per pro-dCas9.
plot_lib_frac(df_enrich_2, procas9s)

## Round 3 (use the same Cm value from previous round to select on)

In [None]:
# Library fraction reached by the real hit after round 2 at cm_val,
# keyed by pro-dCas9 type.
new_hit_f_2 = {
    procas9: df_enrich_2.loc[
        (df_enrich_2['cm'] == cm_val) & (df_enrich_2['strain'] == procas9),
        'frac',
    ].values[0]
    for procas9 in procas9s
}
new_hit_f_2

In [None]:
# Create df to store enrichment values for this round
df_enrich_3 = pd.DataFrame(columns = ['strain', 'cm', 'enrich', 'frac', 'procas9_type'])

for procas9 in procas9s:
    # Set library proportions here ['no_hit', 'weak_hit', procas9, 'anticrispr_hit']
    # The real hit starts at the fraction it reached in round 2; non-hits fill the
    # remainder, clamped at 0 so a hit fraction close to 1 cannot yield a negative
    # abundance.
    # NOTE(review): weak hits are reset to 1 and escapes to 3000 instead of being
    # carried over from round 2 -- confirm this is the intended model.
    x_i0 = np.array([max(lib_sz-3000-(lib_sz*new_hit_f_2[procas9]), 0),1, lib_sz*new_hit_f_2[procas9],3000])

    # Run simulations over various Cm values and procas9 type
    df_enrich_3 = round_grow_sim(procas9, df_growth, x_i0, df_enrich_3)

In [None]:
# Plot the round-3 library fractions versus Cm, one panel per pro-dCas9.
plot_lib_frac(df_enrich_3, procas9s)

## Findings

### Tightened linker provides advantages in early selection rounds
- Tightened linker provides clear advantage in round 2
  - the real hit is above escapes at high selection pressure

### Even the worse-performing system will work with enough rounds

- either linker will allow full fixing of the real hit after 3 rounds of selection