In [None]:
import tfscreen
from tfscreen.plot import heatmap
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt

## Epistasis calculations

How to calculate epistasis for a single mutant cycle, or for every mutant cycle in the dataset at once.

In [None]:
# Path to the csv file holding the theta dataframe.
# NOTE(review): this is an absolute path on one user's machine; point it at
# your own copy of the file before running.
theta_file = "/Users/harmsm/Desktop/keep/theta_df.csv"

# Read the file into a dataframe, then make its genotype column categorical
df = tfscreen.genetics.set_categorical_genotype(
    tfscreen.util.read_dataframe(theta_file)
)

In [None]:
# Find the mutant cycles present in the data. build_cycles is given the
# genotype column and generates an array holding lists of all complete mutant
# cycles in the dataframe. The bare `cycles` on the last line displays the
# result as the cell output.
cycles = tfscreen.genetics.build_cycles(df["genotype"])
cycles

In [None]:
# Pick one mutant cycle (entry 100 of cycles), show which genotypes it
# contains, and keep only the rows of df whose genotype is in that cycle
cycle_selector = cycles[100]
print(cycle_selector)
cycle_df = df.loc[df["genotype"].isin(cycle_selector)]
cycle_df

In [None]:
# Calculate epistasis for the mutant cycle selected above. condition_selector
# lists the columns that, taken together, uniquely define the conditions under
# which epistasis is calculated. With only "titrant_conc" given, epistasis is
# calculated at every titrant_conc. (If there were more than one titrant, it
# would have to be ['titrant_name','titrant_conc'] to select unique conditions
# for each mutant cycle.)
#
# scale can be "add" (the default) or "mult" (multiply).
ep_out = tfscreen.analysis.extract_epistasis(
    cycle_df,
    scale="add",
    y_obs="theta_est",
    y_std="theta_std",
    condition_selector=["titrant_conc"],
)
ep_out

In [None]:
# Plot epistasis versus titrant concentration for the selected mutant cycle.

# Value that means "no epistasis", drawn as a dashed reference line. It must
# match the scale used to calculate ep_out: 0 for "add", 1 for "mult".
zero_value = 0

# A log-scaled x axis cannot show 0, so points measured at 0 mM IPTG are drawn
# at this small stand-in concentration instead.
zero_conc_proxy = 1e-6

# Work on a copy so that ep_out keeps its real concentrations. Editing ep_out
# in place would leave 1e-6 in the results for any later cell that reuses or
# re-displays them.
ep_plot_df = ep_out.copy()
ep_plot_df.loc[ep_plot_df["titrant_conc"] == 0,"titrant_conc"] = zero_conc_proxy

# Create fig/ax plotting objects
fig, ax = plt.subplots(1,figsize=(6,6))

# Open circles for the observed epistasis ...
ax.scatter(ep_plot_df["titrant_conc"],
           ep_plot_df["ep_obs"],
           s=30,edgecolor='black',facecolor='none')

# ... with error bars only (lw=0 hides the line joining the points)
ax.errorbar(ep_plot_df["titrant_conc"],
            ep_plot_df["ep_obs"],
            yerr=ep_plot_df["ep_std"],
            lw=0,capsize=5,elinewidth=0.5,color='black')

# axhline spans the whole axis width, so the reference line stays correct
# whatever concentration range the data cover
ax.axhline(zero_value,ls='--',lw=1,color='gray',zorder=-10)

# Clean up axes. The axis itself is log-scaled, so the tick values are
# concentrations in mM, not log10 values.
ax.set_xscale('log')
ax.set_xlabel("IPTG (mM)")
ax.set_ylabel("epistasis in theta")
fig.tight_layout()

# Uncomment to write the figure to a file
#fig.savefig("something.pdf")


In [None]:
# Calculate epistasis for every possible mutant cycle at every possible
# titrant conc, this time on the multiplicative scale.
all_ep_df = tfscreen.analysis.extract_epistasis(
    df,
    scale="mult",
    y_obs="theta_est",
    y_std="theta_std",
    condition_selector=["titrant_conc"],
)


In [None]:
# Plot the distribution of observed epistasis at 0, 0.1, and 1 mM IPTG, one
# histogram panel per concentration.

# Value that means "no epistasis", marked on every panel. all_ep_df was
# calculated with scale="mult", where that value is 1 (it would be 0 for
# scale="add"). Keep this in step with the scale used for all_ep_df.
null_value = 1

# Extract epistasis at each concentration. np.isclose is used instead of ==
# so that floating-point round-off in titrant_conc (e.g. 0.1 stored as
# 0.1000000001) does not silently drop rows.
titrant_conc = all_ep_df["titrant_conc"]
ep_at_0mM = all_ep_df.loc[np.isclose(titrant_conc,0),"ep_obs"]
ep_at_0p1mM = all_ep_df.loc[np.isclose(titrant_conc,0.1),"ep_obs"]
ep_at_1mM = all_ep_df.loc[np.isclose(titrant_conc,1),"ep_obs"]

# (values, title) for each panel, left to right
panels = [(ep_at_0mM,"theta ep @ 0.0 mM IPTG"),
          (ep_at_0p1mM,"theta ep @ 0.1 mM IPTG"),
          (ep_at_1mM,"theta ep @ 1.0 mM IPTG")]

# Same bins on every panel so the distributions are directly comparable
bins = np.linspace(-8,8,100)
fig, ax = plt.subplots(1,len(panels),figsize=(12,4),sharey=True)

for axis, (ep_values, title) in zip(ax,panels):
    axis.hist(ep_values,bins=bins)
    axis.set_title(title)
    axis.set_xlabel("theta ep")

    # axvline spans the full panel height without affecting the y limits, so
    # the shared y axis is set by the histogram counts alone
    axis.axvline(null_value,ls='--',color='gray')

ax[0].set_ylabel('counts')
fig.tight_layout()


