In [None]:
from IPython.display import HTML

# Markup for a button that shows/hides the notebook's input cells. The script
# registers code_toggle to run once the document is ready and the form's
# submit action calls the same function.
_code_toggle_markup = '''<script>
code_show=true; 
function code_toggle() {
 if (code_show){
 $('div.input').hide();
 } else {
 $('div.input').show();
 }
 code_show = !code_show
} 
$( document ).ready(code_toggle);
</script>
<form action="javascript:code_toggle()"><input type="submit" value="Click here to toggle on/off the raw code."></form>'''

# Last expression of the cell, so the HTML object is what gets displayed.
HTML(_code_toggle_markup)

In [None]:
# PAM description for the target. Both values are handed to TargetSequence
# (as pam_side / pam_length) when the target is loaded.
pam_side=5  # 5 or 3
pam_length=4  # number of bases

####################################################################################
#                                                                                  #
# Leave these values unchanged to let the script determine them automatically.     #
# Only set them if something goes wrong!                                           #
#                                                                                  #
####################################################################################
# Manual overrides: each one is passed to interactive.load_config_value as its
# second argument, and the returned value is what the rest of the notebook uses.
target_name = ''
negative_control_name = ''
# YAML file that is loaded and then indexed by target_name and
# negative_control_name to obtain the two sequences.
target_sequence_file = '/shared/targets.yml'

In [None]:
from champ import interactive

# Resolve the target names. load_config_value receives the config key and the
# manual override from the settings cell.
# NOTE(review): presumably the override wins when it is non-empty -- confirm
# against champ.interactive.load_config_value.
target_name = interactive.load_config_value(
    'perfect_target_name',
    target_name,
)
negative_control_name = interactive.load_config_value(
    'neg_control_target_name',
    negative_control_name,
)

# plot settings: axis-label and tick-label font sizes passed to the plotting helpers
fontsize, tick_fontsize = 18, 16

In [None]:
from Bio import SeqIO
from collections import defaultdict, Counter, namedtuple
from scipy import stats
import flabpal
import h5py
import matplotlib.pyplot as plt
import numpy as np
import yaml

from champ import misc
from champ import plotting
from champ.interactive import TargetSequence

%matplotlib inline
%load_ext autoreload
%autoreload 2

In [None]:
# Read the table of named target sequences.
# safe_load is used instead of yaml.load: calling yaml.load without an explicit
# Loader is deprecated in PyYAML 5.1+ and raises TypeError in PyYAML 6, and the
# full loader can construct arbitrary Python objects from the file.
with open(target_sequence_file) as f:
    targets = yaml.safe_load(f)

# Look up the perfect target and the negative control by their configured names.
target = targets[target_name]
negative_control = targets[negative_control_name]
ts = TargetSequence(target, pam_side=pam_side, pam_length=pam_length)

# Tick labels rendered as mathtext "<base>_<index>": one set for the full
# target and one for the guide portion exposed as ts.guide.
sequence_labels = ["$%s_{%d}$" % (base, index) for base, index in ts.human_readable_indexes]
guide_sequence_labels = ["$%s_{%d}$" % (base, index) for base, index in ts.guide.human_readable_indexes]

In [None]:
# Alphabet used throughout the notebook and the bar colour assigned to each base.
bases = 'ACGT'
base_color = dict(zip(bases, (flabpal.blue, flabpal.yellow, flabpal.green, flabpal.red)))
target_len = len(target)

In [None]:
# Pull the whole 'synthetic-kds' dataset into memory before the file closes.
kd_filename = 'results/cluster-data.h5'
with h5py.File(kd_filename, 'r') as h5:
    kd_data = h5['synthetic-kds'][:]

# Every record unpacks into exactly six fields:
# (sequence, kd, uncertainty, yint, delta_y, count).
# Index the KD and its uncertainty by sequence and keep the per-sequence counts.
kds = {seq: kd for seq, kd, _unc, _yint, _dy, _n in kd_data}
kd_uncertainty = {seq: unc for seq, _kd, unc, _yint, _dy, _n in kd_data}
counts = [n for _seq, _kd, _unc, _yint, _dy, n in kd_data]
print("Loaded %d KDs." % len(kds))

In [None]:
# Histogram of how many times each sequence was observed. The x-range is cut
# off at median + 5 * IQR of the counts.
median_count, iqr = np.median(counts), stats.iqr(counts)
coverage_upper_bound = median_count+iqr*5

fig, ax = plt.subplots(figsize=(10, 5))
ax.hist(counts, bins=20, range=(0, coverage_upper_bound))
ax.set_title("Sequence coverage")
ax.set_ylabel("Counts")
ax.set_xlabel("Occurrences per sequence");

In [None]:
# Report the smallest KD in the data set next to the KD of the perfect target.
lowest_kd = min(kds.values())
perfect_target_kd = kds[target]
print("Lowest KD: %f" % lowest_kd)
print("Perfect Target KD: %f" % perfect_target_kd)

# Off Target Binding Curve

In [None]:
import matplotlib
matplotlib.style.use('flab')

# Rank every record by its KD (field 1 of each record).
sorted_kds_and_uncertainties = sorted(kd_data, key=lambda record: record[1])
sorted_kds = [record[1] for record in sorted_kds_and_uncertainties]
max_kd = max(sorted_kds)
# Cap each uncertainty (field 2) at the largest KD in the data set.
sorted_uncertainties = [min(record[2], max_kd) for record in sorted_kds_and_uncertainties]
x = np.arange(len(sorted_kds))

fig, ax = plt.subplots(figsize=(10, 10))
ax.scatter(x, sorted_kds, color=flabpal.blue, s=4)
# Thin, translucent error bars drawn at zorder 0.
errorbar_style = dict(
    ecolor=flabpal.blue,
    fmt='none',
    zorder=0,
    capsize=0,
    linewidth=0.1,
    alpha=0.5,
)
ax.errorbar(x, sorted_kds, yerr=sorted_uncertainties, **errorbar_style)
ax.set_ylabel("Apparent $K_D$ (nM)")
ax.set_xlabel("Rank Ordered Affinities")
ax.set_title("Off Target Binding Curve");

# Single Mismatch Affinities

In [None]:
# Bar chart of the KD of every single-mismatch variant of the target, grouped
# by position and coloured by the substituted base.
fig, ax = plt.subplots(figsize=(15, 5))
width = 0.5
# Error-bar styling shared by every bar. It was previously built but never
# passed to ax.bar, so it had no effect; it is now applied, matching the
# single-insertion plot.
error_kw = dict(ecolor='k', alpha=0.6)

for i, j, mismatch_base, seq in ts.single_mismatches:
    # Skip entries that are identical to the unmodified target.
    if seq == ts.sequence:
        continue
    affinity = kds.get(seq)
    yerr = kd_uncertainty.get(seq)
    # Variants that were not measured have no entry; leave their slot empty.
    if affinity is None or yerr is None:
        continue
    # Each position i owns a slot of total width `width` centred on i; bar j
    # sits at offset j * width/4 from the slot's left edge.
    # NOTE(review): assumes j runs over 0-3 (one per base) -- confirm.
    bar_x_position = i - width/2.0 + width*j/4.0
    color = base_color[mismatch_base]
    ax.bar(bar_x_position, affinity, yerr=yerr, width=width / 4.0, color=color, error_kw=error_kw)

# Reference lines at the KDs of the perfect target and the negative control.
ax.axhline(kds[target], linestyle='--', color=flabpal.gray)
ax.axhline(kds[negative_control], linestyle='--', color=flabpal.gray)
plotting.configure_position_penalty_axes(target, fig, ax, sequence_labels, fontsize, tick_fontsize, 'kd', target_name)
ax.set_yscale("log")

# Double Mismatch Affinities

In [None]:
# Fill a mismatch matrix with the KD of every measured double-mismatch variant,
# then render it. Unmeasured variants are simply left unset.
mm = interactive.MismatchMatrix(ts.sequence)
for pos_a, pos_b, base_a, base_b, variant in ts.double_mismatches:
    measured_kd = kds.get(variant)
    if measured_kd is not None:
        mm.set_value(pos_a, pos_b, base_a, base_b, measured_kd)
plotting.plot_2d_mismatches(ts.sequence, sequence_labels, mm.to_matrix())

# Double Mismatch Epistasis

In [None]:
epistasis_matrix = interactive.MismatchMatrix(ts.sequence)
perfect_kd = kds[ts.sequence]


def _substituted_target(position, base):
    """Return the perfect target with `base` written over the base at `position`."""
    return ts.sequence[:position] + base + ts.sequence[position+1:]


# For each double mismatch, compare its KD shift (relative to the perfect
# target) with the sum of the shifts of its two constituent single mismatches.
for down_idx, up_idx, down_base, up_base, double_seq in ts.double_mismatches:
    double_mismatch_kd = kds.get(double_seq)
    upstream_single_mismatch_kd = kds.get(_substituted_target(up_idx, up_base))
    downstream_single_mismatch_kd = kds.get(_substituted_target(down_idx, down_base))
    # All three measurements are required; otherwise the cell is left unset.
    if double_mismatch_kd is None or upstream_single_mismatch_kd is None or downstream_single_mismatch_kd is None:
        continue
    dd_kd = (
        (double_mismatch_kd - perfect_kd)
        - (upstream_single_mismatch_kd - perfect_kd)
        - (downstream_single_mismatch_kd - perfect_kd)
    )
    epistasis_matrix.set_value(down_idx, up_idx, down_base, up_base, dd_kd)
plotting.plot_2d_mismatches(ts.sequence, sequence_labels, epistasis_matrix.to_matrix(), cmap='RdBu', force_full_bounds=True)

# Single Deletion Affinities

In [None]:
# One bar per measured single-deletion variant of the guide.
fig, ax = plt.subplots(figsize=(15, 5))
width = 0.5

for position, guide_variant in ts.guide.single_deletions:
    # Re-attach the PAM on the configured side to form the key used in kds.
    if ts.pam_side == 5:
        lookup_key = ts.pam + guide_variant
    else:
        lookup_key = guide_variant + ts.pam
    affinity = kds.get(lookup_key)
    yerr = kd_uncertainty.get(lookup_key)
    if affinity is not None and yerr is not None:
        ax.bar(position, affinity, width=width, yerr=yerr, color=flabpal.blue, error_kw=dict(ecolor='black'))

# Dashed reference lines: perfect target and negative control.
for reference in (target, negative_control):
    ax.axhline(kds[reference], linestyle='--', color=flabpal.gray)
plotting.configure_position_penalty_axes(ts.guide.sequence, fig, ax, guide_sequence_labels, fontsize, tick_fontsize, 'kd', target_name, legend=False)

# Double Deletion Affinities

In [None]:
from matplotlib import gridspec
import matplotlib as mpl

# Matrix of KDs for every measured double-deletion variant of the guide.
dm = interactive.SinglePositionMatrix(ts.guide.sequence)
for first_idx, second_idx, guide_variant in ts.guide.double_deletions:
    # The PAM is only re-attached to build the key used for the lookup.
    if ts.pam_side == 5:
        lookup_key = ts.pam + guide_variant
    else:
        lookup_key = guide_variant + ts.pam
    affinity = kds.get(lookup_key)
    yerr = kd_uncertainty.get(lookup_key)
    # Require both the KD and its uncertainty, even though only the KD is stored.
    if affinity is not None and yerr is not None:
        # Note the swapped order: the second index is passed first.
        dm.set_value(second_idx, first_idx, affinity)

plotting.plot_2d_deletions(ts.guide.sequence, guide_sequence_labels, dm.to_matrix())

# Double Deletion Epistasis

In [None]:
epistasis_matrix = interactive.SinglePositionMatrix(ts.guide.sequence)
# Recomputed here (same value as in the mismatch-epistasis cell) so this cell
# does not depend on a variable left behind by an earlier one.
perfect_kd = kds[ts.sequence]


def _with_pam(guide_variant):
    """Re-attach the PAM on the side given by ts.pam_side to form a kds key.

    The lookups here previously always prepended the PAM, which is only right
    for pam_side == 5; the affinity cells already branch on the side.
    """
    return ts.pam + guide_variant if ts.pam_side == 5 else guide_variant + ts.pam


# For each double deletion, compare its KD shift (relative to the perfect
# target) with the sum of the shifts of the two corresponding single deletions.
for upstream_index, downstream_index, sequence in ts.guide.double_deletions:
    double_kd = kds.get(_with_pam(sequence))
    # Rebuild each single-deletion guide by dropping one base from the guide.
    upstream_sequence = ts.guide.sequence[:upstream_index] + ts.guide.sequence[upstream_index+1:]
    downstream_sequence = ts.guide.sequence[:downstream_index] + ts.guide.sequence[downstream_index+1:]
    upstream_single_kd = kds.get(_with_pam(upstream_sequence))
    downstream_single_kd = kds.get(_with_pam(downstream_sequence))
    # All three measurements are required; otherwise the cell is left unset.
    if double_kd is not None and upstream_single_kd is not None and downstream_single_kd is not None:
        delta_double_kd = double_kd - perfect_kd
        delta_upstream = upstream_single_kd - perfect_kd
        delta_downstream = downstream_single_kd - perfect_kd
        dd_kd = delta_double_kd - (delta_upstream + delta_downstream)
        epistasis_matrix.set_value(downstream_index, upstream_index, dd_kd)
plotting.plot_2d_deletions(ts.guide.sequence, guide_sequence_labels, epistasis_matrix.to_matrix(), cmap='RdBu', force_full_bounds=True)

# Single Insertion Affinities

In [None]:
# Bar chart of the KD of every measured single-insertion variant of the guide,
# grouped by position and coloured by the inserted base.
fig, ax = plt.subplots(figsize=(15, 5))
idxs = np.arange(len(target))
width = 0.5

for position, slot, insertion_base, guide_variant in ts.guide.single_insertions:
    # Re-attach the PAM on the configured side to form the key used in kds.
    if ts.pam_side == 5:
        lookup_key = ts.pam + guide_variant
    else:
        lookup_key = guide_variant + ts.pam
    affinity = kds.get(lookup_key)
    yerr = kd_uncertainty.get(lookup_key)
    if affinity is not None and yerr is not None:
        # Offset the bar inside the group centred on `position`.
        bar_x_position = position - width/2.0 + width*slot/4.0
        ax.bar(
            bar_x_position,
            affinity,
            width=width/4.0,
            yerr=yerr,
            color=base_color[insertion_base],
            error_kw=dict(ecolor='k', alpha=0.6),
        )

# Dashed reference lines: perfect target and negative control.
for reference in (target, negative_control):
    ax.axhline(kds[reference], linestyle='--', color=flabpal.gray)
plotting.configure_position_penalty_axes(ts.guide.sequence, fig, ax, guide_sequence_labels, fontsize, tick_fontsize, 'kd', target_name)

# Double Insertion Affinities

In [None]:
# Matrix of KDs for every measured double-insertion variant of the guide.
mm = interactive.InsertionMatrix(ts.guide.sequence)
for idx_a, idx_b, base_a, base_b, guide_variant in ts.guide.double_insertions:
    # Re-attach the PAM on the configured side to form the key used in kds.
    if ts.pam_side == 5:
        lookup_key = ts.pam + guide_variant
    else:
        lookup_key = guide_variant + ts.pam
    affinity = kds.get(lookup_key)
    yerr = kd_uncertainty.get(lookup_key)
    # Require both the KD and its uncertainty, even though only the KD is stored.
    if affinity is not None and yerr is not None:
        # Note the bases are passed in swapped order relative to the loop tuple.
        mm.set_value(idx_a, idx_b, base_b, base_a, affinity)

plotting.plot_2d_insertions(ts.guide.sequence, guide_sequence_labels, mm.to_matrix())

# Double Insertion Epistasis

In [None]:
epistasis_matrix = interactive.InsertionMatrix(ts.guide.sequence)
# Recomputed here (same value as in the mismatch-epistasis cell) so this cell
# does not depend on a variable left behind by an earlier one.
perfect_kd = kds[ts.sequence]


def _with_pam(guide_variant):
    """Re-attach the PAM on the side given by ts.pam_side to form a kds key.

    The lookups here previously always prepended the PAM, which is only right
    for pam_side == 5; the affinity cells already branch on the side.
    """
    return ts.pam + guide_variant if ts.pam_side == 5 else guide_variant + ts.pam


# For each double insertion, compare its KD shift (relative to the perfect
# target) with the sum of the shifts of the two corresponding single insertions.
# NOTE(review): the tuple is unpacked as (downstream_index, upstream_index,
# upstream_base, downstream_base, ...) -- indexes and bases in opposite order.
# Confirm this matches what ts.guide.double_insertions actually yields.
for downstream_index, upstream_index, upstream_insertion_base, downstream_insertion_base, sequence in ts.guide.double_insertions:
    double_kd = kds.get(_with_pam(sequence))
    # Rebuild each single-insertion guide by inserting one base before the given index.
    upstream_sequence = ts.guide.sequence[:upstream_index] + upstream_insertion_base + ts.guide.sequence[upstream_index:]
    downstream_sequence = ts.guide.sequence[:downstream_index] + downstream_insertion_base + ts.guide.sequence[downstream_index:]
    upstream_single_kd = kds.get(_with_pam(upstream_sequence))
    downstream_single_kd = kds.get(_with_pam(downstream_sequence))
    # All three measurements are required; otherwise the cell is left unset.
    if double_kd is not None and upstream_single_kd is not None and downstream_single_kd is not None:
        delta_double_kd = double_kd - perfect_kd
        delta_upstream = upstream_single_kd - perfect_kd
        delta_downstream = downstream_single_kd - perfect_kd
        dd_kd = delta_double_kd - (delta_upstream + delta_downstream)
        epistasis_matrix.set_value(downstream_index, upstream_index, downstream_insertion_base, upstream_insertion_base, dd_kd)
plotting.plot_2d_insertions(ts.guide.sequence, guide_sequence_labels, epistasis_matrix.to_matrix(), cmap='RdBu', force_full_bounds=True)

# Complement Stretch Affinities

In [None]:
# Matrix of KDs for every measured complement-stretch variant of the guide,
# indexed by the stretch's stop and start positions.
dm = interactive.SinglePositionMatrix(ts.guide.sequence)
for stretch_start, stretch_stop, guide_variant in ts.guide.complement_stretches:
    # The PAM is only re-attached to build the key used for the lookup.
    if ts.pam_side == 5:
        lookup_key = ts.pam + guide_variant
    else:
        lookup_key = guide_variant + ts.pam
    affinity = kds.get(lookup_key)
    yerr = kd_uncertainty.get(lookup_key)
    # Require both the KD and its uncertainty, even though only the KD is stored.
    if affinity is not None and yerr is not None:
        dm.set_value(stretch_stop, stretch_start, affinity)

plotting.plot_complement_stretches(ts.guide.sequence, guide_sequence_labels, dm.to_matrix())