In [1]:
from IPython.display import HTML

# Render a button that shows/hides every notebook input cell (`div.input`).
# The embedded script relies on jQuery (`$`) being available on the page.
# `code_toggle` is also registered to run once on document-ready, and because
# `code_show` starts out true, that first run hides the code cells.
HTML('''<script>
code_show=true; 
function code_toggle() {
 if (code_show){
 $('div.input').hide();
 } else {
 $('div.input').show();
 }
 code_show = !code_show
} 
$( document ).ready(code_toggle);
</script>
<form action="javascript:code_toggle()"><input type="submit" value="Click here to toggle on/off the raw code."></form>''')

In [None]:
%matplotlib inline
%load_ext autoreload
%autoreload 2

from champ.interactive import load_ABAs, TargetSequence, Comparator
from champ.plotting import plot_position_diff, plot_2d_deletions, plot_2d_insertions, plot_2d_mismatches
import yaml
import flabpal
import numpy as np
import matplotlib.pyplot as plt

# Color used for each base letter; handed to plot_position_diff in the cells below.
base_color = {'A': flabpal.blue, 'C': flabpal.yellow, 'G': flabpal.green, 'T': flabpal.red}

# safe_load only constructs plain Python objects, and unlike a bare
# yaml.load(f) it does not require an explicit Loader argument (PyYAML >= 6
# raises TypeError when Loader is omitted).
# NOTE(review): assumes targets.yml contains only plain YAML types - confirm.
with open("/shared/targets.yml") as f:
    targets = yaml.safe_load(f)

# TODO: replace the empty keys with the target names defined in targets.yml.
protein1 = TargetSequence(targets[''], pam_side=3, pam_length=3)
protein2 = TargetSequence(targets[''], pam_side=5, pam_length=4)

In [None]:
comparator = Comparator()

# TODO: both load_ABAs paths below are identical placeholders; point each one
# at the results file of the corresponding experiment.

# Each experiment is registered with its own ABAs/errors. The previous code
# passed the undefined names `ABAs` / `ABA_error`, which raised NameError on a
# fresh kernel.
ABAs_protein1, ABA_error_protein1 = load_ABAs('/path/to/LDA_Imin_const_Imax_adjusted_Kds_and_ABAs.txt')
comparator.add_experiment('Protein 1', protein1, ABAs_protein1, ABA_error_protein1)
# ABA of the unmodified target sequence; used later as the normalization value.
perfect_protein1 = ABAs_protein1[protein1.sequence]

ABAs_protein2, ABA_error_protein2 = load_ABAs('/path/to/LDA_Imin_const_Imax_adjusted_Kds_and_ABAs.txt')
comparator.add_experiment('Protein 2', protein2, ABAs_protein2, ABA_error_protein2)
perfect_protein2 = ABAs_protein2[protein2.sequence]

# Scatterplot

In [None]:
def iter_matrices_no_diagonal(m1, m2):
    """Yield paired off-diagonal entries of two matrices, skipping NaNs.

    The matrices are walked in lockstep, row by row and column by column.
    A pair ``(m1[r][c], m2[r][c])`` is produced only when ``r != c`` and
    neither value is NaN.
    """
    for row_idx, paired_rows in enumerate(zip(m1, m2)):
        for col_idx, (first, second) in enumerate(zip(*paired_rows)):
            # Entries on the main diagonal are handled by a separate iterator.
            if row_idx == col_idx:
                continue
            if np.isnan(first) or np.isnan(second):
                continue
            yield first, second

def iter_matrices_diagonal_only(m1, m2):
    """Yield paired main-diagonal entries of two matrices, skipping NaNs.

    The matrices are walked in lockstep, row by row and column by column.
    A pair ``(m1[r][c], m2[r][c])`` is produced only when ``r == c`` and
    neither value is NaN.
    """
    for diag_idx, (row_a, row_b) in enumerate(zip(m1, m2)):
        for col_idx, pair in enumerate(zip(row_a, row_b)):
            if col_idx != diag_idx:
                continue
            value_a, value_b = pair
            if not (np.isnan(value_a) or np.isnan(value_b)):
                yield value_a, value_b

def compare_data(bottom_data, left_data, bottom_error, left_error, color, full_label, diagonal_label, axis, extent):
    """Plot one dataset pair as two scatter series on ``axis``.

    Off-diagonal matrix entries are drawn with circle markers under
    ``full_label``; diagonal entries are drawn with triangle markers under
    ``diagonal_label``. ``extent`` is passed through both ``scatterplot``
    calls and the value returned by the second call is returned.
    """
    # Off-diagonal entries first.
    off_diagonal_points = iter_matrices_no_diagonal(bottom_data, left_data)
    off_diagonal_errors = iter_matrices_no_diagonal(bottom_error, left_error)
    extent = scatterplot(off_diagonal_points, off_diagonal_errors, axis, color, full_label, extent, 'o')

    # Then the diagonal entries, in the same color but with a different marker.
    diagonal_points = iter_matrices_diagonal_only(bottom_data, left_data)
    diagonal_errors = iter_matrices_diagonal_only(bottom_error, left_error)
    return scatterplot(diagonal_points, diagonal_errors, axis, color, diagonal_label, extent, '^')

def scatterplot(data_iterator, error_iterator, axis, color, label, extent, marker):
    """Scatter (x, y) pairs with symmetric error bars and update the extent.

    Parameters:
        data_iterator: iterable of ``(x, y)`` pairs to plot.
        error_iterator: iterable of ``(x_error, y_error)`` pairs, expected to
            line up one-to-one with ``data_iterator``.
        axis: object providing matplotlib-style ``scatter`` and ``errorbar``.
        color, label, marker: forwarded to ``axis.scatter``.
        extent: largest coordinate seen so far by earlier calls.

    Returns:
        The larger of the incoming ``extent`` and the largest x or y value
        plotted here. If there are no points, nothing is drawn and ``extent``
        is returned unchanged.
    """
    points = list(data_iterator)
    errors = list(error_iterator)
    if not points:
        # Nothing to draw; max() of an empty list would raise ValueError.
        return extent

    x = [bottom_point for bottom_point, _left_point in points]
    y = [left_point for _bottom_point, left_point in points]
    axis.scatter(x, y, color=color, marker=marker, label=label, s=49, edgecolor='black')

    # NOTE(review): the callers filter NaNs out of data and errors
    # independently, so the two sequences can differ in length if only one of
    # them holds a NaN at some position - confirm upstream data never does.
    x_err = [bottom_err for bottom_err, _left_err in errors]
    y_err = [left_err for _bottom_err, left_err in errors]
    axis.errorbar(x, y, yerr=(y_err, y_err), xerr=(x_err, x_err), ecolor='black', fmt='none', zorder=0, capsize=0)

    # Accumulate instead of overwrite, so the extent threaded through
    # successive calls reflects every series rather than only the last one.
    return max(extent, max(x), max(y))


def scatterplot_comparison(comparator, bottom_set_label, left_set_label, normalize=False):
    """Scatter one experiment's values against another's for four dataset types.

    For each of mismatches, deletions, insertions and complement stretches,
    ``comparator.compare2d`` is asked for the per-experiment matrices and
    their errors, which are then drawn by ``compare_data`` in a dedicated
    color. ``bottom_set_label`` goes on the x axis and ``left_set_label`` on
    the y axis. Both axes run from 0 to the extent reported by the plotting
    helpers, or from 0 to 1.0 when ``normalize`` is true. A dashed slope-1
    line is added for reference.
    """
    extent = 1.0
    fig, ax = plt.subplots(figsize=(10, 10))

    # (dataset type, color, off-diagonal label, diagonal label)
    series = (
        ('mismatches', flabpal.blue, 'Double Mismatches', 'Single Mismatches'),
        ('deletions', flabpal.green, 'Double Deletions', 'Single Deletions'),
        ('insertions', flabpal.red, 'Double Insertions', 'Single Insertions'),
        ('complement_stretches', flabpal.yellow, 'Complement Stretches', 'Single Complements'),
    )

    for dataset_type, color, double_label, single_label in series:
        matrices = comparator.compare2d(bottom_set_label, left_set_label, dataset_type, dataset_type,
                                        guide_only=True, normalize=normalize, return_each_matrix=True)
        bottom, left, bottom_errors, left_errors = matrices
        extent = compare_data(bottom, left, bottom_errors, left_errors, color, double_label, single_label, ax, extent)

    # Normalized plots always use the unit square.
    if normalize:
        extent = 1.0

    ax.set_ylim([0, extent])
    ax.set_xlim([0, extent])
    axis_label_template = "%s ABA $(k_{B}T)$"
    ax.set_xlabel(axis_label_template % bottom_set_label, fontsize=18)
    ax.set_ylabel(axis_label_template % left_set_label, fontsize=18)
    ax.plot([0, extent], [0, extent], color='black', linestyle='--', label='Slope=1')
    ax.legend(loc='lower right', scatterpoints=1)
    fig.tight_layout()
    

def plot_rank_ordered_ABAs(ABAs, ABA_errors, ax, color, label, normalize_by=None):
    """Plot affinities sorted from highest to lowest, with a shaded error band.

    Parameters:
        ABAs: mapping whose ``items()`` yields ``(sequence, ABA)`` pairs.
        ABA_errors: container indexed by sequence giving each ABA's error.
        ax: axes to draw on.
        color, label: forwarded to ``ax.scatter``.
        normalize_by: when not None, values and errors are divided by it.

    Negative ABAs are clipped to 0.0 before plotting; errors are not clipped.

    Returns:
        The number of points plotted.
    """
    # Sort on (ABA, sequence) tuples, largest first; rank is position in this list.
    ranked = sorted(((ABA, seq) for seq, ABA in ABAs.items()), reverse=True)
    count = len(ranked)

    ranks = np.array([rank for rank in range(count)])
    affinities = np.array([max(ABA, 0.0) for ABA, _seq in ranked])
    errors = np.array([ABA_errors[seq] for _ABA, seq in ranked])
    if normalize_by is not None:
        affinities = affinities / normalize_by
        errors = errors / normalize_by
    lower_bound = affinities - errors
    upper_bound = affinities + errors

    ax.fill_between(list(ranks), list(lower_bound), list(upper_bound), facecolor='black', alpha=1.0, color=flabpal.gray)
    ax.scatter(ranks, affinities, c=color, s=36, linewidth=0, label=label, zorder=20)
    return count

# Correlation Between Various Polymorphisms

In [None]:
# TODO: replace the two empty strings with experiment labels; the first is
# used for the x (bottom) axis and the second for the y (left) axis.
# Presumably these must match names passed to comparator.add_experiment - confirm.
scatterplot_comparison(comparator, '', '', normalize=False)

# Rank-Ordered List of Affinities

In [None]:
fig, ax = plt.subplots(figsize=(8, 7))

# Each curve is normalized by the ABA of that protein's unmodified target sequence.
protein1_count = plot_rank_ordered_ABAs(ABAs_protein1, ABA_error_protein1, ax, flabpal.blue, 'Protein 1', normalize_by=perfect_protein1)
protein2_count = plot_rank_ordered_ABAs(ABAs_protein2, ABA_error_protein2, ax, flabpal.red, 'Protein 2', normalize_by=perfect_protein2)

# Crop the x axis to the experiment with fewer sequences. The previous code
# referenced undefined *_count names here and raised NameError.
smallest_count = min(protein1_count, protein2_count)

# set_axis_bgcolor was removed in matplotlib 2.2; set_facecolor is its
# replacement (available since 2.0). Fall back for older installations.
if hasattr(ax, 'set_facecolor'):
    ax.set_facecolor('white')
else:
    ax.set_axis_bgcolor('white')
ax.grid(False)
ax.legend(loc='best', scatterpoints=1)
ax.set_xlabel('Rank', fontsize=18)
ax.set_xticks([0, 5500, 11000])
ax.set_ylabel('Apparent Binding Affinity ($k_B T$)', fontsize=18)

ax.set_xlim((-5, smallest_count*1.01))
ax.set_ylim((0.0, 1.0))
for item in ax.get_xticklabels() + ax.get_yticklabels():
    item.set_fontsize(16)
fig.tight_layout()

# Double Mismatches

In [None]:
# TODO: fill in the two experiment labels to compare and a colorbar_label; all
# three are blank placeholders. The labels are presumably names passed to
# comparator.add_experiment - confirm.
sequence, sequence_labels, merged, diff  = comparator.compare_2d_mismatches('', '', guide_only=True, normalize=True)
plot_position_diff(sequence, sequence_labels, base_color, diff, positions_are_merged=merged, colorbar_label='')

# Double Deletions

In [None]:
# TODO: fill in the two experiment labels to compare and a colorbar_label; all
# three are blank placeholders. The labels are presumably names passed to
# comparator.add_experiment - confirm.
sequence, sequence_labels, merged, diff = comparator.compare_2d_deletions('', '', guide_only=True, normalize=True)
plot_position_diff(sequence, sequence_labels, base_color, diff, positions_are_merged=merged, colorbar_label='')

# Double Insertions

In [None]:
# TODO: fill in the two experiment labels to compare and a colorbar_label; all
# three are blank placeholders. The labels are presumably names passed to
# comparator.add_experiment - confirm.
sequence, sequence_labels, merged, diff = comparator.compare_2d_insertions('', '', guide_only=True, normalize=True)
plot_position_diff(sequence, sequence_labels, base_color, diff, positions_are_merged=merged, colorbar_label='')

# Complement Stretches

In [None]:
# TODO: fill in the two experiment labels to compare and a colorbar_label; all
# three are blank placeholders. The labels are presumably names passed to
# comparator.add_experiment - confirm.
sequence, sequence_labels, merged, diff = comparator.compare_2d_complement_stretches('', '', guide_only=True, normalize=True)
plot_position_diff(sequence, sequence_labels, base_color, diff, positions_are_merged=merged, colorbar_label='')