In [None]:
# --- Run parameters: fill these in before running the rest of the notebook ---
date = ''         # used below as the sub-directory name under results/ and figs/
target_name = ''  # key used to look up the target in the targets YAML file
negative_control_target_name = ''
pam_side, pam_length = 5, 4  # PAM side (5 or 3) and PAM length in bases

# plot settings
fontsize, tick_fontsize = 18, 16

In [None]:
%matplotlib inline
%load_ext autoreload
%autoreload 2
import os
import sys
import glob
import numpy as np
import matplotlib.pyplot as plt
import tifffile
import random
import itertools
from collections import defaultdict, Counter, namedtuple
from IPython.display import HTML, Image
from champ import misc, intensity, intensity_array, interactive
from Bio import SeqIO
import time
from JSAnimation import IPython_display
from matplotlib import animation
import yaml
import flabpal
from champ.adapters_cython import simple_hamming_distance
from mpl_toolkits.mplot3d import Axes3D
import matplotlib as mpl
import matplotlib.pyplot as plt
from champ.interactive import TargetSequence
from champ import plotting

In [None]:
# Load the table of target sequences and select the one named by `target_name`.
# yaml.safe_load is used instead of yaml.load: calling yaml.load without an
# explicit Loader can construct arbitrary Python objects from the file and is
# rejected with a TypeError by PyYAML >= 6.
with open("/shared/targets.yml") as f:
    targets = yaml.safe_load(f)
target = targets[target_name]
ts = TargetSequence(target, pam_side=pam_side, pam_length=pam_length)

# Mathtext labels of the form "$<base>_{<index>}$" (base with its index as a
# subscript), one per (base, index) pair, for the full sequence and for the guide.
sequence_labels = ["$%s_{%d}$" % (base, index) for base, index in ts.human_readable_indexes]
guide_sequence_labels = ["$%s_{%d}$" % (base, index) for base, index in ts.guide.human_readable_indexes]

In [None]:
# Parse the intensity scores file for this run into an IntensityArray.
IA = intensity_array.IntensityArray()
intensity_filename = os.path.join('results', date, 'custom', 'LDA_intensity_scores.txt')
IA.parse_intensities_file(intensity_filename)

target_len = len(target)
# Colour assigned to each base in the plots below.
base_color = dict(A=flabpal.blue, C=flabpal.yellow, G=flabpal.green, T=flabpal.red)

bases = 'ACGT'
Imin_name = 'Imin_const'
Imax_name = 'Imax_adjusted'
inten_fmt = 'LDA'
TA_calibration_strategy = 'neg_control'  # ['pam', 'neg_control', 'all_possible']
ABA_zero_strategy = 'neg_control'  # neg_control or max_concentration

# Underscore-joined tag built from the settings above.
dname = '_'.join((inten_fmt, Imin_name, Imax_name, TA_calibration_strategy, ABA_zero_strategy))

# Create the analysis output directories if they do not exist yet.
fig_dir = 'analysis/figs'
results_dir = 'analysis/results'
for dpath in (fig_dir, results_dir):
    if os.path.isdir(dpath):
        continue
    os.makedirs(dpath)

# Per-run input/output locations.
custom_results_dir = os.path.join('results', date, 'custom')
custom_fig_dir = os.path.join('figs', date, 'custom')
inten_array_fpath = os.path.join(custom_results_dir, 'LDA_intensity_scores.txt')

In [None]:
# Per-sequence fit results read from the Kd/ABA results file, keyed by sequence.
Kds, Kd_error = {}, {}
ABAs, ABA_error = {}, {}
ddGs = {}
fname = 'LDA_Imin_const_Imax_adjusted_Kds_and_ABAs.txt'
fpath = os.path.join(custom_results_dir, fname)
with open(fpath) as f:
    # Header line 1: "# Target: <sequence>". Note this rebinds `target`.
    line = next(f)
    assert line.startswith('# Target:')
    target = line.strip().split(': ')[1]

    # Header line 2: "# Neg Control...: <sequence>".
    line = next(f)
    assert line.startswith('# Neg Control')
    neg_control_target = line.strip().split(': ')[1]

    # Header line 3 introduces the concentration rows.
    line = next(f)
    assert line.startswith('# Concentration')

    # Consume concentration rows until the next '#' line. max_concentration
    # ends up holding the first column of the LAST row read
    # (presumably rows are in ascending order -- TODO confirm).
    line = next(f)
    while True:
        if line.startswith('#'):
            break
        max_concentration = float(line.strip().split()[0])
        line = next(f)
    assert line.startswith('# Seq')

    # Data rows: <seq> <Kd> <Kd error> <ABA> <ABA error>; '#' lines are skipped.
    for line in f:
        if not line.startswith('#'):
            words = line.strip().split()
            seq = words[0]
            assert seq not in Kds, seq
            Kd, Kd_err, ABA, ABA_err = map(float, words[1:])
            Kds[seq], Kd_error[seq] = Kd, Kd_err
            # Negative ABA values are clamped to zero.
            ABAs[seq] = max(ABA, 0.0)
            ABA_error[seq] = ABA_err

# ddG of each sequence relative to the target read from the file header.
ddGs = dict((seq, ABAs[target] - ABA) for seq, ABA in ABAs.items())
ddG_error = ABA_error
perfect_ABA = ABAs[ts.sequence]

# Single Mismatch Affinities

In [None]:
# Bar chart of ABA relative to the perfect target for each single-mismatch
# sequence, with up to four narrow bars (one per mismatch base) at each position.
fig, ax = plt.subplots(figsize=(15, 5))
idxs = np.arange(len(target))
width = 0.5
# Error-bar styling is the same for every bar, so build it once.
error_kw = dict(ecolor='k', alpha=0.6)

for i, j, mismatch_base, seq in ts.single_mismatches:
    affinity = ABAs.get(seq)
    if affinity is None:
        # No measurement for this sequence: report it and leave a gap rather
        # than handing a None height to ax.bar.
        print("Missing sequence: %s" % seq)
        continue
    affinity -= perfect_ABA
    error = ABA_error.get(seq, 0.0)
    # Only bars at position 0 carry a legend label, so each base is listed once.
    label = mismatch_base if i == 0 else None
    # j shifts the bar within the group at position i
    # (presumably the base's index, 0-3 -- TODO confirm).
    bar_x_position = i - width/2.0 + width*j/4.0
    color = base_color[mismatch_base]
    ax.bar(bar_x_position, affinity, width=width/4.0, yerr=error, color=color, error_kw=error_kw, label=label)
plotting.configure_position_penalty_axes(target, fig, ax, sequence_labels,
                                         fontsize, tick_fontsize, 'ABA', base_color, target_name)

# Double Mismatch Affinities

In [None]:
# Fill a mismatch matrix with the ABA (relative to the perfect target) of every
# double-mismatch sequence, then draw it as a 2D plot.
mm = interactive.MismatchMatrix(ts.sequence)
for i, j, base_i, base_j, seq in ts.double_mismatches:
    affinity = ABAs.get(seq)
    if affinity is None:
        # Unmeasured sequences are reported and stored as None.
        print("Missing sequence: %s" % seq)
    else:
        affinity = affinity - perfect_ABA
    mm.set_value(i, j, base_i, base_j, affinity)

plotting.plot_2d_mismatches(ts.sequence, sequence_labels, base_color, mm.to_matrix())

# Single Deletion Affinities

In [None]:
# Bar chart of ABA relative to the perfect target for each single-deletion
# guide sequence, one bar per position.
fig, ax = plt.subplots(figsize=(15, 5))
width = 0.5

for i, seq in ts.guide.single_deletions:
    # Re-attach the PAM on the configured side to build the ABAs lookup key.
    sequence = ts.pam + seq if ts.pam_side == 5 else seq + ts.pam
    affinity = ABAs.get(sequence)
    if affinity is None:
        # No measurement for this sequence: report it and leave a gap rather
        # than handing a None height to ax.bar.
        print("Missing sequence: %s" % seq)
        continue
    affinity -= perfect_ABA
    error = ABA_error.get(sequence, 0.0)
    ax.bar(i - width/2.0, affinity, width=width, yerr=error, color=flabpal.gray, error_kw=dict(ecolor='black'))
plotting.configure_position_penalty_axes(ts.guide.sequence, fig, ax, guide_sequence_labels, fontsize, tick_fontsize, 'ABA', base_color, target_name, legend=False)

# Double Deletion Affinities

In [None]:
from matplotlib import gridspec
import matplotlib as mpl

# Fill a position-by-position matrix with the ABA (relative to the perfect
# target) of every double-deletion guide sequence, then plot it.
dm = interactive.SinglePositionMatrix(ts.guide.sequence)
for i, j, seq in ts.guide.double_deletions:
    # The PAM is re-attached on the configured side only to look up the affinity.
    if ts.pam_side == 5:
        sequence = ts.pam + seq
    else:
        sequence = seq + ts.pam
    affinity = ABAs.get(sequence)
    if affinity is None:
        # Unmeasured sequences are reported and stored as None.
        print("Missing sequence: %s" % seq)
    else:
        affinity = affinity - perfect_ABA
    dm.set_value(i, j, affinity)

plotting.plot_2d_deletions(ts.guide.sequence, guide_sequence_labels, base_color, dm.to_matrix('lower'))

# Single Insertion Affinities

In [None]:
# Bar chart of ABA relative to the perfect target for each single-insertion
# guide sequence, with up to four narrow bars (one per inserted base) at each position.
fig, ax = plt.subplots(figsize=(15, 5))
idxs = np.arange(len(target))
width = 0.5
# Error-bar styling is the same for every bar, so build it once.
error_kw = dict(ecolor='k', alpha=0.6)

for i, j, insertion_base, seq in ts.guide.single_insertions:
    # Re-attach the PAM on the configured side to build the ABAs lookup key.
    sequence = ts.pam + seq if ts.pam_side == 5 else seq + ts.pam
    affinity = ABAs.get(sequence)
    if affinity is None:
        # No measurement for this sequence: report it and leave a gap rather
        # than handing a None height to ax.bar.
        print("Missing sequence: %s" % seq)
        continue
    affinity -= perfect_ABA
    error = ABA_error.get(sequence, 0.0)
    # Only bars at position 0 carry a legend label, so each base is listed once.
    label = insertion_base if i == 0 else None
    # j shifts the bar within the group at position i
    # (presumably the base's index, 0-3 -- TODO confirm).
    bar_x_position = i - width/2.0 + width*j/4.0
    color = base_color[insertion_base]
    ax.bar(bar_x_position, affinity, width=width/4.0, yerr=error, color=color, error_kw=error_kw, label=label)
plotting.configure_position_penalty_axes(ts.guide.sequence, fig, ax, guide_sequence_labels, fontsize, tick_fontsize, 'ABA', base_color, target_name)

# Double Insertion Affinities

In [None]:
# Fill an insertion matrix with the ABA (relative to the perfect target) of
# every double-insertion guide sequence, then draw it as a 2D plot.
mm = interactive.InsertionMatrix(ts.guide.sequence)
for i, j, base_i, base_j, seq in ts.guide.double_insertions:
    # The PAM is re-attached on the configured side only to look up the affinity.
    if ts.pam_side == 5:
        sequence = ts.pam + seq
    else:
        sequence = seq + ts.pam
    affinity = ABAs.get(sequence)
    if affinity is None:
        # Unmeasured sequences are reported and stored as None.
        print("Missing sequence: %s" % seq)
    else:
        affinity = affinity - perfect_ABA
    # NOTE(review): the bases are passed as (base_j, base_i), the reverse of the
    # order used for the double-mismatch matrix -- confirm this is intentional.
    mm.set_value(i, j, base_j, base_i, affinity)

plotting.plot_2d_insertions(ts.guide.sequence, guide_sequence_labels, base_color, mm.to_matrix())

# Complement Stretch Affinities

In [None]:
# Fill a matrix indexed by (stop, start) with the ABA (relative to the perfect
# target) of every complement-stretch guide sequence, then plot it.
dm = interactive.SinglePositionMatrix(ts.guide.sequence)
for start, stop, seq in ts.guide.complement_stretches:
    # There's an extra datapoint that shows up in Jim's data - it's not bad but it is distracting, so we hide it
    if (start, stop) == (6, 22):
        continue
    # The PAM is re-attached on the configured side only to look up the affinity.
    if ts.pam_side == 5:
        sequence = ts.pam + seq
    else:
        sequence = seq + ts.pam
    affinity = ABAs.get(sequence)
    if affinity is None:
        # Unmeasured sequences are reported and stored as None.
        print("Missing sequence: %s" % seq)
    else:
        affinity = affinity - perfect_ABA
    dm.set_value(stop, start, affinity)

plotting.plot_complement_stretches(ts.guide.sequence, guide_sequence_labels, base_color, dm.to_matrix('lower'))