## Imports and Functions

In [None]:
import matplotlib
import matplotlib.pyplot as plt
from matplotlib.pyplot import figure
from matplotlib import colors
from mpl_toolkits.axes_grid1 import make_axes_locatable
from soursop import sstrajectory, ssprotein
import numpy as np
import mdtraj as md
import pandas as pd
import seaborn as sns
import os
from scipy.stats import sem

# Export PDF/PS fonts so that they stay editable in illustrator/affinity,
# and set the axes linewidth used by every figure.
matplotlib.rcParams.update({
    'pdf.fonttype': 42,
    'ps.fonttype': 42,
    'axes.linewidth': 0.5,
})

# These settings make inline figures look sharp on a retina
# MacBook. They can be commented out without any issue and
# are solely aesthetic.
%matplotlib inline
%config InlineBackend.figure_format='retina'

# Font family/weight applied to all figures through matplotlib's rc system.
font = dict(family='arial', weight='normal')
matplotlib.rc('font', **font)

# for rasterized=True
from matplotlib.backends.backend_pdf import PdfPages

In [None]:
def read_psw(psw_file):
    """Read a PSW file and return one integer sum per data row.

    The first line of the file is skipped and columns 1-11 (0-based, so
    column 0 is ignored) are parsed as numbers.

    Parameters
    ----------
    psw_file : str or path-like
        File to read with ``numpy.loadtxt``.

    Returns
    -------
    numpy.ndarray
        1-D integer array with one entry per data row: the sum of that
        row's eleven parsed columns, accumulated with ``dtype=int``.
    """
    # ndmin=2 keeps a file with a single data row two-dimensional; without it
    # loadtxt hands back a 1-D array and the row-wise sum below fails.
    psw = np.loadtxt(psw_file, skiprows=1, usecols=tuple(range(1, 12)), ndmin=2)
    return np.sum(psw, axis=1, dtype=int)

def get_fixed_idr_residues(psw_data, fd_length=97):
    """Find the first contiguous low-weight stretch in the IDR part of a PSW profile.

    The last ``fd_length`` entries of ``psw_data`` are ignored. In what
    remains, the stretch starts at the first entry below 1.5 and ends just
    before the next entry above 1.5 (or at the last remaining entry if no
    such entry follows).

    Parameters
    ----------
    psw_data : array-like
        1-D per-residue values, e.g. the output of ``read_psw``.
    fd_length : int, optional
        Number of trailing entries to exclude from the search. Defaults to
        97, the value that was previously hard-coded.

    Returns
    -------
    tuple of int
        ``(start, end)`` indices of the stretch, both inclusive.

    Raises
    ------
    IndexError
        If no entry below 1.5 exists in the searched region.
    """
    psw = np.asarray(psw_data)
    # Equivalent to psw[:-fd_length], but also well-defined for fd_length == 0.
    idr_psw = psw[:max(len(psw) - fd_length, 0)]

    below = np.nonzero(idr_psw < 1.5)[0]
    if below.size == 0:
        raise IndexError('no entry below 1.5 found in the IDR region of the PSW data')
    start = int(below[0])

    # Only the "no later entry above 1.5" case is handled here; anything else
    # is allowed to propagate instead of being hidden by a bare except.
    above = np.nonzero(idr_psw[start:] > 1.5)[0]
    if above.size:
        end = start + int(above[0]) - 1
    else:
        end = len(idr_psw) - 1

    return start, end

def load_trajectory(pdb, xtc, stride=1):
    """Load a trajectory and build the matching protein object.

    The trajectory is read with ``sstrajectory.SSTrajectory`` and its
    ``traj`` attribute is sliced as ``[1::stride]``, i.e. the first frame is
    dropped and every ``stride``-th frame after it is kept.

    Returns
    -------
    tuple
        ``(sliced_traj, protein)`` where ``protein`` is the
        ``ssprotein.SSProtein`` built from the sliced trajectory.
    """
    print('Loading protein trajectory...')
    loaded = sstrajectory.SSTrajectory(xtc, pdb, debug=False)
    kept_frames = loaded.traj[1::stride]
    protein = ssprotein.SSProtein(traj=kept_frames, debug=False)
    return kept_frames, protein
    
def get_interchain_dmap(prot, mode):
    """Build the per-frame distance map between chain 0 and chain 1 residues.

    Parameters
    ----------
    prot : object
        Protein object exposing ``resid_with_CA``, ``topology.to_dataframe()``,
        ``calculate_all_CA_distances(...)`` and ``len()``; ``len(prot)`` is
        used as the size of the first (frame) axis of the result.
    mode : str
        Forwarded unchanged to ``prot.calculate_all_CA_distances``.

    Returns
    -------
    numpy.ndarray
        Array of shape ``(len(prot), n_chain0, n_chain1)`` where ``n_chain0``
        and ``n_chain1`` are the numbers of CA atoms with ``chainID`` 0 and 1
        in the topology table.
    """
    stride = 1

    # Count CA-containing residues per chain from the topology atom table.
    atom_table = prot.topology.to_dataframe()[0]
    chain_0_len = len(atom_table.query("chainID == 0 and name == 'CA'"))
    chain_1_len = len(atom_table.query("chainID == 1 and name == 'CA'"))

    distanceMap = np.zeros([len(prot), chain_0_len, chain_1_len])
    print('Creating distance map...')

    # One row of the map per chain-0 residue.
    for row, resIndex in enumerate(prot.resid_with_CA[0:chain_0_len]):
        # NOTE(review): the call presumably returns, per frame, the distances
        # from resIndex to the residues that follow it (non-redundant upper
        # triangle) - confirm against the SOURSOP docs. The trailing
        # chain_1_len columns are taken as the chain-1 residues.
        full_data = prot.calculate_all_CA_distances(resIndex, mode=mode, stride=stride)
        distanceMap[:, row, :] = full_data[:, -chain_1_len:]

    return distanceMap

def get_contact_freq_map(dmap, contact_distance_threshold):
    """Return the fraction of entries along axis 0 that are within the cutoff.

    ``dmap`` is compared element-wise with ``contact_distance_threshold``
    (inclusive, ``<=``) and the boolean result is averaged over axis 0.
    """
    within_cutoff = dmap <= contact_distance_threshold
    return within_cutoff.mean(axis=0)

def relative_change_cmap_vs_EV(full_cmap, EV_cmap):
    """Return ``log(full_cmap) - log(EV_cmap)`` with non-positive entries logged as 0.

    For each input, the natural log is taken only where the entry is
    greater than zero; every other entry contributes 0 to the difference.

    Parameters
    ----------
    full_cmap, EV_cmap : array-like
        Arrays of matching (or broadcastable) shape. Integer input is
        accepted and converted to float.

    Returns
    -------
    numpy.ndarray
        Float array holding the element-wise difference of the masked logs.
    """
    # Work on float arrays: with integer input, zeros_like() would create an
    # integer output buffer that np.log cannot write its float result into.
    full = np.asarray(full_cmap, dtype=float)
    ev = np.asarray(EV_cmap, dtype=float)

    log_full = np.log(full, where=(full > 0), out=np.zeros_like(full))
    log_ev = np.log(ev, where=(ev > 0), out=np.zeros_like(ev))
    return log_full - log_ev

def absolute_change_cmap_vs_EV(full_cmap, EV_cmap):
    """Return the element-wise difference ``full_cmap - EV_cmap``."""
    difference = full_cmap - EV_cmap
    return difference

# Plotting functions
def plot_contact_heatmap(cmap, fixed_idr_residues):
    """Show a contact-frequency heatmap with two horizontal marker lines.

    ``cmap`` is drawn with the 'afmhot_r' colormap on a power-law colour
    scale (gamma 0.2, range 0-1). Thin horizontal lines are drawn at
    ``fixed_idr_residues[0]`` and ``fixed_idr_residues[1]``.
    """
    _, ax = plt.subplots(figsize=(3, 3), dpi=200)

    color_scale = colors.PowerNorm(gamma=0.2, vmin=0, vmax=1)
    im = ax.imshow(cmap, cmap='afmhot_r', norm=color_scale, interpolation='none')

    n_cols = len(cmap[0])
    for boundary in (fixed_idr_residues[0], fixed_idr_residues[1]):
        ax.hlines(boundary, 0, n_cols, color='black', linestyle='-', linewidth=0.1)

    ax.set_title('Contact frequency', fontweight='bold')
    ax.set_xlabel('FD residues')
    ax.set_ylabel('IDR residues')
    ax.set_xlim(0, n_cols)

    # Colorbar in its own axes to the right of the heatmap: 5% of the
    # heatmap's width, separated by a fixed 0.05 inch pad.
    colorbar_ax = make_axes_locatable(ax).append_axes("right", size="5%", pad=0.05)
    cbar = plt.colorbar(im, cax=colorbar_ax)
    cbar.ax.tick_params(labelsize=6)

    plt.show()
    
def plot_ev_rel_comparison_heatmap(norm_cmap, fixed_idr_residues):
    """Show a relative-change heatmap on a fixed diverging scale of -8 to 8.

    ``norm_cmap`` is drawn with the 'seismic_r' colormap. Thin horizontal
    lines are drawn at ``fixed_idr_residues[0]`` and
    ``fixed_idr_residues[1]``.
    """
    _, ax = plt.subplots(figsize=(3, 3), dpi=200)
    im = ax.imshow(norm_cmap, cmap='seismic_r', interpolation='none', vmin=-8, vmax=8)

    n_cols = len(norm_cmap[0])
    for boundary in (fixed_idr_residues[0], fixed_idr_residues[1]):
        ax.hlines(boundary, 0, n_cols, color='black', linestyle='-', linewidth=0.1)

    ax.set_title(r'Relative $\Delta$ contact freq', fontweight='bold')
    ax.set_xlabel('FD residues')
    ax.set_ylabel('IDR residues')
    ax.set_xlim(0, n_cols)

    # Colorbar in its own axes to the right of the heatmap: 5% of the
    # heatmap's width, separated by a fixed 0.05 inch pad.
    colorbar_ax = make_axes_locatable(ax).append_axes("right", size="5%", pad=0.05)
    cbar = plt.colorbar(im, cax=colorbar_ax)
    cbar.ax.tick_params(labelsize=6)

    plt.show()

    
def plot_ev_abs_comparison_heatmap(norm_cmap, fixed_idr_residues):
    """Show an absolute-change heatmap on a fixed diverging scale of -0.16 to 0.16.

    ``norm_cmap`` is drawn with the 'seismic_r' colormap. Thin horizontal
    lines are drawn at ``fixed_idr_residues[0]`` and
    ``fixed_idr_residues[1]``; the colorbar is ticked every 0.05.
    """
    _, ax = plt.subplots(figsize=(3, 3), dpi=200)
    im = ax.imshow(norm_cmap, cmap='seismic_r', interpolation='none', vmin=-0.16, vmax=0.16)

    n_cols = len(norm_cmap[0])
    for boundary in (fixed_idr_residues[0], fixed_idr_residues[1]):
        ax.hlines(boundary, 0, n_cols, color='black', linestyle='-', linewidth=0.1)

    ax.set_title(r'Absolute $\Delta$ contact freq', fontweight='bold')
    ax.set_xlabel('FD residues')
    ax.set_ylabel('IDR residues')
    ax.set_xlim(0, n_cols)

    # Colorbar in its own axes to the right of the heatmap: 5% of the
    # heatmap's width, separated by a fixed 0.05 inch pad.
    colorbar_ax = make_axes_locatable(ax).append_axes("right", size="5%", pad=0.05)
    tick_positions = [-0.15, -0.1, -0.05, 0, 0.05, 0.1, 0.15]
    cbar = plt.colorbar(im, cax=colorbar_ax, ticks=tick_positions)
    cbar.ax.tick_params(labelsize=6)

    plt.show()
    
    
def _draw_cmap_panel(ax, matrix, fixed_idr_residues, title, colormap,
                     cbar_ticks=None, **imshow_kwargs):
    """Draw one heatmap panel with marker lines, labels and a slim colorbar."""
    tick_font_size = 6

    im = ax.imshow(matrix, cmap=colormap, interpolation='none', **imshow_kwargs)

    n_cols = len(matrix[0])
    for boundary in (fixed_idr_residues[0], fixed_idr_residues[1]):
        ax.hlines(boundary, 0, n_cols, color='black', linestyle='-', linewidth=0.1)

    ax.set_title(title, fontweight='bold', fontsize=8)
    ax.set_xlabel('FD residues')
    ax.set_ylabel('IDR residues')
    ax.set_xlim(0, n_cols)

    # Colorbar in its own axes to the right of the panel: 5% of the panel's
    # width, separated by a fixed 0.05 inch pad.
    cax = make_axes_locatable(ax).append_axes("right", size="5%", pad=0.05)
    cbar = plt.colorbar(im, cax=cax, ticks=cbar_ticks)
    cbar.ax.tick_params(labelsize=tick_font_size)


def combined_plots(cmap, ev_cmap, full_frames, ev_frames, fixed_idr_residues, baseline='EV', title='IDR-FD', out=None):
    """Draw a 2x2 figure comparing a contact map with a baseline contact map.

    Panels:
      * top left     - ``cmap`` (power-law colour scale, range 0-1)
      * top right    - ``ev_cmap`` (same scale)
      * bottom left  - ``relative_change_cmap_vs_EV(cmap * full_frames,
                       ev_cmap * ev_frames)`` on a -8..8 scale
      * bottom right - ``absolute_change_cmap_vs_EV(cmap, ev_cmap)`` on a
                       -0.16..0.16 scale

    Parameters
    ----------
    cmap, ev_cmap : array-like
        2-D maps to compare.
    full_frames, ev_frames : number
        Factors multiplied into ``cmap`` and ``ev_cmap`` before the relative
        (log) comparison.
    fixed_idr_residues : sequence
        Its first two entries are the y positions of the horizontal marker
        lines drawn on every panel.
    baseline : str, optional
        Label used in the top-right panel title.
    title : str, optional
        Figure-level title.
    out : str or None, optional
        If not None, the figure is saved to this path before being shown.
    """
    fig, axs = plt.subplots(2, 2, figsize=(8, 8), dpi=300)

    # Top left - Full forces "raw" contact map
    _draw_cmap_panel(axs[0, 0], cmap, fixed_idr_residues, 'IDR-FD contact freq',
                     'afmhot_r', norm=colors.PowerNorm(gamma=0.2, vmin=0, vmax=1))

    # Top right - baseline "raw" contact map
    _draw_cmap_panel(axs[0, 1], ev_cmap, fixed_idr_residues, f'{baseline} contact freq',
                     'afmhot_r', norm=colors.PowerNorm(gamma=0.2, vmin=0, vmax=1))

    # Lower left - relative change; frequencies are scaled by the frame
    # counts first, so the log is taken of scaled values.
    rel_norm_cmap = relative_change_cmap_vs_EV(cmap*full_frames, ev_cmap*ev_frames)
    _draw_cmap_panel(axs[1, 0], rel_norm_cmap, fixed_idr_residues,
                     r'Relative $\Delta$ contact freq', 'seismic_r', vmin=-8, vmax=8)

    # Lower right - absolute change in contact frequency
    abs_norm_cmap = absolute_change_cmap_vs_EV(cmap, ev_cmap)
    _draw_cmap_panel(axs[1, 1], abs_norm_cmap, fixed_idr_residues,
                     r'Absolute $\Delta$ contact freq', 'seismic_r',
                     cbar_ticks=[-0.15, -0.1, -0.05, 0, 0.05, 0.1, 0.15],
                     vmin=-0.16, vmax=0.16)

    plt.suptitle(title, fontweight='bold', fontsize=14)
    plt.tight_layout()

    if out is not None:
        plt.savefig(out)

    plt.show()


## Flanking IDR contact maps

In [None]:
mode = 'COM'
threshold = 10
variants = ['cMYB_KIX', 'WT_ATF4_TAZ2', 'SufficiencyInert_ATF4_TAZ2',
            'SufficiencyNegCharge_ATF4_TAZ2', 'SufficiencyHydrophobic_ATF4_TAZ2',
            'SufficiencyNegChargeHydrophobic_ATF4_TAZ2']


def _flanking_contact_profiles(cmaps, mask_start, mask_stop):
    """Summarise per-replicate contact maps into row/column contact profiles.

    Rows ``mask_start:mask_stop`` of each map are zeroed (on a copy), the map
    is summed along each axis, and the mean and standard error across the
    replicates are returned as ``(row_mean, row_sem, col_mean, col_sem)``.
    """
    row_sums = []
    col_sums = []
    for replicate_cmap in cmaps:
        masked = replicate_cmap.copy()
        masked[mask_start:mask_stop] = 0
        row_sums.append(masked.sum(axis=1).flatten())
        col_sums.append(masked.sum(axis=0).flatten())

    row_sums = np.vstack(row_sums)
    col_sums = np.vstack(col_sums)
    return (row_sums.mean(axis=0), sem(row_sums, axis=0),
            col_sums.mean(axis=0), sem(col_sums, axis=0))


# ----------------- #

# Per-variant results for the full simulations and the EV baseline.
# `root` and `structures` are expected to be defined earlier in the notebook.
data = {}
EV_data = {}

for v in variants:
    for s in structures:
        # Fixed IDR residue idxs for full motif
        print(f'{v} {s}')
        psw_path = f"{root}/{s}/{v}/coil_start/1/PSWFILE.psw"
        psw_data = read_psw(psw_path)
        fixed_idr_residue_idxs = get_fixed_idr_residues(psw_data)

        dmap_list = []
        cmap_list = []

        for repl in range(1, 5):
            try:
                print(repl)
                wt_pdb = f'{root}/{s}/{v}/coil_start/{repl}/EquilMC_END.pdb'
                wt_xtc = f'{root}/{s}/{v}/coil_start/{repl}/StandardMC_traj.xtc'

                traj, prot = load_trajectory(wt_pdb, wt_xtc, stride=1)
                dmap = get_interchain_dmap(prot, mode=mode)
                dmap_list.append(dmap)
                cmap_list.append(get_contact_freq_map(dmap, threshold))
            except Exception as err:
                # Best effort: a replicate that cannot be processed is
                # reported (with the reason) and skipped.
                print(f'{v} {s} {repl}: {err!r}')
                continue

        ev_dmap_list = []
        ev_cmap_list = []
        for ev_folder, ev_repl in zip(['ev', 'ev2', 'ev2'], ['1', '1', '2']):
            try:
                print(ev_folder, ev_repl)
                ev_pdb = f'{root}/{s}/{ev_folder}/{v}/coil_start/{ev_repl}/EquilMC_END.pdb'
                ev_xtc = f'{root}/{s}/{ev_folder}/{v}/coil_start/{ev_repl}/StandardMC_traj.xtc'

                ev_traj, ev_prot = load_trajectory(ev_pdb, ev_xtc, stride=1)
                ev_dmap = get_interchain_dmap(ev_prot, mode=mode)
                ev_dmap_list.append(ev_dmap)
                ev_cmap_list.append(get_contact_freq_map(ev_dmap, threshold))
            except Exception as err:
                print(f'{v} {s} {ev_folder}/{ev_repl} -- EV: {err!r}')
                continue

    print('Done!')

    # NOTE(review): everything below runs once per variant, after the
    # structure loop, and the replicate lists are reset for every structure.
    # Only the LAST structure's replicates therefore end up in data[v] /
    # EV_data[v]. Confirm whether structures should be pooled or keyed
    # separately.

    # Combine dmaps and create contact map
    merged_dmap = np.vstack(dmap_list)
    full_nframes = len(merged_dmap)
    cmap = get_contact_freq_map(merged_dmap, threshold)

    # The EV frame count gets its own name: it used to overwrite
    # full_nframes, so both result dicts reported the EV count.
    ev_merged_dmap = np.vstack(ev_dmap_list)
    ev_nframes = len(ev_merged_dmap)
    ev_cmap = get_contact_freq_map(ev_merged_dmap, threshold)

    # Rows covering the bound region (with a 3/2-residue margin) are zeroed
    # before summing; clamp so a start index < 3 cannot wrap to the end.
    mask_start = max(fixed_idr_residue_idxs[0] - 3, 0)
    mask_stop = fixed_idr_residue_idxs[1] + 2

    # mean & standard error of idr/fd contacts across replicates
    idr_mean, idr_sem, fd_mean, fd_sem = _flanking_contact_profiles(
        cmap_list, mask_start, mask_stop)

    data[v] = {'nframes': full_nframes, 'cmap': cmap, 'fixed_res': fixed_idr_residue_idxs,
               'idr_contact_mean': idr_mean, 'idr_contact_stderr': idr_sem,
               'fd_contact_mean': fd_mean, 'fd_contact_stderr': fd_sem}

    ev_idr_mean, ev_idr_sem, ev_fd_mean, ev_fd_sem = _flanking_contact_profiles(
        ev_cmap_list, mask_start, mask_stop)

    EV_data[v] = {'nframes': ev_nframes, 'cmap': ev_cmap,
                  'idr_contact_mean': ev_idr_mean, 'idr_contact_stderr': ev_idr_sem,
                  'fd_contact_mean': ev_fd_mean, 'fd_contact_stderr': ev_fd_sem}

In [None]:
output_dir = '/home/degriffith/projects/TADs/bound_sims/flanking_contacts/p300_coactivators_analyses/figures/round1_variants'
# One 2x2 comparison figure (full vs EV) per variant, saved as PNG.
# NOTE(review): test_variants is not defined in this part of the notebook;
# use `variants` to plot everything computed above.
for v in test_variants:
    full_entry = data[v]
    ev_entry = EV_data[v]
    fixed_start = full_entry['fixed_res'][0]
    fixed_end = full_entry['fixed_res'][1]
    combined_plots(full_entry['cmap'], ev_entry['cmap'],
                   full_entry['nframes'], ev_entry['nframes'],
                   [fixed_start-2, fixed_end+2], baseline='EV',
                   title=f'{v} vs EV', out=f'{output_dir}/{v}_EVcomparison_heatmap.png')

## Mean flanking IDR contact boxplots

In [None]:
## TODO:
# Compute mean EV contact freqs for each structure
# For each WT sim
    # Compute contact freq
    # Get associated EV data
    # Normalize by this EV data
    # Compute contact score
    # Add score to list
# Plot all replicates
def compute_mean_contact_map(path_list, threshold=10, mode='COM'):
    """Average the contact-frequency maps of several simulation directories.

    For every directory in ``path_list`` the files ``EquilMC_END.pdb`` and
    ``StandardMC_traj.xtc`` are loaded, an inter-chain distance map is built
    and converted to a contact-frequency map; the maps are then averaged
    with equal weight per directory.

    Parameters
    ----------
    path_list : iterable of str
        Simulation directories.
    threshold : number, optional
        Distance cutoff passed to ``get_contact_freq_map``.
    mode : str, optional
        Distance mode passed to ``get_interchain_dmap``. Defaults to 'COM',
        the value that was previously hard-coded.

    Returns
    -------
    numpy.ndarray
        Element-wise mean of the per-directory contact maps.

    Raises
    ------
    ValueError
        If ``path_list`` yields no directories.
    """
    cmaps = []
    for path in path_list:
        pdb = f'{path}/EquilMC_END.pdb'
        xtc = f'{path}/StandardMC_traj.xtc'
        _, prot = load_trajectory(pdb, xtc, stride=1)
        dmap = get_interchain_dmap(prot, mode=mode)
        cmaps.append(get_contact_freq_map(dmap, threshold))

    if not cmaps:
        # np.dstack would also raise ValueError here, with a less helpful message.
        raise ValueError('path_list is empty: no contact maps to average')

    # Stack the 2-D maps along a third axis and average over it.
    return np.dstack(cmaps).mean(axis=2)

def compute_contact_score_indiv_reps(sim_paths, ev_struct_map, ev_cmaps, idr_res_idxs):
    """Score each simulation by its summed contact difference to a baseline map.

    For the simulation at position ``i`` in ``sim_paths``:
      1. its contact map is computed with ``compute_mean_contact_map``,
      2. the baseline map ``ev_cmaps[ev_struct_map[i]]`` is subtracted,
      3. rows ``idr_res_idxs[0]:idr_res_idxs[1]`` of the difference are
         set to zero,
      4. the remaining difference is summed into a single score.

    Parameters
    ----------
    sim_paths : sequence of str
        Simulation directories, one per replicate.
    ev_struct_map : mapping
        Maps the position of a path in ``sim_paths`` to a key of ``ev_cmaps``.
    ev_cmaps : mapping
        Baseline contact maps.
    idr_res_idxs : sequence of int
        ``[start, stop]`` row slice to exclude from the score.

    Returns
    -------
    list
        One score per entry of ``sim_paths``, in the same order.
    """
    # Callers build the start as "index - 3"; clamp it so a small index
    # cannot turn into a negative (wrap-around) slice start.
    mask_start = max(idr_res_idxs[0], 0)
    mask_stop = idr_res_idxs[1]

    contact_sum_vals = []
    for i, sim_path in enumerate(sim_paths):
        cmap = compute_mean_contact_map([sim_path])
        ev_cmap = ev_cmaps[ev_struct_map[i]]

        norm_cmap = cmap - ev_cmap
        norm_cmap[mask_start:mask_stop, :] = 0

        contact_sum_vals.append(np.sum(norm_cmap))

    return contact_sum_vals

In [None]:
# cMYB-KIX vs ATF4-TAZ2
round1_root = '/work/degriffith/TADs/fixed_TAD_sims/p300_coactivators/flanking_idr_sims/sims/round1_IDR_variants_TAZ2_WT'
output_dir = '/home/degriffith/projects/TADs/bound_sims/flanking_contacts/p300_coactivators_analyses/figures/round1_variants'

# --- ATF4-TAZ2: masked residue range and EV baseline maps ---
psw_path = f"{round1_root}/structure_1/WT_ATF4_TAZ2_WT/coil_start/1/PSWFILE.psw"
psw_data = read_psw(psw_path)
fixed_residue_idxs = get_fixed_idr_residues(psw_data)
atf4_idr_residue_idxs = [fixed_residue_idxs[0]-3, fixed_residue_idxs[1]+2]

# Three EV runs per structure: ev/1, ev2/1 and ev2/2.
ATF4_WT_EV_paths = {
    f'atf4_wt_{struct}': [
        f'{round1_root}/structure_{struct}/{ev_folder}/WT_ATF4_TAZ2_WT/coil_start/{ev_repl}'
        for ev_folder, ev_repl in (('ev', 1), ('ev2', 1), ('ev2', 2))
    ]
    for struct in (1, 2, 3)
}

# Get mean EV cmaps
ATF4_WT_EV_cmaps = {}
for name, path_list in ATF4_WT_EV_paths.items():
    ATF4_WT_EV_cmaps[name] = compute_mean_contact_map(path_list)

# --- cMYB-KIX: masked residue range and EV baseline maps ---
# `redo_root` is expected to be defined earlier in the notebook.
psw_path = f"{redo_root}/cMYB_1_KIX/coil_start/1/PSWFILE.psw"
psw_data = read_psw(psw_path)
fixed_residue_idxs = get_fixed_idr_residues(psw_data)
cmyb_idr_residue_idxs = [fixed_residue_idxs[0]-3, fixed_residue_idxs[1]+2]

# Four EV runs per structure.
cMYB_EV_paths = {
    f'cmyb_wt_{struct}': [
        f'{redo_root}/ev/cMYB_{struct}_KIX/coil_start/{ev_repl}'
        for ev_repl in (1, 2, 3, 4)
    ]
    for struct in (1, 2, 3)
}

# Get mean EV cmaps
cMYB_EV_cmaps = {}
for name, path_list in cMYB_EV_paths.items():
    cMYB_EV_cmaps[name] = compute_mean_contact_map(path_list)


In [None]:
# Replicates are listed structure by structure, four per structure, so
# replicate i is scored against the EV map of structure 1 + i // 4.
ev_struct_map = {i: f'atf4_wt_{1 + i // 4}' for i in range(12)}

full_energy_sim_paths = [
    f'{round1_root}/structure_{struct}/WT_ATF4_TAZ2_WT/coil_start/{repl}'
    for struct in (1, 2, 3)
    for repl in (1, 2, 3, 4)
]
atf4_scores = compute_contact_score_indiv_reps(full_energy_sim_paths, ev_struct_map, ATF4_WT_EV_cmaps, atf4_idr_residue_idxs)

In [None]:
# Replicates are listed structure by structure, four per structure, so
# replicate i is scored against the EV map of structure 1 + i // 4.
ev_struct_map = {i: f'cmyb_wt_{1 + i // 4}' for i in range(12)}

full_energy_sim_paths = [
    f'{redo_root}/cMYB_{struct}_KIX/coil_start/{repl}'
    for struct in (1, 2, 3)
    for repl in (1, 2, 3, 4)
]
cmyb_scores = compute_contact_score_indiv_reps(full_energy_sim_paths, ev_struct_map, cMYB_EV_cmaps, cmyb_idr_residue_idxs)

In [None]:
output_dir = '/home/degriffith/projects/TADs/bound_sims/flanking_contacts/p300_coactivators_analyses/figures/round1_variants'

fig, ax = plt.subplots(figsize=(4,3), dpi=500)

# One column per system (ATF4-TAZ2, cMYB-KIX), one row per replicate.
scores_by_system = np.vstack([atf4_scores, cmyb_scores]).T

# Individual replicates (rasterized points) over a light-gray box summary.
sns.stripplot(data=scores_by_system, orient='h', ax=ax, s=4, rasterized=True)
sns.boxplot(data=scores_by_system, color='lightgray', whis=10, orient='h', ax=ax)
ax.set_yticks([0,1], ['ATF4-TAZ2', 'cMYB-KIX'])
ax.set_xlabel('Norm. flanking contact score')

plt.tight_layout()

# save out as pdf
pdf_path = f'{output_dir}/ATF4-TAZ2_vs_cMYB-KIX_contacts_boxplot.pdf'
with PdfPages(pdf_path) as pdf:
    pdf.savefig(fig, dpi=500)

In [None]:
# TAZ2-sufficiency mutants
round1_sufficiency_constructs = ['SufficiencyNegCharge_ATF4_8_TAZ2_WT',
                                 'SufficiencyHydrophobic_ATF4_8_TAZ2_WT',
                                 'SufficiencyNegChargeHydrophobic_ATF4_8_TAZ2_WT']

# Replicate i (four per structure) is scored against EV structure 1 + i // 4.
ev_struct_map = {i: 1 + i // 4 for i in range(12)}

# Row range to exclude from the scores, taken from the WT ATF4 PSW file.
psw_path = f"{round1_root}/structure_1/WT_ATF4_TAZ2_WT/coil_start/1/PSWFILE.psw"
psw_data = read_psw(psw_path)
fixed_residue_idxs = get_fixed_idr_residues(psw_data)
atf4_idr_residue_idxs = [fixed_residue_idxs[0]-3, fixed_residue_idxs[1]+2]

suff_score_dict = {}
for name in round1_sufficiency_constructs:
    print(name)
    print('  EV')
    # EV baseline: three runs (ev/1, ev2/1, ev2/2) per structure
    ev_paths = {
        struct: [
            f'{round1_root}/structure_{struct}/{ev_folder}/{name}/coil_start/{ev_repl}'
            for ev_folder, ev_repl in (('ev', 1), ('ev2', 1), ('ev2', 2))
        ]
        for struct in (1, 2, 3)
    }
    sufficiency_EV_cmaps = {}
    for idx, path_list in ev_paths.items():
        sufficiency_EV_cmaps[idx] = compute_mean_contact_map(path_list)

    # Full simulations: four replicates for each of the three structures
    paths = [
        f'{round1_root}/structure_{1 + i//4}/{name}/coil_start/{1 + i%4}'
        for i in range(12)
    ]

    print('  Full')
    sufficiency_scores = compute_contact_score_indiv_reps(paths, ev_struct_map,
                                                          sufficiency_EV_cmaps, atf4_idr_residue_idxs)

    suff_score_dict[name] = sufficiency_scores

In [None]:
name = 'Sufficiency_Inert_ExtendedMotif_ATF4'

# Defined here as well so the cell does not depend on which earlier cell
# last assigned ev_struct_map (the ATF4/cMYB cells use string keys).
# Replicate i (four per structure) is scored against EV structure 1 + i // 4.
ev_struct_map = {i: 1 + i // 4 for i in range(12)}

print('  EV')
# get EV cmaps: three EV runs per structure
ev_paths = {
    struct: [f'{redo_root}/ev/{name}_{struct}_TAZ2/coil_start/{ev_repl}'
             for ev_repl in (1, 2, 3)]
    for struct in (1, 2, 3)
}
sufficiency_EV_cmaps = {}
for idx, path_list in ev_paths.items():
    sufficiency_EV_cmaps[idx] = compute_mean_contact_map(path_list)

# Get scores
# The full-energy replicates were previously read from the `ev/` tree too,
# which scored EV runs against their own mean. They are now read from the
# top level of redo_root, mirroring the cMYB layout
# ({redo_root}/cMYB_1_KIX vs {redo_root}/ev/cMYB_1_KIX).
# NOTE(review): confirm these directories exist on disk.
paths = []
for i in range(12):
    struct = 1 + i//4
    repl = 1 + i%4

    paths.append(f'{redo_root}/{name}_{struct}_TAZ2/coil_start/{repl}')

print('  Full')
sufficiency_scores = compute_contact_score_indiv_reps(paths, ev_struct_map,
                                                      sufficiency_EV_cmaps, atf4_idr_residue_idxs)

suff_score_dict[name] = sufficiency_scores

In [None]:
fig, ax = plt.subplots(figsize=(6,3), dpi=600)

# (axis label, per-replicate scores) for each row of the plot, top to bottom.
labelled_scores = [
    ('WT ATF4', atf4_scores),
    ('SufficiencyInert', suff_score_dict['Sufficiency_Inert_ExtendedMotif_ATF4']),
    ('SufficiencyNegCharge', suff_score_dict['SufficiencyNegCharge_ATF4_8_TAZ2_WT']),
    ('SufficiencyHydrophobic', suff_score_dict['SufficiencyHydrophobic_ATF4_8_TAZ2_WT']),
    ('SufficiencyNegChargeHydrophobic', suff_score_dict['SufficiencyNegChargeHydrophobic_ATF4_8_TAZ2_WT']),
]

# One column per construct, one row per replicate.
dataset = np.vstack([scores for _, scores in labelled_scores]).T

sns.stripplot(data=dataset, orient='h', ax=ax, s=4)
sns.boxplot(data=dataset, color='lightgray', whis=10, orient='h', ax=ax)
ax.set_yticks(list(range(len(labelled_scores))), [label for label, _ in labelled_scores])
ax.set_xticks([0,25,50,75,100,125])
ax.set_xlabel('Norm. flanking contact score')

plt.tight_layout()

plt.savefig(f'{output_dir}/wtATF_vs_sufficiencyATF4_TAZ2_contacts_boxplot.eps')
