In [None]:
import mdtraj as md 
from matplotlib import pyplot as pl
import numpy as np
from scipy import stats
import seaborn as sb

In [None]:
## Define distance calc function (for backbone atoms)

def calc_com_distance(
    traj,
    group_a_sel='(residue 2174 or residue 2176 or residue 2177 or residue 2178 or residue 2179 or residue 2180 or residue 2181 or residue 2182 or residue 2183) and chainid 0 and backbone',
    group_b_sel='(residue 2579 or residue 2580 or residue 2581 or residue 2582 or residue 2583 or residue 2584 or residue 2585) and chainid 1 and backbone',
):
    """
    Compute the per-frame Euclidean distance between the centers of mass of two atom groups.

    With the default selections the groups are the backbone atoms of
    residues 2174 and 2176-2183 in chain index 0, and of residues 2579-2585
    in chain index 1.

    Parameters:
    - traj: MDTraj trajectory object.
    - group_a_sel: MDTraj selection string for the first group.
    - group_b_sel: MDTraj selection string for the second group.

    Returns:
    - distances: NumPy array with one center-of-mass distance per frame, in the
      trajectory's coordinate units (MDTraj uses nanometers).

    Raises:
    - ValueError: if either selection matches no atoms in the topology.
    """
    # An empty selection cannot give a meaningful center of mass, so fail
    # loudly here instead of letting a bogus distance flow into the plots.
    for arg_name, selection in (('group_a_sel', group_a_sel), ('group_b_sel', group_b_sel)):
        if traj.topology.select(selection).size == 0:
            raise ValueError(f"{arg_name} matched no atoms: {selection!r}")

    # Per-frame center of mass of each group; each array has one row per frame.
    chaina_com = md.compute_center_of_mass(traj, select=group_a_sel)
    chainb_com = md.compute_center_of_mass(traj, select=group_b_sel)

    # Norm over the coordinate axis gives one distance per frame.
    distances = np.linalg.norm(chainb_com - chaina_com, axis=1)

    return distances

In [None]:
## Define distance calc function (for sidechain atoms)

def calc_com_distance_sc(
    traj,
    group_a_sel='(residue 2174) and chainid 0 and name NH1 NH2 NZ',
    group_b_sel='(residue 2584) and chainid 1 and name NH1 NH2 NZ',
):
    """
    Compute the per-frame Euclidean distance between the centers of mass of two side-chain atom groups.

    With the default selections the groups are the atoms named NH1, NH2 or NZ
    of residue 2174 in chain index 0 and of residue 2584 in chain index 1.
    NOTE(review): the defaults rely on MDTraj accepting a space-separated
    list of atom names after `name` - confirm with the installed version.

    Parameters:
    - traj: MDTraj trajectory object.
    - group_a_sel: MDTraj selection string for the first group.
    - group_b_sel: MDTraj selection string for the second group.

    Returns:
    - distances: NumPy array with one center-of-mass distance per frame, in the
      trajectory's coordinate units (MDTraj uses nanometers).

    Raises:
    - ValueError: if either selection matches no atoms in the topology.
    """
    # An empty selection cannot give a meaningful center of mass, so fail
    # loudly here instead of letting a bogus distance flow into the plots.
    for arg_name, selection in (('group_a_sel', group_a_sel), ('group_b_sel', group_b_sel)):
        if traj.topology.select(selection).size == 0:
            raise ValueError(f"{arg_name} matched no atoms: {selection!r}")

    # Per-frame center of mass of each group; each array has one row per frame.
    chaina_com = md.compute_center_of_mass(traj, select=group_a_sel)
    chainb_com = md.compute_center_of_mass(traj, select=group_b_sel)

    # Norm over the coordinate axis gives one distance per frame.
    distances = np.linalg.norm(chainb_com - chaina_com, axis=1)

    return distances

In [None]:
# Load trajectories 

# Each entry is (DCD trajectory, PDB topology); the first is the ligand-free
# run and the second the +ATP run.
# NOTE(review): both entries currently point at the same placeholder paths -
# replace them with the real per-condition files before running.
apo_traj, atp_traj = (
    md.load_dcd(dcd_file, top=top_file)
    for dcd_file, top_file in (
        ('path/to/joined.dcd', 'path/to/step3_input.pdb'),
        ('path/to/joined.dcd', 'path/to/step3_input.pdb'),
    )
)


In [None]:
# Calculate distance for backbone atoms

# Backbone center-of-mass distance per frame: ligand-free first, then +ATP.
apo_dist, atp_dist = map(calc_com_distance, (apo_traj, atp_traj))

In [None]:
# Calculate distance for sidechain atoms

# Side-chain center-of-mass distance per frame: ligand-free first, then +ATP.
apo_dist_sc, atp_dist_sc = map(calc_com_distance_sc, (apo_traj, atp_traj))

In [None]:
# Plot 2D KDE plot for backbone vs sidechain distance for ligand-free and +ATP conditions
# Also, do linear regression and plot the fit lines

# MDTraj reports lengths in nanometers; the factor of 10 converts to angstroms.
# x is the side-chain distance and y the backbone distance for each condition.
apo_x = apo_dist_sc * 10
apo_y = apo_dist * 10
atp_x = atp_dist_sc * 10
atp_y = atp_dist * 10

# One explicit figure/axes pair; everything below draws onto `ax`, so no
# stray empty figure is left behind.
fig, ax = pl.subplots(figsize=(8, 6))

# Perform linear regression for ligand-free condition
apo_slope, apo_intercept, apo_r_value, apo_p_value, apo_std_err = stats.linregress(apo_x, apo_y)
apo_r_squared = apo_r_value**2  # Calculate R^2

# Perform linear regression for ATP-bound condition
atp_slope, atp_intercept, atp_r_value, atp_p_value, atp_std_err = stats.linregress(atp_x, atp_y)
atp_r_squared = atp_r_value**2  # Calculate R^2

# Create a 2D KDE plot for the ligand-free condition
sb.kdeplot(x=apo_x, y=apo_y, fill=True, color='silver', label='ligand-free', alpha=0.6, ax=ax)

# Create a 2D KDE plot for the ATP-bound condition
sb.kdeplot(x=atp_x, y=atp_y, fill=True, color='crimson', label='atp bound', alpha=0.6, ax=ax)

# A straight line is fully defined by its two end points, so evaluate each
# fit only at the extremes of the observed x range instead of at every frame.
apo_fit_x = np.array([apo_x.min(), apo_x.max()])
atp_fit_x = np.array([atp_x.min(), atp_x.max()])

# Plot the regression line for ligand-free, reporting its R^2 in the legend
ax.plot(apo_fit_x, apo_slope * apo_fit_x + apo_intercept, color='gray', linestyle='-',
        label=f'Fit (ligand-free), R$^2$ = {apo_r_squared:.2f}')

# Plot the regression line for ATP-bound, reporting its R^2 in the legend
ax.plot(atp_fit_x, atp_slope * atp_fit_x + atp_intercept, color='crimson', linestyle='-',
        label=f'Fit (ATP-bound), R$^2$ = {atp_r_squared:.2f}')

# Label the axes and show the legend so the saved figure stands on its own.
ax.set_xlabel('Side-chain COM distance (Å)')
ax.set_ylabel('Backbone COM distance (Å)')
ax.legend()

fig.savefig('2D_KDE_plot.png', dpi=300)