In [None]:

import matplotlib.pyplot as pl  # original alias, kept so existing references keep working
import matplotlib.pyplot as plt  # conventional alias; the plotting cell refers to `plt`
import mdtraj as md
import numpy as np
import seaborn as sb
from scipy.stats import norm
from sklearn.mixture import GaussianMixture

In [None]:
## Define distance calculation function

def calc_com_distance(traj, chaina_selection=None, chainb_selection=None):
    """Per-frame distance between the centers of mass of two atom groups.

    Parameters
    ----------
    traj : mdtraj.Trajectory
        Trajectory whose topology contains both groups.
    chaina_selection : str, optional
        MDTraj atom-selection string for the first group. Defaults to the
        backbone atoms of residues 2174 and 2176-2183 on ``chainid 0``
        (residue 2175 is not part of the default group).
    chainb_selection : str, optional
        MDTraj atom-selection string for the second group. Defaults to the
        backbone atoms of residues 2579-2585 on ``chainid 1``.

    Returns
    -------
    numpy.ndarray
        Euclidean distance between the two centers of mass, one value per
        frame, in the trajectory's coordinate units.

    Raises
    ------
    ValueError
        If either selection matches no atoms in ``traj.topology``.
    """
    if chaina_selection is None:
        chaina_residues = ' or '.join(
            f'residue {resnum}' for resnum in (2174, *range(2176, 2184))
        )
        chaina_selection = f'({chaina_residues}) and chainid 0 and backbone'
    if chainb_selection is None:
        chainb_residues = ' or '.join(
            f'residue {resnum}' for resnum in range(2579, 2586)
        )
        chainb_selection = f'({chainb_residues}) and chainid 1 and backbone'

    # Fail loudly when a selection does not match the topology (e.g. different
    # residue numbering or chain order) instead of handing an empty atom group
    # to the center-of-mass calculation.
    for group_name, selection in (('chain A', chaina_selection),
                                  ('chain B', chainb_selection)):
        if traj.topology.select(selection).size == 0:
            raise ValueError(
                f'Selection for {group_name} matched no atoms: {selection!r}'
            )

    # Compute center of mass for the first group (chain A)
    chaina_com = md.compute_center_of_mass(traj, select=chaina_selection)

    # Compute center of mass for the second group (chain B)
    chainb_com = md.compute_center_of_mass(traj, select=chainb_selection)

    # Compute Euclidean distance between centers of mass at each frame
    distances = np.linalg.norm(chainb_com - chaina_com, axis=1)

    return distances

In [None]:

# Define replicate trimming function

def trim_replicate_timeseries(data, total_ns, n_replicates, trim_ns):
    """Drop the first ``trim_ns`` nanoseconds of every replicate in a joined series.

    ``data`` is treated as ``n_replicates`` equal-length replicates
    concatenated along axis 0. The leading part of each replicate is removed
    and the remaining pieces are concatenated again in the original order.

    Parameters
    ----------
    data : array_like
        Joined time series; axis 0 is the frame axis.
    total_ns : float
        Simulated time covered by the whole joined series, in ns. Each
        replicate is taken to span ``total_ns / n_replicates`` ns.
    n_replicates : int
        Number of replicates concatenated in ``data``.
    trim_ns : float
        Time to discard from the start of each replicate, in ns.

    Returns
    -------
    numpy.ndarray
        The trimmed series. If the number of frames is not a multiple of
        ``n_replicates``, the trailing remainder frames are not included.

    Raises
    ------
    ValueError
        If ``n_replicates`` < 1, ``total_ns`` <= 0 or ``trim_ns`` < 0.
    """
    if n_replicates < 1:
        raise ValueError(f'n_replicates must be >= 1, got {n_replicates}')
    if total_ns <= 0:
        raise ValueError(f'total_ns must be > 0, got {total_ns}')
    if trim_ns < 0:
        raise ValueError(f'trim_ns must be >= 0, got {trim_ns}')

    data = np.asarray(data)
    n_frames = data.shape[0]
    frames_per_rep = n_frames // n_replicates

    # Frames to drop = trim_ns * frames_per_rep / (total_ns / n_replicates).
    # Written with a single division so exact cases (e.g. 58 frames) are not
    # truncated to one frame less by floating-point rounding.
    trim_frames = int(trim_ns * frames_per_rep * n_replicates / total_ns)
    # Trimming more than a whole replicate simply leaves nothing of it.
    trim_frames = min(trim_frames, frames_per_rep)

    trimmed_segments = [
        data[rep * frames_per_rep + trim_frames:(rep + 1) * frames_per_rep]
        for rep in range(n_replicates)
    ]

    return np.concatenate(trimmed_segments, axis=0)

In [None]:
# Read the joined DCD trajectory of each system together with its topology PDB

# Ligand-free (apo) system
apo_traj = md.load_dcd(
    'path/to/joined.dcd',
    top='path/to/step3_input.pdb',
)

# ATP-bound system
# NOTE(review): both calls use the same placeholder paths as written here;
# point each one at the files of its own system before running.
atp_traj = md.load_dcd(
    'path/to/joined.dcd',
    top='path/to/step3_input.pdb',
)


In [None]:
# Per-frame center-of-mass distance for the ligand-free and ATP-bound trajectories

apo_dist, atp_dist = map(calc_com_distance, (apo_traj, atp_traj))

In [None]:
# Discard the first 100 ns of each of the 10 replicates in both series.
# NOTE: apo_dist / atp_dist are overwritten with their trimmed versions, so
# running this cell a second time trims again; re-run the distance cell first.

apo_dist = trim_replicate_timeseries(
    apo_dist, total_ns=5000, n_replicates=10, trim_ns=100
)
atp_dist = trim_replicate_timeseries(
    atp_dist, total_ns=5000, n_replicates=10, trim_ns=100
)

In [None]:
# Fit a two-component Gaussian Mixture Model to the ligand-free distance
# distribution, then plot the ligand-free and ATP-bound KDEs together with
# the two weighted Gaussian components.

# Scale distances by 10
# (presumably nm -> Å, since mdtraj reports lengths in nm — TODO confirm)
data = apo_dist * 10  # scale

# Fit a Gaussian Mixture with 2 components; the fixed random_state keeps the
# fit (and therefore which component is "1" and which is "2") reproducible.
gmm = GaussianMixture(n_components=2, random_state=0)
gmm.fit(data.reshape(-1, 1))  # scikit-learn expects (n_samples, n_features)

# Extract per-component means, standard deviations and mixture weights
means = gmm.means_.flatten()
stds = np.sqrt(gmm.covariances_).flatten()
weights = gmm.weights_.flatten()

# Prepare x-axis for smooth Gaussian curves over the observed data range
x_min, x_max = data.min(), data.max()
x = np.linspace(x_min, x_max, 1000)
# Each component is scaled by its weight so the two curves sum to the mixture density
y1 = weights[0] * norm.pdf(x, means[0], stds[0])
y2 = weights[1] * norm.pdf(x, means[1], stds[1])

# Plot KDEs on an explicit Axes so every artist lands on the same figure
fig, ax = plt.subplots(figsize=(8, 6))
# `shade=False` was dropped: it is deprecated in seaborn and unfilled curves
# are already the default.
sb.kdeplot(data, ax=ax, label='Ligand-free', linewidth=4, color='darkgray',
           alpha=1, bw_adjust=0.7)
sb.kdeplot(atp_dist * 10, ax=ax, label='ATP-bound', linewidth=4, color='crimson',
           alpha=0.7, bw_adjust=0.7)

# Plot Gaussian fits
ax.plot(x, y1, '--', color='cornflowerblue', linewidth=3,
        label=f'Gaussian 1 (μ={means[0]:.2f})')
ax.plot(x, y2, '--', color='orange', linewidth=3,
        label=f'Gaussian 2 (μ={means[1]:.2f})')

# Axis labels and y ticks are intentionally left blank, as in the original figure
ax.set_xlabel('')
ax.set_xlim(x_min, x_max)
ax.set_yticks([])
ax.set_ylabel('')
ax.legend(frameon=False)
fig.savefig('apo_atp_kde_with_gaussians.pdf', format='pdf', dpi=300, bbox_inches='tight')
plt.show()

# Report the means
print(f"Gaussian 1 mean: {means[0]:.3f}")
print(f"Gaussian 2 mean: {means[1]:.3f}")