In [13]:
import dill
import numpy as np
import MDAnalysis as mda
from MDAnalysis.analysis.dihedrals import Dihedral

import mdtraj as md

import matplotlib.pyplot as plt
from collections import Counter

import os

**First order of business:** let's see whether our conditioning consistently picked the correct loss strategy.

In [4]:
# One pickle of selected inference losses per sampling batch (batch indices 0-3).
strategies_batches = [
    '/home/bfd21/rds/hpc-work/tbg/cyclization/jobs/l1-sample-Feb-13/'
    'conditioned-on-cyc/uncond-on-time/'
    f'cond-cyc-unc-time-l1-inference_losses_selected_batch-{i}.pkl'
    for i in range(4)
]

# Flatten the per-batch contents into one list, preserving batch order.
strategies_list = []

for batch_str in strategies_batches:
    # NOTE: dill.load can execute arbitrary code while unpickling; only point
    # this at files produced by our own jobs.
    with open(batch_str, "rb") as f:
        loaded_list = dill.load(f)

    strategies_list.extend(loaded_list)

In [5]:
# Number of entries equal to the first loaded entry. When this equals
# len(strategies_list), every sample selected the same strategy as the first.
count = sum(1 for strategy in strategies_list if strategy == strategies_list[0])

# The recorded run printed 400, which the author read as the correct strategy
# being picked every single time.
print(count)

400


**Nice! Now on to the next objective:** we want to run PCA on the dihedral angles (and maybe the bond angles) of the generated samples, and compare where they lie in this PCA space against the raw validation data.

In [15]:
def generate_mdtraj_trajectory(pdb_path, data_files):
    """
    Builds an MDTraj trajectory from a reference PDB and sampled coordinate files.

    Parameters:
        pdb_path (str): Path to the PDB file that supplies the topology.
        data_files (list of str): Paths to NPZ files (coordinates stored under the
            "samples_np" key) or NPY files (the bare array). Each array is reshaped
            to (-1, n_atoms, 3), so its size must be a multiple of n_atoms * 3.

    Returns:
        mdtraj.Trajectory: Frames from every file, concatenated in the order given,
        attached to the PDB topology and centered with center_coordinates().

    Raises:
        ValueError: If data_files is empty, or if a file has an extension other
            than ".npz" or ".npy".
    """

    # Fail early with a clear message; otherwise np.concatenate below would raise
    # an opaque "need at least one array to concatenate" after the PDB is parsed.
    data_files = list(data_files)
    if not data_files:
        raise ValueError("data_files is empty: provide at least one .npz or .npy file")

    # Load the PDB and add bonds for the standard residue types
    pdb = md.load(pdb_path)
    pdb.topology.create_standard_bonds()
    topology = pdb.topology

    # The atom count is fixed by the topology, so compute it once for all files
    n_atoms = topology.n_atoms

    # Load and process generated samples
    all_samples = []
    for data_file in data_files:
        file_extension = os.path.splitext(data_file)[1]

        if file_extension == ".npz":
            # NpzFile holds an open file handle; the context manager releases it
            with np.load(data_file) as data:
                samples_np = data["samples_np"]
        elif file_extension == ".npy":
            samples_np = np.load(data_file)
        else:
            raise ValueError(f"Unsupported file format: {data_file}")

        # NOTE(review): MDTraj stores coordinates in nanometers; the units of the
        # sampled arrays are not visible here - confirm they match.
        all_samples.append(samples_np.reshape(-1, n_atoms, 3))  # (n_frames, n_atoms, 3)

    # Combine all samples into a single array
    all_samples = np.concatenate(all_samples, axis=0)

    # Create an MDTraj trajectory that reuses the PDB topology (and its bonds)
    traj = md.Trajectory(
        xyz=all_samples,  # Shape: (n_frames, n_atoms, 3)
        topology=topology
    )

    # Center the trajectory around the origin (modifies traj in place)
    traj.center_coordinates()

    return traj

def compute_all_dihedrals(traj, degrees=False):
    """
    Computes backbone (phi, psi, omega) and side-chain (chi1, chi2, chi3, chi4)
    dihedral angles for all residues in the given trajectory.

    Parameters:
        traj (mdtraj.Trajectory): MDTraj trajectory object.
        degrees (bool): If True, convert the angles to degrees. Defaults to False,
            which returns the angles exactly as MDTraj reports them (radians).

    Returns:
        dict: Maps "phi", "psi", "omega", "chi1", "chi2", "chi3", "chi4" to the
        angle array returned by the matching md.compute_* call. A chi entry is
        None when the trajectory has no such angle (empty array); the backbone
        entries are always arrays, even if empty.
    """

    # (name, MDTraj function, replace an empty result with None?)
    calculators = (
        ("phi", md.compute_phi, False),
        ("psi", md.compute_psi, False),
        ("omega", md.compute_omega, False),
        # Side-chain chi angles may be absent depending on the peptide
        ("chi1", md.compute_chi1, True),
        ("chi2", md.compute_chi2, True),
        ("chi3", md.compute_chi3, True),
        ("chi4", md.compute_chi4, True),
    )

    dihedral_angles = {}
    for name, compute, none_if_empty in calculators:
        # Each MDTraj helper returns (atom_indices, angles); only the angles are kept
        _, angles = compute(traj)

        if none_if_empty and not angles.size:
            dihedral_angles[name] = None
        elif degrees:
            dihedral_angles[name] = np.degrees(angles)
        else:
            dihedral_angles[name] = angles

    return dihedral_angles

In [None]:
# NOTE(review): data_files is still empty, so this call raises ValueError because
# there is nothing to concatenate. Fill in the sampled .npz/.npy paths before running.
traj = generate_mdtraj_trajectory(
    pdb_path='/home/bfd21/rds/hpc-work/data/MDM2-sample-binders/ligand-only/l1/ligand1.pdb',
    data_files=[],
)