### Objective 1
We want something which will, given the md trajectory, calculate the dihedral angles (from this we can get their distributions)
given the relevant SMILES string.

### Objective 2
We also want something to plot the csv file data, so that we can do a sanity check to ensure the macroscopic properties (temp, pressure)
are reasonable and are not discontinuous.

In [2]:
import os

from openmm import *
from openmm.app import *

import mdtraj
import matplotlib.pyplot as plt
import numpy as np
import openmoltools
import tempfile
import cctk

from rdkit import Chem
from rdkit.Chem import AllChem

KeyboardInterrupt: 

In [None]:
smiles_dict = {
    'Lys-Tyr': 'CNCc1c(O)ccc(c1)C',
    'Lys-Arg': 'N1(C)CN=C(NC1)NC',
    'Sulfur-Mediated-Amide': 'SC[C@@H](NC(=O)C)C(=O)C',
    'Carboxyl-Carboxyl': 'C(=O)(C)NCc1ccc(cc1)CNC(=O)C',
    'Disulfide': 'N[C@H](C(=O)O)CSSC[C@@H](N)C(=O)O',
    'Cys-Arg': 'CSCC(=O)NCCCC',
    'Cys-Carboxyl': 'CSCC(=O)C',
}

pdb_dir = '/home/bfd21/rds/hpc-work/tbg/md_work/cyc_toy_pdbs'

pdb_dict = {
    'Lys-Tyr': os.path.join(pdb_dir, 'Lys-Tyr.pdb'),
    'Lys-Arg': os.path.join(pdb_dir, 'Lys-Arg.pdb'),
    'Sulfur-Mediated-Amide': os.path.join(pdb_dir, 'Sulfur-Mediated-Amide.pdb'),
    'Carboxyl-Carboxyl': os.path.join(pdb_dir, 'Cys-Carboxyl.pdb'),
    'Disulfide': os.path.join(pdb_dir, 'Disulfide.pdb'),
    'Cys-Arg': os.path.join(pdb_dir, 'Cys-Arg.pdb'),
    'Cys-Carboxyl': os.path.join(pdb_dir, 'Cys-Carboxyl.pdb'),
}

def generate_initial_pdb(
    smiles: str,
    min_side_length: int = 25, # Å
    solvent_smiles = "O",
) -> PDBFile:
    """ Creates a PDB file for a solvated molecule, starting from two SMILES strings. """

    # do some math to figure how big the box needs to be
    solute = cctk.Molecule.new_from_smiles(smiles)
    solute_volume = solute.volume(qhull=True)
    solvent = cctk.Molecule.new_from_smiles(solvent_smiles)
    solvent_volume = solvent.volume(qhull=False)

    total_volume = 50 * solute_volume # seems safe?
    min_allowed_volume = min_side_length ** 3
    total_volume = max(min_allowed_volume, total_volume)

    total_solvent_volume = total_volume - solute_volume
    n_solvent = int(total_solvent_volume // solvent_volume)
    box_size = total_volume ** (1/3)

    # build pdb
    with tempfile.TemporaryDirectory() as tempdir:
        solute_fname = f"{tempdir}/solute.pdb"
        solvent_fname = f"{tempdir}/solvent.pdb"
        system_fname = f"system.pdb"

        smiles_to_pdb(smiles, solute_fname)
        smiles_to_pdb(solvent_smiles, solvent_fname)
        traj_packmol = openmoltools.packmol.pack_box(
          [solute_fname, solvent_fname],
          [1, n_solvent],
          box_size=box_size
         )
        traj_packmol.save_pdb(system_fname)

        return PDBFile(system_fname)

def smiles_to_pdb(smiles: str, filename: str) -> None:
    """ Turns a SMILES string into a PDB file (written to current working directory). """
    m = Chem.MolFromSmiles(smiles)
    mh = Chem.AddHs(m)
    AllChem.EmbedMolecule(mh)
    Chem.MolToPDBFile(mh, filename)


def load_trajectory(traj_path: str, pdb_path: str, fraction: float=0.2) -> md.Trajectory:
    """Load an MD trajectory from a .dcd file using a corresponding .pdb file for topology.
    Only keeps the last `fraction` of the frames."""
    
    # Load the topology from the PDB file
    pdb_topology = md.load(pdb_path).topology
    
    # Print the expected number of atoms from the PDB
    print(f"Expected number of atoms from PDB: {pdb_topology.n_atoms}")
    
    # Load the DCD trajectory
    traj = md.load(traj_path, top=pdb_path)
    
    # Print the actual number of atoms from the DCD file
    print(f"Number of atoms in DCD file: {traj.topology.n_atoms}")
    
    # Debugging: If mismatch, print first few atom names
    if traj.topology.n_atoms != pdb_topology.n_atoms:
        print("Mismatch detected! First few atoms in PDB:")
        for atom in pdb_topology.atoms[:10]:
            print(atom)
        print("First few atoms in DCD:")
        for atom in traj.topology.atoms[:10]:
            print(atom)
        raise ValueError(f"Mismatch in atom numbers: Trajectory ({traj.topology.n_atoms}) vs PDB ({pdb_topology.n_atoms})")
    
    # Keep only the last fraction of the trajectory
    start_frame = int(traj.n_frames * (1 - fraction))
    traj = traj[start_frame:]
    
    return traj

In [None]:
load_trajectory('/home/bfd21/rds/hpc-work/tbg/cyclization/jobs/md-jobs/Lys-Tyr/results/traj_seed_0.dcd', pdb_dict['Lys-Tyr'])

Expected number of atoms from PDB: 24


ValueError: The topology and the trajectory files might not contain the same atoms
The input topology must contain all atoms even if you want to select a subset of them with atom_indices

: 