This Jupyter notebook extracts a trajectory of the bound state from the YANK NetCDF (`.nc`) output file of the complex phase.

The following files and options need to be customized for your system.

In [1]:
# ---- Inputs ----
# AMBER topology and starting coordinates of the complex
complex_prmtop = '/home/bxie4/restraint_simulation/YANK/TEST_SYSTEMS/bromodomain/3mxf/setup/complex.prmtop'
complex_inpcrd = '/home/bxie4/restraint_simulation/YANK/TEST_SYSTEMS/bromodomain/3mxf/setup/complex.inpcrd'
# YANK netCDF output of the complex phase
YANK_nc = (
  '/home/bxie4/restraint_simulation/YANK/TEST_SYSTEMS/'
  'bromodomain/3mxf/40flat_output_repeat1/experiments/complex.nc'
)

# ---- Outputs ----
# DCD trajectory of the bound complex
store_dcd = 'bound.dcd'
# PDB written alongside the DCD so the stored atoms have a matching topology
store_pdb = 'bound.pdb'
# Keep one snapshot every `stride` iterations. 100 is deliberately coarse for
# testing; production runs will probably want a smaller value.
stride = 100

# ---- Atom selections (MDAnalysis selection strings) ----
sel_str_store = 'protein or resname LIG' # Atoms written to the output files
sel_str_align = 'protein and name CA' # Atoms used to superimpose each snapshot onto the reference
# Unused alternative: ligand atoms whose names do not start with H
# sel_str_ligand = 'resname LIG and not name H*'

In [2]:
import MDAnalysis as mda
from MDAnalysis.coordinates.memory import MemoryReader
from MDAnalysis.analysis import align

# Reference universe built from the setup topology and coordinates;
# the extracted snapshots are aligned onto this structure.
ref = mda.Universe(complex_prmtop, complex_inpcrd)

In [3]:
# Go through the trajectory of the bound state one kept iteration at a time,
#  1. loading the coordinates of the replica that is in the bound state
#  2. aligning the complex to the reference
#  3. appending the stored selection to the output trajectory
# Both the netCDF dataset and the DCD writer are opened as context managers so
# that they are closed (and the DCD flushed to disk) even if an iteration fails.

import netCDF4 as netcdf

# Factor applied to the raw YANK positions before they are handed to MDAnalysis
# (presumably nm -> Angstrom; TODO confirm against the units of 'positions')
position_scale = 10.0

# Load netCDF4 file from YANK output
with netcdf.Dataset(YANK_nc) as nc:
  positions = nc.variables['positions']
  states = nc.variables['states']
  niterations = positions.shape[0]
  nstates = states.shape[1]
  natoms = positions.shape[2]

  # Iterations that are kept, and for each of them the index of the replica
  # that belongs to the bound state, which is state 0
  kept_iterations = list(range(0,niterations,stride))
  replica_indices = \
    [list(states[iteration,:]).index(0) for iteration in kept_iterations]

  # Align and store snapshots
  print('These are RMSDs before and after alignment:')
  # NOTE: this name shadows the built-in complex(); it is kept so that any
  # other cell referring to the universe by this name keeps working.
  complex = mda.Universe(complex_prmtop, complex_inpcrd)
  sel_store = complex.select_atoms(sel_str_store)
  with mda.Writer(store_dcd, sel_store.n_atoms) as writer:
    for iteration, replica in zip(kept_iterations, replica_indices):
      coords = positions[iteration,replica,:,:]*position_scale
      # Replace the universe's coordinates with this single snapshot
      complex.load_new(coords, format=MemoryReader)
      # alignto returns the RMSD before and after the fit
      print(align.alignto(complex, ref, select=sel_str_align))
      writer.write(sel_store)

# Emit a PDB of the stored atoms. The AMBER atom types are carried along as
# REMARK records, 20 types per line, bracketed by marker remarks.
types_per_line = 20
atom_types = sel_store.types
type_rows = [
  'REMARK    ' +  ' '.join([f'{t:2s}' for t in atom_types[start:start+types_per_line]])
  for start in range(0, len(atom_types), types_per_line)
]
remarks = '\n' + '\n'.join(
  ['REMARK    <-- AMBER ATOM TYPES '] + type_rows + ['REMARK    AMBER ATOM TYPES -->'])
sel_store.write(store_pdb, remarks=remarks)

These are RMSDs before and after alignment:




(1.709486443295421e-06, 1.6924982250520969e-06)
(2.023061077444703, 1.3541844154807077)
(2.2641937115174615, 1.3540746484850308)
(3.3339063426552262, 1.5780176017589196)
(2.192816935691746, 1.0782748426843423)
(5.441641837495693, 1.2777724263063275)
(2.9594394492060596, 1.4305119105775626)
(4.25501178114698, 1.183847665635467)
(7.893656856116046, 1.6305504768189536)
(6.556299331207767, 1.6370786564410567)
(5.2764573690047065, 1.5882555942740233)
(8.413052913941366, 1.6896506857259885)
(6.023041330880456, 1.3965623160340763)
(6.808592639415474, 1.5690251686752417)
(10.218588632897653, 1.4875388134085887)
(6.342637198614475, 1.5470152542748652)
(5.610530657849822, 1.8799200485298175)
(6.082882440669318, 1.939127415496755)
(10.94073871738084, 1.3117141013459563)
(4.89172574670902, 1.7843057958556108)
(12.592136992302827, 1.879593230298374)
(5.617100845305622, 2.0640085892849522)
(7.249919811932606, 2.1486867283064637)
(6.196119382477828, 2.108423577547054)
(5.410431426484696, 2.0703949932