<a href="https://colab.research.google.com/github/google-research/protein-ligand-binding-free-energy-calculations/blob/matteo-dev/colab_tutorial/abfe_tutorial.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

##### Copyright 2022 Google LLC.

In [None]:
#@title Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

In [1]:
#@title Install Gromacs and Gromacs Python API
!wget https://storage.googleapis.com/gromacs-bin/gromacs-avx2_256-cuda-11_2.tar.gz -O /tmp/gromacs.tar.gz
!tar zxf /tmp/gromacs.tar.gz
!sudo rm -rf /usr/local/gromacs
!sudo mv gromacs-avx /usr/local/gromacs
!rm /tmp/gromacs.tar.gz

!pip3 install --upgrade pip
!pip3 install setuptools wheel cmake pybind11 py3DMol
# This ensures we always install the latest PMX version from GitHub.
!if [ -d /usr/local/lib/python3.7/dist-packages/pmx/ ]; then yes Y | pip3 uninstall pmx; fi
!if [ -d /usr/local/lib/python3.7/site-packages/pmx/ ]; then yes Y | pip3 uninstall pmx; fi
!pip3 install --no-cache-dir git+https://github.com/deGrootLab/pmx.git@abfe_dev
!gmxapi_ROOT=/usr/local/gromacs/ pip3 install --no-cache-dir gmxapi

# Add path to where pmx gets installed.
import sys
sys.path.append('/usr/local/lib/python3.7/site-packages/')

--2022-12-14 20:56:58--  https://storage.googleapis.com/gromacs-bin/gromacs-avx2_256-cuda-11_2.tar.gz
Resolving storage.googleapis.com (storage.googleapis.com)... 172.253.115.128, 172.253.122.128, 172.253.63.128, ...
Connecting to storage.googleapis.com (storage.googleapis.com)|172.253.115.128|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 23920923 (23M) [application/x-gzip]
Saving to: ‘/tmp/gromacs.tar.gz’


2022-12-14 20:56:59 (105 MB/s) - ‘/tmp/gromacs.tar.gz’ saved [23920923/23920923]

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting pip
  Downloading pip-22.3.1-py3-none-any.whl (2.1 MB)
[K     |████████████████████████████████| 2.1 MB 19.9 MB/s 
[?25hInstalling collected packages: pip
  Attempting uninstall: pip
    Found existing installation: pip 21.1.3
    Uninstalling pip-21.1.3:
      Successfully uninstalled pip-21.1.3
Successfully installed pip-22.3.1
Looking in indexes: https://pypi.o

In [2]:
#@title Set Level for Logging
#@markdown Sets the python logging level. This is useful for debugging.
import logging

log_level = 'INFO' #@param ['DEBUG', 'INFO', 'ERROR']

# Maps the string choices offered by the form above onto the numeric levels
# defined by the `logging` module.
_log_level = {
    'DEBUG': logging.DEBUG,
    'INFO': logging.INFO,
    'ERROR': logging.ERROR
}

# `basicConfig` does nothing when the root logger already has handlers, which
# is the case after this cell has run once (and may be the case in runtimes
# that pre-configure logging). Setting the root level explicitly makes sure
# the selected level is applied every time the cell is run.
logging.basicConfig(level=_log_level[log_level])
logging.getLogger().setLevel(_log_level[log_level])

In [3]:
#@title [Please Rerun after Restart] Create a Session
#@markdown Please rerun this cell every time you restart the runtime.
#@markdown A new session ID is created for each new Colab runtime session.
#@markdown Running this cell again will refresh the session ID.

import os
import time
import gmxapi


# Sets a session ID for this session.
_SESSION_ID = str(int(time.time())) #@param

# Monkey patch this session ID into the gmxapi.operation.ResourceManager.
# Hack. Do not use in the user code.
# This is temporarily added to allow users to start from a new set of working
# directories for all the mdrun launches. GMX API doesn't expose the
# underlying resource manager or context yet.
def operation_id(self):
  """Returns the base operation ID with the notebook session ID appended.

  Installed right below as the `operation_id` property of
  `gmxapi.operation.ResourceManager`, replacing whatever that class defined.
  """
  return "{}_{}".format(self._base_operation_id, _SESSION_ID)
gmxapi.operation.ResourceManager.operation_id = property(operation_id)


def get_work_dir(md_output):
  """Resolves and returns the first value stored in `md_output`.

  Args:
    md_output: mapping whose values expose a `.result()` method. Presumably
      the output of an mdrun operation, with the working directory as its
      first entry (`tail_log` uses the returned value as a directory path).

  Returns:
    Whatever `.result()` of the first value returns.
  """
  first_value = list(md_output.values())[0]
  return first_value.result()


def tail_log(md_output, output_file_base, last_n_lines=6):
  """Prints the last lines of a log file located in an mdrun work directory.

  Args:
    md_output: mapping passed to `get_work_dir` to obtain the directory that
      contains the log file.
    output_file_base (str): name of the log file without the '.log' extension.
    last_n_lines (int): number of trailing lines to print. Values <= 0 print
      nothing. Default is 6.
  """
  from collections import deque

  work_dir = get_work_dir(md_output)
  log_path = os.path.join(work_dir, output_file_base + '.log')

  # A bounded deque keeps only the requested tail in memory while the file is
  # streamed, and naturally yields nothing for a zero-length tail (slicing
  # with [-0:] would instead return the whole file).
  with open(log_path, 'r') as log:
    last_lines = deque(log, maxlen=max(last_n_lines, 0))

  for line in last_lines:
    # Each line still carries its own newline, so suppress the one print adds
    # to avoid double-spaced output.
    print(line, end='')

print("Session ID: ", _SESSION_ID)

Session ID:  1671051601


# ABFE Workflow
-----

In [4]:
import numpy as np
import shutil
from tqdm import tqdm
import pmx
from pmx.AbsoluteDG import AbsoluteDG

RDKit imports failed


In [20]:
#@title Define some helper functions

def mdrun(tpr, pmegpu=True, nsteps=None):
  """Wrapper for gmxapi.mdrun with predefined scenarios optimized for different
  types of simulations.

  All output files (trajectories, final structure, energies, log, checkpoint
  and dhdl) are written to the directory that contains the TPR file.

  Args:
    tpr (str): path to TPR file.
    pmegpu (bool): whether to run PME calculations on the GPU. Default is True.
      Note this is not possible with certain integrators.
    nsteps (int): number of integration steps to run, overwrites nsteps defined
      by the MDP file used to generate the TPR. Default is None (do not modify
      TPR).

  Returns:
    object: StandardOperationHandle returned by gmxapi.mdrun.
  """
  # Load TPR file.
  input_tpr = gmxapi.read_tpr(tpr)
  if nsteps is not None:
    input_tpr = gmxapi.modify_input(input=input_tpr,
                                    parameters={'nsteps': nsteps})

  # Directory containing the TPR. It is made absolute so that a bare file name
  # or a relative path still resolves next to the TPR (string splitting would
  # turn 'tpr.tpr' into the filesystem root), independently of the directory
  # from which mdrun ends up being launched.
  out_dir = os.path.dirname(os.path.abspath(tpr))

  # PME, PME FFT and bonded interactions share the same offload choice.
  # Note: energy minimization cannot run PME on GPU.
  offload = 'gpu' if pmegpu else 'auto'

  # Command line flags and arguments to be passed to mdrun.
  runtime_args = {
      '-nb': 'gpu',
      '-pme': offload,
      '-pmefft': offload,
      '-bonded': offload,
  }
  output_files = (
      ('-x', 'traj.xtc'),
      ('-o', 'traj.trr'),
      ('-c', 'confout.gro'),
      ('-e', 'ener.edr'),
      ('-g', 'md.log'),
      ('-cpo', 'state.cpt'),
      ('-dhdl', 'dhdl.xvg'),
  )
  for flag, file_name in output_files:
    runtime_args[flag] = os.path.join(out_dir, file_name)

  md = gmxapi.mdrun(input_tpr, runtime_args=runtime_args)

  # Run the simulation.
  md.run()

  return md


def mdrun_completed(tpr: str, transition: bool = False) -> bool:
  """Checks whether a simulation completed successfully.

  Output files are looked up in the directory that contains the TPR file.

  Args:
    tpr (str): path to TPR file.
    transition (bool): whether we are checking completion for a non-equilibrium
      transition. Default is False.

  Returns:
    bool: whether the TPR has been run successfully.
  """
  run_dir = os.path.dirname(tpr)

  if not transition:
    # (maldeghi): IIRC Gromacs would output the GRO file only for mdruns that
    # did not crash/errored. We can make this stricter by checking the log
    # file too.
    return os.path.isfile(os.path.join(run_dir, "confout.gro"))

  # If we're checking the completion of a non-eq transition, we check that (i)
  # the right dhdl.xvg file exists, and that (ii) it contains info up to the
  # end of the transition. Otherwise, we assume it crashed and needs to be
  # completed.
  dhdl = os.path.join(run_dir, "dhdl.xvg")
  if not os.path.isfile(dhdl):
    return False

  # Expected end time of the run, from the parameters stored in the TPR.
  params = gmxapi.read_tpr(tpr).output.parameters.result()
  expected_final_time = params['nsteps'] * params['dt']

  # Ignore blank lines so that a trailing empty line does not hide the last
  # data record.
  with open(dhdl, 'r') as f:
    data_lines = [line for line in f if line.strip()]

  try:
    # The first column of the last record is the simulation time reached.
    actual_final_time = float(data_lines[-1].split()[0])
  except (IndexError, ValueError):
    # Empty file, or last line is not a numeric record: treat as incomplete.
    return False

  return expected_final_time - actual_final_time < 1e-6


def tail(fname, n):
  """Prints the last `n` lines of a text file, keeping their line endings.

  Args:
    fname (str): path to the file to read.
    n (int): number of trailing lines to print. Values <= 0 print nothing.
  """
  from collections import deque

  # Stream the file through a bounded deque so only the requested tail is held
  # in memory; a zero-length deque also avoids the [-0:] slice pitfall, which
  # would return every line.
  with open(fname) as f:
    last_lines = deque(f, maxlen=max(n, 0))

  for line in last_lines:
    print(line, end ='')

In [6]:
#@title Copy input files

WORKDIR = f"/content/pmxrun_{_SESSION_ID}"  #@param {type:"string"}
OVERWRITE_WORKDIR = True  #@param {type:"boolean"}

#@markdown Whether to run very short simulations for testing. Otherwise, we run 
#@markdown the full set of calculations, which takes several hours.
SHORT_SIMS = True  #@param {type:"boolean"}

if os.path.isdir(WORKDIR) and OVERWRITE_WORKDIR:
  shutil.rmtree(WORKDIR)

if not os.path.isdir(WORKDIR):
  os.mkdir(WORKDIR)

  pmx_path = pmx.__file__.replace('/__init__.py', '')

  # Copy topology and structure files for protein and ligand.
  shutil.copytree(f'{pmx_path}/abfe_scripts/struct_top/', f'{WORKDIR}/struct_top/')

  # Copy MDP files (i.e. Gromacs config files).
  shutil.copytree(f'{pmx_path}/abfe_scripts/mdppath/', f'{WORKDIR}/mdppath/')

# We also shorten the alchemical transitions to speed things up.
# Note we cannot just change nsteps in mdrun here because we need to adjust the
# lambda schedule too.
! if [ $SHORT_SIMS == "True" ]; then sed -i 's/nsteps.*/nsteps = 25000/g' $WORKDIR/mdppath/ti_l*mdp; fi
! if [ $SHORT_SIMS == "True" ]; then sed -i 's/delta-lambda.*/delta-lambda = 4e-5/g' $WORKDIR/mdppath/ti_l0.mdp; fi  # 1/25,000 = 4e-5
! if [ $SHORT_SIMS == "True" ]; then sed -i 's/delta-lambda.*/delta-lambda = -4e-5/g' $WORKDIR/mdppath/ti_l1.mdp; fi

# Show contents of our working directory.
!echo "> ls {WORKDIR}"
!ls -l {WORKDIR}

> ls /content/pmxrun_1671051601
total 8
drwxr-xr-x 2 root root 4096 Dec 14 21:00 mdppath
drwxr-xr-x 4 root root 4096 Dec 14 20:57 struct_top


## 1.&nbsp; Input Files Preparation

In [7]:
#@title 1.1&nbsp; Setup folder structure

# Initialize the free energy environment object. It will store the main 
# parameters for the calculations.
fe = AbsoluteDG(ligList=['lysozyme_benzene'], 
                apoCase='lysozyme_apo', 
                bDSSB=False,
                gmxexec='/usr/local/gromacs/bin/gmx')

# Set the workpath in which simulation input files will be created.
fe.workPath = f'{WORKDIR}/lysopath'
# Set the path to the MDP files.
fe.mdpPath = f'{WORKDIR}/mdppath'
#@markdown Set the number of replicas (i.e., number of equilibrium simulations per state).
fe.replicas = 2  #@param {type:"integer"}
# Provide the path to the structures and topologies.
fe.structTopPath = f'{WORKDIR}/struct_top'

# Prepare the directory structure with all simulations steps required.
fe.simTypes = ['em',  # Energy minimization.
               'eq_posre',  # Equilibrium sim with position restraints.
               'eq',  # Equilibrium simulation.
               'transitions']  # Alchemical, non-equilibrium simulations.

fe.prepareFreeEnergyDir()


---------------------
Summary of the setup:
---------------------

   workpath: /content/pmxrun_1671051601/lysopath
   mdp path: /content/pmxrun_1671051601/mdppath
   # ligands: 1
   ligands:
        lysozyme_benzene
   apo state: lysozyme_apo

---------------------
Directory structure:
---------------------

/content/pmxrun_1671051601/lysopath/
|
|--ligX
|--|--water
|--|--|--stateA
|--|--|--|--run1/2
|--|--|--|--|--/em/eq_posre/eq/transitions
|--|--|--stateB
|--|--|--|--run1/2
|--|--|--|--|--/em/eq_posre/eq/transitions
|--|--protein
|--|--|--stateA
|--|--|--|--run1/2
|--|--|--|--|--/em/eq_posre/eq/transitions
|--|--|--stateB
|--|--|--|--run1/2
|--|--|--|--|--/em/eq_posre/eq/transitions
|--|--strTopFolder
|--lig..

DONE


In [8]:
#@title 1.2&nbsp; Assemble simulation systems

# Assemble the systems: build Gromacs structure and topology for the 
# ligand+water and ligand+protein+water systems.
fe.assemble_systems()

# Define the simulation boxes, fill them with water molecules, and add ions to 
# neutralize the system and reach desired NaCl concentration (0.15 M by default).
fe.boxWaterIons()

----------------------
Assembling the systems
----------------------
Order: ligand-protein-other-water
--- Assembling structures: lysozyme_benzene ---
--- Assembling topologies: lysozyme_benzene ---
----------------
Box, water, ions
----------------


In [9]:
# Check the files present now. This is the content of root dir of the 
# calculation for this protein-ligand pair. The following ls calls show 
# the content of some subfolders for the complex simulations, 
# stateA (coupled ligand). The other folders have the same structure/files but 
# for the ligand in water simulations and for stateB too.
!echo "> ls {WORKDIR}/lysopath/lysozyme_benzene/"
!ls {WORKDIR}/lysopath/lysozyme_benzene/
!echo ""
!echo "> ls {WORKDIR}/lysopath/lysozyme_benzene/protein"
!ls {WORKDIR}/lysopath/lysozyme_benzene/protein/
!echo ""
!echo "> ls {WORKDIR}/lysopath/lysozyme_benzene/protein/stateA"
!ls {WORKDIR}/lysopath/lysozyme_benzene/protein/stateA/
!echo ""
!echo "> ls {WORKDIR}/lysopath/lysozyme_benzene/protein/stateA/run1/"
!ls {WORKDIR}/lysopath/lysozyme_benzene/protein/stateA/run1/

> ls /content/pmxrun_1671051601/lysopath/lysozyme_benzene/
protein  strTopFolder_apo  strTopFolder_holo  water

> ls /content/pmxrun_1671051601/lysopath/lysozyme_benzene/protein
stateA	stateB

> ls /content/pmxrun_1671051601/lysopath/lysozyme_benzene/protein/stateA
box.pdb   mdout.mdp  run2	 topol.top  tpr.tpr
ions.pdb  run1	     system.pdb  top.top    water.pdb

> ls /content/pmxrun_1671051601/lysopath/lysozyme_benzene/protein/stateA/run1/
em  eq	eq_posre  transitions


## 2.&nbsp; Energy Minimization

In [10]:
#@title 2.1&nbsp; Prepare TPR files

# Call grompp and create TPR files for all systems and repeats.
tpr_files = fe.prepare_simulation(simType='em')

print("List of TPR files that we'll run:")
for f in tpr_files:
  print(f"  {f}")

-----------------------------------------
Preparing simulation: em
-----------------------------------------
List of TPR files that we'll run:
  /content/pmxrun_1671051601/lysopath/lysozyme_benzene/water/stateA/run1/em/tpr.tpr
  /content/pmxrun_1671051601/lysopath/lysozyme_benzene/water/stateA/run2/em/tpr.tpr
  /content/pmxrun_1671051601/lysopath/lysozyme_benzene/water/stateB/run1/em/tpr.tpr
  /content/pmxrun_1671051601/lysopath/lysozyme_benzene/water/stateB/run2/em/tpr.tpr
  /content/pmxrun_1671051601/lysopath/lysozyme_benzene/protein/stateA/run1/em/tpr.tpr
  /content/pmxrun_1671051601/lysopath/lysozyme_benzene/protein/stateA/run2/em/tpr.tpr
  /content/pmxrun_1671051601/lysopath/lysozyme_benzene/protein/stateB/run1/em/tpr.tpr
  /content/pmxrun_1671051601/lysopath/lysozyme_benzene/protein/stateB/run2/em/tpr.tpr


In [11]:
#@title 2.2&nbsp; Run minimizations

# Cap the minimization at 500 steps for short test runs; None means the step
# count stored in the TPR (i.e. from the MDP file) is used unchanged.
nsteps = 500 if SHORT_SIMS else None

# Go through all TPR files, minimizing only the systems that have not
# completed yet.
for tpr_file in tqdm(tpr_files):
  print(f"\nRunning {tpr_file}")

  if not mdrun_completed(tpr_file):
    # PME offload to the GPU is disabled for minimizations.
    md = mdrun(tpr_file, pmegpu=False, nsteps=nsteps)
  else:
    print(f"`{tpr_file}` already ran successfully")

  0%|          | 0/8 [00:00<?, ?it/s]


Running /content/pmxrun_1671051601/lysopath/lysozyme_benzene/water/stateA/run1/em/tpr.tpr


 12%|█▎        | 1/8 [00:03<00:22,  3.28s/it]


Running /content/pmxrun_1671051601/lysopath/lysozyme_benzene/water/stateA/run2/em/tpr.tpr


 25%|██▌       | 2/8 [00:05<00:15,  2.66s/it]


Running /content/pmxrun_1671051601/lysopath/lysozyme_benzene/water/stateB/run1/em/tpr.tpr


 38%|███▊      | 3/8 [00:07<00:12,  2.46s/it]


Running /content/pmxrun_1671051601/lysopath/lysozyme_benzene/water/stateB/run2/em/tpr.tpr


 50%|█████     | 4/8 [00:09<00:09,  2.38s/it]


Running /content/pmxrun_1671051601/lysopath/lysozyme_benzene/protein/stateA/run1/em/tpr.tpr


 62%|██████▎   | 5/8 [00:35<00:32, 10.82s/it]


Running /content/pmxrun_1671051601/lysopath/lysozyme_benzene/protein/stateA/run2/em/tpr.tpr


 75%|███████▌  | 6/8 [01:00<00:30, 15.38s/it]


Running /content/pmxrun_1671051601/lysopath/lysozyme_benzene/protein/stateB/run1/em/tpr.tpr


 88%|████████▊ | 7/8 [01:25<00:18, 18.53s/it]


Running /content/pmxrun_1671051601/lysopath/lysozyme_benzene/protein/stateB/run2/em/tpr.tpr


100%|██████████| 8/8 [01:50<00:00, 13.76s/it]


## 3.&nbsp; Equilibrium Simulations

First, we run a short MD simulation with position restraints to equilibrate the solvent and ions around the protein/ligand. Then, we run the actual production simulation.

In [12]:
#@title 3.1&nbsp; Prepare TPR files for short equilibration

tpr_files = fe.prepare_simulation(simType='eq_posre', prevSim='em')

print("List of TPR files that we'll run:")
for f in tpr_files:
  print(f"  {f}")

-----------------------------------------
Preparing simulation: eq_posre
-----------------------------------------
List of TPR files that we'll run:
  /content/pmxrun_1671051601/lysopath/lysozyme_benzene/water/stateA/run1/eq_posre/tpr.tpr
  /content/pmxrun_1671051601/lysopath/lysozyme_benzene/water/stateA/run2/eq_posre/tpr.tpr
  /content/pmxrun_1671051601/lysopath/lysozyme_benzene/water/stateB/run1/eq_posre/tpr.tpr
  /content/pmxrun_1671051601/lysopath/lysozyme_benzene/water/stateB/run2/eq_posre/tpr.tpr
  /content/pmxrun_1671051601/lysopath/lysozyme_benzene/protein/stateA/run1/eq_posre/tpr.tpr
  /content/pmxrun_1671051601/lysopath/lysozyme_benzene/protein/stateA/run2/eq_posre/tpr.tpr
  /content/pmxrun_1671051601/lysopath/lysozyme_benzene/protein/stateB/run1/eq_posre/tpr.tpr
  /content/pmxrun_1671051601/lysopath/lysozyme_benzene/protein/stateB/run2/eq_posre/tpr.tpr


In [13]:
#@title 3.2&nbsp; Run short equilibration

# Short test runs use 5,000 steps (5,000 x 2 fs = 10 ps); None means the step
# count stored in the TPR (i.e. from the MDP file) is used unchanged.
nsteps = 5_000 if SHORT_SIMS else None

# Go through all TPR files, simulating only the systems that have not
# completed yet.
for tpr_file in tqdm(tpr_files):
  print(f"\nRunning {tpr_file}")

  if not mdrun_completed(tpr_file):
    # Possibly shortened run, depending on `nsteps` above.
    md = mdrun(tpr_file, pmegpu=True, nsteps=nsteps)
  else:
    print(f"`{tpr_file}` already ran successfully")

  0%|          | 0/8 [00:00<?, ?it/s]


Running /content/pmxrun_1671051601/lysopath/lysozyme_benzene/water/stateA/run1/eq_posre/tpr.tpr


 12%|█▎        | 1/8 [00:04<00:29,  4.17s/it]


Running /content/pmxrun_1671051601/lysopath/lysozyme_benzene/water/stateA/run2/eq_posre/tpr.tpr


 25%|██▌       | 2/8 [00:08<00:25,  4.23s/it]


Running /content/pmxrun_1671051601/lysopath/lysozyme_benzene/water/stateB/run1/eq_posre/tpr.tpr


 38%|███▊      | 3/8 [00:12<00:20,  4.12s/it]


Running /content/pmxrun_1671051601/lysopath/lysozyme_benzene/water/stateB/run2/eq_posre/tpr.tpr


 50%|█████     | 4/8 [00:16<00:16,  4.04s/it]


Running /content/pmxrun_1671051601/lysopath/lysozyme_benzene/protein/stateA/run1/eq_posre/tpr.tpr


 62%|██████▎   | 5/8 [00:41<00:35, 11.71s/it]


Running /content/pmxrun_1671051601/lysopath/lysozyme_benzene/protein/stateA/run2/eq_posre/tpr.tpr


 75%|███████▌  | 6/8 [01:05<00:31, 15.89s/it]


Running /content/pmxrun_1671051601/lysopath/lysozyme_benzene/protein/stateB/run1/eq_posre/tpr.tpr


 88%|████████▊ | 7/8 [01:28<00:18, 18.08s/it]


Running /content/pmxrun_1671051601/lysopath/lysozyme_benzene/protein/stateB/run2/eq_posre/tpr.tpr


100%|██████████| 8/8 [01:50<00:00, 13.85s/it]


In [14]:
# Show output files for one run.
# The run directory is derived once with os.path, instead of mixing
# os.path.dirname, manual splitting and a substring replace on the full path.
run_dir = os.path.dirname(tpr_files[0])
print("Path to mdrun output files:", run_dir)
print("Output files:", os.listdir(run_dir))
print()

# Tail log file to see performance.
tail(os.path.join(run_dir, 'md.log'), n=5)

Path to mdrun output files: /content/pmxrun_1671051601/lysopath/lysozyme_benzene/water/stateA/run1/eq_posre
Output files: ['ener.edr', 'md.log', 'confout.gro', 'tpr.tpr', 'traj.trr', 'state.cpt', 'mdout.mdp']

       Time:        6.180        3.090      200.0
                 (ns/day)    (hour/ns)
Performance:      279.675        0.086
Finished mdrun on rank 0 Wed Dec 14 21:02:14 2022



In [15]:
#@title 3.3&nbsp; Prepare TPR files for equilibrium simulations

tpr_files = fe.prepare_simulation(simType='eq', prevSim='eq_posre')

print("List of TPR files that we'll run:")
for f in tpr_files:
  print(f"  {f}")

-----------------------------------------
Preparing simulation: eq
-----------------------------------------
List of TPR files that we'll run:
  /content/pmxrun_1671051601/lysopath/lysozyme_benzene/water/stateA/run1/eq/tpr.tpr
  /content/pmxrun_1671051601/lysopath/lysozyme_benzene/water/stateA/run2/eq/tpr.tpr
  /content/pmxrun_1671051601/lysopath/lysozyme_benzene/water/stateB/run1/eq/tpr.tpr
  /content/pmxrun_1671051601/lysopath/lysozyme_benzene/water/stateB/run2/eq/tpr.tpr
  /content/pmxrun_1671051601/lysopath/lysozyme_benzene/protein/stateA/run1/eq/tpr.tpr
  /content/pmxrun_1671051601/lysopath/lysozyme_benzene/protein/stateA/run2/eq/tpr.tpr
  /content/pmxrun_1671051601/lysopath/lysozyme_benzene/protein/stateB/run1/eq/tpr.tpr
  /content/pmxrun_1671051601/lysopath/lysozyme_benzene/protein/stateB/run2/eq/tpr.tpr


In [16]:
#@title 3.4&nbsp; Run equilibrium simulations

# Short test runs use 25,000 steps (25,000 x 2 fs = 50 ps); None means the
# step count stored in the TPR (i.e. from the MDP file) is used unchanged.
nsteps = 25_000 if SHORT_SIMS else None

# Go through all TPR files, simulating only the systems that have not
# completed yet.
for tpr_file in tqdm(tpr_files):
  print(f"\nRunning {tpr_file}")

  if not mdrun_completed(tpr_file):
    # Possibly shortened run, depending on `nsteps` above.
    md = mdrun(tpr_file, pmegpu=True, nsteps=nsteps)
  else:
    print(f"`{tpr_file}` already ran successfully")

  0%|          | 0/8 [00:00<?, ?it/s]


Running /content/pmxrun_1671051601/lysopath/lysozyme_benzene/water/stateA/run1/eq/tpr.tpr


 12%|█▎        | 1/8 [00:16<01:55, 16.48s/it]


Running /content/pmxrun_1671051601/lysopath/lysozyme_benzene/water/stateA/run2/eq/tpr.tpr


 25%|██▌       | 2/8 [00:32<01:37, 16.27s/it]


Running /content/pmxrun_1671051601/lysopath/lysozyme_benzene/water/stateB/run1/eq/tpr.tpr


 38%|███▊      | 3/8 [00:48<01:19, 15.99s/it]


Running /content/pmxrun_1671051601/lysopath/lysozyme_benzene/water/stateB/run2/eq/tpr.tpr


 50%|█████     | 4/8 [01:04<01:03, 15.99s/it]


Running /content/pmxrun_1671051601/lysopath/lysozyme_benzene/protein/stateA/run1/eq/tpr.tpr


 62%|██████▎   | 5/8 [02:49<02:24, 48.02s/it]


Running /content/pmxrun_1671051601/lysopath/lysozyme_benzene/protein/stateA/run2/eq/tpr.tpr


 75%|███████▌  | 6/8 [04:33<02:14, 67.13s/it]


Running /content/pmxrun_1671051601/lysopath/lysozyme_benzene/protein/stateB/run1/eq/tpr.tpr


 88%|████████▊ | 7/8 [06:17<01:19, 79.26s/it]


Running /content/pmxrun_1671051601/lysopath/lysozyme_benzene/protein/stateB/run2/eq/tpr.tpr


100%|██████████| 8/8 [08:01<00:00, 60.20s/it]


In [17]:
i = 0

# Show output files for one run.
# The run directory is derived once with os.path, instead of mixing
# os.path.dirname, manual splitting and a substring replace on the full path.
run_dir = os.path.dirname(tpr_files[i])
print("Path to mdrun output files:", run_dir)
print("Output files:", os.listdir(run_dir))
print()

# Tail log file to see performance.
tail(os.path.join(run_dir, 'md.log'), n=5)

Path to mdrun output files: /content/pmxrun_1671051601/lysopath/lysozyme_benzene/water/stateA/run1/eq
Output files: ['ener.edr', 'md.log', 'confout.gro', 'tpr.tpr', 'traj.trr', 'state.cpt', 'mdout.mdp']

       Time:       30.140       15.070      200.0
                 (ns/day)    (hour/ns)
Performance:      286.675        0.084
Finished mdrun on rank 0 Wed Dec 14 21:04:19 2022



## 4.&nbsp; Non-Equilibrium Simulations

In [18]:
#@title 4.1&nbsp; Extract frames from equilibrium runs, and prepare TPRs

fe.equilTime = 0.  # ps to discard as equilibration
fe.bGenTiTpr = True  # Generates TPRs from extracted frames and rm GRO file.
tpr_files = fe.prepare_simulation(simType='transitions')

print("List of TPR files that we'll run:")
for f in tpr_files:
  print(f"  {f}")

-----------------------------------------
Preparing simulation: transitions
-----------------------------------------
List of TPR files that we'll run:
  /content/pmxrun_1671051601/lysopath/lysozyme_benzene/water/stateA/run1/transitions/frame1/tpr.tpr
  /content/pmxrun_1671051601/lysopath/lysozyme_benzene/water/stateA/run2/transitions/frame1/tpr.tpr
  /content/pmxrun_1671051601/lysopath/lysozyme_benzene/water/stateB/run1/transitions/frame1/tpr.tpr
  /content/pmxrun_1671051601/lysopath/lysozyme_benzene/water/stateB/run2/transitions/frame1/tpr.tpr
  /content/pmxrun_1671051601/lysopath/lysozyme_benzene/protein/stateA/run1/transitions/frame1/tpr.tpr
  /content/pmxrun_1671051601/lysopath/lysozyme_benzene/protein/stateA/run2/transitions/frame1/tpr.tpr
  /content/pmxrun_1671051601/lysopath/lysozyme_benzene/protein/stateB/run1/transitions/frame1/tpr.tpr
  /content/pmxrun_1671051601/lysopath/lysozyme_benzene/protein/stateB/run2/transitions/frame1/tpr.tpr


In [21]:
#@title 4.2&nbsp; Run non-equilibrium simulations

# Go through all TPR files, running only the transitions whose dhdl output is
# missing or incomplete.
for tpr_file in tqdm(tpr_files):
  print(f"\nRunning {tpr_file}")

  if not mdrun_completed(tpr_file, transition=True):
    # No `nsteps` override here: the run length stored in the TPR is used.
    md = mdrun(tpr_file, pmegpu=True)
  else:
    print(f"`{tpr_file}` already ran successfully")

  0%|          | 0/8 [00:00<?, ?it/s]


Running /content/pmxrun_1671051601/lysopath/lysozyme_benzene/water/stateA/run1/transitions/frame1/tpr.tpr


 12%|█▎        | 1/8 [00:18<02:07, 18.27s/it]


Running /content/pmxrun_1671051601/lysopath/lysozyme_benzene/water/stateA/run2/transitions/frame1/tpr.tpr


 25%|██▌       | 2/8 [00:37<01:51, 18.66s/it]


Running /content/pmxrun_1671051601/lysopath/lysozyme_benzene/water/stateB/run1/transitions/frame1/tpr.tpr


 38%|███▊      | 3/8 [00:54<01:31, 18.22s/it]


Running /content/pmxrun_1671051601/lysopath/lysozyme_benzene/water/stateB/run2/transitions/frame1/tpr.tpr


 50%|█████     | 4/8 [01:12<01:11, 17.90s/it]


Running /content/pmxrun_1671051601/lysopath/lysozyme_benzene/protein/stateA/run1/transitions/frame1/tpr.tpr


 62%|██████▎   | 5/8 [03:09<02:41, 53.87s/it]


Running /content/pmxrun_1671051601/lysopath/lysozyme_benzene/protein/stateA/run2/transitions/frame1/tpr.tpr


 75%|███████▌  | 6/8 [05:08<02:31, 75.80s/it]


Running /content/pmxrun_1671051601/lysopath/lysozyme_benzene/protein/stateB/run1/transitions/frame1/tpr.tpr


 88%|████████▊ | 7/8 [07:09<01:30, 90.59s/it]


Running /content/pmxrun_1671051601/lysopath/lysozyme_benzene/protein/stateB/run2/transitions/frame1/tpr.tpr


100%|██████████| 8/8 [09:07<00:00, 68.43s/it]


In [23]:
i = 0  # tpr id

# Show output files for one run.
# The run directory is derived once with os.path rather than by splitting and
# re-joining the path by hand in several places.
run_dir = os.path.dirname(tpr_files[i])
print("Path to mdrun output files:", run_dir)
print("Output files:", os.listdir(run_dir))
print()

# Tail dhdl file.
print("Last 10 lines of an XVG files. The first column is time in ps, the second column is the instantaneous dH/dl.")
tail(os.path.join(run_dir, "dhdl.xvg"), n=10)

Path to mdrun output files: /content/pmxrun_1671051601/lysopath/lysozyme_benzene/water/stateA/run1/transitions/frame1
Output files: ['dhdl.xvg', 'ener.edr', 'md.log', 'confout.gro', 'tpr.tpr', 'state.cpt', 'mdout.mdp']

Last 10 lines of an XVG files. The first column is time in ps, the second column is the instantaneous dH/dl.
49.9820 -45.852627
49.9840 -45.171741
49.9860 -44.153950
49.9880 -42.886539
49.9900 -41.460636
49.9920 -40.087482
49.9940 -39.105045
49.9960 -38.526890
49.9980 -38.283115
50.0000 -38.413532


## 5.&nbsp; Free Energy Estimation

In [24]:
# Estimate and show free energy terms.
fe.ligSymmetry = 2  # This is because we're unlikely to sample ring flips in benzene.
# Run the analysis for the ligand, then build its summary.
fe.run_analysis(ligs=['lysozyme_benzene'])
fe.analysis_summary(ligs=['lysozyme_benzene'])
# Bare expression on the last line: the notebook displays it as the cell output.
fe.resultsAll

----------------
Running analysis
----------------


Unnamed: 0,dGcalc,errBoot,errAnalyt,framesA,framesB
lysozyme_benzene_correction,-31.2955,,,,
lysozyme_benzene_symmetry,-1.718282,,,,
lysozyme_benzene_water_1,-7.84,0.0,0.21,1.0,1.0
lysozyme_benzene_water_2,-7.52,0.0,0.16,1.0,1.0
lysozyme_benzene_water_all,-7.68,0.07,0.15,2.0,2.0
lysozyme_benzene_protein_1,26.46,0.0,18.71,1.0,1.0
lysozyme_benzene_protein_2,18.57,0.0,88.31,1.0,1.0
lysozyme_benzene_protein_all,25.53,3.46,15.62,2.0,2.0
lysozyme_benzene_water,-7.68,0.113165,0.173584,2.0,2.0
lysozyme_benzene_protein,25.53,2.790234,46.093702,2.0,2.0


In [25]:
# Show estimated binding free energy (dGcalc).
fe.resultsSummary

Unnamed: 0,dGcalc,errAnalyt,errBoot
lysozyme_benzene,-3.632782,46.094029,2.792528
