Skip to content

Commit

Permalink
MPI example with dipeptide mutation (#1228)
Browse files Browse the repository at this point in the history
* peptide mutation MPI example added

* better documentation of motivation
  • Loading branch information
ijpulidos committed Aug 23, 2023
1 parent ef15992 commit aca858f
Show file tree
Hide file tree
Showing 5 changed files with 241 additions and 0 deletions.
42 changes: 42 additions & 0 deletions examples/dipeptide-mutation-repex-rest/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
# Barnase-Barstar protein-protein interaction example

This example is based on the tools and work by Ivy Zhang in https://pubs.acs.org/doi/full/10.1021/acs.jctc.3c00333

More info: https://github.com/choderalab/perses-barnase-barstar-paper

## How to run

This example assumes that you will be running on an HPC infrastructure using MPI, running with
multiple GPUs.

### Why MPI?

Sampling protein-protein interactions for Free Energy calculations is a very computationally costly task, by
parallelizing the replica propagation (lambda windows) using MPI and multiple GPUs a higher throughput can be
achieved.

### Running environment

Make sure your environment has a `mpiplus` and `mpi4py` installed in your environment, besides of `perses`.

### Pipeline

Please note that in order to run in an MPI environment we have to separate the generation of the Hybrid Topology
with respect to the actual computation parts of the repex algorithm.

1. Generate HTFs with
```bash
python generate_htfs.py ala_vacuum.pdb 2 THR results_dir
```
Input pdb file can be found in https://github.com/choderalab/perses/blob/main/perses/data/ala_vacuum.pdb .
Please download the files from the mentioned URL to run this command.



2. Run with MPI (same host with 2 different GPUs) using
```bash
mpiexec -f hostfile -configfile configfile
```
you may need to adapt the command and the `hostfile` and `configfile` to your MPI environment. We have
previously prepared the config files for practical purposes. How to set up an MPI environment and files
is beyond the scope of this example.
2 changes: 2 additions & 0 deletions examples/dipeptide-mutation-repex-rest/configfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
-np 1 -env CUDA_VISIBLE_DEVICES 0 run_repex.py results_dir solvent 36 10000 300
-np 1 -env CUDA_VISIBLE_DEVICES 1 run_repex.py results_dir solvent 36 10000 300
82 changes: 82 additions & 0 deletions examples/dipeptide-mutation-repex-rest/generate_htfs.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,82 @@
import os
import pickle
import argparse
from pathlib import Path

import openmm
from openmm import unit, app
from perses.app.relative_point_mutation_setup import PointMutationExecutor
from perses.utils.smallmolecules import render_protein_residue_atom_mapping

# Read args
parser = argparse.ArgumentParser(description='run equilibration')
parser.add_argument('input_filename', type=str, help='protein file')
parser.add_argument('resid', type=str, help='residue id')
parser.add_argument('mutant_aa', type=str, help='amino acid to mutate to')
parser.add_argument('outdir', type=str, help='output directory')
parser.add_argument('--ligand_input', type=str, help='ligand input file')
parser.add_argument('--is_vacuum', action='store_true', help='whether to generate a vacuum htf')
parser.add_argument('--old_residue', type=str, help='old residue nonstandard name')
args = parser.parse_args()

# Set parameters for input to `PointMutationExecutor`
forcefield_files = ['amber14/protein.ff14SB.xml', 'amber14/tip3p.xml']
forcefield_kwargs = {'removeCMMotion': False, 'constraints' : app.HBonds, 'rigidWater': True, 'hydrogenMass' : 3 * unit.amus}

if not args.is_vacuum:
is_vacuum = False
is_solvated = True
barostat = openmm.MonteCarloBarostat(1.0 * unit.atmosphere, 300 * unit.kelvin, 50)
periodic_forcefield_kwargs = {'nonbondedMethod': app.PME, 'ewaldErrorTolerance': 0.00025}
nonperiodic_forcefield_kwargs = None
else:
is_vacuum = True
is_solvated = False
barostat = None
periodic_forcefield_kwargs = None
nonperiodic_forcefield_kwargs = {'nonbondedMethod': app.NoCutoff}

conduct_endstate_validation = False
w_lifting = 0.3 * unit.nanometer
generate_unmodified_hybrid_topology_factory = False
generate_rest_capable_hybrid_topology_factory = True

# Generate htfs
solvent_delivery = PointMutationExecutor(args.input_filename,
'1',
args.resid,
args.mutant_aa,
old_residue=args.old_residue,
is_vacuum=is_vacuum,
is_solvated=is_solvated,
forcefield_files=forcefield_files,
barostat=barostat,
forcefield_kwargs=forcefield_kwargs,
periodic_forcefield_kwargs=periodic_forcefield_kwargs,
nonperiodic_forcefield_kwargs=nonperiodic_forcefield_kwargs,
conduct_endstate_validation=conduct_endstate_validation,
w_lifting=w_lifting,
generate_unmodified_hybrid_topology_factory=generate_unmodified_hybrid_topology_factory,
generate_rest_capable_hybrid_topology_factory=generate_rest_capable_hybrid_topology_factory
)

# Saving htfs as pickles
print("Saving htfs as pickles")
apo_rest_htf = solvent_delivery.get_apo_rest_htf()
phase = 'vacuum' if args.is_vacuum else 'apo'

results_dir = args.outdir

if not os.path.exists(results_dir):
os.makedirs(results_dir)
with open(os.path.join(args.outdir, f"htf_{phase}.pickle"), "wb") as f:
pickle.dump(apo_rest_htf, f)

# Render atom map
atom_map_filename = f'{args.outdir}/atom_map.png'
render_protein_residue_atom_mapping(apo_rest_htf._topology_proposal, atom_map_filename)

# Save pdbs
app.PDBFile.writeFile(apo_rest_htf._topology_proposal.old_topology, apo_rest_htf.old_positions(apo_rest_htf.hybrid_positions), open(os.path.join(results_dir, f"{phase}_old.pdb"), "w"), keepIds=True)
app.PDBFile.writeFile(apo_rest_htf._topology_proposal.new_topology, apo_rest_htf.new_positions(apo_rest_htf.hybrid_positions), open(os.path.join(results_dir, f"{phase}_new.pdb"), "w"), keepIds=True)

2 changes: 2 additions & 0 deletions examples/dipeptide-mutation-repex-rest/hostfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
node1
node1
113 changes: 113 additions & 0 deletions examples/dipeptide-mutation-repex-rest/run_repex.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,113 @@
import os
import pickle
import argparse
import logging
from pathlib import Path

from simtk import openmm
from simtk.openmm import unit

from openmmtools import mcmc
from openmmtools import cache, utils
from openmmtools.multistate import MultiStateReporter

from perses.dispersed.utils import configure_platform
from perses.samplers.multistate import HybridRepexSampler

from mdtraj.core.residue_names import _SOLVENT_TYPES

# Set up logger
_logger = logging.getLogger()
_logger.setLevel(logging.INFO)

# Configure platform
platform = configure_platform(utils.get_fastest_platform().getName())
#platform.setPropertyDefaultValue('UseBlockingSync', 'false')

# Load arguments
parser = argparse.ArgumentParser(description='run repex')
parser.add_argument('dir', type=str, help='path to input/output dir')
parser.add_argument('phase', type=str, help='phase of the simulation to use in storage filename')
parser.add_argument('n_states', type=int, help='number of states')
parser.add_argument('n_cycles', type=int, help='number of iterations to run')
parser.add_argument('t_max', type=int, help='maximum temperature to use for rest scaling')
parser.add_argument('--restraint', type=str, help="the atoms to restrain, if any: 'CA', 'heavy', 'heavy-solvent'")
parser.add_argument('--force_constant', type=float, help='the force constant to use for restraints in kcal/molA^2')
args = parser.parse_args()

# Load hybrid topology factory
directory_number = Path(args.dir).parts[-2]
htf = pickle.load(open(os.path.join(args.dir, f"{directory_number}_{args.phase}.pickle"), "rb" ))
hybrid_system = htf.hybrid_system
hybrid_positions = htf.hybrid_positions

# Make sure LRC is set correctly
force_dict = {force.getName(): index for index, force in enumerate(hybrid_system.getForces())}
htf_class_name = htf.__class__.__name__
custom_force_name = 'CustomNonbondedForce'
nonbonded_force_name = 'NonbondedForce'
if htf_class_name == 'RESTCapableHybridTopologyFactory':
custom_force_name += '_sterics'
nonbonded_force_name += '_sterics'
custom_force = hybrid_system.getForce(force_dict[custom_force_name])
nonbonded_force = hybrid_system.getForce(force_dict[nonbonded_force_name])
_logger.info(f"{custom_force_name} use LRC? {custom_force.getUseLongRangeCorrection()}")
_logger.info(f"{nonbonded_force_name} use LRC? {nonbonded_force.getUseDispersionCorrection()}")

# Add virtual bond for complex phase
if args.phase == 'complex':
chain_A = 0
chain_B = 2
chains = list(htf.hybrid_topology.chains)
atom_A = list(chains[chain_A].atoms)[0]
atom_B = list(chains[chain_B].atoms)[0]
force = openmm.CustomBondForce('0')
force.addBond(atom_A.index, atom_B.index, [])
hybrid_system.addForce(force)
_logger.info(f"Added virtual bond between {atom_A} and {atom_B}")

# Add restraints
if args.restraint is not None:
topology = htf.hybrid_topology
solvent_types = list(_SOLVENT_TYPES)
force_constant = args.force_constant*unit.kilocalories_per_mole/unit.angstrom**2 if args.force_constant is not None else 50*unit.kilocalories_per_mole/unit.angstrom**2
_logger.info(f"Adding restraint to {args.restraint} atoms with force constant {force_constant}")

if args.restraint == 'heavy':
atom_indices = [atom.index for atom in topology.atoms if atom.residue.name not in solvent_types and atom.element.name != 'hydrogen']
elif args.restraint == 'CA':
atom_indices = [atom.index for atom in topology.atoms if atom.residue.name not in solvent_types and atom.name == 'CA']
elif args.restraint == 'heavy-solvent':
atom_indices = [atom.index for atom in topology.atoms if atom.element.name != 'hydrogen']
else:
raise Exception("Invalid restraint string specified")

_logger.info(atom_indices)
custom_cv_force = openmm.CustomCVForce('(K_RMSD/2)*(RMSD)^2')
custom_cv_force.addGlobalParameter('K_RMSD', force_constant * 2)
rmsd_force = openmm.RMSDForce(hybrid_positions, atom_indices)
custom_cv_force.addCollectiveVariable('RMSD', rmsd_force)
hybrid_system.addForce(custom_cv_force)

# Instantiate sampler
_logger.setLevel(logging.DEBUG)
reporter_file = os.path.join(os.path.join(args.dir, f"{directory_number}_{args.phase}.nc"))
reporter = MultiStateReporter(reporter_file, checkpoint_interval=100)
move = mcmc.LangevinDynamicsMove(timestep= 4.0 * unit.femtoseconds,
collision_rate=1.0 / unit.picosecond,
n_steps=250,
reassign_velocities=False,
constraint_tolerance=1e-06)
sampler = HybridRepexSampler(mcmc_moves=move,
replica_mixing_scheme='swap-all',
hybrid_factory=htf,
online_analysis_interval=None)
sampler.setup(n_states=args.n_states, temperature=300*unit.kelvin, t_max=args.t_max * unit.kelvin, storage_file=reporter, endstates=True)

# Create context caches
sampler.energy_context_cache = cache.ContextCache(capacity=None, time_to_live=None, platform=platform)
sampler.sampler_context_cache = cache.ContextCache(capacity=None, time_to_live=None, platform=platform)

# Run simulation
sampler.extend(args.n_cycles)

0 comments on commit aca858f

Please sign in to comment.