MPI example with dipeptide mutation (#1228)

* peptide mutation MPI example added * better documentation of motivation
choderalab · Aug 23, 2023 · aca858f · aca858f
1 parent ef15992
commit aca858f
Show file tree

Hide file tree

Showing 5 changed files with 241 additions and 0 deletions.
diff --git a/examples/dipeptide-mutation-repex-rest/README.md b/examples/dipeptide-mutation-repex-rest/README.md
@@ -0,0 +1,42 @@
+# Barnase-Barstar protein-protein interaction example
+
+This example is based on the tools and work by Ivy Zhang in https://pubs.acs.org/doi/full/10.1021/acs.jctc.3c00333
+
+More info: https://github.com/choderalab/perses-barnase-barstar-paper
+
+## How to run
+
+This example assumes that you will be running on an HPC infrastructure using MPI, running with
+multiple GPUs.
+
+### Why MPI?
+
+Sampling protein-protein interactions for Free Energy calculations is a very computationally costly task, by
+parallelizing the replica propagation (lambda windows) using MPI and multiple GPUs a higher throughput can be
+achieved.
+
+### Running environment
+
+Make sure your environment has a `mpiplus` and `mpi4py` installed in your environment, besides of `perses`.
+
+### Pipeline
+
+Please note that in order to run in an MPI environment we have to separate the generation of the Hybrid Topology
+with respect to the actual computation parts of the repex algorithm.
+
+1. Generate HTFs with
+```bash
+python generate_htfs.py ala_vacuum.pdb 2 THR results_dir
+```
+Input pdb file can be found in https://github.com/choderalab/perses/blob/main/perses/data/ala_vacuum.pdb .
+Please download the files from the mentioned URL to run this command.
+
+
+
+2. Run with MPI (same host with 2 different GPUs) using
+```bash
+mpiexec -f hostfile -configfile configfile
+```
+you may need to adapt the command and the `hostfile` and `configfile` to your MPI environment. We have
+previously prepared the config files for practical purposes. How to set up an MPI environment and files
+is beyond the scope of this example.
diff --git a/examples/dipeptide-mutation-repex-rest/configfile b/examples/dipeptide-mutation-repex-rest/configfile
@@ -0,0 +1,2 @@
+-np 1 -env CUDA_VISIBLE_DEVICES 0 run_repex.py results_dir solvent 36 10000 300
+-np 1 -env CUDA_VISIBLE_DEVICES 1 run_repex.py results_dir solvent 36 10000 300
diff --git a/examples/dipeptide-mutation-repex-rest/generate_htfs.py b/examples/dipeptide-mutation-repex-rest/generate_htfs.py
@@ -0,0 +1,82 @@
+import os
+import pickle
+import argparse
+from pathlib import Path
+
+import openmm
+from openmm import unit, app
+from perses.app.relative_point_mutation_setup import PointMutationExecutor
+from perses.utils.smallmolecules import  render_protein_residue_atom_mapping
+
+# Read args
+parser = argparse.ArgumentParser(description='run equilibration')
+parser.add_argument('input_filename', type=str, help='protein file')
+parser.add_argument('resid', type=str, help='residue id')
+parser.add_argument('mutant_aa', type=str, help='amino acid to mutate to')
+parser.add_argument('outdir', type=str, help='output directory')
+parser.add_argument('--ligand_input', type=str, help='ligand input file')
+parser.add_argument('--is_vacuum', action='store_true', help='whether to generate a vacuum htf')
+parser.add_argument('--old_residue', type=str, help='old residue nonstandard name')
+args = parser.parse_args()
+
+# Set parameters for input to `PointMutationExecutor`
+forcefield_files = ['amber14/protein.ff14SB.xml', 'amber14/tip3p.xml']
+forcefield_kwargs = {'removeCMMotion': False, 'constraints' : app.HBonds, 'rigidWater': True, 'hydrogenMass' : 3 * unit.amus}
+
+if not args.is_vacuum:
+    is_vacuum = False
+    is_solvated = True
+    barostat = openmm.MonteCarloBarostat(1.0 * unit.atmosphere, 300 * unit.kelvin, 50)
+    periodic_forcefield_kwargs = {'nonbondedMethod': app.PME, 'ewaldErrorTolerance': 0.00025}
+    nonperiodic_forcefield_kwargs = None
+else:
+    is_vacuum = True
+    is_solvated = False
+    barostat = None
+    periodic_forcefield_kwargs = None
+    nonperiodic_forcefield_kwargs = {'nonbondedMethod': app.NoCutoff}
+
+conduct_endstate_validation = False
+w_lifting = 0.3 * unit.nanometer
+generate_unmodified_hybrid_topology_factory = False
+generate_rest_capable_hybrid_topology_factory = True
+
+# Generate htfs
+solvent_delivery = PointMutationExecutor(args.input_filename,
+                        '1',
+                        args.resid,
+                        args.mutant_aa,
+                        old_residue=args.old_residue,
+                        is_vacuum=is_vacuum,
+                        is_solvated=is_solvated,
+                        forcefield_files=forcefield_files,
+                        barostat=barostat,
+                        forcefield_kwargs=forcefield_kwargs,
+                        periodic_forcefield_kwargs=periodic_forcefield_kwargs,
+                        nonperiodic_forcefield_kwargs=nonperiodic_forcefield_kwargs,
+                        conduct_endstate_validation=conduct_endstate_validation,
+                        w_lifting=w_lifting,
+                        generate_unmodified_hybrid_topology_factory=generate_unmodified_hybrid_topology_factory,
+                        generate_rest_capable_hybrid_topology_factory=generate_rest_capable_hybrid_topology_factory
+                       )
+
+# Saving htfs as pickles
+print("Saving htfs as pickles")
+apo_rest_htf = solvent_delivery.get_apo_rest_htf()
+phase = 'vacuum' if args.is_vacuum else 'apo'
+
+results_dir = args.outdir
+
+if not os.path.exists(results_dir):
+    os.makedirs(results_dir)
+with open(os.path.join(args.outdir, f"htf_{phase}.pickle"), "wb") as f:
+    pickle.dump(apo_rest_htf, f)
+
+# Render atom map
+atom_map_filename = f'{args.outdir}/atom_map.png'
+render_protein_residue_atom_mapping(apo_rest_htf._topology_proposal, atom_map_filename)
+
+# Save pdbs
+app.PDBFile.writeFile(apo_rest_htf._topology_proposal.old_topology, apo_rest_htf.old_positions(apo_rest_htf.hybrid_positions), open(os.path.join(results_dir, f"{phase}_old.pdb"), "w"), keepIds=True)
+app.PDBFile.writeFile(apo_rest_htf._topology_proposal.new_topology, apo_rest_htf.new_positions(apo_rest_htf.hybrid_positions), open(os.path.join(results_dir, f"{phase}_new.pdb"), "w"), keepIds=True)
+
diff --git a/examples/dipeptide-mutation-repex-rest/hostfile b/examples/dipeptide-mutation-repex-rest/hostfile
@@ -0,0 +1,2 @@
+node1
+node1
diff --git a/examples/dipeptide-mutation-repex-rest/run_repex.py b/examples/dipeptide-mutation-repex-rest/run_repex.py
@@ -0,0 +1,113 @@
+import os
+import pickle
+import argparse
+import logging
+from pathlib import Path
+
+from simtk import openmm
+from simtk.openmm import unit
+
+from openmmtools import mcmc
+from openmmtools import cache, utils
+from openmmtools.multistate import MultiStateReporter
+
+from perses.dispersed.utils import configure_platform
+from perses.samplers.multistate import HybridRepexSampler
+
+from mdtraj.core.residue_names import _SOLVENT_TYPES
+
+# Set up logger
+_logger = logging.getLogger()
+_logger.setLevel(logging.INFO)
+
+# Configure platform
+platform = configure_platform(utils.get_fastest_platform().getName())
+#platform.setPropertyDefaultValue('UseBlockingSync', 'false')
+
+# Load arguments
+parser = argparse.ArgumentParser(description='run repex')
+parser.add_argument('dir', type=str, help='path to input/output dir')
+parser.add_argument('phase', type=str, help='phase of the simulation to use in storage filename')
+parser.add_argument('n_states', type=int, help='number of states')
+parser.add_argument('n_cycles', type=int, help='number of iterations to run')
+parser.add_argument('t_max', type=int, help='maximum temperature to use for rest scaling')
+parser.add_argument('--restraint', type=str, help="the atoms to restrain, if any: 'CA', 'heavy', 'heavy-solvent'")
+parser.add_argument('--force_constant', type=float, help='the force constant to use for restraints in kcal/molA^2')
+args = parser.parse_args()
+
+# Load hybrid topology factory
+directory_number = Path(args.dir).parts[-2]
+htf = pickle.load(open(os.path.join(args.dir, f"{directory_number}_{args.phase}.pickle"), "rb" ))
+hybrid_system = htf.hybrid_system
+hybrid_positions = htf.hybrid_positions
+
+# Make sure LRC is set correctly
+force_dict = {force.getName(): index for index, force in enumerate(hybrid_system.getForces())}
+htf_class_name = htf.__class__.__name__
+custom_force_name = 'CustomNonbondedForce'
+nonbonded_force_name = 'NonbondedForce'
+if htf_class_name == 'RESTCapableHybridTopologyFactory':
+    custom_force_name += '_sterics'
+    nonbonded_force_name += '_sterics'
+custom_force = hybrid_system.getForce(force_dict[custom_force_name])
+nonbonded_force = hybrid_system.getForce(force_dict[nonbonded_force_name])
+_logger.info(f"{custom_force_name} use LRC? {custom_force.getUseLongRangeCorrection()}")
+_logger.info(f"{nonbonded_force_name} use LRC? {nonbonded_force.getUseDispersionCorrection()}")
+
+# Add virtual bond for complex phase
+if args.phase == 'complex':
+    chain_A = 0
+    chain_B = 2
+    chains = list(htf.hybrid_topology.chains)
+    atom_A = list(chains[chain_A].atoms)[0]
+    atom_B = list(chains[chain_B].atoms)[0]
+    force = openmm.CustomBondForce('0')
+    force.addBond(atom_A.index, atom_B.index, [])
+    hybrid_system.addForce(force)
+    _logger.info(f"Added virtual bond between {atom_A} and {atom_B}")
+
+# Add restraints
+if args.restraint is not None:
+    topology = htf.hybrid_topology
+    solvent_types = list(_SOLVENT_TYPES)
+    force_constant = args.force_constant*unit.kilocalories_per_mole/unit.angstrom**2 if args.force_constant is not None else 50*unit.kilocalories_per_mole/unit.angstrom**2
+    _logger.info(f"Adding restraint to {args.restraint} atoms with force constant {force_constant}")
+
+    if args.restraint == 'heavy':
+        atom_indices = [atom.index for atom in topology.atoms if atom.residue.name not in solvent_types and atom.element.name != 'hydrogen']
+    elif args.restraint == 'CA':
+        atom_indices = [atom.index for atom in topology.atoms if atom.residue.name not in solvent_types and atom.name == 'CA']
+    elif args.restraint == 'heavy-solvent':
+        atom_indices = [atom.index for atom in topology.atoms if atom.element.name != 'hydrogen']
+    else:
+        raise Exception("Invalid restraint string specified")
+
+    _logger.info(atom_indices)
+    custom_cv_force = openmm.CustomCVForce('(K_RMSD/2)*(RMSD)^2')
+    custom_cv_force.addGlobalParameter('K_RMSD', force_constant * 2)
+    rmsd_force = openmm.RMSDForce(hybrid_positions, atom_indices)
+    custom_cv_force.addCollectiveVariable('RMSD', rmsd_force)
+    hybrid_system.addForce(custom_cv_force)
+
+# Instantiate sampler 
+_logger.setLevel(logging.DEBUG)
+reporter_file = os.path.join(os.path.join(args.dir, f"{directory_number}_{args.phase}.nc"))
+reporter = MultiStateReporter(reporter_file, checkpoint_interval=100)
+move = mcmc.LangevinDynamicsMove(timestep= 4.0 * unit.femtoseconds,
+                                          collision_rate=1.0 / unit.picosecond,
+                                          n_steps=250,
+                                          reassign_velocities=False,
+                                          constraint_tolerance=1e-06)
+sampler = HybridRepexSampler(mcmc_moves=move,
+                             replica_mixing_scheme='swap-all',
+                             hybrid_factory=htf, 
+                             online_analysis_interval=None)
+sampler.setup(n_states=args.n_states, temperature=300*unit.kelvin, t_max=args.t_max * unit.kelvin, storage_file=reporter, endstates=True)
+
+# Create context caches
+sampler.energy_context_cache = cache.ContextCache(capacity=None, time_to_live=None, platform=platform)
+sampler.sampler_context_cache = cache.ContextCache(capacity=None, time_to_live=None, platform=platform)
+
+# Run simulation
+sampler.extend(args.n_cycles)
+