-
Notifications
You must be signed in to change notification settings - Fork 50
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
MPI example with dipeptide mutation (#1228)
* peptide mutation MPI example added * better documentation of motivation
- Loading branch information
Showing
5 changed files
with
241 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,42 @@ | ||
# Barnase-Barstar protein-protein interaction example | ||
|
||
This example is based on the tools and work by Ivy Zhang in https://pubs.acs.org/doi/full/10.1021/acs.jctc.3c00333 | ||
|
||
More info: https://github.com/choderalab/perses-barnase-barstar-paper | ||
|
||
## How to run | ||
|
||
This example assumes that you will be running on an HPC infrastructure using MPI, running with | ||
multiple GPUs. | ||
|
||
### Why MPI? | ||
|
||
Sampling protein-protein interactions for Free Energy calculations is a very computationally costly task, by | ||
parallelizing the replica propagation (lambda windows) using MPI and multiple GPUs a higher throughput can be | ||
achieved. | ||
|
||
### Running environment | ||
|
||
Make sure your environment has a `mpiplus` and `mpi4py` installed in your environment, besides of `perses`. | ||
|
||
### Pipeline | ||
|
||
Please note that in order to run in an MPI environment we have to separate the generation of the Hybrid Topology | ||
with respect to the actual computation parts of the repex algorithm. | ||
|
||
1. Generate HTFs with | ||
```bash | ||
python generate_htfs.py ala_vacuum.pdb 2 THR results_dir | ||
``` | ||
Input pdb file can be found in https://github.com/choderalab/perses/blob/main/perses/data/ala_vacuum.pdb . | ||
Please download the files from the mentioned URL to run this command. | ||
|
||
|
||
|
||
2. Run with MPI (same host with 2 different GPUs) using | ||
```bash | ||
mpiexec -f hostfile -configfile configfile | ||
``` | ||
you may need to adapt the command and the `hostfile` and `configfile` to your MPI environment. We have | ||
previously prepared the config files for practical purposes. How to set up an MPI environment and files | ||
is beyond the scope of this example. |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,2 @@ | ||
-np 1 -env CUDA_VISIBLE_DEVICES 0 run_repex.py results_dir solvent 36 10000 300 | ||
-np 1 -env CUDA_VISIBLE_DEVICES 1 run_repex.py results_dir solvent 36 10000 300 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,82 @@ | ||
import os | ||
import pickle | ||
import argparse | ||
from pathlib import Path | ||
|
||
import openmm | ||
from openmm import unit, app | ||
from perses.app.relative_point_mutation_setup import PointMutationExecutor | ||
from perses.utils.smallmolecules import render_protein_residue_atom_mapping | ||
|
||
# Read args | ||
parser = argparse.ArgumentParser(description='run equilibration') | ||
parser.add_argument('input_filename', type=str, help='protein file') | ||
parser.add_argument('resid', type=str, help='residue id') | ||
parser.add_argument('mutant_aa', type=str, help='amino acid to mutate to') | ||
parser.add_argument('outdir', type=str, help='output directory') | ||
parser.add_argument('--ligand_input', type=str, help='ligand input file') | ||
parser.add_argument('--is_vacuum', action='store_true', help='whether to generate a vacuum htf') | ||
parser.add_argument('--old_residue', type=str, help='old residue nonstandard name') | ||
args = parser.parse_args() | ||
|
||
# Set parameters for input to `PointMutationExecutor` | ||
forcefield_files = ['amber14/protein.ff14SB.xml', 'amber14/tip3p.xml'] | ||
forcefield_kwargs = {'removeCMMotion': False, 'constraints' : app.HBonds, 'rigidWater': True, 'hydrogenMass' : 3 * unit.amus} | ||
|
||
if not args.is_vacuum: | ||
is_vacuum = False | ||
is_solvated = True | ||
barostat = openmm.MonteCarloBarostat(1.0 * unit.atmosphere, 300 * unit.kelvin, 50) | ||
periodic_forcefield_kwargs = {'nonbondedMethod': app.PME, 'ewaldErrorTolerance': 0.00025} | ||
nonperiodic_forcefield_kwargs = None | ||
else: | ||
is_vacuum = True | ||
is_solvated = False | ||
barostat = None | ||
periodic_forcefield_kwargs = None | ||
nonperiodic_forcefield_kwargs = {'nonbondedMethod': app.NoCutoff} | ||
|
||
conduct_endstate_validation = False | ||
w_lifting = 0.3 * unit.nanometer | ||
generate_unmodified_hybrid_topology_factory = False | ||
generate_rest_capable_hybrid_topology_factory = True | ||
|
||
# Generate htfs | ||
solvent_delivery = PointMutationExecutor(args.input_filename, | ||
'1', | ||
args.resid, | ||
args.mutant_aa, | ||
old_residue=args.old_residue, | ||
is_vacuum=is_vacuum, | ||
is_solvated=is_solvated, | ||
forcefield_files=forcefield_files, | ||
barostat=barostat, | ||
forcefield_kwargs=forcefield_kwargs, | ||
periodic_forcefield_kwargs=periodic_forcefield_kwargs, | ||
nonperiodic_forcefield_kwargs=nonperiodic_forcefield_kwargs, | ||
conduct_endstate_validation=conduct_endstate_validation, | ||
w_lifting=w_lifting, | ||
generate_unmodified_hybrid_topology_factory=generate_unmodified_hybrid_topology_factory, | ||
generate_rest_capable_hybrid_topology_factory=generate_rest_capable_hybrid_topology_factory | ||
) | ||
|
||
# Saving htfs as pickles | ||
print("Saving htfs as pickles") | ||
apo_rest_htf = solvent_delivery.get_apo_rest_htf() | ||
phase = 'vacuum' if args.is_vacuum else 'apo' | ||
|
||
results_dir = args.outdir | ||
|
||
if not os.path.exists(results_dir): | ||
os.makedirs(results_dir) | ||
with open(os.path.join(args.outdir, f"htf_{phase}.pickle"), "wb") as f: | ||
pickle.dump(apo_rest_htf, f) | ||
|
||
# Render atom map | ||
atom_map_filename = f'{args.outdir}/atom_map.png' | ||
render_protein_residue_atom_mapping(apo_rest_htf._topology_proposal, atom_map_filename) | ||
|
||
# Save pdbs | ||
app.PDBFile.writeFile(apo_rest_htf._topology_proposal.old_topology, apo_rest_htf.old_positions(apo_rest_htf.hybrid_positions), open(os.path.join(results_dir, f"{phase}_old.pdb"), "w"), keepIds=True) | ||
app.PDBFile.writeFile(apo_rest_htf._topology_proposal.new_topology, apo_rest_htf.new_positions(apo_rest_htf.hybrid_positions), open(os.path.join(results_dir, f"{phase}_new.pdb"), "w"), keepIds=True) | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,2 @@ | ||
node1 | ||
node1 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,113 @@ | ||
import os | ||
import pickle | ||
import argparse | ||
import logging | ||
from pathlib import Path | ||
|
||
from simtk import openmm | ||
from simtk.openmm import unit | ||
|
||
from openmmtools import mcmc | ||
from openmmtools import cache, utils | ||
from openmmtools.multistate import MultiStateReporter | ||
|
||
from perses.dispersed.utils import configure_platform | ||
from perses.samplers.multistate import HybridRepexSampler | ||
|
||
from mdtraj.core.residue_names import _SOLVENT_TYPES | ||
|
||
# Set up logger | ||
_logger = logging.getLogger() | ||
_logger.setLevel(logging.INFO) | ||
|
||
# Configure platform | ||
platform = configure_platform(utils.get_fastest_platform().getName()) | ||
#platform.setPropertyDefaultValue('UseBlockingSync', 'false') | ||
|
||
# Load arguments | ||
parser = argparse.ArgumentParser(description='run repex') | ||
parser.add_argument('dir', type=str, help='path to input/output dir') | ||
parser.add_argument('phase', type=str, help='phase of the simulation to use in storage filename') | ||
parser.add_argument('n_states', type=int, help='number of states') | ||
parser.add_argument('n_cycles', type=int, help='number of iterations to run') | ||
parser.add_argument('t_max', type=int, help='maximum temperature to use for rest scaling') | ||
parser.add_argument('--restraint', type=str, help="the atoms to restrain, if any: 'CA', 'heavy', 'heavy-solvent'") | ||
parser.add_argument('--force_constant', type=float, help='the force constant to use for restraints in kcal/molA^2') | ||
args = parser.parse_args() | ||
|
||
# Load hybrid topology factory | ||
directory_number = Path(args.dir).parts[-2] | ||
htf = pickle.load(open(os.path.join(args.dir, f"{directory_number}_{args.phase}.pickle"), "rb" )) | ||
hybrid_system = htf.hybrid_system | ||
hybrid_positions = htf.hybrid_positions | ||
|
||
# Make sure LRC is set correctly | ||
force_dict = {force.getName(): index for index, force in enumerate(hybrid_system.getForces())} | ||
htf_class_name = htf.__class__.__name__ | ||
custom_force_name = 'CustomNonbondedForce' | ||
nonbonded_force_name = 'NonbondedForce' | ||
if htf_class_name == 'RESTCapableHybridTopologyFactory': | ||
custom_force_name += '_sterics' | ||
nonbonded_force_name += '_sterics' | ||
custom_force = hybrid_system.getForce(force_dict[custom_force_name]) | ||
nonbonded_force = hybrid_system.getForce(force_dict[nonbonded_force_name]) | ||
_logger.info(f"{custom_force_name} use LRC? {custom_force.getUseLongRangeCorrection()}") | ||
_logger.info(f"{nonbonded_force_name} use LRC? {nonbonded_force.getUseDispersionCorrection()}") | ||
|
||
# Add virtual bond for complex phase | ||
if args.phase == 'complex': | ||
chain_A = 0 | ||
chain_B = 2 | ||
chains = list(htf.hybrid_topology.chains) | ||
atom_A = list(chains[chain_A].atoms)[0] | ||
atom_B = list(chains[chain_B].atoms)[0] | ||
force = openmm.CustomBondForce('0') | ||
force.addBond(atom_A.index, atom_B.index, []) | ||
hybrid_system.addForce(force) | ||
_logger.info(f"Added virtual bond between {atom_A} and {atom_B}") | ||
|
||
# Add restraints | ||
if args.restraint is not None: | ||
topology = htf.hybrid_topology | ||
solvent_types = list(_SOLVENT_TYPES) | ||
force_constant = args.force_constant*unit.kilocalories_per_mole/unit.angstrom**2 if args.force_constant is not None else 50*unit.kilocalories_per_mole/unit.angstrom**2 | ||
_logger.info(f"Adding restraint to {args.restraint} atoms with force constant {force_constant}") | ||
|
||
if args.restraint == 'heavy': | ||
atom_indices = [atom.index for atom in topology.atoms if atom.residue.name not in solvent_types and atom.element.name != 'hydrogen'] | ||
elif args.restraint == 'CA': | ||
atom_indices = [atom.index for atom in topology.atoms if atom.residue.name not in solvent_types and atom.name == 'CA'] | ||
elif args.restraint == 'heavy-solvent': | ||
atom_indices = [atom.index for atom in topology.atoms if atom.element.name != 'hydrogen'] | ||
else: | ||
raise Exception("Invalid restraint string specified") | ||
|
||
_logger.info(atom_indices) | ||
custom_cv_force = openmm.CustomCVForce('(K_RMSD/2)*(RMSD)^2') | ||
custom_cv_force.addGlobalParameter('K_RMSD', force_constant * 2) | ||
rmsd_force = openmm.RMSDForce(hybrid_positions, atom_indices) | ||
custom_cv_force.addCollectiveVariable('RMSD', rmsd_force) | ||
hybrid_system.addForce(custom_cv_force) | ||
|
||
# Instantiate sampler | ||
_logger.setLevel(logging.DEBUG) | ||
reporter_file = os.path.join(os.path.join(args.dir, f"{directory_number}_{args.phase}.nc")) | ||
reporter = MultiStateReporter(reporter_file, checkpoint_interval=100) | ||
move = mcmc.LangevinDynamicsMove(timestep= 4.0 * unit.femtoseconds, | ||
collision_rate=1.0 / unit.picosecond, | ||
n_steps=250, | ||
reassign_velocities=False, | ||
constraint_tolerance=1e-06) | ||
sampler = HybridRepexSampler(mcmc_moves=move, | ||
replica_mixing_scheme='swap-all', | ||
hybrid_factory=htf, | ||
online_analysis_interval=None) | ||
sampler.setup(n_states=args.n_states, temperature=300*unit.kelvin, t_max=args.t_max * unit.kelvin, storage_file=reporter, endstates=True) | ||
|
||
# Create context caches | ||
sampler.energy_context_cache = cache.ContextCache(capacity=None, time_to_live=None, platform=platform) | ||
sampler.sampler_context_cache = cache.ContextCache(capacity=None, time_to_live=None, platform=platform) | ||
|
||
# Run simulation | ||
sampler.extend(args.n_cycles) | ||
|