# Here, I document an attempt to validate a small set of protein mutations in vacuum and solvent with the following checks...

1. generate alanine dipeptide --> valine dipeptide in vacuum/solvent and conduct a forward _and_ reverse parallel tempering FEP calculation; the check passes if the forward free energy is equal in magnitude (and opposite in sign) to the reverse free energy within an error tolerance
2. generate alanine dipeptide --> valine dipeptide --> isoleucine dipeptide --> glycine dipeptide and attempt to close the thermodynamic cycle within an error tolerance

In [1]:
from __future__ import absolute_import

from perses.dispersed import feptasks
from perses.utils.openeye import *
from perses.utils.data import load_smi
from perses.annihilation.relative import HybridTopologyFactory
from perses.annihilation.lambda_protocol import RelativeAlchemicalState, LambdaProtocol
from perses.rjmc.topology_proposal import TopologyProposal, TwoMoleculeSetProposalEngine, SystemGenerator,SmallMoleculeSetProposalEngine, PointMutationEngine
from perses.rjmc.geometry import FFAllAngleGeometryEngine
import simtk.openmm.app as app

from openmmtools.states import ThermodynamicState, CompoundThermodynamicState, SamplerState

import pymbar
import simtk.openmm as openmm
import simtk.openmm.app as app
import simtk.unit as unit
import numpy as np
from openmoltools import forcefield_generators
import copy
import pickle
import mdtraj as md
from io import StringIO
from openmmtools.constants import kB
import logging
import os
import dask.distributed as distributed
import parmed as pm
from collections import namedtuple
from typing import List, Tuple, Union, NamedTuple
from collections import namedtuple
import random
#beta = 1.0/(kB*temperature)
import itertools
import os
from openeye import oechem
from perses.utils.smallmolecules import render_atom_mapping
from perses.tests.utils import validate_endstate_energies

# Energy tolerance constant; it is not referenced in the cells visible here
# (presumably consumed by an endstate-energy validation -- TODO confirm).
ENERGY_THRESHOLD = 1e-6
# Simulation temperature, and the thermal energy derived from it.
temperature = 300 * unit.kelvin
kT = kB * temperature
# Inverse temperature; handed to geometry_engine.propose(...) in generate_top_pos_sys.
beta = 1.0/kT

  n_2 = np.dot(a, a)
  d_ang = np.dot(angle_rotation_matrix, d_r)
  d_torsion = np.dot(torsion_rotation_matrix, d_ang)
  cos_phi = np.dot(plane1, plane2) / (_norm(plane1)*_norm(plane2))
  if np.dot(a, plane2) <= 0:


## First, we have to update the alanine dipeptide test system with a system generator and the Amber ff14SB force field

In [2]:
import openmmtools.testsystems as ts

# Vacuum alanine dipeptide test system with H-bond constraints and 4 amu hydrogens.
atp = ts.AlanineDipeptideVacuum(
    constraints=app.HBonds,
    hydrogenMass=4 * unit.amus,
)

# Print every chain id, then each residue's (name, id, index) followed by its
# atoms' (name, index, id), with a blank line after each residue.
for chain in atp.topology.chains():
    print(chain.id)
    for res in chain.residues():
        print(res.name, res.id, res.index)
        for atom in res.atoms():
            print(atom.name, atom.index, atom.id)
        print()

# Force-field definition files and the system-building options given to the generator.
forcefield_files = [
    'gaff.xml',
    'amber14/protein.ff14SB.xml',
    'amber14/tip3p.xml',
]
forcefield_kwargs = {
    'removeCMMotion': False,
    'ewaldErrorTolerance': 1e-4,
    'nonbondedMethod': app.NoCutoff,
    'constraints': app.HBonds,
    'hydrogenMass': 4 * unit.amus,
}
system_generator = SystemGenerator(
    forcefield_files,
    barostat=None,
    forcefield_kwargs=forcefield_kwargs,
)

# Replace the test system's stock System with one built by the generator above.
atp.system = system_generator.build_system(atp.topology)

1
ACE 1 0
H1 0 1
CH3 1 2
H2 2 3
H3 3 4
C 4 5
O 5 6

ALA 2 1
N 6 7
H 7 8
CA 8 9
HA 9 10
CB 10 11
HB1 11 12
HB2 12 13
HB3 13 14
C 14 15
O 15 16

NME 3 2
N 16 17
H 17 18
C 18 19
H1 19 20
H2 20 21
H3 21 22



In [6]:
def generate_top_pos_sys(topology, new_res, system, positions, system_generator = system_generator):
    """Propose a point mutation and build the matching hybrid topology factory.

    Creates a point mutation engine and a geometry engine, runs the topology
    proposal and the geometry proposal, and wraps both in a
    HybridTopologyFactory.

    Parameters
    ----------
    topology
        Topology of the current (old) system; passed to the mutation engine as
        the wildtype topology and as the current topology of the proposal.
    new_res
        Three-letter residue code that residue id '2' of chain '1' may mutate to.
    system
        System corresponding to ``topology``.
    positions
        Positions of the current (old) system.
    system_generator
        Generator handed to the point mutation engine. The default is the
        module-level ``system_generator`` as bound when this function is defined.

    Returns
    -------
    tuple
        ``(topology_proposal, new_positions, forward_htf)``: the topology
        proposal, the positions returned by the geometry proposal, and the
        hybrid topology factory built from both.
    """
    #create the point mutation engine
    point_mutation_engine = PointMutationEngine(wildtype_topology = topology,
                                                system_generator = system_generator,
                                                chain_id = '1', #denote the chain id allowed to mutate (it's always a string variable)
                                                max_point_mutants = 1,
                                                residues_allowed_to_mutate = ['2'], #the residue ids allowed to mutate
                                                allowed_mutations = [('2', new_res)], #the residue ids allowed to mutate with the three-letter code allowed to change
                                                aggregate = True) #always allow aggregation

    #create a geometry engine
    geometry_engine = FFAllAngleGeometryEngine(metadata=None,
                                               use_sterics=False,
                                               n_bond_divisions=100,
                                               n_angle_divisions=180,
                                               n_torsion_divisions=360,
                                               verbose=True,
                                               storage=None,
                                               bond_softening_constant=1.0,
                                               angle_softening_constant=1.0,
                                               neglect_angles = False,
                                               use_14_nonbondeds = False)

    #create a top proposal; only the proposal itself is used below
    topology_proposal, _local_map_stereo_sidechain, _new_oemol_sidechain, _old_oemol_sidechain = point_mutation_engine.propose(
        current_system = system,
        current_topology = topology)

    #make a geometry proposal forward
    # The result was previously bound to `forward_new_positions`, while the code
    # below read the undefined name `new_positions` (NameError on every call).
    new_positions, _logp_proposal = geometry_engine.propose(topology_proposal, positions, beta)

    #create a hybrid topology factory
    forward_htf = HybridTopologyFactory(topology_proposal = topology_proposal,
                                        current_positions = positions,
                                        new_positions = new_positions,
                                        use_dispersion_correction = False,
                                        functions=None,
                                        softcore_alpha = None,
                                        bond_softening_constant=1.0,
                                        angle_softening_constant=1.0,
                                        soften_only_new = False,
                                        neglected_new_angle_terms = [],
                                        neglected_old_angle_terms = [],
                                        softcore_LJ_v2 = True,
                                        softcore_electrostatics = True,
                                        softcore_LJ_v2_alpha = 0.85,
                                        softcore_electrostatics_alpha = 0.3,
                                        softcore_sigma_Q = 1.0,
                                        interpolate_old_and_new_14s = False,
                                        omitted_terms = None)

    return topology_proposal, new_positions, forward_htf


## Next, we have to make a map from alanine to valine

In [7]:
# Forward mutation (residue -> 'VAL') on the vacuum alanine dipeptide; keeps the
# topology proposal, the proposed positions, and the hybrid topology factory.
ala_val_top_prop, val_pos, ala_val_htf = generate_top_pos_sys(atp.topology, 'VAL', atp.system, atp.positions)

## And a map from valine to alanine

In [8]:
# Reverse mutation (residue -> 'ALA'), starting from the topology, system and
# positions produced by the forward proposal. Uses the proposal's public
# properties rather than its underscore-prefixed attributes.
val_alanine_top_prop, ala_pos, val_ala_htf = generate_top_pos_sys(
    ala_val_top_prop.new_topology,
    'ALA',
    ala_val_top_prop.new_system,
    val_pos,
)

## now build the repex samplers

In [9]:
from perses.samplers.multistate import HybridSAMSSampler, HybridRepexSampler
from openmmtools.multistate import MultiStateReporter, MultiStateSamplerAnalyzer
from openmmtools import mcmc, utils
from perses.annihilation.lambda_protocol import LambdaProtocol

Desired platform not supported
Desired platform not supported
Desired platform not supported


In [15]:
def create_hss(reporter_name, hybrid_factory, selection_string ='all', checkpoint_interval = 1, n_states = 13):
    """Build and set up a HybridRepexSampler together with its reporter.

    Parameters
    ----------
    reporter_name
        Storage name handed to MultiStateReporter.
    hybrid_factory
        Hybrid factory to sample; its ``hybrid_topology`` is also used to
        select the particle indices stored for analysis.
    selection_string
        Selection passed to ``hybrid_topology.select`` (default 'all').
    checkpoint_interval
        Checkpoint interval forwarded to the reporter.
    n_states
        Number of states forwarded to ``setup``.

    Returns
    -------
    tuple
        ``(hss, reporter)``: the sampler after ``setup`` and its reporter.
    """
    lambda_protocol = LambdaProtocol(functions='default')

    # Reporter that stores only the selected particles for analysis.
    analysis_indices = hybrid_factory.hybrid_topology.select(selection_string)
    reporter = MultiStateReporter(
        reporter_name,
        analysis_particle_indices=analysis_indices,
        checkpoint_interval=checkpoint_interval,
    )

    # Langevin splitting move used to propagate every replica.
    langevin_move = mcmc.LangevinSplittingDynamicsMove(
        timestep=4.0 * unit.femtoseconds,
        collision_rate=5.0 / unit.picosecond,
        n_steps=50,
        reassign_velocities=False,
        n_restart_attempts=20,
        splitting="V R R R O R R R V",
        constraint_tolerance=1e-06,
    )

    hss = HybridRepexSampler(
        mcmc_moves=langevin_move,
        hybrid_factory=hybrid_factory,
        online_analysis_interval=10,
    )
    hss.setup(
        n_states=n_states,
        temperature=300*unit.kelvin,
        storage_file=reporter,
        lambda_protocol=lambda_protocol,
        endstates=False,
    )
    return hss, reporter

In [22]:
# Delete every *.nc file in the current working directory (presumably stale
# reporter files from earlier runs). Plain string: there is nothing to interpolate.
os.system("rm *.nc")

0

In [23]:
# Build the sampler and reporter for the forward hybrid factory, stored in
# 'ala_val.nc', then run 10 equilibration iterations.
ala_val_hss, ala_val_reporter = create_hss('ala_val.nc', ala_val_htf)
ala_val_hss.equilibrate(10)
# The sampler for the reverse hybrid factory is currently disabled:
# val_ala_hss, val_ala_reporter = create_hss('val_ala.nc', val_ala_htf)

DEBUG:openmmtools.multistate.multistatereporter:Initial checkpoint file automatically chosen as ala_val_checkpoint.nc
DEBUG:mpiplus.mpiplus:Single node: executing <bound method MultiStateReporter.storage_exists of <openmmtools.multistate.multistatereporter.MultiStateReporter object at 0x7f32c7209be0>>
DEBUG:mpiplus.mpiplus:Single node: executing <function ReplicaExchangeSampler._display_citations at 0x7f32d8e34510>
DEBUG:mpiplus.mpiplus:Single node: executing <function MultiStateSampler._display_citations at 0x7f32d8ee6730>
DEBUG:mpiplus.mpiplus:Single node: executing <function MultiStateSampler._initialize_reporter at 0x7f32d8ee69d8>
DEBUG:openmmtools.multistate.multistatereporter:Serialized state thermodynamic_states/0 is  6463B | 6.312KB | 0.006MB
DEBUG:openmmtools.utils:Storing thermodynamic states took    0.019s
DEBUG:openmmtools.multistate.multistatesampler:Storing general ReplicaExchange options...
DEBUG:mpiplus.mpiplus:Single node: executing <function MultiStateSampler._report_

Please cite the following:

        Friedrichs MS, Eastman P, Vaidyanathan V, Houston M, LeGrand S, Beberg AL, Ensign DL, Bruns CM, and Pande VS. Accelerating molecular dynamic simulations on graphics processing unit. J. Comput. Chem. 30:864, 2009. DOI: 10.1002/jcc.21209
        Eastman P and Pande VS. OpenMM: A hardware-independent framework for molecular simulations. Comput. Sci. Eng. 12:34, 2010. DOI: 10.1109/MCSE.2010.27
        Eastman P and Pande VS. Efficient nonbonded interactions for molecular dynamics on a graphics processing unit. J. Comput. Chem. 31:1268, 2010. DOI: 10.1002/jcc.21413
        Eastman P and Pande VS. Constant constraint matrix approximation: A robust, parallelizable constraint method for molecular simulations. J. Chem. Theor. Comput. 6:434, 2010. DOI: 10.1021/ct900463w
        Chodera JD and Shirts MR. Replica exchange and expanded ensemble simulations as Gibbs multistate: Simple improvements for enhanced mixing. J. Chem. Phys., 135:194110, 2011. DOI:10.1063/

DEBUG:mpiplus.mpiplus:Running _get_replica_move_statistics serially.
DEBUG:openmmtools.utils:Propagating all replicas took    0.506s
DEBUG:openmmtools.multistate.multistatesampler:Equilibration iteration 1/10
DEBUG:openmmtools.multistate.multistatesampler:Propagating all replicas...
DEBUG:mpiplus.mpiplus:Running _propagate_replica serially.
DEBUG:mpiplus.mpiplus:Running _get_replica_move_statistics serially.
DEBUG:openmmtools.utils:Propagating all replicas took    0.372s
DEBUG:openmmtools.multistate.multistatesampler:Equilibration iteration 2/10
DEBUG:openmmtools.multistate.multistatesampler:Propagating all replicas...
DEBUG:mpiplus.mpiplus:Running _propagate_replica serially.
DEBUG:mpiplus.mpiplus:Running _get_replica_move_statistics serially.
DEBUG:openmmtools.utils:Propagating all replicas took    0.401s
DEBUG:openmmtools.multistate.multistatesampler:Equilibration iteration 3/10
DEBUG:openmmtools.multistate.multistatesampler:Propagating all replicas...
DEBUG:mpiplus.mpiplus:Running 

In [None]:
# val_ala_hss, val_ala_reporter = create_hss('val_ala.nc', val_ala_htf)

## As a quick test, we will check to see whether the ala --> val thermostate 1 is equal in reduced energy to the val --> ala thermostate 0

In [None]:
def create_thermostate(hybrid_system):
    """Placeholder for building a thermodynamic state from ``hybrid_system``.

    The original cell held only the function header (without a colon or body),
    which is a SyntaxError. The intended implementation is not shown, so this
    stub fails loudly instead of guessing at it.

    Raises
    ------
    NotImplementedError
        Always, until the function is implemented.
    """
    raise NotImplementedError("create_thermostate is not implemented yet")

In [None]:
#instantiate thermodynamic state
# NOTE(review): `self` is not defined in any cell visible here, so this looks
# like a snippet pasted from a class method; presumably it raises NameError if
# run as-is -- confirm and adapt to the notebook's own variables.
# Alchemical parameters are set with the value 0.0 before composing the state.
lambda_alchemical_state = RelativeAlchemicalState.from_system(self.factory.hybrid_system)
lambda_alchemical_state.set_alchemical_parameters(0.0, LambdaProtocol(functions = self.lambda_protocol))
self.thermodynamic_state = CompoundThermodynamicState(ThermodynamicState(self.factory.hybrid_system, temperature = self.temperature),composable_states = [lambda_alchemical_state])

# set the SamplerState for the lambda 0 and 1 equilibrium simulations
# Built from the factory's hybrid positions and the hybrid system's default box vectors.
sampler_state = SamplerState(self.factory.hybrid_positions,
                                  box_vectors=self.factory.hybrid_system.getDefaultPeriodicBoxVectors())

In [None]:
from openmmtools.states import ThermodynamicState, CompoundThermodynamicState, SamplerState