## This notebook extends the existing `ala_to_ser_and_cys_to_ser_analysis.ipynb`: it times individual integrator steps for the ALA→SER and CYS→SER hybrid systems in solvent, and then repeats the timing in vacuum with a `STRONG` atom map.

In [46]:
from pymbar import timeseries
from pymbar import MBAR
from perses.analysis import utils
import os

from openmmtools.multistate import MultiStateReporter, MultiStateSamplerAnalyzer
import networkx as nx
from itertools import combinations 
import time
import pickle
from perses.dispersed.utils import minimize
# from ala_to_ser_and_cys_to_ser import query_constraints, print_hybrid_atoms

## it looks like ALA --> SER (and reverse) works; let's look at the topologies and systems to see the transformation...

In [5]:
def query_constraints(htf):
    """
    Print every constraint of the hybrid system together with the atoms it connects.

    Each constraint ``(i, j, distance)`` is read from ``htf._hybrid_system``. The hybrid
    indices are translated through ``htf._hybrid_to_old_map`` and ``htf._hybrid_to_new_map``
    and looked up in the old/new topologies to obtain ``(residue name, atom name)`` labels.
    A constraint is reported as "core" when both atoms resolve in both topologies, "old" or
    "new" when they resolve in only one, and "there is a problem" when they resolve in neither.

    Parameters
    ----------
    htf : object
        Object exposing ``_hybrid_system``, ``_hybrid_to_old_map``, ``_hybrid_to_new_map`` and
        ``_topology_proposal`` (which in turn exposes ``_old_topology`` and ``_new_topology``).

    Raises
    ------
    AssertionError
        If at least one constraint could not be resolved in either topology.
    """
    hybrid_sys = htf._hybrid_system
    constraints = [hybrid_sys.getConstraintParameters(idx)
                   for idx in range(hybrid_sys.getNumConstraints())]

    hybr_to_old = htf._hybrid_to_old_map
    hybr_to_new = htf._hybrid_to_new_map
    old_top, new_top = htf._topology_proposal._old_topology, htf._topology_proposal._new_topology
    old_top_dict = {atom.index: (atom.residue.name, atom.name) for atom in old_top.atoms()}
    new_top_dict = {atom.index: (atom.residue.name, atom.name) for atom in new_top.atoms()}

    def _resolve_pair(hybrid_map, top_dict, i, j):
        """Return the labels of both constrained atoms, or None if either one is unmapped."""
        try:
            return top_dict[hybrid_map[i]], top_dict[hybrid_map[j]]
        except LookupError:
            # Only a missing key/index means "not part of this topology";
            # any other exception is a real error and is allowed to propagate.
            return None

    unresolved = []
    for (i, j, distance) in constraints:
        print(f"hybrid indices: {i}, {j}")

        old_pair = _resolve_pair(hybr_to_old, old_top_dict, i, j)
        if old_pair is not None:
            print('\t', old_pair[0], old_pair[1], distance)

        new_pair = _resolve_pair(hybr_to_new, new_top_dict, i, j)
        if new_pair is not None:
            print('\t', new_pair[0], new_pair[1], distance)

        old, new = old_pair is not None, new_pair is not None
        if old and new:
            print("\tthis is core")
        elif old:
            print("\tthis is old")
        elif new:
            print("\tthis is new")
        else:
            print("\tthere is a problem")
            unresolved.append((i, j))

    # Every constraint must belong to the old topology, the new topology, or both.
    assert not unresolved, (
        f"{len(unresolved)} of {len(constraints)} constraints could not be mapped "
        f"to the old or new topology (hybrid index pairs: {unresolved})"
    )
        

In [57]:
def print_hybrid_atoms(htf):
    """
    Print, for every particle of the hybrid system, its mass and its old/new topology atom.

    Each hybrid particle index is translated through ``htf._hybrid_to_new_map`` and
    ``htf._hybrid_to_old_map`` and looked up in the new/old topology to obtain a
    ``(residue name, atom name)`` label. The particle is then reported as a "core atom"
    (present in both), "old atom", "new atom", or "this atom is undefined" (present in neither).

    Parameters
    ----------
    htf : object
        Object exposing ``_hybrid_system``, ``_hybrid_to_old_map``, ``_hybrid_to_new_map`` and
        ``_topology_proposal`` (which in turn exposes ``_old_topology`` and ``_new_topology``).
    """
    hybrid_sys = htf._hybrid_system
    hybr_to_old = htf._hybrid_to_old_map
    hybr_to_new = htf._hybrid_to_new_map
    old_top, new_top = htf._topology_proposal._old_topology, htf._topology_proposal._new_topology
    old_top_dict = {atom.index: (atom.residue.name, atom.name) for atom in old_top.atoms()}
    new_top_dict = {atom.index: (atom.residue.name, atom.name) for atom in new_top.atoms()}

    for particle_idx in range(hybrid_sys.getNumParticles()):
        print(f"hybrid_index: {particle_idx}")
        print(f"hybrid mass: {hybrid_sys.getParticleMass(particle_idx)}")

        # Only a failed lookup means "not in this topology"; other errors
        # (and KeyboardInterrupt) are no longer silently swallowed.
        try:
            new_atom = new_top_dict[hybr_to_new[particle_idx]]
        except LookupError:
            new = False
        else:
            print(f"\tnew atom map: {new_atom}")
            new = True

        try:
            old_atom = old_top_dict[hybr_to_old[particle_idx]]
        except LookupError:
            old = False
        else:
            print(f"\told atom map: {old_atom}")
            old = True

        if old and new:
            print("\tcore atom")
        elif old:
            print("\told atom")
        elif new:
            print("\tnew atom")
        else:
            print("\tthis atom is undefined")
        print()

In [58]:
# Load the pickled ALA->SER object (solvent, default atom map per the filename).
# Later cells read `_hybrid_system`, `_hybrid_positions`, `_new_positions` and
# `_topology_proposal` from it — presumably a hybrid topology factory; verify.
# NOTE(review): pickle.load can execute arbitrary code; only unpickle files you produced yourself.
with open('ALA_SER.solvent.default_map.pkl', 'rb') as f:
    ALA_SER = pickle.load(f)

In [59]:
# Load the pickled CYS->SER object (solvent, default atom map per the filename).
# NOTE(review): pickle.load can execute arbitrary code; only unpickle files you produced yourself.
with open('CYS_SER.solvent.default_map.pkl', 'rb') as f:
    CYS_SER = pickle.load(f)

In [72]:
# Number of constraints in the ALA->SER hybrid system (shown as the cell output).
ALA_SER._hybrid_system.getNumConstraints()

1548

In [60]:
from simtk import openmm
from openmmtools import integrators
import simtk.openmm as openmm  # NOTE(review): binds the same name `openmm` as the `from simtk import openmm` line above
from openmmtools.constants import kB
import simtk.unit as unit
# Module-level temperature; read as a global by create_langevin_integrator.
temperature = 300 * unit.kelvin
kT = kB * temperature  # thermal energy scale kB*T at `temperature`
beta = 1.0/kT  # inverse of kT
from openmmtools.states import ThermodynamicState, SamplerState, CompoundThermodynamicState
from openmmtools import mcmc, utils  # NOTE(review): rebinds `utils`, which the first cell imported from perses.analysis
import openmmtools.cache as cache
from perses.dispersed.utils import configure_platform
import openmmtools.integrators as integrators  # NOTE(review): same binding as `from openmmtools import integrators` above
from openmmtools import mcmc, utils  # NOTE(review): repeats the mcmc/utils import a few lines above
import numpy as np

In [61]:
def create_langevin_integrator(system, positions, constraint_tol):
    """
    Build a context driven by a Langevin integrator for `system`, load `positions` into it,
    and run an energy minimization.

    The thermodynamic state and the integrator both use the notebook-level `temperature`.
    The integrator is created with a 4 fs timestep, 'V R O R V' splitting and a collision
    rate of 5 / ps; the context is obtained from `cache.global_context_cache`.

    Parameters
    ----------
    system
        System passed to `ThermodynamicState`; its default periodic box vectors are used
        for the sampler state.
    positions
        Positions used to initialise the sampler state.
    constraint_tol
        Forwarded to the integrator as `constraint_tolerance`.

    Returns
    -------
    tuple
        (context, thermodynamic_state, sampler_state, integrator), where the context and
        integrator are the pair returned by the global context cache.
    """
    fast_thermodynamic_state = ThermodynamicState(system, temperature = temperature)

    # The original had a dangling line-continuation backslash after this call; it is removed
    # so that stripping the following blank line cannot glue this statement to the next one.
    fast_sampler_state = SamplerState(positions = positions, box_vectors = system.getDefaultPeriodicBoxVectors())

    integrator_1 = integrators.LangevinIntegrator(temperature = temperature,
                                                     timestep = 4.0* unit.femtoseconds,
                                                     splitting = 'V R O R V',
                                                     measure_shadow_work = False,
                                                     measure_heat = False,
                                                     constraint_tolerance = constraint_tol,
                                                     collision_rate = 5.0 / unit.picoseconds)

    fast_context, fast_integrator = cache.global_context_cache.get_context(fast_thermodynamic_state, integrator_1)

    fast_sampler_state.apply_to_context(fast_context)

    # NOTE(review): minimization runs after the sampler state was applied to `fast_context`,
    # and `minimize` is only handed the thermodynamic and sampler states. Whether the
    # minimized coordinates end up in `fast_context` depends on which context `minimize`
    # uses internally — confirm against perses.dispersed.utils.minimize.
    minimize(fast_thermodynamic_state, fast_sampler_state)

    return fast_context, fast_thermodynamic_state, fast_sampler_state, fast_integrator

def time_lan_integrator(integrator, num_steps = 10):
    """
    Time individual calls to `integrator.step(1)`.

    One untimed step is taken first; then `num_steps` single steps are each timed separately.

    Parameters
    ----------
    integrator
        Any object with a `step(n)` method.
    num_steps : int, default 10
        Number of timed single steps.

    Returns
    -------
    np.ndarray
        Array of length `num_steps` with the elapsed time of each step, in seconds.
    """
    import time

    # Untimed first step so one-off setup cost is not counted in the samples.
    integrator.step(1)

    step_durations = []
    for _ in range(num_steps):
        # perf_counter is monotonic and high-resolution; time.time() can jump when the
        # system clock is adjusted and is too coarse for sub-millisecond steps on some platforms.
        start = time.perf_counter()
        integrator.step(1)
        step_durations.append(time.perf_counter() - start)
    return np.array(step_durations)

In [62]:
def create_new_pdb(topology, positions, output_pdb = 'test_new.pdb'):
    """
    Save `positions` with `topology` as a PDB file at `output_pdb`.

    The positions are converted to nanometers, printed, wrapped in an mdtraj
    trajectory built from the OpenMM topology, and written to disk.
    """
    import mdtraj as md

    # Strip the units (nanometers) before handing the coordinates to mdtraj.
    coords_nm = positions.value_in_unit(unit.nanometer)
    _positions = np.array(coords_nm)
    print(_positions)

    md_topology = md.Topology.from_openmm(topology)
    md.Trajectory(_positions, md_topology).save(output_pdb)

In [67]:
# Build a context, thermodynamic state, sampler state and integrator for each solvent
# hybrid system; 1e-6 is passed as the integrator constraint tolerance.
ALA_SER_context, ALA_SER_thermodynamic_state, ALA_SER_sampler_state, ALA_SER_integrator = create_langevin_integrator(ALA_SER._hybrid_system, ALA_SER._hybrid_positions, 1e-6)
CYS_SER_context, CYS_SER_thermodynamic_state, CYS_SER_sampler_state, CYS_SER_integrator = create_langevin_integrator(CYS_SER._hybrid_system, CYS_SER._hybrid_positions, 1e-6)

DEBUG:sMC_utils:using global context cache
DEBUG:sMC_utils:using global context cache


In [68]:
# Write the new-topology coordinates of each transformation to a PDB file
# (create_new_pdb also prints the coordinate array, hence the output below).
create_new_pdb(ALA_SER._topology_proposal._new_topology, ALA_SER._new_positions, output_pdb = 'ala_ser_solvent.pdb')
create_new_pdb(CYS_SER._topology_proposal._new_topology, CYS_SER._new_positions, output_pdb = 'cys_ser_solvent.pdb')

[[ 2.00000100e-01  1.00000000e-01 -1.30000000e-07]
 [ 2.00000100e-01  2.09000000e-01  1.00000000e-08]
 [ 1.48626400e-01  2.45384900e-01  8.89824000e-02]
 ...
 [-7.09523750e-01 -1.79800000e-01  1.50599985e-01]
 [ 1.92876250e-01  1.30790000e+00  1.57479999e+00]
 [ 7.69876250e-01 -4.36700000e-01  1.90479998e+00]]
[[ 2.00000100e-01  1.00000000e-01 -1.30000000e-07]
 [ 2.00000100e-01  2.09000000e-01  1.00000000e-08]
 [ 1.48626400e-01  2.45384900e-01  8.89824000e-02]
 ...
 [-7.09523750e-01 -1.79800000e-01  1.50599985e-01]
 [ 1.92876250e-01  1.30790000e+00  1.57479999e+00]
 [ 7.69876250e-01 -4.36700000e-01  1.90479998e+00]]


In [75]:
# Time 1000 single integrator steps per system; ALA_SER is timed first, then CYS_SER.
ALA_SER_times, CYS_SER_times = time_lan_integrator(ALA_SER_integrator, num_steps = 1000), time_lan_integrator(CYS_SER_integrator, num_steps = 1000)

In [76]:
# Mean and standard deviation of the per-step times (seconds): ALA_SER first, then CYS_SER.
print(np.average(ALA_SER_times), np.std(ALA_SER_times))
print(np.average(CYS_SER_times), np.std(CYS_SER_times))

0.004523008346557617 0.0016671330598278938
0.004359082460403442 0.0015907414927481677


Wow, so the speed difference now goes away on the CPU?



In [73]:
# Snapshot energy, forces, positions and velocities from each solvent context.
ALA_SER_state = ALA_SER_context.getState(getEnergy=True, 
                                         getForces=True, 
                                         getPositions=True,
                                         getVelocities=True)
CYS_SER_state = CYS_SER_context.getState(getEnergy=True, 
                                         getForces=True, 
                                         getPositions=True,
                                         getVelocities=True)

In [74]:
# Serialize each captured State to its own XML file (solvent, default atom map).
filenames = ['ALA_SER.solvent.default_map.state.xml', 'CYS_SER.solvent.default_map.state.xml']
for filename, _state in zip(filenames, [ALA_SER_state, CYS_SER_state]):
    with open(filename, 'w') as f:
        # Use the public `openmm.XmlSerializer` rather than reaching through the
        # internal `openmm.openmm` wrapper submodule.
        f.write(openmm.XmlSerializer.serialize(_state))

Just for good measure: what happens if we make deep copies of the hybrid systems (`htf._hybrid_system`), remove the constraints, and run again?

## what happens when we use a strong mapping in vacuum?


In [77]:
# Repeat the load / build / time / report sequence for the vacuum, strong-atom-map pickles.
# NOTE(review): this rebinds ALA_SER, CYS_SER, the *_context/*_integrator names and
# *_times from the solvent cells, so later cells see whichever version ran last.
# NOTE(review): pickle.load can execute arbitrary code; only unpickle files you produced yourself.
with open('ALA_SER.vacuum.strong_map.pkl', 'rb') as f:
    ALA_SER = pickle.load(f)
with open('CYS_SER.vacuum.strong_map.pkl', 'rb') as f:
    CYS_SER = pickle.load(f)
    
# Same constraint tolerance (1e-6) as in the solvent run.
ALA_SER_context, ALA_SER_thermodynamic_state, ALA_SER_sampler_state, ALA_SER_integrator = create_langevin_integrator(ALA_SER._hybrid_system, ALA_SER._hybrid_positions, 1e-6)
CYS_SER_context, CYS_SER_thermodynamic_state, CYS_SER_sampler_state, CYS_SER_integrator = create_langevin_integrator(CYS_SER._hybrid_system, CYS_SER._hybrid_positions, 1e-6)

# 1000 timed single steps per system; ALA_SER first, then CYS_SER.
ALA_SER_times, CYS_SER_times = time_lan_integrator(ALA_SER_integrator, num_steps = 1000), time_lan_integrator(CYS_SER_integrator, num_steps = 1000)

# Mean and standard deviation of the per-step times (seconds).
print(np.average(ALA_SER_times), np.std(ALA_SER_times))
print(np.average(CYS_SER_times), np.std(CYS_SER_times))

DEBUG:sMC_utils:using global context cache
DEBUG:sMC_utils:using global context cache


0.000464937686920166 3.876953237497666e-05
0.0004548287391662598 3.5257700215192345e-05


In [78]:
# Snapshot energy, forces, positions and velocities from each vacuum context.
ALA_SER_state = ALA_SER_context.getState(getEnergy=True,
                                         getForces=True,
                                         getPositions=True,
                                         getVelocities=True)
CYS_SER_state = CYS_SER_context.getState(getEnergy=True,
                                         getForces=True,
                                         getPositions=True,
                                         getVelocities=True)

# Serialize each captured State to its own XML file (vacuum, strong atom map).
filenames = ['ALA_SER.vacuum.strong_map.state.xml', 'CYS_SER.vacuum.strong_map.state.xml']
for filename, _state in zip(filenames, [ALA_SER_state, CYS_SER_state]):
    with open(filename, 'w') as f:
        # Use the public `openmm.XmlSerializer` rather than reaching through the
        # internal `openmm.openmm` wrapper submodule.
        f.write(openmm.XmlSerializer.serialize(_state))

In [None]:
import copy

In [None]:
# Work on deep copies so the loaded CYS_SER object and its hybrid system stay untouched.
# Note that mod_system and mod_htf are independent copies: changing mod_system does not
# change the system held inside mod_htf.
mod_system = copy.deepcopy(CYS_SER._hybrid_system)
mod_htf = copy.deepcopy(CYS_SER)

In [None]:
# Print every constraint of the copied CYS->SER object with its old/new atom labels.
query_constraints(mod_htf)

In [None]:
#let's just delete the last constraint
num_const = mod_system.getNumConstraints()
mod_system.removeConstraint(num_const - 1)

In [None]:
# Build a context for the modified system (original CYS_SER hybrid positions, same 1e-6
# constraint tolerance) and time 1000 single integrator steps.
CYS_SER_mod_context, CYS_SER_mod_thermodynamic_state, CYS_SER_mod_sampler_state, CYS_SER_mod_integrator = create_langevin_integrator(mod_system, CYS_SER._hybrid_positions, 1e-6)
CYS_SER_mod_times = time_lan_integrator(CYS_SER_mod_integrator, num_steps = 1000)

In [None]:
# Compare per-step times (mean, std in seconds): unmodified CYS_SER vs. one constraint removed.
# CYS_SER_times is whatever the most recently executed timing cell assigned.
print(np.average(CYS_SER_times), np.std(CYS_SER_times))
print(np.average(CYS_SER_mod_times), np.std(CYS_SER_mod_times))

so, if we delete the new constraint between the hybrid OG/SG and the unique new HG, then we recover speed...why is this?