## Tutorial mdfeature 


Author: Zofia Trstanova   
Edits: Dom Phillips

In [None]:
%matplotlib inline
#import metadynamics
from openmm import *
from openmm.app import *
from openmm.unit import *
import matplotlib.pyplot as plt
import matplotlib.patches as patches
import matplotlib.image as image
from pyemma import msm
from scipy.interpolate import griddata
from matplotlib.pyplot import cm
import mdtraj as md
from ipywidgets import IntProgress
from IPython.display import display
import time
import numpy as np

import mdfeature.features as features

In [None]:
import matplotlib.pyplot as plt
import matplotlib as mpl

# Global matplotlib styling shared by every figure in this notebook.
# The font family/size are applied first through mpl.rc; the bulk update
# below then overrides 'font.size' (14 -> 15), as in the original ordering.
font = {'family': 'sans-serif', 'size': 14.0}
mpl.rc('font', **font)

mpl.rcParams.update({
    'font.size': 15,
    'figure.autolayout': True,
    'figure.figsize': (7.2, 4.45),
    'axes.titlesize': 16,
    'axes.labelsize': 17,
    'xtick.labelsize': 16,
    'ytick.labelsize': 16,
    'lines.linewidth': 2,
    'lines.markersize': 6,
    'legend.fontsize': 13,
})


## Initiating OpenMM to run or load a short trajectory

In [None]:
# Simulation settings reused by the cells below.
steps = 100          # integrator steps per outer iteration (and per saved frame)
iterations = 10000   # number of outer iterations of the simulation loop
temperature = 300    # bath temperature in kelvin

# Inverse temperature; 0.0083144621 is the molar gas constant in kJ/(mol K).
beta = 1.0 / (0.0083144621 * temperature)

Set up a Langevin dynamics simulation using the Amber14 force field at $300\,\mathrm{K}$, with a friction coefficient of $1\,\mathrm{ps}^{-1}$ and a step size of $0.002\,\mathrm{ps}$.

In [None]:
# Input structure and the file the simulation trajectory is written to.
pdb_name = 'alanine.pdb'
saving_file = 'trajectory.dcd'
pdb = PDBFile(pdb_name)

# Build the system from the Amber14 force-field files, with a non-periodic
# cutoff for nonbonded interactions and the HBonds constraint setting.
forcefield = ForceField('amber14-all.xml', 'amber14/spce.xml')
system = forcefield.createSystem(
    pdb.topology,
    nonbondedMethod=CutoffNonPeriodic,
    constraints=HBonds,
)

# Langevin integrator: bath temperature, friction coefficient, step size.
integrator = LangevinIntegrator(
    temperature*kelvin,
    1.0/picosecond,
    0.002*picoseconds,
)
simulation = Simulation(
    pdb.topology,
    system,
    integrator,
    platform=Platform.getPlatformByName('CPU'),
)

# Initial state: PDB coordinates and velocities drawn at the bath temperature.
simulation.context.setPositions(pdb.positions)
simulation.context.setVelocitiesToTemperature(temperature*kelvin)



The `features` module contains functions that return the names of the torsions. They take the topology table built in the next cell.

In [None]:
# Inspect the topology twice: as parsed by OpenMM and as parsed by MDTraj.
print(pdb.topology)

topology = md.load(pdb_name).topology
print(topology)

# Tabular form of the MDTraj topology; `table` is handed to the
# torsion-naming helpers in the following cells.
table, bonds = topology.to_dataframe()

Display the names of the torsion angles defined by the given atom indices ($\phi$, $\psi$).

In [None]:
import mdfeature.features as features

# Each torsion is defined by four atom indices.
phi = [4, 6, 8, 14]
psi = [6, 8, 14, 16]

# Names in the default format.
phi_name, psi_name = (
    features.get_name_torsion(atom_indices, pdb_file=pdb_name, table=table)
    for atom_indices in (phi, psi)
)
print('Long name', phi_name, psi_name, sep='\n')

# Names in the 'short' format.
phi_name_short, psi_name_short = (
    features.get_name_torsion(atom_indices, pdb_file=pdb_name, table=table, format='short')
    for atom_indices in (phi, psi)
)
print('Short name', phi_name_short, psi_name_short, sep='\n')

In [None]:
# Run the simulation (optional).
# Set `run = True` to generate a new trajectory in `saving_file`;
# with `run = False` nothing is simulated.
run = False

# Starting structure; its topology is also used when loading trajectories.
mdinit = md.load_pdb(pdb_name)

if run:
    # Write the starting structure first, then let the reporter append one
    # frame every `steps` integrator steps to the same DCD file.
    mdinit.save_dcd(saving_file)
    simulation.reporters.append(DCDReporter(saving_file, steps, append=True))

    bar = IntProgress(min=0, max=iterations)  # progress bar widget
    display(bar)

    for _ in range(iterations):
        simulation.step(steps)
        bar.value += 1  # count the iteration once it has completed

    # Final state of the run. The per-iteration values were never used, so
    # the context is queried once here instead of inside the loop.
    state = simulation.context.getState(getEnergy=True, enforcePeriodicBox=False)
    positions = simulation.context.getState(getPositions=True).getPositions()
        


## Load trajectory

In [None]:
# Load the trajectory that was just generated when the simulation cell ran
# (`run = True`); otherwise load the existing 'test_traj.dcd' file.
traj_file = saving_file if run else 'test_traj.dcd'
traj_std_tmp = md.load_dcd(traj_file, mdinit.topology)

# Drop the first `skip_first` frames (presumably equilibration).
skip_first = 1000
traj_orig = traj_std_tmp[skip_first:]
print(traj_orig)

# Align all frames onto the first retained frame.
traj_orig = traj_orig.superpose(traj_orig[0])

### Compute diffusionmap

The `diffusionmap` module computes diffusion maps from a trajectory.

We first show how to compute the vanilla diffusionmap

In [None]:
import mdfeature.diffusionmap as diffusionmap

# Vanilla diffusion map. `traj` is the trajectory returned alongside the map
# and is the one used for the feature computations in the following cells.
mydmap, traj = diffusionmap.compute_diffusionmaps(
    traj_orig,
    nrpoints=2000,
    epsilon=1.0,
)

In [None]:
# Compare the size of the first eigenvector with the number of frames in `traj`.
evec = np.asarray(mydmap.evecs[:, 0])
print(evec[1:].shape, len(traj), sep='\n')

Compute time evolution of torsion angle for defined atomic indices. Plot them as a function of the diffusion coordinate(s).

In [None]:
phi_torsion = features.compute_torsion_mdraj(traj, phi)
psi_torsion = features.compute_torsion_mdraj(traj, psi)

# (phi, psi) scatter coloured by each of the first two diffusion coordinates.
for dc_index in range(2):
    clb = plt.scatter(phi_torsion, psi_torsion, c=mydmap.evecs[:, dc_index], s=5)
    plt.colorbar(clb, label='DC {}'.format(dc_index + 1))
    plt.xlim([-np.pi, np.pi])
    plt.ylim([-np.pi, np.pi])

    plt.xlabel(r'$\phi$')
    plt.ylabel(r'$\psi$')
    plt.show()

# Time axis in ns, sized from the data so it always matches the torsion
# arrays: one frame every `steps` integrator steps of 0.002 ps, 0.001 ns/ps.
# Named `time_ns` so the imported `time` module is not shadowed.
# NOTE(review): assumes consecutive frames of `traj` are in time order and
# `steps` integrator steps apart -- confirm compute_diffusionmaps does not
# subsample or reorder frames.
time_ns = np.arange(len(phi_torsion)) * steps * 0.002 * 0.001

for torsion_values, torsion_label, axis_label in (
    (phi_torsion, 'phi', r'$\phi$'),
    (psi_torsion, 'psi', r'$\psi$'),
):
    plt.plot(time_ns, torsion_values, label=torsion_label)
    plt.xlabel('ns')
    plt.ylabel(axis_label)
    plt.legend()
    plt.show()

### TMDmap

The TMDmap (target measure diffusion map) allows for a Boltzmann correction $\exp(-\beta V(x))$.

In [None]:
import mdfeature.diffusionmap as diffusionmap

# Inputs for the weight computation. The temperature is taken from the
# notebook-wide `temperature` setting (the one used for the integrator)
# rather than a second hard-coded copy of the value.
weight_params = {
    'simulation': simulation,
    'temperature': temperature,
}

mydmap_tmd, traj = diffusionmap.compute_diffusionmaps(
    traj_orig,
    nrpoints=2000,
    epsilon=1.0,
    weights='compute',
    weight_params=weight_params,
)

# Torsions are recomputed because `traj` was replaced by the call above.
phi_torsion = features.compute_torsion_mdraj(traj, phi)
psi_torsion = features.compute_torsion_mdraj(traj, psi)

# (phi, psi) scatter coloured by each of the first two diffusion coordinates.
for dc_index in range(2):
    clb = plt.scatter(phi_torsion, psi_torsion, c=mydmap_tmd.evecs[:, dc_index], s=5)
    plt.colorbar(clb, label='DC {}'.format(dc_index + 1))
    plt.xlim([-np.pi, np.pi])
    plt.ylim([-np.pi, np.pi])

    plt.xlabel(r'$\phi$')
    plt.ylabel(r'$\psi$')
    plt.show()

### Explicit weights
It is also possible to compute diffusion maps with explicit weights.

In [None]:
from openmmtools.constants import kB

simulation = weight_params['simulation']

# Units of positions and energies, read from the current simulation state.
positions = simulation.context.getState(getPositions=True).getPositions()
positions_unit = positions.unit
energy_unit = diffusionmap.energy(positions, simulation).unit

# Energies of the frames in `traj`.
E = diffusionmap.compute_energy(traj.xyz, simulation, positions_unit, energy_unit)
print('Energy has shape')
print(E.shape)

# Example: reweight from the original temperature to a new one.
T_orig = 300
T2 = 400
kT_orig = kB * T_orig * kelvin
kT2 = kB * T2 * kelvin

density_T_orig = diffusionmap.compute_target_measure(E, kT_orig, energy_unit)
density_T2 = diffusionmap.compute_target_measure(E, kT2, energy_unit)

# Ratio of the target measures at the two temperatures.
weights = density_T2 / density_T_orig

# NOTE(review): `weights` is computed on the frames of `traj`, while the call
# below is given `traj_orig` -- confirm the weights line up with the frames
# that compute_diffusionmaps actually uses.
weight_params = {
    'simulation': simulation,
    'weights': weights,
}

mydmap_expl, traj = diffusionmap.compute_diffusionmaps(
    traj_orig,
    nrpoints=2000,
    epsilon=1.0,
    weights='explicit',
    weight_params=weight_params,
)

# Torsions of the returned trajectory, coloured by the first diffusion coordinate.
phi_torsion = features.compute_torsion_mdraj(traj, phi)
psi_torsion = features.compute_torsion_mdraj(traj, psi)

clb = plt.scatter(phi_torsion, psi_torsion, c=mydmap_expl.evecs[:, 0], s=5)
plt.colorbar(clb, label='DC 1')
plt.xlim([-np.pi, np.pi])
plt.ylim([-np.pi, np.pi])
plt.xlabel(r'$\phi$')
plt.ylabel(r'$\psi$')
plt.show()



We continue using the vanilla diffusionmap for the rest of the tutorial.

In [None]:
import mdfeature.diffusionmap as diffusionmap

# Recompute the vanilla diffusion map with a larger `nrpoints`; this replaces
# `mydmap` and `traj` for the rest of the notebook.
mydmap, traj = diffusionmap.compute_diffusionmaps(
    traj_orig,
    nrpoints=9000,
    epsilon=1.0,
)

Compute the free energy contour in the diffusion map space.

In [None]:
# 2D histogram of the first two diffusion-map coordinates.
histogram_counts, edx, edy = np.histogram2d(mydmap.dmap[:, 0], mydmap.dmap[:, 1], bins=200)

# Free energy as -log of the (unnormalised) counts. Empty bins give log(0),
# so the divide-by-zero warning is silenced; those bins become +inf.
# `np` is the alias the notebook imports; a bare `numpy` name is never imported.
with np.errstate(divide='ignore'):
    free_energy_dc = -np.log(histogram_counts)

# histogram2d returns bin edges (one more than the number of bins); plot each
# value at the centre of its bin rather than at its right edge.
x_centers = 0.5 * (edx[:-1] + edx[1:])
y_centers = 0.5 * (edy[:-1] + edy[1:])

fig = plt.figure(figsize=(15, 10))
# Transposed because histogram2d indexes its result as [x_bin, y_bin].
clb = plt.contourf(x_centers, y_centers, free_energy_dc.T)
plt.colorbar(clb, label='Free energy')

plt.xlabel('DC 1')
plt.ylabel('DC 2')

plt.show()

The `features` module also contains functions to define a list of features, compute all their correlations with the diffusion coordinates, and select the best ones.

In [None]:
# Torsions that are always included; each is given by four atom indices.
phi = [4, 6, 8, 14]
psi = [6, 8, 14, 16]
zeta = [1, 4, 6, 8]
theta = [8, 14, 16, 18]
default_torsions = [phi, psi, zeta, theta]

# Candidate torsion list, built on top of the defaults above.
all_combinations = features.create_torsions_list(
    atoms=traj.n_atoms,
    size=100,
    append_to=default_torsions,
    print_list=False,
)

In [None]:
# Number of diffusion coordinates passed to the correlation computation.
dimension = 2

# The same feature function name is used for every candidate torsion.
list_of_functions = ['compute_cos_torsion_mdraj'] * len(all_combinations)

correlations = features.compute_all_correlations(
    traj,
    mydmap,
    dimension,
    list_of_functions,
    nevery=10,
    list_of_params=all_combinations,
)
#TODO fix bug in correlation code

In [None]:
# choose the two best ones
# Select the best-correlated features (see the "choose the two best ones" step).
(cv_indices, correlations_cv,
 cv_indices_2, correlations_cv_2) = features.identify_features(
    correlations,
    all_combinations,
    dimension,
)



In [None]:
# Show the selected torsions and their correlations, one per line.
print(cv_indices, correlations_cv, sep='\n')

In [None]:
# Counterpart of the selection above, using identify_worst_features.
cv_indices_minimal, correlations_cv_minimal = features.identify_worst_features(
    correlations,
    all_combinations,
    dimension,
)


In [None]:
# Torsion values along `traj` for the first two selected torsions.
torsion_max, torsion_max_1 = (
    features.compute_torsion_mdraj(traj, cv_indices[rank])
    for rank in (0, 1)
)

In [None]:
# Short display names of the two selected torsions, looked up once.
cv_name_0 = features.get_name_torsion(cv_indices[0], pdb_file=pdb_name, table=table, format='short')
cv_name_1 = features.get_name_torsion(cv_indices[1], pdb_file=pdb_name, table=table, format='short')

# DC 1 against the first selected torsion, coloured by the second one.
clb = plt.scatter(torsion_max, mydmap.evecs[:, 0], c=torsion_max_1)
plt.colorbar(clb, label=cv_name_1)
plt.xlabel(cv_name_0)
plt.ylabel('DC 1')
plt.show()

# DC 2 against the second selected torsion, coloured by the first one.
# The colour data is `torsion_max` (cv_indices[0]), so the colorbar carries
# that torsion's name.
clb = plt.scatter(torsion_max_1, mydmap.evecs[:, 1], c=torsion_max)
plt.colorbar(clb, label=cv_name_0)
plt.xlabel(cv_name_1)
plt.ylabel('DC 2')
plt.show()
#TODO: fix plot