In [5]:
import numpy as np
import pandas as pd
import sys
import os

try:
    import openmm as mm
    import openmm.app as app
    import openmm.unit as unit
except ImportError:
    import simtk.openmm as mm
    import simtk.openmm.app as app
    import simtk.unit as unit

sys.path.append('../..')
from openabc.forcefields.parsers import MOFFParser
from openabc.forcefields import MOFFMRGModel
from openabc.utils.stride import parse_stride

# Name of the OpenMM platform on which the simulation runs; it is passed to
# `protein.set_simulation(...)` in the simulation cell below.
# NOTE(review): other OpenMM platform names (e.g. 'CUDA', 'OpenCL') presumably
# work here when available on the machine -- confirm against openabc.
platform_name = 'CPU'

We use IBB.pdb as an example to show how to use a stride output file as input for `MOFFParser`. When the secondary structure information read from the stride output is passed to `MOFFParser` as additional input, only native pairs within continuous ordered domains are kept.

First, we submit IBB.pdb to <http://webclu.bio.wzw.tum.de/cgi-bin/stride/stridecgi.py>, retrieve the output, and save it as IBB_stride.dat. Stride analyzes the structure and assigns a secondary structure type to each amino acid.

Next, we use the function `parse_stride` to take a look at the information stride provides.

In [6]:
# Read the stride output into a table (one row per residue) and preview
# the first 20 rows.
stride_data = parse_stride('IBB_stride.dat')
print(stride_data.iloc[:20])

   resname  pdb_resid  ordinal_resid ss_abbr        ss     phi     psi   rsaa
0      GLY          1              1       T      Turn  360.00    9.00  117.3
1      CYS          2              2       T      Turn  -64.21  -41.62  127.2
2      THR          3              3       T      Turn  -63.44  134.47  111.8
3      ASN          4              4       T      Turn  -71.94  154.03  147.1
4      GLU          5              5       C      Coil  -73.10  147.49  153.7
5      ASN          6              6       C      Coil  -87.67 -165.84  145.4
6      ALA          7              7       C      Coil  -56.84  132.10   86.5
7      ASN          8              8       C      Coil -148.99  -92.59  103.8
8      THR          9              9       C      Coil  -52.60  159.96  113.8
9      PRO         10             10       C      Coil  -71.25 -177.48  107.5
10     ALA         11             11       C      Coil  -46.01  149.13  107.0
11     ALA         12             12       C      Coil -113.50  

We can see stride gives information about the secondary structure of residues. Columns "ss_abbr" and "ss" are the secondary structure name abbreviation and full name, respectively. It also includes phi and psi dihedrals, and residue solvent accessible area (rsaa). 

Then, we use the stride secondary structure information as additional input to `MOFFParser` and compare with parsing without secondary structure information. 

In [7]:
# Parse IBB twice so the two sets of native pairs can be compared:
#   * IBB_no_stride -- default parse, no secondary structure information;
#   * IBB_stride    -- parse deferred (default_parse=False) so that the stride
#                      assignment can be supplied to `parse_mol` explicitly.
IBB_no_stride = MOFFParser.from_atomistic_pdb('IBB.pdb', 'IBB_CA.pdb') # use default parse, which does not use secondary structure information
IBB_stride = MOFFParser.from_atomistic_pdb('IBB.pdb', 'IBB_CA.pdb', default_parse=False)
ss = stride_data['ss_abbr'].tolist()
IBB_stride.parse_mol(ss=ss, ordered_ss_names=['H', 'E']) # H represents alpha-helix, and E represents beta-strand

# Flag which parse each pair came from. `assign` returns a new DataFrame, so
# the parsers' own `native_pairs` tables are left untouched; this matters
# because IBB_stride is handed to the simulation model afterwards and should
# not carry an extra bookkeeping column.
no_stride_pairs = IBB_no_stride.native_pairs.assign(**{'no stride': 1})
stride_pairs = IBB_stride.native_pairs.assign(stride=1)

# Outer merge on the shared columns: a pair missing from one parse gets NaN in
# that parse's flag column, which is then turned into 0 ("removed").
merged_native_pairs = pd.merge(no_stride_pairs, stride_pairs, how='outer').fillna(0)
print(merged_native_pairs.head(20))
merged_native_pairs.to_csv('merged_native_pairs.csv', index=False)

Parse molecule with default settings.
Get native pairs with shadow algorithm.
Get native pairs with shadow algorithm.
Secondary structure information is provided.
Only native pairs within the continuous ordered secondary structure domains are kept.
      a1    a2        mu  epsilon  no stride  stride
0    4.0   8.0  0.945069      3.0          1     0.0
1   12.0  16.0  0.679052      3.0          1     0.0
2   13.0  19.0  0.649438      3.0          1     0.0
3   13.0  20.0  0.698829      3.0          1     0.0
4   14.0  19.0  0.526343      3.0          1     0.0
5   14.0  20.0  0.374820      3.0          1     0.0
6   16.0  20.0  0.709063      3.0          1     0.0
7   20.0  25.0  0.699613      3.0          1     0.0
8   20.0  26.0  0.909406      3.0          1     0.0
9   20.0  29.0  1.180938      3.0          1     0.0
10  21.0  25.0  0.604508      3.0          1     0.0
11  21.0  26.0  0.647860      3.0          1     0.0
12  21.0  29.0  0.994547      3.0          1     0.0
13  22.0 

We can take a look at merged_native_pairs.csv. There are two columns called 'no stride' and 'stride'. A value of 1 means the pair is kept, while 0 means the pair is removed. We can see that many native pairs are removed when the stride secondary structure is given as input. For example, the pair between atoms 4 and 8 is removed with stride as input, as both atoms are within a coil domain. The pair between atoms 22 and 29 is also removed given the stride input, as atom 22 is in a turn domain. Note that atom indices start from 0 here.

We can easily set up a simulation of IBB, as we have shown in other MOFF tutorials.

In [8]:
# Build the model from the parse that used the stride secondary structure.
protein = MOFFMRGModel()
protein.append_mol(IBB_stride)

# Parse IBB_CA.pdb once and reuse it for both the topology and the initial
# coordinates (instead of reading the same file twice).
ca_pdb = app.PDBFile('IBB_CA.pdb')
top = ca_pdb.getTopology()
init_coord = ca_pdb.getPositions()
protein.create_system(top)

# Conditions used by the electrostatics term and the integrator.
salt_conc = 150*unit.millimolar
temperature = 300*unit.kelvin

# Add the force-field terms, each in its own force group.
protein.add_protein_bonds(force_group=1)
protein.add_protein_angles(force_group=2)
protein.add_protein_dihedrals(force_group=3)
protein.add_native_pairs(force_group=4)
protein.add_contacts(force_group=5)
protein.add_elec_switch(salt_conc, temperature, force_group=6)

# Langevin dynamics with a 10 fs time step.
friction_coeff = 1/unit.picosecond
timestep = 10*unit.femtosecond
integrator = mm.LangevinMiddleIntegrator(temperature, friction_coeff, timestep)
protein.set_simulation(integrator, platform_name, init_coord=init_coord)

# Relax the starting structure before dynamics.
protein.simulation.minimizeEnergy()

# Report every 100 steps and write the trajectory to output.dcd.
output_interval = 100
output_dcd = 'output.dcd'
protein.add_reporters(output_interval, output_dcd)

# Draw initial velocities at the target temperature, then run 500 steps
# (500 x 10 fs = 5 ps).
protein.simulation.context.setVelocitiesToTemperature(temperature)
protein.simulation.step(500)

Add protein bonds.
Add protein dihedrals.
Add native pairs.
Add protein and DNA nonbonded contacts.
Add protein and DNA electrostatic interactions with distance-dependent dielectric and switch.
Add electrostatic interactions between native pair atoms.
Use platform: CPU
#"Step","Time (ps)","Potential Energy (kJ/mole)","Kinetic Energy (kJ/mole)","Total Energy (kJ/mole)","Temperature (K)","Speed (ns/day)"
100,1.0000000000000007,154.45552404416557,258.7589025327119,413.2144265768775,216.1218235825033,0
200,2.0000000000000013,173.5103745443529,339.2166672463606,512.7270417907135,283.32213499628165,128
300,2.99999999999998,308.12215806115296,322.60498767677836,630.7271457379313,269.4476501139983,117
400,3.9999999999999587,257.67129568539536,427.50064525929935,685.1719409446947,357.0590929695899,114
500,4.999999999999938,265.55233535472325,387.9020409594406,653.4543763141639,323.98536105605496,112
