## Analyse Anton2 trajectory: Round1, System1

In [3]:
import warnings
warnings.filterwarnings('ignore')

In [78]:
import numpy as np
import mdtraj as md
import pyemma
import Bio.PDB.Polypeptide
from Bio import pairwise2

from bokeh.plotting import figure, show, output_file
from bokeh.models import HoverTool, ColumnDataSource
from bokeh.io import output_notebook

print("Pyemma version %s"%pyemma.__version__)
output_notebook()

Pyemma version 2.4


In [18]:
# Number of CA atoms selected on the simulated system.
# NOTE(review): selection_CA is first assigned in a later cell of this notebook, so this cell
# raises NameError on a fresh Restart & Run All — it should be moved below the featurizer cell.
len(selection_CA)

289

In [88]:
# Featurizer built on the simulated system's topology, plus the two structures loaded
# from PDB files: ref1 (same file as the featurizer topology) and ref_inactive.
feat = pyemma.coordinates.featurizer(topfile="protein_ligand.pdb")
ref1 = md.load("protein_ligand.pdb")
ref_inactive = md.load("4dkl_atom.pdb")

# Atom selections on ref1: every atom named CA, and the non-hydrogen atoms of residue MTD.
selection_CA = [
    atom.index
    for atom in ref1.topology.atoms
    if atom.name == 'CA'
]
selection_ligand = [
    atom.index
    for atom in ref1.topology.atoms
    if atom.residue.name == "MTD" and atom.element.symbol != 'H'
]

# Collective variables (CVs) to monitor:
#   0. protein RMSD to ref1 (CA atoms)
#   1. ligand RMSD to ref1 (heavy atoms of MTD)
#   2./3. activation CVs (tm3-tm6 and NPxxY RMSD) — only the tm3-tm6 distance is
#         registered further down in this cell; no NPxxY feature is added here.
feat.add_minrmsd_to_ref(ref1, atom_indices=selection_CA, precentered=False)
feat.add_minrmsd_to_ref(ref1, atom_indices=selection_ligand, precentered=False)

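# TODO: the NPxxY RMSD feature is not registered yet (placeholder: feat.add_minrmsd_to_ref()).
# Residue pairs for the activation CVs (superscripts are Ballesteros-Weinstein numbers):
#   NPxxY motif:      N332^7.49 - A337^7.54
#   TM3-TM6 distance: R165^3.50 - T279^6.34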

def selCA(resname,resid,topology=None):
    """Return the indices of the CA atom(s) belonging to one residue.

    Parameters
    ----------
    resname : str
        Residue name, compared against ``atom.residue.name``.
    resid : int
        Residue number, compared against ``atom.residue.resSeq``.
    topology : object, optional
        Any object exposing an ``atoms`` iterable. When omitted, ``ref1.topology``
        is used (looked up at call time), which is the original behaviour.

    Returns
    -------
    list of int
        Matching atom indices; normally exactly one element.

    Warns
    -----
    UserWarning
        If no atom or more than one atom matches. Note that the notebook's
        ``warnings.filterwarnings('ignore')`` in its first cell silences these.
    """
    if topology is None:
        topology = ref1.topology

    sel = [
        atom.index for atom in topology.atoms
        if atom.name == 'CA'
        and atom.residue.name == resname
        and atom.residue.resSeq == resid
    ]

    if not sel:
        # An empty result would otherwise pass silently into the feature definition.
        warnings.warn(
            "No CA atom selected for {}{}".format(resname,resid))
    elif len(sel)>1:
        warnings.warn(
            "Multiple atoms selected for {}{}".format(resname,resid))
    return sel


# TM3-TM6 activation CV: distance between the CA atoms of ARG 165 and THR 279.
sel_R165 = selCA(resname="ARG", resid=165)
sel_T279 = selCA(resname="THR", resid=279)
feat.add_distances(indices=sel_R165 + sel_T279, periodic=True)


Check the output of describe() to see the actual order of the features


In [89]:
# Residue names left out of the one-letter sequences: ACE/NME plus MTD
# (MTD is the residue used for the ligand selection earlier in the notebook).
_NON_SEQUENCE_RESIDUES = ("ACE", "NME", "MTD")


def _residue_records(topology, skip=_NON_SEQUENCE_RESIDUES):
    """Return ``(one_letter, three_letter, resSeq)`` for each residue of ``topology``.

    Residues whose name is in ``skip`` are omitted. The one-letter code comes from
    ``Bio.PDB.Polypeptide.three_to_one``, so ``Bio.PDB.Polypeptide`` must be imported.
    NOTE(review): ``three_to_one`` may be missing from recent Biopython releases —
    confirm against the version this notebook is pinned to.
    """
    return [
        (Bio.PDB.Polypeptide.three_to_one(res.name), res.name, res.resSeq)
        for res in topology.residues
        if res.name not in skip
    ]


# Sequence of the simulated system (featurizer topology) and of the 4dkl reference.
seq_simulation = _residue_records(feat.topology)
fasta_simulation = ''.join(rec[0] for rec in seq_simulation)

seq_inactive = _residue_records(ref_inactive.topology)
fasta_inactive = ''.join(rec[0] for rec in seq_inactive)




In [90]:
# Global pairwise alignment of the simulated sequence against the ref_inactive sequence.
# NOTE(review): globalxx presumably scores +1 per match with no mismatch/gap penalty —
# confirm against the Biopython pairwise2 documentation.
alignments2 = pairwise2.align.globalxx(fasta_simulation, fasta_inactive)
# Keep only the first alignment returned and display it as the cell output.
ali2 = alignments2[0]
ali2

('SMVTAITIMALYSIVCVVGLFGNFLVMYVIVRYTKMKTATNIYIFNLALADALATSTLPFQSVNYLMGTWPFGNILCKIVISIDYYNMFTSIFTLCTMSVDRYIAVCHPVKALDFRTPRNAKIVNVCNWILSSAIGLPVMFMATTKYRQGSIDCTLTFSHPTWYWENLLKICVFIFAFIMPVLIITVCYGLMILRLKSVRMLSGSKEKDRNLRRITRMVLVVVAVFIVCWTPIHIYVIIKALITIPETTFQTVSWHFCIALGYTNSCLNPVLYAFLDENFKRCFREFCI',
 '-MVTAITIMALYSIVCVVGLFGNFLVMYVIVRYTKMKTATNIYIFNLALADALATSTLPFQSVNYLMGTWPFGNILCKIVISIDYYNMFTSIFTLCTMSVDRYIAVCHPVKALDFRTPRNAKIVNVCNWILSSAIGLPVMFMATTKYRQGSIDCTLTFSHPTWYWENLLKICVFIFAFIMPVLIITVCYGLMILRLKSVR------EKDRNLRRITRMVLVVVAVFIVCWTPIHIYVIIKALITIPETTFQTVSWHFCIALGYTNSCLNPVLYAFLDENFKRCFREFCI',
 282.0,
 0,
 289)

In [57]:
#for res in ref1.topology.residues: print(res)

In [64]:
# List the registered features; per the note above, this shows their actual order:
# CA minRMSD to ref1, ligand heavy-atom minRMSD to ref1, ARG 165 - THR 279 CA distance.
feat.describe()

['minrmsd to frame 0 of <mdtraj.Trajectory with 1 frames, 4800 atoms, 292 residues, and unitcells at 0x7f35091f5290>, subset of atoms  ',
 'minrmsd to frame 0 of <mdtraj.Trajectory with 1 frames, 4800 atoms, 292 residues, and unitcells at 0x7f35091f5290>, subset of atoms  ',
 'DIST: ARG 165 CA 1613 - THR 279 CA 3518']

In [58]:
# Trajectory file(s) to featurize; a single DCD file here.
traj_list = ['skip10_protein_ligand.dcd']
# Build a reader that applies the featurizer `feat` to the listed trajectories.
inp = pyemma.coordinates.source(traj_list, feat)
# Sanity checks: trajectory count, length of trajectory 0, and number of output dimensions
# (one per registered feature).
print('number of trajectories = {}'.format(inp.number_of_trajectories()))
print('trajectory length = {}'.format(inp.trajectory_length(0)))
print('number of dimension = {}'.format(inp.dimension()))

number of trajectories = 1
trajectory length = 8317
number of dimension = 3


In [59]:
# Compute the features for the whole input. The result is indexed per trajectory below
# (dt1[0]); each entry is used as a 2-D array of frames x features.
dt1=inp.get_output()

In [47]:
# Frame indices 0..n_frames-1 for the first trajectory, used as the x column of the plot.
# Materialised as a list so it is a concrete sequence under both Python 2 and Python 3
# (a bare range() is lazy on Python 3).
times = list(range(dt1[0].shape[0]))

In [62]:
# Column store for plotting: one row per frame, one column per collective variable.
# Column order follows the feature order of dt1[0].
source = ColumnDataSource(data=dict(
    frame = times,
    rmsd_ref1 = dt1[0][:,0],      # feature 0: CA minRMSD to ref1 (key was misspelled "rmds_ref1")
    rmsd_ligand = dt1[0][:,1],    # feature 1: ligand heavy-atom minRMSD to ref1
    tm3tm6 = dt1[0][:,2],         # feature 2: ARG 165 - THR 279 CA distance
))

# Line plot of the TM3-TM6 distance against the frame index.
p = figure(title = "comparison",
           tools="hover, box_zoom, reset", toolbar_location="below")
p.xaxis.axis_label = 'frames'
p.yaxis.axis_label = 'CVs'
p.line('frame','tm3tm6',source=source, line_alpha=0.7, color='darkred')
show(p)

In [31]:
# Number of frame indices; should equal the trajectory length reported by the source.
len(times)

8317

In [None]:
# Number of entries in the featurized output (presumably one per trajectory — confirm).
len(dt1)