## Analyse Anton2 trajectory: Round1, System1

In [1]:
import warnings
warnings.filterwarnings('ignore')

In [14]:
import numpy as np
import mdtraj as md
import pyemma
print("Pyemma version %s"%pyemma.__version__)


from bokeh.plotting import figure, show, output_file
from bokeh.models import HoverTool, ColumnDataSource
from bokeh.io import output_notebook
from bokeh.layouts import column
output_notebook()

import Bio.PDB.Polypeptide
from Bio import pairwise2
from Bio.pairwise2 import format_alignment

Pyemma version 2.4


## Calculate basic collective variables (CVs)

In [3]:
# Structures: `ref1` is the simulated system (the same PDB is the featurizer
# topology); `ref_inactive` is the structure loaded from the 4dkl PDB file.
ref1 = md.load("protein_ligand.pdb")
ref_inactive = md.load("4dkl_atom.pdb")
feat = pyemma.coordinates.featurizer(topfile="protein_ligand.pdb")

# Atom-index selections on the simulated system.
# All C-alpha atoms:
selection_CA = [at.index for at in ref1.topology.atoms if at.name == 'CA']
# All non-hydrogen atoms of the ligand residue "MTD":
selection_ligand = [
    at.index
    for at in ref1.topology.atoms
    if at.residue.name == "MTD" and at.element.symbol != 'H'
]
 # atom.residue.name==resN3 and atom.residue.resSeq==resIndex and not atom.element.symbol=='H']

## define several CVs to monitor
## 0. protein RMSD from initial frame
## 1. ligand RMSD from initial frame
## 2. and 3. activation cvs (tm3-tm6 and NPxxY RMSD)

# Features 0 and 1, added in this order: min-RMSD to `ref1` over the
# C-alpha atoms, then over the ligand heavy atoms.
for cv_atoms in (selection_CA, selection_ligand):
    feat.add_minrmsd_to_ref(ref1, atom_indices=cv_atoms, precentered=False)

#feat.add_minrmsd_to_ref()
# NPxxY region:   N332(7.49) - A337(7.54)   (generic residue numbers in parentheses)
# TM3-TM6 pair:   R165(3.50) - T279(6.34)

def selCA(resname, resid, topology=None):
    """Return the indices of the C-alpha atom(s) of one residue.

    Parameters
    ----------
    resname : str
        Residue name compared against ``atom.residue.name``.
    resid : int
        Residue number compared against ``atom.residue.resSeq``.
    topology : optional
        Any object exposing an ``atoms`` iterable whose items have ``index``,
        ``name`` and ``residue`` (with ``name`` and ``resSeq``). Defaults to
        ``ref1.topology``, which is what this function always used before.

    Returns
    -------
    list of int
        Matching atom indices. The list is returned unchanged even when it
        is empty or holds more than one atom; a warning is emitted in both
        of those cases.
    """
    if topology is None:
        topology = ref1.topology

    sel = [
        atom.index
        for atom in topology.atoms
        if atom.name == 'CA'
        and atom.residue.name == resname
        and atom.residue.resSeq == resid
    ]

    if len(sel) != 1:
        problem = "Multiple atoms" if sel else "No atoms"
        # The notebook installs a blanket warnings.filterwarnings('ignore'),
        # which would hide this message; force it to be shown for this call.
        with warnings.catch_warnings():
            warnings.simplefilter("always")
            warnings.warn(
                "{} selected for {}{}".format(problem, resname, resid))
    return sel


# TM3-TM6 activation CV: C-alpha distance between ARG 165 and THR 279.
sel_R165 = selCA("ARG", 165)
sel_T279 = selCA("THR", 279)

# The two selections are concatenated into one index list, and the plotting
# cell reads this CV from a fixed feature column. Both only make sense when
# each residue contributed exactly one C-alpha atom, so fail early otherwise.
for _res_label, _res_sel in (("ARG165", sel_R165), ("THR279", sel_T279)):
    if len(_res_sel) != 1:
        raise ValueError(
            "Expected exactly one CA atom for {}, got {}: {}".format(
                _res_label, len(_res_sel), _res_sel))

feat.add_distances(sel_R165 + sel_T279, periodic=True)


Check the output of describe() to see the actual order of the features


In [4]:
# Show the feature labels in the order they were added to the featurizer.
feat.describe()

['minrmsd to frame 0 of <mdtraj.Trajectory with 1 frames, 4800 atoms, 292 residues, and unitcells at 0x7fa8a9de08d0>, subset of atoms  ',
 'minrmsd to frame 0 of <mdtraj.Trajectory with 1 frames, 4800 atoms, 292 residues, and unitcells at 0x7fa8a9de08d0>, subset of atoms  ',
 'DIST: ARG 165 CA 1613 - THR 279 CA 3518']

In [57]:
#for res in ref1.topology.residues: print(res)

In [5]:
# Build a featurized reader over the trajectory file(s) and report its size.
traj_list = ['skip10_protein_ligand.dcd']
inp = pyemma.coordinates.source(traj_list, feat)

n_trajectories = inp.number_of_trajectories()
print('number of trajectories = {}'.format(n_trajectories))

first_traj_length = inp.trajectory_length(0)
print('trajectory length = {}'.format(first_traj_length))

n_dimensions = inp.dimension()
print('number of dimension = {}'.format(n_dimensions))

# Evaluate every feature; later cells index the result as
# dt1[0][:, k] (first trajectory, feature column k).
dt1 = inp.get_output()

number of trajectories = 1
trajectory length = 8317
number of dimension = 3


In [6]:
# Frame indices 0 .. n_frames-1 for the first trajectory, used as the x-axis
# of the time-series plots. Materialised as a list (instead of a lazy range
# object) so it is a plain, concrete column when passed to the plotting
# data source.
times = list(range(dt1[0].shape[0]))

### Align to 4dkl and calculate NPxxY RMSD

In [147]:
# Residue names left out of the one-letter sequences (caps and ligand).
non_protein_resnames = ["ACE", "NME", "MTD"]


def _residue_sequence(topology):
    """Return ``[(one_letter, three_letter, resSeq), ...]`` for ``topology``.

    Residues whose name is listed in ``non_protein_resnames`` are skipped.
    """
    return [
        (Bio.PDB.Polypeptide.three_to_one(res.name), res.name, res.resSeq)
        for res in topology.residues
        if res.name not in non_protein_resnames
    ]


seq_simulation = _residue_sequence(feat.topology)
fasta_simulation = ''.join([entry[0] for entry in seq_simulation])

seq_inactive = _residue_sequence(ref_inactive.topology)
fasta_inactive = ''.join([entry[0] for entry in seq_inactive])

# Global alignment of the two sequences. It is only displayed at the end of
# the cell as a sanity check: the atom selections below match residues by
# their resSeq numbers directly, not through this alignment.
alignments2 = pairwise2.align.globalxx(fasta_simulation, fasta_inactive)
ali2 = alignments2[0]

# NPxxY region: N332(7.49) - A337(7.54).
# range(332, 337) covers resSeq 332..336.
# NOTE(review): the residue note above names A337 as the end point, which
# this range excludes -- confirm whether 337 should be part of the selection.
npxxy_resids = range(332, 337)


def _npxxy_heavy_atoms(topology):
    """Return indices of the non-hydrogen atoms of the NPxxY residues."""
    return [
        atom.index
        for atom in topology.atoms
        if atom.residue.resSeq in npxxy_resids
        and getattr(atom.element, 'symbol', None) != 'H'  # and atom.name == 'CA'
    ]


# The same heavy-atom filter is applied to both structures so the two
# selections stay comparable even if the reference file contains hydrogens.
selection_NPxxY_ref_inactive = _npxxy_heavy_atoms(ref_inactive.topology)
NPxxY_reference = ref_inactive.atom_slice(selection_NPxxY_ref_inactive)

selection_NPxxY_sim = _npxxy_heavy_atoms(ref1.topology)
sim_trj = md.load("skip10_protein_ligand.dcd", top=ref1.topology,
                  atom_indices=selection_NPxxY_sim)

# The RMSD is only meaningful when both selections hold the same atoms.
# NOTE(review): only the atom count is checked here; confirm that both files
# list the atoms of these residues in the same order.
if sim_trj.n_atoms != NPxxY_reference.n_atoms:
    raise ValueError(
        "NPxxY selections differ in size: {} atoms in the simulation, "
        "{} in the reference".format(sim_trj.n_atoms, NPxxY_reference.n_atoms))

# One RMSD value per trajectory frame, relative to the reference selection.
NPxxY_RMSD = md.rmsd(sim_trj, NPxxY_reference)
ali2

('SMVTAITIMALYSIVCVVGLFGNFLVMYVIVRYTKMKTATNIYIFNLALADALATSTLPFQSVNYLMGTWPFGNILCKIVISIDYYNMFTSIFTLCTMSVDRYIAVCHPVKALDFRTPRNAKIVNVCNWILSSAIGLPVMFMATTKYRQGSIDCTLTFSHPTWYWENLLKICVFIFAFIMPVLIITVCYGLMILRLKSVRMLSGSKEKDRNLRRITRMVLVVVAVFIVCWTPIHIYVIIKALITIPETTFQTVSWHFCIALGYTNSCLNPVLYAFLDENFKRCFREFCI',
 '-MVTAITIMALYSIVCVVGLFGNFLVMYVIVRYTKMKTATNIYIFNLALADALATSTLPFQSVNYLMGTWPFGNILCKIVISIDYYNMFTSIFTLCTMSVDRYIAVCHPVKALDFRTPRNAKIVNVCNWILSSAIGLPVMFMATTKYRQGSIDCTLTFSHPTWYWENLLKICVFIFAFIMPVLIITVCYGLMILRLKSVR------EKDRNLRRITRMVLVVVAVFIVCWTPIHIYVIIKALITIPETTFQTVSWHFCIALGYTNSCLNPVLYAFLDENFKRCFREFCI',
 282.0,
 0,
 289)

## Ligand clustering

## Plots

In [148]:
# One column per CV, all sharing the `frame` column as x-axis.
# Columns 0, 1 and 2 of dt1[0] follow the order in which the features were
# added: protein C-alpha RMSD, ligand RMSD, ARG165-THR279 CA distance.
# The column names double as plot titles and y-axis labels below
# (the first one used to be misspelled "rmds_ref1").
source = ColumnDataSource(data=dict(
    frame = times,
    rmsd_ref1 = dt1[0][:,0],
    rmsd_ligand = dt1[0][:,1],
    tm3tm6 = dt1[0][:,2],
    NPxxY_RMSD = NPxxY_RMSD,
))

tls = "hover, box_zoom, reset"

# One line plot per CV (every column except `frame`), stacked vertically.
pls = []
for cv in source.data.keys():
    if cv == 'frame':
        continue
    a = figure(title = cv, plot_height=300, tools=tls)
    a.xaxis.axis_label = 'frames'
    a.yaxis.axis_label = cv
    a.line('frame', cv, source=source, line_alpha=0.7, color='darkred')
    pls.append(a)

show(column(*pls))

In [120]:
def mapcols(data):
    """Map numeric values onto an 11-colour viridis palette.

    Each value is scaled by the maximum of ``data`` and truncated to an
    integer palette index: ``int(value / max * 10)``.

    Parameters
    ----------
    data : numpy.ndarray
        1-D array of values to colour.

    Returns
    -------
    numpy.ndarray
        Array of colour strings, one per entry of ``data``. Empty input gives
        an empty array; input whose maximum is 0 maps every entry to the
        first palette colour.
    """
    # Imported here because the notebook only uses `from bokeh.<x> import ...`
    # elsewhere, which never binds the bare name `bokeh`.
    from bokeh.palettes import viridis

    palette = np.array(viridis(11))
    if len(data) == 0:
        return palette[:0]

    peak = data.max()
    if peak == 0:
        # Avoid 0/0 -> NaN, which cannot be turned into a valid index.
        return palette[np.zeros(len(data), dtype=int)]

    return palette[(data/(1.*peak)*10).astype('int')]

In [150]:
# (n_frames, 2) array of the activation CVs:
# column 0 = tm3tm6, column 1 = NPxxY_RMSD.
act_cv = np.vstack((
    source.data['tm3tm6'],
    source.data['NPxxY_RMSD'])).transpose()

# Cluster the frames in this 2-D CV space and assign each frame to a cluster.
cluster_obj = pyemma.coordinates.cluster_regspace(act_cv,dmin=.01)
dtrj = cluster_obj.assign(act_cv)

cluster_centers = cluster_obj.clustercenters

# Number of frames per cluster. bincount puts the count for cluster i at
# index i, so the result is guaranteed to line up with the rows of
# `cluster_centers` (iterating over set(dtrj) gave no such ordering
# guarantee and rescanned the whole assignment once per cluster).
cls_population = np.bincount(dtrj, minlength=len(cluster_centers))

# Rescale populations so the largest cluster maps to 31; the result is used
# for both marker size and colour when the cluster centers are plotted.
pop = cls_population/(cls_population.max()*1/30.)+1

In [161]:
        from bokeh.models import BoxAnnotation

        # Scatter plot in the space of the two activation CVs:
        # x = act_cv[:, 0], y = act_cv[:, 1].
        a = figure(title="activation cvs",
                   plot_width=500, plot_height=500,
                   x_range=(.5, 1.5), y_range=(0.1, .6))

        # Every frame as a faint gray dot.
        a.circle(act_cv[:,0], act_cv[:,1],
                 size=1, color='gray', line_alpha=0.1)

        # Cluster centers; marker size and colour are both driven by `pop`.
        a.circle(cluster_centers[:,0], cluster_centers[:,1],
                 size=pop, color=mapcols(pop), line_alpha=0.7)

        # Four shaded regions, split at x = 1.0 and y = 0.25.
        active_bx = BoxAnnotation(
            left=1., right=1.5, bottom=.25, top=.6,
            fill_color='darkblue', fill_alpha=.1)
        inactive_bx = BoxAnnotation(
            left=.5, right=1.0, bottom=0, top=.25,
            fill_color='blue', fill_alpha=.1)
        int2_bx = BoxAnnotation(
            left=.5, right=1.0, bottom=.25, top=.6,
            fill_color='cyan', fill_alpha=.1)
        int1_bx = BoxAnnotation(
            left=1., right=1.5, bottom=0, top=.25,
            fill_color='cyan', fill_alpha=.1)

        for region_bx in (active_bx, inactive_bx, int2_bx, int1_bx):
            a.add_layout(region_bx)
        show(a)

In [164]:
from bokeh.resources import CDN
from bokeh.embed import file_html
from IPython.core.display import HTML


In [163]:
# Render figure `a` to an HTML string, passing the CDN resources object
# imported from bokeh.resources.
myplot_html = file_html(a, CDN)

In [165]:
# Display the exported HTML string as the cell output.
HTML(myplot_html)
