In [117]:
import MDAnalysis as MDA

Please follow [this link](http://ipywidgets.readthedocs.io/en/latest/user_install.html) to install ipywidgets and enable the notebook extension needed for interactivity.

In [118]:
from ipywidgets import interact
import os
import numpy as np
import MDAnalysis.analysis.diffusionmap as diffusionmap
import MDAnalysis.analysis.rms as rms
# Turn on MDAnalysis logging; the recorded output shows it writing to 'MDAnalysis.log'.
# NOTE(review): every log line in the recorded output appears twice — possibly this
# cell was run more than once in the same kernel; confirm.
MDA.start_logging()

MDAnalysis  : INFO     MDAnalysis 0.15.1-dev0 STARTED logging to 'MDAnalysis.log'
MDAnalysis  : INFO     MDAnalysis 0.15.1-dev0 STARTED logging to 'MDAnalysis.log'


In [119]:
# Record the MDAnalysis version the notebook was run with.
# Parenthesised so the cell works on both Python 2 and Python 3 kernels.
print(MDA.__version__)

0.15.1-dev0


In [120]:
from bokeh.plotting import figure, show, output_notebook
from bokeh.io import push_notebook
from bokeh.models import HoverTool, BoxSelectTool


In [121]:
# Show the working directory: the trajectory paths used below are relative to it.
# Parenthesised so the cell works on both Python 2 and Python 3 kernels.
print(os.getcwd())

/home/jdetlefs/github/dimension_reduction/diffusionMaps


*Be sure to download the AdK simulation data from [this link](http://becksteinlab.github.io/MDAnalysis-workshop/datadownload.html).*

These are the commands to run in a terminal on macOS or Linux to download and unzip the data:

```
curl -o mdatrj.zip -L 'https://www.dropbox.com/sh/am6y00kac8myihe/AABDiQI28fWnRZueQTT7W2s1a?dl=1'
unzip mdatrj.zip && rm mdatrj.zip
```


In [122]:
# Load the AdK system from the downloaded data set: a PSF file plus a DCD
# trajectory, both located relative to the current working directory.
u = MDA.Universe('./equilibrium/adk4AKE.psf','./equilibrium/1ake_007-nowater-core-dt240ps.dcd')


In [123]:
# Distance matrix over the 'backbone' selection with step=20 (the coarsest
# sampling tried here); %time reports how long run() takes.
dist_step20 = diffusionmap.DistanceMatrix(u, select='backbone', step=20)
%time dist_step20.run()

MDAnalysis.analysis.base: INFO     Starting preparation
MDAnalysis.analysis.base: INFO     Starting preparation
MDAnalysis.analysis.base: INFO     Finishing up
MDAnalysis.analysis.base: INFO     Finishing up


CPU times: user 1.33 s, sys: 128 ms, total: 1.46 s
Wall time: 1.47 s


In [124]:
# Same distance-matrix calculation with step=10, timed for comparison with
# the other step sizes.
dist_step10 = diffusionmap.DistanceMatrix(u, select='backbone', step=10)
%time dist_step10.run()

MDAnalysis.analysis.base: INFO     Starting preparation
MDAnalysis.analysis.base: INFO     Starting preparation
MDAnalysis.analysis.base: INFO     Finishing up
MDAnalysis.analysis.base: INFO     Finishing up


CPU times: user 5.08 s, sys: 556 ms, total: 5.64 s
Wall time: 5.64 s


In [None]:
# Same distance-matrix calculation with step=5, timed for comparison with
# the other step sizes.
dist_step5 = diffusionmap.DistanceMatrix(u, select='backbone', step=5)
%time dist_step5.run()

MDAnalysis.analysis.base: INFO     Starting preparation
MDAnalysis.analysis.base: INFO     Starting preparation
MDAnalysis.analysis.base: INFO     Finishing up
MDAnalysis.analysis.base: INFO     Finishing up


CPU times: user 19.9 s, sys: 2.23 s, total: 22.1 s
Wall time: 22.1 s


In [None]:
# Same distance-matrix calculation with step=3 (the finest sampling tried here).
# NOTE(review): the recorded output for this cell shows no 'Finishing up' line
# and no timing, yet dist_step3 is used by the diffusion map below — confirm
# that run() actually completed.
dist_step3 = diffusionmap.DistanceMatrix(u, select='backbone', step=3)
%time dist_step3.run()

MDAnalysis.analysis.base: INFO     Starting preparation
MDAnalysis.analysis.base: INFO     Starting preparation


That took a while for me, so let's stop there.

# Using diffusion maps
Now that we've figured out the timings,
let's
+ investigate the distance_matrix
+ pick a reasonable constant epsilon 
+ find the spectrum of reasonable eigenvalues
+ perform an embedding
+ plot the coordinates of the embedding

A reasonable value of epsilon corresponds to two elements that are close on a free energy landscape, so let's set epsilon to 1.101, one of our smaller RMSD jumps between frames. Before doing this, let's save the distance matrix, because after determining the epsilon, it will be permanently altered for memory savings.

In [None]:
# Build the diffusion map on top of the step-3 distance matrix and run it.
# NOTE(review): the text above mentions epsilon = 1.101 but this cell uses
# epsilon = 5 — confirm which value is intended.
dmap = diffusionmap.DiffusionMap(dist_step3, epsilon = 5)
dmap.run()
# Shapes of the [1:3] slices of the eigenvectors and eigenvalues (index 0 is
# skipped). Formatted explicitly so the output is the same on Python 2 and 3.
print('{} {}'.format(dmap.eigenvectors[1:3].T.shape, dmap.eigenvalues[1:3].T.shape))

In [None]:
# Inspect the distance matrix after the diffusion map has been run on it.
# Parenthesised so the cell works on both Python 2 and Python 3 kernels.
print(dist_step3.dist_matrix)

In [None]:
# diffusion map the two most dominant eigenvectors
fit = dmap.transform(2, 1)
fit.shape[0]

In [None]:
# Send bokeh output to the notebook so show() renders figures inline.
output_notebook()

In [None]:
# Coordinates of every frame in the first two embedding columns (time = 1).
fit = dmap.transform(2, 1)
x, y = fit[:, 0], fit[:, 1]

# Fixed axis ranges make the effect of rescaling easier to see.
TOOLS = [BoxSelectTool(), HoverTool()]
p = figure(tools=TOOLS, x_range=(-1, 1), y_range=(-1, 1))
p.title.text = 'coordinates of frames in two dimensional diffusion space'

# Keep the glyph renderer: the slider callback rewrites its data source.
r = p.circle(x, y, fill_alpha=0.6)


In [None]:
def update(t=0,vect0=0, vect1=1):
    """Redraw the scatter plot for a new diffusion time and pair of columns.

    Parameters
    ----------
    t : float
        Diffusion time handed to ``dmap.transform``.
    vect0, vect1 : int
        Column indices of the embedding shown on the x and y axes.

    The function returns nothing; it overwrites the 'x' and 'y' columns of the
    data source behind renderer ``r`` and pushes the change to the notebook.
    """
    # The other transform calls in this notebook pass the eigenvector count
    # first (e.g. transform(2, 1)); ask for just enough columns so that both
    # selected indices exist.
    n_eigenvectors = max(vect0, vect1) + 1
    fit = dmap.transform(n_eigenvectors, t)
    r.data_source.data['x'] = fit[:, vect0]
    r.data_source.data['y'] = fit[:, vect1]
    push_notebook()

In [None]:
# Render the scatter figure; the update() callback pushes new data to it.
show(p)

In [None]:
# Sliders: t from 0 to 5 in steps of 0.01; vect0 and vect1 choose embedding
# columns 0 through 10 for the x and y axes.
interact(update, t=(0,5,.01), vect0=(0,10,1), vect1=(0,10,1))

As you can see, most of the points far away in terms of diffusion distance occur only in the beginning! So it looks like right now diffusion mapping only captures the initial closing, but not the opening. (Right now bokeh has an issue with too many hits with the hover tool.)

In [None]:
def diffusion_distance(fit):
    """Return the ``rms.rmsd`` value between each row of ``fit`` and the next.

    ``fit`` is indexed by row (``fit.shape[0]`` rows). The result has one entry
    fewer than there are rows: entry ``i`` is ``rms.rmsd(fit[i], fit[i+1])``.
    """
    n_steps = fit.shape[0] - 1
    d = np.zeros(n_steps)
    # Walk over consecutive row pairs (row i, row i+1).
    for i, (current, following) in enumerate(zip(fit[:-1], fit[1:])):
        d[i] = rms.rmsd(current, following)
    return d

In [None]:
# Frame-to-frame distances for the current embedding `fit`.
dist = diffusion_distance(fit)

In [None]:
# One point per consecutive-frame pair: index on x, distance on y.
x_2 = range(dist.shape[0])
y_2 = dist

# Fixed y range makes the effect of rescaling easier to see.
TOOLS = [BoxSelectTool(), HoverTool()]
p_2 = figure(tools=TOOLS, y_range=(-1, 2))
p_2.title.text = 'Diffusion Distance from previous frame'

# Start with the first 50 entries, drawn as a line with circle markers on top.
# Both renderers are kept because the slider callback rewrites their data.
r1 = p_2.line(x_2[:50], y_2[:50], line_width=2)
r2 = p_2.circle(x_2[:50], y_2[:50], fill_alpha=0.6)


In [None]:
def update_dist(n_eigenvectors=2, t=0, begin=0, end=100):
    """Redraw the frame-to-frame distance plot for new embedding settings.

    Parameters
    ----------
    n_eigenvectors : int
        Number of eigenvectors handed to ``dmap.transform``.
    t : float
        Diffusion time handed to ``dmap.transform``.
    begin, end : int
        Slice bounds selecting which entries of the distance series to show.

    The function returns nothing; it replaces the data behind renderers ``r1``
    (line) and ``r2`` (circles) and pushes the change to the notebook.
    """
    fit = dmap.transform(n_eigenvectors,t)
    dist = diffusion_distance(fit)
    # Materialise the indices as a list so slicing yields a list on Python 3 too.
    frames = list(range(dist.shape[0]))[begin:end]
    jumps = dist[begin:end]
    # The window length changes with begin/end. Replace both columns in a single
    # assignment so the data source never holds an 'x' and a 'y' column of
    # different lengths, which is what happens when they are set one at a time.
    for renderer in (r1, r2):
        renderer.data_source.data = dict(x=frames, y=jumps)
    push_notebook()

In [None]:
# Render the distance figure; the update_dist() callback pushes new data to it.
show(p_2)

In [None]:
# Sliders: n_eigenvectors 1 to 20; t 0 to 10 in steps of 0.1; window start
# 0 to 100; window end 10 to 1000.
interact(update_dist, n_eigenvectors=(1,20,1), t=(0,10,.1), begin = (0,100), end= (10, 1000))

Play with the sliders and some interesting insights emerge. When the diffusion space is spanned by the first two vectors, we see that the frames with large diffusion distances occur at the beginning. As the number of vectors in the diffusion space is increased, another set of frames jumps out between 30 and 50. So maybe 0-20 is an opening and 30-50 is the closing action; the frames need to be inspected with visualization software to understand this.