In [None]:
import mdtraj as md
import numpy as np
import matplotlib.pyplot as plt

## We load the trajectory using a topology file (in this case a pdb) and a trajectory file (the dcd)

In [None]:
# Read the coordinates from the DCD file; the PDB file supplies the topology.
traj = md.load(
    'exampleTraj.dcd',
    top='exampleProtein.pdb',
)

## And get the number of frames in our trajectory

In [None]:
# The trajectory's length is its frame count
numFrames = traj.n_frames

## Prepare a list to contain the atom numbers of each first shell water's oxygen in each frame

In [None]:
# Atom indices of the two groups being compared:
# every protein atom, and the oxygen atom of every water molecule.
proteinSelection = traj.topology.select('protein')
waterSelection = traj.topology.select('water and name O')

# Empty list reserved for per-frame first-shell water oxygens.
# NOTE(review): nothing in the visible cells fills or reads this list.
firstShells = []

## Use an MDTraj function to run through the trajectory and identify all the water oxygens within 2 angstroms (0.2 nm, since MDTraj works in nanometers) of the protein.

In [None]:
# MDTraj measures distances in nanometers, so a cutoff of 0.2 is 2 angstroms.
# The protein atoms are the query; only the water oxygens are searched.
# The result holds one collection of neighboring atom indices per frame.
watersNearProtein = md.compute_neighbors(
    traj,
    cutoff=0.2,
    query_indices=proteinSelection,
    haystack_indices=waterSelection,
)

## For each frame that was analyzed, put the water oxygen IDs into a set.

In [None]:
# Turn each frame's neighbor indices into a set so frames can be compared
# with the set operators (& and -) further down.
watersNearProtein = list(map(set, watersNearProtein))

## Walk through the waters near the protein in each frame (called the "first hydration shell") and count how many of them are also in the first shell in every other frame. Make a (numFrames x numFrames) matrix holding the number of first-shell waters that each pair of frames has in common.

In [None]:
# Entry [a, b] is the number of first-shell waters shared by frames a and b.
firstOverlapMat = np.zeros((numFrames, numFrames))
for row in range(numFrames):
    rowWaters = watersNearProtein[row]
    # The matrix is symmetric, so only visit the upper triangle (col >= row)
    # and mirror each count into the lower triangle.
    for col in range(row, numFrames):
        shared = len(rowWaters & watersNearProtein[col])
        firstOverlapMat[row, col] = firstOverlapMat[col, row] = shared

## Use PyPlot to show what the first shell overlap matrix looks like

In [None]:
# Draw the overlap matrix as an image and attach a color scale for the counts
overlapImage = plt.imshow(firstOverlapMat)
plt.colorbar(overlapImage)
plt.show()

## Prepare the containers that will hold the frame-to-frame comparison values (the number of waters that left and the number that stayed at each step)

In [None]:
# Per-step results of the consecutive-frame comparison, appended to by the loop below.
# firstOverlapMat is deliberately NOT re-created here: resetting it to zeros would
# discard the overlap matrix computed earlier, and nothing in this part of the
# notebook reads a zeroed copy.
nLeftList = []      # number of waters that left the first shell at each step
nStayedList = []    # number of waters that stayed in the first shell at each step

## Now we go through each frame and compare it to the one right after it. Sets make this easy: subtracting one set from another (the "-" operator, difference) gives a new set containing the items that are in the first set but not in the second, and the "&" operator (intersection) gives a new set containing the items that both sets have in common. By counting the items in the difference and in the intersection for every frame and the frame after it, we can count how many waters leave and how many stay at each step.

In [None]:
# Compare every frame with the frame right after it (the last frame has no successor,
# hence numFrames-1 iterations).
for frameIndex in range(numFrames-1):

    # Set difference: waters in this frame's first shell that are absent from the next frame's
    watersThatLeft = watersNearProtein[frameIndex] - watersNearProtein[frameIndex+1]
    # And the len() of a set is the number of items in it
    numWatersThatLeft = len(watersThatLeft)
    # The & operator returns items which appear in both sets
    watersThatStayed = watersNearProtein[frameIndex] & watersNearProtein[frameIndex+1]
    numWatersThatStayed = len(watersThatStayed)

    # print() called as a function with one argument works on Python 2 and Python 3;
    # the bare print statement is a SyntaxError on Python 3.
    print('%i waters left the first shell this frame, and %i stayed' %(numWatersThatLeft,numWatersThatStayed))

    # Store the results in a list
    nLeftList.append(numWatersThatLeft)
    nStayedList.append(numWatersThatStayed)

## And now we can use matplotlib's pyplot module (imported as plt) to plot them

In [None]:
# One line per series, drawn on the same axes so the two counts can be compared
for counts, seriesLabel in ((nLeftList, 'Number of waters that left'),
                            (nStayedList, 'Number of waters that stayed')):
    plt.plot(counts, label=seriesLabel)
plt.legend()
plt.show()
