# Notebook1 Demo

## Import required libraries

In [1]:
# import all libraries and modules
import sys
import os
import pytraj as pt
from sklearn.decomposition import PCA
import numpy as np
import scipy
from scipy import spatial

# for plotting
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import proj3d
from matplotlib.patches import Circle
from mpl_toolkits.mplot3d import art3d

import warnings
warnings.filterwarnings("ignore")

## Import modules from BKit

In [2]:
from BKit.SmoothPath import BuildSmoothMeanPath
from BKit.Utils import LoadTrajs, SplitEvenOdd, PlotSelected, pathpatch_2d_to_3d
from BKit.InterpolateCurve import InterpolatePath
from BKit.ConstructMilestones3D import ConstructMilestones3D
from BKit.ConstructMilestones3D import SortFrames, rotation_matrix

## Set model parameters

In [3]:
dim = 3            # pc dimention    
w_size = 50        # rolling ave window size, which is used as reference path
stride = w_size//5
ml_length = .80    # distance between milestones
n_iter = 8000      # number of iterations for ML disk normal optimization
thresh = -0.27     # threshold to remove points with corners 
dr = 2.
dz = 0.4
yz_pad = 8         # yz pad length to avoid disk distortion upon visualization 

DIR_SAVE = '../output/'
FIG_SAVE = '../figs/'
complex_name = 'CycAsp'

## Set  path to all input files & load trajectories



In [4]:
path = '/home/talant/Projects/UCRiver/Milestoning/ForRuben/data/'
TOP = path + 'cd_asp_protein.prmtop'

TRAJ = path + 'Representative/' + 'short_be16_pca.dcd'
pdb_path = path + 'cd_asp_reference.pdb'

mask_align = "(:1-7)&(@O1)" 
mask_selec = ['@O1,C1,C2,:8','!@H*']

new_frames, refframe, traj= LoadTrajs(traj_path = TRAJ,
                                    top_path = TOP,
                                    refPDB_path = pdb_path,
                                    mask_selec = mask_selec,
                                    mask_align = mask_align)

Total number of residues --  8
Total number of atoms --  168
Total number of frames --  430
Loading metadynamics trajectory by chunk... 


100%|█████████████████████████████████████████████████████████████████| 3/3 [00:00<00:00, 312.47it/s]

Total number of selected atoms --  34





## Perform PCA

In [5]:
pca = PCA(n_components=dim)
PCs = pca.fit_transform(new_frames)
  
# obtain eigvals and eigvecs
eigvals = pca.explained_variance_ratio_
eigvecs = pca.components_.T
  
dat = np.concatenate((np.dot(new_frames, eigvecs[:,0]).reshape(len(new_frames),1),
                      np.dot(new_frames, eigvecs[:,1]).reshape(len(new_frames),1),
                      np.dot(new_frames, eigvecs[:,2]).reshape(len(new_frames),1)),
                      axis=1)
print('Coverage',round(sum(pca.explained_variance_ratio_)*100,1))

Coverage 83.8


## Visualize PCA Space

In [6]:
%matplotlib widget
plotOrig = True
fig = plt.figure(figsize = [8, 6])
ax = plt.axes(projection='3d')
p = ax.scatter3D(dat[:,0], dat[:,1], dat[:,2], c=range(int(len(dat))), alpha=0.4)
fig.colorbar(p, ax=ax)
ax.set_xlabel('PC1'); ax.set_ylabel('PC2'); ax.set_zlabel('PC3')
fig.tight_layout()
#plt.savefig(FIG_SAVE + complex_name + '3D_PCA.png', dpi=600)
plt.show()

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

## Construct 3D Path

In [7]:
#this constructs 3D path
SReactPath = BuildSmoothMeanPath(dat, w_size=w_size, thresh=thresh, stride=stride)    
meanP = SReactPath.GetPath()

# crude path to compare
CrudePath = SReactPath.roll_ave(w_size=w_size//2, stride=stride//2)
kd_path = SReactPath.GetPathKD(rad=dr, w_size=w_size, stride=stride)

#interpolate  points  
InterPath = InterpolatePath(meanP, pair_dist = ml_length , dim=dim, kind='linear')
InterPathKD = InterpolatePath(kd_path, pair_dist = ml_length , dim=dim, kind='linear')

## Visualize crude path vs constructed smooth path

In [8]:
%matplotlib widget
plotOrig = True
fs=9
fig = plt.figure(figsize = [7.2, 5.4])
ax = plt.axes(projection='3d')

if plotOrig:
    p = ax.scatter3D(dat[:,0], dat[:,1], dat[:,2], c=range(int(len(dat))), alpha=0.3)
    #fig.colorbar(p, ax=ax)

ax.plot3D(CrudePath[:,0], CrudePath[:,1], CrudePath[:,2], 'orange', marker='o', markersize=2)
ax.plot3D(InterPath[:,0], InterPath[:,1], InterPath[:,2], 'black', ls='-', marker='o', markersize=2)
ax.plot3D(InterPathKD[:,0], InterPathKD[:,1], InterPathKD[:,2], 'black', ls='-', marker='o', markersize=2)

ax.set_xlabel('PC1', fontsize=fs)
ax.set_ylabel('PC2',fontsize=fs)
ax.set_zlabel('PC3',fontsize=fs)
ax.set_xlim3d(-5,25); ax.set_ylim3d(-5,20); ax.set_zlim3d(-10,0)
plt.legend(["Crude mean path", "Smooth mean path", 'Smooth kd-tree path'], loc=[0.04,0.75], fontsize=fs)
ax.xaxis.pane.fill = False
ax.yaxis.pane.fill = False
ax.zaxis.pane.fill = False
ax.xaxis.pane.set_edgecolor('w')
ax.yaxis.pane.set_edgecolor('w')
ax.zaxis.pane.set_edgecolor('w')
#ax.xaxis.set_tick_params(labelsize=fs-1)
#ax.yaxis.set_tick_params(labelsize=fs-1)
#ax.zaxis.set_tick_params(labelsize=fs-1)
#ax.set_axis_off()

fig.tight_layout()
#plt.savefig(FIG_SAVE + complex_name + '3D_path.png', dpi=600)
plt.show()

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

## Now lets break into states with barriers

1. We use half length of ml


In [9]:
##################################################################################
# apply with equidistant partition of path using half ml_length
pathAll = InterpolatePath(meanP, pair_dist=ml_length/2, dim=dim, kind='linear')
#pathAll = InterpolatePath(kd_path, pair_dist=ml_length/2, dim=dim, kind='linear')
#pathAll = pathAll[10:-1] # drop few initial points

#split into barriers and states
diskID, midID = SplitEvenOdd(N=pathAll.shape[0])
pathMid = pathAll[midID]
pathP = pathAll[diskID]    

In [10]:
# construct 3D milestones (disks)
ConsMile = ConstructMilestones3D(pathAll)
vecs = ConsMile.GetVectors()
normals = vecs[diskID] 

In [11]:
normalsAll = ConsMile.OptVectors(vecs, n_iter=n_iter, lr=0.01)
#split to state and barriers
normals = normalsAll[diskID]       # barrier disk vectors
normalsMid = normalsAll[midID]     # mid disk vectors

In [12]:
n_disks = normals.shape[0]        # also number of barriers
n_cells = normalsMid.shape[0]     # also number of states
print('Number of disks -- ', n_disks)
print('Number of cells -- ', n_cells)

Number of disks --  62
Number of cells --  61


## Extract points on disk surface

In [13]:
SortF = SortFrames(dat,dr,dz)
datDS, _ = SortF.SortAllPoints(normals, normalsMid, pathP, pathMid, SortMethod='surface')

In [14]:
selected_frames = np.array(datDS[:,4], dtype=int)
datOut = dat[~selected_frames]

In [15]:
%matplotlib widget
plotOrig = True
dat_sel = datDS
#fig = plt.figure(figsize = [4.2, 3.4])
fig = plt.figure(figsize = [8.2, 6.4])

ax = plt.axes(projection='3d')
fs=9   
#plot path poits
ax.plot3D(pathP[:,0], pathP[:,1], pathP[:,2], c='black', marker='o', markersize=2, alpha=0.8)
ax.set_xlabel('PC1', fontsize=fs)
ax.set_ylabel('PC2',fontsize=fs)
ax.set_zlabel('PC3',fontsize=fs)

#plotting disks
for i in range(n_disks):
    c = Circle((0,0), dr, facecolor='grey', alpha=0.4)
    ax.add_patch(c)
    pathpatch_2d_to_3d(c, pathP[i], normal = normals[i])
    #ax.text(pathP[i,0], pathP[i,1] + dr, pathP[i,2] + dr, str(i), 'x', size=9)

#plot selected points
p = ax.scatter3D(dat_sel[:,0], dat_sel[:,1], dat_sel[:,2], c = dat_sel[:,3],
                 alpha=1.0, s = 2., cmap = 'prism')
    
#p = ax.scatter3D(datOut[:,0], datOut[:,1], datOut[:,2], c = 'grey')
    
x_min, y_min, z_min = pathP.min(axis=0) 
x_max, y_max, z_max = pathP.max(axis=0)
ax.set_xlim3d(x_min, x_max)
ax.set_ylim3d(y_min - yz_pad, y_max + yz_pad)
ax.set_zlim3d(z_min - yz_pad, z_max + yz_pad)
#ax.set_xlim3d(0,160); ax.set_ylim3d(-50,70); ax.set_zlim3d(-40,60)
ax.xaxis.pane.fill = False
ax.yaxis.pane.fill = False
ax.zaxis.pane.fill = False
ax.xaxis.pane.set_edgecolor('w')
ax.yaxis.pane.set_edgecolor('w')
ax.zaxis.pane.set_edgecolor('w')
ax.xaxis.set_tick_params(labelsize=fs-1)
ax.yaxis.set_tick_params(labelsize=fs-1)
ax.zaxis.set_tick_params(labelsize=fs-1)
fig.tight_layout()
plt.savefig(FIG_SAVE + complex_name + 'FramesOnDisk.pdf')
plt.show()    


Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

### Select points between disks
1. This method is used to assign state index to each point and to carry kinetics calculation later

In [17]:
datMid, _ = SortF.SortAllPoints(normals, normalsMid, pathP, pathMid, SortMethod='middle')

In [18]:
%matplotlib widget
plotOrig = True
dat_sel = datMid
#n_disks = normals.shape[0]
#fig = plt.figure(figsize = [4.2, 3.4])
fig = plt.figure(figsize = [6.2, 4.4])
ax = plt.axes(projection='3d')
  
#plot path poits
ax.plot3D(pathP[:,0], pathP[:,1], pathP[:,2], c='black', marker='o', markersize=2, alpha=0.8)
ax.set_xlabel('PC1', fontsize=fs)
ax.set_ylabel('PC2',fontsize=fs)
ax.set_zlabel('PC3',fontsize=fs)

#plotting disks
for i in range(n_disks):
    c = Circle((0,0), dr, facecolor='grey', alpha=0.4)
    ax.add_patch(c)
    pathpatch_2d_to_3d(c, pathP[i], normal = normals[i])
    ax.text(pathP[i,0] +0.1*dr, pathP[i,1] +0.1*dr ,
            pathP[i,2] +0.1*dr , str(i), 'x', size=fs)
    #ax.text(pathP[i,0] - 1.2*dr, pathP[i,1] + 0.4*dr ,
    #        pathP[i,2] - 2.3*dr, str(i), 'x', size=fs)

#plot selected points
p = ax.scatter3D(dat_sel[:,0], dat_sel[:,1], dat_sel[:,2], c = dat_sel[:,3],
                 alpha=1.0, s = 2., cmap = 'prism')
    
p = ax.scatter3D(datOut[:,0], datOut[:,1], datOut[:,2], c = 'grey', s=2., alpha=0.4)
    
x_min, y_min, z_min = pathP.min(axis=0) 
x_max, y_max, z_max = pathP.max(axis=0)
ax.set_xlim3d(x_min, x_max)
ax.set_ylim3d(y_min - yz_pad, y_max + yz_pad)
ax.set_zlim3d(z_min - yz_pad, z_max + yz_pad)
#ax.xaxis.pane.fill = False
#ax.yaxis.pane.fill = False
#ax.zaxis.pane.fill = False
#ax.xaxis.pane.set_edgecolor('w')
#ax.yaxis.pane.set_edgecolor('w')
#ax.zaxis.pane.set_edgecolor('w')
#ax.set_xlim3d(0,160); ax.set_ylim3d(-50,70); ax.set_zlim3d(-40,60)
ax.set_axis_off()
fig.tight_layout()

plt.show()    


Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

In [17]:
print("writing outputs to " + DIR_SAVE)
np.save(DIR_SAVE + '/MlPosNorms.npy', np.column_stack([pathAll, normalsAll]))
np.save(DIR_SAVE + '/EigVecs.npy', eigvecs)
np.save(DIR_SAVE + '/RefFrame.npy', refframe)

writing outputs to ../output/


### Choose how to select points to initiate kinetics runs!

In [19]:
SortMethod = 'middle' # 'surface'
if SortMethod == 'middle':
    sel_frames = datMid[:,4]
elif SortMethod == 'surface':
    sel_frames = datDS[:,4]
else: 
    print('Make your own method :)')

sel_frames.shape

(268,)

## Save selected frames as .dcd

In [20]:
#output trajs
pt.write_traj('output/SelectedFrames4ShortMD.dcd', traj[sel_frames], overwrite=True)

Error: File 'output/SelectedFrames4ShortMD.dcd': No such file or directory
Error: Setting up output trajectory 'output/SelectedFrames4ShortMD.dcd'


## Select frames on disks and save as .dcd

In [18]:
for i in range(n_disks):
    sel_frames = np.array(datDS[datDS[:,3]==i][:,4], dtype=int)
    if sel_frames.size != 0:
        pt.write_traj(DIR_SAVE + 'UnbindIndx' + str(i) + '.dcd',
                      traj[sel_frames],
                      overwrite=True)
    else:
        print('Empty Disk ID', i)

Empty Disk ID 17
Empty Disk ID 31
Empty Disk ID 32
Empty Disk ID 33
Empty Disk ID 38
Empty Disk ID 39
Empty Disk ID 45
Empty Disk ID 47
Empty Disk ID 49
Empty Disk ID 52
Empty Disk ID 57
Empty Disk ID 58
Empty Disk ID 59
Empty Disk ID 60
Empty Disk ID 61


# Done!