In [67]:
# general imports
import mdtraj as md
import msmbuilder
import nglview as nv
from msmbuilder.featurizer import LigandRMSDFeaturizer, RawPositionsFeaturizer
from msmbuilder.cluster import KMeans, KMedoids, KCenters
import numpy as np 
from collections import Counter
from msmbuilder.cluster import RegularSpatial

In [68]:
trajectory = md.load('/Users/albaness/github/musashi/yank/experiments/experiment-harmonic/Roasix/RoAsix_state0_skip4.pdb')

In [69]:
atoms_to_align = trajectory.top.select('backbone and resn !=UNK')
atoms_to_align

array([   0,    4,   17,   18,   19,   21,   39,   40,   41,   43,   56,
         57,   58,   60,   76,   77,   78,   80,   95,   96,   97,   99,
        102,  103,  104,  106,  109,  110,  111,  113,  128,  129,  130,
        132,  139,  140,  141,  143,  163,  164,  165,  167,  180,  181,
        182,  184,  194,  195,  196,  198,  205,  206,  207,  217,  219,
        220,  221,  223,  231,  232,  233,  235,  242,  243,  244,  246,
        261,  262,  263,  265,  285,  286,  287,  289,  297,  298,  299,
        301,  318,  319,  320,  322,  338,  339,  340,  342,  349,  350,
        351,  353,  371,  372,  373,  375,  391,  392,  393,  395,  398,
        399,  400,  402,  413,  414,  415,  417,  432,  433,  434,  436,
        456,  457,  458,  460,  471,  472,  473,  475,  482,  483,  484,
        486,  499,  500,  501,  503,  515,  516,  517,  519,  532,  533,
        534,  536,  556,  557,  558,  560,  568,  569,  570,  580,  582,
        583,  584,  586,  596,  597,  598,  600,  6

In [70]:
aligned_traj = trajectory.superpose(trajectory[0], frame=0, atom_indices=atoms_to_align)

In [71]:
view = nv.show_mdtraj(aligned_traj)
view.add_licorice('resn UNK')
view

In [72]:
ligand = aligned_traj.topology.select('resn UNK')
ligand

array([1343, 1344, 1345, 1346, 1347, 1348, 1349, 1350, 1351, 1352, 1353,
       1354, 1355, 1356, 1357, 1358, 1359, 1360, 1361, 1362, 1363, 1364,
       1365, 1366, 1367, 1368, 1369, 1370, 1371, 1372, 1373, 1374, 1375,
       1376, 1377])

In [73]:
ligand_trajectory = aligned_traj.atom_slice(ligand)

In [74]:
len(trajectory)

3351

In [75]:
reg_space = RegularSpatial(d_min=0.1, metric='rmsd').fit(ligand_trajectory)

In [40]:
for i,center in enumerate(reg_space.cluster_center_indices_):
    aligned_traj[center][0].save('musashi_ro6_h_reg_cluster%s_frame%s.pdb' % (i,center[0]))

In [79]:
reg_list = reg_space.fit_predict(ligand_trajectory)

In [80]:
converted_labels = []
for x in reg_list:
    converted_labels.append(x[0])
Counter(converted_labels).most_common

<bound method Counter.most_common of Counter({17: 457, 201: 451, 37: 198, 100: 190, 47: 185, 344: 181, 285: 164, 378: 152, 61: 145, 282: 100, 32: 87, 0: 85, 51: 83, 357: 78, 27: 76, 173: 68, 179: 67, 148: 59, 21: 41, 212: 41, 267: 38, 26: 37, 205: 32, 313: 30, 215: 28, 3: 25, 196: 23, 92: 18, 9: 17, 197: 17, 116: 16, 184: 13, 78: 13, 195: 12, 1: 11, 342: 10, 404: 9, 297: 7, 29: 6, 82: 6, 358: 6, 193: 6, 360: 6, 326: 4, 130: 4, 410: 4, 2: 3, 83: 3, 98: 3, 85: 3, 312: 3, 103: 3, 139: 3, 292: 3, 4: 2, 55: 2, 163: 2, 7: 1, 35: 1, 45: 1, 74: 1, 81: 1, 105: 1, 134: 1, 168: 1, 204: 1, 214: 1, 248: 1, 260: 1, 315: 1, 341: 1, 385: 1})>

In [81]:
reg_space.cluster_center_indices_[201]

array([1190,    0])

In [83]:
nsamples = 10
for x in [37, 201, 17]:
    indices_in_cluster = []
    for i,y in enumerate(converted_labels):
        if y == x: 
            indices_in_cluster.append(i)
    samples = np.random.choice(indices_in_cluster, nsamples, replace=False)
    for frame in samples:  
        aligned_traj[frame].save('musashi_ro6_h_reg_cluster%s_frame%s.pdb' % (x,frame))
        
aligned_traj[frame].save('musashi_ro6_h_reg_cluster%s_frame%s.pdb' % (37, 99))
aligned_traj[frame].save('musashi_ro6_h_reg_cluster%s_frame%s.pdb' % (17, 35))
aligned_traj[frame].save('musashi_ro6_h_reg_cluster%s_frame%s.pdb' % (201, 1190))

In [51]:
len(reg_list)

3351

# RoOH analysis

In [85]:
trajectory = md.load('/Users/albaness/github/musashi/yank/experiments/experiment-harmonic/RoOH/Rooh_h_state0.h5')

In [86]:
atoms_to_align = trajectory.top.select('backbone and resn !=UNK')
atoms_to_align

array([   0,    4,   17,   18,   19,   21,   39,   40,   41,   43,   56,
         57,   58,   60,   76,   77,   78,   80,   95,   96,   97,   99,
        102,  103,  104,  106,  109,  110,  111,  113,  128,  129,  130,
        132,  139,  140,  141,  143,  163,  164,  165,  167,  180,  181,
        182,  184,  194,  195,  196,  198,  205,  206,  207,  217,  219,
        220,  221,  223,  231,  232,  233,  235,  242,  243,  244,  246,
        261,  262,  263,  265,  285,  286,  287,  289,  297,  298,  299,
        301,  318,  319,  320,  322,  338,  339,  340,  342,  349,  350,
        351,  353,  371,  372,  373,  375,  391,  392,  393,  395,  398,
        399,  400,  402,  413,  414,  415,  417,  432,  433,  434,  436,
        456,  457,  458,  460,  471,  472,  473,  475,  482,  483,  484,
        486,  499,  500,  501,  503,  515,  516,  517,  519,  532,  533,
        534,  536,  556,  557,  558,  560,  568,  569,  570,  580,  582,
        583,  584,  586,  596,  597,  598,  600,  6

In [87]:
aligned_traj = trajectory.superpose(trajectory[0], frame=0, atom_indices=atoms_to_align)

In [88]:
view = nv.show_mdtraj(aligned_traj)
view.add_licorice('resn UNK')
view

In [89]:
ligand = aligned_traj.topology.select('resn UNK')
ligand
ligand_trajectory = aligned_traj.atom_slice(ligand)
ligand

array([1343, 1344, 1345, 1346, 1347, 1348, 1349, 1350, 1351, 1352, 1353,
       1354, 1355, 1356, 1357, 1358, 1359, 1360, 1361, 1362, 1363, 1364,
       1365, 1366, 1367, 1368, 1369, 1370, 1371, 1372, 1373, 1374])

In [90]:
reg_space = RegularSpatial(d_min=0.10, metric='rmsd').fit(ligand_trajectory)

In [97]:
for i,center in enumerate(reg_space.cluster_center_indices_):
    aligned_traj[center][0].save('musashi_rooh_h_reg_cluster%s_frame%s.pdb' % (i,center[0]))

In [92]:
reg_list = reg_space.partial_predict(ligand_trajectory)

In [93]:
converted_labels = []
for x in reg_list:
    converted_labels.append(x)
converted_labels[:10]
Counter(converted_labels)

Counter({0: 1502,
         1: 37,
         2: 228,
         3: 1090,
         4: 736,
         5: 487,
         6: 467,
         7: 470,
         8: 101,
         9: 79,
         10: 42,
         11: 93,
         13: 95,
         14: 853,
         15: 23,
         16: 389,
         17: 235,
         18: 349,
         19: 315,
         20: 214,
         21: 1,
         28: 46,
         29: 104,
         30: 1,
         31: 59,
         33: 1,
         35: 10,
         36: 31,
         37: 13,
         41: 219,
         43: 53,
         44: 158})

In [96]:
reg_space.cluster_center_indices_

array([[   0,    0],
       [   3,    0],
       [   4,    0],
       [   6,    0],
       [   7,    0],
       [  18,    0],
       [  20,    0],
       [  51,    0],
       [  76,    0],
       [  97,    0],
       [ 109,    0],
       [ 133,    0],
       [ 188,    0],
       [ 308,    0],
       [ 414,    0],
       [ 691,    0],
       [ 752,    0],
       [ 926,    0],
       [1000,    0],
       [1049,    0],
       [1151,    0],
       [1378,    0],
       [1483,    0],
       [1809,    0],
       [2060,    0],
       [2069,    0],
       [2492,    0],
       [2692,    0],
       [2813,    0],
       [3069,    0],
       [3119,    0],
       [3367,    0],
       [3625,    0],
       [3954,    0],
       [4298,    0],
       [4388,    0],
       [4691,    0],
       [5363,    0],
       [5488,    0],
       [5914,    0],
       [6126,    0],
       [6616,    0],
       [6732,    0],
       [6861,    0],
       [7495,    0],
       [7509,    0]])

In [95]:
nsamples = 10
for x in [0, 3, 4, 14]:
    indices_in_cluster = []
    for i,y in enumerate(converted_labels):
        if y == x: 
            indices_in_cluster.append(i)
    samples = np.random.choice(indices_in_cluster, nsamples, replace=False)
    for frame in samples:  
        aligned_traj[frame].save('musashi_rooh_h_reg_cluster%s_frame%s.pdb' % (x,frame))