In [1]:
import os 

import glob 
import h5py 

import numpy as np 

In [2]:
import MDAnalysis as mda

In [3]:
from MDAnalysis.analysis.rms import RMSD

In [4]:
from utils import cm_to_cvae 

Using TensorFlow backend.


In [5]:
# Gather every path matching the run pattern and keep them in sorted order
# so later per-run loops visit them deterministically.
omm_pattern = '../fs-pep.3rd/omm_runs_*'
omm_list = glob.glob(omm_pattern)
omm_list.sort()

# RMSD

In [6]:
# PDB file used both as the topology for each trajectory and as the RMSD reference structure.
ref_pdb_file = '../fs-pep.3rd/pdb/fs-peptide.pdb'

In [7]:
# Compute the RMSD of every trajectory in omm_list against the reference PDB,
# using the 'protein and name CA' atom selection.
# The reference Universe does not depend on the loop variable, so it is built
# once here instead of re-parsing the PDB file for every trajectory.
ref_traj = mda.Universe(ref_pdb_file)

RMSD_all = []
for omm in omm_list:
    dcd_file = os.path.join(omm, 'output.dcd')
    # The reference PDB doubles as the topology for the DCD trajectory.
    mda_traj = mda.Universe(ref_pdb_file, dcd_file)
    R = RMSD(mda_traj, ref_traj, select='protein and name CA')
    R.run()
    # Keep only column 2 of the per-frame result table.
    # NOTE(review): column 2 is presumably the RMSD value itself — confirm
    # against the MDAnalysis version in use.
    RMSD_all.append(R.rmsd[:, 2])

In [8]:
# Join the per-trajectory RMSD arrays into a single array.
# Note: this rebinds RMSD_all from a list of arrays to one ndarray.
RMSD_all = np.hstack(RMSD_all)

# Embed

In [9]:
# Path of the weight file that is handed to predict_from_cvae further down.
model_weight = '../fs-pep.3rd/CVAE_exps/cvae_weight.h5'

In [10]:
# Read the 'contact_maps' dataset from each path in omm_list and accumulate
# the size of its second axis into num_frame.
cm_data_lists = []
num_frame = 0
for omm in omm_list:
    cm_file = os.path.join(omm, 'output_cm.h5')
    # The context manager closes the file even if reading the dataset raises.
    with h5py.File(cm_file, 'r') as cm_h5:
        contact_maps = cm_h5[u'contact_maps']
        # `Dataset.value` is deprecated (removed in h5py 3); `[()]` reads the
        # whole dataset into memory and works on old and new h5py alike.
        cm_data_lists.append(contact_maps[()])
        num_frame += contact_maps.shape[1]



In [11]:
# Scale the total frame count by 0.05 and display the result.
# NOTE(review): 0.05 looks like a per-frame time interval (units not shown here) — confirm
# and consider naming the constant.
num_frame * 0.05

18000.0

In [12]:
# Convert the list of per-run contact-map arrays with the cm_to_cvae helper from utils
# (its exact transformation is not visible in this notebook).
cvae_input = cm_to_cvae(cm_data_lists)

In [13]:
# Sanity check: display the shape of the array that will be fed to the model.
cvae_input.shape

(360000, 22, 22, 1)

In [14]:
# NOTE(review): this import sits mid-notebook rather than with the other imports at the top.
from utils import predict_from_cvae
# Call predict_from_cvae with the weight-file path and the prepared input array.
# NOTE(review): hyper_dim=3 presumably sets the latent dimensionality — confirm in utils.
cm_predict = predict_from_cvae(model_weight, cvae_input, hyper_dim=3)

Instructions for updating:
Colocations handled automatically by placer.
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_1 (InputLayer)            (None, 22, 22, 1)    0                                            
__________________________________________________________________________________________________
conv2d_1 (Conv2D)               (None, 22, 22, 64)   640         input_1[0][0]                    
__________________________________________________________________________________________________
conv2d_2 (Conv2D)               (None, 11, 11, 64)   36928       conv2d_1[0][0]                   
__________________________________________________________________________________________________
conv2d_3 (Conv2D)               (None, 11, 11, 64)   36928       conv2d_2[0][0]                   
_____________________________________

In [15]:
# Sanity check: display the shape of the prediction array.
cm_predict.shape

(360000, 3)

# Outliers

In [16]:
from utils import outliers_from_latent

# Search for an eps value: start at 0.2 and raise it in fixed steps until
# outliers_from_latent reports no more than max_outliers entries.
max_outliers = 150
eps_step = 0.05
eps = 0.2
# Report the actual width of the prediction array instead of a hard-coded 3.
latent_dim = cm_predict.shape[1]

while True:
    # squeeze() collapses a single-element result to a 0-d array, on which
    # len() raises TypeError; atleast_1d keeps that case one-dimensional and
    # leaves every other result unchanged.
    outliers = np.atleast_1d(
        np.squeeze(outliers_from_latent(cm_predict, eps=eps)))
    n_outlier = len(outliers)
    print('dimension = {0}, eps = {1:.2f}, number of outlier found: {2}'.format(
        latent_dim, eps, n_outlier))
    if n_outlier <= max_outliers:
        outlier_list = outliers
        break
    eps = eps + eps_step

dimension = 3, eps = 0.20, number of outlier found: 143


In [17]:
# Display the selected outlier entries (the full array is printed).
outlier_list

array([  4298,   4328,  11854,  22043,  40117,  40119,  40289,  40337,
        41738,  41749,  41756,  41939,  42052,  42067,  42075,  42122,
        42123,  42127,  42160,  42161,  42492,  43097,  43110,  43442,
        43457,  44091,  44092,  44093,  44094,  44096,  44101,  44135,
        44139,  44142,  44149,  56001,  60382,  60383,  60388,  60687,
        60797,  60804,  60830,  60837,  60838,  60841,  80038,  80696,
        84085,  84160,  84161,  84164,  84165,  84215,  84260,  85002,
        85221,  85222,  85224,  85225,  86927,  93730,  93736,  93750,
       105296, 114286, 126125, 136436, 143060, 143078, 143095, 146144,
       146145, 147612, 150055, 153195, 172113, 172115, 172560, 172561,
       172563, 176701, 192006, 204023, 212856, 223895, 228048, 249730,
       257759, 257849, 257850, 260197, 262972, 269466, 270497, 277280,
       279800, 279801, 281954, 282805, 285516, 289253, 290950, 294317,
       295945, 295950, 297152, 297153, 297158, 297160, 297163, 297164,
      

In [18]:
# Open the output file for writing (mode 'w').
# NOTE(review): the recorded run failed here with a file-lock IOError; presumably another
# handle to this file was still open (the read handle opened later is never closed) — confirm.
h5_save = h5py.File('./latent3d_fsp.h5', 'w') 

IOError: Unable to create file (unable to lock file, errno = 35, error message = 'Resource temporarily unavailable')

In [None]:
# Store the three result arrays as datasets named 'cm_predict', 'RMSD' and 'outliers'.
h5_save.create_dataset('cm_predict', data=cm_predict)  
h5_save.create_dataset('RMSD', data=RMSD_all) 
h5_save.create_dataset('outliers', data=outlier_list)  

In [None]:
# Close the write handle before the file is reopened for reading.
h5_save.close() 

In [None]:
# Reopen the same file read-only; this rebinds h5_save to the new handle.
h5_save = h5py.File('./latent3d_fsp.h5', 'r') 

In [None]:
# List the (name, object) pairs stored at the top level of the file.
# NOTE(review): no close() for this read handle is visible in the cells shown here.
h5_save.items()