In this IPython notebook, we featurize MOR ligand-binding simulations by pairwise distances between the ligand and different receptor residues. We then perform tICA and, prospectively, build an MSM.

In [None]:
from PDB_Order_Fixer import PDB_Order_Fixer
import mdtraj as md
import os
import numpy as np
import h5py

import datetime
import glob
import copy
from functools import partial 
import operator
import time

import random 
import subprocess
from subprocess import Popen
import sys
from io_functions import *
from custom_clusterer import *
from custom_tica import *
from custom_featurizer import *
from pdb_editing import *
from analysis import *
from io_functions import *
#from topology_fixing import *
from subsampling import *
from conversions import *
from custom_msm import *
from grids import *

In [None]:
from detect_intermediates import *
from interpret_tICs import *

In [None]:
from mor_ligand_atom_residue_feature_types import *
from get_variable_names import *
from mor_ligand_atom_residue_tica_config import *
from residue import Residue, Atom

In [4]:
# Resolve every directory and file path used by the rest of the notebook.
# The inputs (base, is_sparse, lag_time, n_components, feature_name, ...) are
# not defined in this cell; presumably they come from the star-imported
# config module -- TODO confirm.
(active_ref_dir, inactive_ref_dir, simulation_ref_dir, scripts_dir,
          ligand_dir, agonist_dir, inverse_agonist_dir, biased_agonist_dir, ref_receptors_dir, whole_trajectory_pnas,
          sasa_file) = get_base_files(base)

# tICA output directory; its value is passed to most of the helpers below.
tica_dir = get_tica_dir(base, is_sparse, lag_time, n_components, feature_name, 
                                 wolf_string, shrinkage_string, rho_string)
# Keep a separate copy of the tICA directory value under its own name.
ori_tica_dir = copy.deepcopy(tica_dir)
features_dir = get_features_dir(base, feature_name)

# Sub-directories derived from the tICA directory.
landmarks_dir = get_landmarks_dir(tica_dir)
analysis_dir = get_analysis_dir(tica_dir, n_clusters, sampling_method)
gmm_dir = get_gmm_dir(tica_dir)
rf_dir = get_rf_dir(tica_dir)


ref_tica_dir, ref_tica_coords = get_ref_tica_dirs(tica_dir)

graph_file = get_graph_file(tica_dir, msm_lag_time, n_clusters)

pnas_titles =  ["tm6_tm3_dist", "rmsd_npxxy_inactive", "rmsd_npxxy_active", "rmsd_connector_inactive", "rmsd_connector_active"]
# The "pnas" feature files are looked up under the analysis directory.
pnas_features_dir = analysis_dir


(clusterer_dir, msm_model_dir, macrostate_dir, features_known, model_dir, projected_features_dir,
         projection_operator_dir, ktica_fit_model_filename, ktica_projected_data_filename, nystroem_data_filename,
         mutual_information_csv, pearson_csv) = get_tica_files(base, tica_dir, n_clusters, msm_lag_time, n_macrostates)

(standardized_features_dir, feature_residues_csv, feature_residues_pkl,
          contact_csv, ref_features_dir) = get_feature_files(features_dir)

# NOTE(review): tica_dir is passed twice in a row to get_analysis_files --
# confirm both parameters are meant to receive the same directory.
(kmeans_csv, tica_coords_csv, features_csv, active_rmsd_dir, inactive_rmsd_dir, active_pnas_dir, inactive_pnas_joined, active_pnas_joined,
        clusters_map_file, ktica_clusters_map_file, analysis_file, combined_file, docking_summary, docking_joined, docking_z_scores_csv,
        aggregate_docking, aggregate_docking_joined, docking_pnas_joined, aggregate_docking_pnas, aggregate_docking_pnas_joined, docking_multiple_ligands,
        docking_distances_file, docking_pdf, mmgbsa_docking_distances, pnas_coords, mmgbsa_dir, mmgbsa_csv, mmgbsa_pdf, aggregate_mmgbsa,
        aggregate_mmgbsa_joined, aggregate_mmgbsa_pnas_joined, mmgbsa_z_scores_csv, active_clusters_csv, intermediate_clusters_csv,
        inactive_clusters_csv, pnas_clusters_averages, tica_clusters_averages, tica_classes_csv, tica_samples_csv, subgraph_save_base,
        degree_save_base, degree_map_csv, degree_z_map_csv, aggregate_docking_pnas_degree_z_joined, tic_residue_csv, feature_coefs_csv,
        duplicated_feature_coefs_csv) = get_analysis_files(analysis_dir, n_clusters, tica_dir, tica_dir, sampling_method, n_samples, precision,
                                                           msm_lag_time)

# NOTE(review): active_pnas_joined is unpacked again here, replacing the value
# bound by the get_analysis_files unpacking above.
(inactive_pnas_distances_dir, active_pnas_distances_dir, active_pnas_all_distances_dir,
          inactive_pnas_distances_new_csv, active_pnas_distances_new_csv, active_pnas_joined, active_pnas_means, pnas_coords_dir,
          pnas_coords_csv, pnas_all_coords_csv, pnas_coords_hexbin_dir, pnas_coords_co_crystallized_docking_dir,
          pnas_coords_active_colors_dir, user_defined_features_file, reaction_coordinates_trajs_file) = get_pnas_files(whole_trajectory_pnas, pnas_features_dir)

# NOTE(review): same get_features_dir call, with the same arguments, as earlier in this cell.
features_dir = get_features_dir(base, feature_name)



# NOTE(review): same get_graph_file call, with the same arguments, as earlier in this cell.
graph_file = get_graph_file(tica_dir, msm_lag_time, n_clusters)
# scripts_dir is rebound here from the result of get_script_dir(scripts_dir).
(scripts_dir, pymol_fixpdb_dir) = get_script_dir(scripts_dir)
(save_dir, reimaged_dir, mae_dir, combined_reimaged_dir, grid_dir, docking_dir) = get_docking_dirs(tica_dir, n_clusters, n_components, n_samples, sampling_method, precision)


/home/enf/quintin/Post_Process/GPCR/MOR/LIG_path/BU_path/featuresprotein_ligand_atom_contacts
/home/enf/quintin/Post_Process/GPCR/MOR/LIG_path/BU_path/featuresprotein_ligand_atom_contacts


In [5]:
# Show the path of the feature-residues pickle file.  Written as a function
# call so the cell parses on Python 3 as well as Python 2, matching the
# print(...) form used by the other cells.
print(feature_residues_pkl)

/home/enf/quintin/Post_Process/GPCR/MOR/LIG_path/BU_path/featuresprotein_ligand_atom_contacts/feature_residues.pkl


In [6]:
# The ligand as a whole: residue "LIG", number 900, chain "L".
ligand_residue = Residue(res_name="LIG", chain_id="L", resSeq=900)

# One Atom handle per named ligand atom, in the order listed.
ligand_atom_names = ["N1", "O1", "C17", "C7"]
ligand_atoms = [
    Atom(res_name="LIG", resSeq=900, chain_id="L", atom_name=name)
    for name in ligand_atom_names
]

In [8]:
# Receptor residue numbers (resSeq) kept as ligand-contact candidates.
#
# This cell previously held a pasted, whitespace-separated blob in which each
# residue number was repeated many times.  The repeats were thrown away by
# set(), and the literal was split across several physical lines inside
# ordinary double quotes, which Python rejects.  The inclusive (first, last)
# ranges below produce the same 183 unique residue numbers.
_INCLUDED_RESSEQ_RANGES = [
    (65, 84),
    (86, 87),
    (90, 90),
    (109, 159),
    (187, 248),
    (286, 331),
    (601, 601),
]

included_list = sorted(
    {
        resseq
        for first, last in _INCLUDED_RESSEQ_RANGES
        for resseq in range(first, last + 1)
    }
)

# `included` stays available as a whitespace-separated string of residue
# numbers for any code that still reads it (now without the repeats).
included = " ".join(str(resseq) for resseq in included_list)

print(included_list)
len(included_list)

[65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 86, 87, 90, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159, 187, 188, 189, 190, 191, 192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 205, 206, 207, 208, 209, 210, 211, 212, 213, 214, 215, 216, 217, 218, 219, 220, 221, 222, 223, 224, 225, 226, 227, 228, 229, 230, 231, 232, 233, 234, 235, 236, 237, 238, 239, 240, 241, 242, 243, 244, 245, 246, 247, 248, 286, 287, 288, 289, 290, 291, 292, 293, 294, 295, 296, 297, 298, 299, 300, 301, 302, 303, 304, 305, 306, 307, 308, 309, 310, 311, 312, 313, 314, 315, 316, 317, 318, 319, 320, 321, 322, 323, 324, 325, 326, 327, 328, 329, 330, 331, 601]


183

In [9]:
# Imported explicitly: the visible import cells never import itertools by
# name, so it was only in scope if one of the star-imports happened to leak it.
import itertools

# Residue numbers marked for exclusion.  Not consulted in this cell; the name
# is kept in case other cells read it.  Adjacent ranges from the original
# expression are merged; the resulting set is unchanged.
excluded_set = set().union(
    range(80, 116),
    range(153, 197),
    range(239, 263),
    range(270, 292),
    range(326, 350),
)

# Build the lookup once so each membership test is O(1) instead of a list scan.
included_resseqs = frozenset(included_list)

# Contact residues whose residue number is in the included list, in their
# original order.
included_residues = [res for res in contact_residues if res.resSeq in included_resseqs]

# (ligand residue, receptor residue) pairs and (ligand atom, receptor residue)
# pairs over that same filtered residue list.
user_specified_contact_residue_pairs = [(ligand_residue, res) for res in included_residues]
user_specified_atom_residue_pairs = list(itertools.product(ligand_atoms, included_residues))

print(user_specified_contact_residue_pairs)
print(user_specified_atom_residue_pairs)

[(LIG, ILE198), (LIG, ILE69), (LIG, ARG211), (LIG, ASN127), (LIG, ILE193), (LIG, THR132), (LIG, PRO134), (LIG, THR220), (LIG, HIS297), (LIG, GLY82), (LIG, GLU229), (LIG, ILE215), (LIG, THR67), (LIG, LEU116), (LIG, LEU112), (LIG, ALA113), (LIG, LEU110), (LIG, ALA323), (LIG, CYS159), (LIG, LEU194), (LIG, ILE322), (LIG, LEU83), (LIG, VAL245), (LIG, LEU219), (LIG, GLN314), (LIG, ALA287), (LIG, SER125), (LIG, ALA197), (LIG, ASN328), (LIG, ASN150), (LIG, VAL187), (LIG, TYR210), (LIG, VAL80), (LIG, VAL81), (LIG, ALA115), (LIG, TYR299), (LIG, THR120), (LIG, THR70), (LIG, ALA117), (LIG, MET130), (LIG, CYS79), (LIG, GLN124), (LIG, SER329), (LIG, ALA111), (LIG, ILE298), (LIG, ASN230), (LIG, PRO244), (LIG, VAL236), (LIG, ILE296), (LIG, PRO295), (LIG, GLY136), (LIG, GLY199), (LIG, PHE123), (LIG, SER154), (LIG, LEU74), (LIG, MET65), (LIG, PRO224), (LIG, PRO122), (LIG, VAL78), (LIG, THR153), (LIG, THR327), (LIG, THR157), (LIG, LEU139), (LIG, ASP216), (LIG, SER317), (LIG, PHE204), (LIG, THR225), (LIG,

In [8]:
# reload() is a builtin only on Python 2.  Take it from importlib where that
# provides it, otherwise from imp, so the cell runs on either interpreter.
try:
    from importlib import reload
except ImportError:
    from imp import reload

import custom_featurizer

# Re-execute the module so on-disk edits take effect, then refresh the
# star-imported names in the notebook namespace.
reload(custom_featurizer)
from custom_featurizer import *
#featurize_contacts_custom(traj_dir, features_dir = features_dir, traj_ext = traj_ext, contact_residue_pairs_file = feature_residues_pkl, structures=[], contact_residues=[],
#                          residues_map = None, contact_cutoff = cutoff, parallel = featurize_parallel, exacycle = exacycle, traj_top_structure = None, iterative=False,
#                          user_specified_atom_residue_pairs = user_specified_atom_residue_pairs)

In [11]:
import pickle

# Load the pickled list of feature pairs written by the featurizer.
with open(feature_residues_pkl, "rb") as pkl_file:
    feature_residues = pickle.load(pkl_file)

# Show the first pair and the attributes of each of its two members,
# then the pair stored at index 727.
first_pair = feature_residues[0]
print(first_pair)
print(first_pair[0].__dict__)
print(first_pair[1].__dict__)
print(feature_residues[727])

(Lig900-N1, Ile198)
{'mdtraj_rep': 'Lig900-N1', 'chain_id': 'L', 'res_name': 'LIG', 'resSeq': 900, 'atom_id': None, 'atom_name': 'N1'}
{'res_name': 'Ile198', 'resSeq': 198, 'ballosteros_weinstein': None, 'chain_id': 'R', 'chain_name': None}
(Lig900-C7, Val316)


In [12]:
# Fit the tICA model on the saved feature files and write its outputs under
# tica_dir.  All settings other than stride/parallel come from names defined
# outside this cell.
fit_and_transform(
    features_directory=features_dir,
    model_dir=tica_dir,
    stride=5,
    lag_time=lag_time,
    n_components=n_components,
    sparse=sparse,
    wolf=wolf,
    rho=rho,
    shrinkage=shrinkage,
    parallel=True,
    traj_ext=traj_ext,
)

loading feature files
(180, 728)
[ 1.51745558  1.71240592  1.66846228  0.8173328   2.03149128  1.31050098
  1.45045555  1.17150629  1.28468215  1.73942006]
(35,)
fitting data to tICA model
Sparse time-structure based Independent Components Analysis (tICA)
------------------------------------------------------------------
n_components        : 10
shrinkage           : None
lag_time            : 5
weighted_transform  : True
rho                 : 0.005
n_features          : 728

Top 5 timescales :
[ 458.50716235  272.2169145   218.18654661  165.29916588  129.55611906]

Top 5 eigenvalues :
[ 0.98915429  0.98179995  0.97734441  0.97020471  0.96214192]

Number of active degrees of freedom:
[10/728, 6/728, 15/728, 17/728, 20/728]
loading /home/enf/quintin/Post_Process/GPCR/MOR/LIG_path/BU_path/featuresprotein_ligand_atom_contacts/Rep_0-0.dataset
loading /home/enf/quintin/Post_Process/GPCR/MOR/LIG_path/BU_path/featuresprotein_ligand_atom_contacts/Rep_0-1.dataset
loading /home/enf/quintin/Post_

In [14]:
# Column titles "tIC1" .. "tIC10" for the projected tICA coordinates.
tic_titles = ["tIC%d" % tic_number for tic_number in range(1, 11)]
plot_columns(
    tica_dir,
    projected_features_dir,
    titles=tic_titles,
    tICA=True,
    scale=1.0,
    refcoords_file=None,
)

loading "/home/enf/quintin/Post_Process/GPCR/MOR/LIG_path/BU_path/sparse-tICA_t5_n_components10protein_ligand_atom_contacts_regularization_wolf_autoShrinkage_rho0pt005/phi_psi_chi2_allprot_projected.h5"...
()
None
tIC1
tIC2
tIC3
tIC4
tIC5
tIC6
tIC7
tIC8
tIC9
tIC10
Done plotting columns


In [None]:
# IPython magics: load the autoreload extension and switch it to mode 2.
# NOTE(review): per the IPython docs, mode 2 re-imports all modules before each
# cell executes, which would make the manual reload() calls in other cells
# redundant -- confirm against the installed IPython version.
%load_ext autoreload
%autoreload 2

In [13]:
# reload() is a builtin only on Python 2.  Take it from importlib where that
# provides it, otherwise from imp, so the cell runs on either interpreter.
try:
    from importlib import reload
except ImportError:
    from imp import reload

import interpret_tICs

# Re-execute the module so on-disk edits take effect, then refresh the
# star-imported names in the notebook namespace.
reload(interpret_tICs)
from interpret_tICs import *

# The tIC interpretation outputs go into the tICA directory itself.
tic_components_dir = tica_dir
interpret_tIC_components(
    projection_operator_dir,
    tic_components_dir,
    feature_residues_pkl,
    n_tica_components=n_components,
    percentile=95,
)


loading "/home/enf/quintin/Post_Process/GPCR/MOR/LIG_path/BU_path/sparse-tICA_t5_n_components10protein_ligand_atom_contacts_regularization_wolf_autoShrinkage_rho0pt005/phi_psi_chi2_allprot_tica_coords.h5"...
Interpreting tIC 1
feature_importances_df.shape
(728, 6)
residue_importances_df.shape
(186, 3)
          feature_name       res_i   res_j  resid_i  resid_j  importance
411  Lig900-C17-Val236  Lig900-C17  Val236      900      236    0.655291
140   Lig900-N1-Cys330   Lig900-N1  Cys330      900      330    0.539804
639   Lig900-C7-Ile290   Lig900-C7  Ile290      900      290   -0.507525
306   Lig900-O1-Phe320   Lig900-O1  Phe320      900      320    0.457135
545  Lig900-C17-Val316  Lig900-C17  Val316      900      316   -0.348474
107   Lig900-N1-Lys233   Lig900-N1  Lys233      900      233   -0.320273
451   Lig900-C17-Val66  Lig900-C17   Val66      900       66   -0.236599
445  Lig900-C17-Tyr149  Lig900-C17  Tyr149      900      149    0.225220
42    Lig900-N1-Ser329   Lig900-N1  Ser3

In [16]:
# reload() is a builtin only on Python 2.  Take it from importlib where that
# provides it, otherwise from imp, so the cell runs on either interpreter.
try:
    from importlib import reload
except ImportError:
    from imp import reload

import custom_featurizer

# Re-execute the module so on-disk edits take effect, then refresh the
# star-imported names in the notebook namespace.
reload(custom_featurizer)
from custom_featurizer import *

# Compute the user-defined features for every trajectory and save them to
# user_defined_features_file.
compute_user_defined_features_wrapper(traj_dir, traj_ext, inactive_dir, active_dir, structure,
                                          feature_name_residues_dict, user_defined_features_file)

# Pass the titles as a real list (the tICA plotting cell does the same); on
# Python 3 dict.keys() is a view that cannot be indexed.
plot_columns(
    pnas_features_dir,
    user_defined_features_file,
    titles=list(feature_name_residues_dict.keys()),
    tICA=False,
    scale=1.0,
    refcoords_file=None,
)

Saving "/home/enf/quintin/Post_Process/GPCR/MOR/LIG_path/BU_path/all_pnas_features/user_defined_features.h5"... (<type 'list'>)
loading "/home/enf/quintin/Post_Process/GPCR/MOR/LIG_path/BU_path/all_pnas_features/user_defined_features.h5"...
()featurizing /home/enf/quintin/Post_Process/GPCR/MOR/LIG_path/BU_path/h5_trajectories/Rep_0-0.h5
featurizing /home/enf/quintin/Post_Process/GPCR/MOR/LIG_path/BU_path/h5_trajectories/Rep_0-1.h5
featurizing /home/enf/quintin/Post_Process/GPCR/MOR/LIG_path/BU_path/h5_trajectories/Rep_0-2.h5
featurizing /home/enf/quintin/Post_Process/GPCR/MOR/LIG_path/BU_path/h5_trajectories/Rep_0-3.h5
featurizing /home/enf/quintin/Post_Process/GPCR/MOR/LIG_path/BU_path/h5_trajectories/Rep_1-0.h5
featurizing /home/enf/quintin/Post_Process/GPCR/MOR/LIG_path/BU_path/h5_trajectories/Rep_1-1.h5
featurizing /home/enf/quintin/Post_Process/GPCR/MOR/LIG_path/BU_path/h5_trajectories/Rep_1-2.h5
featurizing /home/enf/quintin/Post_Process/GPCR/MOR/LIG_path/BU_path/h5_trajectories/

In [18]:
# reload() is a builtin only on Python 2.  Take it from importlib where that
# provides it, otherwise from imp, so the cell runs on either interpreter.
try:
    from importlib import reload
except ImportError:
    from imp import reload

import custom_featurizer

# Re-execute the module so on-disk edits take effect, then refresh the
# star-imported names in the notebook namespace.
reload(custom_featurizer)
from custom_featurizer import *

# Values along the "tm6_tm3_dist" feature handed to the sampler.
# NOTE(review): how the three numbers are interpreted is decided inside
# reaction_coordinate_sampler -- confirm there.
coords_bounds_dict = {"tm6_tm3_dist": [6.0, 8.0, 10.0]}
reaction_coordinate_sampler(traj_dir, traj_ext, user_defined_features_file,
                            feature_name_residues_dict, coords_bounds_dict, reaction_coordinates_trajs_file)

loading "/home/enf/quintin/Post_Process/GPCR/MOR/LIG_path/BU_path/all_pnas_features/user_defined_features.h5"...
(6930, 3)
Analyzing tm6_tm3_dist
{'tm6_tm3_dist': ['Rep_2-2.h5', 'Snap1.h5', 'rep_3.h5']}


{'tm6_tm3_dist': ['Rep_2-2.h5', 'Snap1.h5', 'rep_3.h5']}

In [None]:
# Number of k-means clusters.  The clusterer file name is derived from it so
# the two cannot drift apart when the count is changed.
n_clusters = 50
clusterer_dir = "%s/clusterer_%dclusters.h5" % (tica_dir, n_clusters)

# Cluster on the first five tICs (indices 0-4) of the projected features.
cluster_minikmeans(
    tica_dir,
    projected_features_dir,
    traj_dir,
    n_clusters,
    clusterer_dir,
    tICs=list(range(5)),
)

In [17]:
# Number of frames to draw per cluster and how to draw them.
n_samples = 10
sampling_method = "random"

# The sample count in the directory name follows n_samples so it cannot go
# stale.  The "clusters50" part stays literal because this cell does not set
# the cluster count itself.
save_dir = "%s/clusters50_samples%d" % (tica_dir, n_samples)
clusters_map_file = "%s/clusters50_map.h5" % tica_dir

sample_clusters(
    clusterer_dir,
    projected_features_dir,
    traj_dir,
    traj_ext,
    save_dir,
    n_samples,
    method=sampling_method,
    clusters_map_file=clusters_map_file,
)

loading "/home/amir/Post_Process/GPCR/MOR/LIG_path/BU_path/tica_ligand_protein_contacts_3_sparse_0pt0025/clusterer_50clusters.h5"...
478
136
151
74
155
164
329
188
46
179
51
53
358
116
22
73
192
135
148
41
125
82
114
156
131
199
105
35
17
168
94
72
52
47
115
258
179
69
75
80
183
211
134
317
272
25
192
58
185
91
loading "/home/amir/Post_Process/GPCR/MOR/LIG_path/BU_path/tica_ligand_protein_contacts_3_sparse_0pt0025/phi_psi_chi2_allprot_projected.h5"...
[-504.15923741   46.74621863  -80.63647766   67.33860839  189.12706714]
[ 386.17165754 -153.83715071  158.50347295 -234.90808379   25.43297459]
[-410.16139299  -13.48744081   12.6899848    58.72163181  -94.72815609]
[ 636.99721471 -283.5879122   157.69926624  184.56203233  128.58142793]
[ 623.38906852 -110.28955748  -82.64496207  -67.70225719  -55.91743954]
[-256.41318496  -32.0686535    50.24223589  -88.2497065     2.93650121]
[-410.81951717   29.1200788    35.47512224   38.22852782 -115.38933323]
[-366.42992062   58.38719609   25.674628

In [16]:
# Plot the tICA coordinates with every cluster labelled by its id.  All
# clusters go in the "active" group and the other two groups are empty.
# The ids are passed as a real list, like the other cluster/lag-time
# arguments in this notebook, so all three id arguments share one type on
# Python 3 as well.
plot_all_tics_and_clusters(
    tica_dir,
    projected_features_dir,
    clusterer_dir,
    lag_time,
    label="cluster_id",
    active_cluster_ids=list(range(n_clusters)),
    intermediate_cluster_ids=[],
    inactive_cluster_ids=[],
)

loading "/home/amir/Post_Process/GPCR/MOR/LIG_path/BU_path/tica_ligand_protein_contacts_3_sparse_0pt0025/phi_psi_chi2_allprot_projected.h5"...
loading "/home/amir/Post_Process/GPCR/MOR/LIG_path/BU_path/tica_ligand_protein_contacts_3_sparse_0pt0025/clusterer_50clusters.h5"...
Looking at 5 tICS
Printed all tICA coords and all requested clusters


In [13]:
# Lag times 1 through 24 at which the timescales are plotted.
candidate_lag_times = list(range(1, 25))
plot_timescales(clusterer_dir, n_clusters, tica_dir, candidate_lag_times)


loading "/home/amir/Post_Process/GPCR/MOR/LIG_path/BU_path/tica_ligand_protein_contacts_3_sparse_0pt0025/clusterer_50clusters.h5"...
MiniBatchKMeans(batch_size=100, compute_labels=True, init='k-means++',
        init_size=None, max_iter=100, max_no_improvement=10, n_clusters=50,
        n_init=10, random_state=None, reassignment_ratio=0.01, tol=0.0,
        verbose=0)
MSM contains 6 strongly connected components above weight=1.00. Component 4 selected, with population 83.132705%
MSM contains 6 strongly connected components above weight=0.50. Component 4 selected, with population 83.090379%
MSM contains 7 strongly connected components above weight=0.33. Component 5 selected, with population 81.978022%
MSM contains 7 strongly connected components above weight=0.25. Component 5 selected, with population 81.929308%
MSM contains 7 strongly connected components above weight=0.20. Component 5 selected, with population 81.880089%
MSM contains 7 strongly connected components above weight=0.17. 

In [15]:
# Lag time used to name (and, when the call below is enabled, build) the MSM.
# NOTE(review): this rebinds the notebook-wide `lag_time`, which other cells
# also pass to the tICA fitting/plotting helpers -- confirm that sharing one
# name for both is intended.
lag_time = 5
# File the MSM is stored in, inside the tICA directory.
msm_model_dir = "%s/msm_lag_time%d.h5" % (tica_dir, lag_time)
# Building the MSM is currently disabled; only the path above is defined.
#build_msm(clusterer_dir, lag_time=lag_time, msm_model_dir=msm_model_dir)


In [17]:
# GraphML file written next to the MSM inside the tICA directory.
graph_file = "%s/msm_lag_time%d_graph.graphml" % (tica_dir, lag_time)

# Build the MSM graph with no optional annotations attached.
construct_graph(
    msm_model_dir,
    clusterer_dir,
    n_clusters,
    5,
    5,
    graph_file,
    inactive=None,
    active=None,
    pnas_clusters_averages=None,
    tica_clusters_averages=None,
    docking=None,
    macrostate=None,
)


loading "/home/amir/Post_Process/GPCR/MOR/LIG_path/BU_path/tica_ligand_protein_contacts_3_sparse_0pt0025/clusterer_50clusters.h5"...
loading "/home/amir/Post_Process/GPCR/MOR/LIG_path/BU_path/tica_ligand_protein_contacts_3_sparse_0pt0025/msm_lag_time5.h5"...
39


In [31]:
# Prefer importlib.reload; the imp module this cell used to rely on has been
# removed from recent Python 3 releases.  imp remains the fallback for
# interpreters whose importlib has no reload.
try:
    from importlib import reload
except ImportError:
    from imp import reload

import custom_msm

# Re-execute the module so on-disk edits take effect, then refresh the
# star-imported names in the notebook namespace.
reload(custom_msm)
from custom_msm import *

msm_file = msm_model_dir
# NOTE(review): these two absolute paths are hard-coded rather than derived
# from tica_dir -- confirm they point into the tICA directory in use.
sampled_frames_file = '/home/amir/Post_Process/GPCR/MOR/LIG_path/BU_path/tica_ligand_protein_contacts_3_sparse_0pt0025/sampled_frames_clusters50_samples1.h5'
msm_trajectory_filename = '/home/amir/Post_Process/GPCR/MOR/LIG_path/BU_path/tica_ligand_protein_contacts_3_sparse_0pt0025/msm_lagtime5_start48_3'

# Generate a 1000-step MSM trajectory starting from cluster 48.
make_msm_trajectory(msm_file, projected_features_dir, traj_dir, sampled_frames_file, clusterer_dir, msm_trajectory_filename,
                    n_clusters, start_cluster=48, n_steps=1000)

loading "/home/amir/Post_Process/GPCR/MOR/LIG_path/BU_path/tica_ligand_protein_contacts_3_sparse_0pt0025/phi_psi_chi2_allprot_projected.h5"...
loading "/home/amir/Post_Process/GPCR/MOR/LIG_path/BU_path/tica_ligand_protein_contacts_3_sparse_0pt0025/clusterer_50clusters.h5"...
loading "/home/amir/Post_Process/GPCR/MOR/LIG_path/BU_path/tica_ligand_protein_contacts_3_sparse_0pt0025/sampled_frames_clusters50_samples1.h5"...
loading "/home/amir/Post_Process/GPCR/MOR/LIG_path/BU_path/tica_ligand_protein_contacts_3_sparse_0pt0025/msm_lag_time5.h5"...
Complete. Saving to disk.


In [22]:
# Load one trajectory file with mdtraj for interactive inspection.
rep1_traj_path = "/home/amir/Post_Process/GPCR/MOR/LIG_path/BU_path/h5_trajectories/rep_1.h5"
mytraj = md.load(rep1_traj_path)

In [23]:
# Exploratory: list the attributes and methods available on the loaded trajectory.
dir(mytraj)

['__add__',
 '__class__',
 '__delattr__',
 '__dict__',
 '__dir__',
 '__doc__',
 '__eq__',
 '__format__',
 '__ge__',
 '__getattribute__',
 '__getitem__',
 '__gt__',
 '__hash__',
 '__init__',
 '__le__',
 '__len__',
 '__lt__',
 '__module__',
 '__ne__',
 '__new__',
 '__reduce__',
 '__reduce_ex__',
 '__repr__',
 '__setattr__',
 '__sizeof__',
 '__slotnames__',
 '__str__',
 '__subclasshook__',
 '__weakref__',
 '_check_valid_unitcell',
 '_distance_unit',
 '_have_unitcell',
 '_rmsd_traces',
 '_savers',
 '_string_summary_basic',
 '_time',
 '_time_default_to_arange',
 '_topology',
 '_unitcell_angles',
 '_unitcell_lengths',
 '_xyz',
 'atom_slice',
 'center_coordinates',
 'join',
 'load',
 'n_atoms',
 'n_chains',
 'n_frames',
 'n_residues',
 'openmm_boxes',
 'openmm_positions',
 'remove_solvent',
 'restrict_atoms',
 'save',
 'save_amberrst7',
 'save_binpos',
 'save_dcd',
 'save_dtr',
 'save_gro',
 'save_hdf5',
 'save_lammpstrj',
 'save_lh5',
 'save_mdcrd',
 'save_netcdf',
 'save_netcdfrst',
 'save_

In [19]:
# Drop the notebook's reference to the loaded trajectory.
del mytraj

In [48]:
# Restrict the structure to its first 400 atoms (indices 0-399).
subset = crystal_structure.atom_slice(range(400))
# The bare `subset.xyz` expression that used to sit here was a no-op: only a
# cell's last expression is displayed, and the coordinates are printed below.
print(subset.xyz)

# compute_contacts hands back a pair: the distance array and the matching
# residue-index pairs (see the printed tuple).
distances = md.compute_contacts(subset)
print(distances)


[[[-3.67750001 -2.00090003 -3.06229997]
  [-3.55780005 -1.92980003 -3.01830006]
  [-3.56550002 -1.78180003 -3.05369997]
  ..., 
  [-1.46140003 -0.32269999 -1.54359996]
  [-1.40170002 -0.40900001 -1.77069998]
  [-1.92920005 -0.43360001 -1.81420004]]]
(array([[ 0.29087129,  0.30480972,  0.50911838, ...,  0.31547278,
         0.29779878,  0.30303043]], dtype=float32), array([[ 0,  3],
       [ 0,  4],
       [ 0,  5],
       ..., 
       [47, 50],
       [47, 51],
       [48, 51]]))


In [20]:
# First element of the `distances` result: the distance array itself.
distances0 = distances[0]

In [23]:
# Replace non-finite entries in place of the original array binding.
# NOTE(review): nan_to_num presumably turns NaN into 0.0, so zeros seen
# afterwards may be former NaNs rather than true zero distances -- confirm.
distances0 = np.nan_to_num(distances0)

In [24]:
# Display the cleaned distance array.
distances0

array([[ 0.,  0.,  0., ...,  0.,  0.,  0.]], dtype=float32)

In [25]:
# Indices (one index array per axis) of the entries that are strictly positive.
is_positive = distances0 > 0.0
np.nonzero(is_positive)

(array([], dtype=int64), array([], dtype=int64))