In this IPython notebook, we featurize MOR ligand-binding simulations by pairwise distances between the ligand and different receptor residues. We then perform tICA on these features and subsequently build an MSM.

In [1]:
from PDB_Order_Fixer import PDB_Order_Fixer
import mdtraj as md
import os
import numpy as np
import h5py

import datetime
import glob
import copy
from functools import partial 
import operator
import time

import random 
import subprocess
from subprocess import Popen
import sys
from io_functions import *
from custom_clusterer import *
from custom_tica import *
from custom_featurizer import *
from pdb_editing import *
from analysis import *
from io_functions import *
#from topology_fixing import *
from subsampling import *
from conversions import *
from custom_msm import *
from grids import *

In [2]:
from detect_intermediates import *
from interpret_tICs import *

we are operating on biox3




In [3]:
from mor_ligand_atom_residue_feature_types import *
from get_variable_names import *
from mor_ligand_atom_residue_tica_config import *
from residue import Residue, Atom

tm6_tm3_residues
[R279, R165]
[65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160, 161, 162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175, 176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191, 192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 205, 206, 207, 208, 209, 210, 211, 212, 213, 214, 215, 216, 217, 218, 219, 220, 221, 222, 223, 224, 225, 226, 227, 228, 229, 230, 231, 232, 233, 234, 235, 236, 237, 238, 239, 240, 241, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 253, 254, 255, 256, 257, 258, 259, 273, 274, 275, 276, 277, 278,

In [4]:
# Path/configuration cell: derives every directory and file path used by the
# later cells by calling the get_*_dir / get_*_files helpers.
# NOTE(review): `base`, `is_sparse`, `lag_time`, `n_components`, `feature_name`,
# `n_clusters`, `msm_lag_time`, etc. are not assigned in any visible cell;
# presumably they come from one of the star-imported config modules — confirm.

# Unpack the 11 values returned by get_base_files(base).
(active_ref_dir, inactive_ref_dir, simulation_ref_dir, scripts_dir,
          ligand_dir, agonist_dir, inverse_agonist_dir, biased_agonist_dir, ref_receptors_dir, whole_trajectory_pnas,
          sasa_file) = get_base_files(base)

# tICA output directory, derived from the tICA hyper-parameters.
tica_dir = get_tica_dir(base, is_sparse, lag_time, n_components, feature_name, 
                                 wolf_string, shrinkage_string, rho_string)
# NOTE(review): if tica_dir is a plain string, deepcopy is equivalent to assignment.
ori_tica_dir = copy.deepcopy(tica_dir)
features_dir = get_features_dir(base, feature_name)

# Sub-directories derived from tica_dir.
landmarks_dir = get_landmarks_dir(tica_dir)
analysis_dir = get_analysis_dir(tica_dir, n_clusters, sampling_method)
gmm_dir = get_gmm_dir(tica_dir)
rf_dir = get_rf_dir(tica_dir)


ref_tica_dir, ref_tica_coords = get_ref_tica_dirs(tica_dir)

graph_file = get_graph_file(tica_dir, msm_lag_time, n_clusters)

pnas_titles =  ["tm6_tm3_dist", "rmsd_npxxy_inactive", "rmsd_npxxy_active", "rmsd_connector_inactive", "rmsd_connector_active"]
# The PNAS feature files live in the analysis directory.
pnas_features_dir = analysis_dir


# Unpack the 12 values returned by get_tica_files; order must match the helper's return order.
(clusterer_dir, msm_model_dir, macrostate_dir, features_known, model_dir, projected_features_dir,
         projection_operator_dir, ktica_fit_model_filename, ktica_projected_data_filename, nystroem_data_filename,
         mutual_information_csv, pearson_csv) = get_tica_files(base, tica_dir, n_clusters, msm_lag_time, n_macrostates)

(standardized_features_dir, feature_residues_csv, feature_residues_pkl,
          contact_csv, ref_features_dir) = get_feature_files(features_dir)

# NOTE(review): tica_dir is passed twice (3rd and 4th positional arguments) — confirm this is intended.
(kmeans_csv, tica_coords_csv, features_csv, active_rmsd_dir, inactive_rmsd_dir, active_pnas_dir, inactive_pnas_joined, active_pnas_joined,
        clusters_map_file, ktica_clusters_map_file, analysis_file, combined_file, docking_summary, docking_joined, docking_z_scores_csv,
        aggregate_docking, aggregate_docking_joined, docking_pnas_joined, aggregate_docking_pnas, aggregate_docking_pnas_joined, docking_multiple_ligands,
        docking_distances_file, docking_pdf, mmgbsa_docking_distances, pnas_coords, mmgbsa_dir, mmgbsa_csv, mmgbsa_pdf, aggregate_mmgbsa,
        aggregate_mmgbsa_joined, aggregate_mmgbsa_pnas_joined, mmgbsa_z_scores_csv, active_clusters_csv, intermediate_clusters_csv,
        inactive_clusters_csv, pnas_clusters_averages, tica_clusters_averages, tica_classes_csv, tica_samples_csv, subgraph_save_base,
        degree_save_base, degree_map_csv, degree_z_map_csv, aggregate_docking_pnas_degree_z_joined, tic_residue_csv, feature_coefs_csv,
        duplicated_feature_coefs_csv) = get_analysis_files(analysis_dir, n_clusters, tica_dir, tica_dir, sampling_method, n_samples, precision,
                                                           msm_lag_time)

# `active_pnas_joined` was already bound by the get_analysis_files unpacking above;
# the value unpacked here replaces it.
(inactive_pnas_distances_dir, active_pnas_distances_dir, active_pnas_all_distances_dir,
          inactive_pnas_distances_new_csv, active_pnas_distances_new_csv, active_pnas_joined, active_pnas_means, pnas_coords_dir,
          pnas_coords_csv, pnas_all_coords_csv, pnas_coords_hexbin_dir, pnas_coords_co_crystallized_docking_dir,
          pnas_coords_active_colors_dir, user_defined_features_file, reaction_coordinates_trajs_file) = get_pnas_files(whole_trajectory_pnas, pnas_features_dir)

# Same call with the same arguments as the earlier `features_dir` assignment in this cell.
features_dir = get_features_dir(base, feature_name)



# Same call with the same arguments as the earlier `graph_file` assignment in this cell.
graph_file = get_graph_file(tica_dir, msm_lag_time, n_clusters)
# Rebinds `scripts_dir` (first bound from get_base_files) to the value returned here.
(scripts_dir, pymol_fixpdb_dir) = get_script_dir(scripts_dir)
(save_dir, reimaged_dir, mae_dir, combined_reimaged_dir, grid_dir, docking_dir) = get_docking_dirs(tica_dir, n_clusters, n_components, n_samples, sampling_method, precision)


/home/enf/quintin/Post_Process/GPCR/MOR/LIG_path/BU_path/featuresprotein-ligand_protein-protein_contacts-all_residues_4dkl_5c1m_under_cutoff6A
/home/enf/quintin/Post_Process/GPCR/MOR/LIG_path/BU_path/featuresprotein-ligand_protein-protein_contacts-all_residues_4dkl_5c1m_under_cutoff6A


In [5]:
# The ligand, addressed as one residue: name "LIG", chain "L", residue number 900.
ligand_residue = Residue(res_name="LIG", chain_id="L", resSeq=900)

In [14]:
# Ligand atoms used as the atom side of the atom-residue pairs built later.
# Each Atom carries the same residue number / chain / residue name as the ligand (900 / "L" / "LIG").
ligand_atom_names = ["N1", "O1", "C7", "C17"]
ligand_atoms = [
    Atom(resSeq=900, chain_id="L", atom_name=atom_name, res_name="LIG")
    for atom_name in ligand_atom_names
]

In [15]:
included = "65 65 65 65 65 66 66 66 66 66 66 66 67 67 67 67 67 68 68 68 68 68 69 69 69 69 69 69 69 69 70 70 70 70 70 70 70 71 71 71 71 71 71 71 71 72 72 72 72 72 72 72 72 73 73 73 73 73 74 74 74 74 74 74 74 74 75 75 75 75 75 75 75 75 75 75 75 75 76 76 76 76 76 76 77 77 77 77 77 77 77 77 78 78 78 78 78 78 78 79 79 79 79 79 79 80 80 80 80 80 80 80 81 81 81 81 81 81 81 82 82 82 82 83 83 83 83 83 83 83 83 84 84 84 86 86 86 86 86 87 87 87 87 87 87 87 87 90 90 109 109 109 109 109 110 110 110 110 110 110 111 111 111 111 112 112 112 112 112 112 112 112 113 113 113 113 113 114 114 114 114 114 114 114 114 115 115 115 115 115 116 116 116 116 116 116 116 116 117 117 117 117 117 118 118 118 118 118 118 118 119 119 119 119 119 119 120 120 120 120 120 120 120 121 121 121 121 121 121 121 121 122 122 122 122 122 122 122 123 123 123 123 123 123 123 123 123 123 123 124 124 124 124 124 124 124 124 124 125 125 125 125 125 125 126 126 126 126 126 126 126 127 127 127 127 127 127 127 127 128 128 128 128 128 128 128 128 128 128 128 128 129 129 129 129 129 129 129 129 130 130 130 130 130 130 130 130 131 131 131 131 132 132 132 132 132 132 132 133 133 133 133 133 133 133 133 133 133 133 133 133 133 134 134 134 134 134 134 134 135 135 135 135 135 135 135 135 135 135 135 136 136 136 136 137 137 137 137 137 137 137 137 138 138 138 138 138 138 138 138 139 139 139 139 139 139 139 139 140 140 140 140 140 140 141 141 141 141 141 141 141 141 141 142 142 142 142 142 142 142 142 143 143 143 143 143 143 143 144 144 144 144 144 144 144 144 145 145 145 145 145 145 146 146 146 146 146 146 146 146 147 147 147 147 147 147 147 147 148 148 148 148 148 148 148 148 148 148 148 148 149 149 149 149 149 149 149 149 149 149 149 149 150 150 150 150 150 150 150 150 151 151 151 151 151 151 151 151 152 152 152 152 152 152 152 152 152 152 152 153 153 153 153 153 153 153 154 154 154 154 154 154 155 155 155 155 155 155 155 155 156 156 156 156 156 156 156 156 156 156 156 157 157 157 157 158 158 158 159 159 187 188 188 189 
189 189 189 189 189 189 190 190 190 190 190 190 191 191 191 191 191 191 191 191 192 192 192 192 192 192 192 192 192 192 192 192 192 192 193 193 193 193 193 193 193 193 194 194 194 194 194 194 194 194 195 195 195 195 195 195 196 196 196 196 196 196 197 197 197 197 197 198 198 198 198 198 198 198 198 199 199 199 199 200 200 200 200 200 200 200 200 201 201 201 201 201 201 201 202 202 202 202 202 202 202 203 203 203 203 203 203 203 203 204 204 204 204 204 204 204 204 204 204 204 205 205 205 205 205 205 205 205 206 206 206 206 206 207 207 207 207 207 207 207 208 208 208 208 208 208 208 209 209 209 209 209 209 209 209 209 210 210 210 210 210 210 210 210 210 210 210 210 211 211 211 211 211 211 211 211 211 211 211 212 212 212 212 212 212 212 212 212 213 213 213 213 214 214 214 214 214 214 215 215 215 215 215 215 215 215 216 216 216 216 216 216 216 216 217 217 217 217 217 217 218 218 218 218 218 218 218 219 219 219 219 219 219 219 219 220 220 220 220 220 220 220 221 221 221 221 221 221 221 221 221 221 221 222 222 222 222 222 222 223 223 223 223 223 223 223 223 223 223 224 224 224 224 224 224 224 225 225 225 225 225 225 225 226 226 226 226 226 226 226 226 226 226 226 226 226 226 227 227 227 227 227 227 227 227 227 227 227 227 228 228 228 228 228 228 228 228 228 228 228 228 228 228 229 229 229 229 229 229 229 229 229 230 230 230 230 230 230 230 230 231 231 231 231 231 231 231 231 232 232 232 232 232 232 232 232 233 233 233 233 233 233 233 233 233 234 234 234 234 234 234 234 234 235 235 235 235 235 235 236 236 236 236 236 236 236 237 237 237 237 237 237 237 237 237 237 237 238 238 238 238 238 238 238 238 239 239 239 239 239 239 239 239 239 239 239 240 240 240 240 240 241 241 241 241 241 241 241 241 241 241 241 242 242 242 242 242 242 242 242 243 243 243 243 243 243 243 243 244 244 244 244 244 244 244 245 245 245 245 245 245 245 246 246 246 246 246 246 246 246 247 247 247 247 247 248 286 287 288 288 288 288 289 289 289 289 289 289 289 289 289 289 289 290 290 290 290 290 290 290 
290 291 291 291 291 291 291 291 292 292 292 292 292 292 293 293 293 293 293 293 293 293 293 293 293 293 293 293 294 294 294 294 294 294 294 295 295 295 295 295 295 295 296 296 296 296 296 296 296 296 297 297 297 297 297 297 297 297 297 297 298 298 298 298 298 298 298 298 299 299 299 299 299 299 299 299 299 299 299 299 300 300 300 300 300 300 300 301 301 301 301 301 301 301 301 302 302 302 302 302 302 302 302 303 303 303 303 303 303 303 303 303 304 304 304 304 304 305 305 305 305 305 305 305 305 306 306 306 306 306 306 306 306 307 307 307 307 307 307 307 308 308 308 308 308 308 308 308 309 309 309 309 309 309 309 310 310 310 310 310 310 310 310 310 311 311 311 311 311 311 311 312 312 312 312 312 312 312 313 313 313 313 313 313 313 313 313 313 313 314 314 314 314 314 314 314 314 314 315 315 315 315 315 315 315 316 316 316 316 316 316 316 317 317 317 317 317 317 318 318 318 318 318 318 318 318 318 318 318 318 318 318 319 319 319 319 319 319 319 319 319 319 320 320 320 320 320 320 320 320 320 320 320 321 321 321 321 321 321 322 322 322 322 322 322 322 322 323 323 323 323 323 324 324 324 324 324 324 324 324 325 325 325 325 326 326 326 326 326 326 326 326 326 326 326 326 327 327 327 327 327 327 327 328 328 328 328 328 328 328 328 329 329 329 329 329 329 330 330 330 330 330 330 331 331 331 331 331 331 601 601 601 601 601 601 601 601 601 601 601 601 601 601 601 601 601 601 601 601 601 601 601 601 601 601 601 601 601 601 601 601 601"
# Collapse the whitespace-separated residue numbers in `included` to their
# unique values, convert them to int and sort ascending.
included_list = sorted(int(token) for token in set(included.split()))
print(included_list)
# Last expression: number of distinct residue numbers.
len(included_list)

[65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 86, 87, 90, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159, 187, 188, 189, 190, 191, 192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 205, 206, 207, 208, 209, 210, 211, 212, 213, 214, 215, 216, 217, 218, 219, 220, 221, 222, 223, 224, 225, 226, 227, 228, 229, 230, 231, 232, 233, 234, 235, 236, 237, 238, 239, 240, 241, 242, 243, 244, 245, 246, 247, 248, 286, 287, 288, 289, 290, 291, 292, 293, 294, 295, 296, 297, 298, 299, 300, 301, 302, 303, 304, 305, 306, 307, 308, 309, 310, 311, 312, 313, 314, 315, 316, 317, 318, 319, 320, 321, 322, 323, 324, 325, 326, 327, 328, 329, 330, 331, 601]


183

In [39]:
# `itertools` is used below but is not imported explicitly by any visible cell;
# import it here so this cell does not depend on a star import providing it.
import itertools

# Half-open [start, stop) residue-number ranges that are left out of the
# ligand-atom/residue features.
excluded_ranges = [(270, 288), (239, 263), (153, 170), (170, 197), (102, 116), (80, 102), (328, 350)]
excluded_set = set(itertools.chain.from_iterable(range(start, stop) for start, stop in excluded_ranges))

# Contact residues whose residue number is NOT in an excluded range.
included_residues = [res for res in contact_residues if res.resSeq not in excluded_set]

# (ligand residue, receptor residue) pairs restricted to residue numbers listed in
# `included_list`. A set is used for the membership test; the resulting pairs and
# their order are the same as testing against the list directly.
included_resseqs = set(included_list)
user_specified_contact_residue_pairs = [(ligand_residue, contact_residue) for contact_residue in contact_residues if contact_residue.resSeq in included_resseqs]

# Every (ligand atom, included residue) combination, ligand atom varying slowest.
user_specified_atom_residue_pairs = list(itertools.product(ligand_atoms, included_residues))
print(user_specified_contact_residue_pairs)
print(user_specified_atom_residue_pairs)

[(Lig900, Ile198), (Lig900, Ile69), (Lig900, Arg211), (Lig900, Asn127), (Lig900, ILE193), (Lig900, Thr132), (Lig900, Pro134), (Lig900, Thr220), (Lig900, His297), (Lig900, GLY82), (Lig900, Glu229), (Lig900, Ile215), (Lig900, Thr67), (Lig900, Leu116), (Lig900, LEU112), (Lig900, ALA113), (Lig900, LEU110), (Lig900, Ala323), (Lig900, CYS159), (Lig900, LEU194), (Lig900, Ile322), (Lig900, LEU83), (Lig900, VAL245), (Lig900, Leu219), (Lig900, Gln314), (Lig900, ALA287), (Lig900, Ser125), (Lig900, Ala197), (Lig900, ASN328), (Lig900, Asn150), (Lig900, VAL187), (Lig900, Tyr210), (Lig900, VAL80), (Lig900, VAL81), (Lig900, ALA115), (Lig900, Tyr299), (Lig900, Thr120), (Lig900, Thr70), (Lig900, Ala117), (Lig900, Met130), (Lig900, Cys79), (Lig900, Gln124), (Lig900, SER329), (Lig900, ALA111), (Lig900, Ile298), (Lig900, Asn230), (Lig900, PRO244), (Lig900, Val236), (Lig900, Ile296), (Lig900, Pro295), (Lig900, Gly136), (Lig900, Gly199), (Lig900, Phe123), (Lig900, SER154), (Lig900, Leu74), (Lig900, Met65), (

In [40]:
import shutil

# Start from an empty features directory so feature files from a previous run
# cannot mix with this one. The removal uses os/shutil instead of a string-built
# `rm -rf` shell command: no shell parsing of the path (spaces or shell
# metacharacters are safe) and a failed removal raises instead of being ignored.
if os.path.islink(features_dir) or os.path.isfile(features_dir):
    os.remove(features_dir)
elif os.path.isdir(features_dir):
    shutil.rmtree(features_dir)
if not os.path.exists(features_dir):
    os.makedirs(features_dir)

# Featurize the trajectories in `traj_dir`, writing into `features_dir`; the
# ligand-atom/receptor-residue pairs are passed via `user_specified_atom_residue_pairs`.
featurize_contacts_custom(traj_dir, features_dir = features_dir, traj_ext = traj_ext, contact_residue_pairs_file = feature_residues_pkl, structures=[inactive_dir, active_dir], contact_residues=contact_residues,
                          residues_map = None, contact_cutoff = cutoff, parallel = featurize_parallel, exacycle = exacycle, traj_top_structure = None, iterative=False,
                          user_specified_atom_residue_pairs=user_specified_atom_residue_pairs)

structure
/home/enf/quintin/Post_Process/GPCR/MOR/4dkl_R_for_conformation.pdb
structure
None
mdraj_index_combinations[0:10]
[(133, 4), (133, 146), (133, 62), (133, 128), (133, 261), (133, 274), (133, 67), (133, 29), (133, 27), (133, 69)]
contact_features[0:10]
[[ILE198, ILE69], [ILE198, ARG211], [ILE198, ASN127], [ILE198, ILE193], [ILE198, ASN332], [ILE198, ARG345], [ILE198, THR132], [ILE198, VAL94], [ILE198, VAL92], [ILE198, PRO134]]
About to compute 36315 features
(1, 36315)
cutoff
0.6
distances[0:10]
[[ 2.9838984   2.29805589  1.95851779 ...,  3.44139647  2.53797245
   3.89411235]]
There are 1728 residue-residue contacts below cutoff in structure.
structure
/home/enf/quintin/Post_Process/GPCR/MOR/5c1m.pdb
structure
None
mdraj_index_combinations[0:10]
[(146, 17), (146, 159), (146, 75), (146, 141), (146, 280), (146, 293), (146, 80), (146, 42), (146, 40), (146, 82)]
contact_features[0:10]
[[ILE198, ILE69], [ILE198, ARG211], [ILE198, ASN127], [ILE198, ILE193], [ILE198, ASN332], [ILE198,

In [41]:
import pickle

# Read back the pickled feature list from `feature_residues_pkl` and inspect
# the first feature: the pair itself, then the attributes of its two members.
with open(feature_residues_pkl, "rb") as pkl_handle:
    feature_residues = pickle.load(pkl_handle)
first_feature = feature_residues[0]
print(first_feature)
print(vars(first_feature[0]))
print(vars(first_feature[1]))

[Ile198, ILE193]
{'res_name': 'Ile198', 'resSeq': 198, 'ballosteros_weinstein': None, 'chain_id': 'R', 'chain_name': None}
{'res_name': 'ILE193', 'resSeq': 193, 'ballosteros_weinstein': None, 'chain_id': 'R', 'chain_name': None}


In [42]:
import shutil

# Remove any previous tICA output before refitting. os/shutil replace the
# string-built `rm -rf` shell command: the path is never parsed by a shell and
# a failed removal raises instead of being silently ignored.
if os.path.islink(tica_dir) or os.path.isfile(tica_dir):
    os.remove(tica_dir)
elif os.path.isdir(tica_dir):
    shutil.rmtree(tica_dir)

# The hyper-parameters below are NOT set in this cell (they must already be in
# the namespace); the commented lines are kept only as example values.
#lag_time = 5
#n_components = 5
#sparse = True
#wolf = True
#rho = 0.0025
#shrinkage = None
#traj_ext = ".h5"

# Fit tICA on the feature files in `features_dir` and write the results under `tica_dir`.
fit_and_transform(features_directory = features_dir, model_dir = tica_dir, stride=5, lag_time = lag_time, n_components = n_components, sparse = sparse, wolf = wolf, rho = rho, shrinkage = shrinkage, parallel=True, traj_ext = traj_ext)

loading feature files
(180, 2421)
[ 0.52346456  0.32963106  0.13113599  0.54487395  0.13683915  0.36521748
  0.4056693   0.66657305  0.72754705  0.58542711]
(63,)
fitting data to tICA model
Sparse time-structure based Independent Components Analysis (tICA)
------------------------------------------------------------------
n_components        : 10
shrinkage           : None
lag_time            : 10
weighted_transform  : True
rho                 : 0.005
n_features          : 2421

Top 5 timescales :
[ 760.75444272  908.75320432  421.90543559  309.8079982   276.49682614]

Top 5 eigenvalues :
[ 0.98694117  0.98905624  0.97657669  0.96823732  0.96447942]

Number of active degrees of freedom:
[20/2421, 13/2421, 12/2421, 14/2421, 23/2421]
loading /home/enf/quintin/Post_Process/GPCR/MOR/LIG_path/BU_path/featuresprotein-ligand_protein-protein_contacts-all_residues_4dkl_5c1m_under_cutoff6A/Rep_0-0.dataset
loading /home/enf/quintin/Post_Process/GPCR/MOR/LIG_path/BU_path/featuresprotein-ligand_pro



In [48]:
# Location of the tICA-projected features inside `tica_dir`.
projected_features_dir = "%s/phi_psi_chi2_allprot_projected.h5" % tica_dir

# One title per plotted column: "tIC1" ... "tIC10".
tic_titles = ["tIC%d" % tic_number for tic_number in range(1, 11)]
plot_columns(
    tica_dir,
    projected_features_dir,
    titles=tic_titles,
    tICA=True,
    scale=1.0,
    refcoords_file=None,
)

loading "/home/enf/quintin/Post_Process/GPCR/MOR/LIG_path/BU_path/sparse-tICA_t5_n_components10protein-ligand_protein-protein_contacts-all_residues_4dkl_5c1m_under_cutoff6A_regularization_wolf_autoShrinkage_rho0pt005/phi_psi_chi2_allprot_projected.h5"...
()
None


RuntimeError: In set_text: could not load glyph

In [44]:
# IPython magics: load the autoreload extension and set it to mode 2.
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [45]:
# Output directory for the per-tIC interpretation files; created on first use.
tic_components_dir = "%s/tic_components" % tica_dir
if not os.path.exists(tic_components_dir):
    os.makedirs(tic_components_dir)

# File in `tica_dir` that is handed to interpret_tIC_components as its first argument.
projection_operator_dir = "%s/phi_psi_chi2_allprot_tica_coords.h5" % tica_dir
interpret_tIC_components(
    projection_operator_dir,
    tic_components_dir,
    feature_residues_pkl,
    n_tica_components=5,
    percentile=95,
)


loading "/home/enf/quintin/Post_Process/GPCR/MOR/LIG_path/BU_path/sparse-tICA_t5_n_components10protein-ligand_protein-protein_contacts-all_residues_4dkl_5c1m_under_cutoff6A_regularization_wolf_autoShrinkage_rho0pt005/phi_psi_chi2_allprot_tica_coords.h5"...
Interpreting tIC 1
feature_importances_df.shape
(2421, 6)
residue_importances_df.shape
(274, 3)
           feature_name       res_i   res_j  resid_i  resid_j  importance
1421      His319-Thr315      His319  Thr315      319      315    1.466279
616         Thr70-Val66       Thr70   Val66       70       66    0.880260
1073      Leu339-Val284      Leu339  Val284      339      284    0.842377
2184   Lig900-C7-Pro295   Lig900-C7  Pro295      900      295   -0.750346
871       Val282-Leu339      Val282  Leu339      282      339   -0.641739
2402  Lig900-C17-Val300  Lig900-C17  Val300      900      300    0.546278
480       Asn150-Trp293      Asn150  Trp293      150      293    0.488708
541       Ile107-Met281      Ile107  Met281      107   

In [47]:
# Load the projected features and display the shape of the first entry.
tica_coords = verboseload(projected_features_dir)
first_entry = tica_coords[0]
np.shape(first_entry)

loading "/home/enf/quintin/Post_Process/GPCR/MOR/LIG_path/BU_path/sparse-tICA_t5_n_components10protein-ligand_protein-protein_contacts-all_residues_4dkl_5c1m_under_cutoff6A_regularization_wolf_autoShrinkage_rho0pt005/phi_psi_chi2_allprot_projected.h5"...


(180, 10)

In [49]:
# Cluster the projected features, passing tIC indices 0-9.
# NOTE(review): this rebinds `n_clusters` and `clusterer_dir`, which earlier
# path-setup code also binds — confirm paths derived before this cell are still valid.
n_clusters = 100
clusterer_dir = "%s/clusterer_%dclusters.h5" % (tica_dir, n_clusters)
tic_indices = list(range(10))
cluster_minikmeans(
    tica_dir,
    projected_features_dir,
    traj_dir,
    n_clusters,
    clusterer_dir,
    tICs=tic_indices,
)

Clustering by KMeans
loading "/home/enf/quintin/Post_Process/GPCR/MOR/LIG_path/BU_path/sparse-tICA_t5_n_components10protein-ligand_protein-protein_contacts-all_residues_4dkl_5c1m_under_cutoff6A_regularization_wolf_autoShrinkage_rho0pt005/phi_psi_chi2_allprot_projected.h5"...
Saving "/home/enf/quintin/Post_Process/GPCR/MOR/LIG_path/BU_path/sparse-tICA_t5_n_components10protein-ligand_protein-protein_contacts-all_residues_4dkl_5c1m_under_cutoff6A_regularization_wolf_autoShrinkage_rho0pt005/clusterer_100clusters.h5"... (<class 'msmbuilder.cluster.MiniBatchKMeans'>)


In [50]:
# Draw `n_samples` frames per cluster with the chosen sampling method.
n_samples = 10
sampling_method = "random"

# Output locations are derived from `n_clusters` / `n_samples` instead of the
# hard-coded "100" / "10", so the names cannot drift from the values actually
# used. With n_clusters == 100 and n_samples == 10 the paths are unchanged.
save_dir = "%s/clusters%d_samples%d" % (tica_dir, n_clusters, n_samples)
clusters_map_file = "%s/clusters%d_map.h5" % (tica_dir, n_clusters)

sample_clusters(clusterer_dir, projected_features_dir, traj_dir, traj_ext, save_dir, n_samples, method = sampling_method, clusters_map_file = clusters_map_file)

loading "/home/enf/quintin/Post_Process/GPCR/MOR/LIG_path/BU_path/sparse-tICA_t5_n_components10protein-ligand_protein-protein_contacts-all_residues_4dkl_5c1m_under_cutoff6A_regularization_wolf_autoShrinkage_rho0pt005/clusterer_100clusters.h5"...
160
130
66
143
99
247
113
389
30
127
148
118
46
112
187
185
240
132
163
289
90
114
278
203
134
46
175
166
50
51
133
231
228
67
231
228
300
170
184
146
82
172
144
30
318
116
200
70
48
58
165
191
80
69
133
103
165
298
90
222
66
161
156
172
150
131
246
110
183
345
120
58
185
391
18
140
169
305
171
128
477
74
200
157
104
234
172
197
176
61
32
62
103
112
113
21
62
76
92
141
loading "/home/enf/quintin/Post_Process/GPCR/MOR/LIG_path/BU_path/sparse-tICA_t5_n_components10protein-ligand_protein-protein_contacts-all_residues_4dkl_5c1m_under_cutoff6A_regularization_wolf_autoShrinkage_rho0pt005/phi_psi_chi2_allprot_projected.h5"...
[-139.13719371  102.00695654   54.46698333  168.76828189  371.05506083
  -16.0564827   -48.06447244  269.34655703 -297.10001555

In [55]:
# `reload` is a builtin only on Python 2. Import it explicitly so this cell
# does not depend on the interpreter version or on another cell having
# imported it first.
try:
    from importlib import reload  # Python 3.4+
except ImportError:
    from imp import reload  # Python 2.7, where importlib has no `reload`

# Pick up edits to the local `analysis` module, then refresh the star-imported names.
import analysis
reload(analysis)
from analysis import *

# Plot the tICA coordinates with clusters; every cluster id in
# [0, n_clusters) is passed as "active", none as intermediate or inactive.
plot_all_tics_and_clusters(tica_dir, projected_features_dir, clusterer_dir, lag_time, label = "cluster_id", active_cluster_ids = range(0,n_clusters), intermediate_cluster_ids = [], inactive_cluster_ids = [])

loading "/home/enf/quintin/Post_Process/GPCR/MOR/LIG_path/BU_path/sparse-tICA_t5_n_components10protein-ligand_protein-protein_contacts-all_residues_4dkl_5c1m_under_cutoff6A_regularization_wolf_autoShrinkage_rho0pt005/phi_psi_chi2_allprot_projected.h5"...
loading "/home/enf/quintin/Post_Process/GPCR/MOR/LIG_path/BU_path/sparse-tICA_t5_n_components10protein-ligand_protein-protein_contacts-all_residues_4dkl_5c1m_under_cutoff6A_regularization_wolf_autoShrinkage_rho0pt005/clusterer_100clusters.h5"...
Printed all tICA coords and all requested clusters


In [36]:
# Fourth argument is the integers 1..24.
# NOTE(review): presumably the lag times to scan — confirm against plot_timescales.
scan_values = list(range(1, 25))
plot_timescales(clusterer_dir, n_clusters, tica_dir, scan_values)


loading "/home/enf/quintin/Post_Process/GPCR/MOR/LIG_path/BU_path/sparse-tICA_t5_n_components10protein-ligand_protein-protein_contacts-all_residues_4dkl_5c1m_under_cutoff6A_regularization_wolf_autoShrinkage_rho0pt005/clusterer_100clusters.h5"...
MiniBatchKMeans(batch_size=100, compute_labels=True, init='k-means++',
        init_size=None, max_iter=100, max_no_improvement=10,
        n_clusters=100, n_init=10, random_state=None,
        reassignment_ratio=0.01, tol=0.0, verbose=0)
MSM contains 17 strongly connected components above weight=1.00. Component 10 selected, with population 75.971147%
MSM contains 18 strongly connected components above weight=0.50. Component 11 selected, with population 75.910420%cluster 26 sample 27
cluster 29 sample 44
cluster 32 sample 29
cluster 35 sample 28
cluster 2 sample 143cluster 5 sample 151cluster 8 sample 34cluster 11 sample 143cluster 14 sample 120cluster 17 sample 63cluster 20 sample 42cluster 23 sample 4cluster 26 sample 292cluster 29 sample 107

In [56]:
# Build the MSM at lag time 10 and save it under `tica_dir`.
# NOTE(review): this rebinds `lag_time`, the same name other cells pass to the
# tICA helpers; re-running those cells after this one would use 10 — confirm intended.
lag_time = 10
msm_model_dir = "%s/msm_lag_time%d.h5" % (tica_dir, lag_time)
build_msm(
    clusterer_dir,
    lag_time=lag_time,
    msm_model_dir=msm_model_dir,
)


loading "/home/enf/quintin/Post_Process/GPCR/MOR/LIG_path/BU_path/sparse-tICA_t5_n_components10protein-ligand_protein-protein_contacts-all_residues_4dkl_5c1m_under_cutoff6A_regularization_wolf_autoShrinkage_rho0pt005/clusterer_100clusters.h5"...
fitting msm to trajectories with 100 clusters and lag_time 10
MSM contains 17 strongly connected components above weight=0.10. Component 9 selected, with population 77.388973%
Saving "/home/enf/quintin/Post_Process/GPCR/MOR/LIG_path/BU_path/sparse-tICA_t5_n_components10protein-ligand_protein-protein_contacts-all_residues_4dkl_5c1m_under_cutoff6A_regularization_wolf_autoShrinkage_rho0pt005/msm_lag_time10.h5"... (<class 'msmbuilder.msm.msm.MarkovStateModel'>)
fitted msm to trajectories with 72 states


In [17]:
# Write the MSM graph to a .graphml file named after the current `lag_time`.
graph_file = "%s/msm_lag_time%d_graph.graphml" % (tica_dir, lag_time)
# NOTE(review): the two positional 5s are passed through unchanged; their
# meaning is defined by construct_graph — confirm against its signature.
construct_graph(
    msm_model_dir,
    clusterer_dir,
    n_clusters,
    5,
    5,
    graph_file,
    inactive=None,
    active=None,
    pnas_clusters_averages=None,
    tica_clusters_averages=None,
    docking=None,
    macrostate=None,
)


loading "/home/amir/Post_Process/GPCR/MOR/LIG_path/BU_path/tica_ligand_protein_contacts_3_sparse_0pt0025/clusterer_50clusters.h5"...
loading "/home/amir/Post_Process/GPCR/MOR/LIG_path/BU_path/tica_ligand_protein_contacts_3_sparse_0pt0025/msm_lag_time5.h5"...
39


In [57]:
# `imp` is deprecated and was removed in Python 3.12; prefer importlib.reload
# and fall back to `imp` only where importlib has no `reload` (Python 2.7).
try:
    from importlib import reload
except ImportError:
    from imp import reload

# Pick up edits to the local `custom_msm` module, then refresh the star-imported names.
import custom_msm
reload(custom_msm)
from custom_msm import *

# Generate a 1000-step trajectory from the fitted MSM, starting in cluster 22.
msm_file = msm_model_dir
sampled_frames_file = "%s/msm100_frames.h5" %tica_dir
msm_trajectory_filename = "%s/msm100_1000frames" %tica_dir
make_msm_trajectory(msm_file, projected_features_dir, traj_dir, sampled_frames_file, clusterer_dir, msm_trajectory_filename, 
                    n_clusters, start_cluster=22, n_steps=1000)

loading "/home/enf/quintin/Post_Process/GPCR/MOR/LIG_path/BU_path/sparse-tICA_t5_n_components10protein-ligand_protein-protein_contacts-all_residues_4dkl_5c1m_under_cutoff6A_regularization_wolf_autoShrinkage_rho0pt005/phi_psi_chi2_allprot_projected.h5"...
loading "/home/enf/quintin/Post_Process/GPCR/MOR/LIG_path/BU_path/sparse-tICA_t5_n_components10protein-ligand_protein-protein_contacts-all_residues_4dkl_5c1m_under_cutoff6A_regularization_wolf_autoShrinkage_rho0pt005/clusterer_100clusters.h5"...
Saving "/home/enf/quintin/Post_Process/GPCR/MOR/LIG_path/BU_path/sparse-tICA_t5_n_components10protein-ligand_protein-protein_contacts-all_residues_4dkl_5c1m_under_cutoff6A_regularization_wolf_autoShrinkage_rho0pt005/msm100_frames.h5"... (<type 'list'>)
loading "/home/enf/quintin/Post_Process/GPCR/MOR/LIG_path/BU_path/sparse-tICA_t5_n_components10protein-ligand_protein-protein_contacts-all_residues_4dkl_5c1m_under_cutoff6A_regularization_wolf_autoShrinkage_rho0pt005/msm_lag_time10.h5"...13
32
45

In [22]:
# Load a single trajectory for inspection.
# NOTE(review): absolute, user-specific path — will not resolve on another machine.
mytraj_path = "/home/amir/Post_Process/GPCR/MOR/LIG_path/BU_path/h5_trajectories/rep_1.h5"
mytraj = md.load(mytraj_path)

In [23]:
# Interactive inspection only: lists the attribute names of `mytraj`.
dir(mytraj)

['__add__',
 '__class__',
 '__delattr__',
 '__dict__',
 '__dir__',
 '__doc__',
 '__eq__',
 '__format__',
 '__ge__',
 '__getattribute__',
 '__getitem__',
 '__gt__',
 '__hash__',
 '__init__',
 '__le__',
 '__len__',
 '__lt__',
 '__module__',
 '__ne__',
 '__new__',
 '__reduce__',
 '__reduce_ex__',
 '__repr__',
 '__setattr__',
 '__sizeof__',
 '__slotnames__',
 '__str__',
 '__subclasshook__',
 '__weakref__',
 '_check_valid_unitcell',
 '_distance_unit',
 '_have_unitcell',
 '_rmsd_traces',
 '_savers',
 '_string_summary_basic',
 '_time',
 '_time_default_to_arange',
 '_topology',
 '_unitcell_angles',
 '_unitcell_lengths',
 '_xyz',
 'atom_slice',
 'center_coordinates',
 'join',
 'load',
 'n_atoms',
 'n_chains',
 'n_frames',
 'n_residues',
 'openmm_boxes',
 'openmm_positions',
 'remove_solvent',
 'restrict_atoms',
 'save',
 'save_amberrst7',
 'save_binpos',
 'save_dcd',
 'save_dtr',
 'save_gro',
 'save_hdf5',
 'save_lammpstrj',
 'save_lh5',
 'save_mdcrd',
 'save_netcdf',
 'save_netcdfrst',
 'save_

In [19]:
# Drop the name `mytraj` from the notebook namespace.
del mytraj

In [48]:
# Take atoms 0-399 of `crystal_structure` and print their coordinates.
# NOTE(review): `crystal_structure` is not assigned in any visible cell — it
# must already exist in the kernel namespace.
subset = crystal_structure.atom_slice(range(0,400))
print(subset.xyz)

# As the printed result shows, compute_contacts returns a 2-tuple of arrays, so
# `distances` holds the whole tuple: element 0 is the distance array and
# element 1 the index pairs.
distances = md.compute_contacts(subset)
print(distances)


[[[-3.67750001 -2.00090003 -3.06229997]
  [-3.55780005 -1.92980003 -3.01830006]
  [-3.56550002 -1.78180003 -3.05369997]
  ..., 
  [-1.46140003 -0.32269999 -1.54359996]
  [-1.40170002 -0.40900001 -1.77069998]
  [-1.92920005 -0.43360001 -1.81420004]]]
(array([[ 0.29087129,  0.30480972,  0.50911838, ...,  0.31547278,
         0.29779878,  0.30303043]], dtype=float32), array([[ 0,  3],
       [ 0,  4],
       [ 0,  5],
       ..., 
       [47, 50],
       [47, 51],
       [48, 51]]))


In [20]:
# First element of the `distances` tuple.
distances0 = distances[0]

In [23]:
# Replace NaN/inf entries with finite numbers (NaN becomes 0.0); rebinds `distances0`.
distances0 = np.nan_to_num(distances0)

In [24]:
# Display the cleaned array.
distances0

array([[ 0.,  0.,  0., ...,  0.,  0.,  0.]], dtype=float32)

In [25]:
# Indices of strictly positive entries (one index array per dimension).
positive_mask = distances0 > 0.0
np.nonzero(positive_mask)

(array([], dtype=int64), array([], dtype=int64))