In [None]:
%matplotlib inline
import glob
import os
from itertools import product
from threading import Timer

import Bio.PDB
import matplotlib.pyplot as plt
import mdtraj as md
import numpy as np
import pyemma
from pandas import DataFrame

In [None]:
# Reference structure: only its topology is kept. It is handed to the
# PyEMMA featurizer further down, so it defines the atom indexing.
pdb = md.load('/home/yongnayuan/MXQ/traj/Tau_ini.pdb').topology

# glob.glob() returns matches in arbitrary, filesystem-dependent order.
# Sort them so the trajectory order (and therefore the order of every
# per-trajectory result derived from `files`) is identical on every run.
files = sorted(glob.glob('/home/yongnayuan/MXQ/traj/*.xtc'))
if not files:
    # Fail here with a clear message instead of deep inside the loader.
    raise FileNotFoundError(
        'no .xtc trajectories found in /home/yongnayuan/MXQ/traj/')
print(pdb)
print(files)

In [None]:

# Parse the reference structure with Biopython to locate the backbone atoms.
# The parser is reached through the already-imported `Bio.PDB` package; the
# bare name `PDBParser` is never imported in this notebook.
parser = Bio.PDB.PDBParser()
structure = parser.get_structure('example', '/home/yongnayuan/MXQ/traj/Tau_ini.pdb')
model = structure[0]  # first model of the file
residues = list(model.get_residues())

if len(residues) < 3:
    raise ValueError(
        'need at least 3 residues to define phi/psi for an interior residue')


def atom_index(residue, atom_name):
    """Return the 0-based index of atom `atom_name` in `residue`.

    The index is the PDB serial number minus one.
    NOTE(review): this assumes the serial numbers in the PDB file are
    contiguous and start at 1, so that they line up with the 0-based atom
    indices of the topology given to the featurizer -- confirm for this file.
    """
    return residue[atom_name].get_serial_number() - 1


# One phi/psi pair per interior residue (every residue that has both a
# predecessor and a successor), i.e. residues[1] .. residues[-2].
num_pairs = len(residues) - 2
phi_indexes = np.zeros((num_pairs, 4), dtype=int)
psi_indexes = np.zeros((num_pairs, 4), dtype=int)

for row in range(num_pairs):
    prev_residue = residues[row]
    residue = residues[row + 1]
    next_residue = residues[row + 2]

    # phi(i) = C(i-1) - N(i) - CA(i) - C(i)
    phi_indexes[row] = (
        atom_index(prev_residue, 'C'),
        atom_index(residue, 'N'),
        atom_index(residue, 'CA'),
        atom_index(residue, 'C'),
    )
    # psi(i) = N(i) - CA(i) - C(i) - N(i+1); the first atom is the residue's
    # own N, not the N of the previous residue.
    psi_indexes[row] = (
        atom_index(residue, 'N'),
        atom_index(residue, 'CA'),
        atom_index(residue, 'C'),
        atom_index(next_residue, 'N'),
    )

# Cheap sanity check: every index must address an atom of the topology.
for label, indexes in (('phi', phi_indexes), ('psi', psi_indexes)):
    if indexes.min() < 0 or indexes.max() >= pdb.n_atoms:
        raise ValueError(
            '{} atom indices fall outside the topology'.format(label))

# Featurize all trajectories with the phi and psi dihedral angles.
torsions_feat = pyemma.coordinates.featurizer(pdb)
torsions_feat.add_dihedrals(phi_indexes)
torsions_feat.add_dihedrals(psi_indexes)
torsions_data = pyemma.coordinates.load(files, features=torsions_feat)

In [None]:
# TICA on the torsion features, estimated at a lag of 500 trajectory frames.
tica = pyemma.coordinates.tica(
    torsions_data,
    lag=500,
)
# One projected array per trajectory ...
tica_output = tica.get_output()
# ... and all of them stacked along the frame axis for plotting.
tica_concatenated = np.concatenate(tica_output, axis=0)

In [None]:
# Discretize the TICA space into 100 k-means microstates.
# k-means initialisation is random; `fixed_seed` pins the random generator so
# that Restart & Run All reproduces the same cluster centers (and therefore
# the same discrete trajectories and everything built on them).
cluster = pyemma.coordinates.cluster_kmeans(
    tica_output, k=100, max_iter=1000, fixed_seed=1)
# Discrete trajectories of all input trajectories, joined into one array.
dtrajs_concatenated = np.concatenate(cluster.dtrajs)

In [None]:
# Implied timescales of the discrete trajectories (10 slowest processes),
# computed serially (n_jobs=1).
its = pyemma.msm.its(
    cluster.dtrajs,
    lags=600,
    nits=10,
    n_jobs=1,
)
# dt=0.1 with units='ns' rescales the axes from frames to nanoseconds.
pyemma.plots.plot_implied_timescales(its, dt=0.1, units='ns')

In [None]:
# Number of metastable (coarse-grained) states used from here on.
nstates = 5

# Bayesian MSM at a lag of 400 frames; one frame corresponds to 0.1 ns.
msm = pyemma.msm.bayesian_markov_model(
    cluster.dtrajs,
    lag=400,
    dt_traj='0.1 ns',
)

# Validate the model with cktest() on `nstates` sets, run serially, and plot
# the result on a nanosecond axis.
cktest = msm.cktest(nstates, n_jobs=1)
pyemma.plots.plot_cktest(cktest, dt=0.1, units='ns');

In [None]:

# Mean first passage time between every ordered pair of metastable sets;
# row = start set, column = target set.
# NOTE(review): the table is labelled "ns", but PyEMMA documents msm.mfpt()
# as returning values in units of the trajectory time step -- confirm whether
# a conversion with the 0.1 ns frame spacing is needed.
mfpt = np.zeros((nstates, nstates))
for i in range(nstates):
    for j in range(nstates):
        start_set = msm.metastable_sets[i]
        target_set = msm.metastable_sets[j]
        mfpt[i, j] = msm.mfpt(start_set, target_set)

print('MFPT / ns:')
state_numbers = range(1, nstates + 1)  # 1-based labels for rows and columns
DataFrame(np.round(mfpt, decimals=2), index=state_numbers, columns=state_numbers)

In [None]:
# Position of each metastable state in the network plot: the first two
# coordinates of the cluster center that carries the largest weight in that
# state's metastable distribution. `active_set` maps the MSM's internal state
# index back to the cluster index. (`coarse_state_centers` was previously used
# here without being defined anywhere in the notebook.)
highest_membership = msm.metastable_distributions.argmax(1)
coarse_state_centers = cluster.clustercenters[msm.active_set[highest_membership], :2]

# Arrow weights: inverse MFPT, leaving the zero (diagonal) entries at zero.
# The factor 1000 converts the MFPT values to the unit used for the labels
# below (ns -> us, given that `mfpt` is in ns).
inverse_mfpt = np.zeros_like(mfpt)
nz = mfpt.nonzero()
inverse_mfpt[nz] = 1.0 / (mfpt[nz] * 1000)

pyemma.plots.plot_network(
    inverse_mfpt,
    pos=coarse_state_centers,
    # One colour per metastable state; this list matches nstates = 5.
    state_colors=['blue', 'orange', 'green', 'red', 'purple'],
    arrow_label_format='%.1f us',
    arrow_labels=mfpt / 1000,
    arrow_scale=2.0,
    state_labels=range(1, nstates + 1),
    size=12,);

In [None]:
# Stationary probability of each metastable set: the sum of msm.pi over the
# microstates belonging to that set.
p_values = np.zeros(len(msm.metastable_sets))
for i, s in enumerate(msm.metastable_sets):
    p_values[i] = msm.pi[s].sum()

# Table of 1-based state number, population and -ln(population).
print('state\tπ\t\tG/kT')
for state_number, p in enumerate(p_values, start=1):
    free_energy = -np.log(p)
    print('{}\t{:f}\t{:f}'.format(state_number, p, free_energy))

print(p_values)

In [None]:
# Free energy surface over the first two TICA components, with every frame
# weighted by the MSM-derived trajectory weights.
fig, ax = plt.subplots(figsize=(5, 4), sharex=True)

frame_weights = np.concatenate(msm.trajectory_weights())
pyemma.plots.plot_free_energy(
    *tica_concatenated[:, :2].T,  # IC 1 and IC 2 as two 1-D arrays
    weights=frame_weights,
    ax=ax,
    cmap=plt.cm.nipy_spectral_r,
    legacy=False,
)

ax.set_title('Reweighted free energy surface', fontweight='bold')
ax.set_xlabel('IC 1')
ax.set_ylabel('IC 2')
fig.tight_layout()

In [None]:
# Draw 1000 frames per metastable state, sampled according to each state's
# metastable distribution.
pcca_samples = msm.sample_by_distributions(msm.metastable_distributions, 1000)

# Reader over the same trajectory files; save_trajs pulls the sampled frames
# from it.
torsions_source = pyemma.coordinates.source(files, features=torsions_feat)

# The output directory is not created by anything else in this notebook;
# make sure it exists so the save below does not fail on a fresh checkout.
os.makedirs('./data', exist_ok=True)

# One PDB file per metastable state.
pyemma.coordinates.save_trajs(
    torsions_source,
    pcca_samples,
    outfiles=['./data/pcca{}_1samples.pdb'.format(n + 1)
              for n in range(msm.n_metastable)])