In [None]:
from __future__ import print_function
import sys
sys.path.append('..')
import numpy as np
import pyemma
import pyemma.datasets
import simplex
import matplotlib.pyplot as plt
%matplotlib inline

In [None]:
# Seed NumPy's global random number generator so that the Gaussian noise drawn
# with np.random.randn further down in this notebook is reproducible.
np.random.seed(seed=7778)

In [None]:
# Load PyEMMA's discrete two-well example data set and plot the negative
# logarithm of the stationary distribution of its MSM (labelled 'energy')
# against the array index (labelled 'x').
data = pyemma.datasets.load_2well_discrete()
stationary_distribution = data.msm.stationary_distribution
plt.plot(-np.log(stationary_distribution))
plt.ylabel('energy')
plt.xlabel('x')

In [None]:
# Simulate non-connected data.
# In a real application, the data could be completely disconnected if we start
# multiple simulations from different initial conditions.
# NOTE(review): presumably `stop` ends a trajectory as soon as that state is
# reached, so that neither group crosses between the wells -- confirm against
# the PyEMMA documentation of generate_traj.
max_length = 10000

# three trajectories started at state 30 (stop=48)
trajs1 = []
for _ in range(3):
    trajs1.append(data.generate_traj(max_length, start=30, stop=48).astype(float))

# two trajectories started at state 70 (stop=52)
trajs2 = []
for _ in range(2):
    trajs2.append(data.generate_traj(max_length, start=70, stop=52).astype(float))

trajs = trajs1 + trajs2

# Add some Gaussian noise (scaled by 3) to every frame to model a second,
# non-metastable dimension that is not completely orthogonal to the
# x-coordinate of the double-well.
noise_scale = 3
trajs = [t + noise_scale*np.random.randn(len(t)) for t in trajs]

In [None]:
# Histogram (40 bins) of the noisy coordinate, pooled over all trajectories.
all_frames = np.concatenate(trajs)
plt.hist(all_frames, bins=40)
plt.ylabel('counts')
plt.xlabel('x')

In [None]:
# select a very bad clustering: only two cluster centers, at x=40 and x=70
coarse_centers = np.array([[40.0], [70.0]])
dtrajs = pyemma.coordinates.assign_to_centers(data=trajs, centers=coarse_centers)

In [None]:
# go ahead and compute the MSM: estimate the implied timescales from the
# current discrete trajectories `dtrajs` and plot them
its = pyemma.msm.its(dtrajs)
pyemma.plots.plot_implied_timescales(its)

In [None]:
# use some default algorithm: k-means clustering of the noisy trajectories
n_clusters = 101
kmeans = pyemma.coordinates.cluster_kmeans(data=trajs, k=n_clusters)
# note: this replaces the `dtrajs` obtained from the earlier assignment
dtrajs = kmeans.dtrajs

In [None]:
# also the default MSM is typically fooled: recompute the implied timescales
# from the current discrete trajectories `dtrajs` and plot them
its = pyemma.msm.its(dtrajs)
pyemma.plots.plot_implied_timescales(its)

In [None]:
# Plot every discrete trajectory mapped back onto the coordinates of the
# k-means cluster centers it was assigned to.
kmeans_centers = kmeans.clustercenters
for dtraj in dtrajs:
    plt.plot(kmeans_centers[dtraj])

In [None]:
# go to TICA space (in 1-D this is only a formal operation)
tica_transform = pyemma.coordinates.tica(data=trajs)
tics = tica_transform.get_output()

In [None]:
# build core-set MSM
# a) find vertices from the TICA output `tics`; they are passed to the core
#    assignment step further down in this notebook
vertices = simplex.find_vertices_inner_simplex(tics)

In [None]:
# b) compute core assignments of the TICA output with respect to the vertices
# NOTE(review): f=0.55 is presumably a membership threshold -- confirm against
# simplex.core_assignments
ctrajs = simplex.core_assignments(tics, vertices=vertices, f=0.55)

In [None]:
# TODO: plot cores, plot memberships!

In [None]:
# plot core-to-core trajectories
# Negative entries (presumably frames not assigned to any core -- confirm) are
# replaced by NaN before plotting.
for ctraj in ctrajs:
    plt.plot(np.where(ctraj < 0, np.nan, ctraj))
plt.ylim((-0.1,1.1))
# sometimes there are some recrossing events left, despite the introduction of cores

In [None]:
# c) We can filter these out by relabeling very short visits to cores as 'unassigned'.
lifetime_cutoff = 1  # usually a small life time cutoff is enough
ctrajs_metastable = simplex.filter_out_short_core_visits(ctrajs, cutoff=lifetime_cutoff)
# plot the filtered core trajectories; negative entries are replaced by NaN
for filtered_ctraj in ctrajs_metastable:
    plt.plot(np.where(filtered_ctraj < 0, np.nan, filtered_ctraj))
plt.ylim((-0.1,1.1))

In [None]:
# d) now that we are more or less sure about the discretization quality, compute count matrices.
# The result of the call (at lag=1) is the cell's last expression and is
# therefore displayed as the cell output; it is not stored in a variable.
simplex.milestoning_count_matrix(ctrajs_metastable, lag=1)

In [None]:
# TODO: make example where we can estimate a connected MSM

In [None]:
# Compute the life time distributions from the filtered core trajectories and
# print the entry of every core with a non-negative label.
ltd = simplex.life_time_distributions(ctrajs_metastable)
for core_index in ltd:
    if core_index < 0:
        # skip negative labels (presumably the 'unassigned' state -- confirm)
        continue
    lifetimes = ltd[core_index]
    print(core_index, lifetimes)

In [None]:
# basic hypothesis: there is always some dimension along which the dynamics are irreversible and which is not
# well-discretized