# Analyzing (RE)PPTIS simulations using an MSM approach
This notebook contains an example workflow that can be used for estimating the crossing probability and pathlengths of a (RE)PPTIS simulation.

## 1. Import the necessary functions

In [1]:
%load_ext autoreload
%autoreload 2
%matplotlib qt

import matplotlib.pyplot as plt
from pprint import pprint    # to print the vars of the pathensemble object
import numpy as np
import os
import glob

# Reading
from tistools import read_inputfile, get_LMR_interfaces, read_pathensemble, get_weights
from tistools import set_tau_distrib, set_tau_first_hit_M_distrib, cross_dist_distr, pathlength_distr
from tistools import collect_tau, collect_tau1, collect_tau2, collect_taum
from tistools import ACCFLAGS, REJFLAGS

# REPPTIS analysis
from tistools import get_lmr_masks, get_generation_mask, get_flag_mask, select_with_masks
from tistools import unwrap_by_weight, running_avg_local_probs, get_local_probs, get_global_probs_from_dict, get_global_probs_from_local

# MSM functions
from tistools import construct_M
from tistools import global_pcross_msm
from tistools import mfpt_to_first_last_state, mfpt_to_absorbing_states, construct_tau_vector
from tistools import create_labels_states, print_vector, print_all_tau

## 2. Load the simulation data

In [7]:

# Set the working directory
indir = "/Users/an/Documents/0_mfpt/repptis1/"  

# indir = "/mnt/0bf0c339-34bb-4500-a5fb-f3c2a863de29/DATA/PyRETIS3/toytis/simulations/sim_istarwell0_2108"
# indir = "/mnt/0bf0c339-34bb-4500-a5fb-f3c2a863de29/DATA/PyRETIS3/toytis/simulations/sim_repptismazegap2708"
# indir = "/mnt/0bf0c339-34bb-4500-a5fb-f3c2a863de29/DATA/i_star/simulations/RETIS_flat_br_noswap"
# indir = "/run/user/1001/gvfs/smb-share:server=files.ugent.be,share=eliawils,user=eliawils/shares/tw06_biommeda_pyretis/04.2024_MSM_elias/simulations/flat_w-walls/brownian-gamma5/30k-cycles/REPPTIS"
# indir = "/run/user/1001/gvfs/smb-share:server=files.ugent.be,share=eliawils,user=eliawils/shares/tw06_biommeda_abl/paper-msm-short-term/simulations-and-analysis/trypsin-benzamidine/infrepptisanalysis/tistools-mfptanalysis/"
indir = "/mnt/0bf0c339-34bb-4500-a5fb-f3c2a863de29/DATA/MSM-REPPTIS/trypsin-benzos/pyretis/"
# indir = "/mnt/0bf0c339-34bb-4500-a5fb-f3c2a863de29/DATA/APPTIS/simdata/infrepptis/"
# indir = "/mnt/0bf0c339-34bb-4500-a5fb-f3c2a863de29/DATA/MSM-REPPTIS/1D-experiments/REPPTIS"
# indir = "/run/user/1001/gvfs/sftp:host=172.18.15.4,user=elias/home/elias/Documents/trypsin-benzos/infrepptis/pyretis"

# zero_minus_one = True if lambda_-1 interface is set
# zero_minus_one = False if lambda_-1 interface is not set
zero_minus_one = False

inputfile = indir + "/repptis.rst"    # When using PyRETIS, the input file for REPPTIS simulations is a .rst file
# inputfile = indir + "/retis3.rst"

# Move to working directory
os.chdir(indir)
print(os.getcwd())

# Set the ensemble folders and print them
folders = glob.glob(indir + "/0[0-9][0-9]")
folders = sorted(folders)
print(folders)

/mnt/0bf0c339-34bb-4500-a5fb-f3c2a863de29/DATA/MSM-REPPTIS/trypsin-benzos/pyretis
['/mnt/0bf0c339-34bb-4500-a5fb-f3c2a863de29/DATA/MSM-REPPTIS/trypsin-benzos/pyretis/000', '/mnt/0bf0c339-34bb-4500-a5fb-f3c2a863de29/DATA/MSM-REPPTIS/trypsin-benzos/pyretis/001', '/mnt/0bf0c339-34bb-4500-a5fb-f3c2a863de29/DATA/MSM-REPPTIS/trypsin-benzos/pyretis/002', '/mnt/0bf0c339-34bb-4500-a5fb-f3c2a863de29/DATA/MSM-REPPTIS/trypsin-benzos/pyretis/003', '/mnt/0bf0c339-34bb-4500-a5fb-f3c2a863de29/DATA/MSM-REPPTIS/trypsin-benzos/pyretis/004', '/mnt/0bf0c339-34bb-4500-a5fb-f3c2a863de29/DATA/MSM-REPPTIS/trypsin-benzos/pyretis/005', '/mnt/0bf0c339-34bb-4500-a5fb-f3c2a863de29/DATA/MSM-REPPTIS/trypsin-benzos/pyretis/006', '/mnt/0bf0c339-34bb-4500-a5fb-f3c2a863de29/DATA/MSM-REPPTIS/trypsin-benzos/pyretis/007', '/mnt/0bf0c339-34bb-4500-a5fb-f3c2a863de29/DATA/MSM-REPPTIS/trypsin-benzos/pyretis/008', '/mnt/0bf0c339-34bb-4500-a5fb-f3c2a863de29/DATA/MSM-REPPTIS/trypsin-benzos/pyretis/009', '/mnt/0bf0c339-34bb-4500-a5

In [8]:
# Reading all input
#===================
interfaces, zero_left, timestep = read_inputfile(inputfile)
LMR_interfaces, LMR_strings = get_LMR_interfaces(interfaces, zero_left)
pathensembles = []
for i,fol in enumerate(folders):
    print("#"*80)
    print(fol)
    pe = read_pathensemble(fol+"/pathensemble.txt")
    pe.set_name(fol)
    pe.set_interfaces([LMR_interfaces[i], LMR_strings[i]])
    if i==0:
        pe.set_zero_minus_one(zero_minus_one)   # TODO this is never used
        pe.set_in_zero_minus(True)
    if i==1:
        pe.set_in_zero_plus(True)
    w, _ = get_weights(pe.flags, ACCFLAGS, REJFLAGS, verbose = False)
    pe.set_weights(w)
    print("pathensemble info: ")
    pprint(vars(pe))
    pathensembles.append(pe)

    
    # Read order parameters order.txt/order.npy into path ensemble object, or load from order.npy file.
    # Saving order parameter files allows to speed up this notebook.
    #### CHANGE HERE ####
    pe.set_orders(load=False, acc_only=True, save=True)        # for the 1st time you run this notebook for a certain simulation, this will store .npy files
    # pe.set_orders(load=True, acc_only=True, save=False)                  # for the next times, you can read npy files (save=True/False is not important)
    # pe.set_orders(load=False, acc_only=True, save=False)     # if saving doesn't work

################################################################################
/mnt/0bf0c339-34bb-4500-a5fb-f3c2a863de29/DATA/MSM-REPPTIS/trypsin-benzos/pyretis/000
pathensemble info: 
{'cyclenumbers': array([   0,    1,    2, ..., 9563, 9564, 9565]),
 'flags': array(['ACC', 'ACC', 'REJ', ..., 'REJ', 'ACC', 'REJ'], dtype='<U3'),
 'generation': array(['sh', 'sh', 'sh', ..., 'sh', 'sh', 'sh'], dtype='<U2'),
 'has_zero_minus_one': False,
 'in_zero_minus': True,
 'in_zero_plus': False,
 'interfaces': [[1.0, 1.0, 1.0], ['l_[0]', 'l_[0]', 'l_[0]']],
 'lambmaxs': array([1.03094, 1.02106, 1.02106, ..., 1.01272, 1.02529, 1.02529]),
 'lambmins': array([0.9924 , 0.87089, 0.87089, ..., 0.71466, 0.72096, 0.72096]),
 'lengths': array([ 4, 17,  1, ...,  1, 41,  1]),
 'lmrs': array(['RMR', 'RMR', 'RMR', ..., 'RMR', 'RMR', 'RMR'], dtype='<U3'),
 'name': '/mnt/0bf0c339-34bb-4500-a5fb-f3c2a863de29/DATA/MSM-REPPTIS/trypsin-benzos/pyretis/000',
 'ncycle': 9566,
 'newpathnumbers': array([   0,    1,    1,

## 3. Regular (RE)PPTIS analysis using tistools

### Analyze the REPPTIS simulation.

In [9]:
# Analysis output is saved to the data dictionary.
data = {}
for i, pe in enumerate(pathensembles):
    print("doing pathensemble {}".format(i))
    if i == 0:
        data[i] = {}
        continue  #  [0-] is not used for Pcross calculations

    # Classify the paths according to their path type.
    pathtypes = ("LML", "LMR", "RML", "RMR")
    pathtype_cycles = {}
    for ptype in pathtypes:
        pathtype_cycles[ptype] = unwrap_by_weight(
                (pe.lmrs == ptype).astype(int), pe.weights)
    
    # Running average analysis: ["running"]
    data[i] = {}
    data[i]["running"] = {}
    data[i]["running"]["plocal"] = {}
    for (ptype, p_loc) in zip(pathtypes, 
                              running_avg_local_probs(pathtype_cycles, 
                                                      pe.weights, tr = False)):
        data[i]["running"]["plocal"][ptype] = p_loc

    # Analysis using all data: ["full"]
    plocfull = get_local_probs(pe, tr=False)
    data[i]["full"] = {}
    for ptype in pathtypes:
        data[i]["full"][ptype] = plocfull[ptype]

    # data[i] have now ["full"] and ["running"]

doing pathensemble 0
doing pathensemble 1
Weights of the different paths:
wRMR = 0
wRML = 19
wLMR = 26
wLML = 9655
Local crossing probabilities:
pRMR = 0.0
pRML = 1.0
pLMR = 0.0026856729676686293
pLML = 0.9973143270323314
Local crossing probabilities:
p2R = 0.0026804123711340207
p2L = 0.997319587628866
doing pathensemble 2
Weights of the different paths:
wRMR = 0
wRML = 0
wLMR = 0
wLML = 9648
Local crossing probabilities:
pRMR = nan
pRML = nan
pLMR = 0.0
pLML = 1.0
Local crossing probabilities:
p2R = 0.0
p2L = 1.0
doing pathensemble 3
Weights of the different paths:
wRMR = 1750
wRML = 258
wLMR = 288
wLML = 3454
Local crossing probabilities:
pRMR = 0.8715139442231076
pRML = 0.12848605577689243
pLMR = 0.0769641902725815
pLML = 0.9230358097274185
Local crossing probabilities:
p2R = 0.35443478260869565
p2L = 0.6455652173913043
doing pathensemble 4
Weights of the different paths:
wRMR = 0
wRML = 1
wLMR = 5
wLML = 9284
Local crossing probabilities:
pRMR = 0.0
pRML = 1.0
pLMR = 0.000538271073

### Generate pathlength distribution figures, as in PyRETIS reports.

In [10]:
for i, pe in enumerate(pathensembles):
    upe = pe.unify_pe()
    # Pathlength distribution
    data[i]["pathlengths"] = pathlength_distr(upe)  # these might be used later or not! TODO
        
#=======================================
# make figures
makefigs = True 
if makefigs:
    for i, pe in enumerate(pathensembles):     
        if i == 0:
            continue
        # Cross distances distribution
        L, M, R, lmlpercs, lmllambs, rmrpercs, rmrlambs = cross_dist_distr(pe)
        fig,ax = plt.subplots()
        ax.plot(lmllambs, lmlpercs, lw=1, c="g")
        ax.plot(rmrlambs, rmrpercs, lw=1, c="r")
        for lamb in (L,M,R):
            ax.axvline(lamb, color='k', linestyle='--', lw = 0.5)
        ax.set_xlabel('Cross distance')
        ax.set_ylabel('Frequency')
        ax.set_title("Ensemble {}. L = {}, M = {}, R = {}".format(
            pe.name, L, M, R))
        ax.set_ylim(0)
        fig.savefig(f"pathensemble_{i}_crossdist.pdf")
        plt.close(fig)

        # Pathlength distribution      
        for ptype in pathtypes:
            fig, ax = plt.subplots()
            ax.plot(data[i]["pathlengths"][ptype]["bin_centers"], 
                data[i]["pathlengths"][ptype]["hist"])
            ax.set_xlabel('Pathlength')
            ax.set_ylabel('Frequency')
            ax.set_title(f"{np.sum(data[i]['pathlengths'][ptype]['hist'])} " + \
                         f"{ptype} paths. ")
            ax.legend([f"mean = {data[i]['pathlengths'][ptype]['mean']:.2f}, " + \
                          f"std = {data[i]['pathlengths'][ptype]['std']:.2f}"])
            fig.savefig(f"pathensemble_{i}_pathlength_{ptype}.pdf")
            plt.close(fig)

Are all weights 1?  True
Are all paths accepted?  True
Are all weights 1?  True
Are all paths accepted?  True
Are all weights 1?  True
Are all paths accepted?  True
Are all weights 1?  True
Are all paths accepted?  True
Are all weights 1?  True
Are all paths accepted?  True
Are all weights 1?  True
Are all paths accepted?  True
Are all weights 1?  True
Are all paths accepted?  True
Are all weights 1?  True
Are all paths accepted?  True
Are all weights 1?  True
Are all paths accepted?  True
Are all weights 1?  True
Are all paths accepted?  True
Are all weights 1?  True
Are all paths accepted?  True


### Compute Pcross using in-house functions

In [11]:
# Global crossing probabilities (no error analysis)  
psfull = []
for i in range(1, len(pathensembles)):   # do not use the 0- ensemble
    psfull.append({"LMR": data[i]["full"]["LMR"], 
               "RML": data[i]["full"]["RML"], 
               "RMR": data[i]["full"]["RMR"],
               "LML": data[i]["full"]["LML"]})

Pminfull, Pplusfull, Pcrossfull = get_global_probs_from_dict(psfull)

In [12]:
# Make a figure of the global crossing probabilities
fig, ax = plt.subplots()
ax.set_yscale("log")
ax.plot(Pcrossfull, "o", c = "r")
ax.errorbar([i for i in range(len(Pcrossfull))], Pcrossfull, fmt="-o", c = "b", ecolor="r", capsize=6)


ax.set_xlabel("intf")
ax.set_ylabel(r"$P_A(\lambda_i|\lambda_A)$")
ax.set_xticks(np.arange(len(interfaces)))
fig.tight_layout()
fig.show()
fig.savefig("Global_probs.pdf")

print("This should be the same as the repptis_report.pdf value:", Pcrossfull[-1])
print("which is the case!")
print(Pcrossfull)
print([Pcrossfull[i]/Pcrossfull[i-1] for i in range(1,len(Pcrossfull))])
print("Here, the load immediately disappeared. For a simulation where this is")
print("not the case, the above code should be adapted a little bit.")

This should be the same as the repptis_report.pdf value: nan
which is the case!
[1.0, 0.0026856729676686293, 0.0, nan, nan, nan, nan, nan, nan, nan, nan]
[0.0026856729676686293, 0.0, nan, nan, nan, nan, nan, nan, nan, nan]
Here, the load immediately disappeared. For a simulation where this is
not the case, the above code should be adapted a little bit.


In [35]:
# TODO DONT INCLUDE??

# Construct lists of the local probs

# Or we can use the get_global_probs_from_local function, using lists of the local probs
# These do not use the 0- ensemble
pmps = [data[i]["full"]["LMR"] for i in range(1,len(pathensembles))]
pmms = [data[i]["full"][
    "LML"] for i in range(1,len(pathensembles))]
ppps = [data[i]["full"]["RMR"] for i in range(1,len(pathensembles))]
ppms = [data[i]["full"]["RML"] for i in range(1,len(pathensembles))]
a,b,c = get_global_probs_from_local(pmps, pmms, ppps, ppms)
print("This should be the same as the repptis_report.pdf value:", c[-1])
print(c)
print([c[i]/c[i-1] for i in range(1,len(c))])

This should be the same as the repptis_report.pdf value: 0.18807823161904824
[1.0, 0.5212885962380965, 0.39580791320961656, 0.3525852940559797, 0.31941044624364084, 0.30093772940797586, 0.2836206499987164, 0.263114438442881, 0.2368724572808718, 0.18807823161904824]
[0.5212885962380965, 0.7592874965345162, 0.8907990019624835, 0.9059097234864488, 0.9421662094871678, 0.9424562701282861, 0.9276984537059337, 0.9002640017883092, 0.7940063347932185]


## 4. Analysis using the MSM

### Construct transition matrix M

In [36]:
print(interfaces)
N = len(interfaces)
NS = 4*N-5
print("N", N)
# print("len pmms", len(pmms)) # TODO INCLUDE?
print("NS", NS)

labels1, labels2 = create_labels_states(N)

[4.0, 5.5556, 7.1111, 8.6667, 10.2222, 11.7778, 13.3333, 14.8889, 16.4444, 18.0]
N 10
NS 35


In [37]:
print("mm", pmms)
print("mp", pmps)
print("pm", ppms)
print("pp", ppps)
print("sum", np.array(pmms)+np.array(pmps))
print("sum", np.array(ppms)+np.array(ppps))
if N > 3:  
    M = construct_M(pmms, pmps, ppms, ppps, N)
elif N == 3:
    M = construct_M_N3(pmms, pmps, ppms, ppps, N)
else:
    raise ValueError("The amount of interfaces needs to be 3 at least!")

mm [0.4787114037619035, 0.24071250346548378, 0.31848337252267384, 0.4295100222717149, 0.36949438417602637, 0.4497857214046045, 0.5327967217913069, 0.5992229527794382, 0.706274853407023]
mp [0.5212885962380965, 0.7592874965345162, 0.6815166274773262, 0.570489977728285, 0.6305056158239736, 0.5502142785953955, 0.4672032782086931, 0.40077704722056184, 0.293725146592977]
pm [1.0, 0.2623231256599789, 0.4023413757564546, 0.47814961189616445, 0.4771876300097074, 0.5341863005149009, 0.5460223419822263, 0.6482369534555712, 0.6979773551720347]
pp [0.0, 0.7376768743400212, 0.5976586242435453, 0.5218503881038355, 0.5228123699902926, 0.4658136994850991, 0.4539776580177737, 0.35176304654442875, 0.30202264482796526]
sum [1. 1. 1. 1. 1. 1. 1. 1. 1.]
sum [1. 1. 1. 1. 1. 1. 1. 1. 1.]


In [38]:
# We can print the transition matrix M and check that all rows sum to 1.
print("M")
print("shape", M.shape)
print("sum prob in rows", np.sum(M,axis=1))
print(M)

M
shape (35, 35)
sum prob in rows [1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.
 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
[[0.        0.4787114 0.5212886 ... 0.        0.        0.       ]
 [1.        0.        0.        ... 0.        0.        0.       ]
 [0.        0.        0.        ... 0.        0.        0.       ]
 ...
 [0.        0.        0.        ... 0.        0.        0.       ]
 [0.        0.        0.        ... 0.        0.        1.       ]
 [1.        0.        0.        ... 0.        0.        0.       ]]


### Look at this Markov model
*INCLUDE?*

In [39]:
#import numpy.linalg
vals, vecs = np.linalg.eig(M)
print(vals)
vals, vecs = np.linalg.eig(M.T)
print(vals)
pprint(M)

[-9.65994843e-01+0.00000000e+00j -9.25100453e-01+0.00000000e+00j
  1.00000000e+00+0.00000000e+00j  7.96875236e-01+4.00875247e-02j
  7.96875236e-01-4.00875247e-02j -5.46166539e-01+4.57119667e-02j
 -5.46166539e-01-4.57119667e-02j -5.91237897e-02+3.99745493e-01j
 -5.91237897e-02-3.99745493e-01j  2.22232653e-01+1.83743824e-01j
  2.22232653e-01-1.83743824e-01j  9.97612800e-02+2.05655364e-01j
  9.97612800e-02-2.05655364e-01j -1.23512192e-01+0.00000000e+00j
 -8.49282339e-02+0.00000000e+00j -4.54932673e-02+0.00000000e+00j
  7.19013257e-02+0.00000000e+00j  4.59699805e-02+0.00000000e+00j
  3.29911277e-09+0.00000000e+00j -3.29911700e-09+0.00000000e+00j
  4.85348398e-16+7.97648439e-16j  4.85348398e-16-7.97648439e-16j
  8.82461824e-16+0.00000000e+00j -2.11297089e-16+2.52238085e-16j
 -2.11297089e-16-2.52238085e-16j -1.90426752e-16+0.00000000e+00j
  2.39501305e-17+2.73151039e-17j  2.39501305e-17-2.73151039e-17j
 -5.11822116e-33+3.88005746e-31j -5.11822116e-33-3.88005746e-31j
  3.99115307e-31+0.000000

In [40]:
print("what if chain propagates")
print("A[0,:]")
# check stationary behavior
A = M
for n in range(10):
    A = np.dot(A,M)
    #print(A)
    print(A[0,:])
    print(np.sum(A[0,:]))  # is 1 indeed

what if chain propagates
A[0,:]
[0.4787114  0.         0.         0.         0.12548068 0.39580791
 0.         0.         0.         0.         0.         0.
 0.         0.         0.         0.         0.         0.
 0.         0.         0.         0.         0.         0.
 0.         0.         0.         0.         0.         0.
 0.         0.         0.         0.         0.        ]
1.0
[0.         0.22916461 0.2495468  0.12548068 0.         0.
 0.         0.         0.12605824 0.26974967 0.         0.
 0.         0.         0.         0.         0.         0.
 0.         0.         0.         0.         0.         0.
 0.         0.         0.         0.         0.         0.
 0.         0.         0.         0.         0.        ]
1.0
[0.35464529 0.         0.         0.         0.06006903 0.18947776
 0.03306799 0.09299025 0.         0.         0.         0.
 0.11586019 0.15388949 0.         0.         0.         0.
 0.         0.         0.         0.         0.         0.
 0. 

### Pcross with MSM

In [41]:
# Inspect Z and Y vectors

z1, z2, y1, y2 = global_pcross_msm(M)
print("Z")
print_vector(z1, labels1)
print_vector(z2, labels2)
print("Y")
print_vector(y1, labels1)
print_vector(y2, labels2)
print("\nGlobal crossing probability: ", y1[0][0])

Z
state 0-     : 0
state B      : 1
state 0+- LML: 0.0
state 0+- LMR: 0.3607948322221579
state 0+- RML: 0.0
state 1+- LML: 0.0
state 1+- LMR: 0.4751755216158188
state 1+- RML: 0.0
state 1+- RMR: 0.4751755216158188
state 2+- LML: 0.3505259935484464
state 2+- LMR: 0.5334261944265525
state 2+- RML: 0.35052599354844644
state 2+- RMR: 0.5334261944265525
state 3+- LML: 0.45983787597912334
state 3+- LMR: 0.5888293067146131
state 3+- RML: 0.4598378759791234
state 3+- RMR: 0.5888293067146131
state 4+- LML: 0.5271521041705077
state 4+- LMR: 0.6249739173251818
state 4+- RML: 0.5271521041705077
state 4+- RMR: 0.6249739173251818
state 5+- LML: 0.5782945581426505
state 5+- LMR: 0.6631330674261534
state 5+- RML: 0.5782945581426504
state 5+- RMR: 0.6631330674261534
state 6+- LML: 0.6178134980108
state 6+- LMR: 0.714815320406211
state 6+- RML: 0.6178134980108
state 6+- RMR: 0.714815320406211
state 7+- LML: 0.6618501581653247
state 7+- LMR: 0.7940063347932185
state 7+- RML: 0.6618501581653247
state 7+- 

### Pathlength analysis

In [42]:
# Setting path ensemble properties
#==================================
for i,fol in enumerate(folders):
    print(i)
    print("Calculating path lengths.")
    set_tau_distrib(pathensembles[i])
    print("Done.")

    if True:
        print("Calculating first hitting lengths to middle interface")
        set_tau_first_hit_M_distrib(pathensembles[i])
        print("Done.")

0
Calculating path lengths.
Done.
Calculating first hitting lengths to middle interface
Done.
1
Calculating path lengths.
Done.
Calculating first hitting lengths to middle interface
Done.
2
Calculating path lengths.
Done.
Calculating first hitting lengths to middle interface
Done.
3
Calculating path lengths.
Done.
Calculating first hitting lengths to middle interface
Done.
4
Calculating path lengths.
Done.
Calculating first hitting lengths to middle interface
Done.
5
Calculating path lengths.
Done.
Calculating first hitting lengths to middle interface
Done.
6
Calculating path lengths.
Done.
Calculating first hitting lengths to middle interface
Done.
7
Calculating path lengths.
Done.
Calculating first hitting lengths to middle interface
Done.
8
Calculating path lengths.
Done.
Calculating first hitting lengths to middle interface
Done.
9
Calculating path lengths.
Done.
Calculating first hitting lengths to middle interface
Done.


In [13]:
# Additional information
#==================================
# Average path lengths per ensemble for each path type
print(indir[-20:])
pathtypes = ("LML", "LMR", "RMR", "RML", "LM*", "*M*", "***", "RM*", "L**", "**R", "R**")

print("=" * 80)
print("AVERAGE PATH LENGTHS BY ENSEMBLE AND PATH TYPE")
print("=" * 80)

for i, pe in enumerate(pathensembles):
    print(f"\nEnsemble {i} ({pe.name}):")
    print("-" * 50)
    
    # Get accepted paths only
    accepted_mask = np.isin(pe.flags, ACCFLAGS)
    total_accepted_count = np.sum(accepted_mask)
    
    if total_accepted_count > 0:
        # Calculate weighted average for all accepted paths
        accepted_lengths = pe.lengths[accepted_mask]
        accepted_weights = pe.weights[accepted_mask]
        total_weighted_avg = np.average(accepted_lengths, weights=accepted_weights)
        
        print(f"  All accepted paths: {total_weighted_avg:8.2f} (n={total_accepted_count:4d}, weighted)")
        print("-" * 30)
    
    for ptype in pathtypes:
        mask = (pe.lmrs == ptype) & accepted_mask
        if np.any(mask):
            lengths = pe.lengths[mask]
            weights = pe.weights[mask]
            weighted_avg = np.average(lengths, weights=weights)
            count = np.sum(mask)
            print(f"  {ptype:4s}: {weighted_avg:8.2f} (n={count:4d}, weighted)")
        else:
            print(f"  {ptype:4s}: {0:8.2f} (n={0:4d}, weighted)")

psin-benzos/pyretis/
AVERAGE PATH LENGTHS BY ENSEMBLE AND PATH TYPE

Ensemble 0 (/mnt/0bf0c339-34bb-4500-a5fb-f3c2a863de29/DATA/MSM-REPPTIS/trypsin-benzos/pyretis/000):
--------------------------------------------------
  All accepted paths:    36.92 (n=1797, weighted)
------------------------------
  LML :     0.00 (n=   0, weighted)
  LMR :     0.00 (n=   0, weighted)
  RMR :    36.92 (n=1797, weighted)
  RML :     0.00 (n=   0, weighted)
  LM* :     0.00 (n=   0, weighted)
  *M* :     0.00 (n=   0, weighted)
  *** :     0.00 (n=   0, weighted)
  RM* :     0.00 (n=   0, weighted)
  L** :     0.00 (n=   0, weighted)
  **R :     0.00 (n=   0, weighted)
  R** :     0.00 (n=   0, weighted)

Ensemble 1 (/mnt/0bf0c339-34bb-4500-a5fb-f3c2a863de29/DATA/MSM-REPPTIS/trypsin-benzos/pyretis/001):
--------------------------------------------------
  All accepted paths:    15.05 (n=1944, weighted)
------------------------------
  LML :    14.26 (n=1933, weighted)
  LMR :   130.54 (n=   8, weighted

In [44]:
# Compute taus for pathlength analysis
tau_mm, tau_mp, tau_pm, tau_pp = collect_tau(pathensembles)
tau1_mm, tau1_mp, tau1_pm, tau1_pp = collect_tau1(pathensembles)
tau2_mm, tau2_mp, tau2_pm, tau2_pp = collect_tau2(pathensembles)
taum_mm, taum_mp, taum_pm, taum_pp = collect_taum(pathensembles)

Collect tau
ensemble 0 /mnt/0bf0c339-34bb-4500-a5fb-f3c2a863de29/DATA/APPTIS/simdata/infrepptis/000
ensemble 1 /mnt/0bf0c339-34bb-4500-a5fb-f3c2a863de29/DATA/APPTIS/simdata/infrepptis/001
ensemble 2 /mnt/0bf0c339-34bb-4500-a5fb-f3c2a863de29/DATA/APPTIS/simdata/infrepptis/002
ensemble 3 /mnt/0bf0c339-34bb-4500-a5fb-f3c2a863de29/DATA/APPTIS/simdata/infrepptis/003
ensemble 4 /mnt/0bf0c339-34bb-4500-a5fb-f3c2a863de29/DATA/APPTIS/simdata/infrepptis/004
ensemble 5 /mnt/0bf0c339-34bb-4500-a5fb-f3c2a863de29/DATA/APPTIS/simdata/infrepptis/005
ensemble 6 /mnt/0bf0c339-34bb-4500-a5fb-f3c2a863de29/DATA/APPTIS/simdata/infrepptis/006
ensemble 7 /mnt/0bf0c339-34bb-4500-a5fb-f3c2a863de29/DATA/APPTIS/simdata/infrepptis/007
ensemble 8 /mnt/0bf0c339-34bb-4500-a5fb-f3c2a863de29/DATA/APPTIS/simdata/infrepptis/008
ensemble 9 /mnt/0bf0c339-34bb-4500-a5fb-f3c2a863de29/DATA/APPTIS/simdata/infrepptis/009
Collect tau1
Collect tau2
Collect taum


In [45]:
# Look at computed taus
print("tau")
print_all_tau(pathensembles, tau_mm, tau_mp, tau_pm, tau_pp)
print("\ntau1")
print_all_tau(pathensembles, tau1_mm, tau1_mp, tau1_pm, tau1_pp)
print("\ntaum")
print_all_tau(pathensembles, taum_mm, taum_mp, taum_pm, taum_pp)
print("\ntau2")
print_all_tau(pathensembles, tau2_mm, tau2_mp, tau2_pm, tau2_pp)

tau
Index Name            mm           mp           pm           pp
-----------------------------------------------------
0     000            nan          nan          nan        160.4
1     001           23.2         65.8         63.0          nan
2     002          352.0        308.1        305.5        290.8
3     003          189.1        178.5        177.3        191.0
4     004          182.0        171.1        175.4        182.7
5     005          191.1        169.3        170.8        184.1
6     006          217.2        189.5        183.8        191.6
7     007          207.6        182.0        177.3        179.1
8     008          201.5        185.9        175.6        197.1
9     009          218.0        175.5        195.7        204.7

tau1
Index Name            mm           mp           pm           pp
-----------------------------------------------------
0     000            nan          nan          nan          0.0
1     001            0.0          0.0         63.0

In [46]:
# TODO include prints?
tau  = construct_tau_vector(N, NS, tau_mm, tau_mp, tau_pm, tau_pp)
tau1 = construct_tau_vector(N, NS, tau1_mm, tau1_mp, tau1_pm, tau1_pp)
taum = construct_tau_vector(N, NS, taum_mm, taum_mp, taum_pm, taum_pp)
tau2 = construct_tau_vector(N, NS, tau2_mm, tau2_mp, tau2_pm, tau2_pp)
tau_m = tau-tau1-tau2  # yes, this is the same thing as taum

print("tau")
print(tau)
print("\n")
print("tau1")
print(tau1)
print("taum")
print(taum)
print("tau2")
print(tau2)

print("\n")
print("tau = tau1+taum+tau2 => difference is", np.sum((tau-tau1-taum-tau2)**2))

tau
[160.44671226  23.1663196   65.79970812  62.95291574 352.01036568
 308.12286627 305.5196039  290.7831315  189.0558998  178.48290309
 177.34515599 191.04125398 182.03413707 171.07215824 175.43783077
 182.69745015 191.14493205 169.27048881 170.76297588 184.13554377
 217.20171357 189.46932738 183.79682206 191.58089368 207.57413613
 182.01653928 177.32341983 179.10511006 201.53044661 185.89932886
 175.59300012 197.06025891 217.96236918 175.47529448   0.        ]


tau1
[ 0.          0.          0.         62.95291574 74.12841923 60.25330899
 46.42524756 45.19651293 47.86908372 47.84732918 57.57110016 51.86014326
 51.50583355 54.78534713 47.89950645 49.74223602 50.53107169 49.95926556
 49.32519616 47.04233422 50.42026738 48.19665499 52.03510924 52.18007637
 50.62302917 47.82740258 53.72697562 46.34606918 51.1271367  49.85492509
 53.29870695 52.43051896 52.89725483 47.9440875   0.        ]
taum
[160.44671226  23.1663196    0.           0.         218.85603225
 199.11355545 189.67699863 1

## 5. Flux calculation

### Collect tau for [0+]

In [60]:
# Construct g and h vectors
g1, g2, h1, h2 = mfpt_to_first_last_state(M, np.nan_to_num(tau1), np.nan_to_num(tau_m), np.nan_to_num(tau2)) #, doprint=True)
print("G")
print_vector(g1, labels1)
print_vector(g2, labels2)
print("H")
print_vector(h1, labels1)
print_vector(h2, labels2)
print("\ntau [0+]: ", h1[0])

[ 0 34] g1:  [[0.]
 [0.]] D [[1. 0.]
 [0. 0.]
 [1. 0.]
 [0. 0.]
 [0. 0.]
 [0. 0.]
 [0. 0.]
 [0. 0.]
 [0. 0.]
 [0. 0.]
 [0. 0.]
 [0. 0.]
 [0. 0.]
 [0. 0.]
 [0. 0.]
 [0. 0.]
 [0. 0.]
 [0. 0.]
 [0. 0.]
 [0. 0.]
 [0. 0.]
 [0. 0.]
 [0. 0.]
 [0. 0.]
 [0. 0.]
 [0. 0.]
 [0. 0.]
 [0. 0.]
 [0. 0.]
 [0. 0.]
 [0. 0.]
 [0. 0.]
 [0. 1.]] tp [[ 23.1663196 ]
 [ 65.79970812]
 [  0.        ]
 [277.88194644]
 [247.86955728]
 [259.09435633]
 [245.58661857]
 [141.18681608]
 [130.6355739 ]
 [119.77405583]
 [139.18111072]
 [130.52830352]
 [116.28681111]
 [127.53832431]
 [132.95521412]
 [140.61386036]
 [119.31122325]
 [121.43777972]
 [137.09320955]
 [166.78144619]
 [141.27267238]
 [131.76171282]
 [139.40081731]
 [156.95110696]
 [134.1891367 ]
 [123.59644421]
 [132.75904088]
 [150.40330991]
 [136.04440377]
 [122.29429317]
 [144.62973995]
 [165.06511436]
 [127.53120698]]
G
state 0-     : 0.0
state B      : 0.0
state 0+- LML: 23.166319596668128
state 0+- LMR: 4806.4814081916
state 0+- RML: 0.0
state 1+- LML: 277

### The flux

In [63]:
flux = 1/(tau[0]+h1[0][0])
print(tau[0], h1[0][0])
dt = 0.002 # Change if needed
sc = 10
# flux /= (dt*sc)
# print(flux/(dt*sc), "1/time")
print(flux, "1/ps")

160.44671225856615 2516.653927494827
0.0003735384412339901 1/ps


## 6. The rate constant
We can compute an accurate rate constant using only our MSM.

In [64]:
# rate constant = flux * Pcross

print("The rate constant k is: ", flux*y1[0][0], "1/ps")

The rate constant k is:  7.025444946902463e-05 1/ps


## 7. Direct rate computation via $\tau_{\mathcal{A},1}$ 

In [56]:
# Construct g and h vectors
absor = np.array([NS - 1])
kept = np.array([i for i in range(NS) if i not in absor])

g1, g2, h1, h2 = mfpt_to_absorbing_states(M, np.nan_to_num(tau1), np.nan_to_num(tau_m), np.nan_to_num(tau2), absor, kept, remove_initial_m=False, doprint=True)
print("G")
print_vector(g1, labels1[-1])
print_vector(g2, [labels1[0]] + labels2)
print("H")
print_vector(h1, labels1[-1])
print_vector(h2, [labels1[0]] + labels2)
print("interesting")
print(h2[0])
mfpt = h2[0][0]  # tau_A,1

[34] g1:  [[0.]] D [[0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [1.]] tp [[160.44671226]
 [ 23.1663196 ]
 [ 65.79970812]
 [  0.        ]
 [277.88194644]
 [247.86955728]
 [259.09435633]
 [245.58661857]
 [141.18681608]
 [130.6355739 ]
 [119.77405583]
 [139.18111072]
 [130.52830352]
 [116.28681111]
 [127.53832431]
 [132.95521412]
 [140.61386036]
 [119.31122325]
 [121.43777972]
 [137.09320955]
 [166.78144619]
 [141.27267238]
 [131.76171282]
 [139.40081731]
 [156.95110696]
 [134.1891367 ]
 [123.59644421]
 [132.75904088]
 [150.40330991]
 [136.04440377]
 [122.29429317]
 [144.62973995]
 [165.06511436]
 [127.53120698]]
Eigenvalues of Mp:
[ 9.88416204e-01+0.00000000e+00j  8.74754078e-01+0.00000000e+00j
 -9.88416204e-01+0.00000000e+00j -8.74754078e-01+0.00000000e+00j
  6.89343933e-01+0.00000000e+00j -6.89343933e-01+0.00000000e+00j
  3.22720378e-

In [57]:
k_flux_pcross = flux * y1[0][0]
k_mfpt = 1 / (mfpt*dt*sc)

print(f"Rate constant from P_cross × flux: {k_flux_pcross:.10e} [1/ps]")
print(f"Rate constant from MFPT:           {k_mfpt:.10e} [1/ps]")
print(f"Relative difference:               {abs(k_flux_pcross - k_mfpt)/k_mfpt*100:.2f}%")


Rate constant from P_cross × flux: 6.5330254237e-04 [1/ps]
Rate constant from MFPT:           3.5127224735e-03 [1/ps]
Relative difference:               81.40%
