# Analyzing (RE)PPTIS simulations using an MSM approach
This notebook contains an example workflow that can be used for estimating the crossing probability and pathlengths of a (RE)PPTIS simulation.

## 1. Import the necessary functions

In [2]:
%load_ext autoreload
%autoreload 2
%matplotlib qt

import matplotlib.pyplot as plt
from pprint import pprint    # to print the vars of the pathensemble object
import numpy as np
import os
import glob

# Reading
from tistools import read_inputfile, get_LMR_interfaces, read_pathensemble, get_weights
from tistools import set_tau_distrib, set_tau_first_hit_M_distrib, cross_dist_distr, pathlength_distr
from tistools import collect_tau, collect_tau1, collect_tau2, collect_taum
from tistools import ACCFLAGS, REJFLAGS

# REPPTIS analysis
from tistools import get_lmr_masks, get_generation_mask, get_flag_mask, select_with_masks
from tistools import unwrap_by_weight, running_avg_local_probs, get_local_probs, get_global_probs_from_dict, get_global_probs_from_local

# MSM functions
from tistools import construct_M
from tistools import global_pcross_msm
from tistools import mfpt_to_first_last_state, mfpt_to_absorbing_states, construct_tau_vector
from tistools import create_labels_states, print_vector, print_all_tau

## 2. Load the simulation data

### How to run this notebook
- Set `indir` to the simulation directory (see the commented presets below).
- Toggle `zero_minus_one` if you have a λ

−1 interface.
- Set `VERBOSE` to `True` if you want detailed per-pathensemble output; otherwise it stays concise.
- Figures are controlled by `makefigs` later on.
- The notebook assumes PyRETIS/REPPTIS output structure with numbered folders under `indir`.


In [4]:
# Set the working directory
indir = "/mnt/0bf0c339-34bb-4500-a5fb-f3c2a863de29/DATA/APPTIS/simdata/infrepptis/"  


# Control verbosity and options
VERBOSE = True          # Set True for detailed per-ensemble printing
PRINT_PATHENSEMBLE = True  # Set True to pprint each pathensemble vars

# zero_minus_one: True if lambda_-1 interface is set
zero_minus_one = False

inputfile = indir + "/repptis.rst"    # When using PyRETIS, the input file for REPPTIS simulations is a .rst file

# Move to working directory
os.chdir(indir)
print(os.getcwd())

# Set the ensemble folders and print them
folders = sorted(glob.glob(indir + "/0[0-9][0-9]"))
print(f"Found {len(folders)} ensemble folders")
print(folders)


/mnt/0bf0c339-34bb-4500-a5fb-f3c2a863de29/DATA/APPTIS/simdata/infrepptis
Found 10 ensemble folders
['/mnt/0bf0c339-34bb-4500-a5fb-f3c2a863de29/DATA/APPTIS/simdata/infrepptis/000', '/mnt/0bf0c339-34bb-4500-a5fb-f3c2a863de29/DATA/APPTIS/simdata/infrepptis/001', '/mnt/0bf0c339-34bb-4500-a5fb-f3c2a863de29/DATA/APPTIS/simdata/infrepptis/002', '/mnt/0bf0c339-34bb-4500-a5fb-f3c2a863de29/DATA/APPTIS/simdata/infrepptis/003', '/mnt/0bf0c339-34bb-4500-a5fb-f3c2a863de29/DATA/APPTIS/simdata/infrepptis/004', '/mnt/0bf0c339-34bb-4500-a5fb-f3c2a863de29/DATA/APPTIS/simdata/infrepptis/005', '/mnt/0bf0c339-34bb-4500-a5fb-f3c2a863de29/DATA/APPTIS/simdata/infrepptis/006', '/mnt/0bf0c339-34bb-4500-a5fb-f3c2a863de29/DATA/APPTIS/simdata/infrepptis/007', '/mnt/0bf0c339-34bb-4500-a5fb-f3c2a863de29/DATA/APPTIS/simdata/infrepptis/008', '/mnt/0bf0c339-34bb-4500-a5fb-f3c2a863de29/DATA/APPTIS/simdata/infrepptis/009']


In [5]:
# Reading all input
#===================
interfaces, zero_left, timestep = read_inputfile(inputfile)
LMR_interfaces, LMR_strings = get_LMR_interfaces(interfaces, zero_left)

pathensembles = []
for i, fol in enumerate(folders):
    pe = read_pathensemble(fol + "/pathensemble.txt")
    pe.set_name(fol)
    pe.set_interfaces([LMR_interfaces[i], LMR_strings[i]])
    if i == 0:
        pe.set_zero_minus_one(zero_minus_one)   # TODO this is never used
        pe.set_in_zero_minus(True)
    if i == 1:
        pe.set_in_zero_plus(True)

    w, _ = get_weights(pe.flags, ACCFLAGS, REJFLAGS, verbose=False)
    pe.set_weights(w)

    if PRINT_PATHENSEMBLE:
        print("#" * 80)
        print(fol)
        print("pathensemble info:")
        pprint(vars(pe))
    else:
        print(f"Loaded {fol} (ensemble {i})")

    # Read order parameters order.txt/order.npy into path ensemble object, or load from order.npy file.
    # Saving order parameter files allows to speed up this notebook.
    pe.set_orders(load=False, acc_only=True, save=True)        # first run: store .npy files
    # pe.set_orders(load=True, acc_only=True, save=False)          # subsequent runs: read .npy files
    # pe.set_orders(load=False, acc_only=True, save=False)       # if saving doesn't work

    pathensembles.append(pe)

print(f"\nLoaded {len(pathensembles)} pathensembles")


################################################################################
/mnt/0bf0c339-34bb-4500-a5fb-f3c2a863de29/DATA/APPTIS/simdata/infrepptis/000
pathensemble info:
{'cyclenumbers': array([    0,     1,     2, ..., 56586, 56587, 56588]),
 'flags': array(['ACC', 'REJ', 'REJ', ..., 'ACC', 'REJ', 'REJ'], dtype='<U3'),
 'generation': array(['sh', 'sh', 'sh', ..., 'sh', 'sh', 'sh'], dtype='<U2'),
 'has_zero_minus_one': False,
 'in_zero_minus': True,
 'in_zero_plus': False,
 'interfaces': [[4.0, 4.0, 4.0], ['l_[0]', 'l_[0]', 'l_[0]']],
 'lambmaxs': array([4.06896, 4.06896, 4.06896, ..., 4.07519, 4.07519, 4.07519]),
 'lambmins': array([2.7722 , 2.7722 , 2.7722 , ..., 3.73407, 3.73407, 3.73407]),
 'lengths': array([433,   1,   1, ...,  16,   1,   1]),
 'lmrs': array(['RMR', 'RMR', 'RMR', ..., 'RMR', 'RMR', 'RMR'], dtype='<U3'),
 'name': '/mnt/0bf0c339-34bb-4500-a5fb-f3c2a863de29/DATA/APPTIS/simdata/infrepptis/000',
 'ncycle': 56589,
 'newpathnumbers': array([   0,    0,    0, ..., 

## 3. Regular (RE)PPTIS analysis using tistools

### Analyze the REPPTIS simulation.

**Tip on output volume**: Set `VERBOSE = True` (in the setup cell above) to see detailed per-ensemble diagnostics. With `VERBOSE = False`, only compact summaries are printed to keep the log tidy.

In [6]:
# Analysis output is saved to the data dictionary.
data = {}
for i, pe in enumerate(pathensembles):
    if i == 0:
        data[i] = {}
        continue  # [0-] is not used for Pcross calculations

    if VERBOSE:
        print(f"doing pathensemble {i}")

    # Classify the paths according to their path type.
    pathtypes = ("LML", "LMR", "RML", "RMR")
    pathtype_cycles = {}
    for ptype in pathtypes:
        pathtype_cycles[ptype] = unwrap_by_weight(
            (pe.lmrs == ptype).astype(int), pe.weights
        )
    
    # Running average analysis: ["running"]
    data[i] = {}
    data[i]["running"] = {}
    data[i]["running"]["plocal"] = {}
    for (ptype, p_loc) in zip(
        pathtypes,
        running_avg_local_probs(pathtype_cycles, pe.weights, tr=False)
    ):
        data[i]["running"]["plocal"][ptype] = p_loc

    # Analysis using all data: ["full"]
    plocfull = get_local_probs(pe, tr=False)
    data[i]["full"] = {}
    for ptype in pathtypes:
        data[i]["full"][ptype] = plocfull[ptype]

    if VERBOSE:
        print(f"Finished ensemble {i}")

doing pathensemble 1
Weights of the different paths:
wRMR = 0
wRML = 18966
wLMR = 19871
wLML = 18248
Local crossing probabilities:
pRMR = 0.0
pRML = 1.0
pLMR = 0.5212885962380965
pLML = 0.4787114037619035
Local crossing probabilities:
p2R = 0.3480949461329596
p2L = 0.6519050538670403
Finished ensemble 1
doing pathensemble 2
Weights of the different paths:
wRMR = 34929
wRML = 12421
wLMR = 10955
wLML = 3473
Local crossing probabilities:
pRMR = 0.7376768743400212
pRML = 0.2623231256599789
pLMR = 0.7592874965345162
pLML = 0.24071250346548378
Local crossing probabilities:
p2R = 0.7427239470361617
p2L = 0.25727605296383826
Finished ensemble 2
doing pathensemble 3
Weights of the different paths:
wRMR = 22616
wRML = 15225
wLMR = 16231
wLML = 7585
Local crossing probabilities:
pRMR = 0.5976586242435453
pRML = 0.4023413757564546
pLMR = 0.6815166274773262
pLML = 0.31848337252267384
Local crossing probabilities:
p2R = 0.6300501159641241
p2L = 0.3699498840358759
Finished ensemble 3
doing pathensemb

### Generate pathlength distribution figures, as in PyRETIS reports.

In [5]:
for i, pe in enumerate(pathensembles):
    upe = pe.unify_pe()
    # Pathlength distribution
    data[i]["pathlengths"] = pathlength_distr(upe)  # these might be used later or not! TODO
        
#=======================================
# make figures
makefigs = True 
if makefigs:
    for i, pe in enumerate(pathensembles):     
        if i == 0:
            continue
        # Cross distances distribution
        L, M, R, lmlpercs, lmllambs, rmrpercs, rmrlambs = cross_dist_distr(pe)
        fig,ax = plt.subplots()
        ax.plot(lmllambs, lmlpercs, lw=1, c="g")
        ax.plot(rmrlambs, rmrpercs, lw=1, c="r")
        for lamb in (L,M,R):
            ax.axvline(lamb, color='k', linestyle='--', lw = 0.5)
        ax.set_xlabel('Cross distance')
        ax.set_ylabel('Frequency')
        ax.set_title("Ensemble {}. L = {}, M = {}, R = {}".format(
            pe.name, L, M, R))
        ax.set_ylim(0)
        fig.savefig(f"pathensemble_{i}_crossdist.pdf")
        plt.close(fig)

        # Pathlength distribution      
        for ptype in pathtypes:
            fig, ax = plt.subplots()
            ax.plot(data[i]["pathlengths"][ptype]["bin_centers"], 
                data[i]["pathlengths"][ptype]["hist"])
            ax.set_xlabel('Pathlength')
            ax.set_ylabel('Frequency')
            ax.set_title(f"{np.sum(data[i]['pathlengths'][ptype]['hist'])} " + \
                         f"{ptype} paths. ")
            ax.legend([f"mean = {data[i]['pathlengths'][ptype]['mean']:.2f}, " + \
                          f"std = {data[i]['pathlengths'][ptype]['std']:.2f}"])
            fig.savefig(f"pathensemble_{i}_pathlength_{ptype}.pdf")
            plt.close(fig)

Are all weights 1?  True
Are all paths accepted?  True
Are all weights 1?  True
Are all paths accepted?  True
Are all weights 1?  True
Are all paths accepted?  True
Are all weights 1?  True
Are all paths accepted?  True
Are all weights 1?  True
Are all paths accepted?  True
Are all weights 1?  True
Are all paths accepted?  True
Are all weights 1?  True
Are all paths accepted?  True
Are all weights 1?  True
Are all paths accepted?  True
Are all weights 1?  True
Are all paths accepted?  True
Are all weights 1?  True
Are all paths accepted?  True


### Compute Pcross using in-house functions

In [17]:
# Global crossing probabilities (no error analysis)  
psfull = []
for i in range(1, len(pathensembles)):   # do not use the 0- ensemble
    psfull.append({"LMR": data[i]["full"]["LMR"], 
               "RML": data[i]["full"]["RML"], 
               "RMR": data[i]["full"]["RMR"],
               "LML": data[i]["full"]["LML"]})

Pminfull, Pplusfull, Pcrossfull = get_global_probs_from_dict(psfull)

In [19]:
# Make a figure of the global crossing probabilities
fig, ax = plt.subplots()
ax.set_yscale("log")
# ax.plot(Pcrossfull, "o", c = "r")
ax.errorbar(interfaces, Pcrossfull, fmt="-o", c = "b", ecolor="r", capsize=6)


ax.set_xlabel("OP")
ax.set_ylabel(r"$P_A(\lambda_i|\lambda_A)$")
fig.tight_layout()
fig.show()
fig.savefig("Global_probs.pdf")

print("This should be the same as the repptis_report.pdf value:", Pcrossfull[-1])
print("which is the case!")
print(Pcrossfull)
print([Pcrossfull[i]/Pcrossfull[i-1] for i in range(1,len(Pcrossfull))])
print("Here, the load immediately disappeared. For a simulation where this is")
print("not the case, the above code should be adapted a little bit.")

This should be the same as the repptis_report.pdf value: 0.18807823161904824
which is the case!
[1.0, 0.5212885962380965, 0.39580791320961656, 0.3525852940559797, 0.31941044624364084, 0.30093772940797586, 0.2836206499987164, 0.263114438442881, 0.2368724572808718, 0.18807823161904824]
[0.5212885962380965, 0.7592874965345162, 0.8907990019624835, 0.9059097234864488, 0.9421662094871678, 0.9424562701282861, 0.9276984537059337, 0.9002640017883092, 0.7940063347932185]
Here, the load immediately disappeared. For a simulation where this is
not the case, the above code should be adapted a little bit.


In [None]:
# Construct lists of the local probs

# Or we can use the get_global_probs_from_local function, using lists of the local probs
# These do not use the 0- ensemble
pmps = [data[i]["full"]["LMR"] for i in range(1,len(pathensembles))]
pmms = [data[i]["full"]["LML"] for i in range(1,len(pathensembles))]
ppps = [data[i]["full"]["RMR"] for i in range(1,len(pathensembles))]
ppms = [data[i]["full"]["RML"] for i in range(1,len(pathensembles))]
a,b,c = get_global_probs_from_local(pmps, pmms, ppps, ppms)
print("This should be the same as the repptis_report.pdf value:", c[-1])

This should be the same as the repptis_report.pdf value: 0.025518974278035258


## 4. Analysis using the MSM

### Construct transition matrix M

In [10]:
print(interfaces)
N = len(interfaces)
NS = 4*N-5
print("N", N)
# print("len pmms", len(pmms)) # TODO INCLUDE?
print("NS", NS)

labels1, labels2 = create_labels_states(N)

[-0.1, 0.0, 0.1, 0.2, 0.3]
N 5
NS 15


In [11]:
if VERBOSE:
    print("mm", pmms)
    print("mp", pmps)
    print("pm", ppms)
    print("pp", ppps)
    print("sum", np.array(pmms) + np.array(pmps))
    print("sum", np.array(ppms) + np.array(ppps))

M = construct_M(pmms, pmps, ppms, ppps, N)

mm [0.9030653599587355, 0.48207357859531774, 0.4722053769961593, 0.4787678619574009]
mp [0.09693464004126447, 0.5179264214046823, 0.5277946230038407, 0.521232138042599]
pm [1.0, 0.5351803866852701, 0.5245382585751979, 0.5305637982195845]
pp [0.0, 0.46481961331472993, 0.47546174142480213, 0.46943620178041545]
sum [1. 1. 1. 1.]
sum [1. 1. 1. 1.]


In [12]:
# We can print the transition matrix M and check that all rows sum to 1.
if VERBOSE:
    print("M")
    print("shape", M.shape)
    print("sum prob in rows", np.sum(M, axis=1))
    print(M)
else:
    print("Transition matrix built. Set VERBOSE=True to print matrix and row sums.")

M
shape (15, 15)
sum prob in rows [1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
[[0.         0.90306536 0.09693464 0.         0.         0.
  0.         0.         0.         0.         0.         0.
  0.         0.         0.        ]
 [1.         0.         0.         0.         0.         0.
  0.         0.         0.         0.         0.         0.
  0.         0.         0.        ]
 [0.         0.         0.         0.         0.48207358 0.51792642
  0.         0.         0.         0.         0.         0.
  0.         0.         0.        ]
 [1.         0.         0.         0.         0.         0.
  0.         0.         0.         0.         0.         0.
  0.         0.         0.        ]
 [0.         0.         0.         1.         0.         0.
  0.         0.         0.         0.         0.         0.
  0.         0.         0.        ]
 [0.         0.         0.         0.         0.         0.
  0.         0.         0.47220538 0.52779462 0.         0.
  0.       

### Look at this Markov model

In [13]:
if VERBOSE:
    vals, vecs = np.linalg.eig(M)
    print(vals)
    vals, vecs = np.linalg.eig(M.T)
    print(vals)
    pprint(M)
else:
    vals, _ = np.linalg.eig(M)
    print(f"Eigenvalues (summary): min={np.min(vals):.3f}, max={np.max(vals):.3f}")

[-9.65994843e-01+0.00000000e+00j -9.25100453e-01+0.00000000e+00j
  1.00000000e+00+0.00000000e+00j  7.96875236e-01+4.00875247e-02j
  7.96875236e-01-4.00875247e-02j -5.46166539e-01+4.57119667e-02j
 -5.46166539e-01-4.57119667e-02j -5.91237897e-02+3.99745493e-01j
 -5.91237897e-02-3.99745493e-01j  2.22232653e-01+1.83743824e-01j
  2.22232653e-01-1.83743824e-01j  9.97612800e-02+2.05655364e-01j
  9.97612800e-02-2.05655364e-01j -1.23512192e-01+0.00000000e+00j
 -8.49282339e-02+0.00000000e+00j -4.54932673e-02+0.00000000e+00j
  7.19013257e-02+0.00000000e+00j  4.59699805e-02+0.00000000e+00j
  2.99519286e-15+3.48396239e-09j  2.99519286e-15-3.48396239e-09j
 -5.29470519e-15+0.00000000e+00j  3.62419187e-16+4.66356725e-16j
  3.62419187e-16-4.66356725e-16j -1.28223094e-16+1.68194573e-16j
 -1.28223094e-16-1.68194573e-16j  7.64235840e-17+0.00000000e+00j
 -6.31109630e-17+0.00000000e+00j -4.67648690e-18+0.00000000e+00j
  2.00696663e-30+0.00000000e+00j  8.04597086e-30+0.00000000e+00j
 -2.77642175e-30+0.000000

In [None]:
if VERBOSE:
    print("what if chain propagates")
    print("A[0,:]")
    # check stationary behavior
    A = M
    for n in range(10):
        A = np.dot(A, M)
        print(A[0, :])
        print(np.sum(A[0, :]))  # is 1 indeed
else:
    print("Skipping propagation printout (set VERBOSE=True to inspect A^n)")

what if chain propagates
A[0,:]
[0.90306536 0.         0.         0.         0.04672963 0.05020501
 0.         0.         0.         0.         0.         0.
 0.         0.         0.        ]
1.0
[0.         0.81552704 0.08753832 0.04672963 0.         0.
 0.         0.         0.02370708 0.02649793 0.         0.
 0.         0.         0.        ]
0.9999999999999999
[0.86225667 0.         0.         0.         0.04219991 0.04533841
 0.01268756 0.01101951 0.         0.         0.         0.
 0.01268636 0.01381158 0.        ]
0.9999999999999999
[0.         0.77867413 0.08358254 0.05488747 0.         0.
 0.         0.         0.02661251 0.02974541 0.00665448 0.00603188
 0.         0.         0.01381158]
0.9999999999999999
[0.84737318 0.         0.         0.         0.04029293 0.04328961
 0.01780384 0.01546315 0.         0.         0.         0.
 0.01712901 0.01864827 0.        ]
0.9999999999999998
[0.         0.76523337 0.08213981 0.05809678 0.         0.
 0.         0.         0.0277433

### Pcross with MSM

In [59]:
# Inspect Z and Y vectors

z1, z2, y1, y2 = global_pcross_msm(M)
print("Z")
print_vector(z1, labels1)
print_vector(z2, labels2)
print("Y")
print_vector(y1, labels1)
print_vector(y2, labels2)
print("\nGlobal crossing probability: ", y1[0][0])

Z
state 0-     : 0
state B      : 1
state 0+- LML: 0.0
state 0+- LMR: 0.3607948322221575
state 0+- RML: 0.0
state 1+- LML: 0.0
state 1+- LMR: 0.4751755216158182
state 1+- RML: 0.0
state 1+- RMR: 0.4751755216158182
state 2+- LML: 0.350525993548446
state 2+- LMR: 0.5334261944265519
state 2+- RML: 0.3505259935484461
state 2+- RMR: 0.5334261944265519
state 3+- LML: 0.4598378759791229
state 3+- LMR: 0.5888293067146125
state 3+- RML: 0.45983787597912285
state 3+- RMR: 0.5888293067146125
state 4+- LML: 0.5271521041705072
state 4+- LMR: 0.6249739173251811
state 4+- RML: 0.5271521041705072
state 4+- RMR: 0.6249739173251811
state 5+- LML: 0.5782945581426499
state 5+- LMR: 0.6631330674261529
state 5+- RML: 0.5782945581426497
state 5+- RMR: 0.6631330674261529
state 6+- LML: 0.6178134980107992
state 6+- LMR: 0.7148153204062105
state 6+- RML: 0.6178134980107992
state 6+- RMR: 0.7148153204062105
state 7+- LML: 0.6618501581653242
state 7+- LMR: 0.794006334793218
state 7+- RML: 0.6618501581653241
state

### Pathlength analysis

In [39]:
# Setting path ensemble properties
#==================================
for i,fol in enumerate(folders):
    print(i)
    print("Calculating path lengths.")
    set_tau_distrib(pathensembles[i])
    print("Done.")

    if True:
        print("Calculating first hitting lengths to middle interface")
        set_tau_first_hit_M_distrib(pathensembles[i])
        print("Done.")

0
Calculating path lengths.
Done.
Calculating first hitting lengths to middle interface
Done.
1
Calculating path lengths.
Done.
Calculating first hitting lengths to middle interface
Done.
2
Calculating path lengths.
Done.
Calculating first hitting lengths to middle interface
Done.
3
Calculating path lengths.
Done.
Calculating first hitting lengths to middle interface
Done.
4
Calculating path lengths.
Done.
Calculating first hitting lengths to middle interface
Done.


In [14]:
# Additional information
#==================================
# Average path lengths per ensemble for each path type
print(indir[-20:])
pathtypes = ("LML", "LMR", "RMR", "RML", "LM*", "*M*", "***", "RM*", "L**", "**R", "R**")

print("=" * 80)
print("AVERAGE PATH LENGTHS BY ENSEMBLE AND PATH TYPE")
print("=" * 80)

for i, pe in enumerate(pathensembles):
    if not VERBOSE:
        print(f"Ensemble {i}: accepted paths = {np.sum(np.isin(pe.flags, ACCFLAGS))}")
        continue

    print(f"\nEnsemble {i} ({pe.name}):")
    print("-" * 50)
    
    # Get accepted paths only
    accepted_mask = np.isin(pe.flags, ACCFLAGS)
    total_accepted_count = np.sum(accepted_mask)
    
    if total_accepted_count > 0:
        # Calculate weighted average for all accepted paths
        accepted_lengths = pe.lengths[accepted_mask]
        accepted_weights = pe.weights[accepted_mask]
        total_weighted_avg = np.average(accepted_lengths, weights=accepted_weights)
        
        print(f"  All accepted paths: {total_weighted_avg:8.2f} (n={total_accepted_count:4d}, weighted)")
        print("-" * 30)
    
    for ptype in pathtypes:
        mask = (pe.lmrs == ptype) & accepted_mask
        if np.any(mask):
            lengths = pe.lengths[mask]
            weights = pe.weights[mask]
            weighted_avg = np.average(lengths, weights=weights)
            count = np.sum(mask)
            print(f"  {ptype:4s}: {weighted_avg:8.2f} (n={count:4d}, weighted)")
        else:
            print(f"  {ptype:4s}: {0:8.2f} (n={0:4d}, weighted)")

/simdata/infrepptis/
AVERAGE PATH LENGTHS BY ENSEMBLE AND PATH TYPE

Ensemble 0 (/mnt/0bf0c339-34bb-4500-a5fb-f3c2a863de29/DATA/APPTIS/simdata/infrepptis/000):
--------------------------------------------------
  All accepted paths:   162.45 (n=5988, weighted)
------------------------------
  LML :     0.00 (n=   0, weighted)
  LMR :     0.00 (n=   0, weighted)
  RMR :   162.45 (n=5988, weighted)
  RML :     0.00 (n=   0, weighted)
  LM* :     0.00 (n=   0, weighted)
  *M* :     0.00 (n=   0, weighted)
  *** :     0.00 (n=   0, weighted)
  RM* :     0.00 (n=   0, weighted)
  L** :     0.00 (n=   0, weighted)
  **R :     0.00 (n=   0, weighted)
  R** :     0.00 (n=   0, weighted)

Ensemble 1 (/mnt/0bf0c339-34bb-4500-a5fb-f3c2a863de29/DATA/APPTIS/simdata/infrepptis/001):
--------------------------------------------------
  All accepted paths:    53.23 (n=5073, weighted)
------------------------------
  LML :    25.17 (n=2202, weighted)
  LMR :    67.80 (n=2370, weighted)
  RMR :     0.00

In [15]:
# Compute taus for pathlength analysis
tau_mm, tau_mp, tau_pm, tau_pp = collect_tau(pathensembles)
tau1_mm, tau1_mp, tau1_pm, tau1_pp = collect_tau1(pathensembles)
tau2_mm, tau2_mp, tau2_pm, tau2_pp = collect_tau2(pathensembles)
taum_mm, taum_mp, taum_pm, taum_pp = collect_taum(pathensembles)

Collect tau
ensemble 0 /mnt/0bf0c339-34bb-4500-a5fb-f3c2a863de29/DATA/APPTIS/simdata/infrepptis/000


AttributeError: 'PathEnsemble' object has no attribute 'tauavg'

In [None]:
# Look at computed taus
if VERBOSE:
    print("tau")
    print_all_tau(pathensembles, tau_mm, tau_mp, tau_pm, tau_pp)
    print("\ntau1")
    print_all_tau(pathensembles, tau1_mm, tau1_mp, tau1_pm, tau1_pp)
    print("\ntaum")
    print_all_tau(pathensembles, taum_mm, taum_mp, taum_pm, taum_pp)
    print("\ntau2")
    print_all_tau(pathensembles, tau2_mm, tau2_mp, tau2_pm, tau2_pp)
else:
    print("Tau diagnostics suppressed (set VERBOSE=True for details)")

tau
Index Name            mm           mp           pm           pp
-----------------------------------------------------
0     000            nan          nan          nan         64.4
1     001            9.2         50.3         48.8          nan
2     002          192.0        185.8        179.3        192.5
3     003          192.7        185.3        182.6        191.2
4     004          189.0        182.0        178.8        190.9

tau1
Index Name            mm           mp           pm           pp
-----------------------------------------------------
0     000            nan          nan          nan          0.0
1     001            0.0          0.0         48.8          nan
2     002           50.4         49.7         48.9         50.0
3     003           50.0         49.6         49.0         50.5
4     004           50.0         49.6         49.8         50.0

taum
Index Name            mm           mp           pm           pp
--------------------------------------------

In [None]:
# TODO include prints?
tau  = construct_tau_vector(N, NS, tau_mm, tau_mp, tau_pm, tau_pp)
tau1 = construct_tau_vector(N, NS, tau1_mm, tau1_mp, tau1_pm, tau1_pp)
taum = construct_tau_vector(N, NS, taum_mm, taum_mp, taum_pm, taum_pp)
tau2 = construct_tau_vector(N, NS, tau2_mm, tau2_mp, tau2_pp)
tau_m = tau - tau1 - tau2  # yes, this is the same thing as taum

if VERBOSE:
    print("tau")
    print(tau)
    print("\n")
    print("tau1")
    print(tau1)
    print("taum")
    print(taum)
    print("tau2")
    print(tau2)
    print("\n")
    print("tau = tau1+taum+tau2 => difference is", np.sum((tau - tau1 - taum - tau2) ** 2))
else:
    diff = np.sum((tau - tau1 - taum - tau2) ** 2)
    print(f"Tau vectors built. Consistency diff: {diff:.3e} (set VERBOSE=True to inspect)")

tau
[ 64.38558715   9.22638815  50.26795895  48.7838405  192.03760233
 185.77205218 179.30651769 192.49442539 192.69634703 185.34239755
 182.57683602 191.18007769 189.02590455 181.97555929   0.        ]


tau1
[ 0.          0.          0.         48.7838405  50.3790759  49.66692496
 48.91731844 49.99957118 50.00028539 49.59428061 48.98503521 50.52261376
 49.98761087 49.60080176  0.        ]
taum
[6.43855871e+01 9.22638815e+00 7.60167237e-04 0.00000000e+00
 9.10649369e+01 8.60402945e+01 7.97143389e+01 9.26112064e+01
 9.32424372e+01 8.43125239e+01 8.35983400e+01 9.12101831e+01
 8.87488385e+01 8.20037502e+01 0.00000000e+00]
tau2
[ 0.          0.         50.26719878  0.         50.59358957 50.06483275
 50.67486034 49.8836478  49.45362443 51.435593   49.99346076 49.4472808
 50.28945516 50.37100737  0.        ]


tau = tau1+taum+tau2 => difference is 3.0292258760486853e-28


## 5. Flux calculation

### Collect tau for [0+]

In [44]:
# Construct g and h vectors
g1, g2, h1, h2 = mfpt_to_first_last_state(M, np.nan_to_num(tau1), np.nan_to_num(tau_m), np.nan_to_num(tau2)) #, doprint=True)
print("G")
print_vector(g1, labels1)
print_vector(g2, labels2)
print("H")
print_vector(h1, labels1)
print_vector(h2, labels2)
print("\ntau [0+]: ", h1[0])

G
state 0-     : 0.0
state B      : 0.0
state 0+- LML: 23.166319596668128
state 0+- LMR: 4806.481408191596
state 0+- RML: 0.0
state 1+- LML: 277.8819464439966
state 1+- LMR: 6155.497176510371
state 1+- RML: 259.0943563320184
state 1+- RMR: 6153.214237799688
state 2+- LML: 4748.237103562842
state 2+- LMR: 6449.428929152336
state 2+- RML: 4726.824343307694
state 2+- RMR: 6457.97446596862
state 3+- LML: 5891.989447493682
state 3+- LMR: 6640.125128082522
state 3+- RML: 5888.999468290948
state 3+- RMR: 6656.793531091504
state 4+- LML: 6430.2869583061965
state 4+- LMR: 6578.662097742168
state 4+- RML: 6411.110877662439
state 4+- RMR: 6596.444084042213
state 5+- LML: 6674.786816720229
state 5+- LMR: 6283.237650135423
state 5+- RML: 6639.767083344665
state 5+- RMR: 6281.3657950637735
state 6+- LML: 6629.769960310708
state 6+- LMR: 5585.674155957314
state 6+- RML: 6596.415297562357
state 6+- RMR: 5584.244060138155
state 7+- LML: 6287.3154795906175
state 7+- LMR: 4201.790706828424
state 7+- RML:

### The flux

In [45]:
flux = 1/(tau[0]+h1[0][0])
print(tau[0], h1[0][0])
dt = 0.002 # Change if needed
sc = 10
flux /= (dt*sc)
print(flux*(dt*sc), "1/time")
print(flux, "1/ps")

160.44671225856615 2516.6539274948245
0.0003735384412339904 1/time
0.01867692206169952 1/ps


## 6. The rate constant
We can compute an accurate rate constant using only our MSM.

In [22]:
# rate constant = flux * Pcross

print("The rate constant k is: ", flux*y1[0][0]/dt)

The rate constant k is:  0.0035127224734512307 1/ps


## 7. Direct rate computation via $\tau_{\mathcal{A},1}$ 

In [23]:
# Construct g and h vectors
absor = np.array([NS - 1])
kept = np.array([i for i in range(NS) if i not in absor])

g1, g2, h1, h2 = mfpt_to_absorbing_states(M, np.nan_to_num(tau1), np.nan_to_num(tau_m), np.nan_to_num(tau2), absor, kept, remove_initial_m=False) #, doprint=True)
print("G")
print_vector(g1, labels1[-1])
print_vector(g2, [labels1[0]] + labels2)
print("H")
print_vector(h1, labels1[-1])
print_vector(h2, [labels1[0]] + labels2)
print("interesting")
print(h2[0])
mfpt = h2[0][0]  # tau_A,1

[34] g1:  [[0.]] D [[0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [1.]] tp [[160.44671226]
 [ 23.1663196 ]
 [ 65.79970812]
 [  0.        ]
 [277.88194644]
 [247.86955728]
 [259.09435633]
 [245.58661857]
 [141.18681608]
 [130.6355739 ]
 [119.77405583]
 [139.18111072]
 [130.52830352]
 [116.28681111]
 [127.53832431]
 [132.95521412]
 [140.61386036]
 [119.31122325]
 [121.43777972]
 [137.09320955]
 [166.78144619]
 [141.27267238]
 [131.76171282]
 [139.40081731]
 [156.95110696]
 [134.1891367 ]
 [123.59644421]
 [132.75904088]
 [150.40330991]
 [136.04440377]
 [122.29429317]
 [144.62973995]
 [165.06511436]
 [127.53120698]]
Eigenvalues of Mp:
[ 9.88416204e-01+0.00000000e+00j  8.74754078e-01+0.00000000e+00j
 -9.88416204e-01+0.00000000e+00j -8.74754078e-01+0.00000000e+00j
  6.89343933e-01+0.00000000e+00j -6.89343933e-01+0.00000000e+00j
  3.22720378e-

In [24]:
k_flux_pcross = flux * y1[0][0] / dt
k_mfpt = 1 / (mfpt * dt)

print(f"Rate constant from P_cross × flux: {k_flux_pcross:.10e} [1/ps]")
print(f"Rate constant from MFPT:           {k_mfpt:.10e} [1/ps]")
print(f"Relative difference:               {abs(k_flux_pcross - k_mfpt)/k_mfpt*100:.2f}%")


Rate constant from P_cross × flux: 3.5127224735e-03 [1/ps]
Rate constant from MFPT:           3.5127224735e-03 [1/ps]
Relative difference:               0.00%
