In [4]:
%load_ext autoreload
%autoreload 2
%matplotlib qt

import matplotlib.pyplot as plt
from datetime import datetime
from pprint import pprint    # to print the vars of the pathensemble object
import numpy as np
import os
import glob
import copy

# Reading
from tistools import read_inputfile, get_LMR_interfaces, read_pathensemble, get_weights
from tistools import set_tau_distrib, set_tau_first_hit_M_distrib
from tistools import collect_tau, collect_tau1, collect_tau2, collect_taum
from tistools import ACCFLAGS, REJFLAGS

# REPPTIS analysis
from tistools import unwrap_by_weight, get_local_probs, get_global_probs_from_dict

# MSM functions
from tistools import construct_M
from tistools import mfpt_to_first_last_state, construct_tau_vector
from tistools import create_labels_states
import warnings
warnings.filterwarnings("ignore", category=RuntimeWarning)

def pathensembles_nskip(obj, nskip):
    keys = [
        'cyclenumbers', 'flags', 'generation', 'lambmaxs', 'lambmins',
        'lengths', 'lmrs', 'newpathnumbers', 'orders', 'pathnumbers',
        'shootlinks', 'weights']
    for key in keys:
        attr = getattr(obj, key)
        setattr(obj, key, attr[:nskip])

indir_list = [
    # "/mnt/tw06_biommeda_pyretis/04.2024_MSM_elias/simulations/cosbump2-walls/brownian/REPPTIS/",
    # "/mnt/tw06_biommeda_pyretis/04.2024_MSM_elias/simulations/cosbump2-walls/langevin-gamma5/REPPTIS/",
    # "/mnt/tw06_biommeda_pyretis/04.2024_MSM_elias/simulations/cosbump3-walls/brownian/REPPTIS/",
    # "/mnt/tw06_biommeda_pyretis/04.2024_MSM_elias/simulations/cosbump3-walls/langevin-gamma5/REPPTIS/",
    # "/mnt/tw06_biommeda_pyretis/04.2024_MSM_elias/simulations/cosdip2-walls/brownian/REPPTIS/",
    # "/mnt/tw06_biommeda_pyretis/04.2024_MSM_elias/simulations/cosdip2-walls/langevin-gamma5/REPPTIS/",
    # "/mnt/tw06_biommeda_pyretis/04.2024_MSM_elias/simulations/cosbumpmeta-walls/brownian/REPPTIS/",
    # "/mnt/tw06_biommeda_pyretis/04.2024_MSM_elias/simulations/cosbumpmeta-walls/brownian/fine_intf/REPPTIS/",
    # "/mnt/tw06_biommeda_pyretis/04.2024_MSM_elias/simulations/cosbumpmeta-walls/brownian/intf_shift/REPPTIS/",
    # "/mnt/tw06_biommeda_pyretis/04.2024_MSM_elias/simulations/cosbumpmeta-walls/langevin_gamma5/REPPTIS/",
    # "/mnt/tw06_biommeda_pyretis/04.2024_MSM_elias/simulations/cosbumpmeta-walls/langevin_gamma5/fine_intf/REPPTIS/",
    # "/mnt/tw06_biommeda_pyretis/04.2024_MSM_elias/simulations/cosbumpmeta-walls/langevin_gamma5/intf_shift/REPPTIS/",
    # "/mnt/tw06_biommeda_pyretis/04.2024_MSM_elias/simulations/cosdipmeta-walls/brownian/REPPTIS/",
    # "/mnt/tw06_biommeda_pyretis/04.2024_MSM_elias/simulations/cosdipmeta-walls/langevin_gamma5/REPPTIS/",
    # "/mnt/tw06_biommeda_pyretis/04.2024_MSM_elias/simulations/flat_w-walls/brownian-gamma5/30k-cycles/REPPTIS",
    # "/mnt/tw06_biommeda_pyretis/04.2024_MSM_elias/simulations/flat_w-walls/langevin-gamma5/REPPTIS/",
    # "/mnt/tw06_biommeda_pyretis/04.2024_MSM_elias/simulations/flat_w-walls/newtonian/REPPTIS/"
    "/mnt/0bf0c339-34bb-4500-a5fb-f3c2a863de29/DATA/MSM-REPPTIS/1D-experiments/REPPTIS/"
    ]

for indir in indir_list:
    zero_minus_one = False
    inputfile = indir + "/repptis.rst"    # When using PyRETIS, the input file for REPPTIS simulations is a .rst file
    # Move to working directory
    os.chdir(indir)
    print(os.getcwd())
    # Set the ensemble folders and print them
    folders = glob.glob(indir + "/0[0-9][0-9]")
    folders = sorted(folders)

    # Reading all input
    #===================
    interfaces, zero_left, timestep = read_inputfile(inputfile)
    LMR_interfaces, LMR_strings = get_LMR_interfaces(interfaces, zero_left)
    pathensembles_original = []
    for i,fol in enumerate(folders):
        pe = read_pathensemble(fol+"/pathensemble.txt")
        pe.set_name(fol)
        pe.set_interfaces([LMR_interfaces[i], LMR_strings[i]])
        if i==0:
            pe.set_zero_minus_one(zero_minus_one)   # TODO this is never used
            pe.set_in_zero_minus(True)
        if i==1:
            pe.set_in_zero_plus(True)
        w, _ = get_weights(pe.flags, ACCFLAGS, REJFLAGS, verbose = False)
        pe.set_weights(w)
        pathensembles_original.append(pe)

        pe.set_orders(load=False, acc_only=True, save=True)        # for the 1st time you run this notebook for a certain simulation, this will store .npy files
        # pe.set_orders(load=True, acc_only=True)                  # for the next times, you can read npy files (save=True/False is not important)

    stored_values = []
    # This loops over the npy file and calculates tau from cycle 100 every 10 cycles
    for nskip in range(100, 30010, 10):
        pathensembles = copy.deepcopy(pathensembles_original)
        for i, pe in enumerate(pathensembles):
            pathensembles_nskip(pe,nskip)
        # Analysis output is saved to the data dictionary.
        data = {}
        for i, pe in enumerate(pathensembles):
            if i == 0:
                data[i] = {}
                continue  #  [0-] is not used for Pcross calculations
            
            # Classify the paths according to their path type.
            pathtypes = ("LML", "LMR", "RML", "RMR")
            pathtype_cycles = {}
            for ptype in pathtypes:
                pathtype_cycles[ptype] = unwrap_by_weight(
                        (pe.lmrs == ptype).astype(int), pe.weights)
            
            data[i] = {}
            plocfull = get_local_probs(pe, tr=False)
            data[i]["full"] = {}
            for ptype in pathtypes:
                data[i]["full"][ptype] = plocfull[ptype]
    
        psfull = []
        for i in range(1, len(pathensembles)):   # do not use the 0- ensemble
            psfull.append({"LMR": data[i]["full"]["LMR"], 
                    "RML": data[i]["full"]["RML"], 
                    "RMR": data[i]["full"]["RMR"],
                    "LML": data[i]["full"]["LML"]})

        Pminfull, Pplusfull, Pcrossfull = get_global_probs_from_dict(psfull)

        pmps = [data[i]["full"]["LMR"] for i in range(1,len(pathensembles))]
        pmms = [data[i]["full"]["LML"] for i in range(1,len(pathensembles))]
        ppps = [data[i]["full"]["RMR"] for i in range(1,len(pathensembles))]
        ppms = [data[i]["full"]["RML"] for i in range(1,len(pathensembles))]

        N = len(interfaces)
        NS = 4*N-5

        labels1, labels2 = create_labels_states(N)

        if N > 2:  
            M = construct_M(pmms, pmps, ppms, ppps, N)
        else:
            raise ValueError("The amount of interfaces needs to be 3 at least!")

        for i,fol in enumerate(folders):
            set_tau_distrib(pathensembles[i])
            if True:
                set_tau_first_hit_M_distrib(pathensembles[i])

        # Compute taus for pathlength analysis
        tau_mm, tau_mp, tau_pm, tau_pp = collect_tau(pathensembles)
        tau1_mm, tau1_mp, tau1_pm, tau1_pp = collect_tau1(pathensembles)
        tau2_mm, tau2_mp, tau2_pm, tau2_pp = collect_tau2(pathensembles)
        taum_mm, taum_mp, taum_pm, taum_pp = collect_taum(pathensembles)

        tau  = construct_tau_vector(N, NS, tau_mm, tau_mp, tau_pm, tau_pp)
        tau1 = construct_tau_vector(N, NS, tau1_mm, tau1_mp, tau1_pm, tau1_pp)
        taum = construct_tau_vector(N, NS, taum_mm, taum_mp, taum_pm, taum_pp)
        tau2 = construct_tau_vector(N, NS, tau2_mm, tau2_mp, tau2_pm, tau2_pp)
        tau_m = tau-tau1-tau2

        g1, g2, h1, h2 = mfpt_to_first_last_state(M, tau1, tau_m, tau2)
        timestamp = datetime.now().strftime("%H:%M:%S")
        print(f"{nskip:5d} cycles, tau {h1[0][0]}")
        stored_values.append(h1[0][0])

        # Not sure if we need this, need to check later
        del data
        del M, N, NS
        del tau_mm, tau_mp, tau_pm, tau_pp, tau1_mm, tau1_mp, tau1_pm, tau1_pp
        del tau2_mm, tau2_mp, tau2_pm, tau2_pp, taum_mm, taum_mp, taum_pm, taum_pp
        del tau, tau1, taum, tau2, tau_m, g1, g2, h1, h2
        del pmms, pmps, ppms, ppps

    np.save('tau_vs_cycle_interval_10.npy', stored_values)


Collect tau
ensemble 0 /mnt/0bf0c339-34bb-4500-a5fb-f3c2a863de29/DATA/MSM-REPPTIS/1D-experiments/REPPTIS/000
ensemble 1 /mnt/0bf0c339-34bb-4500-a5fb-f3c2a863de29/DATA/MSM-REPPTIS/1D-experiments/REPPTIS/001
ensemble 2 /mnt/0bf0c339-34bb-4500-a5fb-f3c2a863de29/DATA/MSM-REPPTIS/1D-experiments/REPPTIS/002
ensemble 3 /mnt/0bf0c339-34bb-4500-a5fb-f3c2a863de29/DATA/MSM-REPPTIS/1D-experiments/REPPTIS/003
ensemble 4 /mnt/0bf0c339-34bb-4500-a5fb-f3c2a863de29/DATA/MSM-REPPTIS/1D-experiments/REPPTIS/004
Collect tau1
Collect tau2
Collect taum
 8370 cycles, tau nan
Weights of the different paths:
wRMR = 0
wRML = 4336
wLMR = 4043
wLML = 0
Local crossing probabilities:
pRMR = 0.0
pRML = 1.0
pLMR = 1.0
pLML = 0.0
Local crossing probabilities:
p2R = 0.482515813342881
p2L = 0.517484186657119
Weights of the different paths:
wRMR = 0
wRML = 4219
wLMR = 4160
wLML = 0
Local crossing probabilities:
pRMR = 0.0
pRML = 1.0
pLMR = 1.0
pLML = 0.0
Local crossing probabilities:
p2R = 0.49647929347177466
p2L = 0.5035

In [6]:
# PyRETIS code for block error analysis
    
def block_error(data, maxblock=None, blockskip=1):
    """
    Perform block error analysis to estimate the standard deviation in the input data.

    Parameters
    ----------
    data : numpy.array
        The data to analyze.
    maxblock : int, optional
        Maximum block length to consider. Defaults to half the length of the input data.
    blockskip : int, optional
        Skip factor for block lengths. Defaults to 1 (all block lengths considered).

    Returns
    -------
    blocklen : numpy.array
        Array of block lengths considered.
    block_avg : numpy.array
        Block averages as a function of block length.
    block_err : numpy.array
        Standard error estimates as a function of block length.
    block_err_avg : float
        Average error estimate for block lengths greater than maxblock//2.
    """
    n = len(data)
    maxblock = min(maxblock or n // 2, n // 2)
    
    blocklen = np.arange(1, maxblock + 1, blockskip, dtype=np.int_)
    n_blocks = len(blocklen)
    
    block = np.zeros(n_blocks)
    nblock = np.zeros(n_blocks)
    block_avg = np.zeros(n_blocks)
    block_var = np.zeros(n_blocks)

    for i, val in enumerate(data):
        block += val
        full_blocks = (i + 1) % blocklen == 0
        block[full_blocks] /= blocklen[full_blocks]
        nblock[full_blocks] += 1
        deltas = block[full_blocks] - block_avg[full_blocks]
        block_avg[full_blocks] += deltas / nblock[full_blocks]
        block_var[full_blocks] += deltas * (block[full_blocks] - block_avg[full_blocks])
        block[full_blocks] = 0.0

    block_var /= (nblock - 1)
    block_err = np.sqrt(block_var / nblock)
    
    large_blocks = blocklen > maxblock // 2
    block_err_avg = np.mean(block_err[large_blocks])
    
    return blocklen, block_avg, block_err, block_err_avg, maxblock, n//maxblock


def block_error_corr(data, maxblock=None, blockskip=1):
    """
    Run block error analysis and calculate correlation length estimates.

    Parameters
    ----------
    data : numpy.array
        Data to analyze.
    maxblock : int, optional
        Maximum block length to consider. Defaults to half the length of the input data.
    blockskip : int, optional
        Skip factor for block lengths. Defaults to 1 (all block lengths considered).

    Returns
    -------
    blen : numpy.array
        Block lengths considered.
    berr : numpy.array
        Error estimates as a function of block length.
    berr_avg : float
        Average error estimate for blocks with length > maxblock // 2.
    rel_err : numpy.array
        Relative error normalized by the overall average as a function of block length.
    avg_rel_err : float
        Average relative error for blocks with length > maxblock // 2.
    ncor : numpy.array
        Estimated correlation length as a function of block length.
    avg_ncor : float
        Average correlation length for blocks with length > maxblock // 2.
    """
    blen, bavg, berr, berr_avg, max_block_size, min_block_number = block_error(data, maxblock=maxblock, blockskip=blockskip)
    rel_err = berr / abs(bavg[0])
    avg_rel_err = berr_avg / abs(bavg[0])
    ncor = (berr / berr[0])**2
    avg_ncor = (berr_avg / berr[0])**2
    
    return blen, berr, berr_avg, rel_err, avg_rel_err, ncor, avg_ncor, max_block_size, min_block_number

indir_list = [
    # "/mnt/tw06_biommeda_pyretis/04.2024_MSM_elias/simulations/cosbump2-walls/brownian/REPPTIS/",
    # "/mnt/tw06_biommeda_pyretis/04.2024_MSM_elias/simulations/cosbump2-walls/langevin-gamma5/REPPTIS/",
    # "/mnt/tw06_biommeda_pyretis/04.2024_MSM_elias/simulations/cosbump3-walls/brownian/REPPTIS/",
    # "/mnt/tw06_biommeda_pyretis/04.2024_MSM_elias/simulations/cosbump3-walls/langevin-gamma5/REPPTIS/",
    # "/mnt/tw06_biommeda_pyretis/04.2024_MSM_elias/simulations/cosdip2-walls/brownian/REPPTIS/",
    # "/mnt/tw06_biommeda_pyretis/04.2024_MSM_elias/simulations/cosdip2-walls/langevin-gamma5/REPPTIS/",
    # "/mnt/tw06_biommeda_pyretis/04.2024_MSM_elias/simulations/cosbumpmeta-walls/brownian/REPPTIS/",
    # "/mnt/tw06_biommeda_pyretis/04.2024_MSM_elias/simulations/cosbumpmeta-walls/brownian/fine_intf/REPPTIS/",
    # "/mnt/tw06_biommeda_pyretis/04.2024_MSM_elias/simulations/cosbumpmeta-walls/brownian/intf_shift/REPPTIS/",
    # "/mnt/tw06_biommeda_pyretis/04.2024_MSM_elias/simulations/cosbumpmeta-walls/langevin_gamma5/REPPTIS/",
    # "/mnt/tw06_biommeda_pyretis/04.2024_MSM_elias/simulations/cosbumpmeta-walls/langevin_gamma5/fine_intf/REPPTIS/",
    # "/mnt/tw06_biommeda_pyretis/04.2024_MSM_elias/simulations/cosbumpmeta-walls/langevin_gamma5/intf_shift/REPPTIS/",
    # "/mnt/tw06_biommeda_pyretis/04.2024_MSM_elias/simulations/cosdipmeta-walls/brownian/REPPTIS/",
    # "/mnt/tw06_biommeda_pyretis/04.2024_MSM_elias/simulations/flat_w-walls/brownian-gamma5/30k-cycles/REPPTIS",
    # "/mnt/tw06_biommeda_pyretis/04.2024_MSM_elias/simulations/flat_w-walls/langevin-gamma5/REPPTIS/",
    "/mnt/0bf0c339-34bb-4500-a5fb-f3c2a863de29/DATA/MSM-REPPTIS/1D-experiments/REPPTIS/"
    ]

for indir in indir_list:
    os.chdir(indir)
    stored_values = np.load('tau_vs_cycle_interval_10.npy')    
    stored_values = stored_values[~np.isnan(stored_values)] # remove nans in the beginning
    blen, berr, berr_avg, rel_err, avg_rel_err, ncor, avg_ncor, max_block_size, min_block_number = block_error_corr(stored_values,10)

    print("=" * 60)
    print(indir[57:-9])
    print(f"Total Data Points: {len(stored_values)}, max block size: {max_block_size}, min block number: {min_block_number}")
    print(f"Average Relative Error for blocks > maxblock/2: {avg_rel_err * 100:.1f}%")
    print(f"Average Correlation Length for Large Blocks: {int(avg_ncor)}")
    plt.figure(figsize=(8, 6))
    plt.plot(rel_err, marker='o', linestyle='-', label = indir[57:-9])
    plt.xlabel("Block Size")
    plt.ylabel("Rlative Error")
    plt.title(f"Ave Rel Err (blocks > maxblock/2): {avg_rel_err * 100:.1f}%, Block interval 10 cycles")
    plt.grid(True)
    plt.legend()
    plt.savefig("Block_Error_Tau.png", dpi=1000, bbox_inches='tight')

ZeroDivisionError: integer division or modulo by zero