In [1]:
%load_ext autoreload
%autoreload 2

import matplotlib.pyplot as plt
import numpy as np
#%matplotlib qt   # doesn't work on my laptop
from tistools import read_inputfile, get_LMR_interfaces, read_pathensemble, get_weights
from tistools import set_tau_distrib, set_tau_first_hit_M_distrib, cross_dist_distr, pathlength_distr
from tistools import ACCFLAGS, REJFLAGS

from tistools import get_lmr_masks, get_generation_mask, get_flag_mask, select_with_masks
from tistools import unwrap_by_weight, running_avg_local_probs, get_local_probs, get_globall_probs, get_global_probz
from tistools import make_plot_trajs

from pprint import pprint    # to print the vars of the pathensemble object

from istar_test import *

%matplotlib qt


In [2]:
import logging

logger = logging.getLogger(__name__)

In [3]:
%autoreload 2   
# something with pip install -e .

# Reading

In [131]:
# zero_minus_one = True if lambda_-1 interface is set
# zero_minus_one = False if lambda_-1 interface is not set

# data the maze
# ---------------
# indir = "/mnt/0bf0c339-34bb-4500-a5fb-f3c2a863de29/DATA/PyRETIS3/toytis/simulations/sim_istarmazewall2608/"
indir = "/mnt/0bf0c339-34bb-4500-a5fb-f3c2a863de29/DATA/PyRETIS3/toytis/simulations/sim_istarwell2_0209/"
zero_minus_one = False
inputfile = indir + "logging.log"


import os
import glob
os.chdir(indir)
print(os.getcwd())

folders = glob.glob(indir + "/0[0-9][0-9]")
folders = sorted(folders)
print(folders)

/mnt/0bf0c339-34bb-4500-a5fb-f3c2a863de29/DATA/PyRETIS3/toytis/simulations/sim_istarwell2_0209
['/mnt/0bf0c339-34bb-4500-a5fb-f3c2a863de29/DATA/PyRETIS3/toytis/simulations/sim_istarwell2_0209/000', '/mnt/0bf0c339-34bb-4500-a5fb-f3c2a863de29/DATA/PyRETIS3/toytis/simulations/sim_istarwell2_0209/001', '/mnt/0bf0c339-34bb-4500-a5fb-f3c2a863de29/DATA/PyRETIS3/toytis/simulations/sim_istarwell2_0209/002', '/mnt/0bf0c339-34bb-4500-a5fb-f3c2a863de29/DATA/PyRETIS3/toytis/simulations/sim_istarwell2_0209/003', '/mnt/0bf0c339-34bb-4500-a5fb-f3c2a863de29/DATA/PyRETIS3/toytis/simulations/sim_istarwell2_0209/004', '/mnt/0bf0c339-34bb-4500-a5fb-f3c2a863de29/DATA/PyRETIS3/toytis/simulations/sim_istarwell2_0209/005']


In [132]:
# !!! last lines !!!  allow to speed up this notebook
# pe.set_orders(load=False...)  -> 1st time you run the code, this will store npy files
# pe.set_orders(load=True...)  -> next time you run the code, you can read npy files

# Reading all input
#===================
interfaces, zero_left, timestep = read_inputfile(inputfile)
LMR_interfaces, LMR_strings = get_LMR_interfaces(interfaces, zero_left)
pathensembles = []
for i,fol in enumerate(folders):
    print("#"*80)
    print(fol)
    pe = read_pathensemble(fol+"/pathensemble.txt")
    pe.set_name(fol)
    pe.set_interfaces([LMR_interfaces[i], LMR_strings[i]])
    if i==0:
        pe.set_zero_minus_one(zero_minus_one)   # TODO this is never used
        pe.set_in_zero_minus(True)
    if i==1:
        pe.set_in_zero_plus(True)
    w, _ = get_weights(pe.flags, ACCFLAGS, REJFLAGS, verbose = False)
    pe.set_weights(w)
    print("pathensemble info: ")
    pprint(vars(pe))
    pathensembles.append(pe)
    # read order parameters order.txt/order.npy into path ensemble object
    #pe.set_orders(load=False, acc_only=True, save=False) # if saving doesn't work
    #### CHANGE HERE ####
    # pe.set_orders(load=False, acc_only=True, save=True) # for the 1st time
    # pe.set_orders(load=True, acc_only=True) # for the next times, save=True/False is not important

################################################################################
/mnt/0bf0c339-34bb-4500-a5fb-f3c2a863de29/DATA/PyRETIS3/toytis/simulations/sim_istarwell2_0209/000
pathensemble info: 
{'cyclenumbers': array([     0,      1,      2, ...,  99998,  99999, 100000]),
 'dirs': array([-1., -1., -1., ...,  1., -1.,  1.]),
 'flags': array(['ACC', 'SWD', 'ACC', ..., 'BTL', 'ACC', 'ACC'], dtype='<U3'),
 'generation': array(['ld', 's+', 'sh', ..., 'sh', 'sh', 'sh'], dtype='<U2'),
 'has_zero_minus_one': False,
 'in_zero_minus': True,
 'in_zero_plus': False,
 'interfaces': [[-0.35, -0.35, -0.35], ['l_[0]', 'l_[0]', 'l_[0]']],
 'istar_idx': array([[0, 0],
       [0, 0],
       [0, 0],
       ...,
       [0, 0],
       [0, 0],
       [0, 0]]),
 'lambmaxs': array([-0.34409914, -0.34409914, -0.34797086, ..., -0.39146701,
       -0.34502073, -0.34619167]),
 'lambmins': array([-0.54799961, -0.54799961, -0.54814511, ..., -0.49032907,
       -0.59127813, -0.60878195]),
 'lengths': array([122

In [133]:
for pe in pathensembles:
    print(f"ensemble {pathensembles.index(pe)}")
    print(max(pe.lengths))

for i in range(len(pathensembles)):
    accmask = get_flag_mask(pathensembles[i], "ACC")
    loadmask = get_generation_mask(pathensembles[i], "ld")
    mask = get_lmr_masks(pathensembles[i])
    print(i, np.average(select_with_masks(pathensembles[i].lengths, [~loadmask, accmask])))
    print("LMRlen", i, np.average(select_with_masks(pathensembles[i].lengths, [~loadmask, accmask])))

ensemble 0
949
ensemble 1
3370
ensemble 2
3042
ensemble 3
3430
ensemble 4
2913
ensemble 5
2901
0 122.20604501607717
LMRlen 0 122.20604501607717
1 195.4779636686775
LMRlen 1 195.4779636686775
2 217.26229152274746
LMRlen 2 217.26229152274746
3 311.3736109068756
LMRlen 3 311.3736109068756
4 310.4653771028921
LMRlen 4 310.4653771028921
5 217.8836640444601
LMRlen 5 217.8836640444601


In [None]:
plocrepptis, plocistar = cprobs_repptis_istar(pathensembles, interfaces, len(interfaces))

In [134]:
# make_plot_trajs("paths002test", "./002", interfaces, 5)

In [135]:
# if 'repptis' in fol:
#     plot_rv_repptis(pathensembles, interfaces, 3)
# else:
#     plot_rv_star(pathensembles, interfaces, 1)

In [136]:
# while True:
#     try:
#         plot_rv_comp(pathensembles, interfaces, 1, 1, (2,4))
#         break
#     except:
#         continue

# Analysis

In [137]:
# Setting path ensemble properties
#==================================
# for i,fol in enumerate(folders):
#     print(i)
#     if i != 1:
#         print("Calculating path lengths.")
#         set_tau_distrib(pathensembles[i])
#     print("Done.")
#     #else:
#         #TODO problem with ...
#     if i > 1:
#         print("Calculating first hitting lengths to middle interface")
#         set_tau_first_hit_M_distrib(pathensembles[i])
#         print("Done.")
#     #else:
#         # TODO problem with ....

In [138]:
# Analyze the [i*] simulation.
# Analysis output is saved to the data dictionary.
data = {}

data["running"] = {}

# analysis using all data: ["full"]
# data["full"] = get_transition_probs(pathensembles, interfaces)
# pprint(data)
# print("\n\n")
# data["full"] = get_transition_probzz2(pathensembles, interfaces)
w = compute_weight_matrices(pathensembles, interfaces)
for i in range(len(pathensembles)):
    print(f"sum weights pe {i}: ",np.sum(w[i]))

data["full"] = get_transition_probzz(w)
# data["full"] = get_transition_probs(w)
data["full1"] = get_simple_probs(w)
# data["full"] = get_summed_probs(pathensembles, interfaces)
pprint(data)

# for i, pe in enumerate(pathensembles):
#     print("doing pathensemble {}".format(i))
#     if i == 0:
#         data[i] = {}
#         continue  # TODO: make [0-] analysis ???

    # masks - TODO not used further on?
    # TODO these functions are duplicate in repptis_analysis
    #masks = get_lmr_masks(pe)
    #loadmask = get_generation_mask(pe, "ld")
    #print("Amount of loads: {}".format(np.sum(loadmask)))
    ##hardloadmask = get_hard_load_mask(loadmask)
    #accmask = get_flag_mask(pe, "ACC")

    # pathtype_cycles
    # pathtypes = ("LML", "LMR", "RML", "RMR")
    # pathtype_cycles = {}
    # for ptype in pathtypes:
    #     pathtype_cycles[ptype] = unwrap_by_weight(
    #             (pe.lmrs == ptype).astype(int), pe.weights)
    
    # # running average analysis: ["running"]
    # data[i] = {}
    # data[i]["running"] = {}
    # data[i]["running"]["plocal"] = {}
    # # you'll still have to hardload select pe.weigths... TODO. # this is comment wouter?
    # for (ptype, p_loc) in zip(pathtypes, 
    #                           running_avg_local_probs(pathtype_cycles, 
    #                                                   pe.weights, tr = False)):
    #     data[i]["running"]["plocal"][ptype] = p_loc

    # analysis using all data: ["full"]
    # plocfull = get_local_probs(pe, tr=False)
    # data[i]["full"] = {}
    # for ptype in pathtypes:
    #     data[i]["full"][ptype] = plocfull[ptype]

    # data[i] have now ["full"] and ["running"]

weights:
accepted      77751
rejected      22249
omitted       0
total trajs   100000
total weights 100000
sum weights ensemble 0= 0.0
weights:
accepted      58518
rejected      41482
omitted       0
total trajs   100000
total weights 100000
sum weights ensemble 1= 99998.0
weights:
accepted      50666
rejected      49334
omitted       0
total trajs   100000
total weights 101416
sum weights ensemble 2= 102339.0
weights:
accepted      47604
rejected      52396
omitted       0
total trajs   100000
total weights 124396
sum weights ensemble 3= 124395.0
weights:
accepted      47614
rejected      52386
omitted       0
total trajs   100000
total weights 124047
sum weights ensemble 4= 124046.0
weights:
accepted      51103
rejected      48897
omitted       0
total trajs   100000
total weights 101326
sum weights ensemble 5= 101325.0
sum weights pe 0:  0.0
sum weights pe 1:  99998.0
sum weights pe 2:  102339.0
sum weights pe 3:  124395.0
sum weights pe 4:  124046.0
sum weights pe 5:  101325.0
0 0 

In [139]:

# for i, pe in enumerate(pathensembles):
#     upe = pe.unify_pe()
#     # Pathlength distribution
#     data[i]["pathlengths"] = pathlength_distr(upe)  # these might be used later or not! TODO
        
# #=======================================
# # make figures
# makefigs = True 
# if makefigs:
#     for i, pe in enumerate(pathensembles):     
#         if i == 0:
#             continue
#         # Cross distances distribution
#         L, M, R, lmlpercs, lmllambs, rmrpercs, rmrlambs = cross_dist_distr(pe)
#         fig,ax = plt.subplots()
#         ax.plot(lmllambs, lmlpercs, lw=1, c="g")
#         ax.plot(rmrlambs, rmrpercs, lw=1, c="r")
#         for lamb in (L,M,R):
#             ax.axvline(lamb, color='k', linestyle='--', lw = 0.5)
#         ax.set_xlabel('Cross distance')
#         ax.set_ylabel('Frequency')
#         ax.set_title("Ensemble {}. L = {}, M = {}, R = {}".format(
#             pe.name, L, M, R))
#         ax.set_ylim(0)
#         fig.savefig(f"pathensemble_{i}_crossdist.pdf")
#         plt.close(fig)

#         # Pathlength distribution      
#         for ptype in pathtypes:
#             fig, ax = plt.subplots()
#             ax.plot(data[i]["pathlengths"][ptype]["bin_centers"], 
#                 data[i]["pathlengths"][ptype]["hist"])
#             ax.set_xlabel('Pathlength')
#             ax.set_ylabel('Frequency')
#             ax.set_title(f"{np.sum(data[i]['pathlengths'][ptype]['hist'])} " + \
#                          f"{ptype} paths. ")
#             ax.legend([f"mean = {data[i]['pathlengths'][ptype]['mean']:.2f}, " + \
#                           f"std = {data[i]['pathlengths'][ptype]['std']:.2f}"])
#             fig.savefig(f"pathensemble_{i}_pathlength_{ptype}.pdf")
#             plt.close(fig)

In [140]:
# Make a figure of the global crossing probabilities
# fig, ax = plt.subplots()
# ax.set_yscale("log")
# ax.plot(Pcrossfull, "o", c = "r")

# cosdip meta
# ax.errorbar([i for i in range(7)], Pcrossfull, yerr=[0, 0.004830, Pcrossfull[2]*0.05068988646, Pcrossfull[3]*0.05189862680, Pcrossfull[4]*0.05071184896, Pcrossfull[5]*0.05083284286, Pcrossfull[6]*0.05067963543], fmt="-o", c = "b", ecolor="r", capsize=6)

# cosbump meta
# ax.errorbar([i for i in range(7)], Pcrossfull, yerr=[0, 0.002535, Pcrossfull[2]*0.04393065503, Pcrossfull[3]*0.04910273500, Pcrossfull[4]*0.05239942040, Pcrossfull[5]*0.05789033634, Pcrossfull[6]*0.0614468], fmt="-o", c = "b", ecolor="r", capsize=6)

# 2 cosdips
# ax.errorbar([i for i in range(5)], Pcrossfull, yerr=[0, 0.007239, Pcrossfull[2]*0.0414296, Pcrossfull[3]*0.0445266, Pcrossfull[4]*0.0483538], fmt="-o", c = "b", ecolor="r", capsize=6)

# 3 cosbumps
# ax.errorbar([i for i in range(7)], Pcrossfull, yerr=[0, 0.002295, Pcrossfull[2]*0.0328798, Pcrossfull[3]*0.031594, Pcrossfull[4]*0.031474, Pcrossfull[5]*0.03080392, Pcrossfull[6]*0.0308589], fmt="-o", c = "b", ecolor="r", capsize=6)

# 2 cosbumps
#ax.errorbar([i for i in range(5)], Pcrossfull, yerr=[0, 0.002768, Pcrossfull[2]*0.04440278, Pcrossfull[3]*0.043053, Pcrossfull[4]*0.0463156], fmt="-o", c = "b", ecolor="r", capsize=6)

# flat dt=0.00002 30k cycles
# ax.errorbar([i for i in range(5)], Pcrossfull, yerr=[0, 0.003294, Pcrossfull[2]*0.07640968, Pcrossfull[3]*0.07789262, Pcrossfull[4]*0.0812692], fmt="-o", c = "b", ecolor="r", capsize=6)

# flat 100k cycles
#ax.errorbar([i for i in range(5)], Pcrossfull, yerr=[0, 0.002741, Pcrossfull[2]*0.034092, Pcrossfull[3]*0.033621, Pcrossfull[4]*0.0398], fmt="-o", c = "b", ecolor="r", capsize=6)

# ax.set_xlabel("intf")
# ax.set_ylabel(r"$P_A(\lambda_i|\lambda_A)$")
# ax.set_xticks(np.arange(len(interfaces)))
# fig.tight_layout()
# fig.show()
# fig.savefig("Global_probs.pdf")

# print("This should be the same as the repptis_report.pdf value:", Pcrossfull[-1])
# print("which is the case!")
# print("Here, the load immediately disappeared. For a simulation where this is")
# print("not the case, the above code should be adapted a little bit.")

# Now work with MSM

In [141]:
from tistools import construct_M
from tistools import global_cross_prob
from tistools import mfpt_to_first_last_state

from tistools import create_labels_states

In [142]:
def print_vector(g, states=None):
    if states is None:
        for i in range(len(g)):
            print("state", i, g[i])
    else:
        for i in range(len(g)):
            print("state", states[i], g[i][0])

In [143]:
print(interfaces)
N = len(interfaces)
assert N >= 3
NS = 2*N
print("N", N)
print("NS", NS)

#labels2 = ["0+- LML","0+- LMR","0+- RML","1+- LML","1+- LMR",
#           "1+- RML", "1+- RMR", "2+- LML", "2+- LMR",
#           "2+- RML", "2+- RMR", "3+- LML", "3+- LMR",]
labels1, labels2 = create_labels_states(N)
print(labels1, labels2)

[-0.35, -0.25, -0.12, 0.12, 0.25, 0.35]
N 6
NS 12
['0-     ', 'B      '] ['0+- LML', '0+- LMR', '0+- RML', '1+- LML', '1+- LMR', '1+- RML', '1+- RMR', '2+- LML', '2+- LMR', '2+- RML', '2+- RMR', '3+- LML', '3+- LMR', '3+- RML', '3+- RMR', '4+- LML', '4+- LMR']


In [144]:
def construct_M_istar(P, NS, N):
    """Construct transition matrix M"""
    # N -- number of interfaces
    # NS -- dimension of MSM, 4*N-5 when N>=4
    # P -- ndarray of probabilities for paths between end turns
    
    # assert N>=3
    assert N==P.shape[0]
    assert N==P.shape[1]
    assert NS==max(4, 2*N)

    # construct transition matrix
    M = np.zeros((NS,NS))
    
    # states [0-] and [0*+-]
    M[0,2] = 1
    M[2,1] = P[0,0]
    M[2,N+1:] = P[0, 1:]
    M[1,0] = 1
    M[-1,0] = 1
    M[N+1:,1] = P[1:, 0]

    # non-sampled paths
    M[N+1, -1] = 0

    for i in range(1,N):
        #print("starting from state i",i)
        M[2+i, N+i:2*N] = P[i,i:]
        M[N+i, 3:2+i] = P[i, 1:i]
    

    # for i in range(NS):
    #     if np.sum(M[i]) > 0:
    #         M[i] = M[i]/np.sum(M[i])
    #     else:
    #         M[i] = 0 
       
    # non-sampled paths
    if not M[N, -1] >= 0:
        M[N, -1] = 0
    M[N+1,1] = 1
    # return np.delete(np.delete(M, N, 0), N, 1)
    return M

In [145]:
p_ini = data["full"]
print("p matrix: ", p_ini)
p_ini[-2][-1] = 0
print("sum rows of p:")
for i in range(p_ini.shape[0]):
    print(np.sum(p_ini[i][:i]), np.sum(p_ini[i][i:]))
M = construct_M_istar(p_ini, NS, N)
M1 = construct_M_istar(data["full1"], NS, N)

# for r in range(M.shape[0]):
#     if np.sum(M[r]) != 0:
#         M[r] /= np.sum(M[r])
#Local crossing probabilities:
#pRMR = 0.34205627942625644.  #ppps
#pRML = 0.6579437205737436.   #ppms
#pLMR = 0.25316455696202533.  #pmps
#pLML = 0.7468354430379747.   #pmms

p matrix:  [[0.17274014 0.01434575 0.01698327 0.2359283  0.01278309 0.54721945]
 [1.         0.         0.04352895 0.4151079  0.0127716  0.52859155]
 [0.91988636 0.08011364 0.         0.5544101  0.01856474 0.42702516]
 [0.42654092 0.01812161 0.55533747 0.         0.06921778 0.93078222]
 [0.51337618 0.01905068 0.41717415 0.05039899 0.         1.        ]
 [0.         0.         0.         0.         0.         0.        ]]
sum rows of p:
0.0 1.0
1.0 1.0
1.0 0.9999999999999999
1.0 1.0
1.0 0.0
0.0 0.0


In [146]:
print("M")
print("shape", M.shape)
print("sum prob in rows", np.sum(M,axis=1))
print(M)
print(M1)
# row 8, 10, 12, 14. # counting starts from 0   not okay!!!!

M
shape (12, 12)
sum prob in rows [1. 1. 1. 1. 1. 1. 0. 1. 1. 1. 1. 1.]
[[0.         0.         1.         0.         0.         0.
  0.         0.         0.         0.         0.         0.        ]
 [1.         0.         0.         0.         0.         0.
  0.         0.         0.         0.         0.         0.        ]
 [0.         0.17274014 0.         0.         0.         0.
  0.         0.01434575 0.01698327 0.2359283  0.01278309 0.54721945]
 [0.         0.         0.         0.         0.         0.
  0.         0.         0.04352895 0.4151079  0.0127716  0.52859155]
 [0.         0.         0.         0.         0.         0.
  0.         0.         0.         0.5544101  0.01856474 0.42702516]
 [0.         0.         0.         0.         0.         0.
  0.         0.         0.         0.         0.06921778 0.93078222]
 [0.         0.         0.         0.         0.         0.
  0.         0.         0.         0.         0.         0.        ]
 [0.         1.         0

# Look at this Markov model

In [147]:
#import numpy.linalg
vals, vecs = np.linalg.eig(M)
print(vals)
vals, vecs = np.linalg.eig(M.T)
print(vals)

[ 1.00000000e+00+0.j         -3.97972680e-01+0.76473384j
 -3.97972680e-01-0.76473384j  4.19076197e-01+0.j
 -5.68571435e-01+0.j          5.83080564e-02+0.j
  4.47830551e-02+0.j         -7.29128115e-02+0.j
 -5.65612069e-02+0.j         -2.81764946e-02+0.j
  9.59159834e-17+0.j          0.00000000e+00+0.j        ]
[ 1.00000000e+00+0.j         -3.97972680e-01+0.76473384j
 -3.97972680e-01-0.76473384j -5.68571435e-01+0.j
  4.19076197e-01+0.j          5.83080564e-02+0.j
  4.47830551e-02+0.j         -7.29128115e-02+0.j
 -5.65612069e-02+0.j         -2.81764946e-02+0.j
  3.46588981e-18+0.j          0.00000000e+00+0.j        ]


In [148]:
print("what if chain propagates")
print("A[0,:]")
# check stationary behavior
A = M
for n in range(10):
    A = np.dot(A,M)
    #print(A)
    print(A[0,:])
    print(np.sum(A[2,:]))  # is 1 indeed

what if chain propagates
A[0,:]
[0.         0.17274014 0.         0.         0.         0.
 0.         0.01434575 0.01698327 0.2359283  0.01278309 0.54721945]
1.0
[7.19959591e-01 1.37164036e-01 0.00000000e+00 5.87951815e-03
 1.36352599e-01 6.44254920e-04 0.00000000e+00 0.00000000e+00
 0.00000000e+00 0.00000000e+00 0.00000000e+00 0.00000000e+00]
1.0
[1.37164036e-01 0.00000000e+00 7.19959591e-01 0.00000000e+00
 0.00000000e+00 0.00000000e+00 0.00000000e+00 0.00000000e+00
 2.55929278e-04 7.80358931e-02 2.65103499e-03 6.19335151e-02]
0.9999999999999999
[6.19335151e-02 1.59247824e-01 1.37164036e-01 1.48514324e-03
 4.44421989e-02 1.33609491e-04 0.00000000e+00 1.03283593e-02
 1.22272695e-02 1.69858840e-01 9.20330923e-03 3.93975895e-01]
0.9999999999999999
[5.53223719e-01 1.22446298e-01 6.19335151e-02 4.23301548e-03
 9.81683617e-02 4.63837509e-04 0.00000000e+00 1.96772079e-03
 2.39414085e-03 5.76165763e-02 2.60665398e-03 9.49461618e-02]
0.9999999999999998
[2.17392460e-01 4.07824837e-02 5.5322371

# Pcross with MSM

In [149]:
def global_cross_prob_star(M, doprint=False):
    # probability to arrive in -1 before 0
    # given that you are at 0 now and that you are leaving 0
    # = crossing probability from 0 to -1

    NS = len(M)
    assert NS>2

    # take pieces of transition matrix
    Mp = M[2:-1,2:-1]
    a = np.identity(NS-3)-Mp    # 1-Mp
    # a1 = np.linalg.inv(a)       # (1-Mp)^(-1)  --> bad practice!

    # other pieces
    D = M[2:-1, np.array([0,-1])]
    E = M[np.array([0,-1]), 2:-1]
    M11 = M[np.array([0,-1]),np.array([0,-1])]

    # compute Z vector
    z1 = np.array([[0],[1]])
    # z2 = np.dot(a1,np.dot(D,z1))
    z2 = np.linalg.solve(a, np.dot(D,z1))

    # compute H vector
    y1 = np.dot(M11,z1) + np.dot(E,z2)
    y2 = np.dot(D,z1) + np.dot(Mp,z2)

    if doprint:
        print("Mp eigenvals")
        vals, vecs = np.linalg.eig(Mp)
        print(vals)
        print("1-Mp eigenvals")
        vals, vecs = np.linalg.eig(a)
        print(vals)
        #print(np.dot(a,a1)  # identity matrix indeed
        print("other pieces M")
        print(D)
        print(E)
        print(M11)
        print("vector z1,z2")
        print(z1)
        print(z2)
        print("vector y1,y2")
        print(y1)
        print(y2)
        print("check", np.sum((y2-z2)**2))  # 0, so z2 and y2 indeed the same
    return z1, z2, y1, y2

In [150]:
# global crossing prob
z1, z2, y1, y2 = global_cross_prob_star(M, True)
print("Z")
print_vector(z1, labels1)
print_vector(z2, labels2)
print("Y")
print_vector(y1, labels1)
print_vector(y2, labels2)
print("global crossing prob", y1[0])

Mp eigenvals
[ 0.        +0.j          0.56883462+0.j         -0.56883462+0.j
  0.05831524+0.00127015j  0.05831524-0.00127015j -0.05831524+0.00127015j
 -0.05831524-0.00127015j  0.        +0.j          0.        +0.j        ]
1-Mp eigenvals
[1.        +0.j         1.56883462+0.j         0.43116538+0.j
 1.05831524+0.00127015j 1.05831524-0.00127015j 0.94168476+0.00127015j
 0.94168476-0.00127015j 1.        +0.j         1.        +0.j        ]
other pieces M
[[0.         0.54721945]
 [0.         0.52859155]
 [0.         0.42702516]
 [0.         0.93078222]
 [0.         0.        ]
 [0.         0.        ]
 [0.         0.        ]
 [0.         0.        ]
 [0.         0.        ]]
[[1. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0. 0.]]
[0. 0.]
vector z1,z2
[[0]
 [1]]
[[0.63855278]
 [0.68686852]
 [0.63570959]
 [0.95337046]
 [0.        ]
 [0.        ]
 [0.05502754]
 [0.36548052]
 [0.32633583]]
vector y1,y2
[[0.63855278]
 [0.        ]]
[[0.63855278]
 [0.68686852]
 [0.63570959]
 [0.95337046

In [154]:
# P_loc with MSM
plocMSM = np.ones(len(interfaces))

for lint in range(2, len(interfaces)+1):
    wi = compute_weight_matrices(pathensembles[:lint], interfaces[:lint], len(interfaces))
    pi = get_transition_probzz(wi)
    # pi = get_transition_probs(wi)
    # pi = get_simple_probs(wi)
    Mi = construct_M_istar(pi, max(4, 2*len(interfaces[:lint])), len(interfaces[:lint]))
    z1, z2, y1, y2 = global_cross_prob_star(Mi)
    plocMSM[lint-1] = y1[0][0]
    print(f"ploc till intf {lint-1}: ", y1[0][0])

# Make a figure of the global crossing probabilities
fig, ax = plt.subplots()
ax.set_yscale("log")
ax.plot(plocMSM, "o", c = "r")
ax.errorbar([i for i in range(len(plocMSM))], plocMSM, fmt="-o", c = "b", ecolor="r", capsize=6)
ax.set_xlabel("intf")
ax.set_ylabel(r"$P_A(\lambda_i|\lambda_A)$")
ax.set_xticks(np.arange(len(interfaces)))
fig.tight_layout()
fig.show()


weights:
accepted      77751
rejected      22249
omitted       0
total trajs   100000
total weights 100000
sum weights ensemble 0= 0.0
weights:
accepted      58518
rejected      41482
omitted       0
total trajs   100000
total weights 100000
sum weights ensemble 1= 99998.0
0.0 [0. 0.] 1 0
q:  [[1.         0.82725986]
 [0.         0.        ]]
p:  [[0.17274014 0.82725986]
 [0.         0.        ]]
Local crossing probabilities computed
ploc till intf 1:  0.8272598636455519
weights:
accepted      77751
rejected      22249
omitted       0
total trajs   100000
total weights 100000
sum weights ensemble 0= 0.0
weights:
accepted      58518
rejected      41482
omitted       0
total trajs   100000
total weights 100000
sum weights ensemble 1= 99998.0
weights:
accepted      50666
rejected      49334
omitted       0
total trajs   100000
total weights 101416
sum weights ensemble 2= 103759.0
0 0 2 0.9840282559921807 45017.0
1 0 2 0.981418918918919 49728.0
1 1 0 1.0 924.0
1 1 2 1.0 2969.0
1 2 0 0.9716

In [152]:
print(plocMSM)
for pp in plocMSM:
    print(pp)
print("\n\n")
pcrosslocMSM = np.empty(len(plocMSM))

for i in range (len(pcrosslocMSM)):
    pcrosslocMSM[i] = plocMSM[i]/np.prod(pcrosslocMSM[:i])
    print(pcrosslocMSM[i])
    

[1.         0.82815042 0.81389948 0.79838387 0.65067041 0.63958266]
1.0
0.8281504232252875
0.8138994771000272
0.7983838685343494
0.6506704124653093
0.6395826553421684



1.0
0.8281504232252875
0.9827918386254527
0.9809367016416317
0.8149844180341366
0.9829594877671927


# Bootstrap 👢 analysis

In [None]:
from tistools import bootstrap_istar_analysis

In [None]:
from tistools import find_closest_number_lte
def bootstrap_istar_analysis2(pathensembles, interfaces, nN=10, nB=1000):
    N = len(interfaces)
    NS = max(3, 2*len(interfaces))
    data = {}
    # for each pathensemble, we save the indices of accepted cycle numbers in a
    # dictionary, because we will use this a lot. We do not accept load cycles, 
    # so if a load cycle is sampled, we will just not use it. 
    pathcycle_ids = {}
    for i, pe in enumerate(pathensembles):
        loadmask = get_generation_mask(pe, "load")
        accmask = get_flag_mask(pe, "ACC")
        pathcycle_ids[i] = select_with_masks(pe.cyclenumbers,
                                             [accmask, ~loadmask])
    for Bcycle in np.arange((pathensembles[0].cyclenumbers)[-1]//nN,
                            (pathensembles[0].cyclenumbers)[-1],
                            ((pathensembles[0].cyclenumbers)[-1]//nN)):
        logger.info(f"Doing bootstrap analysis for cycle {Bcycle}")
        # We produce a list of the data for each timeslice
        ts_data = {}
        for j in range(nB):
            if j % 100 == 0:
                logger.info(f"Doing bootstrap sample {j}")
            # A. Select cycle numbers randomly within the timeslice [1, Bcycle],
            #    using replacement. We start from one to discard the initial
            #    load cycle.
            cycle_ids = np.random.choice(np.arange(start=1,stop=Bcycle), 
                                         Bcycle, replace=True)
            # Store the data for each pathensemble in a dictionary
            boot_data = {}
            boot_pes = []
            for i, pe in enumerate(pathensembles):
                if pe.in_zero_minus:
                    logger.info(f"Passing pathensemble {i} because this is "+\
                                f"the zero minus ensemble: {pe.name}")
                    pass
                # map the cycle numbers to the indices of accepted cycles
                boot_cycle_ids = find_closest_number_lte(cycle_ids,
                                                         pathcycle_ids[i])
                # sample the pathensemble at the given cycle indices
                boot_pes.append(pe.sample_pe(boot_cycle_ids))
                #boot_data[i]['pe'] = boot_pe
            # B. get the transition crossing probabilities
            boot_data['w_path'] = compute_weight_matrices(boot_pes, interfaces)
            boot_data['p'] = get_transition_probzz(boot_data['w_path'])
            # C. Calculate the global crossing probabilities 
            Mj = construct_M_istar(boot_data['p'], NS, N)
            z1, z2, y1, y2 = global_cross_prob_star(Mj)
            boot_data['z1'] = z1
            boot_data['z2'] = z2
            boot_data['y1'] = y1
            boot_data['y2'] = y2
            ts_data[j] = boot_data
        # save the boot_data for this timeslice 
        data[Bcycle] = {}
        data[Bcycle]['data'] = ts_data
        # D. Calculate the mean and std of the local crossing probabilities for
        #    each pathensemble, and the mean and std of the global crossing for 
        #    each bootstrap sample.
        ts_stats = {}
        # first the local crossing probabilities
        for attr in ['p', 'z1', 'z2', 'y1', 'y2']:
            ts_stats[attr] = {}
            ts_stats[attr]['mean'] = \
                np.mean(np.array([ts_data[j][attr]
                                  for j in ts_data.keys()]),axis=0)
            ts_stats[attr]['std'] = \
                np.std(np.array([ts_data[j][attr]
                                 for j in ts_data.keys()]),axis=0)
            
        data[Bcycle]['stats'] = ts_stats

    return data

In [None]:
bs_data = bootstrap_istar_analysis2(pathensembles, interfaces)

print(bs_data)

AttributeError: 'PathEnsemble' object has no attribute 'orders'

# Collecting times

In [None]:
#TODO I need data too?

In [None]:
def collect_tau(pathensembles, data):
    # pathensembles -- list of pathensemble instances
    
    print("Collect tau")
    
    # average path lengths
    taumm = np.zeros(len(pathensembles))
    taump = np.zeros(len(pathensembles))
    taupm = np.zeros(len(pathensembles))
    taupp = np.zeros(len(pathensembles))
    
    # for [1+-] and higher (i>=2)
    for i in range(2,len(pathensembles)):
        print("ensemble", i, pathensembles[i].name)
        taumm[i] = pathensembles[i].tauavg['LML']-2
        taump[i] = pathensembles[i].tauavg['LMR']-2
        taupm[i] = pathensembles[i].tauavg['RML']-2
        taupp[i] = pathensembles[i].tauavg['RMR']-2

    # for [0-] (i=0)
    print("ensemble", 0, pathensembles[0].name)
    if pe.has_zero_minus_one:
        # TODO pieces missing
        taumm[0] = pathensembles[0].tauavg['LML']-2
        taump[0] = pathensembles[0].tauavg['LMR']-2
        taupm[0] = pathensembles[0].tauavg['RML']-2
        taupp[0] = pathensembles[0].tauavg['RMR']-2
    else:
        taupp[0] = pathensembles[0].tauavg['RMR']-2

    # for [0+-] (i=1)
    print("ensemble", 1, pathensembles[1].name)
    taumm[1] = data[1]["pathlengths"]["LML"]["mean"] - 2
    taump[1] = data[1]["pathlengths"]["LMR"]["mean"] - 2
    taupm[1] = data[1]["pathlengths"]["RML"]["mean"] - 2
    #taupp[1] = data[1]["pathlengths"]["RMR"]["mean"] - 2

    return taumm, taump, taupm, taupp

# this is the same:
# 1) after:
#   data[2]["pathlengths"] = pathlength_distr(upe)  # use correct upe!
#   This gives the whole distrib, mean, std, etc
#   print(data[2]["pathlengths"]["RMR"]["mean"])
# 2) after:
#   set_tau_distrib(pathensembles[2])
#   print(pathensembles[2].tauavg['RMR'])

# TODO for [0-]
# likely not okay yet, what about L*L etc??????
# There are paths missing TODO!!!!!!!! when lambda-1

In [None]:
# TODO fix +-1 issues!!!!!!
# TODO tau1 is not fool proof if you have too many phase points!!

def collect_tau1(pathensembles, data):
    # average path lengths, but only the part before the 1st crossing
    #---------------------------
    print("Collect tau1")
    taumm1 = np.zeros(len(pathensembles))
    taump1 = np.zeros(len(pathensembles))
    taupm1 = np.zeros(len(pathensembles))
    taupp1 = np.zeros(len(pathensembles))
    
    # for [1+-] and higher (i>=2)
    for i in range(2,len(pathensembles)):
        taumm1[i] = pathensembles[i].tau1avg['LML'] - 1
        taump1[i] = pathensembles[i].tau1avg['LMR'] - 1
        taupm1[i] = pathensembles[i].tau1avg['RML'] - 1
        taupp1[i] = pathensembles[i].tau1avg['RMR'] - 1
    # for [0-] (i=0) -> just 0
    # for [0+-] (i=1) LML -> just 0
    # for [0+-] (i=1) LMR -> just 0
    # for [0+-] (i=1) RML
    taupm1[1] = data[1]["pathlengths"]["RML"]["mean"] - 2
    
    return taumm1, taump1, taupm1, taupp1

def collect_tau2(pathensembles, data):
    # average path lengths, but only the part after the last crossing
    #---------------------------
    print("Collect tau2")
    taumm2 = np.zeros(len(pathensembles))
    taump2 = np.zeros(len(pathensembles))
    taupm2 = np.zeros(len(pathensembles))
    taupp2 = np.zeros(len(pathensembles))
    
    # for [1+-] and higher (i>=2)
    for i in range(2,len(pathensembles)):
        taumm2[i] = pathensembles[i].tau2avg['LML'] - 1
        taump2[i] = pathensembles[i].tau2avg['LMR'] - 1
        taupm2[i] = pathensembles[i].tau2avg['RML'] - 1
        taupp2[i] = pathensembles[i].tau2avg['RMR'] - 1
    # for [0-] (i=0) -> just 0
    # for [0+-] (i=1) LML -> just 0
    # for [0+-] (i=1) LMR
    taump2[1] = data[1]["pathlengths"]["LMR"]["mean"] - 2
    # for [0+-] (i=1) RML -> just 0
    
    return taumm2, taump2, taupm2, taupp2

def collect_taum(pathensembles, data):
    # average path lengths, but only the part between first/last crossing
    #---------------------------
    print("Collect taum")
    taumm_m = np.zeros(len(pathensembles))
    taump_m = np.zeros(len(pathensembles))
    taupm_m = np.zeros(len(pathensembles))
    taupp_m = np.zeros(len(pathensembles))
    
    # for [1+-] and higher (i>=2)
    for i in range(2,len(pathensembles)):
        #print("ensemble", i, pathensembles[i].name)
        taumm_m[i] = pathensembles[i].tauavg['LML'] \
                   - pathensembles[i].tau1avg['LML'] \
                   - pathensembles[i].tau2avg['LML']
        taump_m[i] = pathensembles[i].tauavg['LMR'] \
                   - pathensembles[i].tau1avg['LMR'] \
                   - pathensembles[i].tau2avg['LMR']
        taupm_m[i] = pathensembles[i].tauavg['RML'] \
                   - pathensembles[i].tau1avg['RML'] \
                   - pathensembles[i].tau2avg['RML']
        taupp_m[i] = pathensembles[i].tauavg['RMR'] \
                   - pathensembles[i].tau1avg['RMR'] \
                   - pathensembles[i].tau2avg['RMR']

    # for [0-] (i=0)
    if pe.has_zero_minus_one:
        # TODO there are more paths!!!
        taumm_m[0] = pathensembles[0].tauavg['LML']-2
        taump_m[0] = pathensembles[0].tauavg['LMR']-2
        taupm_m[0] = pathensembles[0].tauavg['RML']-2
        taupp_m[0] = pathensembles[0].tauavg['RMR']-2
    else:
        taupp_m[0] = pathensembles[0].tauavg['RMR']-2

    # for [0+-] (i=1) LML
    taumm_m[1] = data[1]["pathlengths"]["LML"]["mean"] - 2
    # for [0+-] (i=1) LMR -> just 0
    # for [0+-] (i=1) RML -> just 0

    return taumm_m, taump_m, taupm_m, taupp_m


In [None]:
def print_all_tau(pathensembles, taumm, taump, taupm, taupp):
    # print all tau
    print(f"                  mm            mp            pm            pp")
    for i in range(len(pathensembles)):
        print(f"{i} {pathensembles[i].name[-3:]}  {taumm[i]:13.1f} {taump[i]:13.1f} {taupm[i]:13.1f} {taupp[i]:13.1f}")

In [None]:
taumm, taump, taupm, taupp = collect_tau(pathensembles, data)
taumm1, taump1, taupm1, taupp1 = collect_tau1(pathensembles, data)
taumm2, taump2, taupm2, taupp2 = collect_tau2(pathensembles, data)
taumm_m, taump_m, taupm_m, taupp_m = collect_taum(pathensembles, data)

Collect tau
ensemble 2 /mnt/0bf0c339-34bb-4500-a5fb-f3c2a863de29/DATA/PyRETIS3/toytis/sim_istarcosdiplgv/002


AttributeError: 'PathEnsemble' object has no attribute 'tauavg'

In [None]:
print("tau")
print_all_tau(pathensembles, taumm, taump, taupm, taupp)
print("\ntau1")
print_all_tau(pathensembles, taumm1, taump1, taupm1, taupp1)
print("\ntau_m")
print_all_tau(pathensembles, taumm_m, taump_m, taupm_m, taupp_m)
print("\ntau2")
print_all_tau(pathensembles, taumm2, taump2, taupm2, taupp2)

tau
                  mm            mp            pm            pp
0 000            0.0           0.0           0.0         122.6
1 001           41.0         132.9         129.9           0.0
2 002          558.3         690.8         697.8         849.9
3 003          857.0         578.4         574.5         468.0
4 004          220.2         169.6         170.6         196.5
5 005          174.1         139.2         137.9         163.1
6 006          144.0         114.5         112.1         141.0

tau1
                  mm            mp            pm            pp
0 000            0.0           0.0           0.0           0.0
1 001            0.0           0.0         129.9           0.0
2 002          135.1         133.9         330.5         305.8
3 003          338.2         321.8          54.6          55.0
4 004           54.1          53.4          55.7          54.6
5 005           55.7          54.7          41.4          42.9
6 006           43.1          41.4          3

In [None]:
def construct_tau_vector(N, NS, taumm, taump, taupm, taupp):
    assert N>=4
    assert NS==4*N-5
    assert len(taumm) == N
    assert len(taump) == N
    assert len(taupm) == N
    assert len(taupp) == N
    # unravel the values into one vector
    tau = np.zeros(NS)
    # [0-]
    tau[0] = taupp[0]
    # [0+-]
    tau[1] = taumm[1]
    tau[2] = taump[1]
    tau[3] = taupm[1]
    # [1+-] etc
    for i in range(1,N-2):
        tau[4*i]   = taumm[i+1]
        tau[4*i+1] = taump[i+1]
        tau[4*i+2] = taupm[i+1]
        tau[4*i+3] = taupp[i+1]
    # [(N-2)^(-1)]
    tau[-3] = taumm[-1]
    tau[-2] = taump[-1]
    # B
    tau[-1] = 0.   # whatever
    return tau

In [None]:
tau  = construct_tau_vector(N, NS, taumm, taump, taupm, taupp)
tau1 = construct_tau_vector(N, NS, taumm1, taump1, taupm1, taupp1)
taum = construct_tau_vector(N, NS, taumm_m, taump_m, taupm_m, taupp_m)
tau2 = construct_tau_vector(N, NS, taumm2, taump2, taupm2, taupp2)
tau_m = tau-tau1-tau2  # yes, this is the same thing

print("tau")
print(tau)
print("\n")
print("tau1")
print(tau1)
print("taum")
print(taum)
print("tau2")
print(tau2)

print("\n")
print("tau = tau1+taum+tau2 => difference is", np.sum((tau-tau1-taum-tau2)**2))

NameError: name 'construct_tau_vector' is not defined

# Compute tau for [0+]

In [None]:
g1, g2, h1, h2 = vector_G(M, tau1, tau_m, tau2) #, doprint=True)
print("G")
print_vector(g1, labels1)
print_vector(g2, labels2)
print("H")
print_vector(h1, labels1)
print_vector(h2, labels2)
print("interesting")
print(h1[0])

NameError: name 'vector_G' is not defined

In [None]:
print(h1[0])

[93.16899943]


# Flux

In [None]:
flux = 1/(tau[0]+h1[0])
# dt = 0.0002
# dt = 0.00002
dt = 0.01
flux
print(flux/dt, "1/time")

[0.46346194] 1/time
