# Simple Monte Carlo Experiment

In [None]:
import numpy as np

%matplotlib inline
import matplotlib.pyplot as plt

The path analyzer

In [None]:
from crossing_tree import crossing_tree

def path_analyze(X, T, scale=1.0):
    """Reduce the crossing tree of one sample path to summary statistics.

    The tree is built by ``crossing_tree`` on a grid with base spacing
    ``scale`` (called delta in the comments below) and origin ``X[0]``.

    Parameters
    ----------
    X : array
        Values of the sample path; ``X[0]`` is used as the grid origin.
    T : array
        Forwarded to ``crossing_tree`` together with ``X`` (presumably the
        observation times of ``X`` -- confirm against ``crossing_tree``).
    scale : float, optional
        Base grid spacing passed to ``crossing_tree``.

    Returns
    -------
    Nn : integer array
        ``Nn[n]`` -- the total number of crossings of the grid with spacing
        delta * 2^n.
    Dnk : integer array, one row per entry of ``Znk``
        ``Dnk[n][k]`` -- the number of crossings of grid delta * 2^{n+1}
        with exactly 2(k+1) subcrossings of grid delta * 2^n.
    Vnde : integer array
        ``Vnde[n][d][e]`` -- the number of up-down (e=0) and down-up (e=1)
        excursions in downward (d=0) or upward (d=1) crossings of level n+1.
    Wnp : float array, one row per entry of ``Wnk``
        Empirical percentiles of the crossing durations of each level; a
        row is all-NaN when the level has no durations.
    Wavgn, Wstdn : float arrays
        Mean and standard deviation of the crossing durations per level
        (NaN when the level has no durations).
    scale : float
        The ``scale`` argument, echoed back unchanged.
    """
    xi, ti, offspring, Vnk, Znk, Wnk = crossing_tree(X, T, scale=scale,
                                                     origin=X[0])
    # A sanity check that was left disabled here asserted, for every level j:
    #     2 * Vnk[j][:, :2].sum(axis=1) + 2 == Znk[j]

    # Nn[n] -- the total number of crossings of grid with spacing \delta 2^n
    Nn = np.r_[len(xi), [len(index_) for index_ in offspring]] - 1

    # Dnk[n][k] -- the total number of crossings of grid \delta 2^{n+1}
    #  with exactly 2(k+1) subcrossings of grid \delta 2^n.
    # `[2::2]` keeps the counts of the even values 2, 4, 6, ... only.
    freq = [np.bincount(Zk)[2::2] for Zk in Znk]
    # The builtin `int` replaces the `np.int` alias, which newer NumPy
    #  releases no longer provide; the resulting dtype is the same.
    Dnk = np.zeros((len(Znk), max(len(f) for f in freq)), dtype=int)
    for l, f in enumerate(freq):
        # Levels with fewer distinct subcrossing counts stay zero-padded.
        Dnk[l, :len(f)] = f

    # Vnde[n][d][e] -- the total number of up-down(e=0) and down-up(e=1)
    #  excursions in a downward (d=0) or upward (d=1) crossing of level
    #  n+1. The sign of the third column of Vk selects the direction.
    Vnde = np.array([(Vk[Vk[:, 2] < 0, :2].sum(axis=0),
                      Vk[Vk[:, 2] > 0, :2].sum(axis=0))
                     for Vk in Vnk], dtype=int)

    # Wnp[n][p] -- the p-th empirical quantile of the n-th level crossing
    #  durations.
    prc = [0.5, 1.0, 2.5, 5.0, 10, 25, 50, 75, 90, 95, 97.5, 99, 99.5]
    # Placeholder row for levels without any recorded duration.
    empty_ = np.full_like(prc, np.nan)
    Wnp = np.stack([np.percentile(Wk, prc) if len(Wk) > 0 else empty_
                    for Wk in Wnk])

    # The average crossing duration and its standard deviation
    Wavgn = np.array([np.mean(Wk) if len(Wk) > 0 else np.nan
                      for Wk in Wnk])
    Wstdn = np.array([np.std(Wk) if len(Wk) > 0 else np.nan
                      for Wk in Wnk])

    return Nn, Dnk, Vnde, Wnp, Wavgn, Wstdn, scale

An MC experiment kernel.

In [None]:
from sklearn.utils import check_random_state
from crossing_tree.processes import fbm

def mc_run(n_replications, delta=1.0, random_state=None, **kwargs):
    """Generate fBM sample paths and analyse the crossing tree of each.

    Parameters
    ----------
    n_replications : int
        Number of sample paths to draw and analyse.
    delta : float or str, optional
        Base grid spacing handed to ``path_analyze`` as ``scale``. A number
        is used as is for every path. A string selects an estimate computed
        from each path ``X``:

        * ``"iqr"`` -- interquartile range of the increments,
        * ``"rng"`` -- range of the path divided by 2**6,
        * ``"med"`` -- median absolute increment,
        * ``"std"`` -- standard deviation of the increments.
    random_state : optional
        Passed through ``check_random_state`` and then used to initialise
        the path generator.
    **kwargs
        Forwarded to the ``fbm`` generator constructor.

    Returns
    -------
    list
        One ``path_analyze`` result tuple per replication, in generation
        order.

    Raises
    ------
    ValueError
        If ``delta`` is a string other than the four names listed above.
    """
    if isinstance(delta, str):
        if delta == "iqr":
            # interquartile range
            delta_ = lambda X: np.subtract(*np.percentile(np.diff(X), [75, 25]))
        elif delta == "rng":
            # Use the range estimate as suggested by Geoffrey on 2015-05-28
            delta_ = lambda X: (X.max() - X.min()) / (2**6)
        elif delta == "med":
            # Use the median absolute difference [Jones, Rolls; 2009] p. 11 (0911.5204v2)
            delta_ = lambda X: np.median(np.abs(np.diff(X)))
        elif delta == "std":
            # the standard deviation of increments
            delta_ = lambda X: np.diff(X).std()
        else:
            raise ValueError("""Invalid delta setting. Accepted values """
                             """are: [`iqr`, `std`, `med`, `rng`].""")
    else:
        # A numeric spacing is applied to every path unchanged. Without this
        # branch `delta_` was never bound for non-string input, so even the
        # default `delta=1.0` failed once the first path was generated.
        fixed_delta_ = float(delta)
        delta_ = lambda X: fixed_delta_

    ## setup the generator
    gen_ = fbm(time=True, **kwargs)
    random_state_ = check_random_state(random_state)
    gen_.initialize(random_state_, threads=1)

    results_ = list()
    # `range` works on both Python 2 and 3, unlike `xrange`.
    for _ in range(n_replications):
        # With `time=True` the generator yields the pair (T, X).
        T, X = gen_()
        scale = delta_(X)
        results_.append(path_analyze(X, T, scale=scale))
    return results_

Set up the parallel backend

In [None]:
from joblib import Parallel, delayed
# Worker pool used to run the experiment jobs: `n_jobs=-1` asks joblib to use
# all available CPUs and `verbose=10` turns on progress messages.
par_ = Parallel(n_jobs=-1, verbose=10)

Initialize the random states

In [None]:
# Master generator with a fixed seed, so the whole experiment is reproducible.
random_state = np.random.RandomState(0xDEADC0DE)

# Draw a batch of 8 random seeds, each an integer in [0, MAX_RAND_SEED).
MAX_RAND_SEED = np.iinfo(np.int32).max
seeds = random_state.randint(0, MAX_RAND_SEED, size=8)

Create the experiment schedule

In [None]:
# N and H are forwarded to the path generator through `mc_run`'s keyword
# arguments; M is the number of replications each job performs.
N = (1 << 21) - 1
H = 0.65
M = 125

# One lazily created job per seed; each job uses the median absolute
# increment ("med") as the base grid spacing.
jobs_ = (
    delayed(mc_run)(M, delta="med", random_state=seed_, N=N, H=H)
    for seed_ in seeds
)

Run the experiment and flatten the results

In [None]:
# Every job returns a list of per-replication results; concatenate them in
# job order into one flat list.
results_ = []
for batch_ in par_(jobs_):
    results_.extend(batch_)

Collect the results

In [None]:
# Every entry of `results_` is a tuple
#   (Nn, Dnk, Vnde, Wnp, Wavgn, Wstdn, scale)
# Replications may have a different number of levels, so each per-replication
# array is zero-padded to a common shape before stacking.
# The builtin `float` replaces the `np.float` alias, which newer NumPy
# releases no longer provide; the resulting dtype is the same.

# Nmn[m][n] -- crossing counts of replication m, padded to L entries.
Nmn = [Nn for Nn, Dnk, Vnde, Wnp, Wavgn, Wstdn, scale in results_]
L = max(Nn.shape[0] for Nn in Nmn)
Nmn = np.stack([np.pad(Nn, (0, L - Nn.shape[0]), mode="constant").\
                astype(float) for Nn in Nmn])

# Dmnk[m][n][k] -- subcrossing counts of replication m, padded to L - 1 rows
#  and K columns (this assumes Dnk never has more than L - 1 rows).
Dmnk = [Dnk for Nn, Dnk, Vnde, Wnp, Wavgn, Wstdn, scale in results_]
K = max(Dnk.shape[1] for Dnk in Dmnk)
Dmnk = np.stack([np.pad(Dnk, ((0, L - 1 - Dnk.shape[0]), (0, K - Dnk.shape[1])),
                        mode="constant").astype(float)
                 for Dnk in Dmnk])

# Wmnp[m][n][p] -- duration percentiles of replication m, padded to L - 1
#  rows; the percentile axis already has the same length everywhere.
Wmnp = [Wnp for Nn, Dnk, Vnde, Wnp, Wavgn, Wstdn, scale in results_]
Wmnp = np.stack([np.pad(Wnp, ((0, L - 1 - Wnp.shape[0]), (0, 0)),
                        mode="constant").astype(float)
                 for Wnp in Wmnp])

Plot the number of crossings

In [None]:
# One curve per replication: the log of the crossing count against the level.
# NOTE(review): zero entries of Nmn (e.g. padding) turn into -inf here.
plt.plot(np.log(Nmn).T)
plt.show()

Compute the probability distribution of the number of sub-crossings.

In [None]:
# Total count of every (replication, level) row, kept as a trailing axis of
# length one so that it broadcasts against Dmnk.
total_ = Dmnk.sum(axis=-1, keepdims=True)

# Rows with a total below one are divided by one instead, which leaves them
# unchanged and avoids a division by zero for all-zero rows.
no_counts_ = total_ < 1.0
total_[no_counts_] = 1.0

# Turn the counts into relative frequencies, in place.
Dmnk /= total_

Plot the probabilities

In [None]:
from math import log
fig = plt.figure(figsize=(16, 9))
ax = fig.add_subplot(1, 1, 1)

# Average the frequencies over the replications (axis 0) and draw one curve
# per level against the subcrossing index.
mean_freq_ = Dmnk.mean(axis=0)
ax.plot(mean_freq_.T)

# NOTE(review): newer matplotlib releases spell this keyword `base` instead of
# `basey` -- confirm against the installed version before changing it.
ax.set_yscale("log", basey=2)

Plot the quantiles of the crossing durations

In [None]:
fig = plt.figure(figsize=(16, 9))
ax = fig.add_subplot(111)

# Percentile levels for the vertical axis; they have to match the list used
# in `path_analyze`, which produced the columns of every Wnp.
prc = [0.5, 1.0, 2.5, 5.0, 10, 25, 50, 75, 90, 95, 97.5, 99, 99.5]

# One colour per replication.
colors = plt.cm.rainbow_r(np.linspace(0, 1, num=len(Wmnp)))

# Level n is rescaled by 2^{n / H} (presumably the self-similar scaling of the
# crossing durations -- confirm). All rows of Nmn have the same length, so the
# factor is computed once instead of once per replication. The builtin `float`
# replaces the `np.float` alias, which newer NumPy releases no longer provide.
level_scale_ = 2**(np.arange(Nmn.shape[1] - 1, dtype=float)[:, np.newaxis] / H)

for Wnp, col_ in zip(Wmnp, colors):
    wnp_ = Wnp / level_scale_
    # The last five levels are left out of the plot.
    for wp_ in wnp_[:-5]:
        ax.plot(wp_, prc, color=col_)

# NOTE(review): newer matplotlib releases spell this keyword `base` instead of
# `basex` -- confirm against the installed version before changing it.
ax.set_xscale("log", basex=2)