# The Monte Carlo experiment

In [None]:
import numpy as np

%matplotlib inline
import matplotlib.pyplot as plt

Handy routines to store and recover Python objects, in particular the dictionaries of experiment results.

In [None]:
import time, gzip
import os, cPickle

def save(obj, path, prefix=None):
    """Pickle `obj` into a timestamped gzip-compressed file inside `path`.

    Parameters
    ----------
    obj : object
        Any picklable python object.
    path : str
        The directory to put the file in.
    prefix : str, optional
        If given, the file is named "<prefix>_<timestamp>.gz", otherwise
        just "<timestamp>.gz".

    Returns
    -------
    filename : str
        The path of the file that was written.

    Notes
    -----
    The timestamp is `time.strftime("%Y%m%d-%H%M%S")`, i.e. it has a one
    second resolution: two calls with the same `path` and `prefix` within
    the same second write to the same file.
    """
    prefix_ = "" if prefix is None else "%s_"%(prefix,)
    filename_ = os.path.join(path, "%s%s.gz"%(prefix_, time.strftime("%Y%m%d-%H%M%S"),))

    # The file is only written to, so plain "wb" suffices. The binary pickle
    #  protocol is much more compact and faster than the default ASCII one,
    #  and `cPickle.load` detects the protocol on its own.
    with gzip.open(filename_, "wb", 9) as fout_:
        cPickle.dump(obj, fout_, cPickle.HIGHEST_PROTOCOL)
    return filename_

def load(filename):
    """Recover a python object from a gzip-compressed pickle file.

    Parameters
    ----------
    filename : str
        The path to the gzipped pickle, e.g. the one returned by `save`.

    Returns
    -------
    object
        The unpickled object.

    Notes
    -----
    Unpickling can execute arbitrary code, so only open files that come
    from a trusted source.
    """
    with gzip.open(filename, "rb") as fin_:
        obj_ = cPickle.load(fin_)
    return obj_

The path analyzer

In [None]:
from crossing_tree import crossing_tree

def path_analyze(X, T, scale=1.0):
    """Reduce the crossing tree of a sample path to per-level statistics.

    The tree is built by `crossing_tree` for the grid with base spacing
    `scale` and origin at the first value of the path `X[0]`.

    Parameters
    ----------
    X, T : array
        The sample path, forwarded to `crossing_tree` as is (presumably the
        values and their time stamps -- confirm against `crossing_tree`).
    scale : float, default 1.0
        The base grid spacing \\delta.

    Returns
    -------
    scale : float
        The grid spacing that was used.
    Nn : int array
        Nn[n] -- the total number of crossings of grid with spacing
        \\delta 2^n.
    Dnk : int array, 2d
        Dnk[n][k] -- the total number of crossings of grid \\delta 2^{n+1}
        with exactly 2(k+1) subcrossings of grid \\delta 2^n.
    Vnde : int array, 3d
        Vnde[n][d][e] -- the total number of up-down (e=0) and down-up
        (e=1) excursions in a downward (d=0) or upward (d=1) crossing of
        level n+1.
    Wnp : float array, 2d
        Wnp[n][p] -- the p-th empirical quantile of the n-th level crossing
        durations, NaN if the level has no durations.
    Wavgn, Wstdn : float array
        The average crossing duration at each level and its standard
        deviation, NaN if the level has no durations.
    """
    xi, ti, offspring, Vnk, Znk, Wnk = crossing_tree(X, T, scale=scale, origin=X[0])
    # The identity `2 * Vnk[j][:, :2].sum(axis=1) + 2 == Znk[j]` is expected
    #  to hold at every level j. It is not checked here.

    # Nn[n] -- the total number of crossings of grid with spacing \delta 2^n
    #  Built from a list of ints, so that the result is an integer array
    #  even if `offspring` is empty.
    Nn = np.array([len(xi)] + [len(index_) for index_ in offspring]) - 1

    # Dnk[n][k] -- the total number of crossings of grid \delta 2^{n+1}
    #  with exactly 2(k+1) subcrossings of grid \delta 2^n.
    #  The slice [2::2] keeps the counts of the values 2, 4, 6, ... only.
    freq = [np.bincount(Zk)[2::2] for Zk in Znk]
    # `or [0]` covers a tree without upper levels (python2 `max` has no default).
    n_cols = max([len(f) for f in freq] or [0])
    Dnk = np.zeros((len(freq), n_cols), dtype=int)
    for l, f in enumerate(freq):
        Dnk[l, :len(f)] = f

    # Vnde[n][d][e] -- the total number of up-down(e=0) and down-up(e=1)
    #  excursions in a downward (d=0) or upward (d=1) crossing of level
    #  n+1. The sign of the third column of Vk selects the direction d.
    #  The reshape does nothing unless there are no levels at all, in which
    #  case it yields an array of shape (0, 2, 2).
    Vnde = np.array([(Vk[Vk[:, 2] < 0, :2].sum(axis=0),
                      Vk[Vk[:, 2] > 0, :2].sum(axis=0))
                     for Vk in Vnk], dtype=int).reshape(-1, 2, 2)

    # Wnp[n][p] -- the p-th empirical quantile of the n-th level crossing
    #  durations.
    prc = [0.1, 0.5, 1.0, 2.5, 5.0, 10, 25, 50, 75, 90, 95, 97.5, 99, 99.5, 99.9]
    empty_ = np.full_like(prc, np.nan)
    rows = [np.percentile(Wk, prc) if len(Wk) > 0 else empty_ for Wk in Wnk]
    # `np.stack` refuses an empty list, hence the explicit empty table.
    Wnp = np.stack(rows) if rows else np.empty((0, len(prc)), dtype=float)

    # The average crossing duration and its standard deviation
    Wavgn = np.array([np.mean(Wk) if len(Wk) > 0 else np.nan for Wk in Wnk])
    Wstdn = np.array([np.std(Wk) if len(Wk) > 0 else np.nan for Wk in Wnk])
    return scale, Nn, Dnk, Vnde, Wnp, Wavgn, Wstdn

Collect a list of results returned by path_analyze into aligned data tensors.

In [None]:
def _pad_levels(arr, n_levels, fill, n_cols=None):
    """Cast `arr` to float and pad the end of its first (and second) axis with `fill`."""
    arr = np.asarray(arr, dtype=float)
    widths = [(0, 0)] * arr.ndim
    widths[0] = (0, n_levels - arr.shape[0])
    if n_cols is not None:
        widths[1] = (0, n_cols - arr.shape[1])
    return np.pad(arr, widths, mode="constant", constant_values=fill)


def collect(results):
    """Stack the outputs of `path_analyze` into aligned float arrays.

    The per-path arrays differ in the number of levels (and `Dnk` also in
    the number of columns), so each one is padded to the common size before
    stacking along a new leading axis.

    Parameters
    ----------
    results : iterable of tuples
        Each item is `(scale, Nn, Dnk, Vnde, Wnp, Wavgn, Wstdn)` as returned
        by `path_analyze`.

    Returns
    -------
    scale_m : array, shape (M,)
        The scales, M being the number of results.
    Nmn : float array, shape (M, L)
        The crossing counts padded with zeros, L being the largest length
        of `Nn` among the results.
    Dmnk : float array, shape (M, L-1, K)
        The subcrossing counts padded with zeros, K being the largest number
        of columns of `Dnk` among the results.
    Vmnde, Wmnp : float array, shape (M, L-1, ...)
        The excursion counts and the duration quantiles padded with NaN
        along the level axis. The trailing axes are left as they are.
    Wavgmn, Wstdmn : float array, shape (M, L-1)
        The average durations and their standard deviations, NaN padded.

    Raises
    ------
    ValueError
        If `results` is empty.
    """
    results = list(results)
    if not results:
        raise ValueError("""`results` must contain at least one item.""")

    # Transpose the list of 7-tuples into 7 tuples of same-kind items.
    scales, Nns, Dnks, Vndes, Wnps, Wavgns, Wstdns = zip(*results)
    scale_m = np.array(scales)

    # The common sizes: L levels for the counts, hence L-1 levels for the
    #  statistics of the upper levels, and K columns of subcrossing counts.
    L = max(Nn.shape[0] for Nn in Nns)
    K = max(Dnk.shape[1] for Dnk in Dnks)

    # Counts are padded with zeros, ...
    Nmn = np.stack([_pad_levels(Nn, L, 0) for Nn in Nns])
    Dmnk = np.stack([_pad_levels(Dnk, L - 1, 0, n_cols=K) for Dnk in Dnks])

    # ... whereas the statistics of missing levels are marked with NaN.
    Wmnp = np.stack([_pad_levels(Wnp, L - 1, np.nan) for Wnp in Wnps])
    Vmnde = np.stack([_pad_levels(Vnde, L - 1, np.nan) for Vnde in Vndes])
    Wavgmn = np.stack([_pad_levels(Wavgn, L - 1, np.nan) for Wavgn in Wavgns])
    Wstdmn = np.stack([_pad_levels(Wstdn, L - 1, np.nan) for Wstdn in Wstdns])

    return scale_m, Nmn, Dmnk, Vmnde, Wmnp, Wavgmn, Wstdmn

A function implementing various delta choices.

In [None]:
import warnings

def get_delta_method(delta=1.0):
    """Make a function that picks the base grid spacing for a sample path.

    Parameters
    ----------
    delta : str or positive number, default 1.0
        Either a fixed spacing, or the name of a rule computed from the
        path `X`:
        * "std" -- the standard deviation of the increments `np.diff(X)`;
        * "med" -- the median absolute increment;
        * "iqr" -- the interquartile range of the increments;
        * "rng" -- the range `X.max() - X.min()` divided by 2**12. Its use
          is discouraged and triggers a `RuntimeWarning`.

    Returns
    -------
    callable
        A function of one argument, the path `X`, returning the spacing.
        For a numeric `delta` it returns `float(delta)` whatever the path.

    Raises
    ------
    ValueError
        If `delta` is a string other than the names listed above.
    TypeError
        If `delta` is neither a string nor a positive real number.
    """
    import numbers

    def by_std(X):
        # the standard deviation of increments
        return np.diff(X).std()

    def by_med(X):
        # Use the median absolute difference [Jones, Rolls; 2009] p. 11 (arxiv:0911.5204v2)
        return np.median(np.abs(np.diff(X)))

    def by_iqr(X):
        # Interquartile range
        return np.subtract(*np.percentile(np.diff(X), [75, 25]))

    def by_rng(X):
        # Use the range estimate as suggested by Geoffrey on 2015-05-28
        return (X.max() - X.min()) / (2**12)

    rules = {"std": by_std, "med": by_med, "iqr": by_iqr, "rng": by_rng}

    if isinstance(delta, str):
        if delta not in rules:
            raise ValueError("""Invalid `delta` setting. Accepted values """
                             """are: [`iqr`, `std`, `med`, `rng`].""")
        if delta == "rng":
            warnings.warn("""Use of `range`-based grid resolution """
                          """is discouraged since it may cause misaligned """
                          """crossing trees.""", RuntimeWarning)
        return rules[delta]

    # Accept any real number (python ints, numpy scalars), not just `float`.
    #  `bool` is an integer type in python, but makes no sense as a spacing.
    is_number = isinstance(delta, numbers.Real) and not isinstance(delta, bool)
    if is_number and delta > 0:
        fixed_ = float(delta)
        return lambda X: fixed_

    raise TypeError("""`delta` must be either a positive number, or a """
                    """method identifier.""")

An MC experiment kernel.

In [None]:
from sklearn.base import clone

def experiment(experiment_id, n_replications, methods, generator):
    """Run one batch of the Monte Carlo experiment.

    Draws `n_replications` sample paths and analyzes each one with
    `path_analyze` under every grid spacing rule in `methods`.

    Parameters
    ----------
    experiment_id : object
        An identifier of the batch, returned as is.
    n_replications : int
        The number of sample paths to draw.
    methods : list
        The `delta` settings understood by `get_delta_method`. They are
        also the keys of the returned dictionary.
    generator : estimator-like object
        The path generator. It is copied with `sklearn.base.clone` and only
        the copy is used: `start()` is called once, then `draw()` for each
        replication, which must return a pair `(T, X)`, and `finish()` at
        the end.

    Returns
    -------
    experiment_id : object
        The identifier passed in.
    results : dict
        Maps each method to the list of `path_analyze` outputs, one per
        replication, in the order the paths were drawn.
    """
    generator = clone(generator)

    # Resolve the methods before starting the generator, so that an invalid
    #  setting is reported without leaving a started generator behind.
    deltas = [get_delta_method(method_) for method_ in methods]
    results = {method_: list() for method_ in methods}

    generator.start()
    try:
        for _ in range(n_replications):
            T, X = generator.draw()

            # Apply all methods to the same sample path.
            for delta, method in zip(deltas, methods):
                results[method].append(path_analyze(X, T, scale=delta(X)))
    finally:
        # Always pair `start()` with `finish()`, even if a replication fails.
        generator.finish()

    return experiment_id, results

## Experiments

In [None]:
from joblib import Parallel, delayed

A couple of random seeds from [here](https://www.random.org/bytes/).

In [None]:
# old seeds: [0xDEADF00D, 0xFABACABA, 0x738E2A0B, 0x6508C9F4, 0xFBA15A24,
#             0x6DAEDD6B, 0xC05DE9CE, 0xFC3021A6,]

# The pool of master seeds. Each experiment section below takes one with
#  `master_seeds.pop()`, i.e. removes it from the end of the list.
master_seeds = [0x2357D9C1, 0x4FE1E92D, 0xDC9F3966, 0x18E04C68, 0xEC864D20,
                0x8A63D1FE, 0xACBC4A59, 0x24F187FF, 0x97D37D23, 0x87AFB2AE,
                0x3CADD47C, 0xFD6851AC, 0x77B6FE1A, 0xCBB3EBA1, 0x024F215B,]

The Monte Carlo experiment is run in parallel batches, each
initialized with a randomly picked seed.

In [None]:
# The exclusive upper bound of the batch seeds drawn with `randint`:
#  the largest 32-bit signed integer, 2**31 - 1.
MAX_RAND_SEED = np.iinfo(np.int32).max

## Hermite experiment

In [None]:
from crossing_tree.processes import HermiteProcess

# Take a master seed for the Hermite experiment. `pop()` removes it from the
#  end of `master_seeds`, so re-running this cell moves on to another seed.
seed = master_seeds.pop()
# Log the seed in hexadecimal (a python2 print statement).
print "Using seed %X"%(seed,)
# The random state from which the seeds of the batches are drawn.
random_state = np.random.RandomState(seed)

Setup

In [None]:
# The settings passed to `HermiteProcess` as `N` and `n_downsample`.
n_samples = 2 ** 19
n_downsample = 2 ** 4

# The grid of configurations: every degree is paired with every exponent.
degrees = [2, 3, 4]
hurst_exponents = [0.6, 0.7, 0.8, 0.9, 0.95]

# The grid spacing rules (see `get_delta_method`) applied to each path.
methods = ["med", "std", "iqr"]

# Each configuration is run as `n_batches` parallel batches of
#  `n_per_batch` replications.
n_per_batch = 125
n_batches = 8

Run the experiment for the Hermite process.

In [None]:
# The parallel backend: `n_jobs=-1` makes joblib use all available CPUs.
par_ = Parallel(n_jobs=-1, verbose=0)
for degree_ in degrees:
    for hurst_ in hurst_exponents:
        # The label of the configuration: it is printed as a progress mark
        #  and used as the prefix of the results file.
        name_ = "HRP%d_%d-%d-%0.3f-%dx%d"%(degree_, n_downsample, n_samples, hurst_, n_per_batch, n_batches)
        # python2 print statement: the trailing comma suppresses the newline,
        #  so the timing printed after the run lands on the same line.
        print name_,

        # Schedule the experiments
        #  One seed per batch, drawn from [0, MAX_RAND_SEED). A seed serves
        #  both as the id of its batch and as the `random_state` of the
        #  generator of that batch.
        seeds = random_state.randint(MAX_RAND_SEED, size=(n_batches,))
        # A lazy generator of delayed `experiment` calls: nothing runs until
        #  `par_` consumes it below.
        schedule_ = (delayed(experiment)(seed_, n_per_batch, methods,
                                         HermiteProcess(N=n_samples,
                                                        degree=degree_,
                                                        n_downsample=n_downsample,
                                                        hurst=hurst_,
                                                        random_state=seed_))
                                    for seed_ in seeds)

        # Run the experiment and collect the results
        tick_ = time.time()
        experiment_ids = list()
        results_ = {method: list() for method in methods}
        # Concatenate the per-batch lists of path summaries, method by method.
        for id_, dict_ in par_(schedule_):
            experiment_ids.append(id_)
            for method in methods:
                results_[method].extend(dict_[method])
        # Align the summaries of each method into padded arrays.
        results = {key_: collect(list_) for key_, list_ in results_.iteritems()}
        # NOTE: the elapsed time covers both the parallel run and `collect`.
        tock_ = time.time()

        # Save the results and log
        #  The stored tuple is (start time, end time, batch ids, results).
        filename_ = save((tick_, tock_, experiment_ids, results), "../results/", name_)
        print "%0.3fsec."%(tock_ - tick_,), filename_

## Weierstrass experiment

In [None]:
from crossing_tree.processes import WeierstrassFunction

# Take a master seed for the Weierstrass experiment. `pop()` removes it from
#  the end of `master_seeds`, so re-running this cell moves on to another seed.
seed = master_seeds.pop()
# Log the seed in hexadecimal (a python2 print statement).
print "Using seed %X"%(seed,)
# The random state from which the seeds of the batches are drawn.
random_state = np.random.RandomState(seed)

Setup

In [None]:
# The settings passed to `WeierstrassFunction` as `N` and `lambda_0`.
n_samples = 2 ** 23
lambda_0 = 1.2

# The grid spacing rules (see `get_delta_method`) applied to each path.
methods = ["med", "std", "iqr"]

# The Holder exponents to run the experiment for.
holder_exponents = [0.5, 0.6, 0.7, 0.8, 0.9, 0.95]

# Each configuration is run as `n_batches` parallel batches of
#  `n_per_batch` replications.
n_per_batch = 125
n_batches = 8

Run the experiment for the random Weierstrass function $W_H\colon [0, 1]\to \mathbb{R}$:
$$ W_H(t) = \sum_{k\geq 0} \lambda_0^{-k H} \bigl(\cos(2 \pi \lambda_0^k t + \phi_k) - \cos \phi_k\bigr)\,, $$
with $(\phi_k)_{k\geq0} \sim \mathbb{U}[0, 2\pi]$, and $\lambda_0 > 1$ -- the fundamental harmonic.

In [None]:
# The parallel backend: `n_jobs=-1` makes joblib use all available CPUs.
par_ = Parallel(n_jobs=-1, verbose=0)
for holder_ in holder_exponents:
    # The label of the configuration: it is printed as a progress mark and
    #  used as the prefix of the results file.
    name_ = "WEI_%g-%d-%0.3f-%dx%d"%(lambda_0, n_samples, holder_, n_per_batch, n_batches)
    # python2 print statement: the trailing comma suppresses the newline,
    #  so the timing printed after the run lands on the same line.
    print name_,

    # Schedule the experiments
    #  One seed per batch, drawn from [0, MAX_RAND_SEED). A seed serves both
    #  as the id of its batch and as the `random_state` of the generator of
    #  that batch.
    seeds = random_state.randint(MAX_RAND_SEED, size=(n_batches,))
    # A lazy generator of delayed `experiment` calls: nothing runs until
    #  `par_` consumes it below.
    schedule_ = (delayed(experiment)(seed_, n_per_batch, methods,
                                     WeierstrassFunction(N=n_samples,
                                                         lambda_0=lambda_0,
                                                         holder=holder_,
                                                         random_state=seed_,
                                                         one_sided=False))
                                for seed_ in seeds)

    # Run the experiment and collect the results
    tick_ = time.time()
    experiment_ids = list()
    results_ = {method: list() for method in methods}
    # Concatenate the per-batch lists of path summaries, method by method.
    for id_, dict_ in par_(schedule_):
        experiment_ids.append(id_)
        for method in methods:
            results_[method].extend(dict_[method])
    # Align the summaries of each method into padded arrays.
    results = {key_: collect(list_) for key_, list_ in results_.iteritems()}
    # NOTE: the elapsed time covers both the parallel run and `collect`.
    tock_ = time.time()

    # Save the results and log
    #  The stored tuple is (start time, end time, batch ids, results).
    filename_ = save((tick_, tock_, experiment_ids, results), "../results/", name_)
    print "%0.3fsec."%(tock_ - tick_,), filename_

## fBM experiment

In [None]:
from crossing_tree.processes import FractionalBrownianMotion

# Take a master seed for the fBM experiment. `pop()` removes it from the end
#  of `master_seeds`, so re-running this cell moves on to another seed.
seed = master_seeds.pop()
# Log the seed in hexadecimal (a python2 print statement).
print "Using seed %X"%(seed,)
# The random state from which the seeds of the batches are drawn.
random_state = np.random.RandomState(seed)

Setup

In [None]:
# The setting passed to `FractionalBrownianMotion` as `N`.
n_samples = 2 ** 23

# The grid spacing rules (see `get_delta_method`) applied to each path.
methods = ["med", "std", "iqr"]

# The Hurst exponents to run the experiment for: steps of 0.05 up to 0.9,
#  then steps of 0.005 up to 0.95.
hurst_exponents = [
    0.5, 0.55, 0.6, 0.65, 0.7, 0.75, 0.8, 0.85, 0.9,
    0.91, 0.915, 0.92, 0.925, 0.93, 0.935, 0.94, 0.945, 0.95,
]

# Each configuration is run as `n_batches` parallel batches of
#  `n_per_batch` replications.
n_per_batch = 125
n_batches = 8

Run the experiment for the Fractional Brownian Motion.

In [None]:
# The parallel backend: `n_jobs=-1` makes joblib use all available CPUs.
par_ = Parallel(n_jobs=-1, verbose=0)
for hurst_ in hurst_exponents:
    # The label of the configuration: it is printed as a progress mark and
    #  used as the prefix of the results file.
    name_ = "FBM-%d-%0.3f-%dx%d"%(n_samples, hurst_, n_per_batch, n_batches)
    # python2 print statement: the trailing comma suppresses the newline,
    #  so the timing printed after the run lands on the same line.
    print name_,

    # Schedule the experiments
    #  One seed per batch, drawn from [0, MAX_RAND_SEED). A seed serves both
    #  as the id of its batch and as the `random_state` of the generator of
    #  that batch.
    seeds = random_state.randint(MAX_RAND_SEED, size=(n_batches,))
    # A lazy generator of delayed `experiment` calls: nothing runs until
    #  `par_` consumes it below.
    schedule_ = (delayed(experiment)(seed_, n_per_batch, methods,
                                     FractionalBrownianMotion(N=n_samples,
                                                              hurst=hurst_,
                                                              random_state=seed_))
                                for seed_ in seeds)

    # Run the experiment and collect the results
    tick_ = time.time()
    experiment_ids = list()
    results_ = {method: list() for method in methods}
    # Concatenate the per-batch lists of path summaries, method by method.
    for id_, dict_ in par_(schedule_):
        experiment_ids.append(id_)
        for method in methods:
            results_[method].extend(dict_[method])
    # Align the summaries of each method into padded arrays.
    results = {key_: collect(list_) for key_, list_ in results_.iteritems()}
    # NOTE: the elapsed time covers both the parallel run and `collect`.
    tock_ = time.time()

    # Save the results and log
    #  The stored tuple is (start time, end time, batch ids, results).
    filename_ = save((tick_, tock_, experiment_ids, results), "../results/", name_)
    print "%0.3fsec."%(tock_ - tick_,), filename_