# The Monte Carlo experiment

In [1]:
import numpy as np

%matplotlib inline
import matplotlib.pyplot as plt

Handy routines to store and recover Python objects, in particular the dictionaries of experiment results.

In [2]:
import time, gzip
import os, cPickle

def save(obj, path, prefix=None):
    """Pickle `obj` into a timestamped gzip archive inside the folder `path`.

    Parameters
    ----------
    obj : object
        Any picklable python object, e.g. a dictionary of experiment results.
    path : str
        The folder to put the archive in. It is created if it does not exist,
        so that the outcome of a long experiment is not lost just because the
        results folder is missing.
    prefix : str or None, optional
        If given, the name of the archive begins with "<prefix>_".

    Returns
    -------
    filename_ : str
        The path to the written archive: "<path>/[<prefix>_]<YYYYmmdd-HHMMSS>.gz".

    Notes
    -----
    The timestamp has one-second resolution: two calls with the same `path`
    and `prefix` made within the same second write to the same file.
    """
    prefix_ = "" if prefix is None else "%s_"%(prefix,)
    filename_ = os.path.join(path, "%s%s.gz"%(prefix_, time.strftime("%Y%m%d-%H%M%S"),))

    # Make sure the destination exists (an empty `path` is the current folder).
    if path and not os.path.isdir(path):
        os.makedirs(path)

    with gzip.open(filename_, "wb", 9) as fout_:
        # A binary protocol is faster and far more compact than the default
        # ASCII protocol; unpickling detects the protocol automatically.
        cPickle.dump(obj, fout_, cPickle.HIGHEST_PROTOCOL)
    return filename_

def load(filename):
    """Recover a python object from a gzip-compressed pickle archive.

    Parameters
    ----------
    filename : str
        The path to the archive, e.g. the value returned by `save`.

    Returns
    -------
    object
        The unpickled object.

    Notes
    -----
    Unpickling may execute arbitrary code: load only the archives you trust.
    """
    archive_ = gzip.open(filename, "rb")
    try:
        return cPickle.load(archive_)
    finally:
        # Close the archive whether or not unpickling succeeded.
        archive_.close()

The path analyzer

In [3]:
from crossing_tree import structural_statistics

Collect a list of results returned by `structural_statistics` into aligned data tensors.

In [4]:
from crossing_tree import collect_structural_statistics

A function implementing various delta choices.

In [5]:
import warnings

def get_delta_method(delta=1.0):
    """Get a function that computes the base grid resolution for a sample path.

    Parameters
    ----------
    delta : str, or positive real number, optional (default 1.0)
        Either a fixed resolution, or the identifier of a data-driven rule:
          * "std" -- the standard deviation of the increments `np.diff(X)`;
          * "med" -- the median absolute increment [Jones, Rolls; 2009] p. 11
            (arxiv:0911.5204v2);
          * "iqr" -- the interquartile range of the increments;
          * "rng" -- the range of the path divided by 2**12 (discouraged,
            a `RuntimeWarning` is issued when this rule is requested).

    Returns
    -------
    delta_ : callable
        A function of one argument, the array of path values `X`, which
        returns the resolution. For a numeric `delta` the function ignores
        `X` and returns `float(delta)`.

    Raises
    ------
    ValueError
        If `delta` is a string, but not one of the identifiers above.
    TypeError
        If `delta` is neither a string, nor a positive real number.
    """
    if isinstance(delta, str):
        rules_ = {
            # the standard deviation of increments
            "std": lambda X: np.diff(X).std(),
            # Use the median absolute difference [Jones, Rolls; 2009] p. 11 (arxiv:0911.5204v2)
            "med": lambda X: np.median(np.abs(np.diff(X))),
            # Interquartile range
            "iqr": lambda X: np.subtract(*np.percentile(np.diff(X), [75, 25])),
            # Use the range estimate as suggested by Geoffrey on 2015-05-28.
            # The float divisor guards against floor division of integer
            # paths under python 2.
            "rng": lambda X: (X.max() - X.min()) / float(2**12),
        }
        if delta not in rules_:
            raise ValueError("""Invalid `delta` setting. Accepted values """
                             """are: [`iqr`, `std`, `med`, `rng`].""")

        if delta == "rng":
            warnings.warn("""Use of `range`-based grid resolution """
                          """is discouraged since it may cause misaligned """
                          """crossing trees.""", RuntimeWarning)
        return rules_[delta]

    # Accept any real scalar (python or numpy), but not booleans, which are
    # technically integers.
    is_real_ = (isinstance(delta, (int, float, np.integer, np.floating))
                and not isinstance(delta, bool))
    if is_real_ and delta > 0:
        value_ = float(delta)
        return lambda X: value_

    raise TypeError("""`delta` must be either a positive number, or a """
                    """method identifier.""")

An MC experiment kernel.

In [6]:
from sklearn.base import clone

def experiment(experiment_id, n_replications, methods, generator):
    """Run one batch of the Monte Carlo experiment.

    Draws `n_replications` sample paths from a private copy of `generator`
    and applies `structural_statistics` to each path once per method, with
    the scale given by the method and the origin at the first path value.

    Parameters
    ----------
    experiment_id : object
        An identifier of the batch, returned as is.
    n_replications : int
        The number of sample paths to draw.
    methods : list
        The `delta` settings accepted by `get_delta_method`. They are
        validated before the generator is started.
    generator : object
        A process generator compatible with `sklearn.base.clone`, that
        provides `start()`, `draw()` returning a pair `(T, X)`, and
        `finish()`. The passed object itself is not started or drawn from.

    Returns
    -------
    experiment_id : object
        The identifier passed in.
    results : dict
        For each method, the list of `structural_statistics` results in the
        order the paths were drawn.
    """
    # Fail fast on an invalid method, before any work is done by the generator.
    deltas = [get_delta_method(method_) for method_ in methods]

    generator = clone(generator)
    generator.start()
    try:
        results = {method_: list() for method_ in methods}
        for _ in xrange(n_replications):
            T, X = generator.draw()

            # Apply all methods to the same sample path.
            for delta, method in zip(deltas, methods):
                result_ = structural_statistics(X, T, scale=delta(X), origin=X[0])
                results[method].append(result_)
    finally:
        # Always pair `start()` with `finish()`, even if a replication fails.
        generator.finish()

    return experiment_id, results

## Experiments

In [7]:
from joblib import Parallel, delayed

A couple of random seeds from [here](https://www.random.org/bytes/).

In [8]:
# Retired seeds: [0xDEADF00D, 0xFABACABA, 0x738E2A0B, 0x6508C9F4, 0xFBA15A24,
#                 0x6DAEDD6B, 0xC05DE9CE, 0xFC3021A6,]

# The pool of master seeds: every experiment section below takes the last
# remaining entry off this list with `master_seeds.pop()`.
master_seeds = [
    0x2357D9C1, 0x4FE1E92D, 0xDC9F3966,
    0x18E04C68, 0xEC864D20, 0x8A63D1FE,
    0xACBC4A59, 0x24F187FF, 0x97D37D23,
    0x87AFB2AE, 0x3CADD47C, 0xFD6851AC,
    0x77B6FE1A, 0xCBB3EBA1, 0x024F215B,
]

The Monte Carlo experiment is run in parallel batches, with each batch
initialized to a randomly picked seed.

In [9]:
# The largest signed 32-bit integer: the exclusive upper bound handed to
# `random_state.randint` when the per-batch seeds are drawn.
MAX_RAND_SEED = np.iinfo(np.dtype("int32")).max

## Hermite experiment

In [10]:
from crossing_tree.processes import HermiteProcess

# Take the last unused master seed. NOTE: `pop` removes it from
# `master_seeds`, so re-running this cell picks a different seed.
seed = master_seeds.pop()
print "Using seed %X"%(seed,)
# The generator from which the per-batch seeds are drawn in the run cell.
random_state = np.random.RandomState(seed)

# NOTE(review): despite its name, the run cell tests `if skip:`, so the
# experiment is executed only when `skip` is True.
skip = True

Using seed 24F215B


Setup

In [11]:
# The values passed to `HermiteProcess` as `N` and `n_downsample`.
n_samples = 2 ** 19
n_downsample = 2 ** 4

# The grid of `degree` and `hurst` values to run the experiment over.
degrees = [2, 3, 4]
hurst_exponents = [0.6, 0.7, 0.8, 0.9]

# The `delta` rules applied to every sample path.
methods = ["med", "std", "iqr"]

# The number of replications within a batch, and the number of batches.
n_per_batch = 125
n_batches = 8

Run the experiment for the Hermite process.

In [12]:
# Run the Hermite experiment for every (degree, hurst) pair and store each
# outcome in a separate archive.
# NOTE(review): the flag is inverted relative to its name -- the body runs
# only when `skip` is True.
if skip:
    # n_jobs=-1 asks joblib to use all available CPUs.
    par_ = Parallel(n_jobs=-1, verbose=0)
    for degree_ in degrees:
        for hurst_ in hurst_exponents:
            # The tag of this setting: used in the log and as the archive prefix.
            name_ = "HRP%d_%d-%d-%0.3f-%dx%d"%(degree_, n_downsample, n_samples, hurst_, n_per_batch, n_batches)
            # The trailing comma keeps the line open for the timing printed below.
            print name_,

            # Schedule the experiments
            # One seed per batch: it serves both as the batch identifier and
            # as the `random_state` of the batch's generator.
            seeds = random_state.randint(MAX_RAND_SEED, size=(n_batches,))
            # A lazy generator of delayed calls, consumed by `par_` below.
            schedule_ = (delayed(experiment)(seed_, n_per_batch, methods,
                                             HermiteProcess(N=n_samples,
                                                            degree=degree_,
                                                            n_downsample=n_downsample,
                                                            hurst=hurst_,
                                                            random_state=seed_))
                                        for seed_ in seeds)

            # Run the experiment and collect the results
            tick_ = time.time()
            experiment_ids = list()
            # Merge the per-batch lists of results into one list per method.
            results_ = {method: list() for method in methods}
            for id_, dict_ in par_(schedule_):
                experiment_ids.append(id_)
                for method in methods:
                    results_[method].extend(dict_[method])
            results = {key_: collect_structural_statistics(list_)
                       for key_, list_ in results_.iteritems()}
            tock_ = time.time()

            # Save the results and log
            # The archive holds the start and end times, the batch seeds, and
            # the collected results.
            filename_ = save((tick_, tock_, experiment_ids, results), "../results/", name_)
            print "%0.3fsec."%(tock_ - tick_,), filename_

HRP2_16-524288-0.600-125x8 467.902sec. ../results/HRP2_16-524288-0.600-125x8_20161011-220307.gz
HRP2_16-524288-0.700-125x8 480.998sec. ../results/HRP2_16-524288-0.700-125x8_20161011-221116.gz
HRP2_16-524288-0.800-125x8 498.085sec. ../results/HRP2_16-524288-0.800-125x8_20161011-221943.gz
HRP2_16-524288-0.900-125x8 534.033sec. ../results/HRP2_16-524288-0.900-125x8_20161011-222846.gz
HRP3_16-524288-0.600-125x8 516.212sec. ../results/HRP3_16-524288-0.600-125x8_20161011-223733.gz
HRP3_16-524288-0.700-125x8 533.479sec. ../results/HRP3_16-524288-0.700-125x8_20161011-224634.gz
HRP3_16-524288-0.800-125x8 551.205sec. ../results/HRP3_16-524288-0.800-125x8_20161011-225554.gz
HRP3_16-524288-0.900-125x8 579.142sec. ../results/HRP3_16-524288-0.900-125x8_20161011-230544.gz
HRP4_16-524288-0.600-125x8 570.336sec. ../results/HRP4_16-524288-0.600-125x8_20161011-231525.gz
HRP4_16-524288-0.700-125x8 597.338sec. ../results/HRP4_16-524288-0.700-125x8_20161011-232530.gz
HRP4_16-524288-0.800-125x8 628.181sec. .

## Weierstrass experiment

In [13]:
from crossing_tree.processes import WeierstrassFunction

# Take the last unused master seed. NOTE: `pop` removes it from
# `master_seeds`, so re-running this cell picks a different seed.
seed = master_seeds.pop()
print "Using seed %X"%(seed,)
# The generator from which the per-batch seeds are drawn in the run cell.
random_state = np.random.RandomState(seed)

# NOTE(review): despite its name, the run cell tests `if skip:`, so the
# experiment is executed only when `skip` is True.
skip = True

Using seed CBB3EBA1


Setup

In [14]:
# The values passed to `WeierstrassFunction` as `N` and `lambda_0`.
n_samples = 2 ** 23
lambda_0 = 1.2

# The `delta` rules applied to every sample path.
methods = ["med", "std", "iqr"]

# The grid of `holder` values to run the experiment over.
holder_exponents = [0.5, 0.6, 0.7, 0.8, 0.9, 0.95, 0.99]

# The number of replications within a batch, and the number of batches.
n_per_batch = 125
n_batches = 8

Run the experiment for the random Weierstrass function $[0, 1]\mapsto \mathbb{R}$:
$$ W_H(t) = \sum_{k\geq 0} \lambda_0^{-k H} \bigl(\cos(2 \pi \lambda_0^k t + \phi_k) - \cos \phi_k\bigr)\,, $$
with $(\phi_k)_{k\geq0} \sim \mathbb{U}[0, 2\pi]$, and $\lambda_0 > 1$ -- the fundamental harmonic.

In [15]:
# Run the Weierstrass experiment for every Holder exponent and store each
# outcome in a separate archive.
# NOTE(review): the flag is inverted relative to its name -- the body runs
# only when `skip` is True.
if skip:
    # n_jobs=-1 asks joblib to use all available CPUs.
    par_ = Parallel(n_jobs=-1, verbose=0)
    for holder_ in holder_exponents:
        # The tag of this setting: used in the log and as the archive prefix.
        name_ = "WEI_%g-%d-%0.3f-%dx%d"%(lambda_0, n_samples, holder_, n_per_batch, n_batches)
        # The trailing comma keeps the line open for the timing printed below.
        print name_,

        # Schedule the experiments
        # One seed per batch: it serves both as the batch identifier and
        # as the `random_state` of the batch's generator.
        seeds = random_state.randint(MAX_RAND_SEED, size=(n_batches,))
        # A lazy generator of delayed calls, consumed by `par_` below.
        schedule_ = (delayed(experiment)(seed_, n_per_batch, methods,
                                         WeierstrassFunction(N=n_samples,
                                                             lambda_0=lambda_0,
                                                             holder=holder_,
                                                             random_state=seed_,
                                                             one_sided=False))
                                    for seed_ in seeds)

        # Run the experiment and collect the results
        tick_ = time.time()
        experiment_ids = list()
        # Merge the per-batch lists of results into one list per method.
        results_ = {method: list() for method in methods}
        for id_, dict_ in par_(schedule_):
            experiment_ids.append(id_)
            for method in methods:
                results_[method].extend(dict_[method])
        results = {key_: collect_structural_statistics(list_)
                   for key_, list_ in results_.iteritems()}
        tock_ = time.time()

        # Save the results and log
        # The archive holds the start and end times, the batch seeds, and
        # the collected results.
        filename_ = save((tick_, tock_, experiment_ids, results), "../results/", name_)
        print "%0.3fsec."%(tock_ - tick_,), filename_

WEI_1.2-8388608-0.500-125x8 14294.285sec. ../results/WEI_1.2-8388608-0.500-125x8_20161012-034536.gz
WEI_1.2-8388608-0.600-125x8 14508.481sec. ../results/WEI_1.2-8388608-0.600-125x8_20161012-074732.gz
WEI_1.2-8388608-0.700-125x8 14778.991sec. ../results/WEI_1.2-8388608-0.700-125x8_20161012-115401.gz
WEI_1.2-8388608-0.800-125x8 15123.950sec. ../results/WEI_1.2-8388608-0.800-125x8_20161012-160615.gz
WEI_1.2-8388608-0.900-125x8 15374.462sec. ../results/WEI_1.2-8388608-0.900-125x8_20161012-202241.gz
WEI_1.2-8388608-0.950-125x8 15571.504sec. ../results/WEI_1.2-8388608-0.950-125x8_20161013-004225.gz
WEI_1.2-8388608-0.990-125x8 15642.739sec. ../results/WEI_1.2-8388608-0.990-125x8_20161013-050321.gz


## fBM experiment

In [16]:
from crossing_tree.processes import FractionalBrownianMotion

# Take the last unused master seed. NOTE: `pop` removes it from
# `master_seeds`, so re-running this cell picks a different seed.
seed = master_seeds.pop()
print "Using seed %X"%(seed,)
# The generator from which the per-batch seeds are drawn in the run cell.
random_state = np.random.RandomState(seed)

# NOTE(review): despite its name, the run cell tests `if skip:`, so the
# experiment is executed only when `skip` is True.
skip = True

Using seed 77B6FE1A


Setup

In [17]:
# The value passed to `FractionalBrownianMotion` as `N`.
n_samples = 2 ** 23

# The `delta` rules applied to every sample path.
methods = ["med", "std", "iqr"]

# The grid of `hurst` values: a coarse 0.05 step up to 0.9, then a finer
# 0.005 step from 0.91 up to 0.95.
hurst_exponents = [
    0.500, 0.550, 0.600, 0.650, 0.700, 0.750,
    0.800, 0.850, 0.900, 0.910, 0.915, 0.920,
    0.925, 0.930, 0.935, 0.940, 0.945, 0.950,
]

# The number of replications within a batch, and the number of batches.
n_per_batch = 125
n_batches = 8

Run the experiment for the Fractional Brownian Motion.

In [18]:
# Run the fBM experiment for every Hurst exponent and store each outcome in
# a separate archive.
# NOTE(review): the flag is inverted relative to its name -- the body runs
# only when `skip` is True.
if skip:
    # n_jobs=-1 asks joblib to use all available CPUs.
    par_ = Parallel(n_jobs=-1, verbose=0)
    for hurst_ in hurst_exponents:
        # The tag of this setting: used in the log and as the archive prefix.
        name_ = "FBM-%d-%0.3f-%dx%d"%(n_samples, hurst_, n_per_batch, n_batches)
        # The trailing comma keeps the line open for the timing printed below.
        print name_,

        # Schedule the experiments
        # One seed per batch: it serves both as the batch identifier and
        # as the `random_state` of the batch's generator.
        seeds = random_state.randint(MAX_RAND_SEED, size=(n_batches,))
        # A lazy generator of delayed calls, consumed by `par_` below.
        schedule_ = (delayed(experiment)(seed_, n_per_batch, methods,
                                         FractionalBrownianMotion(N=n_samples,
                                                                  hurst=hurst_,
                                                                  random_state=seed_))
                                    for seed_ in seeds)

        # Run the experiment and collect the results
        tick_ = time.time()
        experiment_ids = list()
        # Merge the per-batch lists of results into one list per method.
        results_ = {method: list() for method in methods}
        for id_, dict_ in par_(schedule_):
            experiment_ids.append(id_)
            for method in methods:
                results_[method].extend(dict_[method])
        results = {key_: collect_structural_statistics(list_)
                   for key_, list_ in results_.iteritems()}
        tock_ = time.time()

        # Save the results and log
        # The archive holds the start and end times, the batch seeds, and
        # the collected results.
        filename_ = save((tick_, tock_, experiment_ids, results), "../results/", name_)
        print "%0.3fsec."%(tock_ - tick_,), filename_

FBM-8388608-0.500-125x8 2031.947sec. ../results/FBM-8388608-0.500-125x8_20161013-053727.gz
FBM-8388608-0.550-125x8 2154.489sec. ../results/FBM-8388608-0.550-125x8_20161013-061329.gz
FBM-8388608-0.600-125x8 2277.297sec. ../results/FBM-8388608-0.600-125x8_20161013-065135.gz
FBM-8388608-0.650-125x8 2392.415sec. ../results/FBM-8388608-0.650-125x8_20161013-073137.gz
FBM-8388608-0.700-125x8 2521.615sec. ../results/FBM-8388608-0.700-125x8_20161013-081349.gz
FBM-8388608-0.750-125x8 2637.187sec. ../results/FBM-8388608-0.750-125x8_20161013-085757.gz
FBM-8388608-0.800-125x8 2762.201sec. ../results/FBM-8388608-0.800-125x8_20161013-094410.gz
FBM-8388608-0.850-125x8 2891.440sec. ../results/FBM-8388608-0.850-125x8_20161013-103234.gz
FBM-8388608-0.900-125x8 3011.573sec. ../results/FBM-8388608-0.900-125x8_20161013-112257.gz
FBM-8388608-0.910-125x8 3046.946sec. ../results/FBM-8388608-0.910-125x8_20161013-121358.gz
FBM-8388608-0.915-125x8 3052.126sec. ../results/FBM-8388608-0.915-125x8_20161013-130503.gz

## Additional experiments

### Weierstrass experiment pt.2

In [23]:
from crossing_tree.processes import WeierstrassFunction

# Take the last unused master seed. NOTE: `pop` removes it from
# `master_seeds`, so re-running this cell picks a different seed.
seed = master_seeds.pop()
print "Using seed %X"%(seed,)
# The generator from which the per-batch seeds are drawn in the run cell.
random_state = np.random.RandomState(seed)

# NOTE(review): despite its name, the run cell tests `if skip:`, so the
# experiment is executed only when `skip` is True.
skip = True

Using seed FD6851AC


Setup

In [25]:
# The values passed to `WeierstrassFunction` as `N` and `lambda_0`.
n_samples = 2 ** 23
lambda_0 = 1.2

# The `delta` rules applied to every sample path.
methods = ["med", "std", "iqr"]

# A finer grid of `holder` values: from 0.91 up to 0.945 in steps of 0.005.
holder_exponents = [
    0.910, 0.915,
    0.920, 0.925,
    0.930, 0.935,
    0.940, 0.945,
]

# The number of replications within a batch, and the number of batches.
n_per_batch = 125
n_batches = 8

Run the experiment for the random Weierstrass function $[0, 1]\mapsto \mathbb{R}$:
$$ W_H(t) = \sum_{k\geq 0} \lambda_0^{-k H} \bigl(\cos(2 \pi \lambda_0^k t + \phi_k) - \cos \phi_k\bigr)\,, $$
with $(\phi_k)_{k\geq0} \sim \mathbb{U}[0, 2\pi]$, and $\lambda_0 > 1$ -- the fundamental harmonic.

In [26]:
# Run the Weierstrass experiment for every Holder exponent of the finer grid
# and store each outcome in a separate archive.
# NOTE(review): the flag is inverted relative to its name -- the body runs
# only when `skip` is True.
if skip:
    # n_jobs=-1 asks joblib to use all available CPUs.
    par_ = Parallel(n_jobs=-1, verbose=0)
    for holder_ in holder_exponents:
        # The tag of this setting: used in the log and as the archive prefix.
        name_ = "WEI_%g-%d-%0.3f-%dx%d"%(lambda_0, n_samples, holder_, n_per_batch, n_batches)
        # The trailing comma keeps the line open for the timing printed below.
        print name_,

        # Schedule the experiments
        # One seed per batch: it serves both as the batch identifier and
        # as the `random_state` of the batch's generator.
        seeds = random_state.randint(MAX_RAND_SEED, size=(n_batches,))
        # A lazy generator of delayed calls, consumed by `par_` below.
        schedule_ = (delayed(experiment)(seed_, n_per_batch, methods,
                                         WeierstrassFunction(N=n_samples,
                                                             lambda_0=lambda_0,
                                                             holder=holder_,
                                                             random_state=seed_,
                                                             one_sided=False))
                                    for seed_ in seeds)

        # Run the experiment and collect the results
        tick_ = time.time()
        experiment_ids = list()
        # Merge the per-batch lists of results into one list per method.
        results_ = {method: list() for method in methods}
        for id_, dict_ in par_(schedule_):
            experiment_ids.append(id_)
            for method in methods:
                results_[method].extend(dict_[method])
        results = {key_: collect_structural_statistics(list_)
                   for key_, list_ in results_.iteritems()}
        tock_ = time.time()

        # Save the results and log
        # The archive holds the start and end times, the batch seeds, and
        # the collected results.
        filename_ = save((tick_, tock_, experiment_ids, results), "../results/", name_)
        print "%0.3fsec."%(tock_ - tick_,), filename_

WEI_1.2-8388608-0.910-125x8 15308.835sec. ../results/WEI_1.2-8388608-0.910-125x8_20161014-013511.gz
WEI_1.2-8388608-0.915-125x8 15404.482sec. ../results/WEI_1.2-8388608-0.915-125x8_20161014-055208.gz
WEI_1.2-8388608-0.920-125x8 15420.649sec. ../results/WEI_1.2-8388608-0.920-125x8_20161014-100922.gz
WEI_1.2-8388608-0.925-125x8 15478.133sec. ../results/WEI_1.2-8388608-0.925-125x8_20161014-142733.gz
WEI_1.2-8388608-0.930-125x8 15618.317sec. ../results/WEI_1.2-8388608-0.930-125x8_20161014-184805.gz
WEI_1.2-8388608-0.935-125x8 15464.825sec. ../results/WEI_1.2-8388608-0.935-125x8_20161014-230603.gz
WEI_1.2-8388608-0.940-125x8 15349.082sec. ../results/WEI_1.2-8388608-0.940-125x8_20161015-032205.gz
WEI_1.2-8388608-0.945-125x8 15335.909sec. ../results/WEI_1.2-8388608-0.945-125x8_20161015-073754.gz


### Weierstrass experiment pt.3

In [None]:
from crossing_tree.processes import WeierstrassFunction

# Take the last unused master seed. NOTE: `pop` removes it from
# `master_seeds`, so re-running this cell picks a different seed.
seed = master_seeds.pop()
print "Using seed %X"%(seed,)
# The generator from which the per-batch seeds are drawn in the run cell.
random_state = np.random.RandomState(seed)

# NOTE(review): despite its name, the run cell tests `if skip:`, so with
# `skip = False` the experiment of this section is NOT executed.
skip = False

Setup

In [None]:
# The values passed to `WeierstrassFunction` as `N` and `lambda_0`.
n_samples = 2 ** 23
lambda_0 = 1.7

# The `delta` rules applied to every sample path.
methods = ["med", "std", "iqr"]

# The grid of `holder` values: a coarse 0.05 step up to 0.9, then a finer
# 0.005 step from 0.91 up to 0.95.
holder_exponents = [
    0.500, 0.550, 0.600, 0.650, 0.700, 0.750,
    0.800, 0.850, 0.900, 0.910, 0.915, 0.920,
    0.925, 0.930, 0.935, 0.940, 0.945, 0.950,
]

# The number of replications within a batch, and the number of batches.
n_per_batch = 125
n_batches = 8

Run the experiment for the random Weierstrass function $[0, 1]\mapsto \mathbb{R}$:
$$ W_H(t) = \sum_{k\geq 0} \lambda_0^{-k H} \bigl(\cos(2 \pi \lambda_0^k t + \phi_k) - \cos \phi_k\bigr)\,, $$
with $(\phi_k)_{k\geq0} \sim \mathbb{U}[0, 2\pi]$, and $\lambda_0 > 1$ -- the fundamental harmonic.

In [None]:
# Run the Weierstrass experiment for every Holder exponent and store each
# outcome in a separate archive.
# NOTE(review): the flag is inverted relative to its name -- the body runs
# only when `skip` is True, and the preceding seed cell sets it to False.
if skip:
    # n_jobs=-1 asks joblib to use all available CPUs.
    par_ = Parallel(n_jobs=-1, verbose=0)
    for holder_ in holder_exponents:
        # The tag of this setting: used in the log and as the archive prefix.
        name_ = "WEI_%g-%d-%0.3f-%dx%d"%(lambda_0, n_samples, holder_, n_per_batch, n_batches)
        # The trailing comma keeps the line open for the timing printed below.
        print name_,

        # Schedule the experiments
        # One seed per batch: it serves both as the batch identifier and
        # as the `random_state` of the batch's generator.
        seeds = random_state.randint(MAX_RAND_SEED, size=(n_batches,))
        # A lazy generator of delayed calls, consumed by `par_` below.
        schedule_ = (delayed(experiment)(seed_, n_per_batch, methods,
                                         WeierstrassFunction(N=n_samples,
                                                             lambda_0=lambda_0,
                                                             holder=holder_,
                                                             random_state=seed_,
                                                             one_sided=False))
                                    for seed_ in seeds)

        # Run the experiment and collect the results
        tick_ = time.time()
        experiment_ids = list()
        # Merge the per-batch lists of results into one list per method.
        results_ = {method: list() for method in methods}
        for id_, dict_ in par_(schedule_):
            experiment_ids.append(id_)
            for method in methods:
                results_[method].extend(dict_[method])
        results = {key_: collect_structural_statistics(list_)
                   for key_, list_ in results_.iteritems()}
        tock_ = time.time()

        # Save the results and log
        # The archive holds the start and end times, the batch seeds, and
        # the collected results.
        filename_ = save((tick_, tock_, experiment_ids, results), "../results/", name_)
        print "%0.3fsec."%(tock_ - tick_,), filename_