# The Monte Carlo experiment

In [1]:
import numpy as np

%matplotlib inline
import matplotlib.pyplot as plt

Handy routines to store and recover Python objects, in particular the dictionaries with the experiment results.

In [2]:
import time, gzip
import os, cPickle

def save(obj, path, prefix=None):
    """Pickle `obj` into a timestamped gzip-compressed file inside `path`.

    Parameters
    ----------
    obj : object
        Any picklable python object, e.g. a dictionary of experiment results.
    path : str
        The folder to put the file in. It is created when missing, so that
        the outcome of a long computation is not lost to an IOError.
    prefix : str, optional
        When given, the file is named "<prefix>_<timestamp>.gz", otherwise
        "<timestamp>.gz". The timestamp is the local time formatted as
        "%Y%m%d-%H%M%S".

    Returns
    -------
    filename : str
        The name of the file that was written.

    Notes
    -----
    The timestamp has one-second resolution: two calls with the same `path`
    and `prefix` made within the same second write to the same file.
    """
    prefix_ = "" if prefix is None else "%s_"%(prefix,)

    # `os.makedirs` has no `exist_ok` in python 2, hence the explicit check.
    # An empty `path` refers to the current folder and needs no creation.
    if path and not os.path.isdir(path):
        os.makedirs(path)

    filename_ = os.path.join(path, "%s%s.gz"%(prefix_, time.strftime("%Y%m%d-%H%M%S"),))
    with gzip.open(filename_, "wb", 9) as fout_:
        # The default protocol of python 2 is the ASCII protocol 0, which is
        # slow and bulky for large numeric arrays. The unpickler detects the
        # protocol automatically, so `load` reads both kinds of files.
        cPickle.dump(obj, fout_, cPickle.HIGHEST_PROTOCOL)
    return filename_

def load(filename):
    """Recover a python object from a gzip-compressed pickle file.

    Parameters
    ----------
    filename : str
        The name of the file to read, e.g. the one returned by `save`.

    Returns
    -------
    obj : object
        The unpickled object.
    """
    # NOTE: unpickling can execute arbitrary code, so this must only be used
    # on trusted files, such as the results written by this notebook.
    with gzip.open(filename, "rb") as fin_:
        obj_ = cPickle.load(fin_)
    return obj_

The path analyzer

In [3]:
from crossing_tree import structural_statistics

Collect a list of results returned by the path analyzer (`structural_statistics`) into aligned data tensors.

In [4]:
from crossing_tree import collect_structural_statistics

A function implementing various delta choices.

In [5]:
import warnings

def get_delta_method(delta=1.0):
    """Return a function that computes the base grid spacing of a path.

    Parameters
    ----------
    delta : str or positive real number, default 1.0
        Either a fixed spacing (any positive real number, including
        integers and numpy scalars), or the name of a data-driven rule:

        - "std"  : the standard deviation of the increments;
        - "med"  : the median absolute increment;
        - "mean" : the mean absolute increment;
        - "iqr"  : the interquartile range of the increments;
        - "rng"  : the range of the path divided by 2**12 (discouraged,
          a RuntimeWarning is issued when this rule is requested).

    Returns
    -------
    delta_ : callable
        A function of one argument `X`, a numpy array with the sample
        path, which returns the spacing as a scalar.

    Raises
    ------
    ValueError
        If `delta` is a string other than the names listed above.
    TypeError
        If `delta` is neither a string nor a positive real number.
    """
    # Local import: `numbers` is not among the imports of the notebook.
    import numbers

    if isinstance(delta, str):
        if delta == "std":
            # the standard deviation of increments
            delta_ = lambda X: np.diff(X).std()
        elif delta == "med":
            # Use the median absolute difference [Jones, Rolls; 2009] p. 11 (arxiv:0911.5204v2)
            delta_ = lambda X: np.median(np.abs(np.diff(X)))
        elif delta == "mean":
            # Use the mean absolute difference
            delta_ = lambda X: np.mean(np.abs(np.diff(X)))
        elif delta == "iqr":
            # Interquartile range
            delta_ = lambda X: np.subtract(*np.percentile(np.diff(X), [75, 25]))
        elif delta == "rng":
            # Use the range estimate as suggested by Geoffrey on 2015-05-28
            warnings.warn("""Use of `range`-based grid resolution """
                          """is discouraged since it may cause misaligned """
                          """crossing trees.""", RuntimeWarning)
            # Divide by a float: in python 2 an integer-valued path would
            # otherwise be floor-divided, typically down to zero.
            delta_ = lambda X: (X.max() - X.min()) / float(2**12)
        else:
            raise ValueError("""Invalid `delta` setting. Accepted values """
                             """are: [`iqr`, `std`, `med`, `rng`, `mean`].""")
    elif (isinstance(delta, numbers.Real) and not isinstance(delta, bool)
          and delta > 0):
        # Accept any positive real number, not only the builtin float, and
        # freeze its value so that the returned function is self-contained.
        value_ = float(delta)
        delta_ = lambda X: value_
    else:
        raise TypeError("""`delta` must be either a positive number, or """
                        """a method identifier.""")
    return delta_

An MC experiment kernel.

In [6]:
from sklearn.base import clone

def experiment(experiment_id, n_replications, methods, generator):
    """Run one batch of the Monte Carlo experiment.

    Parameters
    ----------
    experiment_id : object
        An identifier of the batch. It is returned as is, so that the
        caller can tell the batches apart.
    n_replications : int
        The number of sample paths to draw and analyze.
    methods : list
        The `delta` settings accepted by `get_delta_method`. Each one is
        also used as a key of the results dictionary.
    generator : object
        A path generator which can be copied with `sklearn.base.clone`
        and which has the methods `start()`, `draw()` and `finish()`;
        `draw()` must return a pair `(T, X)`.

    Returns
    -------
    experiment_id : object
        The identifier passed in.
    results : dict
        For each method, the list of `n_replications` outputs of
        `structural_statistics`, one per sample path.
    """
    # Work on a private copy, so that the object passed in is left untouched.
    generator = clone(generator)
    generator.start()
    try:
        deltas = [get_delta_method(method_) for method_ in methods]

        results = {method_: list() for method_ in methods}
        for _ in xrange(n_replications):
            T, X = generator.draw()

            # Apply all methods to the same sample path.
            for delta, method in zip(deltas, methods):
                result_ = structural_statistics(X, T, scale=delta(X), origin=X[0])
                results[method].append(result_)
    finally:
        # Pair every successful `start()` with a `finish()`, even if drawing
        # or analyzing a path fails part way through the batch.
        generator.finish()

    return experiment_id, results

## Experiments

In [7]:
from joblib import Parallel, delayed

A couple of random seeds from [here](https://www.random.org/bytes/).

In [8]:
# Each experiment section below takes its seed with `master_seeds.pop()`,
# i.e. from the END of this list. Extra random seeds should therefore be
# prepended to the array, so that the seeds of the existing sections stay
# the same.
master_seeds = [0xB5BC12B7, 0x3298B667, 0xC54247B6, 0x0A868C68, 0x3AC964A8,
                0x6F82F10A, 0x8BF7DA79, 0x48009E58, 0x7A908C10, 0x91A42FF2,
                0x3C5E5D39, 0x7A72D405, 0x6D47DBAA, 0x819028DE, 0xA9A5642B,
                0x04DFFE11, 0x8A4AC197, 0xFED5E293, 0xC43A534A, 0xAC2A48CE,
                0x7B3E134E, 0xCC05D35A, 0x14B0DBDF, 0x1A4BD8DD, 0x1B319B92,
                0xD9A6BDD2, 0xF618C88E, 0xCB9055BA, 0xD262541D, 0xB3DB7B23,]

The Monte Carlo experiment is run in parallel batches, each of which is
initialized with a randomly picked seed.

In [9]:
# The exclusive upper bound for the per-batch seeds, which are drawn with
# `random_state.randint(MAX_RAND_SEED, ...)` in the run cells below.
MAX_RAND_SEED = np.iinfo(np.int32).max

The folder to store the results in

In [10]:
# Passed to `save` as the target folder of every results file. The path is
# relative, so it is resolved against the working directory of the kernel.
OUTPUT_PATH = "../results/"

## fBM experiment

In [11]:
from crossing_tree.processes import FractionalBrownianMotion

# Take the master seed of this section off the END of `master_seeds`.
# `pop` mutates the list: re-running this cell consumes another seed.
seed = master_seeds.pop()
print "Using seed %X"%(seed,)
# The per-batch seeds of this section are drawn from this generator.
random_state = np.random.RandomState(seed)

# The run cell of this section is guarded by `if not skip`.
skip = False

Using seed B3DB7B23


Setup

In [12]:
# `n_samples` (2**23) is passed to the generator as `N`; `methods` are the
# `delta` rules of `get_delta_method` applied to every sample path.
n_samples, methods = 1 << 23, ["med", "std", "iqr", "mean",]
# The Hurst exponents to run, one results file per value.
hurst_exponents = [0.500, 0.550, 0.600, 0.650, 0.700, 0.750, 0.800, 0.850, 0.900,
                   0.910, 0.915, 0.920, 0.925, 0.930, 0.935, 0.940, 0.945, 0.950,
                   0.990,]
# 8 batches of 125 replications each, i.e. 1000 sample paths per exponent.
n_per_batch, n_batches = 125, 8

Run the experiment for the Fractional Brownian Motion.

In [13]:
# Run the fBM experiment: one results file per Hurst exponent.
if not skip:
    # A single pool for all exponents; n_jobs=-1 lets joblib use all CPUs.
    par_ = Parallel(n_jobs=-1, verbose=0)
    for hurst_ in hurst_exponents:
        # The name encodes the setup and prefixes the name of the results file.
        name_ = "FBM-%d-%0.3f-%dx%d"%(n_samples, hurst_, n_per_batch, n_batches)
        # The trailing comma keeps the cursor on the line, so that the
        # timing printed at the end of the iteration follows the name.
        print name_,

        # Schedule the experiments
        # One seed per batch: it seeds the generator of the batch and also
        # serves as the id, which `experiment` returns with the results.
        seeds = random_state.randint(MAX_RAND_SEED, size=(n_batches,))
        # A lazy sequence of jobs: nothing runs until `par_` consumes it.
        schedule_ = (delayed(experiment)(seed_, n_per_batch, methods,
                                         FractionalBrownianMotion(N=n_samples,
                                                                  hurst=hurst_,
                                                                  random_state=seed_))
                                    for seed_ in seeds)

        # Run the experiment and collect the results
        tick_ = time.time()
        experiment_ids = list()
        results_ = {method: list() for method in methods}
        # Concatenate the per-batch lists of results of each method.
        for id_, dict_ in par_(schedule_):
            experiment_ids.append(id_)
            for method in methods:
                results_[method].extend(dict_[method])
        # Turn the list of each method into aligned data tensors.
        results = {key_: collect_structural_statistics(list_)
                   for key_, list_ in results_.iteritems()}
        # The elapsed time covers both the simulation and the collection.
        tock_ = time.time()

        # Save the results and log
        filename_ = save((tick_, tock_, experiment_ids, results), OUTPUT_PATH, name_)
        print "%0.3fsec."%(tock_ - tick_,), filename_

FBM-8388608-0.500-125x8 1030.696sec. ../results/FBM-8388608-0.500-125x8_20161019-214206.gz
FBM-8388608-0.550-125x8 1064.368sec. ../results/FBM-8388608-0.550-125x8_20161019-220002.gz
FBM-8388608-0.600-125x8 1105.569sec. ../results/FBM-8388608-0.600-125x8_20161019-221839.gz
FBM-8388608-0.650-125x8 1156.223sec. ../results/FBM-8388608-0.650-125x8_20161019-223807.gz
FBM-8388608-0.700-125x8 1200.116sec. ../results/FBM-8388608-0.700-125x8_20161019-225821.gz
FBM-8388608-0.750-125x8 1249.725sec. ../results/FBM-8388608-0.750-125x8_20161019-231924.gz
FBM-8388608-0.800-125x8 1311.252sec. ../results/FBM-8388608-0.800-125x8_20161019-234130.gz
FBM-8388608-0.850-125x8 1378.502sec. ../results/FBM-8388608-0.850-125x8_20161020-000444.gz
FBM-8388608-0.900-125x8 1460.501sec. ../results/FBM-8388608-0.900-125x8_20161020-002921.gz
FBM-8388608-0.910-125x8 1481.666sec. ../results/FBM-8388608-0.910-125x8_20161020-005420.gz
FBM-8388608-0.915-125x8 1507.095sec. ../results/FBM-8388608-0.915-125x8_20161020-011945.gz

## Hermite process experiment

In [14]:
from crossing_tree.processes import HermiteProcess

# Take the master seed of this section off the END of `master_seeds`.
# `pop` mutates the list: re-running this cell consumes another seed.
seed = master_seeds.pop()
print "Using seed %X"%(seed,)
# The per-batch seeds of this section are drawn from this generator.
random_state = np.random.RandomState(seed)

# The run cell of this section is guarded by `if not skip`.
skip = False

Using seed D262541D


Setup: use no downsampling.

In [15]:
# `n_samples` (2**23) and `n_downsample` are passed to the generator as `N`
# and `n_downsample`; a factor of 1 stands for no downsampling.
n_samples, n_downsample = 1 << 23, 1
# The degrees of the Hermite process to run, and the `delta` rules of
# `get_delta_method` applied to every sample path.
degrees, methods = [2, 3, 4], ["med", "std", "iqr", "mean",]
# The Hurst exponents to run; note that the list starts at 0.550.
hurst_exponents = [       0.550, 0.600, 0.650, 0.700, 0.750, 0.800, 0.850, 0.900,
                   0.910, 0.915, 0.920, 0.925, 0.930, 0.935, 0.940, 0.945, 0.950,
                   0.990,]
# 8 batches of 125 replications each, i.e. 1000 sample paths per setting.
n_per_batch, n_batches = 125, 8

Run the experiment for the Hermite process.

In [16]:
# Run the Hermite process experiment: one results file per pair of a
# degree and a Hurst exponent.
if not skip:
    # A single pool for all settings; n_jobs=-1 lets joblib use all CPUs.
    par_ = Parallel(n_jobs=-1, verbose=0)
    for degree_ in degrees:
        for hurst_ in hurst_exponents:
            # The name encodes the setup and prefixes the name of the results file.
            name_ = "HRP%d_%d-%d-%0.3f-%dx%d"%(degree_, n_downsample, n_samples, hurst_, n_per_batch, n_batches)
            # The trailing comma keeps the cursor on the line, so that the
            # timing printed at the end of the iteration follows the name.
            print name_,

            # Schedule the experiments
            # One seed per batch: it seeds the generator of the batch and also
            # serves as the id, which `experiment` returns with the results.
            seeds = random_state.randint(MAX_RAND_SEED, size=(n_batches,))
            # A lazy sequence of jobs: nothing runs until `par_` consumes it.
            schedule_ = (delayed(experiment)(seed_, n_per_batch, methods,
                                             HermiteProcess(N=n_samples,
                                                            degree=degree_,
                                                            n_downsample=n_downsample,
                                                            hurst=hurst_,
                                                            random_state=seed_))
                                        for seed_ in seeds)

            # Run the experiment and collect the results
            tick_ = time.time()
            experiment_ids = list()
            results_ = {method: list() for method in methods}
            # Concatenate the per-batch lists of results of each method.
            for id_, dict_ in par_(schedule_):
                experiment_ids.append(id_)
                for method in methods:
                    results_[method].extend(dict_[method])
            # Turn the list of each method into aligned data tensors.
            results = {key_: collect_structural_statistics(list_)
                       for key_, list_ in results_.iteritems()}
            # The elapsed time covers both the simulation and the collection.
            tock_ = time.time()

            # Save the results and log
            filename_ = save((tick_, tock_, experiment_ids, results), OUTPUT_PATH, name_)
            print "%0.3fsec."%(tock_ - tick_,), filename_

HRP2_1-8388608-0.550-125x8 1228.748sec. ../results/HRP2_1-8388608-0.550-125x8_20161020-051513.gz
HRP2_1-8388608-0.600-125x8 1250.329sec. ../results/HRP2_1-8388608-0.600-125x8_20161020-053616.gz
HRP2_1-8388608-0.650-125x8 1279.614sec. ../results/HRP2_1-8388608-0.650-125x8_20161020-055749.gz
HRP2_1-8388608-0.700-125x8 1313.558sec. ../results/HRP2_1-8388608-0.700-125x8_20161020-061956.gz
HRP2_1-8388608-0.750-125x8 1360.610sec. ../results/HRP2_1-8388608-0.750-125x8_20161020-064250.gz
HRP2_1-8388608-0.800-125x8 1407.357sec. ../results/HRP2_1-8388608-0.800-125x8_20161020-070632.gz
HRP2_1-8388608-0.850-125x8 1468.904sec. ../results/HRP2_1-8388608-0.850-125x8_20161020-073117.gz
HRP2_1-8388608-0.900-125x8 1540.926sec. ../results/HRP2_1-8388608-0.900-125x8_20161020-075714.gz
HRP2_1-8388608-0.910-125x8 1559.643sec. ../results/HRP2_1-8388608-0.910-125x8_20161020-082331.gz
HRP2_1-8388608-0.915-125x8 1569.231sec. ../results/HRP2_1-8388608-0.915-125x8_20161020-084958.gz
HRP2_1-8388608-0.920-125x8 157

## Weierstrass experiment -- $\lambda_0 = 1.2$

In [17]:
from crossing_tree.processes import WeierstrassFunction

# Take the master seed of this section off the END of `master_seeds`.
# `pop` mutates the list: re-running this cell consumes another seed.
seed = master_seeds.pop()
print "Using seed %X"%(seed,)
# The per-batch seeds of this section are drawn from this generator.
random_state = np.random.RandomState(seed)

# The run cell of this section is guarded by `if not skip`.
skip = False

Using seed CB9055BA


Setup

In [None]:
# `n_samples` (2**23) and `lambda_0` are passed to the generator as `N` and
# `lambda_0`, the fundamental harmonic of the Weierstrass function.
n_samples, lambda_0 = 1 << 23, 1.2
# The `delta` rules of `get_delta_method` applied to every sample path.
methods = ["med", "std", "iqr", "mean",]
# The Holder exponents to run, one results file per value.
holder_exponents = [0.500, 0.550, 0.600, 0.650, 0.700, 0.750, 0.800, 0.850, 0.900,
                    0.910, 0.915, 0.920, 0.925, 0.930, 0.935, 0.940, 0.945, 0.950,
                    0.990,]
# 8 batches of 125 replications each, i.e. 1000 sample paths per exponent.
n_per_batch, n_batches = 125, 8

Run the experiment for the random Weierstrass function $W_H\colon [0, 1]\to \mathbb{R}$:
$$ W_H(t) = \sum_{k\geq 0} \lambda_0^{-k H} \bigl(\cos(2 \pi \lambda_0^k t + \phi_k) - \cos \phi_k\bigr)\,, $$
with $(\phi_k)_{k\geq0} \sim \mathbb{U}[0, 2\pi]$, and $\lambda_0 > 1$ -- the fundamental harmonic.

In [None]:
# Run the Weierstrass experiment: one results file per Holder exponent.
if not skip:
    # A single pool for all exponents; n_jobs=-1 lets joblib use all CPUs.
    par_ = Parallel(n_jobs=-1, verbose=0)
    for holder_ in holder_exponents:
        # The name encodes the setup and prefixes the name of the results file.
        name_ = "WEI_%g-%d-%0.3f-%dx%d"%(lambda_0, n_samples, holder_, n_per_batch, n_batches)
        # The trailing comma keeps the cursor on the line, so that the
        # timing printed at the end of the iteration follows the name.
        print name_,

        # Schedule the experiments
        # One seed per batch: it seeds the generator of the batch and also
        # serves as the id, which `experiment` returns with the results.
        seeds = random_state.randint(MAX_RAND_SEED, size=(n_batches,))
        # A lazy sequence of jobs: nothing runs until `par_` consumes it.
        schedule_ = (delayed(experiment)(seed_, n_per_batch, methods,
                                         WeierstrassFunction(N=n_samples,
                                                             lambda_0=lambda_0,
                                                             holder=holder_,
                                                             random_state=seed_,
                                                             one_sided=False))
                                    for seed_ in seeds)

        # Run the experiment and collect the results
        tick_ = time.time()
        experiment_ids = list()
        results_ = {method: list() for method in methods}
        # Concatenate the per-batch lists of results of each method.
        for id_, dict_ in par_(schedule_):
            experiment_ids.append(id_)
            for method in methods:
                results_[method].extend(dict_[method])
        # Turn the list of each method into aligned data tensors.
        results = {key_: collect_structural_statistics(list_)
                   for key_, list_ in results_.iteritems()}
        # The elapsed time covers both the simulation and the collection.
        tock_ = time.time()

        # Save the results and log
        filename_ = save((tick_, tock_, experiment_ids, results), OUTPUT_PATH, name_)
        print "%0.3fsec."%(tock_ - tick_,), filename_

WEI_1.2-8388608-0.500-125x8 13237.550sec. ../results/WEI_1.2-8388608-0.500-125x8_20161021-080209.gz
WEI_1.2-8388608-0.550-125x8 13259.886sec. ../results/WEI_1.2-8388608-0.550-125x8_20161021-114320.gz
WEI_1.2-8388608-0.600-125x8 13295.768sec. ../results/WEI_1.2-8388608-0.600-125x8_20161021-152507.gz
WEI_1.2-8388608-0.650-125x8

## Additional experiments

### Hermite process experiment: with downsampling

In [None]:
from crossing_tree.processes import HermiteProcess

# Take the master seed of this section off the END of `master_seeds`.
# `pop` mutates the list: re-running this cell consumes another seed.
seed = master_seeds.pop()
print "Using seed %X"%(seed,)
# The per-batch seeds of this section are drawn from this generator.
random_state = np.random.RandomState(seed)

# The run cell of this section is guarded by `if not skip`.
skip = False

Setup

In [None]:
# `n_samples` (2**19) and `n_downsample` (2**4) are passed to the generator
# as `N` and `n_downsample`.
n_samples, n_downsample = 1 << 19, 1 << 4
# The degrees of the Hermite process to run, and the `delta` rules of
# `get_delta_method` applied to every sample path.
degrees, methods = [2, 3, 4], ["med", "std", "iqr", "mean",]
# The Hurst exponents to run; note that the list starts at 0.550.
hurst_exponents = [       0.550, 0.600, 0.650, 0.700, 0.750, 0.800, 0.850, 0.900,
                   0.910, 0.915, 0.920, 0.925, 0.930, 0.935, 0.940, 0.945, 0.950,
                   0.990,]
# 8 batches of 125 replications each, i.e. 1000 sample paths per setting.
n_per_batch, n_batches = 125, 8

Run the experiment for the Hermite process.

In [None]:
# Run the Hermite process experiment with downsampling: one results file
# per pair of a degree and a Hurst exponent.
if not skip:
    # A single pool for all settings; n_jobs=-1 lets joblib use all CPUs.
    par_ = Parallel(n_jobs=-1, verbose=0)
    for degree_ in degrees:
        for hurst_ in hurst_exponents:
            # The name encodes the setup and prefixes the name of the results file.
            name_ = "HRP%d_%d-%d-%0.3f-%dx%d"%(degree_, n_downsample, n_samples, hurst_, n_per_batch, n_batches)
            # The trailing comma keeps the cursor on the line, so that the
            # timing printed at the end of the iteration follows the name.
            print name_,

            # Schedule the experiments
            # One seed per batch: it seeds the generator of the batch and also
            # serves as the id, which `experiment` returns with the results.
            seeds = random_state.randint(MAX_RAND_SEED, size=(n_batches,))
            # A lazy sequence of jobs: nothing runs until `par_` consumes it.
            schedule_ = (delayed(experiment)(seed_, n_per_batch, methods,
                                             HermiteProcess(N=n_samples,
                                                            degree=degree_,
                                                            n_downsample=n_downsample,
                                                            hurst=hurst_,
                                                            random_state=seed_))
                                        for seed_ in seeds)

            # Run the experiment and collect the results
            tick_ = time.time()
            experiment_ids = list()
            results_ = {method: list() for method in methods}
            # Concatenate the per-batch lists of results of each method.
            for id_, dict_ in par_(schedule_):
                experiment_ids.append(id_)
                for method in methods:
                    results_[method].extend(dict_[method])
            # Turn the list of each method into aligned data tensors.
            results = {key_: collect_structural_statistics(list_)
                       for key_, list_ in results_.iteritems()}
            # The elapsed time covers both the simulation and the collection.
            tock_ = time.time()

            # Save the results and log
            filename_ = save((tick_, tock_, experiment_ids, results), OUTPUT_PATH, name_)
            print "%0.3fsec."%(tock_ - tick_,), filename_

### Weierstrass experiment -- $\lambda_0 = 3$

In [None]:
from crossing_tree.processes import WeierstrassFunction

# Take the master seed of this section off the END of `master_seeds`.
# `pop` mutates the list: re-running this cell consumes another seed.
seed = master_seeds.pop()
print "Using seed %X"%(seed,)
# The per-batch seeds of this section are drawn from this generator.
random_state = np.random.RandomState(seed)

# The run cell of this section is guarded by `if not skip`, so with True
# it does nothing; the master seed above is popped regardless.
skip = True

Setup

In [None]:
# `n_samples` (2**23) and `lambda_0` are passed to the generator as `N` and
# `lambda_0`, the fundamental harmonic of the Weierstrass function.
n_samples, lambda_0 = 1 << 23, 3.0
# The `delta` rules of `get_delta_method` applied to every sample path.
methods = ["med", "std", "iqr", "mean",]
# The Holder exponents to run, one results file per value.
holder_exponents = [0.500, 0.550, 0.600, 0.650, 0.700, 0.750, 0.800, 0.850, 0.900,
                    0.910, 0.915, 0.920, 0.925, 0.930, 0.935, 0.940, 0.945, 0.950,
                    0.990,]
# 8 batches of 125 replications each, i.e. 1000 sample paths per exponent.
n_per_batch, n_batches = 125, 8

Run the experiment for the random Weierstrass function $W_H\colon [0, 1]\to \mathbb{R}$:
$$ W_H(t) = \sum_{k\geq 0} \lambda_0^{-k H} \bigl(\cos(2 \pi \lambda_0^k t + \phi_k) - \cos \phi_k\bigr)\,, $$
with $(\phi_k)_{k\geq0} \sim \mathbb{U}[0, 2\pi]$, and $\lambda_0 > 1$ -- the fundamental harmonic.

In [None]:
# Run the Weierstrass experiment: one results file per Holder exponent.
if not skip:
    # A single pool for all exponents; n_jobs=-1 lets joblib use all CPUs.
    par_ = Parallel(n_jobs=-1, verbose=0)
    for holder_ in holder_exponents:
        # The name encodes the setup and prefixes the name of the results file.
        name_ = "WEI_%g-%d-%0.3f-%dx%d"%(lambda_0, n_samples, holder_, n_per_batch, n_batches)
        # The trailing comma keeps the cursor on the line, so that the
        # timing printed at the end of the iteration follows the name.
        print name_,

        # Schedule the experiments
        # One seed per batch: it seeds the generator of the batch and also
        # serves as the id, which `experiment` returns with the results.
        seeds = random_state.randint(MAX_RAND_SEED, size=(n_batches,))
        # A lazy sequence of jobs: nothing runs until `par_` consumes it.
        schedule_ = (delayed(experiment)(seed_, n_per_batch, methods,
                                         WeierstrassFunction(N=n_samples,
                                                             lambda_0=lambda_0,
                                                             holder=holder_,
                                                             random_state=seed_,
                                                             one_sided=False))
                                    for seed_ in seeds)

        # Run the experiment and collect the results
        tick_ = time.time()
        experiment_ids = list()
        results_ = {method: list() for method in methods}
        # Concatenate the per-batch lists of results of each method.
        for id_, dict_ in par_(schedule_):
            experiment_ids.append(id_)
            for method in methods:
                results_[method].extend(dict_[method])
        # Turn the list of each method into aligned data tensors.
        results = {key_: collect_structural_statistics(list_)
                   for key_, list_ in results_.iteritems()}
        # The elapsed time covers both the simulation and the collection.
        tock_ = time.time()

        # Save the results and log
        filename_ = save((tick_, tock_, experiment_ids, results), OUTPUT_PATH, name_)
        print "%0.3fsec."%(tock_ - tick_,), filename_

### Weierstrass experiment -- $\lambda_0 = 1.7$

In [None]:
from crossing_tree.processes import WeierstrassFunction

# Take the master seed of this section off the END of `master_seeds`.
# `pop` mutates the list: re-running this cell consumes another seed.
seed = master_seeds.pop()
print "Using seed %X"%(seed,)
# The per-batch seeds of this section are drawn from this generator.
random_state = np.random.RandomState(seed)

# The run cell of this section is guarded by `if not skip`, so with True
# it does nothing; the master seed above is popped regardless.
skip = True

Setup

In [None]:
# `n_samples` (2**23) and `lambda_0` are passed to the generator as `N` and
# `lambda_0`, the fundamental harmonic of the Weierstrass function.
n_samples, lambda_0 = 1 << 23, 1.7
# The `delta` rules of `get_delta_method` applied to every sample path.
methods = ["med", "std", "iqr", "mean",]
# The Holder exponents to run, one results file per value.
holder_exponents = [0.500, 0.550, 0.600, 0.650, 0.700, 0.750, 0.800, 0.850, 0.900,
                    0.910, 0.915, 0.920, 0.925, 0.930, 0.935, 0.940, 0.945, 0.950,
                    0.990,]
# 8 batches of 125 replications each, i.e. 1000 sample paths per exponent.
n_per_batch, n_batches = 125, 8

Run the experiment for the random Weierstrass function $W_H\colon [0, 1]\to \mathbb{R}$:
$$ W_H(t) = \sum_{k\geq 0} \lambda_0^{-k H} \bigl(\cos(2 \pi \lambda_0^k t + \phi_k) - \cos \phi_k\bigr)\,, $$
with $(\phi_k)_{k\geq0} \sim \mathbb{U}[0, 2\pi]$, and $\lambda_0 > 1$ -- the fundamental harmonic.

In [None]:
# Run the Weierstrass experiment: one results file per Holder exponent.
if not skip:
    # A single pool for all exponents; n_jobs=-1 lets joblib use all CPUs.
    par_ = Parallel(n_jobs=-1, verbose=0)
    for holder_ in holder_exponents:
        # The name encodes the setup and prefixes the name of the results file.
        name_ = "WEI_%g-%d-%0.3f-%dx%d"%(lambda_0, n_samples, holder_, n_per_batch, n_batches)
        # The trailing comma keeps the cursor on the line, so that the
        # timing printed at the end of the iteration follows the name.
        print name_,

        # Schedule the experiments
        # One seed per batch: it seeds the generator of the batch and also
        # serves as the id, which `experiment` returns with the results.
        seeds = random_state.randint(MAX_RAND_SEED, size=(n_batches,))
        # A lazy sequence of jobs: nothing runs until `par_` consumes it.
        schedule_ = (delayed(experiment)(seed_, n_per_batch, methods,
                                         WeierstrassFunction(N=n_samples,
                                                             lambda_0=lambda_0,
                                                             holder=holder_,
                                                             random_state=seed_,
                                                             one_sided=False))
                                    for seed_ in seeds)

        # Run the experiment and collect the results
        tick_ = time.time()
        experiment_ids = list()
        results_ = {method: list() for method in methods}
        # Concatenate the per-batch lists of results of each method.
        for id_, dict_ in par_(schedule_):
            experiment_ids.append(id_)
            for method in methods:
                results_[method].extend(dict_[method])
        # Turn the list of each method into aligned data tensors.
        results = {key_: collect_structural_statistics(list_)
                   for key_, list_ in results_.iteritems()}
        # The elapsed time covers both the simulation and the collection.
        tock_ = time.time()

        # Save the results and log
        filename_ = save((tick_, tock_, experiment_ids, results), OUTPUT_PATH, name_)
        print "%0.3fsec."%(tock_ - tick_,), filename_

### fBM experiment: super long

In [None]:
from crossing_tree.processes import FractionalBrownianMotion

# Take the master seed of this section off the END of `master_seeds`.
# `pop` mutates the list: re-running this cell consumes another seed.
seed = master_seeds.pop()
print "Using seed %X"%(seed,)
# The per-batch seeds of this section are drawn from this generator.
random_state = np.random.RandomState(seed)

# The run cell of this section is guarded by `if not skip`, so with True
# it does nothing; the master seed above is popped regardless.
skip = True

Setup

In [None]:
# `n_samples` (2**25) is passed to the generator as `N`; `methods` are the
# `delta` rules of `get_delta_method` applied to every sample path.
n_samples, methods = 1 << 25, ["med", "std", "iqr", "mean",]
# The Hurst exponents to run, one results file per value.
hurst_exponents = [0.500, 0.550, 0.600, 0.650, 0.700, 0.750, 0.800, 0.850, 0.900,
                   0.910, 0.915, 0.920, 0.925, 0.930, 0.935, 0.940, 0.945, 0.950,
                   0.990,]
# 2 batches of 5 replications each, i.e. 10 sample paths per exponent;
# `n_threads` is passed to the generator of every batch.
n_per_batch, n_batches, n_threads = 5, 2, 4

Run the experiment for the Fractional Brownian Motion.

In [None]:
# Run the fBM experiment on long paths: one results file per Hurst exponent.
if not skip:
    # A single pool for all exponents; n_jobs=-1 lets joblib use all CPUs.
    # NOTE(review): every generator is also given `n_threads`; confirm that
    # the parallel jobs times the threads do not oversubscribe the CPUs.
    par_ = Parallel(n_jobs=-1, verbose=0)
    for hurst_ in hurst_exponents:
        # The name encodes the setup and prefixes the name of the results file.
        name_ = "FBM-%d-%0.3f-%dx%d"%(n_samples, hurst_, n_per_batch, n_batches)
        # The trailing comma keeps the cursor on the line, so that the
        # timing printed at the end of the iteration follows the name.
        print name_,

        # Schedule the experiments
        # One seed per batch: it seeds the generator of the batch and also
        # serves as the id, which `experiment` returns with the results.
        seeds = random_state.randint(MAX_RAND_SEED, size=(n_batches,))
        # A lazy sequence of jobs: nothing runs until `par_` consumes it.
        schedule_ = (delayed(experiment)(seed_, n_per_batch, methods,
                                         FractionalBrownianMotion(N=n_samples,
                                                                  hurst=hurst_,
                                                                  random_state=seed_,
                                                                  n_threads=n_threads))
                                    for seed_ in seeds)

        # Run the experiment and collect the results
        tick_ = time.time()
        experiment_ids = list()
        results_ = {method: list() for method in methods}
        # Concatenate the per-batch lists of results of each method.
        for id_, dict_ in par_(schedule_):
            experiment_ids.append(id_)
            for method in methods:
                results_[method].extend(dict_[method])
        # Turn the list of each method into aligned data tensors.
        results = {key_: collect_structural_statistics(list_)
                   for key_, list_ in results_.iteritems()}
        # The elapsed time covers both the simulation and the collection.
        tock_ = time.time()

        # Save the results and log
        filename_ = save((tick_, tock_, experiment_ids, results), OUTPUT_PATH, name_)
        print "%0.3fsec."%(tock_ - tick_,), filename_