# Plots and analysis

In [None]:
import os, re
import time
import numpy as np
import pandas as pd

%matplotlib inline
import matplotlib.pyplot as plt

A helper that parses a result file name and unpickles its gzipped contents.

In [None]:
import gzip, cPickle

# Result files are expected to be named
#     <generator>-<P>-<hurst>-<M>x<n_jobs>_<dttm>
# followed by a three-character extension (".gz"), which the loader strips
# before matching against this pattern.
fmt_ = re.compile(r"^(?P<generator>[^-]+)-(?P<P>\d+)-(?P<hurst>(\d*\.)?\d+)" +
                  r"-(?P<M>\d+)x(?P<n_jobs>\d+)" +
                  r"_(?P<dttm>[\d-]+)$", flags=re.I | re.U)

# Names of the named groups, ordered by their position in the pattern.
# Sorting the mapping by its own values avoids the Python-2-only
# `.iteritems()` and works unchanged on Python 3.
keys_ = sorted(fmt_.groupindex, key=fmt_.groupindex.get)

def load(filename):
    """Load one gzipped, pickled experiment result file.

    The base name of `filename`, with its last three characters (the ".gz"
    extension) removed, must match the module-level pattern `fmt_`.

    Parameters
    ----------
    filename : str
        Path to the result file.

    Returns
    -------
    tuple
        `(info_, start, finish, seeds, results)`, where `info_` maps every
        named group of `fmt_` (listed in `keys_`) to the matched substring
        and the remaining four items are the unpickled payload.

    Raises
    ------
    ValueError
        If the file name does not match the expected format.
    """
    base_ = os.path.basename(filename)
    # `match` signals a mismatch by returning None (it does not raise), so
    # the result has to be checked explicitly.
    matches_ = fmt_.match(base_[:-3])
    if matches_ is None:
        raise ValueError("""Invalid filename format.""")

    info_ = {key_: matches_.group(key_) for key_ in keys_}
    # NOTE: unpickling can execute arbitrary code; only load result files
    # that come from a trusted source.
    with gzip.open(filename, "rb") as f:
        start, finish, seeds, results = cPickle.load(f)
    return info_, start, finish, seeds, results

Scan the results folder and group the experiment results by the file-name fields `P` and `MxN`, the generator, the method and the Hurst exponent.

In [None]:
path = "../results/"

# Nested mapping filled from the result files:
#     experiment[P][MxN][generator][method][hurst] -> data
# where P, M, N, generator and hurst are parsed from the file name and
# `method` / `data` come from the unpickled `results` mapping.
experiment = dict()
# `os.listdir` returns names in arbitrary order; iterate in sorted order so
# that the outcome is reproducible when several files map to the same key
# (the last one in sorted order wins).
for base_ in sorted(os.listdir(path)):
    if not base_.endswith(".gz"):
        continue
    info_, start, finish, seeds, results = load(os.path.join(path, base_))

    gen, hurst = info_["generator"], float(info_["hurst"])
    log2_len, n_mc_rep = int(info_["P"]), "%(M)sx%(n_jobs)s"%info_

    # Create the intermediate levels on demand.
    by_method_ = experiment.setdefault(log2_len, dict()) \
                           .setdefault(n_mc_rep, dict()) \
                           .setdefault(gen, dict())

    # `.items()` works on both Python 2 and Python 3.
    for method, data in results.items():
        by_method_.setdefault(method, dict())[hurst] = data

Compute the empirical probabilities by averaging across all replications

In [None]:
def offspring_empirical(Dmnk, levels, laplace=False):
    """Estimate the empirical offspring distribution pooled over levels.

    Parameters
    ----------
    Dmnk : ndarray, 3-dimensional
        Counts; axis 0 is averaged over (the replications), axis 1 is
        selected by `levels` and pooled, axis 2 indexes the bins of the
        distribution.
    levels : slice or index array
        Selection along axis 1 of `Dmnk`. It must keep that axis (a slice
        or an index array, not a scalar).
    laplace : bool, optional
        If true, apply add-one smoothing: one is added to every bin count
        and the number of bins to the total.

    Returns
    -------
    levels : ndarray
        The selected indices along axis 1, shifted to start at one.
    mean, std : ndarray
        NaN-ignoring mean and standard deviation, across axis 0, of the
        per-replication bin probabilities.
    """
    # Get pooled frequencies. The builtin `float` replaces the `np.float`
    # alias, which newer NumPy releases no longer provide.
    Djk = Dmnk[:, levels].sum(axis=1, keepdims=False, dtype=float)
    Dj = Djk.sum(axis=1, keepdims=True)

    # Compute the empirical probabilities. Without smoothing a replication
    # with no counts yields NaN, which the nan-aware reductions skip.
    if laplace:
        Pjk = (Djk + 1.0) / (Dj + Djk.shape[1])
    else:
        Pjk = Djk / Dj

    levels = np.arange(Dmnk.shape[1], dtype=int)[levels]
    return levels + 1, np.nanmean(Pjk, axis=0), np.nanstd(Pjk, axis=0)

Get theoretical values of the probability according to the conjectured distribution:
$$ Z \sim \text{Geom}\bigl(4^{\frac{1}{2}-\frac{1}{2h}}\bigr) \text{ over } \{2n\,:\,n\geq 1\} \,. $$

For $\theta = 2^{1-h^{-1}}$, the law, once again, is
$$ \mathbb{P}(Z=2k) = \theta \cdot (1-\theta)^{k-1}\,. $$

In [None]:
from math import log

def offspring_prob(Z_max, hurst):
    """Return the conjectured geometric offspring law on even integers.

    With theta = 2 ** (1 - 1 / hurst), the probability of Z = 2k is
    theta * (1 - theta) ** (k - 1).

    Parameters
    ----------
    Z_max : int
        Exclusive upper bound of the support; the law is evaluated at
        2, 4, ..., up to but not including `Z_max`.
    hurst : float
        Scalar Hurst exponent in the interval (0, 1].

    Returns
    -------
    Z : ndarray
        The even offspring numbers.
    prob : ndarray
        The probability of each value in `Z`.

    Raises
    ------
    ValueError
        If `hurst` is outside (0, 1], where theta is not a probability.
    """
    if not 0.0 < hurst <= 1.0:
        raise ValueError("The Hurst exponent must lie in (0, 1].")

    Z = np.arange(2, Z_max, 2)
    theta = 2.0 ** (1.0 - 1.0 / hurst)
    # A direct power handles theta == 1 (hurst == 1), where the log-based
    # form would have to evaluate log(0).
    return Z, theta * (1.0 - theta) ** (Z // 2 - 1)

Use the mean of the geometric distribution to estimate the Hurst exponent:
$$ \mathbb{E} Z
    = 2 \theta \sum_{k\geq 1} k (1 - \theta)^{k-1}
    = 2 \theta \sum_{k\geq 1} \sum_{j\geq k} (1 - \theta)^{j-1}
    = 2 \theta \sum_{k\geq 1} \theta^{-1} (1 - \theta)^{k-1}
    = 2 \theta^{-1} \,, $$
whence
$$ 2^{1-h^{-1}} = \frac{2}{\mathbb{E} Z} \Leftrightarrow h = \frac{\log 2}{\log \mathbb{E}Z}\,. $$

In [None]:
def offspring_hurst(Dmnk, levels, laplace=False):
    """Estimate the Hurst exponent from the mean number of offspring.

    For every replication (axis 0) and selected level (axis 1) the mean
    offspring number E[Z] is computed from the bin counts along axis 2,
    where bin k (zero-based) stands for Z = 2 (k + 1). The estimate is
    log(2) / log(E[Z]).

    Parameters
    ----------
    Dmnk : ndarray, 3-dimensional
        Counts indexed by replication, level and bin.
    levels : slice or index array
        Selection along axis 1 of `Dmnk`. It must keep that axis.
    laplace : bool, optional
        Accepted for symmetry with `offspring_empirical`; not used here.

    Returns
    -------
    levels : ndarray
        The selected indices along axis 1, shifted to start at one.
    mean, std : ndarray
        NaN-ignoring mean and standard deviation of the estimates across
        axis 0, one value per selected level.
    """
    # Total number of counts per replication and level. The builtin `float`
    # replaces the `np.float` alias, which newer NumPy no longer provides.
    Dmj = Dmnk[:, levels].sum(axis=2, dtype=float)
    # The sum of the left-closed tail sums equals sum_k (k + 1) * D[k];
    # dividing by the total and doubling gives the mean of Z = 2 (k + 1).
    Mmj = 2 * Dmnk[:, levels, ::-1].cumsum(axis=-1).sum(axis=-1) / Dmj
    Hmj = np.log(2) / np.log(Mmj)
    levels = np.arange(Dmnk.shape[1], dtype=int)[levels]
    return levels + 1, np.nanmean(Hmj, axis=0), np.nanstd(Hmj, axis=0)

## Experiments

In [None]:
# Every run of the notebook writes its plots into a fresh, time-stamped
# sub-folder of "../plots".
output_path = os.path.join("../plots", time.strftime("%Y%m%d_%H%M%S"))

# `os.makedirs` also creates the "../plots" parent when it is missing,
# whereas `os.mkdir` would fail in that case.
if not os.path.exists(output_path):
    os.makedirs(output_path)

# The parenthesised form prints the same text on Python 2 and Python 3.
print(output_path)

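The cell above creates the time-stamped output folder; the cell below selects the experiments and the Hurst exponents to plot.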

In [None]:
# Key of the method whose results are plotted.
method = "med"

# Each entry is (P, "<M>x<n_jobs>", generator, method): the four keys used
# to look up one group of results in `experiment`. The sizes are written as
# powers of two; the commented-out entries are kept as ready-made toggles.
experiments = [
    # (2 ** 19, "125x8", "HRP2_16", method),
    # (2 ** 19, "125x8", "HRP3_16", method),
    # (2 ** 19, "125x8", "HRP4_16", method),
    (2 ** 21, "125x8", "WEI_1.2", method),
    (2 ** 23, "125x8", "WEI_1.2", method),
    (2 ** 23, "125x8", "WEI_1.7", method),
    (2 ** 23, "125x8", "FBM", method),
    (2 ** 21, "125x8", "FBM", method),
    (2 ** 17, "125x8", "HRP2_16", method),
    (2 ** 17, "125x8", "HRP3_16", method),
    (2 ** 17, "125x8", "HRP4_16", method),
]

# Hurst exponents to plot: a coarse grid, a fine grid near 0.9-0.95, and 0.99.
exponents = [
    0.5, 0.55, 0.6, 0.65, 0.7, 0.75, 0.8, 0.85, 0.9,
    0.91, 0.915, 0.92, 0.925, 0.93, 0.935, 0.94, 0.945, 0.95,
    0.99,
]

### FIGURE 01

for label `fig:fbm_offspring_distribution`

In [None]:
def figure_01(fig, log2_len, n_mc_rep, gen, method, p=6, q=7, bars=True, legend=True):
    """Plot empirical offspring probabilities against the conjectured law.

    One curve is drawn for every value in the module-level `exponents`
    list that has results under `experiment[log2_len][n_mc_rep][gen][method]`;
    exponents without results are skipped.

    Parameters
    ----------
    fig : matplotlib figure
        Figure that receives a single subplot.
    log2_len, n_mc_rep, gen, method
        Keys selecting the group of results in `experiment`.
    p, q : int or None
        Bounds of the slice of levels over which the counts are pooled.
    bars : bool
        Draw error bars of one standard deviation around the empirical curve.
    legend : bool
        Add a legend to the plot.
    """
    by_hurst = experiment[log2_len][n_mc_rep][gen][method]
    ax = fig.add_subplot(111)

    # One colour per exponent, in reversed rainbow order.
    palette = plt.cm.rainbow(np.linspace(0, 1, num=len(exponents)))[::-1]
    for colour, h in zip(palette, exponents):
        if h not in by_hurst:
            continue
        # Only the offspring counts are needed from the stored record.
        _, _, Dmnk, _, _, _, _ = by_hurst[h]

        _, p_avg, p_std = offspring_empirical(Dmnk, slice(p, q), laplace=False)
        # Theoretical law evaluated on as many points as there are bins.
        z, p_theory = offspring_prob(2 * (p_avg.shape[0] + 1), hurst=h)
        ax.plot(z, p_theory, linestyle='-', color='black', alpha=0.5)

        style = dict(color=colour, markersize=3, alpha=1.0,
                     label="%s %0.3f"%(gen, h))
        if not bars:
            ax.plot(z, p_avg, "-s", **style)
        else:
            ax.errorbar(z, p_avg, yerr=p_std, fmt='-s', **style)

    ax.set_xticks(np.arange(2, 43, 2))
    ax.grid(alpha=0.5, linestyle=":", color="grey")
    ax.set_xlim(1.9, 12.1)
    # NOTE(review): recent Matplotlib releases spell this keyword `base`;
    # confirm against the installed version before upgrading.
    ax.set_yscale("log", basey=2)
    ax.set_ylim(.5e-4, 1.1)

    ax.set_ylabel("probability")
    ax.set_xlabel("number of offspring")

    if legend:
        ax.legend(loc="lower left", frameon=True, ncol=2, fontsize=8)


Generate a figure-01 for different sizes and numbers of replications.

In [None]:
# Pool the offspring counts over the levels in slice(p, q).
p, q = 6, 10 # 5, 8
for experiment_ in experiments:
    log2_len, n_mc_rep, gen, method = experiment_

    # An upper bound that is not an integer (e.g. None) is written as "X".
    q_tag_ = str(q) if isinstance(q, int) else "X"
    name_ = "fig_01-%d_%s-%s-%d-%s-%s.pdf"%(p, q_tag_, gen, log2_len,
                                               n_mc_rep, method)

    fig = plt.figure(figsize=(6, 5))
    figure_01(fig, log2_len, n_mc_rep, gen, method, p, q,
              bars=False, legend=True)
    fig.savefig(os.path.join(output_path, name_), format="pdf")
    # Release the figure before the next one is created.
    plt.close()

### FIGURE 04

for label `fig:fbm_hurst_crossing_tree`

In [None]:
def figure_04(fig, log2_len, n_mc_rep, gen, method, p=6, q=7, bars=False, legend=True):
    """Plot the per-level Hurst exponent estimates with reference lines.

    One curve is drawn for every value in the module-level `exponents`
    list that has results under `experiment[log2_len][n_mc_rep][gen][method]`,
    together with a horizontal line at the exponent itself. Estimates at or
    above 1.35 times the exponent (and NaN estimates) are not drawn.

    Parameters
    ----------
    fig : matplotlib figure
        Figure that receives a single subplot.
    log2_len, n_mc_rep, gen, method
        Keys selecting the group of results in `experiment`.
    p, q : int or None
        Bounds of the slice of levels for which estimates are computed.
    bars : bool
        Draw error bars of one standard deviation around each curve.
    legend : bool
        Add a legend to the plot.
    """
    data_ = experiment[log2_len][n_mc_rep][gen][method]
    ax = fig.add_subplot(111)

    # Smallest level that is actually drawn; sets the left end of the x axis.
    first_ = np.inf
    color_ = plt.cm.rainbow(np.linspace(0, 1, num=len(exponents)))[::-1]
    for col_, hurst_ in zip(color_, exponents):
        try:
            scale_m, Nmn, Dmnk, Vmnde, Wmnp, Wavgmn, Wstdmn = data_[hurst_]
        except KeyError:
            continue
        levels, Hj_avg, Hj_std = offspring_hurst(Dmnk, slice(p, q))
        ax.axhline(y=hurst_, color='black', linestyle='-', alpha=0.25, zorder=-99)

        mask = Hj_avg < hurst_ * 1.35
        if not mask.any():
            # Nothing to draw for this exponent; indexing the first
            # element of an empty selection would raise IndexError.
            continue

        if bars:
            ax.errorbar(levels[mask], Hj_avg[mask], yerr=Hj_std[mask],
                        fmt="-s", color=col_, markersize=3, alpha=1.0,
                        label="%s %0.3f"%(gen, hurst_))
        else:
            ax.plot(levels[mask], Hj_avg[mask], "-s",
                    color=col_, markersize=3, alpha=1.0,
                    label="%s %0.3f"%(gen, hurst_))
        first_ = min(levels[mask][0], first_)

    if not np.isfinite(first_):
        # No curve was drawn at all: fall back to the first level so that
        # the tick range below stays finite.
        first_ = 1
    # The right end of the x axis is fixed at level 20.
    last_ = 20
    ax.set_xticks(np.arange(first_, last_ + 1))
    ax.grid(color="grey", linestyle=":", alpha=0.5)
    ax.set_xlim(first_ - 0.1, last_ + 1.1)
    ax.set_ylim(0.45, 1.01)
    ax.set_xlabel("level $\\delta 2^k$")
    ax.set_ylabel("$H$")

    if legend:
        ax.legend(loc="lower right", frameon=1, ncol=2, fontsize=8)

Create a figure-04 plot of the mean-based Hurst exponent estimates.

In [None]:
# Use every available level: slice(0, None).
p, q = 0, None
for experiment_ in experiments:
    log2_len, n_mc_rep, gen, method = experiment_

    # An upper bound that is not an integer (here None) is written as "X".
    q_tag_ = str(q) if isinstance(q, int) else "X"
    name_ = "fig_04-%d_%s-%s-%d-%s-%s.pdf"%(p, q_tag_, gen, log2_len,
                                               n_mc_rep, method)

    fig = plt.figure(figsize=(6, 5))
    figure_04(fig, log2_len, n_mc_rep, gen, method, p, q,
              bars=False, legend=True)
    fig.savefig(os.path.join(output_path, name_), format="pdf")
    # Release the figure before the next one is created.
    plt.close()

### FIGURE 08

for label `fig:fbm_avg_crossing_durations` 

In [None]:
def figure_08(fig, log2_len, n_mc_rep, gen, method, bars=False, legend=True):
    """Plot the rescaled average crossing durations per level.

    One curve is drawn for every value in the module-level `exponents`
    list that has results under `experiment[log2_len][n_mc_rep][gen][method]`.
    For exponent H the stored averages are divided by `scale_m ** (1 / H)`
    per replication, averaged across replications ignoring NaNs, and
    multiplied by `2 ** (-level / H)` with zero-based `level`.

    Parameters
    ----------
    fig : matplotlib figure
        Figure that receives a single subplot.
    log2_len, n_mc_rep, gen, method
        Keys selecting the group of results in `experiment`.
    bars : bool
        Draw error bars of one standard deviation around each curve.
    legend : bool
        Add a legend to the plot.
    """
    data_ = experiment[log2_len][n_mc_rep][gen][method]
    ax = fig.add_subplot(111)

    color_ = plt.cm.rainbow(np.linspace(0, 1, num=len(exponents)))[::-1]
    for col_, hurst_ in zip(color_, exponents):
        try:
            scale_m, Nmn, Dmnk, Vmnde, Wmnp, Wavgmn, Wstdmn = data_[hurst_]
        except KeyError:
            continue
        # The builtin `float` replaces the `np.float` alias, which newer
        # NumPy releases no longer provide.
        level = np.arange(Wavgmn.shape[-1], dtype=float)
        scale_ = 2 ** (-level / hurst_)
        # Per-replication rescaling, computed once for both mean and std.
        rescaled_ = Wavgmn / (scale_m[:, np.newaxis] ** (1 / hurst_))
        Wavgn_ = np.nanmean(rescaled_, axis=0) * scale_
        if bars:
            Wstdn_ = np.nanstd(rescaled_, axis=0) * scale_
            ax.errorbar(1+level, Wavgn_, yerr=Wstdn_, fmt="-s", color=col_,
                        markersize=3, alpha=1.0, label="%s %0.3f"%(gen, hurst_))
        else:
            ax.plot(1+level, Wavgn_, "-s", color=col_, markersize=3,
                    alpha=1.0, label="%s %0.3f"%(gen, hurst_))

    ax.set_xticks(range(1, 21))
    ax.grid(color="grey", linestyle=":", alpha=0.5)
    # NOTE(review): recent Matplotlib releases spell this keyword `base`;
    # confirm against the installed version before upgrading.
    ax.set_yscale("log", basey=2)
    ax.set_xlim(0.9, 20.1)
    ax.set_xlabel("level")
    ax.set_ylabel("$\\left(2^n \\delta\\right)^{-H^{-1}} {\\mathbb{E}W^n}$")
    if legend:
        ax.legend(loc="lower left", frameon=1, ncol=2, fontsize=8)


Create a figure-08 plot of scaled average crossing durations.

In [None]:
# Render and save one figure-08 PDF per configured experiment.
for experiment_ in experiments:
    log2_len, n_mc_rep, gen, method = experiment_
    name_ = "fig_08-%s-%d-%s-%s.pdf"%(gen, log2_len, n_mc_rep, method)

    fig = plt.figure(figsize=(6, 5))
    figure_08(fig, log2_len, n_mc_rep, gen, method, bars=False, legend=True)
    fig.savefig(os.path.join(output_path, name_), format="pdf")
    # Release the figure before the next one is created.
    plt.close()
    

### FIGURE