In [1]:
import warnings
import numpy as np
import pandas as pd


import sys
from pathlib import Path
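# Locate the project's "src" directory one level above the current working
# directory (in Jupyter Lab / Notebook this is normally the notebook's folder).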
module_path = str(Path.cwd().parents[0] / "src")


if module_path not in sys.path:
    sys.path.append(module_path)

from HurstIndexSolver.HurstIndexSolver import HurstIndexSolver
from tqdm.notebook import tqdm
from IPython.display import display, Markdown
import joblib
import seaborn as sns
import matplotlib.pyplot as plt
warnings.filterwarnings("ignore")

In [2]:
def feval(func: str, para: str, Class=""):
    """Build a call expression from strings and evaluate it.

    The expression has the form ``"<Class>.<func>(<para>)"`` (or
    ``"<func>(<para>)"`` when ``Class`` is empty) and is passed to ``eval``.
    Names appearing in the expression are therefore resolved by ``eval`` in
    this function's scope and then in the module globals.

    Args:
        func: Name of the callable to invoke.
        para: Source text of the argument list, inserted verbatim between
            the parentheses.
        Class: Optional name of the object owning ``func``; when non-empty
            it is joined to ``func`` with a dot.

    Returns:
        Whatever the evaluated call returns.

    NOTE(review): ``eval`` executes arbitrary code; only pass trusted strings.
    """
    # Qualify the callable with its owner, e.g. "HSolver" -> "HSolver.".
    Class = Class + "." if len(Class) > 0 else Class
    # Assemble the call template first, then splice in the raw argument text.
    func = "".join([Class, func, "({:s})"])
    return eval(func.format(para))


In [3]:
# N is not used in the visible cells (presumably the full series length --
# TODO confirm); m is the number of columns read from each input file.
N = 30000
m = 30

# Hurst exponents under test: 11 evenly spaced values from 0.3 to 0.8.
Hlist = np.linspace(start=0.3, stop=0.8, num=11)

# Short label -> [estimator method name, extra argument text].
# The second entry is appended verbatim after "ts" when the call string is
# built, so names such as `minimal` and `maxscale` must exist when it is
# evaluated.
Methods = {
    "AM": ["EstHurstAbsoluteMoments", ", minimal=minimal, method='L2'"],
    "AV": ["EstHurstAggregateVariance", ", minimal=minimal, method='L2'"],
    "GHE": ["EstHurstGHE", ", method='L2'"],
    "HM": ["EstHurstHiguchi", ", method='L2'"],
    "DFA": ["EstHurstDFAnalysis", ", minimal=minimal, method='L2'"],
    "VRR": ["EstHurstRegrResid", ""],
    "RS": ["EstHurstRSAnalysis", ", minimal=minimal, method='L2'"],
    "RS2": ["RS4Hurst", ", minimal=minimal, method='L2'"],
    "TTA": ["EstHurstTTA", ", method='L2'"],
    "PM": ["EstHurstPeriodogram", ", cutoff=0.3, method='L2'"],
    "AWC": ["EstHurstAWC", ", method='L2'"],
    "VVL": ["EstHurstVVL", ", method='L2'"],
    "LW": ["EstHurstLocalWhittle", ""],
    "LSSD": ["EstHurstLSSD", ", max_scale=maxscale"],
    "LSV": ["EstHurstLSV", ", max_scale=maxscale"],
}

# Single solver instance shared by every estimator call.
HSolver = HurstIndexSolver()

# Series lengths to evaluate; each run uses the last `sample` rows of a series.
samples = [30000, 10000, 5000, 1000, 252]

# Filled by the estimation loop: result[sample][method][H][column] -> estimate.
result = dict()



In [None]:
# Load every input file once, keyed by its Hurst exponent. The original cell
# re-read the same CSV for each (sample, method) pair, and its path was
# written "..data/..." (missing the separator after ".."), which does not
# point at the parent directory.
fgn_by_H = {
    H: pd.read_csv("../data/raw/zhang/FGN_{:.2f}_30.csv".format(H)).values
    for H in Hlist
}

for sample in tqdm(samples, desc="sample size"):
    result[sample] = {}
    # `maxscale`, `minimal` and `ts` must stay module-level names: the
    # argument strings in `Methods` refer to them and `feval` resolves them
    # through `eval`.
    maxscale = 100
    minimal = min(sample // 10, 50)
    for method in tqdm(Methods, desc="method"):
        name = Methods[method][0]
        addParas = Methods[method][1]
        result[sample][method] = {}
        for H in Hlist:
            fH = fgn_by_H[H]
            result[sample][method][H] = {}
            for i in range(m):
                # Use only the last `sample` rows of column i.
                ts = fH[-sample:, i]
                result[sample][method][H][i] = feval(
                    name, "ts" + addParas, Class="HSolver"
                )

In [None]:
# Persist the nested `result` dict to disk so it can be reloaded later.
joblib.dump(value=result, filename="result_dict.pkl.gz")

In [None]:
def extract_df(result: dict, size: int) -> pd.DataFrame:
    """Flatten ``result[size]`` into a single DataFrame.

    ``result[size]`` is expected to be a two-level nested mapping
    ``outer_key -> inner_key -> {column: value}``. Each outer entry becomes a
    frame with one row per inner key; the frames are stacked and the
    resulting two-level index is moved into ordinary columns.

    Args:
        result: Mapping that contains ``size`` as a key.
        size: Key selecting which sub-mapping to flatten.

    Returns:
        DataFrame whose first two columns, ``level_0`` and ``level_1``, hold
        the outer and inner keys, followed by one column per innermost key.
    """
    per_size = result[size]
    outer_keys = list(per_size)
    # One frame per outer key; rows are that entry's inner keys.
    blocks = [
        pd.DataFrame.from_dict(per_size[key], orient="index") for key in outer_keys
    ]
    stacked = pd.concat(blocks, keys=outer_keys)
    return stacked.reset_index()

In [4]:
def plot_boxplot(result: dict, samples: list[int]) -> None:
    """Show per-method boxplots and an error table for each sample size.

    For every entry of ``samples`` the function:

    1. flattens ``result[sample]`` with ``extract_df``;
    2. for each distinct ``level_1`` value (the reference level), draws one
       boxplot per ``level_0`` value (the method) with a dashed red line at
       the reference level;
    3. displays a styled table of ``|mean(estimate) - level| / level`` with
       one row per level and one column per method, highlighting the
       smallest and largest value in each row.

    Args:
        result: Nested mapping indexed as ``result[sample]``, in the layout
            accepted by ``extract_df``.
        samples: Sample sizes to report; each must be a key of ``result``.

    Returns:
        None. Output is produced through ``display`` and matplotlib.
    """
    for sample in samples:
        display(Markdown(f"## Sample size = {sample:,.0f}"))
        df = extract_df(result, sample)
        summary = []
        for level in df.level_1.unique():
            # Rows -> repetitions, columns -> methods. A plain transpose
            # replaces the original row-by-row to_frame()/concat loop; the
            # axis name is dropped so the resulting labels stay unchanged.
            final_df = (
                df.loc[df["level_1"] == level]
                .drop(columns="level_1")
                .set_index("level_0")
                .T
                .rename_axis(None, axis=1)
            )
            fig, ax = plt.subplots(figsize=(20, 8))
            sns.boxplot(data=final_df, ax=ax)
            ax.axhline(level, ls="--", c="r")
            ax.set_title(
                f"Estimated Hurst values for sample size= {sample:,.0f} and hurst={level:.2f}"
            )
            ax.set_xlabel("Method")
            plt.show()
            # Release the figure explicitly; many are created per call.
            plt.close(fig)
            s = final_df.mean(axis=0)
            s.name = level
            summary.append(s)
        err_df = pd.concat(summary, axis=1).T
        # The table holds |mean estimate - H| / H, i.e. a relative error of
        # the mean estimate, not a mean absolute error; the heading used to
        # read "MAE table".
        display(Markdown("### Absolute relative error of the mean estimate"))
        display(
            err_df.sub(err_df.index.values, axis=0)
            .abs()
            .div(err_df.index.values, axis=0)
            .style.format("{:.2%}")
            .highlight_min(color="lightgreen", axis=1)
            .highlight_max(color="red", axis=1)
        )
    return

In [None]:
# Render the boxplots and the error table for every configured sample size.
plot_boxplot(result=result, samples=samples)