In [17]:
import os
import re
import sys
import pylab
import warnings
import numpy as np
import pandas as pd
import scipy as sci
import statsmodels.api as sm
import matplotlib.pyplot as plt

from MFDFA import MFDFA
from MFDFA import fgn # Fractional Gaussian noises

sys.path.append('../modules')
import plot_hurst_tfs as plt_hurst_tfs
import estimate_hurst_mfdfa as estimate_mfdfa
import estimate_theil_index as estimate_theil
import estimate_temporal_theil_scaling as estimate_tts
import estimate_temporal_fluctuation_scaling as estimate_tfs

warnings.filterwarnings("ignore")
pd.options.mode.chained_assignment = None
pd.set_option("display.max_columns", None)

In [18]:
input_path_raw = "../input_files/raw_data"
input_path_processed = "../input_files/processed_data"
input_path_data_dictionary = "../input_files/data_dictionary"
log_path = "../logs"
output_path = "../output_files"
input_generation_date = "2023-07-01"

In [20]:
delta_t = 0.001
n_points = 512
n_simulations = 30
n_aux = 0

#saved_data = True
#saved_data_dtts = True
#saved_data_theil = True
#saved_data_tts = True

saved_data = True
saved_data_dtts = True
saved_data_theil = True
saved_data_tts = True

#hurst_values = [0.4, 0.5, 0.6, 0.7, 0.8]
#hurst_values = [0.25, 0.3, 0.35, 0.46]
hurst_values = [0.25, 0.3, 0.35]#, 0.45, 0.55, 0.65, 0.75, 0.85, 0.9, 0.95]

# Generate Fractional Brownian Motion

In [22]:
def generate_fractional_brownian_motion(
    delta_t,
    hurst_values,
    n_points,
    n_simulations
):
    """Generate multiple fractional brownian motion (fBm)
    Generate multiple simulations of fBm with the same amount of points and same parameters:
        delta_t: Integration time of fractional Ohrstein-Uhlenbeck process
        hurst_values: Multiple Hurst used in the simulations of fBm
        n_points: Number of steps in fBm sample
        n_simulations: Number of simulations per Hurst value
    """
    # The time array of the trajectory
    time = np.arange(0, delta_t * n_points, delta_t)
    if isinstance(hurst_values, float) == True:
        hurst_values = [hurst_values]
        
    # Iteration process
    df_fbm = []
    for hurst in hurst_values:
        n_sim = 1
        while n_sim < n_simulations:
            # Fractional Gaussian noise
            y = ((delta_t * n_points) ** hurst) * fgn(N = time.size, H = hurst)
            
            # Estimation of Hurst exponent using MF-DFA
            lag = np.unique(np.logspace(0.5, 3, 1000).astype(int))
            lag, dfa = MFDFA(y + y.min(), lag = lag, q = 1, order = 1)
            H_hat = np.polyfit(np.log(lag)[4:20],np.log(dfa[4:20]),1)[0]
            
            if np.abs(hurst - H_hat[0]) < 0.02:
                # Dataframe local
                df_local = pd.DataFrame(
                    {
                        "symbol" : ["fBm_H_{}_simulation_{}".format(hurst, n_sim)] * len(y),
                        "ticker_name" : ["Fractional Brownian motion (fBm) H={}, N={}".format(hurst, n_sim)] * len(y),
                        "date" : time,
                        "step" : np.arange(len(y)),
                        "log_return" : y,
                        "real_hurst_exponent" : [hurst] * len(y),
                        "estimated_hurst_exponent" : [H_hat[0]] * len(y)
                    }
                )

                # Estimate absolute log-return ----
                df_local["absolute_log_return"] = np.abs(df_local["log_return"])

                # Estimate log-return volatility ----
                df_temp_1 = df_local.rename(columns = {"absolute_log_return" : "temp_1"}).groupby(["symbol"])["temp_1"].max()
                df_local = df_local.merge(df_temp_1, left_on = "symbol", right_on = "symbol")
                df_local["z_score"] = df_local[["absolute_log_return"]].apply(lambda x: (x - np.mean(x)) / np.std(x))
                df_local["log_volatility"] = np.sqrt(np.abs(df_local["z_score"])) / df_local["temp_1"]

                # Replace NaN with zeros ----
                df_local["log_return"] = df_local["log_return"].fillna(0)
                df_local["absolute_log_return"] = df_local["absolute_log_return"].fillna(0)
                df_local["log_volatility"] = df_local["log_volatility"].fillna(0)

                # Estimate cumulative sum of log-return, absolute log-return and log-return volatility ----
                df_local["cum_log_return"] = df_local["log_return"].cumsum()
                df_local["cum_absolute_log_return"] = df_local["absolute_log_return"].cumsum()
                df_local["cum_log_volatility"] = df_local["log_volatility"].cumsum()

                # Estimate cumulative mean of log-return, absolute log-return and log-return volatility ----
                df_local["cummean_log_return"] = df_local["log_return"].rolling(df_local.shape[0], min_periods = 1).mean()
                df_local["cummean_absolute_log_return"] = df_local["absolute_log_return"].rolling(df_local.shape[0], min_periods = 1).mean()
                df_local["cummean_log_volatility"] = df_local["log_volatility"].rolling(df_local.shape[0], min_periods = 1).mean()

                # Estimate cumulative variance of log-return, absolute log-return and log-return volatility ----
                df_local["cumvariance_log_return"] = df_local["log_return"].rolling(df_local.shape[0], min_periods = 2).var()
                df_local["cumvariance_absolute_log_return"] = df_local["absolute_log_return"].rolling(df_local.shape[0], min_periods = 2).var()
                df_local["cumvariance_log_volatility"] = df_local["log_volatility"].rolling(df_local.shape[0], min_periods = 2).var()

                # Append simulation to final dataframe
                df_fbm.append(df_local.fillna(0))
                
                n_sim += 1
            else:
                n_sim = n_sim
            
        print("- Processed fBm H = {}".format(round(hurst, 3)))
    
    df_fbm = pd.concat(df_fbm)
    del [df_fbm["temp_1"], df_fbm["z_score"]]
    
    return df_fbm

In [23]:
df_local_final = []
for hurst in hurst_values:
    if saved_data == True:
        df_local = pd.read_csv("{}/df_fBm_{}_{}.csv".format(input_path_processed, hurst, re.sub("-", "", input_generation_date)), low_memory = False)
    else:
        df_local = generate_fractional_brownian_motion(
            delta_t = delta_t,
            hurst_values = hurst,
            n_points = n_points,
            n_simulations = n_simulations
        )
        df_local.to_csv("{}/df_fBm_{}_{}.csv".format(input_path_processed, hurst, re.sub("-", "", input_generation_date)), index = False)
    df_local_final.append(df_local)
df_local = pd.concat(df_local_final)
df_local

Unnamed: 0,symbol,ticker_name,date,step,log_return,real_hurst_exponent,estimated_hurst_exponent,absolute_log_return,log_volatility,cum_log_return,cum_absolute_log_return,cum_log_volatility,cummean_log_return,cummean_absolute_log_return,cummean_log_volatility,cumvariance_log_return,cumvariance_absolute_log_return,cumvariance_log_volatility
0,fBm_H_0.25_simulation_1,"Fractional Brownian motion (fBm) H=0.25, N=1",0.000,0,-0.181215,0.25,0.256085,0.181215,1.057906,-0.181215,0.181215,1.057906,-0.181215,0.181215,1.057906,0.000000,0.000000,0.000000
1,fBm_H_0.25_simulation_1,"Fractional Brownian motion (fBm) H=0.25, N=1",0.001,1,0.398712,0.25,0.256085,0.398712,2.616634,0.217497,0.579928,3.674541,0.108748,0.289964,1.837270,0.168158,0.023652,1.214816
2,fBm_H_0.25_simulation_1,"Fractional Brownian motion (fBm) H=0.25, N=1",0.002,2,-0.096702,0.25,0.256085,0.096702,1.051872,0.120795,0.676630,4.726412,0.040265,0.225543,1.575471,0.098149,0.024276,0.813025
3,fBm_H_0.25_simulation_1,"Fractional Brownian motion (fBm) H=0.25, N=1",0.003,3,0.234812,0.25,0.256085,0.234812,1.590787,0.355607,0.911442,6.317199,0.088902,0.227861,1.579300,0.074895,0.016206,0.542075
4,fBm_H_0.25_simulation_1,"Fractional Brownian motion (fBm) H=0.25, N=1",0.004,4,-0.227747,0.25,0.256085,0.227747,1.531188,0.127860,1.139189,7.848387,0.025572,0.227838,1.569677,0.076224,0.012154,0.407019
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
14843,fBm_H_0.35_simulation_29,"Fractional Brownian motion (fBm) H=0.35, N=29",0.507,507,-0.168050,0.35,0.359838,0.168050,4.697864,1.277797,37.210242,1480.279060,0.002515,0.073249,2.913935,0.008140,0.002770,1.387763
14844,fBm_H_0.35_simulation_29,"Fractional Brownian motion (fBm) H=0.35, N=29",0.508,508,-0.045997,0.35,0.359838,0.045997,2.516737,1.231800,37.256239,1482.795797,0.002420,0.073195,2.913155,0.008128,0.002766,1.385341
14845,fBm_H_0.35_simulation_29,"Fractional Brownian motion (fBm) H=0.35, N=29",0.509,509,-0.098097,0.35,0.359838,0.098097,2.406367,1.133702,37.354336,1485.202164,0.002223,0.073244,2.912161,0.008132,0.002762,1.383123
14846,fBm_H_0.35_simulation_29,"Fractional Brownian motion (fBm) H=0.35, N=29",0.510,510,0.080966,0.35,0.359838,0.080966,1.343076,1.214668,37.435302,1486.545241,0.002377,0.073259,2.909090,0.008128,0.002756,1.385229


## Estimate DTTS

In [7]:
df_dtts_final = []
for hurst in hurst_values:
    if saved_data_dtts == True:
        df_dtts = pd.read_csv("{}/df_fBm_dtts_{}_{}.csv".format(input_path_processed, hurst, re.sub("-", "", input_generation_date)), low_memory = False)
    else:
        df_dtts = estimate_theil.estimate_diffusive_algorithm_global(
            df_data = df_local,
            minimal_steps = 0,
            log_path = log_path,
            log_filename = "log_dtts_fbm",
            verbose = 1,
            tqdm_bar = True
        )
        df_dtts.to_csv("{}/df_fBm_dtts_{}_{}.csv".format(input_path_processed, hurst, re.sub("-", "", input_generation_date)), index = False)
    df_dtts_final.append(df_dtts)
df_dtts = pd.concat(df_dtts_final)
df_dtts

Unnamed: 0,level_0,symbol,time,sub_time,diffusive_log_return,diffusive_absolute_log_return,diffusive_log_volatility
0,0,fBm_H_0.25_simulation_1,0,0,-0.181215,0.181215,1.057906
1,1,fBm_H_0.25_simulation_1,1,0,0.217497,0.579928,3.674541
2,2,fBm_H_0.25_simulation_1,2,0,0.120795,0.676630,4.726412
3,3,fBm_H_0.25_simulation_1,3,0,0.355607,0.911442,6.317199
4,4,fBm_H_0.25_simulation_1,4,0,0.127860,1.139189,7.848387
...,...,...,...,...,...,...,...
10827141,70,fBm_H_0.35_simulation_25,200,130,-0.359061,5.343123,208.688129
10827142,71,fBm_H_0.35_simulation_25,201,130,-0.302714,5.399469,210.675411
10827143,72,fBm_H_0.35_simulation_25,202,130,-0.306378,5.403133,214.740885
10827144,73,fBm_H_0.35_simulation_25,203,130,-0.392843,5.489598,216.541418


In [7]:
df_theil = []
if saved_data_theil == True:
    for j in sorted(df_local["real_hurst_exponent"].unique()):
        for k in np.arange(n_simulations):
            try:
                df_theil_local = pd.read_csv("{}/df_fBm_theil_h_{}_sim_{}_{}.csv".format(input_path_processed, j, k, re.sub("-", "", input_generation_date)), low_memory = False)
                df_theil.append(df_theil_local)
                print("Load TTS data for H={}, N={}".format(round(j, 2), k))
            except:
                print("Load TTS data for H={}, N={}".format(round(j, 2), k))
else:
    for k in np.arange(n_simulations):
        df_temp = df_dtts[df_dtts["symbol"].str.contains("simulation_{}".format(k))]
        for j in sorted(df_local["real_hurst_exponent"].unique()):
            try:
                df_theil_local = estimate_theil.estimate_shannon_index_global(
                    df_dtts_data = df_temp[df_temp["symbol"].str.contains("H_{}_".format(j))],
                    minimal_steps = 0,
                    log_path = log_path,
                    log_filename = "log_theil_fbm",
                    verbose = 1,
                    tqdm_bar = True
                )
                df_theil_local.to_csv("{}/df_fBm_theil_h_{}_sim_{}_{}.csv".format(input_path_processed, j, k, re.sub("-", "", input_generation_date)), index = False)
                df_theil.append(df_theil_local)
                print("Saved TTS data for H={}, N={}".format(round(j, 2), k))
            except:
                print("Saved TTS data for H={}, N={}".format(round(j, 2), k))
df_theil = pd.concat(df_theil)
df_theil

Load TTS data for H=0.25, N=0
Load TTS data for H=0.25, N=1
Load TTS data for H=0.25, N=2
Load TTS data for H=0.25, N=3
Load TTS data for H=0.25, N=4
Load TTS data for H=0.25, N=5
Load TTS data for H=0.25, N=6
Load TTS data for H=0.25, N=7
Load TTS data for H=0.25, N=8
Load TTS data for H=0.25, N=9
Load TTS data for H=0.25, N=10
Load TTS data for H=0.25, N=11
Load TTS data for H=0.25, N=12
Load TTS data for H=0.25, N=13
Load TTS data for H=0.25, N=14
Load TTS data for H=0.25, N=15
Load TTS data for H=0.25, N=16
Load TTS data for H=0.25, N=17
Load TTS data for H=0.25, N=18
Load TTS data for H=0.25, N=19
Load TTS data for H=0.25, N=20
Load TTS data for H=0.25, N=21
Load TTS data for H=0.25, N=22
Load TTS data for H=0.25, N=23
Load TTS data for H=0.25, N=24
Load TTS data for H=0.25, N=25
Load TTS data for H=0.25, N=26
Load TTS data for H=0.25, N=27
Load TTS data for H=0.25, N=28
Load TTS data for H=0.25, N=29
Load TTS data for H=0.3, N=0
Load TTS data for H=0.3, N=1
Load TTS data for H=0.

Unnamed: 0,symbol,time,time_series,mean_value,theil_index,shannon_index
0,fBm_H_0.25_simulation_1,0,log-return,-0.181215,0.000000,1.098612
1,fBm_H_0.25_simulation_1,0,absolute log-return,0.181215,0.000000,1.098612
2,fBm_H_0.25_simulation_1,0,log-return volatility,1.057906,0.000000,1.098612
3,fBm_H_0.25_simulation_1,1,log-return,0.308105,0.043888,1.747872
4,fBm_H_0.25_simulation_1,1,absolute log-return,0.489320,0.017243,1.774516
...,...,...,...,...,...,...
1531,fBm_H_0.35_simulation_29,510,absolute log-return,18.867968,0.194989,6.734527
1532,fBm_H_0.35_simulation_29,510,log-return volatility,749.395698,0.192191,6.737326
1533,fBm_H_0.35_simulation_29,511,log-return,1.065902,0.101582,6.829890
1534,fBm_H_0.35_simulation_29,511,absolute log-return,18.881756,0.195571,6.735900


## Estimate TTS

In [24]:
df_tts_final = []
for hurst in hurst_values:
    if saved_data_tts == True:
        df_tts = pd.read_csv("{}/df_fBm_tts_{}_{}.csv".format(input_path_processed, hurst, re.sub("-", "", input_generation_date)), low_memory = False)
    else:
        df_tts = estimate_tts.estimate_tts_parameters(
            df_fts = df_theil,
            minimal_steps = n_points - 1,
            p_norm = 2,
            log_path = log_path,
            log_filename = "tts_evolution_fbm",
            verbose = 1,
            tqdm_bar = True
        )
        df_tts.to_csv("{}/df_fBm_tts_{}_{}.csv".format(input_path_processed, hurst, re.sub("-", "", input_generation_date)), index = False)
    df_tts_final.append(df_tts)
df_tts = pd.concat(df_tts_final)
df_tts

Unnamed: 0,symbol,max_step,time_series,p_norm,coefficient_tts,error_coefficient_tts,exponent_tts,error_exponent_tts,average_error_tts,rsquared_tts
0,fBm_H_0.25_simulation_1,511,absolute log-return,2,1.021011,0.001583,1.795234e-01,0.001460,0.099591,0.976012
1,fBm_H_0.25_simulation_1,511,log-return,2,0.681413,0.071629,3.028696e-07,0.108107,0.485969,-0.002515
2,fBm_H_0.25_simulation_1,511,log-return volatility,2,1.017990,0.001228,1.821372e-01,0.001176,0.093246,0.985006
3,fBm_H_0.25_simulation_10,511,absolute log-return,2,1.009180,0.001021,1.709616e-01,0.000965,0.094137,0.977246
4,fBm_H_0.25_simulation_10,511,log-return,2,2.480849,1.669434,5.253062e+00,3.027661,0.838431,0.005910
...,...,...,...,...,...,...,...,...,...,...
256,fBm_H_0.35_simulation_8,511,log-return,2,2.367417,5.128943,5.570334e+00,11.147892,0.793416,0.000607
257,fBm_H_0.35_simulation_8,511,log-return volatility,2,1.009940,0.000862,1.703293e-01,0.000811,0.074773,0.992066
258,fBm_H_0.35_simulation_9,511,absolute log-return,2,1.017251,0.001207,1.891788e-01,0.001204,0.090049,0.985535
259,fBm_H_0.35_simulation_9,511,log-return,2,0.296431,0.192874,1.374177e-06,0.773999,0.727604,-0.000140


## Final relation

In [25]:
def prepare_data(
    df_fts,
    df_tts,
    interval,
    threshold_tts=0  
):
    """Preparation of data for plotting
    Join original data with optimal window size data:
        df_fts: Dataframe with multiple financial time series
        df_tts: Dataframe with temporal Theil scaling parameters
        interval: Select transformation for estimation of time between minimum and maximum date
        threshold_tts: Threshold of determination coefficient of temporal Theil scaling (TTS)
    """
    
    # Estimation of interval of time for each ticker ----
    df_fts["min_date"] = pd.to_datetime(df_fts.groupby(["symbol"])["date"].transform("min"), errors = "coerce", infer_datetime_format = True)
    df_fts["max_date"] = pd.to_datetime(df_fts.groupby(["symbol"])["date"].transform("max"), errors = "coerce", infer_datetime_format = True)
    
    interval_dict = {"years" : "Y", "months" : "M", "weeks" : "W", "days" : "D", "hours" : "h", "minutes" : "m", "seconds" : "s", "milliseconds" : "ms"}
    df_fts["duration"] = (df_fts["max_date"] - df_fts["min_date"]) / np.timedelta64(1, interval_dict[interval])
    df_dates = df_fts[["symbol", "min_date", "max_date", "duration"]].drop_duplicates(["symbol", "min_date", "max_date", "duration"])
    
    # Log-return data ----
    df_logr = (
        df_fts[["date", "symbol", "step", "cummean_log_return", "cumvariance_log_return"]]
            .rename(columns = {"cummean_log_return" : "cummean", "cumvariance_log_return" : "cumvariance"})
    )
    df_logr["time_series"] = "log-return"
    
    # Absolute log-return data ----
    df_loga = (
        df_fts[["date", "symbol", "step", "cummean_absolute_log_return", "cumvariance_absolute_log_return"]]
            .rename(columns = {"cummean_absolute_log_return" : "cummean", "cumvariance_absolute_log_return" : "cumvariance"})
    )
    df_loga["time_series"] = "absolute log-return"
    
    # Log-return volatility data ----
    df_logv = (
        df_fts[["date", "symbol", "step", "cummean_log_volatility", "cumvariance_log_volatility"]]
            .rename(columns = {"cummean_log_volatility" : "cummean", "cumvariance_log_volatility" : "cumvariance"})
    )
    df_logv["time_series"] = "log-return volatility"
    
    # Merge final data (Hurst exponent and Temporal Theil Scaling (TTS)) ----
    df_plot_data = (
        pd.concat([df_logr, df_loga, df_logv])
            .merge(df_dates, left_on = ["symbol"], right_on = ["symbol"])
            .merge(df_fts[["symbol", "real_hurst_exponent", "estimated_hurst_exponent"]].drop_duplicates(), left_on = ["symbol"], right_on = ["symbol"])
            .merge(df_tts, left_on = ["symbol", "time_series", "step"], right_on = ["symbol", "time_series", "max_step"])
    )
    
    df_plot_data["coefficient_tts"].replace([np.inf, -np.inf], np.nan, inplace = True)
    df_plot_data = df_plot_data[(df_plot_data["exponent_tts"] != 0)]    
    df_plot_data = df_plot_data[df_plot_data["rsquared_tts"] >= threshold_tts]
    
    del [df_plot_data["max_step"]]
        
    return df_plot_data

In [26]:
df_final = prepare_data(
    df_fts = df_local,
    df_tts = df_tts,
    interval = "days",
    threshold_tts = 0.99
)

In [27]:
df_final

Unnamed: 0,date,symbol,step,cummean,cumvariance,time_series,min_date,max_date,duration,real_hurst_exponent,estimated_hurst_exponent,p_norm,coefficient_tts,error_coefficient_tts,exponent_tts,error_exponent_tts,average_error_tts,rsquared_tts
105,0.511,fBm_H_0.25_simulation_12,511,1.518842,0.419744,log-return volatility,1970-01-01,1970-01-01,0.0,0.25,0.268905,2,1.012201,0.000663,0.172824,0.000623,0.078603,0.990850
106,0.511,fBm_H_0.25_simulation_12,511,1.518842,0.419744,log-return volatility,1970-01-01,1970-01-01,0.0,0.25,0.268905,2,1.012201,0.000663,0.172824,0.000623,0.078603,0.990850
107,0.511,fBm_H_0.25_simulation_12,511,1.518842,0.419744,log-return volatility,1970-01-01,1970-01-01,0.0,0.25,0.268905,2,1.012201,0.000663,0.172824,0.000623,0.078603,0.990850
156,0.511,fBm_H_0.25_simulation_18,511,0.141251,0.010829,absolute log-return,1970-01-01,1970-01-01,0.0,0.25,0.248823,2,1.006700,0.000603,0.181750,0.000619,0.067665,0.991911
157,0.511,fBm_H_0.25_simulation_18,511,0.141251,0.010829,absolute log-return,1970-01-01,1970-01-01,0.0,0.25,0.248823,2,1.006700,0.000603,0.181750,0.000619,0.067665,0.991911
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
682,0.511,fBm_H_0.35_simulation_18,511,2.842424,1.368289,log-return volatility,1970-01-01,1970-01-01,0.0,0.35,0.363161,2,1.007290,0.000592,0.170174,0.000569,0.082008,0.992114
683,0.511,fBm_H_0.35_simulation_18,511,2.842424,1.368289,log-return volatility,1970-01-01,1970-01-01,0.0,0.35,0.363161,2,1.007290,0.000592,0.170174,0.000569,0.082008,0.992114
690,0.511,fBm_H_0.35_simulation_19,511,2.650173,1.121562,log-return volatility,1970-01-01,1970-01-01,0.0,0.35,0.337721,2,1.015731,0.000704,0.179404,0.000673,0.082286,0.990014
691,0.511,fBm_H_0.35_simulation_19,511,2.650173,1.121562,log-return volatility,1970-01-01,1970-01-01,0.0,0.35,0.337721,2,1.015731,0.000704,0.179404,0.000673,0.082286,0.990014


In [28]:
df_final.groupby(["time_series", "real_hurst_exponent"])["exponent_tts"].agg(["mean", "std"]).reset_index()

Unnamed: 0,time_series,real_hurst_exponent,mean,std
0,absolute log-return,0.25,0.185668,0.004291
1,absolute log-return,0.3,0.174628,0.009391
2,absolute log-return,0.35,0.165551,0.0
3,log-return volatility,0.25,0.173332,0.006756
4,log-return volatility,0.3,0.174481,0.004883
5,log-return volatility,0.35,0.174795,0.004746


# Stop code to avoid overwriting results
To avoid automatically overwriting results, this cell is set to stop the notebook from running

In [None]:
import sys
sys.exit("Avoid overwritting result with the same value of Hurst exponent")

In [37]:
df = pd.read_csv("{}/df_fBm_tts_20230701.csv".format(input_path_processed), low_memory = False)
df["real_hurst_exponent"] = (
    df["symbol"].replace("(fBm_H_)|(_simulation_\d\d)", "", regex = True)
        .replace("(fBm_H_)|(_simulation_\d)|(_simulation_\d\d)", "", regex = True)
        .astype(float)
)
df[df["rsquared_tts"] >= 0.97].groupby(["time_series", "real_hurst_exponent"])["exponent_tts"].agg(["mean", "std"]).reset_index()

Unnamed: 0,time_series,real_hurst_exponent,mean,std
0,absolute log-return,0.4,0.217623,0.008948
1,absolute log-return,0.46,0.218866,0.009081
2,absolute log-return,0.52,0.218787,0.008455
3,absolute log-return,0.58,0.217247,0.007686
4,absolute log-return,0.64,0.216949,0.010746
5,absolute log-return,0.7,0.220507,0.010561
6,absolute log-return,0.76,0.221775,0.00896
7,absolute log-return,0.82,0.226448,0.024808
8,absolute log-return,0.88,0.22107,0.025788
9,absolute log-return,0.94,0.215932,0.020331
