In [1]:
import os
import pandas as pd


In [2]:
import os
import pandas as pd

# Use current working directory as base
base_dir = os.getcwd()
folder_path = os.path.join(base_dir, "LOG_RETURNS_KALMAN_INTERPOLATED", "weekly")

log_returns = {}

for file in os.listdir(folder_path):
    if file.endswith(".xlsx"):
        file_path = os.path.join(folder_path, file)
        card_name = file.replace(".xlsx", "")
        log_returns[card_name] = pd.read_excel(file_path)

print(f"Loaded {len(log_returns)} log return datasets.")

Loaded 9 log return datasets.


In [3]:
log_returns.keys()

dict_keys(['weekly_data_aggregated_Charizard-Holo_#11_grade_9', 'weekly_data_aggregated_Charizard-Holo_#4_grade_8_FINAL', 'weekly_data_aggregated_Charizard-Holo_#4_grade_9_FINAL', 'weekly_data_aggregated_Charizard_GX_#9_grade_10', 'weekly_data_aggregated_Full_ArtCharizard_GX_#SV49_grade_10', 'weekly_data_aggregated_Full_ArtCharizard_Vmax_#020_grade_10', 'weekly_data_aggregated_Full_ArtCharizard_Vmax_#SV107_grade_10', 'weekly_data_aggregated_Full_ArtMoltres_&_Zapdos_&_Articuno_GX_#69_(SECRET)_grade_10', 'weekly_data_aggregated_Full_ArtM_Charizard_EX_#101_grade_10'])

In [4]:
# Replace with any dataset name to preview
sample_card = list(log_returns.keys())[0]  # First dataset
print(f"Preview of {sample_card}:")
print(log_returns[sample_card].head())


Preview of weekly_data_aggregated_Charizard-Holo_#11_grade_9:
   Date  Log_Returns
0     1    -0.151131
1     2     0.010052
2     3    -0.363843
3     4    -0.041622
4     5     0.147636


In [5]:
!pip install hurst statsmodels numpy pandas


Defaulting to user installation because normal site-packages is not writeable


In [6]:
!pip install fractional

Defaulting to user installation because normal site-packages is not writeable


In [7]:
!pip install fdiff

Defaulting to user installation because normal site-packages is not writeable


# !! USE THE FOLLOWING TWO HURST EXPONENTS (the most refined so far):

In [10]:
from scipy.signal import periodogram


def gph_estimator(series, m=None):
    series = series.dropna().values
    n = len(series)

    if m is None:
        m = int(n ** 0.6)  # Slightly higher exponent for robustness

    freqs, spectrum = periodogram(series, scaling='spectrum')

    valid_indices = freqs > 0
    freqs = freqs[valid_indices]
    spectrum = spectrum[valid_indices]

    eps = 1e-10  # Small constant for stability
    log_freqs = np.log(freqs[:m])
    log_spectrum = np.log(spectrum[:m] + eps)

    X = sm.add_constant(-2 * log_freqs)
    model = sm.OLS(log_spectrum, X).fit()

    d = model.params[1] / 2
    hurst_exp = d + 0.5

    return hurst_exp


  # Apply functions to each dataset
hurst_results_gph = {
    card: {
        "Hurst_GPH": gph_estimator(df["Log_Returns"]),

    }
    for card, df in log_returns.items()
}

# Convert to DataFrame for better visualization
hurst_df_gph = pd.DataFrame(hurst_results_gph).T
hurst_df_gph


Unnamed: 0,Hurst_GPH
weekly_data_aggregated_Charizard-Holo_#11_grade_9,0.435077
weekly_data_aggregated_Charizard-Holo_#4_grade_8_FINAL,0.47892
weekly_data_aggregated_Charizard-Holo_#4_grade_9_FINAL,0.406118
weekly_data_aggregated_Charizard_GX_#9_grade_10,0.514788
weekly_data_aggregated_Full_ArtCharizard_GX_#SV49_grade_10,0.604624
weekly_data_aggregated_Full_ArtCharizard_Vmax_#020_grade_10,0.295768
weekly_data_aggregated_Full_ArtCharizard_Vmax_#SV107_grade_10,0.458987
weekly_data_aggregated_Full_ArtMoltres_&_Zapdos_&_Articuno_GX_#69_(SECRET)_grade_10,0.564974
weekly_data_aggregated_Full_ArtM_Charizard_EX_#101_grade_10,0.492442


In [9]:
import numpy as np
import pandas as pd
import statsmodels.api as sm

def local_whittle_estimator(series, m=None):
    series = series.dropna().values  # Remove NaN values
    n = len(series)

    if m is None:
        m = int(n ** 0.8)  # Default bandwidth choice

    # Compute the Fourier frequencies
    freqs = (2 * np.pi * np.arange(1, m + 1)) / n  # Exclude zero frequency

    # Compute the periodogram
    fft_vals = np.fft.fft(series - np.mean(series))  # Demeaned FFT
    periodogram_vals = (np.abs(fft_vals[1:m + 1]) ** 2) / (n / 2)  # Correct normalization

    # Prevent log(0) issues
    eps = 1e-10
    log_freqs = np.log(freqs)
    log_periodogram = np.log(periodogram_vals + eps)  # Avoid log(0) issues

    # OLS regression to estimate d
    X = sm.add_constant(-2 * log_freqs)  # -2 * log(freq) as explanatory variable
    model = sm.OLS(log_periodogram, X).fit()

    d_estimate = model.params[1] / 2  # Estimate of fractional differencing parameter

    hurst_exp = d_estimate + 0.5  # Convert d to Hurst exponent

    return hurst_exp

# Apply function to each Pokémon card dataset
hurst_results_whittle = {
    card: {
        "Hurst_LW": local_whittle_estimator(df["Log_Returns"])  # Apply Local Whittle
    }
    for card, df in log_returns.items()
}

# Convert to DataFrame for better visualization
hurst_df_whittle = pd.DataFrame(hurst_results_whittle).T  # Transpose for readability

hurst_df_whittle


Unnamed: 0,Hurst_LW
weekly_data_aggregated_Charizard-Holo_#11_grade_9,0.290372
weekly_data_aggregated_Charizard-Holo_#4_grade_8_FINAL,0.367479
weekly_data_aggregated_Charizard-Holo_#4_grade_9_FINAL,0.410069
weekly_data_aggregated_Charizard_GX_#9_grade_10,0.288198
weekly_data_aggregated_Full_ArtCharizard_GX_#SV49_grade_10,0.457975
weekly_data_aggregated_Full_ArtCharizard_Vmax_#020_grade_10,0.388883
weekly_data_aggregated_Full_ArtCharizard_Vmax_#SV107_grade_10,0.481805
weekly_data_aggregated_Full_ArtMoltres_&_Zapdos_&_Articuno_GX_#69_(SECRET)_grade_10,0.317092
weekly_data_aggregated_Full_ArtM_Charizard_EX_#101_grade_10,0.361169


# NOW fractal dimension:

# !! USE THE FOLLOWING FRACTAL DIMENSIONS (the most refined):

In [11]:
import numpy as np
import pandas as pd
from scipy.stats import linregress

# **Hall–Wood Fractal Dimension**
def hall_wood_fd(series):
    series = series.dropna().values  # Remove NaN values
    N = len(series)

    if N < 20:  # Skip very short series
        return np.nan

    scales = np.arange(2, min(20, N // 3))  # Use more scales for stability
    log_scales = np.log(scales)
    log_vars = []

    eps = 1e-10  # Small constant to prevent log(0)

    for scale in scales:
        chunks = [series[i:i + scale] for i in range(0, N - scale + 1, scale)]
        chunk_vars = [np.var(chunk) for chunk in chunks if len(chunk) == scale]

        if len(chunk_vars) > 0:
            log_vars.append(np.log(np.mean(chunk_vars) + eps))

    if len(log_vars) < 2:
        return np.nan  # Not enough data points for regression

    log_scales = log_scales[:len(log_vars)]  # Ensure matching lengths
    slope, _, _, _, _ = linregress(log_scales, log_vars)

    return 2 - slope  # Fractal Dimension D

# **Genton Fractal Dimension**
def genton_fd(series):
    series = series.dropna().values  # Remove NaN values
    N = len(series)

    if N < 20:
        return np.nan

    scales = np.arange(2, min(20, N // 3))  # More scales
    log_scales = np.log(scales)
    log_range = []

    eps = 1e-10  # Small constant for stability

    for scale in scales:
        chunks = [series[i:i + scale] for i in range(0, N - scale + 1, scale)]
        chunk_ranges = [np.percentile(chunk, 75) - np.percentile(chunk, 25) for chunk in chunks if len(chunk) == scale]  # Use IQR

        if len(chunk_ranges) > 0:
            log_range.append(np.log(np.mean(chunk_ranges) + eps))

    if len(log_range) < 2:
        return np.nan  # Not enough data points for regression

    log_scales = log_scales[:len(log_range)]  # Ensure matching lengths
    slope, _, _, _, _ = linregress(log_scales, log_range)

    return 2 - slope  # Fractal Dimension D

# **Apply both estimators to all datasets**
fractal_results = {
    card: {
        "Hall_Wood_FD": hall_wood_fd(df["Log_Returns"]),
        "Genton_FD": genton_fd(df["Log_Returns"])
    }
    for card, df in log_returns.items()
}

# Convert to DataFrame for visualization
fractal_df = pd.DataFrame(fractal_results).T
fractal_df


Unnamed: 0,Hall_Wood_FD,Genton_FD
weekly_data_aggregated_Charizard-Holo_#11_grade_9,1.89838,1.959747
weekly_data_aggregated_Charizard-Holo_#4_grade_8_FINAL,1.902332,1.873357
weekly_data_aggregated_Charizard-Holo_#4_grade_9_FINAL,1.881985,1.930387
weekly_data_aggregated_Charizard_GX_#9_grade_10,1.851629,1.908126
weekly_data_aggregated_Full_ArtCharizard_GX_#SV49_grade_10,1.851156,1.846044
weekly_data_aggregated_Full_ArtCharizard_Vmax_#020_grade_10,1.868915,1.828421
weekly_data_aggregated_Full_ArtCharizard_Vmax_#SV107_grade_10,1.844801,1.970871
weekly_data_aggregated_Full_ArtMoltres_&_Zapdos_&_Articuno_GX_#69_(SECRET)_grade_10,1.909754,1.89509
weekly_data_aggregated_Full_ArtM_Charizard_EX_#101_grade_10,1.913229,1.963094


# !! USE THE FOLLOWING ONLY FOR THE APPROX. ENTROPY:

In [12]:
import numpy as np
import pandas as pd

# Function to compute Approximate Entropy (ApEn)
def approximate_entropy(series, m=2, r=None):
    series = series.dropna().values  # Remove NaNs
    N = len(series)

    eps = 1e-10  # Small constant for numerical stability

    if N < m + 1:
        return np.nan  # Not enough data

    if r is None:
        r = 0.2 * np.std(series)  # Set default tolerance

    # Create patterns
    patterns_m = np.array([series[i:i + m] for i in range(N - m + 1)])
    patterns_m1 = np.array([series[i:i + m + 1] for i in range(N - m)])

    def phi(patterns):
        distances = np.abs(patterns[:, None, :] - patterns[None, :, :]).max(axis=2)
        C = np.sum(distances <= r, axis=1) / (N - len(patterns) + 1)
        return np.mean(np.log(C + eps))  # Prevent log(0) errors

    return abs(phi(patterns_m) - phi(patterns_m1))  # ApEn formula

# **Apply to all datasets**
approx_entropy_results = {
    card: approximate_entropy(df["Log_Returns"])
    for card, df in log_returns.items()
}

# Convert to DataFrame for better visualization
approx_entropy_df = pd.DataFrame.from_dict(approx_entropy_results, orient="index", columns=["Approx_Entropy"])

approx_entropy_df


Unnamed: 0,Approx_Entropy
weekly_data_aggregated_Charizard-Holo_#11_grade_9,1.540576
weekly_data_aggregated_Charizard-Holo_#4_grade_8_FINAL,1.499729
weekly_data_aggregated_Charizard-Holo_#4_grade_9_FINAL,1.517454
weekly_data_aggregated_Charizard_GX_#9_grade_10,1.479847
weekly_data_aggregated_Full_ArtCharizard_GX_#SV49_grade_10,1.410902
weekly_data_aggregated_Full_ArtCharizard_Vmax_#020_grade_10,1.261148
weekly_data_aggregated_Full_ArtCharizard_Vmax_#SV107_grade_10,1.352699
weekly_data_aggregated_Full_ArtMoltres_&_Zapdos_&_Articuno_GX_#69_(SECRET)_grade_10,1.384998
weekly_data_aggregated_Full_ArtM_Charizard_EX_#101_grade_10,1.596132


# Now finally the efficiency index:

# !! USE THE FOLLOWING FOR EI - THE BEST CURATED:

In [None]:
import numpy as np
import pandas as pd

# Expected values for an efficient market
M_star = {
    "Hurst": 0.5,
    "Fractal_Dimension": 1.5,
    "Approx_Entropy": 1.0
}

# Ranges for normalization
R_values = {
    "Hurst": 1.0,  # Hurst exponent range [0,1]
    "Fractal_Dimension": 1.0,  # Fractal dimension range [1,2]
    "Approx_Entropy": 2.0  # Entropy range as per Kristoufek & Vosvrda
}

# Load efficiency measures into a DataFrame (Assuming these DataFrames ready)
efficiency_data = pd.DataFrame({
    "Hurst_GPH": hurst_df_gph["Hurst_GPH"],  # GPH estimator
    "Hurst_LW": hurst_df_whittle["Hurst_LW"],  # Local Whittle estimator
    "Hall_Wood_FD": fractal_df["Hall_Wood_FD"],
    "Genton_FD": fractal_df["Genton_FD"],
    "Approx_Entropy": approx_entropy_df["Approx_Entropy"]
})

# Compute Efficiency Index (EI) function
def compute_efficiency_index(row):
    EI_values = []

    # Compute separately for both Hurst estimators (GPH & Local Whittle)
    for hurst in ["Hurst_GPH", "Hurst_LW"]:
        EI_values.append(((row[hurst] - M_star["Hurst"]) / R_values["Hurst"])**2)

    # Compute separately for both Fractal Dimension estimators
    for fd in ["Hall_Wood_FD", "Genton_FD"]:
        EI_values.append(((row[fd] - M_star["Fractal_Dimension"]) / R_values["Fractal_Dimension"])**2)

    # Compute for Approximate Entropy
    EI_values.append(((row["Approx_Entropy"] - M_star["Approx_Entropy"]) / R_values["Approx_Entropy"])**2)

    # Final Efficiency Index calculation
    return np.sqrt(sum(EI_values))

# Apply function to all rows
efficiency_data["Efficiency_Index"] = efficiency_data.apply(compute_efficiency_index, axis=1)

efficiency_data

Unnamed: 0,Hurst_GPH,Hurst_LW,Hall_Wood_FD,Genton_FD,Approx_Entropy,Efficiency_Index
weekly_data_aggregated_Charizard-Holo_#11_grade_9,0.435077,0.290372,1.89838,1.959747,1.540576,0.70092
weekly_data_aggregated_Charizard-Holo_#4_grade_8_FINAL,0.47892,0.367479,1.902332,1.873357,1.499729,0.617822
weekly_data_aggregated_Charizard-Holo_#4_grade_9_FINAL,0.406118,0.410069,1.881985,1.930387,1.517454,0.644195
weekly_data_aggregated_Charizard_GX_#9_grade_10,0.514788,0.288198,1.851629,1.908126,1.479847,0.626779
weekly_data_aggregated_Full_ArtCharizard_GX_#SV49_grade_10,0.604624,0.457975,1.851156,1.846044,1.410902,0.545875
weekly_data_aggregated_Full_ArtCharizard_Vmax_#020_grade_10,0.295768,0.388883,1.868915,1.828421,1.261148,0.561307
weekly_data_aggregated_Full_ArtCharizard_Vmax_#SV107_grade_10,0.458987,0.481805,1.844801,1.970871,1.352699,0.611326
weekly_data_aggregated_Full_ArtMoltres_&_Zapdos_&_Articuno_GX_#69_(SECRET)_grade_10,0.564974,0.317092,1.909754,1.89509,1.384998,0.631449
weekly_data_aggregated_Full_ArtM_Charizard_EX_#101_grade_10,0.492442,0.361169,1.913229,1.963094,1.596132,0.702417


## top functioning bootstrap (just wait 30 mins:):

#### what about replicability - add there the random state:

In [None]:
import numpy as np
import pandas as pd

# Function to compute the Efficiency Index
def compute_efficiency_index(row, M_star, R_values):
    EI_values = []

    # Compute for both Hurst exponents
    for hurst in ["Hurst_GPH", "Hurst_LW"]:
        EI_values.append(((row[hurst] - M_star["Hurst"]) / R_values["Hurst"])**2)

    # Compute for both Fractal Dimensions
    for fd in ["Hall_Wood_FD", "Genton_FD"]:
        EI_values.append(((row[fd] - M_star["Fractal_Dimension"]) / R_values["Fractal_Dimension"])**2)

    # Compute for Approximate Entropy
    EI_values.append(((row["Approx_Entropy"] - M_star["Approx_Entropy"]) / R_values["Approx_Entropy"])**2)

    return np.sqrt(sum(EI_values))

# Define expected values and ranges for efficiency measures
M_star = {"Hurst": 0.5, "Fractal_Dimension": 1.5, "Approx_Entropy": 1}
R_values = {"Hurst": 1, "Fractal_Dimension": 1, "Approx_Entropy": 2}  # Entropy range is double

# Step 1: Compute original Efficiency Index
efficiency_data["Efficiency_Index"] = efficiency_data.apply(compute_efficiency_index, axis=1, M_star=M_star, R_values=R_values)

# Function to perform bootstrapping for a given card
def bootstrap_efficiency(log_returns, num_bootstraps=1000):
    n = len(log_returns)
    bootstrap_EI = []

    for _ in range(num_bootstraps):
        # Step 2: Generate a bootstrapped sample with replacement
        bootstrapped_series = np.random.choice(log_returns, size=n, replace=True)

        # Step 3: Compute Efficiency Index for bootstrapped series
        bootstrapped_measures = {
            "Hurst_GPH": gph_estimator(pd.Series(bootstrapped_series)),
            "Hurst_LW": local_whittle_estimator(pd.Series(bootstrapped_series)),
            "Hall_Wood_FD": hall_wood_fd(pd.Series(bootstrapped_series)),
            "Genton_FD": genton_fd(pd.Series(bootstrapped_series)),
            "Approx_Entropy": approximate_entropy(pd.Series(bootstrapped_series))
        }
        bootstrapped_EI = compute_efficiency_index(bootstrapped_measures, M_star, R_values)
        bootstrap_EI.append(bootstrapped_EI)

    return bootstrap_EI

# Apply bootstrapping to all cards
bootstrap_results = {}
for card, df in log_returns.items():
    bootstrap_EI_values = bootstrap_efficiency(df["Log_Returns"])

    # Step 5: Compute statistical significance
    q05, q95 = np.percentile(bootstrap_EI_values, [5, 95])
    original_EI = efficiency_data.loc[card, "Efficiency_Index"]

    # Compute p-value based on rank of original EI
    r = sum(bootstrap_EI_values < original_EI) + 1  # Rank of original EI
    p_value = 1 - 2 * abs(r / (len(bootstrap_EI_values) + 1) - 0.5)

    # Store results
    bootstrap_results[card] = {
        "EI": original_EI,
        "EI_Boot_Mean": np.mean(bootstrap_EI_values),
        "EI_Boot_SD": np.std(bootstrap_EI_values),
        "q05": q05,
        "q95": q95,
        "p_value": p_value
    }

# Convert results to DataFrame
bootstrap_results_df = pd.DataFrame.from_dict(bootstrap_results, orient="index")
bootstrap_results_df

Unnamed: 0,EI,EI_Boot_Mean,EI_Boot_SD,q05,q95,p_value
weekly_data_aggregated_Charizard-Holo_#4_grade_8_FINAL,0.617822,0.597847,0.027066,0.554306,0.643779,0.461538
weekly_data_aggregated_Full_ArtM_Charizard_EX_#101_grade_10,0.702417,0.601189,0.031396,0.550232,0.651296,0.001998
weekly_data_aggregated_Full_ArtCharizard_Vmax_#020_grade_10,0.561307,0.455677,0.038978,0.389996,0.518594,0.011988
weekly_data_aggregated_Full_ArtMoltres_&_Zapdos_&_Articuno_GX_#69_(SECRET)_grade_10,0.631449,0.485096,0.03721,0.427373,0.547444,0.001998
weekly_data_aggregated_Full_ArtCharizard_Vmax_#SV107_grade_10,0.611326,0.612626,0.056594,0.520135,0.711077,0.997003
weekly_data_aggregated_Charizard_GX_#9_grade_10,0.626779,0.540346,0.044444,0.465843,0.610861,0.047952
weekly_data_aggregated_Full_ArtCharizard_GX_#SV49_grade_10,0.545875,0.52007,0.038139,0.457465,0.581335,0.477522
weekly_data_aggregated_Charizard-Holo_#11_grade_9,0.70092,0.619799,0.030586,0.571919,0.669403,0.00999
weekly_data_aggregated_Charizard-Holo_#4_grade_9_FINAL,0.644195,0.591417,0.029257,0.542111,0.640055,0.073926


In [None]:
# Compute the alternative inefficiency measure (distance in SDs)
bootstrap_results_df["↑distance↑"] = (
    abs(bootstrap_results_df["EI"] - bootstrap_results_df["EI_Boot_Mean"]) /
    bootstrap_results_df["EI_Boot_SD"]
)

# Sort by highest inefficiency (most inefficient cards first)
bootstrap_results_df = bootstrap_results_df.sort_values(by="↑distance↑", ascending=True)


bootstrap_results_df

Unnamed: 0,EI,EI_Boot_Mean,EI_Boot_SD,q05,q95,p_value,↑distance↑
weekly_data_aggregated_Full_ArtCharizard_Vmax_#SV107_grade_10,0.611326,0.612626,0.056594,0.520135,0.711077,0.997003,0.022971
weekly_data_aggregated_Full_ArtCharizard_GX_#SV49_grade_10,0.545875,0.52007,0.038139,0.457465,0.581335,0.477522,0.676597
weekly_data_aggregated_Charizard-Holo_#4_grade_8_FINAL,0.617822,0.597847,0.027066,0.554306,0.643779,0.461538,0.738009
weekly_data_aggregated_Charizard-Holo_#4_grade_9_FINAL,0.644195,0.591417,0.029257,0.542111,0.640055,0.073926,1.803917
weekly_data_aggregated_Charizard_GX_#9_grade_10,0.626779,0.540346,0.044444,0.465843,0.610861,0.047952,1.944777
weekly_data_aggregated_Charizard-Holo_#11_grade_9,0.70092,0.619799,0.030586,0.571919,0.669403,0.00999,2.65218
weekly_data_aggregated_Full_ArtCharizard_Vmax_#020_grade_10,0.561307,0.455677,0.038978,0.389996,0.518594,0.011988,2.709964
weekly_data_aggregated_Full_ArtM_Charizard_EX_#101_grade_10,0.702417,0.601189,0.031396,0.550232,0.651296,0.001998,3.224234
weekly_data_aggregated_Full_ArtMoltres_&_Zapdos_&_Articuno_GX_#69_(SECRET)_grade_10,0.631449,0.485096,0.03721,0.427373,0.547444,0.001998,3.933211


 Next Steps for Further Analysis

Compare Inefficient Cards to Trading Volume

    Do the most inefficient cards have the lowest liquidity?
    Does high inefficiency correlate with low sales activity?

Visualize Price Trajectories of Inefficient Cards

    Do the most inefficient cards have erratic price spikes & drops?
    If so, that confirms inefficiency.

Check If Inefficiency Persists Over Time

    Run the same bootstrapping analysis on monthly data.
    If inefficiency disappears, markets may be inefficient in the short term but efficient in the long run.