# Choosing the optimal number of band-limited intrinsic mode functions (BLIMFs) for variational mode decomposition (VMD)

In [None]:
!pip install vmdpy --quiet

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from vmdpy import VMD

# Set random seeds for reproducibility
# Seeds both Python's built-in generator and NumPy's global generator with the
# same fixed value so random draws made through either are repeatable.
import random
random.seed(42)
np.random.seed(42)

# Ignore warnings
# Installs a global filter that suppresses every warning category.
# NOTE(review): this also hides numerical warnings (e.g. divide-by-zero in
# NumPy) — confirm that silencing them is intended.
import warnings
warnings.filterwarnings("ignore")

In [None]:
# Default figure appearance for all plots: a 12x6-inch canvas rendered at
# 300 DPI for high-resolution figures. pyplot's rcParams and matplotlib's
# rcParams are the same object, so one update covers both settings.
import matplotlib as mpl
mpl.rcParams.update({
    'figure.figsize': (12, 6),
    'figure.dpi': 300,
})

In [None]:
# Global variables
# Fractions of the rows assigned to the training and validation splits; the
# split code takes the first TRAIN_RATIO of rows, then the next VAL_RATIO.
TRAIN_RATIO = 0.7
VAL_RATIO = 0.2
# NOTE(review): TEST_RATIO is not read by the visible split code, which gives
# the test set whatever rows remain after train + validation.
TEST_RATIO = 0.1

# NOTE(review): not used in the visible part of the notebook; presumably the
# look-back window length for a downstream model — confirm.
WINDOW_SIZE = 60

In [None]:
# Mount Google Drive
# Makes the Drive contents available under /content/drive so the dataset
# can be read from it. Requires the google.colab package, i.e. a Colab runtime.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
# Read the Bitcoin CSV from the mounted Drive, using its first column as the
# index (with date parsing enabled), and keep only the 'Returns%' column as a
# one-column DataFrame.
data = pd.read_csv(
    "/content/drive/My Drive/Colab Notebooks/EMH/data/Bitcoin_data_2024_w_TI_n_Macro.csv",
    index_col=0,
    parse_dates=True,
).loc[:, ['Returns%']]

# Guard: everything downstream expects a single-column (univariate) frame.
# NOTE(review): the selection above already yields one column, so this check
# is likely redundant — confirm before removing.
if len(data.columns) != 1:
    raise ValueError("Data must be univariate with only 'Returns%' column.")

In [None]:
# Count missing values per column; as the last expression of the cell the
# result is shown as the cell output.
data.isna().sum()

Unnamed: 0,0
Returns%,0


In [None]:
# Split the data in row order (no shuffling): the first TRAIN_RATIO of the
# rows form the training set, the next VAL_RATIO the validation set, and all
# remaining rows the test set.
n = len(data)
train_end = int(TRAIN_RATIO * n)
val_end = int(VAL_RATIO * n) + train_end

# Slice the three consecutive, non-overlapping row ranges in one pass.
train_data, val_data, test_data = (
    data.iloc[start:stop]
    for start, stop in ((None, train_end), (train_end, val_end), (val_end, None))
)

# Report the size of each split.
print("Train shape:", train_data.shape)
print("Validation shape:", val_data.shape)
print("Test shape:", test_data.shape)

Train shape: (356589, 1)
Validation shape: (101882, 1)
Test shape: (50943, 1)


In [None]:
def compute_residual_energy_table(signal, min_K=2, max_K=20, alpha=2000, tau=0, DC=0, init=1, tol=1e-7):
    """Tabulate the VMD residual energy ratio for each candidate mode count K.

    For every K in ``[min_K, max_K]`` the signal is decomposed with ``VMD``,
    the returned modes are summed into a reconstruction, and the energy of
    the reconstruction error is expressed as a percentage of the energy of
    the signal. Each ratio is also printed as soon as it is computed.

    Args:
        signal: 1-D array-like of samples (NumPy array, list, ...).
        min_K: Smallest number of modes to try (inclusive).
        max_K: Largest number of modes to try (inclusive).
        alpha, tau, DC, init, tol: Forwarded unchanged to ``VMD``.

    Returns:
        pandas.DataFrame with one row per K and the columns ``'K'`` and
        ``'Residual Energy Ratio (%)'`` (ratio rounded to 4 decimals).
        When ``min_K > max_K`` the frame has these columns and no rows.
    """
    # Accept any array-like: a plain Python list would otherwise fail on the
    # element-wise ``** 2`` and on the array subtraction below.
    signal = np.asarray(signal)
    columns = ['K', 'Residual Energy Ratio (%)']
    results = []

    for K in range(min_K, max_K + 1):
        u, _, _ = VMD(signal, alpha, tau, K, DC, init, tol)
        reconstructed = np.sum(u, axis=0)

        # The reconstruction may be shorter than the input (vmdpy appears to
        # trim odd-length signals by one sample — confirm), so measure both
        # energies on exactly the samples that were reconstructed. Using the
        # full-length signal energy as the denominator would compare energies
        # over different sample sets.
        aligned = signal[:reconstructed.shape[0]]
        signal_energy = np.sum(aligned ** 2)
        residual_energy = np.sum((aligned - reconstructed) ** 2)

        r_res = (residual_energy / signal_energy) * 100  # as percentage
        results.append({'K': K, 'Residual Energy Ratio (%)': round(r_res, 4)})
        print(f"K = {K}, Residual Energy Ratio = {r_res:.4f}%")

    # Explicit columns keep the schema stable even when no K was evaluated.
    return pd.DataFrame(results, columns=columns)

# Run the function on your training data
# Only the training split is used; '.values' hands the returns to the function
# as a NumPy array.
train_return = train_data['Returns%'].values
# Sweep K from 5 to 18 inclusive; the function prints one line per K.
residual_energy_table = compute_residual_energy_table(train_return, min_K=5, max_K=18)

# Display table
# 'display' is bound to the IPython.display module here, hence display.display(...).
import IPython.display as display
display.display(residual_energy_table)

K = 5, Residual Energy Ratio = 25.4756%
K = 6, Residual Energy Ratio = 18.6696%
K = 7, Residual Energy Ratio = 13.5532%
K = 8, Residual Energy Ratio = 9.9140%
