In [None]:
!pip install -U pip setuptools wheel
!pip install -r ../requirements.txt


# Wavelet21 Method Example

This notebook demonstrates the Wavelet21 method for structural breakpoint detection using wavelet decomposition and frequency-domain analysis.


In [None]:
# Notebook bootstrap: third-party imports, then make the local package importable.
import pathlib
import sys

# Scientific stack is loaded before the local package is touched.
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# The directory two levels above the current working directory is put at the
# front of sys.path (once) so that `StructualBreakV2` can be imported by name.
repo_parent = pathlib.Path.cwd().resolve().parent.parent
repo_parent_entry = str(repo_parent)
if repo_parent_entry not in sys.path:
    sys.path.insert(0, repo_parent_entry)

from StructualBreakV2 import compute_predictors_for_values, run_batch, Wavelet21Method
print("Imports OK.")



In [None]:
# Create synthetic data with a structural break.
# The series length and the break position are named once here; every slice,
# loop bound and message below is derived from them.
np.random.seed(42)
n = 200
break_idx = 100  # index of the first post-break observation

t = np.arange(n)

# Pre-break regime: x[i] = 0.7 * x[i-1] + e,  e ~ Normal(loc=0, scale=0.5).
# The array is pre-filled with i.i.d. draws; only element 0 survives as the
# initial value, the rest are overwritten by the recursion. The pre-fill is kept
# because it consumes random numbers and therefore fixes the generated values.
x_pre = np.random.normal(0, 0.5, break_idx)
for i in range(1, break_idx):
    x_pre[i] = 0.7 * x_pre[i-1] + np.random.normal(0, 0.5)

# Post-break regime: x[i] = 0.3 * x[i-1] + e,  e ~ Normal(loc=2, scale=1.0),
# i.e. a different AR coefficient, a shifted level and noisier innovations.
n_post = n - break_idx
x_post = np.random.normal(2, 1.0, n_post)
for i in range(1, n_post):
    x_post[i] = 0.3 * x_post[i-1] + np.random.normal(2, 1.0)

# Combine series; `periods` is 0 before the break and 1 from the break onwards.
values = np.concatenate([x_pre, x_post])
periods = np.concatenate([np.zeros(break_idx), np.ones(n_post)])

print(f"Created synthetic time series with {len(values)} observations")
print(f"Structural break at t={break_idx} (index {break_idx})")
print(f"Pre-break mean: {np.mean(values[:break_idx]):.3f}, std: {np.std(values[:break_idx]):.3f}")
print(f"Post-break mean: {np.mean(values[break_idx:]):.3f}, std: {np.std(values[break_idx:]):.3f}")


In [None]:
# Plot the simulated series and mark the position of the injected break.
fig, ax = plt.subplots(figsize=(12, 6))
ax.plot(t, values, 'b-', linewidth=1, label='Time Series')
ax.axvline(x=100, color='r', linestyle='--', alpha=0.7, label='True Break Point')
ax.set(
    xlabel='Time',
    ylabel='Value',
    title='Synthetic Time Series with Structural Break',
)
ax.legend()
ax.grid(True, alpha=0.3)
plt.show()


In [None]:
# Test Wavelet21 method on single series
print("Testing Wavelet21 method...")


def format_predictor(results, key, spec=".3f"):
    """Return ``results[key]`` rendered for display.

    Numeric values (Python or numpy scalars) are formatted with ``spec``.
    A missing key yields ``'N/A'`` and any other non-numeric value is shown
    via ``str()``. Applying a float format spec to the ``'N/A'`` fallback
    would raise ``ValueError``, so the spec is only used on numbers.
    """
    value = results.get(key, 'N/A')
    if isinstance(value, (int, float, np.integer, np.floating)):
        return format(value, spec)
    return str(value)


# Method 1: Using the main API
predictors, metadata = compute_predictors_for_values(
    values, periods, method='wavelet21'
)

# Run-level metadata reported by the call above.
print(f"\nMethod: {metadata['method']}")
print(f"Status: {metadata['status']}")
print(f"Processing time: {metadata['processing_time']:.3f} seconds")
print(f"Number of observations: {metadata['n_observations']}")

print("\nKey Predictors:")
print(f"  p_wavelet_break: {format_predictor(predictors, 'p_wavelet_break')}")
print(f"  confidence: {format_predictor(predictors, 'confidence')}")
print(f"  S_local_max_over_j: {format_predictor(predictors, 'S_local_max_over_j')}")
# Shown unformatted, exactly as returned.
print(f"  cnt_local_sum_over_j: {predictors.get('cnt_local_sum_over_j', 'N/A')}")
print(f"  log_energy_ratio_l2norm_over_j: {format_predictor(predictors, 'log_energy_ratio_l2norm_over_j')}")

print("\nResidual Diagnostics:")
print(f"  resid_kurtosis: {format_predictor(predictors, 'resid_kurtosis')}")
print(f"  resid_skewness: {format_predictor(predictors, 'resid_skewness')}")
print(f"  arch_lm_p: {format_predictor(predictors, 'arch_lm_p')}")

print("\nSegment Shifts:")
print(f"  mean_diff: {format_predictor(predictors, 'mean_diff')}")
print(f"  log_var_ratio: {format_predictor(predictors, 'log_var_ratio')}")
print(f"  ks_p_raw: {format_predictor(predictors, 'ks_p_raw')}")


In [None]:
# Method 2: Direct instantiation of Wavelet21Method
print("\n" + "="*50)
print("Testing direct Wavelet21Method instantiation...")

# Create Wavelet21 method instance with its default configuration
wavelet21 = Wavelet21Method()

# Print whatever key/value pairs the method reports about itself
info = wavelet21.get_method_info()
print("\nMethod Information:")
for key, value in info.items():
    print(f"  {key}: {value}")

# Run on the same synthetic series used by the high-level API call
predictors2, metadata2 = wavelet21.compute_predictors(values, periods)

print("\nDirect Method Results:")
print(f"  Status: {metadata2['status']}")
print(f"  Processing time: {metadata2['processing_time']:.3f} seconds")

# Only apply the float format to numeric values: formatting the 'N/A' fallback
# (or any other non-number) with ".3f" would raise ValueError.
for pred_name in ('p_wavelet_break', 'confidence'):
    pred_value = predictors2.get(pred_name, 'N/A')
    if isinstance(pred_value, (int, float, np.integer, np.floating)):
        pred_text = f"{pred_value:.3f}"
    else:
        pred_text = str(pred_value)
    print(f"  {pred_name}: {pred_text}")


In [None]:
# Compare with Roy24 method
print("\n" + "="*50)
print("Comparing Wavelet21 with Roy24 method...")

# Run the Roy24 method on the same series through the same high-level API
predictors_roy24, metadata_roy24 = compute_predictors_for_values(
    values, periods, method='roy24'
)

print("\nComparison Results:")
print(f"{'Metric':<30} {'Wavelet21':<12} {'Roy24':<12}")
print("-" * 54)

# Compare key metrics.
# Each column is formatted on its own: a numeric value keeps its 3-decimal
# rendering even when the other method does not report that metric, and numpy
# scalar types count as numeric.
numeric_types = (int, float, np.integer, np.floating)
metrics = ['p_wavelet_break', 'confidence']
for metric in metrics:
    w21_val = predictors.get(metric, 'N/A')
    r24_val = predictors_roy24.get(metric, 'N/A')
    w21_text = f"{w21_val:.3f}" if isinstance(w21_val, numeric_types) else str(w21_val)
    r24_text = f"{r24_val:.3f}" if isinstance(r24_val, numeric_types) else str(r24_val)
    print(f"{metric:<30} {w21_text:<12} {r24_text:<12}")

print("\nProcessing Times:")
print(f"  Wavelet21: {metadata['processing_time']:.3f} seconds")
print(f"  Roy24: {metadata_roy24['processing_time']:.3f} seconds")

print("\nMethod Status:")
print(f"  Wavelet21: {metadata['status']}")
print(f"  Roy24: {metadata_roy24['status']}")


In [None]:
# Test with different configurations
print("\n" + "="*50)
print("Testing Wavelet21 with different configurations...")

# Custom configuration passed to the Wavelet21Method constructor.
# NOTE(review): the key names are assumed to be the ones Wavelet21Method
# recognises - confirm against the package.
custom_config = {
    'wavelet_type': 'db4',  # Daubechies 4 wavelet
    'decomposition_levels': 4,
    'alpha': 0.01,  # More stringent threshold
    'mc_reps': 200  # Fewer MC repetitions for speed
}

wavelet21_custom = Wavelet21Method(custom_config)
predictors_custom, metadata_custom = wavelet21_custom.compute_predictors(values, periods)

# Only numbers get the ".3f" treatment; the 'N/A' fallback and any other
# non-numeric value are shown via str() instead of raising ValueError.
numeric_types = (int, float, np.integer, np.floating)

print("\nCustom Configuration Results:")
print(f"  Wavelet type: {custom_config['wavelet_type']}")
print(f"  Decomposition levels: {custom_config['decomposition_levels']}")
print(f"  Alpha: {custom_config['alpha']}")
print(f"  MC repetitions: {custom_config['mc_reps']}")
print(f"  Status: {metadata_custom['status']}")
print(f"  Processing time: {metadata_custom['processing_time']:.3f} seconds")
for pred_name in ('p_wavelet_break', 'confidence'):
    pred_value = predictors_custom.get(pred_name, 'N/A')
    pred_text = f"{pred_value:.3f}" if isinstance(pred_value, numeric_types) else str(pred_value)
    print(f"  {pred_name}: {pred_text}")

# NOTE(review): this listing reads `predictors` (the default-configuration run
# from an earlier cell), not `predictors_custom` - confirm that is intended.
print("\nAll MODW Features Available:")
modw_features = [k for k in predictors.keys() if any(x in k for x in ['j1_', 'j2_', 'j3_', 'S_local', 'cnt_', 'log_energy'])]
for feature in sorted(modw_features)[:10]:  # Show first 10
    feature_value = predictors.get(feature, 'N/A')
    feature_text = f"{feature_value:.3f}" if isinstance(feature_value, numeric_types) else str(feature_value)
    print(f"  {feature}: {feature_text}")
if len(modw_features) > 10:
    print(f"  ... and {len(modw_features) - 10} more MODW features")


In [None]:
# Wavelet21 on a subset of real data. Adjust the constants in the first section.
import pandas as pd
from pathlib import Path

# --- User-tunable inputs -----------------------------------------------------
INPUT_PARQUET = '../_tmp_notebook_example/subset_X_train.parquet'  # point this at your own X_train.parquet
ENGINE = 'fastparquet'
NUM_SERIES = 100  # number of series ids kept, taken from the front of the unique-id list
OUT_DIR = Path('./_wavelet21_outputs')
OUT_DIR.mkdir(parents=True, exist_ok=True)

# --- Files placed in OUT_DIR -------------------------------------------------
subset_parquet = OUT_DIR.joinpath('wavelet_subset.parquet')
out_pred_parquet = OUT_DIR.joinpath('Wavelet21Predictors.parquet')
out_meta_parquet = OUT_DIR.joinpath('Wavelet21Metadata.parquet')
out_pred_csv = OUT_DIR.joinpath('Wavelet21Features.csv')

print(f'Reading input: {INPUT_PARQUET}')
df = pd.read_parquet(INPUT_PARQUET, engine=ENGINE)

# A frame without a MultiIndex must carry 'id' and 'time' columns; those are
# promoted to a sorted [id, time] index. Anything else is rejected.
if not isinstance(df.index, pd.MultiIndex):
    if not {'id','time'}.issubset(df.columns):
        raise ValueError("Input must have a MultiIndex [id,time] or columns ['id','time'].")
    df = df.set_index(['id','time']).sort_index()

# Both data columns have to be present before going any further.
required_cols = {'value','period'}
if not (required_cols <= set(df.columns)):
    raise ValueError(f"Input must contain columns {required_cols}.")

# Keep the leading NUM_SERIES ids (never fewer than one).
unique_ids = df.index.get_level_values('id').unique()
subset_ids = list(unique_ids[: max(NUM_SERIES, 1)])
ellipsis_suffix = " ..." if len(subset_ids) > 5 else ""
print(f'Selecting {len(subset_ids)} series (first ids): {subset_ids[:5]}{ellipsis_suffix}')

df_subset = df.loc[pd.IndexSlice[subset_ids, :]].copy()
print(f'Subset shape: {df_subset.shape}')

# The batch API is handed file paths, so the subset is saved to disk first.
df_subset.to_parquet(subset_parquet)
print(f'Wrote subset to: {subset_parquet}')

# Batch run through the package's high-level entry point.
# NOTE(review): presumably run_batch writes the two output parquet files itself - confirm.
from StructualBreakV2 import run_batch
pred_df, meta_df = run_batch(
    str(subset_parquet),
    str(out_pred_parquet),
    str(out_meta_parquet),
    method='wavelet21',
)

# CSV copy of the predictors, without the index column.
pred_df.to_csv(out_pred_csv, index=False)

print(
    '\nOutputs:',
    f'  Predictors parquet: {out_pred_parquet}',
    f'  Metadata parquet:   {out_meta_parquet}',
    f'  Predictors CSV:     {out_pred_csv}',
    sep='\n',
)

# Short summary; the success count is only available when the metadata has a
# 'status' column.
n_series = len(pred_df)
if 'status' in meta_df.columns:
    n_success = int((meta_df.get('status') == 'success').sum())
else:
    n_success = None
print(f"\nProcessed series: {n_series}")
if n_success is not None:
    print(f"Successful: {n_success}, Failed: {n_series - n_success}")
print('\nPredictors head:')
print(pred_df.head(3))
