Example Usage (Notebook)

This notebook provides:
- Setup and config validation
- Single-series demo
- Real-data subset demo from `X_train.parquet` (first N series)

Edit `INPUT_PARQUET` and `NUM_SERIES` in the real-data demo cell as needed.


In [3]:
# Setup: install the required packages before anything else.
# Option A (terminal): pip install -r requirements.txt
# Option B: run this cell, which invokes pip through the interpreter that
# backs this kernel so the packages land in the kernel's own environment.
import subprocess
import sys

pip_install_cmd = [sys.executable, "-m", "pip", "install", "-r", "requirements.txt"]
# check_call raises CalledProcessError on a non-zero exit; on success the cell shows 0.
subprocess.check_call(pip_install_cmd)

0

In [None]:
# Setup: make the local package importable, then validate its configuration
import pathlib
import sys

# Resolved current working directory.
# NOTE(review): assumes the notebook is launched from the folder that contains
# the StructualBreak package — confirm.
project_root = pathlib.Path().resolve()
parent_dir = project_root
_root_entry = str(parent_dir)
if _root_entry not in sys.path:
    # Put it first so the local package is found before any other copy on sys.path.
    sys.path.insert(0, _root_entry)

from StructualBreak import compute_predictors_for_values, run_batch, validate_config

validate_config()
print("Imports OK and config validated.")


In [2]:
# Single-series demo: a synthetic series whose mean and spread shift at break_point
import numpy as np

np.random.seed(42)  # fixed seed so the draws below are reproducible
n = 120
break_point = 60

# Draw the two segments in this order so the seeded random stream is consumed
# exactly as before: first N(0, 1.0), then N(1.5, 1.5).
pre_break = np.random.normal(0, 1.0, break_point)
post_break = np.random.normal(1.5, 1.5, n - break_point)
values = np.concatenate((pre_break, post_break))

# Period label per observation: 0.0 before the break, 1.0 from the break onwards.
periods = np.repeat([0.0, 1.0], [break_point, n - break_point])

preds, meta = compute_predictors_for_values(values, periods, B_boot=20, energy_enable=False)

# Print the predictors rounded to 4 decimals; the bare `meta` below is the cell's output.
rounded = {name: round(val, 4) for name, val in preds.items()}
print(rounded)
meta


{'p_mu_lag1': 1.0, 'p_sigma_lag1': 0.5714, 'overlap_frac_lag1': 0.475, 'p_mu_vol': 1.0, 'p_sigma_vol': 0.7143, 'overlap_frac_vol': 0.475, 'p_mu_resid_lag1': 1.0, 'p_sigma_resid_lag1': 0.8571, 'overlap_frac_resid_lag1': 0.4, 'p_mean': 0.0, 'p_var': 0.0034, 'p_MWU': 0.0, 'p_energy': nan, 'acf_absdiff_l1': 0.3219}


{'n_total_lag1': 119,
 'n_p0_lag1': 59,
 'n_p1_lag1': 60,
 'n_total_vol': 118,
 'n_p0_vol': 58,
 'n_p1_vol': 60,
 'n_period0': 59,
 'n_period1': 60,
 'n_total': 119}

In [5]:
# Real-data subset demo: run the batch pipeline on the first NUM_SERIES series
import os
from pathlib import Path

import pandas as pd

# Input file location. Override it with the INPUT_PARQUET environment variable or
# edit the fallback below. The default is relative so the notebook does not depend
# on one machine's absolute, user-specific path.
INPUT_PARQUET = os.environ.get("INPUT_PARQUET", "X_train.parquet")  # <- change if needed
NUM_SERIES = 5  # take first N series by id

# Fail early with an actionable message instead of a bare reader error.
if not Path(INPUT_PARQUET).is_file():
    raise FileNotFoundError(
        f"Input parquet not found: {INPUT_PARQUET!r}. "
        "Set the INPUT_PARQUET environment variable or edit INPUT_PARQUET in this cell."
    )

print("Reading:", INPUT_PARQUET)
df = pd.read_parquet(INPUT_PARQUET)

# Look up the index level named "id" once; it is used for the id list and the row mask.
id_level = df.index.get_level_values("id")
unique_ids = id_level.unique()
take_ids = list(unique_ids[: max(1, NUM_SERIES)])  # max(1, ...) keeps at least one series
sub_df = df.loc[id_level.isin(take_ids)].sort_index()
print(f"Total unique ids: {len(unique_ids)}; taking first {len(take_ids)}")

# Stage the subset on disk as parquet; run_batch is given this file path as input.
out_dir = Path("_tmp_notebook_example")
out_dir.mkdir(exist_ok=True)
subset_in = out_dir / "subset_X_train.parquet"
sub_df.to_parquet(subset_in)

# Output targets use .csv names even though the run_batch keywords say "parquet".
# NOTE(review): presumably run_batch chooses the format from the extension — confirm.
out_pred = out_dir / "subset_predictors.csv"
out_meta = out_dir / "subset_metadata.csv"

pred_df, meta_df = run_batch(
    input_parquet=str(subset_in),
    out_pred_parquet=str(out_pred),
    out_meta_parquet=str(out_meta),
    B_boot=20,
    energy_enable=False,
    n_jobs=1,
    verbose=True,
)

print("Saved predictors to:", out_pred)
print("Saved metadata to:", out_meta)
print("Predictors shape:", pred_df.shape)
print("Metadata shape:", meta_df.shape)
pred_df.head()


Reading: C:\Users\yehud\Downloads\X_train.parquet


INFO:StructualBreak.batch_processor:Loading _tmp_notebook_example\subset_X_train.parquet
INFO:StructualBreak.batch_processor:Total series: 5


Total unique ids: 10001; taking first 5


Extracting predictors: 100%|██████████| 5/5 [00:03<00:00,  1.37it/s]
INFO:StructualBreak.batch_processor:Saved predictors to: _tmp_notebook_example\subset_predictors.csv
INFO:StructualBreak.batch_processor:Saved metadata to: _tmp_notebook_example\subset_metadata.csv


Saved predictors to: _tmp_notebook_example\subset_predictors.csv
Saved metadata to: _tmp_notebook_example\subset_metadata.csv
Predictors shape: (5, 14)
Metadata shape: (5, 9)


Unnamed: 0_level_0,p_mu_lag1,p_sigma_lag1,overlap_frac_lag1,p_mu_vol,p_sigma_vol,overlap_frac_vol,p_mu_resid_lag1,p_sigma_resid_lag1,overlap_frac_resid_lag1,p_mean,p_var,p_MWU,p_energy,acf_absdiff_l1
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
0,0.666667,0.714286,1.0,0.761905,0.761905,1.0,0.809524,0.761905,1.0,0.977803,0.7211479,0.999243,,0.115726
1,0.238095,0.714286,1.0,0.238095,0.714286,1.0,0.238095,0.714286,1.0,0.099297,2.067381e-08,0.197171,,0.246053
2,0.190476,0.52381,1.0,0.52381,0.952381,1.0,0.190476,0.571429,1.0,0.193963,1.638906e-11,0.117482,,0.361295
3,0.952381,0.47619,1.0,0.952381,0.47619,1.0,0.952381,0.47619,1.0,0.869832,0.002229744,0.584118,,0.099213
4,0.857143,0.52381,1.0,0.857143,0.52381,1.0,0.857143,0.952381,1.0,0.829085,0.4636402,0.888068,,0.186666
