In [None]:
!pip install -U pip setuptools wheel
!pip install -r ../requirements.txt


Example Usage (Notebook)

This notebook provides:
- Setup and config validation
- Single-series demo
- Real-data subset demo from `X_train.parquet` (first N series)

Edit `INPUT_PARQUET` and `NUM_SERIES` (defined at the top of the real-data demo cell) as needed.


In [None]:
# Setup: install the required packages into the interpreter running this kernel.
# Terminal alternative: pip install -r requirements.txt
import pathlib
import subprocess
import sys

# This notebook refers to requirements.txt both as ./requirements.txt and as
# ../requirements.txt, so look in the working directory first (the original
# behaviour of this cell) and fall back to its parent.
_req_candidates = [
    pathlib.Path("requirements.txt"),
    pathlib.Path("..") / "requirements.txt",
]
_requirements = next((p for p in _req_candidates if p.is_file()), None)
if _requirements is None:
    # Fail with the searched locations instead of an opaque pip exit status.
    raise FileNotFoundError(
        "requirements.txt not found; looked in: "
        + ", ".join(str(p.resolve()) for p in _req_candidates)
    )

# sys.executable guarantees pip runs under the same Python as the kernel.
subprocess.check_call(
    [sys.executable, "-m", "pip", "install", "-r", str(_requirements)]
)

In [None]:
# Setup: make the local package importable, import its entry points,
# and run its config validation.
import pathlib
import sys

# The current working directory is used as the project root; it is put at the
# front of sys.path (once) so the local package is found by the import below.
project_root = pathlib.Path().resolve()
parent_dir = project_root
_root_entry = str(parent_dir)
if _root_entry not in sys.path:
    sys.path.insert(0, _root_entry)

# Note: the package name is spelled `StructualBreak` (sic); keep it as-is.
from StructualBreak import (
    compute_predictors_for_values,
    run_batch,
    validate_config,
)

validate_config()
print("Imports OK and config validated.")


In [None]:
# Single-series demo: a synthetic series whose mean and spread change at
# `break_point`, fed to compute_predictors_for_values.
import numpy as np

np.random.seed(42)  # fixed seed so the synthetic series is reproducible
n = 120
break_point = 60

# Draw order matters for reproducibility: pre-break segment first, then post-break.
pre_break = np.random.normal(0, 1.0, break_point)
post_break = np.random.normal(1.5, 1.5, n - break_point)
values = np.concatenate([pre_break, post_break])

# Period indicator: 0.0 for positions before the break, 1.0 from the break onward.
periods = (np.arange(n) >= break_point).astype(float)

preds, meta = compute_predictors_for_values(
    values,
    periods,
    B_boot=20,
    energy_enable=False,
)

# Print the predictors rounded to 4 decimals, then display the metadata.
rounded_preds = {name: round(score, 4) for name, score in preds.items()}
print(rounded_preds)
meta


In [None]:
# Real-data subset demo (edit the path below)
# Reads the input parquet, keeps the first NUM_SERIES distinct ids, writes that
# subset to a scratch folder and runs run_batch on it.
from pathlib import Path
import pandas as pd

INPUT_PARQUET = r"C:\Users\yehud\Downloads\X_train.parquet"  # <- change if needed
NUM_SERIES = 5  # number of distinct ids to keep (at least one is always taken)

print("Reading:", INPUT_PARQUET)
df = pd.read_parquet(INPUT_PARQUET)

# The frame is expected to carry an index level named "id".
id_level = df.index.get_level_values("id")
unique_ids = id_level.unique()
keep_count = max(1, NUM_SERIES)
take_ids = list(unique_ids[:keep_count])
sub_df = df.loc[id_level.isin(take_ids)].sort_index()
print(f"Total unique ids: {len(unique_ids)}; taking first {len(take_ids)}")

# Scratch folder for the subset input and the batch outputs.
out_dir = Path("_tmp_notebook_example")
out_dir.mkdir(exist_ok=True)

subset_in = out_dir / "subset_X_train.parquet"
sub_df.to_parquet(subset_in)

# Write CSV outputs
# NOTE(review): these .csv paths are passed to parameters named
# out_pred_parquet / out_meta_parquet; presumably run_batch picks the format
# from the file extension -- confirm against run_batch.
out_pred = out_dir / "subset_predictors.csv"
out_meta = out_dir / "subset_metadata.csv"

batch_kwargs = dict(
    input_parquet=str(subset_in),
    out_pred_parquet=str(out_pred),
    out_meta_parquet=str(out_meta),
    B_boot=20,
    energy_enable=False,
    n_jobs=1,
    verbose=True,
)
pred_df, meta_df = run_batch(**batch_kwargs)

print("Saved predictors to:", out_pred)
print("Saved metadata to:", out_meta)
print("Predictors shape:", pred_df.shape)
print("Metadata shape:", meta_df.shape)
pred_df.head()
