# 2. Feature Engineering with ZigZag Labeling

This notebook covers:
- Implementing ZigZag indicator
- Labeling K-bars with HH/HL/LL/LH signals (higher high, higher low, lower low, lower high)
- Generating 55+ technical features


In [None]:
import sys
from pathlib import Path
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

# Put the directory above the notebook's working directory at the front of
# sys.path so the project-local packages imported below can be resolved.
notebook_dir = Path('.').resolve()
project_root = notebook_dir.parent
sys.path.insert(0, str(project_root))

from data.fetch_data import CryptoDataFetcher
from src.zigzag_indicator import ZigZagIndicator
from src.features import FeatureEngineer
from src.utils import print_data_info

## Step 1: Fetch Data

In [None]:
# Fetch the BTCUSDT 15m data set that every later cell works on, then report
# its shape as a quick sanity check.
symbol, timeframe = 'BTCUSDT', '15m'
fetcher = CryptoDataFetcher()
btc_15m = fetcher.fetch_symbol_timeframe(symbol, timeframe)
print(f'Data shape: {btc_15m.shape}')

## Step 2: Apply ZigZag and Label K-bars

In [None]:
# Run the ZigZag labeler over the bars; the returned frame replaces btc_15m.
zigzag = ZigZagIndicator(depth=12, deviation=5, backstep=2)
btc_15m = zigzag.label_kbars(btc_15m)

# Count the bars per label id (read from the 'zigzag_label' column, ordered by
# id) and print each count next to its human-readable label name.
print('Label Distribution:')
label_counts = btc_15m['zigzag_label'].value_counts().sort_index()
for label_id, count in label_counts.items():
    print(f'  {zigzag.get_label_name(label_id)}: {count}')

## Step 3: Feature Engineering

In [None]:
# Compute the technical features; the returned frame replaces btc_15m.
fe = FeatureEngineer(lookback_periods=[5, 10, 20, 50, 200])
btc_15m = fe.calculate_all_features(btc_15m)

# Ask the engineer which columns are features, then report how many there are
# along with the first 15 names as a preview.
feature_cols = fe.get_feature_columns(btc_15m)
n_features = len(feature_cols)
feature_preview = feature_cols[:15]
print(f'Total features: {n_features}')
print(f'Sample features: {feature_preview}')

## Step 4: Data Quality Check

In [None]:
# Report how many feature cells are missing before imputing them.
print('Missing values in features:')
missing = btc_15m[feature_cols].isnull().sum()
total_nan = missing.sum()
total_cells = len(btc_15m) * len(feature_cols)
# Guard the ratio: an empty frame or an empty feature list would otherwise
# divide by zero.
missing_pct = (total_nan / total_cells) * 100 if total_cells else 0.0
print(f'Total NaN count: {total_nan}')
print(f'Percentage: {missing_pct:.2f}%')

# Fill NaN values: carry the previous row's value forward, then use 0 for any
# leading rows that have no earlier value to copy.
# DataFrame.ffill() replaces fillna(method='ffill'), which is deprecated since
# pandas 2.1 and rejected by newer pandas releases.
btc_15m[feature_cols] = btc_15m[feature_cols].ffill().fillna(0)
print('NaN values filled!')

## Next Step: Model Training
Proceed to the next notebook, `03_model_training.ipynb`.