# 3. Model Training and Evaluation

This notebook covers:
- Training the LSTM + XGBoost hybrid model
- Hyperparameter tuning
- Model evaluation and metrics
- Training the volatility prediction model


In [None]:
import sys
from pathlib import Path
import pandas as pd
import numpy as np

# The project root is the parent of the directory the notebook is run from.
project_root = Path('.').resolve().parent
# Put the root at the front of sys.path so the project-local imports below
# (data.*, src.*) resolve. The guard keeps this cell idempotent: re-running it
# no longer stacks a duplicate entry on every execution, while a first run
# still ends with the root at index 0 exactly as before.
if sys.path[:1] != [str(project_root)]:
    sys.path.insert(0, str(project_root))

from data.fetch_data import CryptoDataFetcher
from src.zigzag_indicator import ZigZagIndicator
from src.features import FeatureEngineer
from src.models import LSTMXGBoostModel, VolatilityModel
from src.utils import time_series_split, normalize_data

## Step 1: Prepare Data

In [None]:
# Stage 1: pull the 15-minute BTCUSDT bars.
fetcher = CryptoDataFetcher()
btc_15m_raw = fetcher.fetch_symbol_timeframe('BTCUSDT', '15m')

# Stage 2: label the bars with the ZigZag indicator.
# NOTE(review): presumably this is what adds the 'zigzag_label' column that
# later cells read -- confirm against src.zigzag_indicator.
zigzag = ZigZagIndicator(depth=12, deviation=5, backstep=2)
btc_15m_labeled = zigzag.label_kbars(btc_15m_raw)

# Stage 3: compute the engineered features over the listed lookback periods,
# then ask the engineer which columns are model features.
fe = FeatureEngineer(lookback_periods=[5, 10, 20, 50, 200])
btc_15m = fe.calculate_all_features(btc_15m_labeled)
feature_cols = fe.get_feature_columns(btc_15m)

print(f'Data prepared. Shape: {btc_15m.shape}, Features: {len(feature_cols)}')

## Step 2: Time Series Split

In [None]:
# Split the prepared frame into train / validation / test parts.
# NOTE(review): the test part presumably receives the remaining 0.15 of the
# rows -- confirm against src.utils.time_series_split.
split_frames = time_series_split(btc_15m, train_ratio=0.7, validation_ratio=0.15)
train_df, val_df, test_df = split_frames

print(f'Train: {len(train_df)}, Val: {len(val_df)}, Test: {len(test_df)}')

## Step 3: Prepare Sequences for the Signal Classification Model

In [None]:
# Hybrid model configured for 60-step windows over the selected feature columns.
model = LSTMXGBoostModel(timesteps=60, n_features=len(feature_cols))

# Feature matrix and ZigZag label vector for each split.
X_train, y_train = train_df[feature_cols].values, train_df['zigzag_label'].values
X_val, y_val = val_df[feature_cols].values, val_df['zigzag_label'].values
X_test, y_test = test_df[feature_cols].values, test_df['zigzag_label'].values

# Normalise. The statistics come from the training split only and are reused
# for validation and test, so no information from those splits leaks into
# the scaling. The small epsilon guards against a zero standard deviation.
X_train_norm, train_params = normalize_data(X_train)
train_scale = train_params['std'] + 1e-8
X_val_norm = (X_val - train_params['mean']) / train_scale
X_test_norm = (X_test - train_params['mean']) / train_scale

# Turn each normalised split into (sequence, label) pairs for the model.
X_train_seq, y_train_seq = model.create_sequences(X_train_norm, y_train)
X_val_seq, y_val_seq = model.create_sequences(X_val_norm, y_val)
X_test_seq, y_test_seq = model.create_sequences(X_test_norm, y_test)

print(f'Sequences created. Shape: {X_train_seq.shape}')

## Step 4: Train Model

In [None]:
# Training hyperparameters, gathered in one place so they are easy to adjust.
fit_options = dict(epochs=100, batch_size=32, early_stopping_patience=10)

# Fit on the training sequences; the validation sequences are passed alongside.
history = model.train(X_train_seq, y_train_seq, X_val_seq, y_val_seq, **fit_options)

print(f'Training complete!')
print(f'LSTM Train Accuracy: {history["lstm_train_acc"]: .4f}')
print(f'LSTM Val Accuracy: {history["lstm_val_acc"]: .4f}')

## Step 5: Evaluate Model

In [None]:
# Score the trained model on the held-out test sequences.
metrics = model.evaluate(X_test_seq, y_test_seq)

# The header string was split across two physical lines (an expanded '\n'),
# which is an unterminated string literal. The escape is restored so the cell
# parses and still prints a blank line before the header.
print('\nModel Evaluation:')
print(f'Accuracy: {metrics["accuracy"]: .4f}')
print(f'Precision: {metrics["precision"]: .4f}')
print(f'Recall: {metrics["recall"]: .4f}')
print(f'F1-Score: {metrics["f1"]: .4f}')

## Step 6: Train Volatility Model

In [None]:
vol_model = VolatilityModel(atr_multiplier=1.5, atr_window=14)

# Build labels and features independently for each split.
y_train_vol = vol_model.create_labels(train_df)
X_train_vol = vol_model.create_features(train_df)

y_val_vol = vol_model.create_labels(val_df)
X_val_vol = vol_model.create_features(val_df)

# NOTE(review): the test-split labels/features are built here but not used in
# this cell -- the volatility model is never scored on them below.
y_test_vol = vol_model.create_labels(test_df)
X_test_vol = vol_model.create_features(test_df)

# Train on the training split, passing the validation split alongside.
vol_metrics = vol_model.train(X_train_vol, y_train_vol, X_val_vol, y_val_vol)

# The header string was split across two physical lines (an expanded '\n'),
# which is an unterminated string literal. The escape is restored so the cell
# parses and still prints a blank line before the header.
print('\nVolatility Model Metrics:')
# Each metric value is formatted as a float, so train() is expected to return
# a mapping of metric name to number.
for key, value in vol_metrics.items():
    print(f'  {key}: {value: .4f}')

## Conclusion
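Model training is complete. The signal classification model has been trained and evaluated on the held-out test set, and the volatility model has been trained. Once the volatility model has also been scored on its test split, both models are ready for deployment.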