# Test Notebook - BTC 15m Model Training

This notebook tests the complete training pipeline with BTC USDT 15-minute data.

Before running batch training on all 44 symbol-timeframe combinations,
we test with a single one to validate the pipeline.


## Step 1: Setup and Imports


In [None]:
# Report the TensorFlow build and the GPUs visible to it.
import tensorflow as tf

gpu_devices = tf.config.list_physical_devices("GPU")  # queried once, reused below
print(f'TensorFlow Version: {tf.__version__}')
print(f'GPU Available: {len(gpu_devices)}')
print(f'GPU Devices: {gpu_devices}')

# Mount Google Drive; the project root used by later cells lives under this path.
from google.colab import drive
drive.mount('/content/drive')

In [None]:
# Project bootstrap: put the project root on sys.path, create the output
# directories, then import third-party and project-local modules.
# NOTE: statement order matters here -- sys.path must be extended before the
# `data.*` / `src.*` imports further down can be resolved.
import sys
import os

# Root of the project inside the Drive mount ('/content/drive').
project_root = '/content/drive/MyDrive/crypto-zigzag-ml'
# Highest-priority search location, so the project-local imports below resolve
# against project_root (presumably `data/` and `src/` live there -- confirm).
sys.path.insert(0, project_root)

# Create output directories: the per-symbol model directory (written by the
# save step) and a training_logs directory. exist_ok=True makes re-runs safe.
os.makedirs(f'{project_root}/models/btcusdt_15m', exist_ok=True)
os.makedirs(f'{project_root}/training_logs', exist_ok=True)

# Third-party / stdlib dependencies used throughout the notebook.
import pandas as pd
import numpy as np
import json
from datetime import datetime
from tensorflow import keras
from tensorflow.keras import layers

# Project-local modules (resolved via the sys.path entry added above).
from data.fetch_data import CryptoDataFetcher
from src.zigzag_indicator import ZigZagIndicator
from src.features import FeatureEngineer
from src.utils import time_series_split

print('\nAll imports successful!')

## Step 2: Fetch BTC 15m Data


In [None]:
# Download the BTCUSDT 15-minute bars through the project's fetcher.
print('Fetching BTC USDT 15m data...')
fetcher = CryptoDataFetcher()
btc_15m = fetcher.fetch_symbol_timeframe('BTCUSDT', '15m')

# Quick sanity report: size, covered index range and available columns.
first_bar, last_bar = btc_15m.index.min(), btc_15m.index.max()
column_names = list(btc_15m.columns)

print(f'Shape: {btc_15m.shape}')
print(f'Date Range: {first_bar} to {last_bar}')
print(f'Columns: {column_names}')

## Step 3: Apply ZigZag Labeling


In [None]:
# Label every bar with the ZigZag indicator (adds/uses the 'zigzag_label' column).
print('\nApplying ZigZag Indicator...')
zigzag = ZigZagIndicator(depth=12, deviation=5, backstep=2)
btc_15m = zigzag.label_kbars(btc_15m)

# Report how many bars fall into each label, ordered by label id.
print('\nLabel Distribution:')
label_counts = btc_15m['zigzag_label'].value_counts().sort_index()
n_bars = len(btc_15m)
for label_id, n_labeled in label_counts.items():
    share = 100 * n_labeled / n_bars
    print(f'  {zigzag.get_label_name(label_id):12s}: {n_labeled:6d} ({share:5.2f}%)')

## Step 4: Feature Engineering


In [None]:
# Compute the engineered feature columns for every bar.
print('\nEngineering Features...')
fe = FeatureEngineer(lookback_periods=[5, 10, 20, 50, 200])
btc_15m = fe.calculate_all_features(btc_15m)

feature_cols = fe.get_feature_columns(btc_15m)
# Fill gaps forward first (only past values are propagated), then zero-fill
# whatever is still missing at the start of the series.
# `.ffill()` replaces `fillna(method='ffill')`, which is deprecated in
# pandas 2.x and scheduled for removal.
btc_15m[feature_cols] = btc_15m[feature_cols].ffill().fillna(0)

print(f'\nTotal Features: {len(feature_cols)}')
print(f'Sample Features: {feature_cols[:15]}')

## Step 5: Time Series Split


In [None]:
# Chronological split: 70% train, 15% validation, remainder test.
print('\nTime Series Split (70-15-15)...')
train_df, val_df, test_df = time_series_split(btc_15m, train_ratio=0.7, validation_ratio=0.15)

# Report the size of each partition and its share of all bars.
n_total_bars = len(btc_15m)
for tag, part in (('Train:', train_df), ('Val:  ', val_df), ('Test: ', test_df)):
    print(f'{tag} {len(part):6d} bars ({100*len(part)/n_total_bars:5.1f}%)')

## Step 6: Prepare Training Data


In [None]:
print('\nPreparing Training Data...')

# Keep the first 40 feature columns (positional slice of feature_cols; no ranking is applied here).
selected_features = feature_cols[:40]

# Pull feature matrices (float32) and label vectors out of each partition.
X_train, X_val, X_test = (
    part[selected_features].values.astype(np.float32)
    for part in (train_df, val_df, test_df)
)
y_train, y_val, y_test = (
    part['zigzag_label'].values
    for part in (train_df, val_df, test_df)
)

print(f'X_train: {X_train.shape}')
print(f'y_train: {y_train.shape}')

# Standardize with statistics taken from the training partition only;
# the small epsilon keeps constant columns from dividing by zero.
print('\nNormalizing...')
mean = X_train.mean(axis=0)
std = X_train.std(axis=0) + 1e-8

X_train, X_val, X_test = (
    (matrix - mean) / std
    for matrix in (X_train, X_val, X_test)
)

print(f'X_train mean: {X_train.mean():.4f}, std: {X_train.std():.4f}')

## Step 7: Create Sequences


In [None]:
def create_sequences(X, y, timesteps=20):
    """Build sliding windows of features paired with the label that follows each window.

    Window ``i`` contains rows ``X[i : i + timesteps]`` and is paired with
    ``y[i + timesteps]``, i.e. the label of the row right after the window.

    Args:
        X: Array-like of shape ``(n_rows, ...)`` holding per-row features.
        y: Array-like of length ``n_rows`` holding per-row labels.
        timesteps: Number of consecutive rows per window.

    Returns:
        Tuple ``(X_seq, y_seq)``. ``X_seq`` is float32 with shape
        ``(n_windows, timesteps, ...)`` and ``y_seq`` has shape ``(n_windows,)``,
        where ``n_windows = max(n_rows - timesteps, 0)``. When there are not
        enough rows for a single window, correctly shaped empty arrays are
        returned so that downstream ``.shape[1]`` / ``.shape[2]`` lookups still work.

    Raises:
        ValueError: If ``X`` and ``y`` do not have the same number of rows.
    """
    X = np.asarray(X, dtype=np.float32)
    y = np.asarray(y)
    if len(X) != len(y):
        # Misaligned inputs would silently pair windows with the wrong labels.
        raise ValueError(
            f'X and y must have the same number of rows, got {len(X)} and {len(y)}'
        )

    n_windows = max(len(X) - timesteps, 0)
    if n_windows == 0:
        # Keep the trailing dimensions so the result is still a valid (empty) batch.
        return np.empty((0, timesteps) + X.shape[1:], dtype=np.float32), y[:0].copy()

    # Row index matrix: entry (i, j) is i + j, so X[window_index][i] == X[i:i + timesteps].
    window_index = np.arange(n_windows)[:, None] + np.arange(timesteps)[None, :]
    X_seq = X[window_index]
    y_seq = y[timesteps:timesteps + n_windows].copy()
    return X_seq, y_seq

# Turn each normalized partition into (window, next-label) pairs.
print('Creating Sequences (timesteps=20)...')
(X_train_seq, y_train_seq), (X_val_seq, y_val_seq), (X_test_seq, y_test_seq) = (
    create_sequences(features, labels, timesteps=20)
    for features, labels in ((X_train, y_train), (X_val, y_val), (X_test, y_test))
)

print(f'X_train_seq: {X_train_seq.shape}')
print(f'X_val_seq: {X_val_seq.shape}')
print(f'X_test_seq: {X_test_seq.shape}')

# Per-class sample counts among the training sequence labels.
unique, counts = np.unique(y_train_seq, return_counts=True)
n_train_labels = len(y_train_seq)
print('\nClass Distribution in Train Set:')
for cls, n_samples in zip(unique, counts):
    share = 100 * n_samples / n_train_labels
    print(f'  Class {cls}: {n_samples:6d} ({share:5.2f}%)')

## Step 8: Calculate Class Weights


In [None]:
print('\nCalculating Aggressive Class Weights...')

unique, counts = np.unique(y_train_seq, return_counts=True)
total = len(y_train_seq)

# Class 0 (NO_SIGNAL, the majority) keeps weight 1.0; every other class gets
# total / (5 * count), boosted by a factor of 3.
class_weights = {
    cls: 1.0 if cls == 0 else total / (5 * n_samples) * 3
    for cls, n_samples in zip(unique, counts)
}

print('\nClass Weights:')
for cls in sorted(class_weights):
    print(f'  Class {cls}: {class_weights[cls]:.4f}')

# Detection targets: 1.0 for any non-zero label, 0.0 for label 0.
y_train_binary, y_val_binary, y_test_binary = (
    (labels != 0).astype(np.float32)
    for labels in (y_train_seq, y_val_seq, y_test_seq)
)

print('\nBinary Signal Distribution:')
for split_name, flags in (
    ('Train', y_train_binary),
    ('Val', y_val_binary),
    ('Test', y_test_binary),
):
    print(f'  {split_name}: {flags.sum():.0f}/{len(flags)} ({100*flags.mean():.2f}%)')

## Step 9: Build Classification Model


In [None]:
print('Building Classification Model (5-class)...')

# Stacked LSTM classifier over (timesteps, features) windows with a 5-way softmax head.
# The input shape is declared with an explicit keras.Input layer rather than the
# `input_shape=` argument on the first LSTM, which newer Keras releases warn about.
clf_model = keras.Sequential([
    keras.Input(shape=(X_train_seq.shape[1], X_train_seq.shape[2])),
    layers.LSTM(256, return_sequences=True),
    layers.Dropout(0.3),
    layers.LSTM(128, return_sequences=False),
    layers.Dropout(0.3),
    layers.Dense(64, activation='relu'),
    layers.Dropout(0.3),
    layers.Dense(32, activation='relu'),
    layers.Dropout(0.2),
    layers.Dense(5, activation='softmax')
])

# Sparse categorical cross-entropy: targets are integer class ids, not one-hot vectors.
clf_model.compile(
    optimizer=keras.optimizers.Adam(learning_rate=0.0005),
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy']
)

# summary() prints the table itself and returns None, so it must not be wrapped
# in print() (that would emit a stray "None" line).
clf_model.summary()

## Step 10: Build Detection Model


In [None]:
print('Building Detection Model (binary)...')

# Smaller stacked LSTM over the same (timesteps, features) windows with a single
# sigmoid output for the binary signal / no-signal target.
# The input shape is declared with an explicit keras.Input layer rather than the
# `input_shape=` argument on the first LSTM, which newer Keras releases warn about.
det_model = keras.Sequential([
    keras.Input(shape=(X_train_seq.shape[1], X_train_seq.shape[2])),
    layers.LSTM(128, return_sequences=True),
    layers.Dropout(0.2),
    layers.LSTM(64, return_sequences=False),
    layers.Dropout(0.2),
    layers.Dense(32, activation='relu'),
    layers.Dropout(0.2),
    layers.Dense(1, activation='sigmoid')
])

det_model.compile(
    optimizer=keras.optimizers.Adam(learning_rate=0.001),
    loss='binary_crossentropy',
    metrics=['accuracy']
)

# summary() prints the table itself and returns None, so it must not be wrapped
# in print() (that would emit a stray "None" line).
det_model.summary()

## Step 11: Train Classification Model


In [None]:
print('Training Classification Model...')

# Stop once val_loss has not improved for 20 epochs and roll back to the best weights.
early_stop = keras.callbacks.EarlyStopping(
    monitor='val_loss', patience=20, restore_best_weights=True, verbose=1
)

# Halve the learning rate after 10 stagnant epochs, never going below 1e-6.
reduce_lr = keras.callbacks.ReduceLROnPlateau(
    monitor='val_loss', factor=0.5, patience=10, min_lr=1e-6, verbose=1
)

# Fit on the 5-class targets, weighting samples by the per-class weights.
clf_fit_options = dict(
    validation_data=(X_val_seq, y_val_seq),
    epochs=200,
    batch_size=32,
    class_weight=class_weights,
    callbacks=[early_stop, reduce_lr],
    verbose=1,
)
clf_history = clf_model.fit(X_train_seq, y_train_seq, **clf_fit_options)

print('\nClassification Training Complete!')

## Step 12: Train Detection Model


In [None]:
print('Training Detection Model...')

# Fit on the binary targets. The early_stop / reduce_lr callback objects are
# the ones created in the classification training cell; no class weights are
# passed for this model.
det_fit_options = dict(
    validation_data=(X_val_seq, y_val_binary),
    epochs=150,
    batch_size=32,
    callbacks=[early_stop, reduce_lr],
    verbose=1,
)
det_history = det_model.fit(X_train_seq, y_train_binary, **det_fit_options)

print('\nDetection Training Complete!')

## Step 13: Evaluate Models


In [None]:
from sklearn.metrics import precision_score, recall_score, f1_score, confusion_matrix, classification_report

banner = '='*70

# ---- 5-class classifier on the held-out test sequences ----
print(banner)
print('CLASSIFICATION MODEL EVALUATION')
print(banner)

# Predicted class = index of the largest softmax probability.
y_clf_pred = clf_model.predict(X_test_seq, verbose=0)
y_clf_pred_labels = np.argmax(y_clf_pred, axis=1)

clf_loss, clf_acc = clf_model.evaluate(X_test_seq, y_test_seq, verbose=0)

# Support-weighted averages across classes; zero_division=0 scores undefined
# cases as 0 instead of warning.
weighted = dict(average="weighted", zero_division=0)
clf_precision = precision_score(y_test_seq, y_clf_pred_labels, **weighted)
clf_recall = recall_score(y_test_seq, y_clf_pred_labels, **weighted)
clf_f1 = f1_score(y_test_seq, y_clf_pred_labels, **weighted)

print(f'\nTest Loss: {clf_loss:.4f}')
print(f'Test Accuracy: {clf_acc:.4f}')
print(f'Precision: {clf_precision:.4f}')
print(f'Recall: {clf_recall:.4f}')
print(f'F1-Score: {clf_f1:.4f}')

print('\nConfusion Matrix:')
cm = confusion_matrix(y_test_seq, y_clf_pred_labels)
print(cm)

print('\nClassification Report:')
print(classification_report(y_test_seq, y_clf_pred_labels, zero_division=0))

# ---- binary detector on the same test sequences ----
print(banner)
print('DETECTION MODEL EVALUATION')
print(banner)

# Sigmoid outputs above 0.5 count as a predicted signal.
y_det_pred = det_model.predict(X_test_seq, verbose=0).flatten()
y_det_pred_labels = (y_det_pred > 0.5).astype(int)

det_loss, det_acc = det_model.evaluate(X_test_seq, y_test_binary, verbose=0)

det_precision = precision_score(y_test_binary, y_det_pred_labels, zero_division=0)
det_recall = recall_score(y_test_binary, y_det_pred_labels, zero_division=0)
det_f1 = f1_score(y_test_binary, y_det_pred_labels, zero_division=0)

print(f'\nTest Loss: {det_loss:.4f}')
print(f'Test Accuracy: {det_acc:.4f}')
print(f'Precision: {det_precision:.4f}')
print(f'Recall: {det_recall:.4f}')
print(f'F1-Score: {det_f1:.4f}')

## Step 14: Save Models


In [None]:
print('\nSaving Models...')

model_dir = f'{project_root}/models/btcusdt_15m'
# Guard against the directory being missing (e.g. removed since the setup cell ran).
os.makedirs(model_dir, exist_ok=True)

# Save classification model
clf_path = f'{model_dir}/classification.h5'
clf_model.save(clf_path)
print(f'  Classification Model: {clf_path}')

# Save detection model
det_path = f'{model_dir}/detection.h5'
det_model.save(det_path)
print(f'  Detection Model: {det_path}')

# Save everything needed to reproduce the preprocessing at inference time,
# together with the test metrics. The normalization vectors are positional, so
# the ordered feature names and the window length are stored alongside them;
# without those the saved mean/std cannot be applied to new data.
params = {
    'symbol': 'BTCUSDT',
    'timeframe': '15m',
    'timestamp': datetime.now().isoformat(),
    'features': list(selected_features),
    'timesteps': int(X_train_seq.shape[1]),
    'normalization': {
        'mean': mean.tolist(),
        'std': std.tolist()
    },
    # Plain int/float so the JSON encoder never sees NumPy scalar types.
    'class_weights': {int(k): float(v) for k, v in class_weights.items()},
    'metrics': {
        'classification': {
            'test_loss': float(clf_loss),
            'test_acc': float(clf_acc)
        },
        'detection': {
            'test_loss': float(det_loss),
            'test_acc': float(det_acc)
        }
    }
}

params_path = f'{model_dir}/params.json'
with open(params_path, 'w', encoding='utf-8') as f:
    json.dump(params, f, indent=2)
print(f'  Parameters: {params_path}')

print('\n✓ All models saved successfully!')

## Step 15: Summary Report


In [None]:
# Assemble the whole report first, then emit it with a single print call.
rule = '='*70
summary_lines = [
    '\n' + rule,
    'TEST TRAINING SUMMARY - BTC USDT 15m',
    rule,
    # Dataset sizes
    '\nData:',
    f'  Total bars: {len(btc_15m)}',
    f'  Train: {len(train_df)} bars',
    f'  Val: {len(val_df)} bars',
    f'  Test: {len(test_df)} bars',
    # Feature counts
    '\nFeatures:',
    f'  Total: {len(feature_cols)}',
    f'  Selected: {len(selected_features)}',
    # Sequence counts
    '\nSequences:',
    '  Timesteps: 20',
    f'  Train sequences: {len(X_train_seq)}',
    f'  Val sequences: {len(X_val_seq)}',
    f'  Test sequences: {len(X_test_seq)}',
    # Saved models and their test accuracy
    '\nModels:',
    f'  Classification (5-class): {clf_path}',
    f'    Test Accuracy: {clf_acc:.4f}',
    f'  Detection (Binary): {det_path}',
    f'    Test Accuracy: {det_acc:.4f}',
    # Saved parameter file
    '\nParameters:',
    f'  Saved: {params_path}',
    rule,
    '✓ TEST SUCCESSFUL - READY FOR BATCH TRAINING',
    rule,
]
print('\n'.join(summary_lines))