# Fast Training - BTC 15m (15-20 minutes)

Optimized training for speed without sacrificing accuracy.

Changes from standard training:
- Features: 40 → 20 (50% fewer)
- Model: 256-128-64-32 → 128-64 (60% fewer parameters)
- Timesteps: 20 → 10
- Early Stop Patience: 20 → 8
- Batch Size: 32 → 64

Expected: 15-20 min training, 93-95% accuracy


## Step 1: Setup


In [None]:
# Report the TensorFlow build and how many GPUs it can see before any
# expensive work starts.
import tensorflow as tf

print(f'TensorFlow: {tf.__version__}')
gpu_devices = tf.config.list_physical_devices("GPU")
print(f'GPU: {len(gpu_devices)} device(s)')

# Mount Google Drive at /content/drive; the project folder used by the
# following cells lives underneath it. `google.colab` is supplied by the
# Colab runtime, so this cell is expected to run there.
from google.colab import drive

drive.mount('/content/drive')

In [None]:
import sys
import os

# Put the project checkout on the import path first; the `data.*` and
# `src.*` imports at the bottom of this cell presumably resolve from here.
project_root = '/content/drive/MyDrive/crypto-zigzag-ml'
sys.path.insert(0, project_root)

# Directory that will receive the trained BTCUSDT 15m artifacts.
os.makedirs(f'{project_root}/models/btcusdt_15m', exist_ok=True)

# Third-party and stdlib dependencies (original import order preserved).
import pandas as pd
import numpy as np
import json
from datetime import datetime
from tensorflow import keras
from tensorflow.keras import layers
from sklearn.metrics import (
    precision_score,
    recall_score,
    f1_score,
    confusion_matrix,
    classification_report,
)

# Project-local helpers.
from data.fetch_data import CryptoDataFetcher
from src.zigzag_indicator import ZigZagIndicator
from src.features import FeatureEngineer
from src.utils import time_series_split

print('Ready!')


## Step 2: Data Preparation (fetching, labeling and feature engineering unchanged; feature count and timesteps reduced)


In [None]:
# Download the BTCUSDT 15-minute bars through the project fetcher.
print('Fetching data...')
fetcher = CryptoDataFetcher()
btc_15m = fetcher.fetch_symbol_timeframe('BTCUSDT', '15m')
print(f'Bars: {len(btc_15m)}')

# Label every bar with the ZigZag indicator; the result is read back from
# the `zigzag_label` column below.
print('Applying ZigZag...')
zigzag = ZigZagIndicator(depth=12, deviation=5, backstep=2)
btc_15m = zigzag.label_kbars(btc_15m)
print(f'Labels: {dict(btc_15m.zigzag_label.value_counts().sort_index())}')

# Build the feature columns and fill gaps: forward-fill first (only past
# values are propagated), then zero whatever is still missing at the start.
print('Engineering features...')
fe = FeatureEngineer(lookback_periods=[5, 10, 20, 50, 200])
btc_15m = fe.calculate_all_features(btc_15m)
feature_cols = fe.get_feature_columns(btc_15m)
# `.ffill()` replaces `fillna(method='ffill')`, which is deprecated in
# pandas >= 2.1; the result is the same.
btc_15m[feature_cols] = btc_15m[feature_cols].ffill().fillna(0)
print(f'Features: {len(feature_cols)}')

In [None]:
print('Splitting data...')
# Chronological split; the 0.7 / 0.15 arguments are presumably the train and
# validation fractions, with the remainder as test -- confirm in src.utils.
train_df, val_df, test_df = time_series_split(btc_15m, 0.7, 0.15)

# OPTIMIZATION: Use 20 features instead of 40
# (the first 20 entries of feature_cols, in their existing order)
selected_features = feature_cols[:20]
print(f'Selected features: {len(selected_features)}')

# Feature matrices as float32, one per split.
X_train, X_val, X_test = (
    part[selected_features].values.astype(np.float32)
    for part in (train_df, val_df, test_df)
)
# Matching label vectors.
y_train, y_val, y_test = (
    part['zigzag_label'].values
    for part in (train_df, val_df, test_df)
)

# Standardise with statistics from the training split only, so validation
# and test data do not influence the scaling. The epsilon guards against
# division by zero for constant columns.
mean = X_train.mean(axis=0)
std = X_train.std(axis=0) + 1e-8
X_train, X_val, X_test = (
    (matrix - mean) / std
    for matrix in (X_train, X_val, X_test)
)
print('Normalized')

In [None]:
# OPTIMIZATION: Use 10 timesteps instead of 20
def create_sequences(X, y, timesteps=10):
    """Cut a row-ordered feature matrix into overlapping fixed-length windows.

    Window ``i`` contains rows ``X[i : i + timesteps]`` and is paired with
    ``y[i + timesteps]``, i.e. the label of the row immediately *after* the
    window.

    Args:
        X: Array-like of rows (typically ``(n_rows, n_features)``).
        y: Array-like of labels, indexable up to ``len(X) - 1``.
        timesteps: Number of consecutive rows per window.

    Returns:
        Tuple ``(X_seq, y_seq)``. ``X_seq`` is float32 with shape
        ``(n_windows, timesteps, *X.shape[1:])`` and ``y_seq`` has
        ``n_windows`` entries, where ``n_windows = len(X) - timesteps``.
        When there are too few rows for a single window, both results are
        empty but ``X_seq`` keeps its trailing dimensions so downstream
        ``.shape[1]`` / ``.shape[2]`` lookups still work.

    Raises:
        IndexError: If ``y`` is shorter than ``X``.
    """
    X = np.asarray(X, dtype=np.float32)
    y = np.asarray(y)

    n_windows = len(X) - timesteps
    if n_windows <= 0:
        # Not enough rows for even one window: return correctly shaped empties
        # instead of a rank-1 array.
        empty_windows = np.empty((0, timesteps) + X.shape[1:], dtype=np.float32)
        return empty_windows, y[:0].copy()

    starts = np.arange(n_windows)
    # Entry (i, t) is the row index i + t, so one gather builds every window.
    window_rows = starts[:, None] + np.arange(timesteps)[None, :]
    # Integer-array indexing copies the data and raises IndexError when y is
    # too short, matching the element-wise lookup it replaces.
    return X[window_rows], y[starts + timesteps]

# Window each split independently so no sequence straddles a split boundary.
X_train_seq, y_train_seq = create_sequences(X_train, y_train, timesteps=10)
X_val_seq, y_val_seq = create_sequences(X_val, y_val, timesteps=10)
X_test_seq, y_test_seq = create_sequences(X_test, y_test, timesteps=10)

for split_name, split_windows in (
    ('Train', X_train_seq),
    ('Val', X_val_seq),
    ('Test', X_test_seq),
):
    print(f'{split_name}: {split_windows.shape}')

# Class weights for the classifier: label 0 keeps weight 1.0, every other
# label gets total / (5 * count) * 3, so rarer labels weigh more.
unique, counts = np.unique(y_train_seq, return_counts=True)
total = len(y_train_seq)
class_weights = {
    label: (1.0 if label == 0 else total / (5 * count) * 3)
    for label, count in zip(unique, counts)
}

# Binary targets for the detection model: 1.0 for any non-zero label.
y_train_binary = np.not_equal(y_train_seq, 0).astype(np.float32)
y_val_binary = np.not_equal(y_val_seq, 0).astype(np.float32)
y_test_binary = np.not_equal(y_test_seq, 0).astype(np.float32)
print('Done')

## Step 3: Build Optimized Models


In [None]:
# OPTIMIZATION: Simpler architecture (fewer parameters)
print('Building Classification Model...')

# Shape of a single input window: (timesteps, features).
clf_input_shape = (X_train_seq.shape[1], X_train_seq.shape[2])

# Two stacked LSTMs followed by a small dense head. The 5-way softmax is
# trained with a sparse loss, so labels are expected to be integer class ids
# in 0..4 -- TODO confirm against the ZigZag labeller.
clf_model = keras.Sequential()
clf_model.add(layers.LSTM(128, input_shape=clf_input_shape, return_sequences=True))
clf_model.add(layers.Dropout(0.2))
clf_model.add(layers.LSTM(64, return_sequences=False))
clf_model.add(layers.Dropout(0.2))
clf_model.add(layers.Dense(32, activation='relu'))
clf_model.add(layers.Dropout(0.2))
clf_model.add(layers.Dense(5, activation='softmax'))

clf_optimizer = keras.optimizers.Adam(learning_rate=0.001)
clf_model.compile(
    optimizer=clf_optimizer,
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'],
)

print(f'Parameters: {clf_model.count_params():,}')

In [None]:
print('Building Detection Model...')

# Shape of a single input window: (timesteps, features).
det_input_shape = (X_train_seq.shape[1], X_train_seq.shape[2])

# Smaller two-layer LSTM with a single sigmoid output for the binary
# "non-zero label" target.
det_model = keras.Sequential()
det_model.add(layers.LSTM(64, input_shape=det_input_shape, return_sequences=True))
det_model.add(layers.Dropout(0.2))
det_model.add(layers.LSTM(32, return_sequences=False))
det_model.add(layers.Dropout(0.2))
det_model.add(layers.Dense(16, activation='relu'))
det_model.add(layers.Dense(1, activation='sigmoid'))

det_optimizer = keras.optimizers.Adam(learning_rate=0.001)
det_model.compile(
    optimizer=det_optimizer,
    loss='binary_crossentropy',
    metrics=['accuracy'],
)

print(f'Parameters: {det_model.count_params():,}')

## Step 4: Train Classification Model


In [None]:
print('Training Classification Model (FAST)...')

# OPTIMIZATION: More aggressive early stopping
# Stop after 8 epochs without a val_loss improvement of at least 0.001 and
# roll back to the best weights seen.
early_stop = keras.callbacks.EarlyStopping(
    monitor='val_loss', patience=8, min_delta=0.001,
    restore_best_weights=True, verbose=1,
)

# Multiply the learning rate by 0.7 after 5 stagnant epochs, down to 1e-5.
reduce_lr = keras.callbacks.ReduceLROnPlateau(
    monitor='val_loss', factor=0.7, patience=5, min_lr=1e-5, verbose=1,
)

clf_fit_options = dict(
    validation_data=(X_val_seq, y_val_seq),
    epochs=100,
    batch_size=64,
    class_weight=class_weights,
    callbacks=[early_stop, reduce_lr],
    verbose=1,
)
clf_history = clf_model.fit(X_train_seq, y_train_seq, **clf_fit_options)

print('✓ Classification Training Complete!')

## Step 5: Train Detection Model


In [None]:
print('Training Detection Model (FAST)...')

# The same early_stop / reduce_lr callback objects created for the
# classification run are passed again here. No class weights are used for
# the binary target.
det_fit_options = dict(
    validation_data=(X_val_seq, y_val_binary),
    epochs=80,
    batch_size=64,
    callbacks=[early_stop, reduce_lr],
    verbose=1,
)
det_history = det_model.fit(X_train_seq, y_train_binary, **det_fit_options)

print('✓ Detection Training Complete!')

## Step 6: Evaluate


In [None]:
# Classification
# Class probabilities -> predicted class id (argmax), plus Keras' own
# loss/accuracy on the held-out test windows.
y_clf_pred = clf_model.predict(X_test_seq, verbose=0)
y_clf_pred_labels = y_clf_pred.argmax(axis=1)
clf_loss, clf_acc = clf_model.evaluate(X_test_seq, y_test_seq, verbose=0)

banner = '=' * 70
print(banner)
print('CLASSIFICATION MODEL')
print(banner)
print(f'Test Accuracy: {clf_acc:.4f}')
# Multi-class scores are support-weighted averages over the classes.
for metric_name, metric_fn in (
    ('Precision', precision_score),
    ('Recall', recall_score),
    ('F1', f1_score),
):
    print(f'{metric_name}: {metric_fn(y_test_seq, y_clf_pred_labels, average="weighted", zero_division=0):.4f}')

# Detection
# Sigmoid outputs are thresholded at 0.5 to get hard 0/1 predictions.
y_det_pred = det_model.predict(X_test_seq, verbose=0).flatten()
y_det_pred_labels = (y_det_pred > 0.5).astype(int)
det_loss, det_acc = det_model.evaluate(X_test_seq, y_test_binary, verbose=0)

print(banner)
print('DETECTION MODEL')
print(banner)
print(f'Test Accuracy: {det_acc:.4f}')
for metric_name, metric_fn in (
    ('Precision', precision_score),
    ('Recall', recall_score),
    ('F1', f1_score),
):
    print(f'{metric_name}: {metric_fn(y_test_binary, y_det_pred_labels, zero_division=0):.4f}')

## Step 7: Save Models


In [None]:
model_dir = f'{project_root}/models/btcusdt_15m'
# Make this cell safe to run on its own, even if the directory was removed
# after the setup cell created it.
os.makedirs(model_dir, exist_ok=True)

# Persist both trained networks in HDF5 format.
clf_path = f'{model_dir}/classification_fast.h5'
clf_model.save(clf_path)

det_path = f'{model_dir}/detection_fast.h5'
det_model.save(det_path)

# Metadata needed to reproduce preprocessing at inference time. Values are
# read from the objects that were actually used for training so the file
# cannot drift from the run; batch size is only passed as a literal to fit(),
# so it is repeated here.
params = {
    'symbol': 'BTCUSDT',
    'timeframe': '15m',
    'version': 'fast_optimized',
    'timestamp': datetime.now().isoformat(),
    'optimization': {
        'features': len(selected_features),
        'timesteps': int(X_train_seq.shape[1]),
        'batch_size': 64,
        'early_stop_patience': int(early_stop.patience)
    },
    'metrics': {
        'clf_acc': float(clf_acc),
        'det_acc': float(det_acc)
    },
    # Column names in the same order as the mean/std vectors below; without
    # them the normalisation statistics cannot be matched to input columns.
    'feature_names': [str(name) for name in selected_features],
    'normalization': {
        'mean': mean.tolist(),
        'std': std.tolist()
    },
    # Cast NumPy scalars to plain Python types for JSON.
    'class_weights': {int(k): float(v) for k, v in class_weights.items()}
}

params_path = f'{model_dir}/params_fast.json'
with open(params_path, 'w') as f:
    json.dump(params, f, indent=2)

print(f'✓ Models saved:')
print(f'  {clf_path}')
print(f'  {det_path}')
print(f'  {params_path}')

## Summary


In [None]:
# Final recap. The training-time line and the "was ..." values are fixed
# text, not measurements taken during this run; only the two accuracies
# come from the evaluation step.
summary_lines = [
    '\n' + '=' * 70,
    'FAST TRAINING COMPLETE',
    '=' * 70,
    f'\nTraining Time: ~15-20 minutes',
    f'Classification Accuracy: {clf_acc:.4f}',
    f'Detection Accuracy: {det_acc:.4f}',
    f'\nOptimizations Applied:',
    f'  ✓ Features: 20 (was 40)',
    f'  ✓ Timesteps: 10 (was 20)',
    f'  ✓ Model: 128-64 (was 256-128-64-32)',
    f'  ✓ Batch Size: 64 (was 32)',
    f'  ✓ Early Stop Patience: 8 (was 20)',
    f'\nNext: Ready for batch training 44 models!',
]
# One print per original line, emitted in a single call.
print('\n'.join(summary_lines))