# BTC 15m Classification Model - Quick Test

Train a single 5-class signal classifier for BTC 15m to validate accuracy.
- Training time: 15-20 minutes
- Expected accuracy: 94-96%

**This is a quick test to verify model quality before batch training all 44 symbols.**


## Setup


In [None]:
!pip install tensorflow pandas numpy scikit-learn -q

import tensorflow as tf

# Report the runtime up front so a CPU-only session is noticed before training.
tf_version = tf.__version__
print(f'TensorFlow: {tf_version}')
gpu_devices = tf.config.list_physical_devices("GPU")
print(f'GPU: {gpu_devices}')


In [None]:
import os
import sys

from google.colab import drive

# Attach Google Drive to the Colab filesystem.
drive.mount('/content/drive')

# Project folder on Drive; it is created on first use and placed at the front
# of sys.path so modules stored inside it can be imported.
project_root = '/content/drive/MyDrive/crypto-zigzag-ml'
os.makedirs(project_root, exist_ok=True)
sys.path.insert(0, project_root)

print('Google Drive mounted')

In [None]:
import pandas as pd, numpy as np, json
from datetime import datetime
from pathlib import Path
from tensorflow import keras
from tensorflow.keras import layers
from sklearn.metrics import precision_score, recall_score, f1_score, confusion_matrix, classification_report

# Project-local modules; they are expected to be importable through the
# project_root entry that was added to sys.path.
try:
    from data.fetch_data import CryptoDataFetcher
    from src.zigzag_indicator import ZigZagIndicator
    from src.features import FeatureEngineer
    from src.utils import time_series_split
    print('Modules imported successfully')
except Exception as e:
    # Best-effort: report the failure instead of aborting the cell. The names
    # above stay undefined in that case, so the cells that use them will fail.
    print(f'Import error: {e}')
    print('You need to upload your data/, src/ modules to Google Drive first')


## Configuration


In [None]:
# Control parameters for this single-model run.
CONFIG = dict(
    symbol='BTCUSDT',          # market passed to the data fetcher
    timeframe='15m',           # candle interval passed to the data fetcher
    features=20,               # how many feature columns are kept
    timesteps=10,              # rows per input window
    batch_size=64,
    epochs=100,                # upper bound; early stopping may end sooner
    early_stop_patience=8,
    lstm_layers=[128, 64],     # units per LSTM layer
)

print('Configuration:')
print('\n'.join(f'  {name}: {value}' for name, value in CONFIG.items()))


## Helper Functions


In [None]:
def create_sequences(X, y, timesteps=10):
    """Slice rows into overlapping windows paired with the next row's label.

    Window ``i`` holds rows ``X[i : i + timesteps]`` and is paired with
    ``y[i + timesteps]``, i.e. the label of the row right after the window.

    Args:
        X: Array-like of per-row features; the first axis is the row axis.
        y: Array-like of per-row labels, at least as long as ``X``.
        timesteps: Number of consecutive rows per window (must be >= 1).

    Returns:
        Tuple ``(X_seq, y_seq)``. ``X_seq`` is float32 with shape
        ``(n, timesteps, *X.shape[1:])`` and ``y_seq`` holds ``n`` labels in
        the dtype of ``y``, where ``n = max(len(X) - timesteps, 0)``. When
        there are too few rows for a single window, ``X_seq`` still keeps its
        trailing dimensions so callers can read ``X_seq.shape[1:]``.

    Raises:
        ValueError: If ``timesteps`` is smaller than 1.
        IndexError: If ``y`` has fewer rows than ``X``.
    """
    if timesteps < 1:
        raise ValueError(f'timesteps must be >= 1, got {timesteps}')

    features = np.asarray(X, dtype=np.float32)
    labels = np.asarray(y)

    n_windows = max(len(features) - timesteps, 0)
    if n_windows and len(labels) < len(features):
        raise IndexError('y has fewer rows than X; labels cannot be aligned')

    # Row indices of every window, shape (n_windows, timesteps). A single
    # gather replaces the per-window Python loop and, unlike stacking an
    # empty list, keeps the trailing dimensions when n_windows is 0.
    row_idx = np.arange(n_windows)[:, None] + np.arange(timesteps)[None, :]
    return features[row_idx], labels[timesteps:timesteps + n_windows]

def build_clf_model(input_shape, lstm_layers=(128, 64), num_classes=5,
                    dropout=0.2, learning_rate=0.001):
    """Build and compile a stacked-LSTM softmax classifier.

    Args:
        input_shape: ``(timesteps, n_features)`` of one input window.
        lstm_layers: Units for each LSTM layer, in order. Every layer except
            the last returns the full sequence so the next LSTM can consume
            it. Any non-empty sequence is accepted.
        num_classes: Width of the softmax output layer.
        dropout: Dropout rate applied after each LSTM and after the dense
            hidden layer.
        learning_rate: Learning rate of the Adam optimizer.

    Returns:
        A compiled ``keras.Sequential`` model that expects integer class
        labels (sparse categorical cross-entropy) and reports accuracy.

    Raises:
        ValueError: If ``lstm_layers`` is empty.
    """
    # Copy into a list: accepts tuples/lists alike and never shares a
    # mutable default between calls.
    units_per_layer = list(lstm_layers)
    if not units_per_layer:
        raise ValueError('lstm_layers must contain at least one layer size')

    # An explicit Input replaces passing input_shape= to the first LSTM.
    stack = [keras.Input(shape=input_shape)]
    last_index = len(units_per_layer) - 1
    for index, units in enumerate(units_per_layer):
        stack.append(layers.LSTM(units, return_sequences=index < last_index))
        stack.append(layers.Dropout(dropout))
    stack.extend([
        layers.Dense(32, activation='relu'),
        layers.Dropout(dropout),
        layers.Dense(num_classes, activation='softmax'),
    ])

    model = keras.Sequential(stack)
    model.compile(
        optimizer=keras.optimizers.Adam(learning_rate=learning_rate),
        loss='sparse_categorical_crossentropy',
        metrics=['accuracy'],
    )
    return model

print('Functions ready')

## Data Preparation


In [None]:
symbol, timeframe = CONFIG['symbol'], CONFIG['timeframe']
print(f'Fetching {symbol} {timeframe}...')

# Load the candles for the configured market and interval.
fetcher = CryptoDataFetcher()
data = fetcher.fetch_symbol_timeframe(symbol, timeframe)

# Quick sanity report: dimensions, covered period and row count.
first_ts, last_ts = data.index[0], data.index[-1]
print(f'Data shape: {data.shape}')
print(f'Date range: {first_ts} to {last_ts}')
print(f'Samples: {len(data)}')


In [None]:
print('Applying ZigZag indicator...')

# Label every bar; the result is read back from the 'zigzag_label' column.
zigzag_params = dict(depth=12, deviation=5, backstep=2)
zigzag = ZigZagIndicator(**zigzag_params)
data = zigzag.label_kbars(data)

print('ZigZag labels added')
print('Label distribution:')
label_counts = data['zigzag_label'].value_counts().sort_index()
print(label_counts)


In [None]:
print('Engineering features...')

fe = FeatureEngineer(lookback_periods=[5, 10, 20, 50, 200])
data = fe.calculate_all_features(data)
feature_cols = fe.get_feature_columns(data)

print(f'Total features: {len(feature_cols)}')
print(f'Selected features (top {CONFIG["features"]}):')

# Forward-fill gaps from the previous row, then zero-fill whatever is still
# missing at the start of the series. DataFrame.ffill() is used because
# fillna(method='ffill') is deprecated in pandas 2.x.
data[feature_cols] = data[feature_cols].ffill().fillna(0)

# NOTE(review): this keeps the first N columns in the order returned by
# get_feature_columns(); no importance ranking is applied here.
selected_features = feature_cols[:CONFIG['features']]
for i, feat in enumerate(selected_features, 1):
    print(f'  {i:2d}. {feat}')


In [None]:
print('Time series split...')

# Split with the fractions 0.7 and 0.15 passed to the project helper.
train_df, val_df, test_df = time_series_split(data, 0.7, 0.15)

# Report each part's row count and its share of the full dataset.
total_rows = len(data)
for label, part in (('Train:', train_df), ('Val:  ', val_df), ('Test: ', test_df)):
    print(f'{label} {len(part)} ({len(part)/total_rows*100:.1f}%)')


In [None]:
print('Preparing training data...')


def _features_and_labels(frame):
    """Return (float32 feature matrix, label vector) for one split."""
    feature_matrix = frame[selected_features].values.astype(np.float32)
    return feature_matrix, frame['zigzag_label'].values


X_train, y_train = _features_and_labels(train_df)
X_val, y_val = _features_and_labels(val_df)
X_test, y_test = _features_and_labels(test_df)

print(f'X_train: {X_train.shape}')
print(f'y_train: {y_train.shape}')


In [None]:
print('Normalizing...')

# Per-feature statistics come from the training split only and are reused
# unchanged for validation and test. The small epsilon avoids dividing by
# zero for constant columns.
mean = X_train.mean(axis=0)
std = X_train.std(axis=0) + 1e-8

X_train, X_val, X_test = (
    (split - mean) / std for split in (X_train, X_val, X_test)
)

print('Normalization complete')


In [None]:
print('Creating sequences...')

# Window every split with the same length; each split is windowed on its
# own, so no window spans two splits.
window = CONFIG['timesteps']
(X_train_seq, y_train_seq), (X_val_seq, y_val_seq), (X_test_seq, y_test_seq) = (
    create_sequences(features, labels, window)
    for features, labels in ((X_train, y_train), (X_val, y_val), (X_test, y_test))
)

print(f'X_train_seq: {X_train_seq.shape}')
print(f'y_train_seq: {y_train_seq.shape}')
print(f'X_test_seq: {X_test_seq.shape}')


In [None]:
print('Computing class weights...')

n_classes = 5       # matches the 5-unit softmax output of the classifier
minority_boost = 3  # extra up-weighting applied to every class except 0

unique, counts = np.unique(y_train_seq, return_counts=True)
total = len(y_train_seq)

# Start every class at 1.0 so a class that never occurs in the training
# windows still has an entry; some Keras versions reject a class_weight
# dict whose keys are not exactly 0..n-1. Class 0 keeps the weight 1.0.
class_weights = {cls: 1.0 for cls in range(n_classes)}
for u, c in zip(unique, counts):
    cls = int(u)  # plain-int key instead of a NumPy scalar
    if cls == 0:
        continue
    # Inverse-frequency weight, boosted for the non-zero classes.
    class_weights[cls] = float(total / (n_classes * c) * minority_boost)

print('Class weights:')
for k, v in sorted(class_weights.items()):
    print(f'  {k}: {v:.4f}')


## Model Training


In [None]:
print('Building model...')

# Stop once validation loss has not improved for the configured number of
# epochs, and restore the best weights seen.
early_stop_options = dict(
    monitor='val_loss',
    patience=CONFIG['early_stop_patience'],
    restore_best_weights=True,
    verbose=1,
)
early_stop = keras.callbacks.EarlyStopping(**early_stop_options)

# One input sample is (window length, features per row).
window_len = X_train_seq.shape[1]
n_features = X_train_seq.shape[2]
model = build_clf_model((window_len, n_features), lstm_layers=CONFIG['lstm_layers'])

model.summary()


In [None]:
banner = '=' * 70
print('Training...')
print(banner)

# Keyword options for fit(); validation data is what early stopping monitors.
fit_options = dict(
    validation_data=(X_val_seq, y_val_seq),
    epochs=CONFIG['epochs'],
    batch_size=CONFIG['batch_size'],
    class_weight=class_weights,
    callbacks=[early_stop],
    verbose=1,
)
history = model.fit(X_train_seq, y_train_seq, **fit_options)

print(banner)
print('Training complete!')


## Evaluation


In [None]:
print('Evaluating model...')

# evaluate() yields the loss followed by the compiled metric (accuracy).
test_scores = model.evaluate(X_test_seq, y_test_seq, verbose=0)
loss, acc = test_scores

for caption, score in (('Test Loss', loss), ('Test Accuracy', acc)):
    print(f'{caption}: {score:.4f}')


In [None]:
print('Making predictions...')

# Class probabilities per test window; the predicted class is the column
# with the highest probability.
y_pred_probs = model.predict(X_test_seq, verbose=0)
y_pred = np.argmax(y_pred_probs, axis=1)

print('Predicted classes distribution:')
n_predictions = len(y_pred)
for cls in range(5):
    count = (y_pred == cls).sum()
    pct = count / n_predictions * 100
    print(f'  Class {cls}: {count:5d} ({pct:5.1f}%)')


In [None]:
print('Detailed metrics...')
print()

# Options shared by the three summary scores below.
weighted_avg = dict(average='weighted', zero_division=0)
precision = precision_score(y_test_seq, y_pred, **weighted_avg)
recall = recall_score(y_test_seq, y_pred, **weighted_avg)
f1 = f1_score(y_test_seq, y_pred, **weighted_avg)

print(f'Precision (weighted): {precision:.4f}')
print(f'Recall (weighted):    {recall:.4f}')
print(f'F1-Score (weighted):  {f1:.4f}')
print()

# Per-class breakdown.
print('Classification Report:')
report_text = classification_report(y_test_seq, y_pred, zero_division=0)
print(report_text)


In [None]:
print('Confusion Matrix:')
print()

# Pin the label set so the matrix is always 5x5. Without labels=, a class
# missing from both y_test_seq and y_pred shrinks the matrix and the
# per-class loop below would index past its last row.
class_ids = list(range(5))
cm = confusion_matrix(y_test_seq, y_pred, labels=class_ids)
print(cm)
print()

print('Diagonal accuracy (per class):')
for i, row in zip(class_ids, cm):
    support = row.sum()  # number of true samples of class i
    if support > 0:
        class_acc = row[i] / support
        print(f'  Class {i}: {class_acc:.4f}')


## Save Model


In [None]:
print('Saving model...')

# One folder per symbol/timeframe under <project_root>/models/. The trailing
# slash is kept because file names are appended directly to model_dir.
run_name = f'{CONFIG["symbol"].lower()}_{CONFIG["timeframe"]}'
model_dir = f'{project_root}/models/{run_name}/'
Path(model_dir).mkdir(parents=True, exist_ok=True)

model_path = f'{model_dir}classification.h5'
model.save(model_path)
print(f'Model saved: {model_path}')


In [None]:
print('Saving parameters...')

# Everything needed to interpret the saved model: test metrics, the run
# configuration, and the training-split normalization statistics.
params = {
    'symbol': CONFIG['symbol'],
    'timeframe': CONFIG['timeframe'],
    'timestamp': datetime.now().isoformat(),
    'metrics': {
        'test_loss': float(loss),
        'test_acc': float(acc),
        'precision': float(precision),
        'recall': float(recall),
        'f1': float(f1)
    },
    'config': CONFIG,
    # Column names in the same order as the mean/std vectors below, so the
    # statistics can be mapped back to the right columns later.
    'feature_names': [str(name) for name in selected_features],
    'normalization': {
        'mean': mean.tolist(),
        'std': std.tolist()
    },
    # Plain int/float so the json module can serialize the mapping.
    'class_weights': {int(k): float(v) for k, v in class_weights.items()},
    'data_info': {
        'total_samples': len(data),
        'train_samples': len(X_train_seq),
        'val_samples': len(X_val_seq),
        'test_samples': len(X_test_seq)
    }
}

# Explicit encoding keeps the file identical regardless of platform default.
with open(f'{model_dir}params.json', 'w', encoding='utf-8') as f:
    json.dump(params, f, indent=2)

print(f'Parameters saved: {model_dir}params.json')


In [None]:
banner = '=' * 70

print()
print(banner)
# The title follows CONFIG so it stays correct when the symbol or timeframe
# is changed and the notebook is re-run.
print(f'TRAINING COMPLETE - {CONFIG["symbol"]} {CONFIG["timeframe"]} Classifier')
print(banner)
print(f'Model saved to: {model_dir}')
print(f'Test Accuracy: {acc:.4f}')
print(f'F1-Score: {f1:.4f}')
print()
print('Next steps:')
print('1. Review this accuracy')
print('2. If satisfied, train all 44 models with: 10_colab_batch_clf_training.ipynb')
print('3. Or adjust CONFIG and retrain this single model')
print(banner)
