# Crypto ZigZag ML - Advanced Balanced Training on Google Colab

This notebook handles extreme class imbalance with:
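- Focal loss for minority classes (defined in Step 4; note that the model in Step 6 is compiled with sparse categorical cross-entropy, so the focal loss is not applied during training unless you pass it to `model.compile`)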
- Weighted sampling
- Aggressive class weight tuning
- Multi-task learning setup


## Step 1: Setup


In [None]:
import tensorflow as tf
from google.colab import drive

# Report the GPU devices TensorFlow can see on this runtime.
print(f'GPU: {tf.config.list_physical_devices("GPU")}')

# Mount Google Drive at /content/drive so the project folder is reachable.
drive.mount('/content/drive')

In [None]:
import sys

# Root of the project on the mounted Drive; the `data` and `src` packages
# imported below are resolved through this sys.path entry.
project_root = '/content/drive/MyDrive/crypto-zigzag-ml'
# Guard the insert so re-running this cell does not stack duplicate entries
# on sys.path.
if project_root not in sys.path:
    sys.path.insert(0, project_root)

import pandas as pd
import numpy as np
from tensorflow import keras
from tensorflow.keras import layers

from data.fetch_data import CryptoDataFetcher
from src.zigzag_indicator import ZigZagIndicator
from src.features import FeatureEngineer
from src.utils import time_series_split

print('Ready!')

## Step 2: Prepare Data


In [None]:
# Fetch and process data
fetcher = CryptoDataFetcher()
btc_15m = fetcher.fetch_symbol_timeframe('BTCUSDT', '15m')

# Label the bars with the ZigZag indicator (presumably this is what adds the
# `zigzag_label` column read as the target further down - confirm in src).
zigzag = ZigZagIndicator(depth=12, deviation=5, backstep=2)
btc_15m = zigzag.label_kbars(btc_15m)

fe = FeatureEngineer(lookback_periods=[5, 10, 20, 50, 200])
btc_15m = fe.calculate_all_features(btc_15m)
feature_cols = fe.get_feature_columns(btc_15m)
# Forward-fill gaps, then zero-fill whatever is still missing at the start.
# `.ffill()` replaces `fillna(method='ffill')`, which is deprecated in
# pandas >= 2.1 and emits a FutureWarning.
btc_15m[feature_cols] = btc_15m[feature_cols].ffill().fillna(0)

print(f'Data shape: {btc_15m.shape}')

# Split
train_df, val_df, test_df = time_series_split(btc_15m, train_ratio=0.7, validation_ratio=0.15)

# Select features: keep the first 40 columns in the order returned by
# get_feature_columns.
selected_features = feature_cols[:40]

X_train = train_df[selected_features].values.astype(np.float32)
y_train = train_df['zigzag_label'].values
X_val = val_df[selected_features].values.astype(np.float32)
y_val = val_df['zigzag_label'].values
X_test = test_df[selected_features].values.astype(np.float32)
y_test = test_df['zigzag_label'].values

# Normalize with statistics from the training split only, then apply the same
# transform to validation and test. The 1e-8 keeps constant columns from
# dividing by zero.
mean = X_train.mean(axis=0)
std = X_train.std(axis=0) + 1e-8
X_train = (X_train - mean) / std
X_val = (X_val - mean) / std
X_test = (X_test - mean) / std

print(f'Train: {X_train.shape}, Val: {X_val.shape}, Test: {X_test.shape}')

## Step 3: Create Sequences with Weighted Sampling


In [None]:
def create_sequences(X, y, timesteps=20):
    """Build sliding-window samples for a sequence model.

    Sample ``i`` is the window ``X[i : i + timesteps]`` and its target is
    ``y[i + timesteps]``, i.e. the label of the row right after the window.

    Args:
        X: Array-like of per-row features; the first axis indexes rows.
        y: Array-like of per-row labels, same length as ``X``.
        timesteps: Number of consecutive rows in each window (>= 0).

    Returns:
        Tuple ``(X_seq, y_seq)``. ``X_seq`` is float32 with shape
        ``(len(X) - timesteps, timesteps, *X.shape[1:])`` and ``y_seq`` equals
        ``y[timesteps:]``. When there are not more rows than ``timesteps``
        the result is empty but ``X_seq`` keeps its full rank, so callers can
        still read ``X_seq.shape[1]`` / ``X_seq.shape[2]``.

    Raises:
        ValueError: If ``timesteps`` is negative or ``X`` and ``y`` differ in
            length (which would silently misalign windows and labels).
    """
    X = np.asarray(X)
    y = np.asarray(y)

    if timesteps < 0:
        raise ValueError(f'timesteps must be non-negative, got {timesteps}')
    if len(X) != len(y):
        raise ValueError(
            f'X and y must have the same number of rows, got {len(X)} and {len(y)}'
        )

    n_windows = max(len(X) - timesteps, 0)
    # Row i of window_idx holds the row indices i, i+1, ..., i+timesteps-1,
    # so a single fancy-indexing gather builds every window at once.
    window_idx = np.arange(n_windows)[:, None] + np.arange(timesteps)[None, :]
    X_seq = X[window_idx].astype(np.float32, copy=False)
    # Copy so the returned labels do not alias the caller's array.
    y_seq = y[timesteps:].copy()
    return X_seq, y_seq

# Window every split with the same 20-step lookback (train, then val, then test).
(X_train_seq, y_train_seq), (X_val_seq, y_val_seq), (X_test_seq, y_test_seq) = (
    create_sequences(split_X, split_y, timesteps=20)
    for split_X, split_y in ((X_train, y_train), (X_val, y_val), (X_test, y_test))
)

print(f'Train seq: {X_train_seq.shape}')

# Class distribution of the training targets: absolute count and share per label.
unique, counts = np.unique(y_train_seq, return_counts=True)
n_train_targets = len(y_train_seq)
print('\nClass distribution:')
for label, label_count in zip(unique, counts):
    label_share = 100*label_count/n_train_targets
    print(f'  Class {label}: {label_count} ({label_share:.2f}%)')

## Step 4: Implement Focal Loss


In [None]:
# Focal Loss for imbalanced classification
def focal_loss(gamma=2.0, alpha=0.25):
    """Create a multi-class focal loss for probability (softmax) outputs.

    Per sample the loss is ``sum_c alpha * (1 - p_c)**gamma * (-y_c * log p_c)``
    and the returned scalar is the mean over samples.

    Args:
        gamma: Focusing exponent; larger values down-weight well-classified
            examples more strongly.
        alpha: Balancing factor. Either a scalar applied to every class or a
            per-class sequence whose length matches the last axis of
            ``y_pred`` (it is broadcast over that axis).

    Returns:
        A function ``focal_loss_fixed(y_true, y_pred)`` usable as a Keras loss.
        ``y_true`` may be one-hot (same shape as ``y_pred``) or sparse integer
        class ids (one axis fewer than ``y_pred``, or a trailing axis of
        size 1); sparse ids are one-hot encoded internally.
    """
    def focal_loss_fixed(y_true, y_pred):
        y_pred = tf.convert_to_tensor(y_pred)
        y_true = tf.convert_to_tensor(y_true)

        # Decide from static shapes whether the targets are sparse class ids.
        # Without this, sparse ids would be multiplied against the class axis
        # and broadcast into a meaningless value instead of raising.
        true_rank = y_true.shape.rank
        pred_rank = y_pred.shape.rank
        labels_are_sparse = False
        if true_rank is not None and pred_rank is not None:
            if true_rank == pred_rank - 1:
                labels_are_sparse = True
            elif (true_rank == pred_rank
                  and y_true.shape[-1] == 1
                  and y_pred.shape[-1] not in (None, 1)):
                # Targets shaped (..., 1): drop the trailing axis first.
                labels_are_sparse = True
                y_true = tf.squeeze(y_true, axis=-1)

        if labels_are_sparse:
            static_depth = y_pred.shape[-1]
            depth = static_depth if static_depth is not None else tf.shape(y_pred)[-1]
            y_true = tf.one_hot(tf.cast(y_true, tf.int32), depth=depth, dtype=y_pred.dtype)
        else:
            y_true = tf.cast(y_true, y_pred.dtype)

        # Clip predictions so log() never sees exactly 0 or 1
        epsilon = tf.keras.backend.epsilon()
        y_pred = tf.clip_by_value(y_pred, epsilon, 1. - epsilon)

        # Calculate focal loss
        ce_loss = -y_true * tf.math.log(y_pred)
        focal_weight = tf.math.pow(1. - y_pred, gamma)
        alpha_t = tf.convert_to_tensor(alpha, dtype=y_pred.dtype)
        weighted_loss = alpha_t * focal_weight * ce_loss

        # Sum over classes, then average over the batch.
        return tf.reduce_mean(tf.reduce_sum(weighted_loss, axis=-1))

    return focal_loss_fixed

# Instantiate the loss with the default focusing/balancing parameters.
# NOTE: this only creates `loss_fn`; nothing here evaluates it, and the compile
# cell below uses sparse categorical cross-entropy rather than `loss_fn`.
loss_fn = focal_loss(gamma=2.0, alpha=0.25)
print('Focal loss function created')

## Step 5: Calculate Aggressive Class Weights


In [None]:
# Method 1: Inverse frequency
unique, counts = np.unique(y_train_seq, return_counts=True)
total = len(y_train_seq)

# Named knobs instead of inline magic numbers.
NUM_CLASSES = 5                   # must match the width of the `signal_class` softmax head
MINORITY_WEIGHT_MULTIPLIER = 3.0  # extra boost on top of the balanced weight

# Aggressive weighting for minority classes.
# `.tolist()` yields native Python numbers, so the dict keys and values are
# plain Python objects (cleaner to pickle and to pass to Keras) while still
# comparing equal to the NumPy label values.
class_weights_v1 = {}
for u, c in zip(unique.tolist(), counts.tolist()):
    if u == 0:  # Majority class (label 0 is treated as NO_SIGNAL)
        class_weights_v1[u] = 1.0
    else:  # Minority classes
        # Balanced weight = total / (num_classes * frequency), then boosted.
        class_weights_v1[u] = total / (NUM_CLASSES * c) * MINORITY_WEIGHT_MULTIPLIER

print('Aggressive class weights:')
for cls, weight in sorted(class_weights_v1.items()):
    print(f'  Class {cls}: {weight:.2f}')

## Step 6: Build Advanced Model


In [None]:
# Multi-task LSTM with auxiliary output for signal detection
def build_advanced_model(input_shape):
    """Assemble the two-headed LSTM network (uncompiled).

    A stacked LSTM encoder (256 then 128 units) feeds a dense trunk
    (64 then 32 ReLU units), with dropout after every layer. Two heads share
    the trunk: a 5-way softmax named ``signal_class`` and a single sigmoid
    unit named ``signal_presence``.

    Args:
        input_shape: Shape of one input sample without the batch axis; it is
            passed unchanged to ``layers.Input``.

    Returns:
        A ``keras.Model`` whose outputs are ``[signal_class, signal_presence]``.
    """
    sequence_in = layers.Input(shape=input_shape)

    # Recurrent encoder: only the last LSTM collapses the time axis.
    hidden = sequence_in
    for lstm_units, keep_sequence in ((256, True), (128, False)):
        hidden = layers.LSTM(lstm_units, return_sequences=keep_sequence)(hidden)
        hidden = layers.Dropout(0.3)(hidden)

    # Dense trunk shared by both heads.
    for dense_units, drop_rate in ((64, 0.3), (32, 0.2)):
        hidden = layers.Dense(dense_units, activation='relu')(hidden)
        hidden = layers.Dropout(drop_rate)(hidden)

    # Main head: 5-class classification.
    signal_class_head = layers.Dense(5, activation='softmax', name='signal_class')(hidden)

    # Auxiliary head: binary signal vs. no-signal.
    signal_presence_head = layers.Dense(1, activation='sigmoid', name='signal_presence')(hidden)

    return keras.Model(inputs=sequence_in, outputs=[signal_class_head, signal_presence_head])

# Input shape is (timesteps, n_features), read from the windowed training set.
model = build_advanced_model((X_train_seq.shape[1], X_train_seq.shape[2]))

# Compile with two losses: one per output head, combined via loss_weights.
model.compile(
    optimizer=keras.optimizers.Adam(learning_rate=0.0005),
    loss={
        'signal_class': 'sparse_categorical_crossentropy',
        'signal_presence': 'binary_crossentropy'
    },
    loss_weights={
        'signal_class': 1.0,
        'signal_presence': 0.5
    },
    # Metrics are keyed by output name. A flat one-entry list is rejected by
    # Keras 3 for a two-output model (the list must have one entry per
    # output); the dict form is accepted by both Keras 2 and Keras 3.
    metrics={
        'signal_class': ['accuracy'],
        'signal_presence': ['accuracy']
    }
)

# summary() prints the table itself and returns None, so wrapping it in
# print() would only add a stray "None" line.
model.summary()

## Step 7: Prepare Auxiliary Labels


In [None]:
def _to_signal_flag(labels):
    """Map class ids to float32 flags: 0.0 for label 0 (NO_SIGNAL), 1.0 otherwise."""
    return np.not_equal(labels, 0).astype(np.float32)

# Create binary labels: 0 = NO_SIGNAL, 1 = ANY_SIGNAL
y_train_binary = _to_signal_flag(y_train_seq)
y_val_binary = _to_signal_flag(y_val_seq)
y_test_binary = _to_signal_flag(y_test_seq)

# Report how many samples carry a signal in each split.
print(f'Signal presence distribution:')
for split_name, split_flags in (
    ('Train', y_train_binary),
    ('Val', y_val_binary),
    ('Test', y_test_binary),
):
    print(f'  {split_name}: {split_flags.sum()}/{len(split_flags)} ({100*split_flags.mean():.2f}%)')

## Step 8: Train Model


In [None]:
print('Training multi-task model with aggressive class weights...')

# Stop once validation loss has not improved for 30 epochs and roll back to
# the best weights seen.
early_stop = keras.callbacks.EarlyStopping(
    monitor='val_loss',
    patience=30,
    restore_best_weights=True,
    verbose=1
)

# Halve the learning rate after 10 stagnant epochs, down to 1e-6.
reduce_lr = keras.callbacks.ReduceLROnPlateau(
    monitor='val_loss',
    factor=0.5,
    patience=10,
    min_lr=1e-6,
    verbose=1
)

# Keras does not support `class_weight` for models with more than one output
# (and a dict keyed by output name fails its integer-class-id validation), so
# the per-class weights are expanded into per-sample weights for the
# `signal_class` head. The auxiliary head keeps a uniform weight of 1.
signal_class_sample_weight = np.ones(len(y_train_seq), dtype=np.float32)
for cls, weight in class_weights_v1.items():
    signal_class_sample_weight[y_train_seq == cls] = weight
signal_presence_sample_weight = np.ones(len(y_train_binary), dtype=np.float32)

history = model.fit(
    X_train_seq, [y_train_seq, y_train_binary],
    validation_data=(X_val_seq, [y_val_seq, y_val_binary]),
    epochs=300,
    batch_size=32,
    # Same list structure as the targets: one weight vector per output.
    sample_weight=[signal_class_sample_weight, signal_presence_sample_weight],
    callbacks=[early_stop, reduce_lr],
    verbose=1
)

print('Training complete!')

## Step 9: Evaluate


In [None]:
from sklearn.metrics import precision_score, recall_score, f1_score, confusion_matrix, classification_report

# Get predictions: class probabilities from the main head, plus the auxiliary
# signal-presence output; the predicted label is the arg-max class.
y_pred_probs, y_aux_pred = model.predict(X_test_seq, verbose=0)
y_pred_labels = np.argmax(y_pred_probs, axis=1)

# Every summary score uses support-weighted averaging and reports 0 instead
# of warning when a class is never predicted.
weighted_kwargs = {'average': 'weighted', 'zero_division': 0}

print('=== MAIN CLASSIFICATION (5 classes) ===')
for metric_name, metric_fn in (
    ('Precision', precision_score),
    ('Recall', recall_score),
    ('F1-Score', f1_score),
):
    print(f'{metric_name}: {metric_fn(y_test_seq, y_pred_labels, **weighted_kwargs):.4f}')

print('\nConfusion Matrix:')
cm = confusion_matrix(y_test_seq, y_pred_labels)
print(cm)

print('\nClassification Report:')
print(classification_report(y_test_seq, y_pred_labels, zero_division=0))

# Focus on signal classes (1-4): restrict to samples whose true label is not 0.
signal_mask = y_test_seq != 0
if signal_mask.any():
    true_signal_labels = y_test_seq[signal_mask]
    pred_signal_labels = y_pred_labels[signal_mask]

    print('\n=== SIGNAL CLASSES ONLY (excluding NO_SIGNAL) ===')
    signal_precision = precision_score(true_signal_labels, pred_signal_labels, **weighted_kwargs)
    signal_recall = recall_score(true_signal_labels, pred_signal_labels, **weighted_kwargs)
    signal_f1 = f1_score(true_signal_labels, pred_signal_labels, **weighted_kwargs)

    print(f'Signal Precision: {signal_precision:.4f}')
    print(f'Signal Recall: {signal_recall:.4f}')
    print(f'Signal F1-Score: {signal_f1:.4f}')

## Step 10: Save Models


In [None]:
import os
import pickle

# All artifacts go into one folder on Drive. Create it up front so the first
# run does not fail when `models/` does not exist yet.
models_dir = '/content/drive/MyDrive/crypto-zigzag-ml/models'
os.makedirs(models_dir, exist_ok=True)

# Save main model
model_path = os.path.join(models_dir, 'lstm_advanced.h5')
model.save(model_path)
print(f'Model saved: {model_path}')

# Save normalization params (training-set mean/std needed to scale inputs at
# inference time).
norm_params = {'mean': mean, 'std': std}
with open(os.path.join(models_dir, 'norm_params_advanced.pkl'), 'wb') as f:
    pickle.dump(norm_params, f)
print('Normalization parameters saved')

# Save class weights
with open(os.path.join(models_dir, 'class_weights.pkl'), 'wb') as f:
    pickle.dump(class_weights_v1, f)
print('Class weights saved')

print('\nAll models ready for deployment!')