# ZombitX64 Trading Signal Model Training

This notebook trains and saves the AI models (LSTM, GRU, Random Forest, Gradient Boosting and CNN-LSTM) used by the trading signal system.

## 1. Environment Setup

In [None]:
import os
import sys
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from datetime import datetime, timedelta
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout, GRU, Bidirectional
from tensorflow.keras.layers import Conv1D, MaxPooling1D
import joblib
import pickle

# Add the parent directory to the path for imports
sys.path.append('..')
from app.core.market_data.fetcher import MarketDataFetcher
from app.core.ai.indicators import calculate_all_indicators

In [None]:
# Make sure safetensors is importable, installing it on first use.
try:
    import safetensors
except ImportError:
    import importlib
    import subprocess

    print("Installing safetensors...")
    # Run pip through the interpreter that executes this cell so the package
    # is installed into the same environment; a `!pip` shell escape can pick
    # up a different Python and is not valid syntax outside IPython.
    subprocess.check_call([sys.executable, "-m", "pip", "install", "safetensors"])
    # Let the import system notice the freshly installed package.
    importlib.invalidate_caches()
    import safetensors

print(f"safetensors version: {safetensors.__version__}")

from safetensors.tensorflow import save_file, load_file

## 2. Data Collection

Fetch historical market data for training the models.

In [None]:
async def fetch_historical_data(symbols, timeframes, limit=1000):
    """Fetch data for every symbol/timeframe pair and attach indicators.

    Args:
        symbols: Symbols to request; each one is passed to ``fetch_data``.
        timeframes: Timeframes requested for every symbol.
        limit: Forwarded to ``fetch_data`` as its ``limit`` keyword.

    Returns:
        A nested dict ``{symbol: {timeframe: DataFrame}}``. Every symbol gets
        an entry; a timeframe key is only present when data was returned.
    """
    data_dict = {}
    fetcher = MarketDataFetcher()

    try:
        for symbol in symbols:
            data_dict[symbol] = {}
            for timeframe in timeframes:
                print(f"Fetching {symbol} {timeframe}...")
                df = await fetcher.fetch_data(symbol, timeframe, limit=limit)
                # NOTE(review): fetch_data is assumed to return a DataFrame;
                # a None result is treated like an empty one to be safe.
                if df is None or df.empty:
                    print("  - No data available")
                    continue

                # Calculate indicators
                df = calculate_all_indicators(df)
                data_dict[symbol][timeframe] = df
                print(f"  - Got {len(df)} rows with {len(df.columns)} features")
    finally:
        # Always release the fetcher, even if a request or the indicator
        # calculation raised part-way through.
        await fetcher.close()

    return data_dict

# Define symbols and timeframes
symbols = ['BTCUSDT', 'ETHUSDT', 'EURUSD', 'GBPUSD']
timeframes = ['1h', '4h', '1d']

import asyncio
import concurrent.futures


def _run_coroutine_blocking(coro_fn, *args):
    """Run ``coro_fn(*args)`` to completion and return its result.

    ``asyncio.run`` raises ``RuntimeError`` when the calling thread already
    has a running event loop, which is the case inside a notebook kernel.
    In that situation the coroutine is run on a worker thread that gets its
    own fresh loop; otherwise ``asyncio.run`` is used directly.
    """
    try:
        asyncio.get_running_loop()
    except RuntimeError:
        # No loop is running (plain script): asyncio.run can own the loop.
        return asyncio.run(coro_fn(*args))

    # A loop is already running in this thread; block on a helper thread.
    with concurrent.futures.ThreadPoolExecutor(max_workers=1) as pool:
        return pool.submit(asyncio.run, coro_fn(*args)).result()


# Run data collection
historical_data = _run_coroutine_blocking(fetch_historical_data, symbols, timeframes)

## 3. Data Preparation and Feature Engineering

Prepare the data for training by creating labels and features.

In [None]:
def prepare_data(df, window_size=60, future_bars=10):
    """
    Turn an indicator DataFrame into windowed samples for supervised learning.

    Labels (one per row of the cleaned frame):
    - 0: BUY  (return over the next ``future_bars`` rows exceeds the threshold)
    - 1: SELL (that return is below the negative threshold)
    - 2: HOLD (everything else, including rows where the return or the
      threshold is NaN, because NaN comparisons are False)

    The threshold is the 20-row rolling mean of ``atr`` times 1.5, divided by
    ``close``.

    Args:
        df: DataFrame that must contain ``close`` and ``atr``; other feature
            columns are used only if present.
        window_size: Number of consecutive rows in each feature window.
        future_bars: Look-ahead horizon, in rows, used for the labels.

    Returns:
        ``(X, y, scaler, feature_cols)`` where ``X`` has shape
        ``(n_samples, window_size, n_features)``, ``y`` holds the label of the
        row immediately after each window, ``scaler`` is the fitted
        ``StandardScaler`` and ``feature_cols`` lists the columns used.
        When there are too few rows, ``X`` and ``y`` are empty but keep their
        rank and dtype so callers can still read ``X.shape[1:]``.

    NOTE(review): the scaler is fit on every row of ``df``, including rows a
    caller may later hold out as a test set, so test statistics influence the
    normalisation. Confirm whether that is acceptable.
    """
    # Clean data
    df = df.dropna()

    # Create labels
    future_returns = df['close'].pct_change(future_bars).shift(-future_bars)
    threshold = df['atr'].rolling(window=20).mean() * 1.5 / df['close']

    # Default to HOLD, then overwrite with BUY / SELL where the move is large
    labels = np.full(len(df), 2)  # 2 = HOLD
    labels[(future_returns > threshold).to_numpy()] = 0  # BUY
    labels[(future_returns < -threshold).to_numpy()] = 1  # SELL

    # Select features
    feature_cols = [
        'open', 'high', 'low', 'close', 'rsi', 'macd', 'macd_signal', 'macd_hist',
        'bb_upper', 'bb_middle', 'bb_lower', 'stoch_k', 'stoch_d', 'adx',
        'ichi_tenkan', 'ichi_kijun', 'atr'
    ]

    # Filter to available features
    feature_cols = [col for col in feature_cols if col in df.columns]
    features_df = df[feature_cols]

    # Normalize features
    scaler = StandardScaler()
    features_scaled = np.asarray(scaler.fit_transform(features_df))

    # Window i covers rows [i, i + window_size) and is paired with the label
    # of row i + window_size. The last ``future_bars`` rows are excluded
    # because their future return is unknown.
    n_samples = max(len(features_scaled) - window_size - future_bars, 0)

    # Build all windows at once with integer-array indexing; this also gives
    # an (0, window_size, n_features) array when there are no samples.
    window_index = np.arange(n_samples)[:, None] + np.arange(window_size)[None, :]
    X = features_scaled[window_index]
    y = labels[window_size:window_size + n_samples]

    return X, y, scaler, feature_cols

# Pick the single symbol/timeframe the models below are trained on
symbol, timeframe = 'BTCUSDT', '1h'
X, y, scaler, feature_cols = prepare_data(historical_data[symbol][timeframe])

# Chronological 80/20 split: shuffling is disabled because this is time series data
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, shuffle=False)

# Summary of what will be fed to the models
print(
    f"Training data shape: {X_train.shape}",
    f"Testing data shape: {X_test.shape}",
    f"Class distribution: {np.bincount(y)}",
    f"Features used: {feature_cols}",
    sep="\n",
)

In [None]:
def build_lstm_model(input_shape, output_classes=3):
    """Build and compile the stacked LSTM classifier.

    Layers, in order: LSTM(100, sequences) -> Dropout(0.2) -> LSTM(50) ->
    Dropout(0.2) -> Dense(25, relu) -> Dense(output_classes, softmax).

    Args:
        input_shape: Shape passed as ``input_shape`` to the first LSTM layer.
        output_classes: Width of the final softmax layer.

    Returns:
        The compiled ``Sequential`` model (adam optimizer, sparse categorical
        cross-entropy loss, accuracy metric).
    """
    network = Sequential()

    # Recurrent stack
    network.add(LSTM(100, return_sequences=True, input_shape=input_shape))
    network.add(Dropout(0.2))
    network.add(LSTM(50, return_sequences=False))
    network.add(Dropout(0.2))

    # Classification head
    network.add(Dense(25, activation='relu'))
    network.add(Dense(output_classes, activation='softmax'))

    network.compile(
        optimizer='adam',
        loss='sparse_categorical_crossentropy',
        metrics=['accuracy'],
    )
    return network

# Model input is one window: (window_size, num_features)
input_shape = (X_train.shape[1], X_train.shape[2])
lstm_model = build_lstm_model(input_shape)

# Callbacks: stop early (restoring the best weights) and lower the learning
# rate on plateaus. The same list is reused by the later model fits.
callbacks = [
    tf.keras.callbacks.EarlyStopping(patience=10, restore_best_weights=True),
    tf.keras.callbacks.ReduceLROnPlateau(factor=0.2, patience=5),
]

# Fit with a 20% validation split taken from the training arrays
history = lstm_model.fit(
    X_train,
    y_train,
    validation_split=0.2,
    epochs=50,
    batch_size=32,
    callbacks=callbacks,
)

# Score on the held-out test windows
lstm_loss, lstm_accuracy = lstm_model.evaluate(X_test, y_test)
print(f"LSTM Test Accuracy: {lstm_accuracy:.4f}")

# Persist the full model in Keras h5 format
save_dir = "../app/core/ai/models"
os.makedirs(save_dir, exist_ok=True)
lstm_model.save(f"{save_dir}/latest_lstm_model.h5")

# Persist the raw weights as safetensors: one tensor per (layer, weight) pair,
# keyed by position so the names are unique
weight_dict = {
    f"layer_{i}_weight_{j}": tf.convert_to_tensor(weight)
    for i, layer in enumerate(lstm_model.layers)
    for j, weight in enumerate(layer.get_weights())
}
save_file(weight_dict, f"{save_dir}/latest_lstm_model.safetensors")
print(f"LSTM model weights saved in safetensors format")

# Store the scaler, feature list and run details alongside the model
model_metadata = dict(
    scaler=scaler,
    feature_cols=feature_cols,
    model_type="LSTM",
    training_date=datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
    input_shape=input_shape,
    accuracy=float(lstm_accuracy),
)
with open(f"{save_dir}/latest_lstm_model_metadata.pkl", 'wb') as f:
    pickle.dump(model_metadata, f)

# Training curves: loss on the left, accuracy on the right
plt.figure(figsize=(12, 4))

plt.subplot(1, 2, 1)
plt.plot(history.history['loss'], label='Training Loss')
plt.plot(history.history['val_loss'], label='Validation Loss')
plt.legend()
plt.title('Loss')

plt.subplot(1, 2, 2)
plt.plot(history.history['accuracy'], label='Training Accuracy')
plt.plot(history.history['val_accuracy'], label='Validation Accuracy')
plt.legend()
plt.title('Accuracy')

plt.tight_layout()

In [None]:
# 5. GRU Model Training
def build_gru_model(input_shape, output_classes=3):
    """Build and compile the stacked GRU classifier.

    Layers, in order: GRU(100, sequences) -> Dropout(0.2) -> GRU(50) ->
    Dropout(0.2) -> Dense(25, relu) -> Dense(output_classes, softmax).

    Args:
        input_shape: Shape passed as ``input_shape`` to the first GRU layer.
        output_classes: Width of the final softmax layer.

    Returns:
        The compiled ``Sequential`` model (adam optimizer, sparse categorical
        cross-entropy loss, accuracy metric).
    """
    layer_stack = [
        GRU(100, return_sequences=True, input_shape=input_shape),
        Dropout(0.2),
        GRU(50, return_sequences=False),
        Dropout(0.2),
        Dense(25, activation='relu'),
        Dense(output_classes, activation='softmax'),
    ]
    compile_options = dict(
        optimizer='adam',
        loss='sparse_categorical_crossentropy',
        metrics=['accuracy'],
    )

    classifier = Sequential(layer_stack)
    classifier.compile(**compile_options)
    return classifier

# Build the GRU for the same input windows and fit it
gru_model = build_gru_model(input_shape)
gru_history = gru_model.fit(
    X_train,
    y_train,
    validation_split=0.2,
    epochs=40,
    batch_size=32,
    callbacks=callbacks,
)

# Score on the held-out test windows
gru_loss, gru_accuracy = gru_model.evaluate(X_test, y_test)
print(f"GRU Test Accuracy: {gru_accuracy:.4f}")

# Persist the full model in Keras h5 format
gru_model.save(f"{save_dir}/latest_gru_model.h5")

# Persist the raw weights as safetensors, one tensor per (layer, weight) pair
weight_dict = {
    f"gru_layer_{i}_weight_{j}": tf.convert_to_tensor(weight)
    for i, layer in enumerate(gru_model.layers)
    for j, weight in enumerate(layer.get_weights())
}
save_file(weight_dict, f"{save_dir}/latest_gru_model.safetensors")
print(f"GRU model weights saved in safetensors format")

# Store the scaler, feature list and run details alongside the model
gru_metadata = dict(
    scaler=scaler,
    feature_cols=feature_cols,
    model_type="GRU",
    training_date=datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
    input_shape=input_shape,
    accuracy=float(gru_accuracy),
)
with open(f"{save_dir}/latest_gru_model_metadata.pkl", 'wb') as f:
    pickle.dump(gru_metadata, f)

In [None]:
# 6. Random Forest Model

# Reshape data for traditional ML models: each (window, feature) grid becomes
# one flat feature vector per sample
X_train_flat = X_train.reshape(X_train.shape[0], -1)
X_test_flat = X_test.reshape(X_test.shape[0], -1)

print(f"Flattened feature shape: {X_train_flat.shape}")

# Train Random Forest
rf_model = RandomForestClassifier(
    n_estimators=100,
    max_depth=10,
    min_samples_split=10,
    random_state=42,
    n_jobs=-1,
    class_weight='balanced'
)

rf_model.fit(X_train_flat, y_train)

# Evaluate RF model
rf_pred = rf_model.predict(X_test_flat)
rf_accuracy = accuracy_score(y_test, rf_pred)
print(f"Random Forest Test Accuracy: {rf_accuracy:.4f}")

# Print detailed classification metrics.
# The label ids are passed explicitly so the three names always line up with
# classes 0/1/2; without `labels`, a test slice that is missing one class
# makes classification_report raise because it sees fewer classes than names.
print("\nRandom Forest Classification Report:")
print(classification_report(
    y_test,
    rf_pred,
    labels=[0, 1, 2],
    target_names=["Buy", "Sell", "Hold"],
    zero_division=0,
))

# Save the model
rf_path = f"{save_dir}/latest_random_forest_model.joblib"
joblib.dump(rf_model, rf_path)

# Save scaler, feature list and run details with the model
rf_metadata = {
    "scaler": scaler,
    "feature_cols": feature_cols,
    "model_type": "RandomForest",
    "training_date": datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
    "accuracy": float(rf_accuracy),
    "feature_importances": rf_model.feature_importances_.tolist() if hasattr(rf_model, 'feature_importances_') else None
}

with open(f"{save_dir}/latest_random_forest_model_metadata.pkl", 'wb') as f:
    pickle.dump(rf_metadata, f)

In [None]:
# 7. Gradient Boosting Model

# Train Gradient Boosting on the flattened windows
gb_model = GradientBoostingClassifier(
    n_estimators=100,
    learning_rate=0.1,
    max_depth=5,
    min_samples_split=10,
    random_state=42
)

gb_model.fit(X_train_flat, y_train)

# Evaluate GB model
gb_pred = gb_model.predict(X_test_flat)
gb_accuracy = accuracy_score(y_test, gb_pred)
print(f"Gradient Boosting Test Accuracy: {gb_accuracy:.4f}")

# Plot confusion matrix.
# `labels` pins the matrix to 3x3 in Buy/Sell/Hold order; otherwise a class
# that is absent from both y_test and gb_pred shrinks the matrix and the
# hard-coded tick labels no longer match its rows and columns.
cm = confusion_matrix(y_test, gb_pred, labels=[0, 1, 2])
plt.figure(figsize=(8, 6))
sns.heatmap(cm, annot=True, fmt='d', cmap='Blues', 
            xticklabels=["Buy", "Sell", "Hold"],
            yticklabels=["Buy", "Sell", "Hold"])
plt.title('Confusion Matrix')
plt.ylabel('True Label')
plt.xlabel('Predicted Label')
plt.show()

# Print detailed classification metrics; the explicit label ids keep the
# report from raising when a class is missing from the test slice.
print("\nGradient Boosting Classification Report:")
print(classification_report(
    y_test,
    gb_pred,
    labels=[0, 1, 2],
    target_names=["Buy", "Sell", "Hold"],
    zero_division=0,
))

# Save the model
gb_path = f"{save_dir}/latest_gradient_boost_model.joblib"
joblib.dump(gb_model, gb_path)

# Save scaler, feature list and run details with the model
gb_metadata = {
    "scaler": scaler,
    "feature_cols": feature_cols,
    "model_type": "GradientBoosting",
    "training_date": datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
    "accuracy": float(gb_accuracy),
    "feature_importances": gb_model.feature_importances_.tolist() if hasattr(gb_model, 'feature_importances_') else None
}

with open(f"{save_dir}/latest_gradient_boost_model_metadata.pkl", 'wb') as f:
    pickle.dump(gb_metadata, f)

In [None]:
# 9. CNN-LSTM Hybrid Model

def build_cnn_lstm_model(input_shape, output_classes=3):
    """Build and compile the Conv1D + LSTM hybrid classifier.

    Layers, in order: two Conv1D(64, kernel 3, relu) -> MaxPooling1D(2) ->
    Dropout(0.25) -> LSTM(100, sequences) -> Dropout(0.2) -> LSTM(50) ->
    Dropout(0.2) -> Dense(25, relu) -> Dense(output_classes, softmax).

    Args:
        input_shape: Shape passed as ``input_shape`` to the first Conv1D layer.
        output_classes: Width of the final softmax layer.

    Returns:
        The compiled ``Sequential`` model (adam optimizer, sparse categorical
        cross-entropy loss, accuracy metric).
    """
    convolutional_part = [
        Conv1D(filters=64, kernel_size=3, activation='relu', input_shape=input_shape),
        Conv1D(filters=64, kernel_size=3, activation='relu'),
        MaxPooling1D(pool_size=2),
        Dropout(0.25),
    ]
    recurrent_part = [
        LSTM(100, return_sequences=True),
        Dropout(0.2),
        LSTM(50, return_sequences=False),
        Dropout(0.2),
    ]
    output_part = [
        Dense(25, activation='relu'),
        Dense(output_classes, activation='softmax'),
    ]

    hybrid = Sequential(convolutional_part + recurrent_part + output_part)
    hybrid.compile(
        optimizer='adam',
        loss='sparse_categorical_crossentropy',
        metrics=['accuracy'],
    )
    return hybrid

# Build the CNN-LSTM hybrid for the same input windows and fit it
cnn_lstm_model = build_cnn_lstm_model(input_shape)
cnn_lstm_history = cnn_lstm_model.fit(
    X_train,
    y_train,
    validation_split=0.2,
    epochs=40,
    batch_size=32,
    callbacks=callbacks,
)

# Score on the held-out test windows
cnn_lstm_loss, cnn_lstm_accuracy = cnn_lstm_model.evaluate(X_test, y_test)
print(f"CNN-LSTM Test Accuracy: {cnn_lstm_accuracy:.4f}")

# Persist the full model in Keras h5 format
cnn_lstm_model.save(f"{save_dir}/latest_cnn_lstm_model.h5")

# Persist the raw weights as safetensors, one tensor per (layer, weight) pair
weight_dict = {
    f"cnn_lstm_layer_{i}_weight_{j}": tf.convert_to_tensor(weight)
    for i, layer in enumerate(cnn_lstm_model.layers)
    for j, weight in enumerate(layer.get_weights())
}
save_file(weight_dict, f"{save_dir}/latest_cnn_lstm_model.safetensors")
print(f"CNN-LSTM model weights saved in safetensors format")

# Store the scaler, feature list and run details alongside the model
cnn_lstm_metadata = dict(
    scaler=scaler,
    feature_cols=feature_cols,
    model_type="CNN_LSTM",
    training_date=datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
    input_shape=input_shape,
    accuracy=float(cnn_lstm_accuracy),
)
with open(f"{save_dir}/latest_cnn_lstm_model_metadata.pkl", 'wb') as f:
    pickle.dump(cnn_lstm_metadata, f)

In [None]:
# 10. Model Comparison

# Test accuracy of every trained model, keyed by display name
models = {
    'LSTM': lstm_accuracy,
    'GRU': gru_accuracy,
    'Random Forest': rf_accuracy,
    'Gradient Boosting': gb_accuracy,
    'CNN-LSTM': cnn_lstm_accuracy,
}

# Bar chart of the accuracies, one colour per model
plt.figure(figsize=(10, 6))
plt.bar(models.keys(), models.values(), color=['blue', 'green', 'red', 'purple', 'orange'])
plt.title('Model Accuracy Comparison')
plt.ylabel('Accuracy')
plt.ylim(0, 1.0)
plt.grid(axis='y', linestyle='--', alpha=0.7)

# Write each accuracy just above its bar
for i, acc in enumerate(models.values()):
    plt.text(i, acc + 0.02, f'{acc:.4f}', ha='center')

plt.tight_layout()
plt.savefig(f"{save_dir}/../model_comparison.png")
plt.show()

print("Training completed successfully!")
print(f"All models saved to {save_dir}")
print("\nBest performing model: " + max(models, key=models.get))

# Ensemble config covering all five models: (type, file name, vote weight)
ensemble_config = {
    "models": [
        {"type": model_type, "path": model_file, "weight": vote_weight}
        for model_type, model_file, vote_weight in (
            ("lstm", "latest_lstm_model.h5", 1.2),
            ("gru", "latest_gru_model.h5", 1.1),
            ("random_forest", "latest_random_forest_model.joblib", 0.9),
            ("gradient_boost", "latest_gradient_boost_model.joblib", 1.0),
            ("cnn_lstm", "latest_cnn_lstm_model.h5", 1.3),
        )
    ]
}

# Save final ensemble configuration
ensemble_path = f"{save_dir}/latest_ensemble_model.pkl"
with open(ensemble_path, 'wb') as f:
    pickle.dump(ensemble_config, f)

print(f"Final ensemble model saved to {ensemble_path}")

# Read the LSTM safetensors file back to confirm it can be loaded
print("\nVerifying safetensors model loading...")
try:
    loaded_weights = load_file(f"{save_dir}/latest_lstm_model.safetensors")
except Exception as e:
    print(f"Error loading safetensors model: {e}")
else:
    print(f"Successfully loaded model weights: {len(loaded_weights)} tensor groups")
    print("Safetensors validation successful")

In [None]:
# 8. Create an Ensemble Model

# Let's define a wrapper for our models to standardize the predict interface
class ModelWrapper:
    """Give every trained model the same ``predict`` interface.

    ``predict`` returns one ``(class_label, confidence_percent)`` tuple per
    input sample, regardless of whether the wrapped model exposes
    ``predict_proba``, returns class probabilities from ``predict``, or
    returns plain class labels from ``predict``.
    """

    def __init__(self, model, is_flat=False, scaler=None):
        """Store the model and how its input must be shaped.

        Args:
            model: Object with ``predict`` and optionally ``predict_proba``.
            is_flat: True when the model expects 2-D input, in which case each
                sample is flattened before prediction.
            scaler: Stored on the instance; not used by ``predict``.
        """
        self.model = model
        self.is_flat = is_flat  # True for RF and GB, False for LSTM and GRU
        self.scaler = scaler

    def predict(self, X):
        """Return ``[(class_label, confidence_percent), ...]`` for ``X``.

        Args:
            X: Array whose first axis indexes samples.

        Returns:
            A list with one tuple per sample. Confidence is the winning
            probability times 100 when probabilities are available, and a
            fixed 80.0 when the model only returns class labels.
        """
        # Flatten for traditional ML models, keep dimensions for sequence models
        X_prep = X.reshape(X.shape[0], -1) if self.is_flat else X

        if hasattr(self.model, 'predict_proba'):
            probs = np.asarray(self.model.predict_proba(X_prep))
            # Probability columns follow ``classes_`` when the model has it,
            # so map the winning column back to the actual class label.
            return self._from_probabilities(probs, getattr(self.model, 'classes_', None))

        raw = np.asarray(self.model.predict(X_prep))
        if raw.ndim == 2 and raw.shape[1] > 1:
            # ``predict`` returned one probability row per sample (softmax
            # output); calling int() on such a row would raise TypeError.
            return self._from_probabilities(raw)

        # Plain class labels: no probability available, use a default 80%
        return [(int(p), 80.0) for p in raw.ravel()]

    @staticmethod
    def _from_probabilities(probs, classes=None):
        """Convert an ``(n_samples, n_classes)`` probability array to tuples."""
        winners = probs.argmax(axis=1)
        confidences = probs[np.arange(len(probs)), winners] * 100
        labels = winners if classes is None else np.asarray(classes)[winners]
        return list(zip(labels.tolist(), confidences.tolist()))

# Create model wrappers
lstm_wrapper = ModelWrapper(lstm_model, is_flat=False, scaler=scaler)
gru_wrapper = ModelWrapper(gru_model, is_flat=False, scaler=scaler)
rf_wrapper = ModelWrapper(rf_model, is_flat=True, scaler=scaler)
gb_wrapper = ModelWrapper(gb_model, is_flat=True, scaler=scaler)

# Create ensemble configuration
ensemble_config = {
    "models": [
        {
            "type": "lstm",
            "path": "latest_lstm_model.h5",
            "weight": 1.2
        },
        {
            "type": "gru",
            "path": "latest_gru_model.h5",
            "weight": 1.1
        },
        {
            "type": "random_forest",
            "path": "latest_random_forest_model.joblib",
            "weight": 0.9
        },
        {
            "type": "gradient_boost",
            "path": "latest_gradient_boost_model.joblib",
            "weight": 1.0
        }
    ]
}

# Save ensemble configuration
# NOTE(review): the model-comparison cell earlier in this file writes the same
# latest_ensemble_model.pkl with an additional cnn_lstm entry. Running this
# cell afterwards replaces that file with the four-model config below.
# Confirm which cell is meant to run last.
ensemble_path = f"{save_dir}/latest_ensemble_model.pkl"
with open(ensemble_path, 'wb') as f:
    pickle.dump(ensemble_config, f)

print(f"Ensemble model configuration saved to {ensemble_path}")

# Ensemble members in the same order as ensemble_config["models"]; the vote
# weights are read from the config so the two cannot drift apart.
ensemble_members = [
    ("LSTM", lstm_wrapper),
    ("GRU", gru_wrapper),
    ("RF", rf_wrapper),
    ("GB", gb_wrapper),
]
vote_weights = [entry["weight"] for entry in ensemble_config["models"]]

# Test the ensemble on up to five distinct test samples
sample_indices = np.random.choice(len(X_test), size=min(5, len(X_test)), replace=False)
for idx in sample_indices:
    sample_X = X_test[idx:idx+1]
    sample_y = y_test[idx]

    # Get the (class, confidence) prediction from each model; kept in a local
    # list so the rf_pred / gb_pred arrays from the earlier cells are not
    # overwritten.
    member_preds = [(name, wrapper.predict(sample_X)[0]) for name, wrapper in ensemble_members]

    print(f"\nSample {idx}, True label: {sample_y}")
    for name, (pred_class, pred_confidence) in member_preds:
        print(f"{name} prediction: Class {pred_class} with {pred_confidence:.2f}% confidence")

    # Weighted vote: each model adds its weight to the class it predicted
    votes = [pred_class for _, (pred_class, _) in member_preds]
    vote_counts = np.bincount(votes, weights=vote_weights, minlength=3)
    ensemble_class = np.argmax(vote_counts)
    ensemble_confidence = (vote_counts[ensemble_class] / sum(vote_counts)) * 100

    print(f"Ensemble prediction: Class {ensemble_class} with {ensemble_confidence:.2f}% confidence")