# HFT Simulator - End-to-End Example

This notebook demonstrates a complete workflow for:
1. Fetching market data
2. Preprocessing the data (technical indicators)
3. Engineering ML features
4. Training an ML model
5. Running a backtest
6. Analyzing performance
7. Visualizing results
8. Exporting results

In [None]:
import os
import sys
import warnings

# Put ../src at the front of the module search path before anything else is
# imported (presumably where the project packages imported in later cells
# live — confirm against the repository layout).
sys.path.insert(0, '../src')

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from datetime import datetime, timedelta

# Create the directories this notebook writes to (no error if they exist).
for required_dir in ('../results', '../data/cache', '../models/saved'):
    os.makedirs(required_dir, exist_ok=True)

# Silence every warning category for the rest of the session.
warnings.filterwarnings('ignore')

print("Setup complete!")

## 1. Fetch Market Data

In [None]:
from data.fetcher import BinanceDataFetcher
from data.cache import DataCache

# Derive the number of candles from the desired history window so the request
# size and its description cannot drift apart.
DAYS_OF_HISTORY = 3
CANDLES_PER_DAY = 24 * 60  # one candle per minute at the '1m' interval
n_candles = DAYS_OF_HISTORY * CANDLES_PER_DAY  # 4320

# Initialize fetcher and cache
fetcher = BinanceDataFetcher(symbol="BTCUSDT")
# NOTE(review): `cache` is created here but is not used by any cell visible in
# this notebook — confirm whether the fetch below was meant to go through it.
cache = DataCache(cache_dir='../data/cache')

# Fetch BTC/USDT data (1-minute candles, ~3 days)
print("Fetching market data from Binance...")
data = fetcher.fetch_klines(
    interval='1m',
    limit=n_candles
)

# Fail with a clear message instead of an IndexError from data.index[0] below.
if len(data) == 0:
    raise RuntimeError("No candles were returned; cannot continue without market data.")

print(f"Fetched {len(data)} candles")
if len(data) < n_candles:
    # NOTE(review): whether fetch_klines pages past the exchange's per-request
    # cap is not visible here; surface a short fetch rather than hide it.
    print(f"Warning: requested {n_candles} candles but received {len(data)}")
print(f"Date range: {data.index[0]} to {data.index[-1]}")
data.head()

## 2. Preprocess Data

In [None]:
from data.preprocessor import DataPreprocessor

# Raw candle columns; every other column in the processed frame is treated as
# an indicator. Keeping this in one place means the reported count and the
# preview below always agree (the count was previously a hard-coded "- 5").
BASE_COLUMNS = ['open', 'high', 'low', 'close', 'volume']

preprocessor = DataPreprocessor()

# Add technical indicators
print("Adding technical indicators...")
data_processed = preprocessor.add_technical_indicators(data)

# Columns that are not raw OHLCV fields.
indicator_cols = [c for c in data_processed.columns if c not in BASE_COLUMNS]

print(f"Added {len(indicator_cols)} new features")
print(f"Total columns: {len(data_processed.columns)}")
print(f"Rows after cleaning: {len(data_processed)}")

# Show sample of indicators: close price next to the first five indicators
data_processed[['close'] + indicator_cols[:5]].tail()

## 3. Create ML Features

In [None]:
from ml.features import FeatureEngineer

# Build the ML feature table from the indicator-enriched candles.
print("Engineering ML features...")
feature_eng = FeatureEngineer()
features_df = feature_eng.create_all_features(data_processed)

# Report the size of the resulting table.
feature_count = len(features_df.columns)
row_count = len(features_df)
print(f"Created {feature_count} total features")
print("Feature categories: price, volatility, volume, candle, orderflow, time")
print(f"Dataset size: {row_count} rows")

# Preview the first rows of the feature table
features_df.head()

In [None]:
# Turn the feature table into model inputs X, labels y and the feature names.
print("Preparing training sequences...")
X, y, feature_names = feature_eng.prepare_training_data(
    features_df, target_col='close', lookback=30
)

# Label balance. The percentages treat the mean of y as the share of 1-labels,
# which assumes y holds only 0/1 values — TODO confirm against
# prepare_training_data.
up_count = np.sum(y == 1)
down_count = np.sum(y == 0)
up_share = np.mean(y)

print("\nClass distribution:")
print(f"  Up moves (1): {up_count} ({up_share*100:.1f}%)")
print(f"  Down moves (0): {down_count} ({(1-up_share)*100:.1f}%)")

## 4. Train ML Model

In [None]:
from ml.models import PriceLSTM
import torch

# Seed the global NumPy and PyTorch RNGs so that any randomness drawn from
# them (e.g. weight initialisation) is repeatable on Restart & Run All.
SEED = 42
np.random.seed(SEED)
torch.manual_seed(SEED)

# Hyperparameters are named once so the constructor call and the printed
# summary cannot disagree.
HIDDEN_SIZE = 32
NUM_LAYERS = 2
input_size = X.shape[2]  # size of the last axis of X

# Create model
model = PriceLSTM(
    input_size=input_size,
    hidden_size=HIDDEN_SIZE,
    num_layers=NUM_LAYERS
)

print("Model architecture:")
print(f"  Input size: {input_size} features")
print(f"  Hidden size: {HIDDEN_SIZE}")
print(f"  LSTM layers: {NUM_LAYERS}")
print("  Output: Binary classification (up/down)")

In [None]:
# Fit the LSTM on the prepared sequences.
print("Training LSTM model...")
print("This may take a few minutes...\n")

training_options = dict(epochs=15, batch_size=32, validation_split=0.2)
history = model.train_model(X, y, **training_options)


def _final(metric):
    """Return the last recorded value of `metric` from the training history."""
    return history[metric][-1]


print("\nTraining complete!")
print(f"Final training loss: {_final('train_loss'):.4f}")
print(f"Final validation loss: {_final('val_loss'):.4f}")
print(f"Final validation accuracy: {_final('val_accuracy')*100:.1f}%")

In [None]:
# Training history: loss curves on the left, validation accuracy on the right.
fig, (loss_ax, acc_ax) = plt.subplots(1, 2, figsize=(12, 4))

# Left panel: train vs. validation loss per epoch.
for metric_key, curve_label in (('train_loss', 'Train Loss'), ('val_loss', 'Val Loss')):
    loss_ax.plot(history[metric_key], label=curve_label)
loss_ax.set(xlabel='Epoch', ylabel='Loss', title='Training Loss')
loss_ax.legend()
loss_ax.grid(True, alpha=0.3)

# Right panel: validation accuracy with a 0.5 reference line labelled 'Random'.
acc_ax.plot(history['val_accuracy'], label='Val Accuracy', color='green')
acc_ax.axhline(0.5, color='red', linestyle='--', label='Random')
acc_ax.set(xlabel='Epoch', ylabel='Accuracy', title='Validation Accuracy')
acc_ax.legend()
acc_ax.grid(True, alpha=0.3)

plt.tight_layout()
plt.show()

## 5. Run Backtest

In [None]:
from backtest.backtester import Backtester, BacktestConfig
from strategies.momentum_strategy import MomentumStrategy
from strategies.portfolio import Portfolio
from strategies.risk_manager import RiskManager, RiskLimits

print("Setting up backtest...")

# Thresholds handed to the momentum strategy.
strategy_params = {
    'ml_threshold': 0.55,
    'momentum_threshold': 0.0005,
    'volume_threshold': 1.2,
}
strategy = MomentumStrategy(**strategy_params)

# Backtest settings: starting capital, fee rate, position sizing, risk manager.
backtest_params = {
    'initial_capital': 100000,
    'fee_rate': 0.001,
    'position_size_pct': 0.3,
    'use_risk_manager': True,
}
config = BacktestConfig(**backtest_params)

# NOTE(review): the LSTM `model` trained in the earlier cell is not passed to
# the strategy or the backtester here — confirm how ML predictions are meant
# to reach MomentumStrategy (its `ml_threshold` suggests it expects them).
backtester = Backtester(strategy, config)

print("Running backtest...")
results = backtester.run(features_df, symbol='BTC')

print("\nBacktest complete!")

## 6. Performance Analysis

In [None]:
# Display results
# The section-heading emoji were stored as mis-decoded UTF-8 (mojibake); they
# are restored to the intended characters here.
summary = results['summary']

print("="*50)
print("BACKTEST RESULTS")
print("="*50)

# Headline portfolio figures from the backtest summary.
print(f"\n📊 Portfolio Performance:")
print(f"  Initial Capital:    ${summary['initial_capital']:>12,.2f}")
print(f"  Final Value:        ${summary['current_value']:>12,.2f}")
print(f"  Total Return:       {summary['total_return_pct']:>12.2f}%")
print(f"  Total PnL:          ${summary['total_pnl']:>12,.2f}")

# Signal and trade counts, plus fees paid.
print(f"\n📈 Trading Activity:")
print(f"  Total Signals:      {results['num_signals']:>12}")
print(f"  Trades Executed:    {results['trades_executed']:>12}")
print(f"  Total Fees:         ${summary['total_fees']:>12,.2f}")

# Breakdown of generated signals by type.
signal_stats = results['signal_stats']
print(f"\n📊 Signal Distribution:")
print(f"  BUY signals:        {signal_stats['buy_signals']:>12}")
print(f"  SELL signals:       {signal_stats['sell_signals']:>12}")
print(f"  HOLD signals:       {signal_stats['hold_signals']:>12}")

In [None]:
# Trade statistics
trades_df = results['trades']

if trades_df.empty:
    print("No trades executed.")
elif 'pnl' not in trades_df.columns:
    # Same guard the PnL-distribution cell applies before reading 'pnl'.
    print("Trades were executed, but no 'pnl' column is available for statistics.")
else:
    # Trades with pnl exactly 0 count as neither winning nor losing.
    winning = trades_df[trades_df['pnl'] > 0]
    losing = trades_df[trades_df['pnl'] < 0]

    # trades_df is non-empty in this branch, so the division is safe.
    win_rate = len(winning) / len(trades_df) * 100

    print("\n📊 Trade Statistics:")
    print(f"  Total Trades:       {len(trades_df):>12}")
    print(f"  Winning Trades:     {len(winning):>12}")
    print(f"  Losing Trades:      {len(losing):>12}")
    print(f"  Win Rate:           {win_rate:>11.1f}%")
    # Averages are only meaningful when the group has at least one trade.
    if len(winning) > 0:
        print(f"  Avg Win:            ${winning['pnl'].mean():>12,.2f}")
    if len(losing) > 0:
        print(f"  Avg Loss:           ${losing['pnl'].mean():>12,.2f}")

## 7. Visualizations

In [None]:
from analytics.visualizations import TradingVisualizer

# Visualizer instance plus the two result tables the plotting cells read.
viz = TradingVisualizer()
equity_curve, trades = (results[key] for key in ('equity_curve', 'trades'))

In [None]:
# Equity curve
if not equity_curve.empty:
    # Draw the baseline at the capital the backtest reports it started with,
    # rather than a hard-coded 100000 that can drift from the configuration.
    initial_capital = results['summary']['initial_capital']

    fig, ax = plt.subplots(figsize=(12, 5))
    ax.plot(equity_curve.index, equity_curve['equity'], linewidth=2, color='#2E86AB')
    ax.axhline(initial_capital, color='red', linestyle='--', alpha=0.5, label='Initial Capital')
    ax.set_xlabel('Time')
    ax.set_ylabel('Portfolio Value ($)')
    ax.set_title('Equity Curve', fontsize=14, fontweight='bold')
    ax.legend()
    ax.grid(True, alpha=0.3)
    plt.tight_layout()
    plt.show()

In [None]:
# Drawdown: percentage distance of equity below its running maximum.
if not equity_curve.empty:
    equity_series = equity_curve['equity']
    peak_so_far = equity_series.expanding().max()
    drawdown_pct = (equity_series - peak_so_far) / peak_so_far * 100

    fig, ax = plt.subplots(figsize=(12, 4))
    # Shaded area between the drawdown curve and zero, with the curve on top.
    ax.fill_between(drawdown_pct.index, drawdown_pct, 0, alpha=0.3, color='red')
    ax.plot(drawdown_pct.index, drawdown_pct, color='darkred', linewidth=1)
    ax.set(xlabel='Time', ylabel='Drawdown (%)')
    ax.set_title('Drawdown Over Time', fontsize=14, fontweight='bold')
    ax.grid(True, alpha=0.3)
    plt.tight_layout()
    plt.show()

In [None]:
# PnL distribution: per-trade histogram (left) and running total (right).
if not (trades.empty or 'pnl' not in trades.columns):
    fig, (hist_ax, cum_ax) = plt.subplots(1, 2, figsize=(14, 5))

    # Histogram of per-trade PnL with a break-even marker at 0.
    hist_ax.hist(trades['pnl'], bins=30, alpha=0.7, color='#2E86AB', edgecolor='black')
    hist_ax.axvline(0, color='red', linestyle='--', linewidth=2)
    hist_ax.set(xlabel='PnL ($)', ylabel='Frequency')
    hist_ax.set_title('PnL Distribution', fontsize=12, fontweight='bold')
    hist_ax.grid(True, alpha=0.3)

    # Running total of PnL, plotted against the trade's ordinal position.
    running_pnl = trades['pnl'].cumsum()
    trade_numbers = range(len(running_pnl))
    cum_ax.plot(trade_numbers, running_pnl, linewidth=2, color='#2E86AB')
    cum_ax.axhline(0, color='red', linestyle='--', linewidth=1)
    cum_ax.set(xlabel='Trade Number', ylabel='Cumulative PnL ($)')
    cum_ax.set_title('Cumulative PnL', fontsize=12, fontweight='bold')
    cum_ax.grid(True, alpha=0.3)

    plt.tight_layout()
    plt.show()

## 8. Export Results

In [None]:
# Save results
# The check-mark glyphs were stored as mis-decoded UTF-8 (mojibake) and are
# restored here. Empty outputs are tracked so the closing message does not
# claim that everything was exported when something was skipped.
skipped_outputs = []

if not equity_curve.empty:
    equity_curve.to_csv('../results/equity_curve.csv')
    print("✓ Equity curve saved to: results/equity_curve.csv")
else:
    skipped_outputs.append('equity curve')

if not trades.empty:
    trades.to_csv('../results/trades.csv')
    print("✓ Trades saved to: results/trades.csv")
else:
    skipped_outputs.append('trades')

# The summary is always written, as a single-row table.
summary_df = pd.DataFrame([summary])
summary_df.to_csv('../results/summary.csv', index=False)
print("✓ Summary saved to: results/summary.csv")

if skipped_outputs:
    print(f"\nExport finished; skipped empty outputs: {', '.join(skipped_outputs)}")
else:
    print("\nAll results exported successfully!")

## Summary

This notebook demonstrated the complete HFT simulator workflow:

1. **Data Collection**: Fetched ~3 days of 1-minute BTC/USDT (`BTCUSDT`) candles from Binance
2. **Preprocessing**: Added technical indicators (SMA, EMA, RSI, Bollinger Bands, etc.)
3. **Feature Engineering**: Created 90+ features from price, volume, and market microstructure
4. **ML Model**: Trained an LSTM model to predict short-term price direction
5. **Backtesting**: Ran an event-driven backtest with a momentum strategy
6. **Analysis**: Calculated performance metrics and trade statistics
7. **Visualization**: Generated equity curves, drawdown charts, and PnL distributions
8. **Export**: Saved all results to CSV files

### Key Features

- **Real Market Data**: Live data from Binance API
- **ML Integration**: PyTorch LSTM for predictive signals
- **Risk Controls**: Position limits, stop-loss, drawdown monitoring
- **Comprehensive Metrics**: Return, Sharpe, drawdown, win rate
- **Professional Visualizations**: Publication-quality charts

### Next Steps

- Experiment with different strategy parameters
- Try alternative ML architectures (Transformer, GRU)
- Implement more sophisticated risk management
- Add market-making strategies
- Optimize for higher Sharpe ratios