In [None]:
# ================================================
# 🔧 SETUP - Add src to Python Path
# ================================================

import sys
import os

# The current working directory is treated as the project root; its `src`
# folder goes to the front of the import search path so 'core' can be found.
project_root = os.getcwd()
src_path = os.path.join(project_root, 'src')

if src_path in sys.path:
    print(f"✅ Already in path: {src_path}")
else:
    sys.path.insert(0, src_path)
    print(f"✅ Added to Python path: {src_path}")

# Echo the resolved locations so a wrong working directory is easy to spot
print(
    f"📂 Working directory: {project_root}\n"
    f"🔍 Python will search for modules in: {src_path}\n"
    + "=" * 50
)

In [None]:
import sys
import os

# Treat the current working directory as the project root and make sure its
# `src` folder is importable (so the 'core' module can be found).
project_root = os.getcwd()
src_path = os.path.join(project_root, 'src')

_path_was_missing = src_path not in sys.path
if _path_was_missing:
    sys.path.insert(0, src_path)
_status = "Added to Python path" if _path_was_missing else "Already in path"
print(f"✅ {_status}: {src_path}")

# Report what was resolved
for _line in (
    f"📂 Working directory: {project_root}",
    f"🔍 Python will search for modules in: {src_path}",
    "=" * 50,
):
    print(_line)

# ================================================
# 🧪 TEST PROGRESSIVE TRAINING (limited time window)
# ================================================
# Trains SimpleModelTrainer on the candles that fall inside
# [start_time, end_time] and prints the resulting model summary.

# The run is bounded by the time window below, not by a fixed candle count,
# so the banner describes the window rather than a number of candles.
print("🧪 PROGRESSIVE TEST - Training on a limited time window")
print("=" * 70)

import os
import pandas as pd
from src.training.model_trainer import SimpleModelTrainer

data_folder = os.path.join(os.getcwd(), 'data')

# Input files handed to the trainer: one JSON file per timeframe, plus the
# parquet checkpoint stored under levels_cache for the 15m series.
training_files = {
    tf: os.path.join(data_folder, f'BTCUSDT-{tf}.json')
    for tf in ('M', 'W', 'D', '1h', '15m')
}
training_files['parquet_path'] = os.path.join(
    data_folder, 'levels_cache', 'BTCUSDT-15m-levels.parquet.checkpoint'
)

# Initialize trainer
test_trainer = SimpleModelTrainer()

# Training window passed to configure_training
start_time = pd.Timestamp('2023-04-03 13:45:00')
end_time = pd.Timestamp('2023-06-01 04:00:00')
# Alternative window, kept as a real comment instead of a no-op string literal:
# start_time = pd.Timestamp('2023-04-10 00:00:00')
# end_time = pd.Timestamp('2023-05-15 23:59:59')

# Configure training with time range filter
test_trainer.configure_training(
    profit_threshold=3.0,
    loss_threshold=-2.0,
    lookforward_periods=[5, 10, 20],
    start_time=start_time,
    end_time=end_time,
)

print("\n⏱️  Starting progressive training...")
print(f"   Time range: {start_time} to {end_time}")
print("   This will process each candle individually\n")

# Train the model; train_model's return value is used as a success flag
success = test_trainer.train_model(
    training_files=training_files,
    level_timeframes=['W', 'D', '1h', '15m']
)

if success:
    info = test_trainer.get_model_info()
    print("\n🎉 TEST TRAINING SUCCESS!")
    print(f"   Model Type: {info['model_type']}")
    print(f"   Accuracy: {info['accuracy']:.1%}")
    print(f"   Features: {info['features']}")
    print(f"   Classes: {info['classes']}")
    print("\n✅ Progressive approach is working correctly!")
    print("   You can now train on full dataset")
else:
    print("❌ Test training failed!")

✅ Already in path: d:\Dev\trading-bot\src
📂 Working directory: d:\Dev\trading-bot
🔍 Python will search for modules in: d:\Dev\trading-bot\src
🧪 PROGRESSIVE TEST - Training on 100 candles

⏱️  Starting progressive training...
   Time range: 2023-04-03 13:45:00 to 2023-06-01 04:00:00
   This will process each candle individually

🎓 TRAINING MODEL

📋 Validating training files...
   ✅ Main file (15m): d:\Dev\trading-bot\data\BTCUSDT-15m.json
   ✅ Parquet file: d:\Dev\trading-bot\data\levels_cache\BTCUSDT-15m-levels.parquet.checkpoint

📊 Validating level timeframes: ['W', 'D', '1h', '15m']
   ✅ W: d:\Dev\trading-bot\data\BTCUSDT-W.json
   ✅ D: d:\Dev\trading-bot\data\BTCUSDT-D.json
   ✅ 1h: d:\Dev\trading-bot\data\BTCUSDT-1h.json
   ✅ 15m: d:\Dev\trading-bot\data\BTCUSDT-15m.json

✅ All 4 level timeframe files validated!
   Level timeframes: ['W', 'D', '1h', '15m']

📦 Loading precomputed features from parquet...
   ✅ Loaded 5,902 candles from precomputed parquet

📂 Loading level timeframe d

Level features: 100%|██████████| 5626/5626 [00:00<00:00, 12254.30it/s]
Exception ignored on calling ctypes callback function: <bound method DataIter._next_wrapper of <xgboost.data.SingleBatchInternalIter object at 0x000001894C077970>>
Traceback (most recent call last):
  File "d:\Dev\trading-bot\venv\lib\site-packages\xgboost\core.py", line 641, in _next_wrapper
    return self._handle_exception(lambda: self.next(input_data), 0)
  File "d:\Dev\trading-bot\venv\lib\site-packages\xgboost\core.py", line 557, in _handle_exception
    return fn()
  File "d:\Dev\trading-bot\venv\lib\site-packages\xgboost\core.py", line 641, in <lambda>
    return self._handle_exception(lambda: self.next(input_data), 0)
  File "d:\Dev\trading-bot\venv\lib\site-packages\xgboost\data.py", line 1280, in next


   ✅ Level features calculated in 0.48s (27 features)

💾 Step 4/4: Adding memory features...
   ✅ Memory features added in 0.000s (10 features)

🔗 Combining all feature groups...
   ✅ Features combined in 0.001s

✅ Features ready: 5626 samples, 48 features
   TA: 11, Level: 27, Memory: 10
🏷️  Generating trading labels...
🏷️  Generating labels based on price movements...
   BUY signals: 49 (0.9%)
   SELL signals: 165 (2.9%)
   HOLD signals: 5412 (96.2%)
✅ Training data ready: 5626 samples, 48 features
📊 Label distribution:
   hold: 5412 (96.2%)
   sell: 165 (2.9%)
   buy: 49 (0.9%)

✅ TRAINING DATA PREPARATION COMPLETE
⏱️  Total time: 2.92 seconds (0.05 minutes)
📊 Total samples: 5,626
📊 Total features: 48
⚡ Average time per sample: 0.0005 seconds

🔥 Training with 4500 samples, 48 features
🖥️  GPU Detection: ✅ Available
   🚀 Training with GPU...


    input_data(**self.kwargs)
  File "d:\Dev\trading-bot\venv\lib\site-packages\xgboost\core.py", line 730, in inner_f
    return func(**kwargs)
  File "d:\Dev\trading-bot\venv\lib\site-packages\xgboost\core.py", line 633, in input_data
    self.proxy.set_info(
  File "d:\Dev\trading-bot\venv\lib\site-packages\xgboost\core.py", line 730, in inner_f
    return func(**kwargs)
  File "d:\Dev\trading-bot\venv\lib\site-packages\xgboost\core.py", line 932, in set_info
    self.set_label(label)
  File "d:\Dev\trading-bot\venv\lib\site-packages\xgboost\core.py", line 1070, in set_label
    dispatch_meta_backend(self, label, "label", "float")
  File "d:\Dev\trading-bot\venv\lib\site-packages\xgboost\data.py", line 1218, in dispatch_meta_backend
    _meta_from_numpy(data, name, dtype, handle)
  File "d:\Dev\trading-bot\venv\lib\site-packages\xgboost\data.py", line 1159, in _meta_from_numpy
    _check_call(_LIB.XGDMatrixSetInfoFromInterface(handle, c_str(field), interface_str))
KeyboardInterrupt:

     ✅ GPU Training Complete: 2.66s | Accuracy: 98.5%

✅ TRAINING COMPLETE!
   Model: XGBoost-GPU
   Test Accuracy: 98.5%

🔍 Top 10 Features:
     support_resistance_ratio: 0.096
     levels_within_5.0pct: 0.073
     time: 0.070
     bb_upper: 0.067
     num_levels: 0.061
     resistance_count: 0.060
     volume_ma20: 0.057
     levels_within_1.0pct: 0.055
     levels_within_2.0pct: 0.053
     support_count: 0.046
💾 Model saved to: src/models/simple_trading_model.joblib

🎉 TEST TRAINING SUCCESS!
   Model Type: XGBoost-GPU
   Accuracy: 98.5%
   Features: 48
   Classes: ['buy', 'hold', 'sell']

✅ Progressive approach is working correctly!
   You can now train on full dataset


In [None]:
# ================================================
# 🚀 VECTORBT BACKTESTING - Replacing Manual Simulation
# ================================================

print("🚀 VECTORBT BACKTESTING MODE")
print("=" * 55)

import os
import pandas as pd
import json
from src.backtesting.vectorbt_engine import VectorBTBacktester
from src.training.model_trainer import SimpleModelTrainer

# This cell does not reuse the trainer object from the training cell: it
# builds a fresh SimpleModelTrainer and asks it to load a saved model.
print("📁 Loading model from file...")
simulation_trainer = SimpleModelTrainer()

try:
    success = simulation_trainer.load_model()
except Exception as load_error:
    # Report the failure (with traceback) and mark the trainer as unavailable
    print(f"❌ Error loading model: {load_error}")
    import traceback
    traceback.print_exc()
    simulation_trainer = None
else:
    if not success:
        # load_model reported failure without raising
        print("❌ Failed to load model from file")
        print("   Make sure you've trained the model first (run Cell 2)")
        simulation_trainer = None

if simulation_trainer is not None and simulation_trainer.is_trained:
    print("✅ Model loaded successfully!")

    # Summarise the loaded model on a single line
    info = simulation_trainer.get_model_info()
    model_summary = " | ".join([
        f"Model: {info['model_type']}",
        f"Accuracy: {info['accuracy']:.1%}",
        f"Features: {info['features']}",
    ])
    print(f"   {model_summary}")

    # --- Backtest configuration ---
    test_symbol = 'BTCUSDT'
    test_data_folder = 'data_test'
    # Kept small: progressive backtesting recalculates levels for each candle
    MAX_CANDLES = 100

    # Signal thresholds (buy and sell share the same value)
    buy_threshold = sell_threshold = 0.10

    # VectorBT settings
    initial_cash = 10000.0
    commission = 0.001  # 0.1%
    slippage = 0.0005   # 0.05%

    # Print the whole configuration in one call; the text matches five
    # consecutive print() calls line for line.
    config_lines = [
        f"\n🎯 Symbol: {test_symbol}",
        f"📊 Backtesting: {MAX_CANDLES} candles (PROGRESSIVE - no data leakage)",
        f"⚙️  Thresholds: BUY≥{buy_threshold:.0%}, SELL≥{sell_threshold:.0%}",
        f"💰 Initial Cash: ${initial_cash:,.2f}",
        f"💸 Commission: {commission:.2%} | Slippage: {slippage:.2%}\n",
    ]
    print("\n".join(config_lines))
    
    # Helper function to load JSON data
    def load_json_data(filepath):
        """Load OHLCV candles from a JSON file into a DataFrame.

        The file's top-level JSON object is read for a ``candles`` list of
        records; each record's ``time`` field is parsed as a Unix timestamp
        in seconds.

        Args:
            filepath: Path of the JSON file to read.

        Returns:
            A DataFrame with one row per candle in which the ``time`` column
            has been replaced by a ``datetime`` column, or ``None`` when the
            file has no ``candles`` entry or the list is empty.
        """
        # Read as UTF-8 explicitly instead of relying on the platform's
        # default text encoding (e.g. cp1252 on Windows).
        with open(filepath, 'r', encoding='utf-8') as f:
            data = json.load(f)

        # Extract candles array
        candles = data.get('candles', [])
        if not candles:
            return None

        df = pd.DataFrame(candles)

        # Convert time (Unix timestamp) to datetime, then drop the raw column
        df['datetime'] = pd.to_datetime(df['time'], unit='s')
        return df.drop(columns='time')
    
    # One JSON file per timeframe, named "<symbol>-<timeframe>.json" inside
    # the test data folder (insertion order: 15m, 1h, D, W, M).
    test_files = {
        tf: os.path.join(test_data_folder, f'{test_symbol}-{tf}.json')
        for tf in ('15m', '1h', 'D', 'W', 'M')
    }
    
    # Check if 15m file exists
    if not os.path.exists(test_files['15m']):
        print(f"❌ No 15m data file found for {test_symbol}")
    else:
        # Load 15m data (primary trading timeframe)
        print("📊 Loading 15m data...")
        test_data = load_json_data(test_files['15m'])
        
        if test_data is None:
            print("❌ Could not load test data")
        else:
            loaded_times = test_data['datetime']
            print(f"✅ Loaded {len(test_data)} total candles")
            print(f"📅 Range: {loaded_times.min()} to {loaded_times.max()}")

            # Keep only the last MAX_CANDLES rows when enough are available
            if len(test_data) >= MAX_CANDLES:
                test_data = test_data.tail(MAX_CANDLES).reset_index(drop=True)
            else:
                print(f"⚠️  Only {len(test_data)} candles available, using all")

            backtest_times = test_data['datetime']
            print(f"\n🎬 Starting progressive backtest with {len(test_data)} candles...")
            print(f"📍 Backtest period: {backtest_times.min()} to {backtest_times.max()}")

            # Load every other timeframe into a DataFrame keyed by timeframe
            # name; these frames are passed on for level extraction.
            print("\n📂 Loading multi-timeframe data for level extraction...")
            data_dfs = {}

            for tf, filepath in test_files.items():
                if tf == '15m':
                    # The 15m series is already held in test_data
                    continue
                if not os.path.exists(filepath):
                    print(f"   ⚠️  {tf} file not found, skipping")
                    continue
                print(f"   Loading {tf}...")
                df = load_json_data(filepath)
                if df is not None:
                    data_dfs[tf] = df

            print(f"✅ Loaded {len(data_dfs)} additional timeframes for levels\n")
            
            # Index the candles by their 'datetime' column. set_index returns a
            # new frame, so test_data itself is left unchanged.
            # (Original note: a datetime index is required for VectorBT.)
            test_data_indexed = test_data.set_index('datetime')
            
            print("\n" + "=" * 55)
            print("🚀 VECTORBT PROGRESSIVE BACKTEST STARTING...")
            print("=" * 55)
            print("⚠️  Note: Progressive mode recalculates levels for EACH candle")
            print("   This is SLOW but prevents data leakage (realistic trading)")
            print("   Progress bar will show each candle being processed\n")
            
            # Build the project's backtester around the loaded trainer, using
            # the cash / commission / slippage values configured earlier.
            backtester = VectorBTBacktester(
                trainer=simulation_trainer,
                initial_cash=initial_cash,
                commission=commission,
                slippage=slippage
            )
            
            # Generate signals for the indexed 15m candles. The other timeframes
            # (data_dfs) are handed over as well; per the messages printed above,
            # levels are presumably re-extracted per candle inside this call —
            # confirm against VectorBTBacktester.generate_signals.
            signals_df = backtester.generate_signals(
                data=test_data_indexed,
                data_dfs=data_dfs,
                buy_threshold=buy_threshold,
                sell_threshold=sell_threshold,
                timeframe='15m'
            )
            
            # Run the backtest on the generated signals (described by the
            # original author as the fast, vectorized step).
            # NOTE(review): '15T' is the legacy pandas alias for 15 minutes;
            # pandas 2.2+ deprecates 'T' in favour of 'min'. Consider '15min'
            # once it is confirmed how run_backtest consumes `freq`.
            print("\n🚀 Running backtest...")
            portfolio = backtester.run_backtest(signals_df=signals_df, freq='15T')
            
            # Display performance summary
            backtester.print_performance_summary()
            
            # Fetch the per-trade table and print a win/loss breakdown
            trades_df = backtester.get_trade_analysis()
            section_rule = "=" * 55

            print("\n" + section_rule)
            print("📋 TRADE ANALYSIS")
            print(section_rule)

            n_trades = len(trades_df)
            if n_trades == 0:
                print("⚠️  No trades executed during backtest period")
            else:
                print(f"\n📊 Total Trades: {n_trades}")

                # Split by the sign of the 'PnL' column; trades with a PnL of
                # exactly zero fall into neither group.
                pnl = trades_df['PnL']
                winning_trades = trades_df[pnl > 0]
                losing_trades = trades_df[pnl < 0]
                n_wins = len(winning_trades)
                n_losses = len(losing_trades)

                print(f"✅ Winning: {n_wins} ({n_wins / n_trades * 100:.1f}%)")
                print(f"❌ Losing:  {n_losses} ({n_losses / n_trades * 100:.1f}%)")

                if n_wins:
                    win_pnl = winning_trades['PnL']
                    print(f"\n💚 Average Win: ${win_pnl.mean():.2f}")
                    print(f"💚 Largest Win: ${win_pnl.max():.2f}")

                if n_losses:
                    loss_pnl = losing_trades['PnL']
                    print(f"💔 Average Loss: ${loss_pnl.mean():.2f}")
                    # The largest loss is the most negative PnL, hence min()
                    print(f"💔 Largest Loss: ${loss_pnl.min():.2f}")

                # Show the first rows of the trade table
                print("\n📋 Sample Trades (first 10):")
                print(trades_df.head(10))

            print("\n" + section_rule)
            print("✅ VECTORBT BACKTEST COMPLETE!")
            print(section_rule)
            
            # How many candles produced each signal value
            signal_counts = signals_df['signal'].value_counts()
            n_signals = len(signals_df)
            print("\n🎯 SIGNAL DISTRIBUTION:")
            for signal in ('BUY', 'SELL', 'HOLD'):
                count = signal_counts.get(signal, 0)
                # Guard against an empty signals frame
                pct = count / n_signals * 100 if n_signals > 0 else 0
                print(f"   {signal}: {count} ({pct:.1f}%)")

            # List the actionable (non-HOLD) signals, capped at the first 20
            trade_signals = signals_df[signals_df['signal'] != 'HOLD']
            print(f"\n📋 TRADING SIGNALS ({len(trade_signals)} total):")
            if len(trade_signals) == 0:
                print("   No trading signals generated")
            else:
                for timestamp, row in trade_signals.head(20).iterrows():
                    emoji = "🔴" if row['signal'] != 'BUY' else "🟢"
                    print(
                        f"   {emoji} {timestamp} | "
                        f"{row['signal']:4s} @ ${row['close']:8.2f} | "
                        f"Conf: {row['confidence']:5.1%}"
                    )

            print("\n✅ Progressive backtest complete!")
            print("   ℹ️  Each candle was processed independently with levels extracted")
            print("      at that point in time (no future data leakage)")
            
            # ================================================
            # 📊 OPTIONAL: GENERATE INTERACTIVE PLOTS
            # ================================================

            # Plotting is best-effort: a failure is reported, not raised.
            try:
                print("\n📊 Generating interactive plots...")
                # use_widgets=False: static plots (no anywidget dependency)
                backtester.plot_results(use_widgets=False)
            except Exception as plot_error:
                error_text = str(plot_error)
                print(f"⚠️  Could not generate plots: {error_text}")
                mentions_anywidget = "anywidget" in error_text
                if mentions_anywidget:
                    print("   ℹ️  anywidget not installed")
                    print("   Solution: Restart the kernel (Kernel → Restart)")
                print("   Use VectorBT_Backtest.ipynb for full visualization")
    
else:
    print("❌ No model available")
    print("   Please run Cell 2 first to train the model")
