# RL Trading Model Training & Evaluation

This notebook demonstrates how to train a reinforcement learning (RL) trading model using your full feature pipeline and visualize the results. It loads historical price data, extracts features, trains a PPO agent, and evaluates performance.

In [1]:
# ================================================
# 🔧 SETUP - Add src to Python Path
# ================================================

import sys
import os

# Add src directory to Python path so 'core' module can be found
project_root = os.getcwd()
src_path = os.path.join(project_root, 'src')

if src_path not in sys.path:
    sys.path.insert(0, src_path)
    print(f"✅ Added to Python path: {src_path}")
else:
    print(f"✅ Already in path: {src_path}")

# Verify
print(f"📂 Working directory: {project_root}")
print(f"🔍 Python will search for modules in: {src_path}")
print("=" * 50)

✅ Added to Python path: d:\Dev\trading-bot\src
📂 Working directory: d:\Dev\trading-bot
🔍 Python will search for modules in: d:\Dev\trading-bot\src


In [2]:
# Section 1: Import Required Libraries

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from stable_baselines3 import PPO
import gymnasium as gym

# Project modules
from src.prediction.rl_predictor import RLPredictor


# Section 2: Load and Prepare Data
from src.core.trading_types import ChartInterval
from src.training.data_loader import DataLoader
from pathlib import Path

# Data Loader
loader = DataLoader()



In [3]:
# Generate predictions using the trained RL model

print("🎯 Generating RL Trading Signals...")
print("=" * 50)

rl_predictor = RLPredictor(model_dir='models/rl_demo')

symbol = 'BTCUSDT'
dfs = loader.load_data(symbol)
# Get the primary timeframe for feature engineering
features_df = dfs['15m']

# Load the trained model and generate predictions

# Calculate minimum data needed for meaningful predictions
window_size = 672  # Your RL model window size
desired_predictions = 2000
min_candles_needed = window_size + desired_predictions
actual_candles = min(len(features_df), min_candles_needed)
limited_features_df = features_df.tail(actual_candles)

print(f"📊 Data Configuration:")
print(f"   • Window size: {window_size}")
print(f"   • Input candles: {len(limited_features_df):,}")
print(f"   • Expected predictions: ~{len(limited_features_df) - window_size + 1:,}")

# ✅ Generate predictions with environment trading logic
result_df = rl_predictor.generate_predictions(limited_features_df)

print(f"\n🔍 Result DataFrame shape: {result_df.shape}")
print(f"📊 Available columns: {list(result_df.columns)}")

# ✅ NEW: Use the REAL RL actions from environment
rl_actions = result_df['rl_action'].to_numpy(dtype=np.float64)

# ✅ NEW: Use the REAL portfolio values from environment
portfolio_values = result_df['portfolio_value'].to_numpy(dtype=np.float64)
pnl_values = result_df['pnl'].to_numpy(dtype=np.float64)

print(f"\n💰 Environment Trading Results:")
print(f"   • RL Actions range: [{rl_actions.min():.3f}, {rl_actions.max():.3f}]")
print(f"   • Portfolio range: [${portfolio_values.min():,.2f}, ${portfolio_values.max():,.2f}]")
print(f"   • PnL range: [${pnl_values.min():+,.2f}, ${pnl_values.max():+,.2f}]")

# Calculate window size and pad RL actions to match original data length
missing_count = len(limited_features_df) - len(rl_actions)

# Pad RL actions at beginning with NaN for window period
padded_rl_actions = np.concatenate([
    np.full(missing_count, np.nan, dtype=np.float64),
    rl_actions
])

# ✅ Pad portfolio values to match data length
padded_portfolio_values = np.concatenate([
    np.full(missing_count, 1000000.0, dtype=np.float64),  # Initial value for window period
    portfolio_values
])

# ✅ Use ENVIRONMENT's exact thresholds and actual portfolio values
print(f"\n📈 Creating visualization using ENVIRONMENT's actual data...")

# 🧪 TEST: Use extreme thresholds to verify environment setup (should get ALL HOLD)
env_buy_threshold = 1.0   # TEST: Extreme threshold - should get 0 BUY signals
env_sell_threshold = -1.0  # TEST: Extreme threshold - should get 0 SELL signals
environment_initial_balance = 1000000.0

print(f"🧪 TESTING with extreme thresholds (should get ALL HOLD signals):")
print(f"   • BUY threshold: {env_buy_threshold} (extreme - should get 0 BUY)")
print(f"   • SELL threshold: {env_sell_threshold} (extreme - should get 0 SELL)")
print(f"   • Expected: ALL HOLD signals in visualization")
print(f"   • Purpose: Verify environment's portfolio values are used correctly")

# Simple trading visualization using the environment's REAL results
from src.reporting.model_testing_report import simple_trading_chart

# ✅ TEST: Use environment's actual portfolio values with extreme thresholds
results = simple_trading_chart(
    limited_features_df,
    padded_rl_actions,
    symbol,
    initial_cash=environment_initial_balance,
    commission=0.001,  # Match environment commission
    buy_threshold=env_buy_threshold,      # EXTREME: 1.0 (should get 0 BUY)
    sell_threshold=env_sell_threshold,    # EXTREME: -1.0 (should get 0 SELL)
    portfolio_values=padded_portfolio_values  # ENVIRONMENT's actual values
)

print(f"\n🔍 Environment vs Visualization Comparison:")
print(f"   • Environment Final PnL: ${result_df['pnl'].iloc[-1]:+,.2f}")
print(f"   • Environment Total Return: {result_df['pnl_pct'].iloc[-1]:+.2f}%")
print(f"   • Environment Final Portfolio: ${result_df['portfolio_value'].iloc[-1]:,.2f}")
print(f"   • Visualization Final Value: ${results['final_value']:,.2f}")
print(f"   • Visualization Return: {results['final_return_pct']:+.2f}%")

# ✅ TEST: Check if environment portfolio values are properly used
pnl_diff = abs(result_df['portfolio_value'].iloc[-1] - results['final_value'])
return_diff = abs(result_df['pnl_pct'].iloc[-1] - results['final_return_pct'])

print(f"\n🧪 ENVIRONMENT SETUP TEST:")
if pnl_diff < 1.0 and return_diff < 0.01:
    print(f"✅ SUCCESS: Environment portfolio values are correctly used in visualization!")
    print(f"   • Difference: ${pnl_diff:.2f} (should be ~$0)")
else:
    print(f"❌ PROBLEM: Environment portfolio values NOT properly used!")
    print(f"   • PnL diff: ${pnl_diff:.2f}")
    print(f"   • Return diff: {return_diff:.2f}%")
    print(f"   • Issue: Visualization is still calculating its own portfolio values")

# ✅ Additional debugging: Show actual trading activity
print(f"\n📊 Detailed Trading Analysis:")
print(f"   • Environment Buy Signals: {(result_df['action'] == 1).sum():,}")
print(f"   • Environment Sell Signals: {(result_df['action'] == -1).sum():,}")
print(f"   • Environment Hold Signals: {(result_df['action'] == 0).sum():,}")
print(f"   • Environment Trades: {(result_df['step_pnl'] != 0).sum():,}")
print(f"   • Non-zero PnL Steps: {(result_df['step_pnl'].abs() > 0.001).sum():,}")

print(f"\n✅ Trading Analysis Complete!")

🎯 Generating RL Trading Signals...
✅ GPU Available: NVIDIA GeForce RTX 3080 (10.0GB)
🖥️ RL Training Device: cuda
📥 Loading data for BTCUSDT...
🔧 Adding timeframe-specific technical indicators...
🔧 Adding timeframe-specific technical indicators...
🔧 Converting levels cache index to DatetimeIndex...
✅ Loaded levels cache: data\levels_cache\BTCUSDT-15m-levels.parquet
📊 Shape: 101,000 rows × 9 columns
🔄 Recalculating higher timeframe indicators for 15m...
🔧 Converting levels cache index to DatetimeIndex...
✅ Loaded levels cache: data\levels_cache\BTCUSDT-15m-levels.parquet
📊 Shape: 101,000 rows × 9 columns
🔄 Recalculating higher timeframe indicators for 15m...
📊 Data Configuration:
   • Window size: 672
   • Input candles: 2,672
   • Expected predictions: ~2,001
📊 Data Configuration:
   • Window size: 672
   • Input candles: 2,672
   • Expected predictions: ~2,001
✅ Model loaded on cuda
📥 Loaded normalizer from models/rl_demo\normalizer.pkl
📥 Loaded normalizer from: models/rl_demo\normaliz

  result.trades_df = pd.DataFrame(trades_log)


✅ Generated 2,000 predictions using environment trading logic
💰 Final Portfolio: $1,006,940.04
📈 Total PnL: $+6,940.04 (+0.69%)
🔄 Trading Signals: 993 BUY | 989 SELL | 18 HOLD
📊 Individual Trades: 874
💹 Avg Step PnL: $+5.17 | Max Exposure: $126,011.18
🎯 Win Rate: 50.2% (439/874 trades)

🔍 Result DataFrame shape: (2000, 63)
📊 Available columns: ['time', 'open', 'high', 'low', 'close', 'volume', 'rsi', 'macd', 'macd_signal', 'macd_hist', 'bb_upper', 'bb_lower', 'bb_position', 'volume_ma20', 'volume_ratio', 'obv', 'volatility', 'atr', 'adx', 'ema5', 'ema9', 'ema13', 'ema20', 'ema21', 'ema50', 'ema200', 'ema9_ema21_cross', 'ema20_ema50_cross', 'stochastic_k', 'stochastic_d', 'vwap', 'levels_json', 'ema20_1h', 'ema50_1h', 'ema200_1h', 'rsi_1h', 'macd_1h', 'macd_hist_1h', 'ema20_D', 'ema50_D', 'macd_hist_D', 'rsi_D', 'ema20_W', 'ema50_W', 'macd_hist_W', 'rsi_W', 'ema20_M', 'ema50_M', 'macd_hist_M', 'rsi_M', 'rl_action', 'portfolio_value', 'position', 'reward', 'step_pnl', 'unrealized_pnl', '


📊 BTCUSDT Trading Results:
   Strategy Return: +0.69%
   Buy & Hold:      +6.84%
   Outperformance:  -6.14%
   Max Drawdown:    -0.54%
   Win Rate:        0.0%
   Total Trades:    0
   Final Value:     $1,006,940.04

🔍 Environment vs Visualization Comparison:
   • Environment Final PnL: $+6,940.04
   • Environment Total Return: +0.69%
   • Environment Final Portfolio: $1,006,940.04
   • Visualization Final Value: $1,006,940.04
   • Visualization Return: +0.69%

🧪 ENVIRONMENT SETUP TEST:
✅ SUCCESS: Environment portfolio values are correctly used in visualization!
   • Difference: $0.00 (should be ~$0)

📊 Detailed Trading Analysis:
   • Environment Buy Signals: 993
   • Environment Sell Signals: 989
   • Environment Hold Signals: 18
   • Environment Trades: 1,998
   • Non-zero PnL Steps: 1,998

✅ Trading Analysis Complete!
