# Paper Trading on Alpaca with Real-Time Fine-Tuning

This notebook implements a complete paper trading system that:
- **Fetches real-time 1-min data** from Alpaca API
- **Stores data** in CSV for persistence
- **Executes paper trades** on Alpaca account
- **Fine-tunes the model** every 2 hours with latest data
- **Validates and accepts/rejects** fine-tuned models
- **Tracks performance** with detailed logging

## Workflow
```
┌─────────────────────────────────────────────────────┐
│  Market Open                                        │
│  ↓                                                  │
│  Fetch 1-min data from Alpaca API                   │
│  ↓                                                  │
│  Append to CSV (paper_trading_data_1min.csv)        │
│  ↓                                                  │
│  Get current state (positions, cash, prices)        │
│  ↓                                                  │
│  Predict action with current model                  │
│  ↓                                                  │
│  Execute trades on Alpaca paper account             │
│  ↓                                                  │
│  Wait 1 minute → Loop                               │
│                                                     │
│  Every 2 hours:                                     │
│  ├─ Load last 48h from CSV                          │
│  ├─ Split train/validation (80/20)                  │
│  ├─ Fine-tune model on train set                    │
│  ├─ Evaluate on validation set                      │
│  ├─ Accept if score >= 95% of original              │
│  └─ Save model if accepted                          │
└─────────────────────────────────────────────────────┘
```

**Disclaimer**: This is for educational purposes. Not financial advice.

# Part 1: Setup and Imports

In [None]:
# System setup
import sys
import os
from pathlib import Path

# Resolve the notebook directory: the folder containing this file when
# `__file__` is defined, otherwise the current working directory.
if '__file__' in locals():
    notebook_dir = os.path.dirname(os.path.abspath(__file__))
else:
    notebook_dir = os.getcwd()

# The project root is taken to be two levels above the notebook directory.
project_root = os.path.abspath(os.path.join(notebook_dir, os.pardir, os.pardir))

# Put the project root first on the import path; the message is printed only
# when the path actually had to be added.
if project_root not in sys.path:
    sys.path.insert(0, project_root)
    print(f"‚úì Project root: {project_root}")


In [None]:
# Core imports
import numpy as np
import pandas as pd
import time
import warnings
import threading
import tempfile
from datetime import datetime, timedelta, timezone
from tqdm import tqdm

warnings.filterwarnings('ignore')

# FinRL imports
from finrl.config_tickers import DOW_30_TICKER
from finrl.config import INDICATORS
from finrl.meta.env_stock_trading.env_stocktrading import StockTradingEnv
from finrl.meta.data_processors.processor_alpaca import AlpacaProcessor

# DRL imports
from stable_baselines3 import PPO
from stable_baselines3.common.vec_env import DummyVecEnv

# Alpaca imports
import alpaca_trade_api as tradeapi

# Visualization
import matplotlib.pyplot as plt
import matplotlib.ticker as ticker

# PyTorch (for ElegantRL agent)
import torch
import torch.nn as nn
from torch import Tensor
from torch.distributions.normal import Normal

print("‚úì All imports successful")

# Part 2: Configuration and API Keys

In [None]:
# Load API credentials from .env file
from dotenv import load_dotenv

# Load environment variables from the .env file in the notebook directory
env_path = os.path.join(notebook_dir, '.env')
load_dotenv(env_path)

# os.getenv returns None for any variable that is not set
API_KEY = os.getenv('ALPACA_API_KEY')
API_SECRET = os.getenv('ALPACA_API_SECRET')
API_BASE_URL = os.getenv('ALPACA_API_BASE_URL')

# Technical indicators
TECH_INDICATORS = INDICATORS

# Tickers (DOW 30)
TICKERS = DOW_30_TICKER

# Report the credential status honestly: a missing or empty variable would
# otherwise only surface later as an opaque authentication error.
_missing_credentials = [
    name
    for name, value in (
        ('ALPACA_API_KEY', API_KEY),
        ('ALPACA_API_SECRET', API_SECRET),
        ('ALPACA_API_BASE_URL', API_BASE_URL),
    )
    if not value
]
if _missing_credentials:
    print(f"WARNING: missing Alpaca credentials: {', '.join(_missing_credentials)} (looked in {env_path} and the process environment)")
else:
    print(f"‚úì API credentials loaded from .env")
print(f"‚úì Using {len(TICKERS)} tickers")
print(f"‚úì Technical indicators: {len(TECH_INDICATORS)} indicators")

In [None]:
# Main configuration, assembled one section at a time (key order is preserved)
CONFIG = {}

# Alpaca API
CONFIG.update({
    'API_KEY': API_KEY,
    'API_SECRET': API_SECRET,
    'API_BASE_URL': API_BASE_URL,
})

# Tickers and dimensions
CONFIG.update({
    'TICKERS': TICKERS,
    'STOCK_DIM': len(TICKERS),
})

# Model paths (Stable Baselines3 PPO)
CONFIG.update({
    'TRAINED_MODEL': 'trained_models/agent_ppo.zip',  # SB3 PPO model (relative to FinRL root)
    'OUTPUT_DIR': 'paper_trading_finetune_results',
})

# Trading parameters
CONFIG.update({
    'INITIAL_CASH': 1_000_000,
    'HMAX': 100,
    'TRANSACTION_COST_PCT': 0.001,
    'REWARD_SCALING': 1e-4,
    'TURBULENCE_THRESH': 30,
    'MIN_ACTION_THRESHOLD': 5,
})

# Fine-tuning parameters
CONFIG.update({
    'FINETUNE_INTERVAL_HOURS': 2,
    'FINETUNE_LOOKBACK_HOURS': 48,
    'FINETUNE_LR': 1e-5,
    'FINETUNE_STEPS': 2000,
    'VALIDATION_SPLIT': 0.2,
    'VALIDATION_THRESHOLD': 0.95,
})

# Data storage
CONFIG['DATA_CSV'] = 'paper_trading_data_1min.csv'

# State and action dimensions (production format).
# State layout: 1 (cash) + STOCK_DIM (prices) + STOCK_DIM (holdings)
#               + len(TECH_INDICATORS) * STOCK_DIM (indicator values).
# With 30 tickers and 8 indicators that is 1 + 30 + 30 + 240 = 301.
# NO turbulence in state vector!
action_dim = CONFIG['STOCK_DIM']
state_dim = 1 + action_dim * (2 + len(TECH_INDICATORS))

CONFIG['action_dim'] = action_dim
CONFIG['state_dim'] = state_dim

# Print a summary of the effective configuration
for _summary_line in (
    "\nüìã CONFIGURATION",
    "="*60,
    f"Model: {CONFIG['TRAINED_MODEL']} (Stable Baselines3 PPO)",
    f"Output: {CONFIG['OUTPUT_DIR']}",
    f"Data CSV: {CONFIG['DATA_CSV']}",
    f"Tickers: {CONFIG['STOCK_DIM']}",
    f"State dim: {state_dim}, Action dim: {action_dim} (Production format)",
    f"Initial cash: ${CONFIG['INITIAL_CASH']:,}",
    f"Fine-tune: Every {CONFIG['FINETUNE_INTERVAL_HOURS']}h using last {CONFIG['FINETUNE_LOOKBACK_HOURS']}h data",
):
    print(_summary_line)

# Part 3: Data Collection Functions

These functions fetch 1-minute OHLCV data from Alpaca and store it in a CSV file for fine-tuning.

## Key Features:
- **Live OHLCV bars**: Gets open, high, low, close, volume for all 30 tickers
- **Minute-level rounding**: Timestamps rounded to the minute for consistency
- **Technical indicators**: Calculates all FinRL indicators per symbol
- **Historical backfill**: Fetches 2 trading days of historical data on startup
- **Duplicate prevention**: Checks timestamps before appending
- **Robust calculation**: Uses expanding windows for indicators to avoid NaN values

In [None]:
def init_data_csv(csv_path):
    """
    Make sure the real-time data CSV exists.

    An existing file is left untouched and its record count is reported.
    Otherwise a header-only CSV is written with the OHLCV columns followed by
    the technical-indicator columns.
    """
    if Path(csv_path).exists():
        record_count = len(pd.read_csv(csv_path))
        print(f"‚úì Data CSV exists: {csv_path} ({record_count:,} records)")
        return

    header = ['timestamp', 'tic', 'open', 'high', 'low', 'close', 'volume'] + TECH_INDICATORS
    pd.DataFrame(columns=header).to_csv(csv_path, index=False)
    print(f"‚úì Initialized data CSV: {csv_path}")


def _assign_default_indicators(df, close_fallback):
    """Overwrite every indicator column of `df` with its neutral default."""
    df['macd'] = 0.0
    df['boll_ub'] = close_fallback
    df['boll_lb'] = close_fallback
    df['rsi_30'] = 50.0
    df['cci_30'] = 0.0
    df['dx_30'] = 0.0
    df['close_30_sma'] = close_fallback
    df['close_60_sma'] = close_fallback
    return df


def _window(series, span, n_rows):
    """Rolling window of `span` once `n_rows` reaches it, else an expanding one."""
    if n_rows >= span:
        return series.rolling(span, min_periods=1)
    return series.expanding(min_periods=1)


def _mean_abs_deviation(values):
    """Mean absolute deviation of one window (used by CCI)."""
    return np.abs(values - values.mean()).mean()


def calculate_indicators(df):
    """
    Add the FinRL technical indicators to a single-symbol OHLCV frame.

    The frame is sorted by 'timestamp' and re-indexed, then macd, boll_ub,
    boll_lb, rsi_30, cci_30, dx_30, close_30_sma and close_60_sma are added.
    While fewer rows than an indicator's period are available, an expanding
    window stands in for the rolling one (MACD is 0.0 below 12 rows).
    Remaining NaN values get neutral defaults and infinities become 0.

    With fewer than 2 rows, or if anything raises during the calculation,
    every indicator column is set to its neutral default instead.

    Returns the frame with the indicator columns added.
    """
    try:
        df = df.sort_values('timestamp').reset_index(drop=True)
        n_rows = len(df)
        close = df['close']

        # Not enough history for any indicator: neutral defaults only
        if n_rows < 2:
            return _assign_default_indicators(df, close)

        # MACD (12-26 EMA)
        if n_rows < 12:
            df['macd'] = 0.0
        else:
            fast_ema = close.ewm(span=12, adjust=False, min_periods=1).mean()
            slow_ema = close.ewm(span=26, adjust=False, min_periods=1).mean()
            df['macd'] = fast_ema - slow_ema

        # Bollinger Bands (20-period)
        band_mid = _window(close, 20, n_rows).mean()
        band_std = _window(close, 20, n_rows).std()
        if n_rows < 20:
            # Only the expanding variant zero-fills the undefined std here;
            # the rolling variant leaves NaN for the final fill step.
            band_std = band_std.fillna(0)
        df['boll_ub'] = band_mid + (2 * band_std)
        df['boll_lb'] = band_mid - (2 * band_std)

        # RSI-30
        delta = close.diff()
        gain = _window(delta.where(delta > 0, 0), 30, n_rows).mean()
        loss = -_window(delta.where(delta < 0, 0), 30, n_rows).mean()
        rs = gain / loss.replace(0, 1e-10)
        df['rsi_30'] = 100 - (100 / (1 + rs))

        # CCI-30
        typical_price = (df['high'] + df['low'] + df['close']) / 3
        tp_mean = _window(typical_price, 30, n_rows).mean()
        tp_mad = _window(typical_price, 30, n_rows).apply(_mean_abs_deviation)
        df['cci_30'] = (typical_price - tp_mean) / (0.015 * tp_mad.replace(0, 1e-10))

        # DX-30 (Directional Movement Index): true range first
        prev_close = df['close'].shift()
        true_range = pd.concat(
            [
                df['high'] - df['low'],
                np.abs(df['high'] - prev_close),
                np.abs(df['low'] - prev_close),
            ],
            axis=1,
        ).max(axis=1)
        atr = _window(true_range, 30, n_rows).mean()

        # Directional movement. minus_dm is filtered against the ALREADY
        # filtered plus_dm, so the order of these two statements matters.
        plus_dm = df['high'].diff()
        minus_dm = -df['low'].diff()
        plus_dm = plus_dm.where((plus_dm > minus_dm) & (plus_dm > 0), 0)
        minus_dm = minus_dm.where((minus_dm > plus_dm) & (minus_dm > 0), 0)

        plus_di = 100 * (_window(plus_dm, 30, n_rows).mean() / atr.replace(0, 1e-10))
        minus_di = 100 * (_window(minus_dm, 30, n_rows).mean() / atr.replace(0, 1e-10))
        df['dx_30'] = 100 * np.abs(plus_di - minus_di) / (plus_di + minus_di).replace(0, 1e-10)

        # SMAs
        df['close_30_sma'] = df['close'].rolling(30, min_periods=1).mean()
        df['close_60_sma'] = df['close'].rolling(60, min_periods=1).mean()

        # Fill any remaining NaN with safe defaults
        nan_defaults = (
            ('macd', 0),
            ('boll_ub', df['close']),
            ('boll_lb', df['close']),
            ('rsi_30', 50),
            ('cci_30', 0),
            ('dx_30', 0),
            ('close_30_sma', df['close']),
            ('close_60_sma', df['close']),
        )
        for column, fallback in nan_defaults:
            df[column] = df[column].fillna(fallback)

        # Replace infinities
        return df.replace([np.inf, -np.inf], 0)
    except Exception as e:
        print(f"‚úó Error calculating indicators: {e}")
        # Return with defaults if calculation fails; a missing 'close'
        # column falls back to 0.
        return _assign_default_indicators(df, df.get('close', 0))


def get_date_range_for_day(days_ago, now=None):
    """
    Get start/end timestamps for the regular session of a single trading day.

    The session is 9:30 AM - 4:00 PM US Eastern time. It is converted to UTC
    per date, so the result is 14:30-21:00 UTC during standard time and
    13:30-20:00 UTC during daylight-saving time.

    Args:
        days_ago: How many days before `now` (by UTC calendar date) the
            target day lies.
        now: Optional reference datetime. A naive value is interpreted as
            UTC; an aware value is converted to UTC. Defaults to the current
            UTC time.

    Returns:
        (start_str, end_str, date_str): the session bounds formatted as
        "%Y-%m-%dT%H:%M:%SZ" in UTC, and the target date as "%Y-%m-%d".
    """
    if now is None:
        now = datetime.now(timezone.utc)
    elif now.tzinfo is not None:
        now = now.astimezone(timezone.utc)
    target_date = (now - timedelta(days=days_ago)).date()

    def _eastern_wall_time_to_utc(hour, minute):
        # Build the wall-clock time in New York for the target date and let
        # the tz database decide whether EST or EDT applies on that day.
        local = pd.Timestamp(
            year=target_date.year,
            month=target_date.month,
            day=target_date.day,
            hour=hour,
            minute=minute,
            tz='America/New_York',
        )
        return local.tz_convert('UTC')

    start = _eastern_wall_time_to_utc(9, 30)
    end = _eastern_wall_time_to_utc(16, 0)

    start_str = start.strftime("%Y-%m-%dT%H:%M:%SZ")
    end_str = end.strftime("%Y-%m-%dT%H:%M:%SZ")
    date_str = target_date.strftime("%Y-%m-%d")

    return start_str, end_str, date_str


def fetch_day_bars_alpaca(alpaca_api, symbol, start, end):
    """
    Fetch 1-minute bars for one symbol for one day using Alpaca REST API.

    Args:
        alpaca_api: Client exposing `get_bars(...)` whose result has a `.df`
            DataFrame attribute.
        symbol: Ticker symbol to request.
        start, end: Bounds of the request window, passed through to `get_bars`.

    Returns:
        A list of dicts with keys 'timestamp', 'open', 'high', 'low', 'close'
        and 'volume' (one per bar). An empty list is returned when there are
        no bars or when any error occurs (the error is printed).
    """
    try:
        # Use Alpaca's get_bars method
        bars = alpaca_api.get_bars(
            symbol,
            '1Min',
            start=start,
            end=end,
            limit=10000,
            adjustment='split'
        ).df

        if bars is None or len(bars) == 0:
            return []

        # Move the bar timestamps from the index into a column. Columns are
        # then read BY NAME: overwriting the column labels positionally would
        # mislabel the data if the column order differed, and would fail
        # outright if the frame did not have exactly eight columns.
        bars = bars.reset_index()
        if 'timestamp' not in bars.columns:
            bars = bars.rename(columns={bars.columns[0]: 'timestamp'})

        return [
            {
                'timestamp': stamp,
                'open': float(open_),
                'high': float(high),
                'low': float(low),
                'close': float(close),
                'volume': int(volume),
            }
            for stamp, open_, high, low, close, volume in zip(
                bars['timestamp'],
                bars['open'],
                bars['high'],
                bars['low'],
                bars['close'],
                bars['volume'],
            )
        ]
    except Exception as e:
        # Best effort: a failed symbol/day must not abort the whole backfill
        print(f"   ‚úó Error fetching {symbol}: {e}")
        return []


def _csv_has_recent_history(csv_path, required_trading_days, min_hours_per_day):
    """Report and return True when the CSV already holds enough recent data."""
    if not Path(csv_path).exists():
        return False

    existing_df = pd.read_csv(csv_path)
    if len(existing_df) == 0:
        return False

    # Parse everything as tz-aware UTC so the age calculation below never
    # mixes naive and aware datetimes (which raises TypeError).
    existing_df['timestamp'] = pd.to_datetime(existing_df['timestamp'], utc=True)

    # Count bars per (date, symbol)
    existing_df['date'] = existing_df['timestamp'].dt.date
    days_by_symbol = existing_df.groupby(['date', 'tic']).size().reset_index(name='bars')

    # A day is valid for a symbol when it has at least min_hours worth of bars
    min_bars_needed = min_hours_per_day * 60
    valid_days_per_symbol = days_by_symbol[days_by_symbol['bars'] >= min_bars_needed]
    valid_days = valid_days_per_symbol['date'].value_counts()

    sufficient_days = (valid_days >= len(CONFIG['TICKERS']) * 0.8).sum()  # 80% of symbols
    if sufficient_days < required_trading_days:
        return False

    latest_timestamp = existing_df['timestamp'].max()
    hours_old = (pd.Timestamp.now(tz='UTC') - latest_timestamp).total_seconds() / 3600
    if hours_old >= 24:  # Data is stale (1 day old or more)
        return False

    print(f"‚úì CSV has {sufficient_days} trading days with sufficient data")
    print(f"   Latest data: {latest_timestamp} ({hours_old:.1f}h ago)")
    print(f"   Skipping historical fetch")
    return True


def fetch_historical_data_to_csv(alpaca_api, csv_path, required_trading_days=2, min_hours_per_day=4):
    """
    Fetch historical data from Alpaca and populate CSV.

    The fetch is skipped when the CSV already contains `required_trading_days`
    days on which at least 80% of the tickers have `min_hours_per_day` hours
    of bars, and its newest record is less than 24 hours old.

    Otherwise up to 14 calendar days are searched backwards (starting with
    yesterday) for trading days. A day counts as a trading day when the first
    ticker returns any bars; a day with fewer bars than the minimum is still
    fetched, with a warning. Indicators are calculated per symbol and the
    result OVERWRITES `csv_path`.

    Args:
        alpaca_api: Alpaca REST client passed to `fetch_day_bars_alpaca`.
        csv_path: Destination CSV path.
        required_trading_days: Number of trading days to collect.
        min_hours_per_day: Hours of 1-minute bars that make a day "complete".

    Returns:
        None. Errors during the fetch are printed with a traceback rather
        than raised.
    """
    print(f"\nüì• Fetching historical data for {required_trading_days} trading days...")
    print(f"   Minimum: {min_hours_per_day} hours of data per day")

    # Check if CSV already has sufficient data
    if _csv_has_recent_history(csv_path, required_trading_days, min_hours_per_day):
        return

    try:
        print(f"\nüîç Searching for {required_trading_days} trading days...")

        trading_days_found = 0
        days_searched = 0
        max_search = 14  # Search up to 2 weeks back
        all_historical_data = []
        min_bars_needed = min_hours_per_day * 60
        probe_symbol = CONFIG['TICKERS'][0]

        while trading_days_found < required_trading_days and days_searched < max_search:
            days_ago = days_searched + 1
            start, end, date_str = get_date_range_for_day(days_ago)

            print(f"\n   üìÖ {date_str}: Checking...")

            # Test first symbol to see if it's a trading day
            test_bars = fetch_day_bars_alpaca(alpaca_api, probe_symbol, start, end)

            if not test_bars:
                print(f"      ‚ùå Non-trading day (weekend/holiday)")
                days_searched += 1
                continue

            # Check if we have minimum hours of data
            if len(test_bars) < min_bars_needed:
                print(f"      ‚ö†Ô∏è  Partial day ({len(test_bars)} bars < {min_bars_needed})")
                # Still fetch but warn

            print(f"      ‚úÖ Trading day! Fetching {len(CONFIG['TICKERS'])} symbols...")

            # Fetch all symbols for this day
            day_data = []
            for symbol in CONFIG['TICKERS']:
                # The probe already downloaded the first symbol; reuse it
                # instead of issuing the same request twice.
                if symbol == probe_symbol:
                    bars = test_bars
                else:
                    bars = fetch_day_bars_alpaca(alpaca_api, symbol, start, end)
                if bars:
                    for bar in bars:
                        bar['tic'] = symbol
                        day_data.append(bar)
                    print(f"         {symbol}: {len(bars)} bars")

            if day_data:
                all_historical_data.extend(day_data)
                trading_days_found += 1
                print(f"      ‚úì Day {trading_days_found}/{required_trading_days} complete ({len(day_data)} total bars)")

            days_searched += 1

        if not all_historical_data:
            print("\n‚ö†Ô∏è  No historical data collected")
            return

        # Create DataFrame (timestamps normalised to tz-aware UTC)
        df = pd.DataFrame(all_historical_data)
        df['timestamp'] = pd.to_datetime(df['timestamp'], utc=True)
        df = df.sort_values(['timestamp', 'tic']).reset_index(drop=True)

        print(f"\nüìä Processing {len(df):,} bars for {len(df['tic'].unique())} symbols...")

        # Calculate indicators per symbol
        processed_data = []
        for symbol in CONFIG['TICKERS']:
            symbol_df = df[df['tic'] == symbol].copy()
            if len(symbol_df) == 0:
                continue

            symbol_df = calculate_indicators(symbol_df)
            processed_data.append(symbol_df)
            print(f"   ‚úì {symbol}: {len(symbol_df)} bars with indicators")

        if not processed_data:
            print("‚úó No data processed")
            return

        # Combine all symbols
        final_df = pd.concat(processed_data, ignore_index=True)

        # Select required columns
        required_cols = ['timestamp', 'tic', 'open', 'high', 'low', 'close', 'volume'] + TECH_INDICATORS
        final_df = final_df[required_cols]

        # Save to CSV (replaces any previous content)
        final_df.to_csv(csv_path, index=False)

        print(f"\n‚úÖ SUCCESS: Saved {len(final_df):,} records to CSV")
        print(f"   Date range: {final_df['timestamp'].min()} to {final_df['timestamp'].max()}")
        print(f"   Trading days found: {trading_days_found}")

        # Summary
        unique_dates = final_df['timestamp'].dt.date.nunique()
        records_per_ticker = len(final_df) // len(CONFIG['TICKERS'])
        print(f"   {unique_dates} unique dates, ~{records_per_ticker} bars per ticker")

    except Exception as e:
        print(f"\n‚úó Failed to fetch historical data: {e}")
        import traceback
        traceback.print_exc()


def fetch_and_append_data(alpaca_api, csv_path):
    """
    Fetch latest 1-min OHLCV data from Alpaca and append to CSV.

    For every configured ticker the latest bar is requested and its timestamp
    is converted to UTC and truncated to the minute. Indicators for the new
    bar are calculated on up to the last 500 stored bars of that ticker plus
    the new one, and the resulting rows are appended to `csv_path`.

    All timestamps (new and stored) are handled as tz-aware UTC so that the
    duplicate check and the row lookup never compare naive with aware values.

    Returns:
        (df, price, tech, turbulence)
        - Normal case: the appended rows, their 'close' values, a flat array
          of indicator values ordered by ticker then indicator (zeros for a
          ticker without a row), and turbulence 0 (placeholder).
        - When the first new timestamp is not newer than the newest stored
          one: (df_new, price, None, 0) and nothing is appended.
        - When nothing could be fetched/processed or an error occurs:
          (None, None, None, None).
    """
    try:
        # Get current time rounded to the minute
        rounded_time = datetime.now(timezone.utc).replace(second=0, microsecond=0)

        print(f"üì° Fetching OHLCV bars for {len(CONFIG['TICKERS'])} tickers...")

        # Fetch OHLCV bars for all tickers
        all_bars = []
        for ticker in CONFIG['TICKERS']:
            try:
                # Get latest bar from Alpaca
                bar = alpaca_api.get_latest_bar(ticker)

                if bar:
                    # Round bar timestamp to minute level (naive values are
                    # taken to be UTC already)
                    bar_timestamp = bar.t
                    if bar_timestamp.tzinfo is None:
                        bar_timestamp = bar_timestamp.replace(tzinfo=timezone.utc)
                    else:
                        bar_timestamp = bar_timestamp.astimezone(timezone.utc)

                    rounded_bar_time = bar_timestamp.replace(second=0, microsecond=0)

                    all_bars.append({
                        'timestamp': rounded_bar_time,
                        'tic': ticker,
                        'open': float(bar.o),
                        'high': float(bar.h),
                        'low': float(bar.l),
                        'close': float(bar.c),
                        'volume': int(bar.v)
                    })
                else:
                    print(f"   ‚ö†Ô∏è  No bar for {ticker}")
            except Exception as e:
                # One failing ticker must not abort the whole cycle
                print(f"   ‚úó Error fetching {ticker}: {e}")

        if not all_bars:
            print("‚úó No OHLCV data fetched")
            return None, None, None, None

        # Create DataFrame
        df_new = pd.DataFrame(all_bars)
        df_new['timestamp'] = pd.to_datetime(df_new['timestamp'], utc=True)

        # Load the stored history ONCE; it serves both the duplicate check
        # and the indicator calculation below.
        if Path(csv_path).exists():
            historical_df = pd.read_csv(csv_path)
        else:
            historical_df = pd.DataFrame()

        if len(historical_df) > 0:
            historical_df['timestamp'] = pd.to_datetime(historical_df['timestamp'], utc=True)

            # Check for duplicates: compares the first new bar's timestamp
            # with the newest stored timestamp
            last_timestamp = historical_df['timestamp'].max()
            new_timestamp = df_new['timestamp'].iloc[0]

            if new_timestamp <= last_timestamp:
                print(f"‚ö†Ô∏è  Data already exists for {new_timestamp}, skipping append")
                # Return existing values for trading
                price = df_new['close'].values
                return df_new, price, None, 0

        # Calculate technical indicators per symbol
        print(f"üìä Calculating technical indicators...")
        processed_data = []

        for ticker in CONFIG['TICKERS']:
            ticker_new = df_new[df_new['tic'] == ticker].copy()
            if len(ticker_new) == 0:
                continue

            # Combine with historical data for this ticker
            if len(historical_df) > 0:
                ticker_historical = historical_df[historical_df['tic'] == ticker].copy()
                ticker_combined = pd.concat([ticker_historical, ticker_new], ignore_index=True)
                ticker_combined = ticker_combined.sort_values('timestamp').drop_duplicates(subset=['timestamp'], keep='last')

                # Keep last 500 bars for indicator calculation
                ticker_combined = ticker_combined.tail(500)
            else:
                ticker_combined = ticker_new

            # Calculate indicators
            ticker_combined = calculate_indicators(ticker_combined)

            # Get only the new row with indicators
            new_row = ticker_combined[ticker_combined['timestamp'] == ticker_new['timestamp'].iloc[0]]
            processed_data.append(new_row)

        if not processed_data:
            print("‚úó No data processed")
            return None, None, None, None

        # Combine all tickers
        final_df = pd.concat(processed_data, ignore_index=True)

        # Select required columns in correct order
        required_cols = ['timestamp', 'tic', 'open', 'high', 'low', 'close', 'volume'] + TECH_INDICATORS
        final_df = final_df[required_cols]

        # Append to CSV
        final_df.to_csv(csv_path, mode='a', header=False, index=False)
        print(f"‚úì Appended {len(final_df)} records at {rounded_time.strftime('%Y-%m-%d %H:%M:%S UTC')}")

        # Extract price and tech for trading (format compatible with existing code)
        # NOTE(review): `price` only contains tickers that returned a bar,
        # while `tech` is zero-padded per configured ticker; the two can be
        # misaligned when a ticker is missing - confirm how callers use them.
        price = final_df['close'].values

        # Build tech array (all indicators for all tickers, flattened)
        tech_cols = TECH_INDICATORS
        tech_data = []
        for ticker in CONFIG['TICKERS']:
            ticker_row = final_df[final_df['tic'] == ticker]
            if len(ticker_row) > 0:
                for col in tech_cols:
                    tech_data.append(ticker_row[col].values[0])
            else:
                tech_data.extend([0] * len(tech_cols))

        tech = np.array(tech_data)
        turbulence = 0  # Placeholder, can calculate if needed

        return final_df, price, tech, turbulence

    except Exception as e:
        print(f"‚úó Data fetch error: {e}")
        import traceback
        traceback.print_exc()
        return None, None, None, None


def load_recent_data(csv_path, hours=48):
    """
    Load last N hours of data from CSV for fine-tuning.

    Timestamps are parsed as tz-aware UTC (naive values are interpreted as
    UTC) and compared against a tz-aware UTC cutoff, so stored timestamps
    with or without a UTC offset are both handled.

    Args:
        csv_path: Path of the data CSV.
        hours: Size of the look-back window in hours.

    Returns:
        A DataFrame of the rows within the window, sorted by
        ['timestamp', 'tic'] with a fresh index; possibly empty. None when
        the file does not exist or has no rows.
    """
    if not Path(csv_path).exists():
        print(f"‚ö†Ô∏è  CSV not found: {csv_path}")
        return None

    df = pd.read_csv(csv_path)

    if len(df) == 0:
        print("‚ö†Ô∏è  CSV is empty")
        return None

    df['timestamp'] = pd.to_datetime(df['timestamp'], utc=True)

    # Get last N hours; the cutoff must be tz-aware like the column, because
    # comparing aware timestamps with a naive datetime raises TypeError
    cutoff = pd.Timestamp.now(tz='UTC') - pd.Timedelta(hours=hours)
    df_filtered = df[df['timestamp'] >= cutoff]

    df_filtered = df_filtered.sort_values(['timestamp', 'tic']).reset_index(drop=True)

    # Show data availability
    if len(df_filtered) < 100:
        hours_available = (df['timestamp'].max() - df['timestamp'].min()).total_seconds() / 3600
        print(f"‚ö†Ô∏è  Only {len(df_filtered)} records in {hours}h window (total available: {hours_available:.1f}h)")
    else:
        print(f"‚úì Loaded {len(df_filtered):,} records from CSV ({hours}h window)")
        unique_timestamps = df_filtered['timestamp'].nunique()
        print(f"   {unique_timestamps} unique timestamps")

    return df_filtered


print("‚úì Data collection functions defined")

# Part 4: Environment Creation for Fine-Tuning

Create StockTradingEnv from DataFrame for model training/evaluation.

In [None]:
def create_env_from_df(df, config):
    """
    Wrap a market-data DataFrame in a vectorised StockTradingEnv.

    The input is copied, sorted by timestamp and ticker, and every distinct
    timestamp is numbered consecutively from 0; that number becomes the 'day'
    index of the frame handed to StockTradingEnv, and the 'timestamp' column
    is renamed to 'date'.

    State size: 1 (cash) + STOCK_DIM (prices) + STOCK_DIM (holdings)
    + len(TECH_INDICATORS) * STOCK_DIM - i.e. 301 for 30 stocks with
    8 indicators.

    Args:
        df: Frame with at least 'timestamp' and 'tic' columns plus the price
            and indicator columns the environment reads.
        config: Dict providing STOCK_DIM, HMAX, INITIAL_CASH,
            TRANSACTION_COST_PCT and REWARD_SCALING.

    Returns:
        A DummyVecEnv holding the single environment.
    """
    frame = df.copy()
    frame['timestamp'] = pd.to_datetime(frame['timestamp'])
    frame = frame.sort_values(['timestamp', 'tic']).reset_index(drop=True)

    # Number the distinct timestamps chronologically: 0, 1, 2, ...
    day_of_stamp = {}
    for position, stamp in enumerate(sorted(frame['timestamp'].unique())):
        day_of_stamp[stamp] = position
    frame['day'] = frame['timestamp'].map(day_of_stamp)
    frame = frame.rename(columns={'timestamp': 'date'})

    # StockTradingEnv is given the frame indexed by 'day'
    indexed_frame = frame.sort_values(['day', 'tic']).set_index('day')

    n_stocks = config['STOCK_DIM']
    cost_pct = config['TRANSACTION_COST_PCT']

    env = StockTradingEnv(
        df=indexed_frame,
        stock_dim=n_stocks,
        hmax=config['HMAX'],
        initial_amount=config['INITIAL_CASH'],
        num_stock_shares=[0] * n_stocks,
        buy_cost_pct=[cost_pct] * n_stocks,
        sell_cost_pct=[cost_pct] * n_stocks,
        reward_scaling=config['REWARD_SCALING'],
        state_space=1 + 2 * n_stocks + len(TECH_INDICATORS) * n_stocks,
        action_space=n_stocks,
        tech_indicator_list=TECH_INDICATORS,
        print_verbosity=100000,
    )

    def _make_env():
        # DummyVecEnv expects factories; hand back the instance built above
        return env

    return DummyVecEnv([_make_env])


def evaluate_model_on_df(model, df, config):
    """
    Run one deterministic episode of an SB3 PPO model over `df`.

    An environment is built from the frame with `create_env_from_df`, the
    model acts deterministically until the environment reports done, and the
    rewards of the (single) wrapped environment are summed.

    Returns:
        The accumulated episode reward.
    """
    vec_env = create_env_from_df(df, config)
    observation = vec_env.reset()
    episode_reward = 0

    while True:
        action, _ = model.predict(observation, deterministic=True)
        observation, rewards, finished, _info = vec_env.step(action)
        # The vectorised env returns one entry per wrapped env; there is one
        episode_reward += rewards[0]
        if finished:
            break

    return episode_reward


print("‚úì Environment creation functions defined")

# Part 5: Fine-Tuning Logic with Validation

Fine-tune SB3 PPO model with validation and rollback (from finetune_simulation.py).

In [None]:
def finetune_model_with_csv_data(model, csv_path, config):
    """
    Fine-tune SB3 PPO model using data from CSV (from finetune_simulation.py logic).

    Steps:
      1. Load the last FINETUNE_LOOKBACK_HOURS of data from `csv_path`.
      2. Split chronologically by unique timestamp into train/validation
         (the last VALIDATION_SPLIT fraction is validation).
      3. Score the current model on the validation set.
      4. Clone the model through a temporary file, fine-tune the clone on the
         train set for FINETUNE_STEPS at FINETUNE_LR.
      5. Accept the clone when its validation score is within
         (1 - VALIDATION_THRESHOLD) of the original score's magnitude,
         i.e. `finetuned >= original - |original| * (1 - VALIDATION_THRESHOLD)`.
         For positive scores this equals `original * VALIDATION_THRESHOLD`;
         for negative scores it still tolerates a small degradation instead
         of demanding an improvement.

    Args:
        model: The currently deployed SB3 PPO model (left unmodified).
        csv_path: Path of the data CSV.
        config: Dict with the FINETUNE_* and VALIDATION_* settings plus the
            environment settings used by `create_env_from_df`.

    Returns:
        (model_to_use, result_dict): the fine-tuned clone if accepted, else
        the original model. `result_dict` is None when there was not enough
        data to attempt fine-tuning.
    """
    print("\n" + "="*80)
    print("FINE-TUNING MODEL WITH CSV DATA")
    print("="*80)

    # Load data
    df = load_recent_data(csv_path, hours=config['FINETUNE_LOOKBACK_HOURS'])

    if df is None or len(df) < 100:
        print("‚úó Insufficient data for fine-tuning")
        return model, None

    # Split train/validation
    df['timestamp'] = pd.to_datetime(df['timestamp'])
    unique_dates = sorted(df['timestamp'].unique())

    if len(unique_dates) < 2:
        print("‚úó Need at least 2 unique timestamps")
        return model, None

    split_idx = int(len(unique_dates) * (1 - config['VALIDATION_SPLIT']))

    train_df = df[df['timestamp'].isin(unique_dates[:split_idx])].copy()
    val_df = df[df['timestamp'].isin(unique_dates[split_idx:])].copy()

    print(f"üìä Train: {len(train_df):,} records ({len(unique_dates[:split_idx])} timestamps)")
    print(f"üìä Val: {len(val_df):,} records ({len(unique_dates[split_idx:])} timestamps)")

    # Evaluate original model
    print("üß™ Evaluating original model...")
    original_score = evaluate_model_on_df(model, val_df, config)
    print(f"   Original score: {original_score:.2f}")

    # Clone model (SB3 PPO) via save/load. The temp file handle is closed
    # before saving, and the file is removed even if save/load raises.
    with tempfile.NamedTemporaryFile(suffix='.zip', delete=False) as tmp:
        tmp_path = tmp.name
    try:
        model.save(tmp_path)
        # The learning rate must be overridden AT LOAD TIME: the schedule the
        # optimizer follows is built from `learning_rate` while the model is
        # set up, so assigning `model.learning_rate` afterwards has no effect
        # on training.
        model_ft = PPO.load(
            tmp_path,
            custom_objects={'learning_rate': config['FINETUNE_LR']},
        )
    finally:
        os.remove(tmp_path)

    # Fine-tune
    print(f"üîÑ Fine-tuning ({config['FINETUNE_STEPS']} steps, lr={config['FINETUNE_LR']})...")
    ft_env = create_env_from_df(train_df, config)
    model_ft.set_env(ft_env)
    model_ft.learn(
        total_timesteps=config['FINETUNE_STEPS'],
        reset_num_timesteps=False,
        progress_bar=False
    )

    # Evaluate fine-tuned
    print("üß™ Evaluating fine-tuned model...")
    finetuned_score = evaluate_model_on_df(model_ft, val_df, config)
    print(f"   Fine-tuned score: {finetuned_score:.2f}")

    # Decision: allow a degradation of (1 - VALIDATION_THRESHOLD) of the
    # original score's magnitude. Using abs() keeps the tolerance pointing
    # "downwards" for negative scores as well.
    tolerance = abs(original_score) * (1 - config['VALIDATION_THRESHOLD'])
    threshold = original_score - tolerance
    accepted = finetuned_score >= threshold

    improvement = ((finetuned_score - original_score) / abs(original_score) * 100) if original_score != 0 else 0

    result = {
        'timestamp': datetime.utcnow(),
        'original_score': original_score,
        'finetuned_score': finetuned_score,
        'threshold': threshold,
        'accepted': accepted,
        'improvement': improvement,
        'train_records': len(train_df),
        'val_records': len(val_df),
    }

    if accepted:
        # An accepted model may still be slightly worse; print the real sign
        print(f"‚úÖ ACCEPTED ({improvement:+.2f}%)")
        return model_ft, result
    else:
        print(f"‚ùå REJECTED ({improvement:.2f}%)")
        return model, result


print("‚úì Fine-tuning functions defined")

# Part 6: Paper Trading Class with Real-Time Fine-Tuning

AlpacaPaperTrading class merged with fine-tuning logic (from finrl-papertrading-demo.ipynb + realtime_finetune.py).

In [None]:
class AlpacaPaperTradingWithFineTuning:
    """
    Enhanced Alpaca Paper Trading class with real-time fine-tuning.
    Combines logic from:
    - finrl-papertrading-demo.ipynb (AlpacaPaperTrading class)
    - realtime_finetune.py (data collection + fine-tuning)
    - finetune_simulation.py (validation logic)
    - historical_data.py (robust data fetching)

    One cycle of ``run()`` fetches the latest data, asks the model for an
    action, submits the resulting orders, and then checks whether a
    fine-tuning pass is due.
    """

    def __init__(self, config, model_path):
        """
        Connect to Alpaca, load the model and prepare the data CSV.

        Args:
            config: Dict of settings. Keys read by this class: API_KEY,
                API_SECRET, API_BASE_URL, TICKERS, STOCK_DIM,
                FINETUNE_INTERVAL_HOURS, DATA_CSV, TURBULENCE_THRESH, HMAX,
                MIN_ACTION_THRESHOLD, OUTPUT_DIR.
            model_path: Path of the saved SB3 PPO model to load.
        """
        self.config = config

        # Initialize Alpaca
        self.alpaca_processor = AlpacaProcessor(
            API_KEY=config['API_KEY'],
            API_SECRET=config['API_SECRET'],
            API_BASE_URL=config['API_BASE_URL']
        )

        self.alpaca = tradeapi.REST(
            config['API_KEY'],
            config['API_SECRET'],
            config['API_BASE_URL'],
            'v2'
        )

        # Load SB3 PPO model
        print(f"ü§ñ Loading SB3 PPO model: {model_path}")
        self.model = PPO.load(model_path)
        print("‚úì Model loaded")

        # Initialize state (from finrl-papertrading-demo.ipynb)
        self.tickers = config['TICKERS']
        self.stocks = np.zeros(config['STOCK_DIM'])
        self.stocks_cd = np.zeros(config['STOCK_DIM'])
        self.cash = None
        self.price = np.zeros(config['STOCK_DIM'])
        self.turbulence_bool = 0

        # Fine-tuning tracking. Back-dating last_finetune by one full interval
        # makes the first check_and_finetune() call eligible immediately.
        self.last_finetune = datetime.utcnow() - timedelta(hours=config['FINETUNE_INTERVAL_HOURS'])
        self.finetune_history = []
        self.trading_history = []
        self.cycle = 0
        self.model_version = 'original'  # Track if using original or fine-tuned model
        self.finetune_count = 0  # Count of accepted fine-tunes

        # Initialize data CSV and fetch historical data
        print("\nüìä Initializing data collection system...")
        init_data_csv(config['DATA_CSV'])

        # Fetch 2 trading days of historical data for fine-tuning
        # Uses robust logic from historical_data.py
        fetch_historical_data_to_csv(
            self.alpaca,  # Pass Alpaca REST API
            config['DATA_CSV'],
            required_trading_days=2,
            min_hours_per_day=4
        )

        print("‚úì Paper trading instance initialized")

    def sigmoid_sign(self, ary, thresh):
        """Sigmoid transformation for turbulence (from finrl-papertrading-demo.ipynb).

        Maps ``ary`` through a sigmoid centred at zero and rescales by
        ``thresh``, so the output lies strictly within ``(-thresh/2, thresh/2)``.
        """
        def sigmoid(x):
            return 1 / (1 + np.exp(-x * np.e)) - 0.5
        return sigmoid(ary / thresh) * thresh

    def get_state(self):
        """
        Get current state from Alpaca API (Production format: 301 features).

        State vector: [cash(1)] + [prices(30)] + [stocks(30)] + [tech_indicators(240)]
        NO turbulence in state vector!

        Side effects: appends the latest bars to the data CSV (through
        ``fetch_and_append_data``) and refreshes ``self.stocks``,
        ``self.price``, ``self.turbulence_bool`` and ``self.cash``.

        Returns state vector with current prices, holdings, and technical indicators.
        """
        # Fetch latest OHLCV data and append to CSV
        df_new, price, tech, turbulence = fetch_and_append_data(
            self.alpaca,  # Pass Alpaca REST API for OHLCV bars
            self.config['DATA_CSV']
        )

        if price is None:
            # Fall back to the last known prices; indicators are zeroed.
            print("‚ö†Ô∏è  Failed to fetch data, using cached values")
            price = self.price
            turbulence = 0
            tech = np.zeros(len(TECH_INDICATORS) * self.config['STOCK_DIM'])

        # Calculate turbulence boolean (used for trading logic, NOT in state)
        turbulence_bool = 1 if turbulence >= self.config['TURBULENCE_THRESH'] else 0

        # Scale technical indicators
        tech_scaled = tech * 2 ** -7

        # Get current positions from Alpaca
        positions = self.alpaca.list_positions()
        stocks = np.zeros(len(self.tickers))
        for position in positions:
            if position.symbol in self.tickers:
                ind = self.tickers.index(position.symbol)
                stocks[ind] = abs(int(float(position.qty)))

        # Update instance variables
        self.stocks = stocks
        self.price = price
        self.turbulence_bool = turbulence_bool
        self.cash = float(self.alpaca.get_account().cash)

        # Build state vector (Production format: NO turbulence!)
        # Model expects: 1 (cash) + 30 (prices) + 30 (stocks) + 240 (tech) = 301 features
        amount = np.array(self.cash * (2 ** -12), dtype=np.float32)
        scale = np.array(2 ** -6, dtype=np.float32)

        state = np.hstack((
            amount,
            price * scale,
            stocks * scale,
            tech_scaled,
        )).astype(np.float32)

        # Handle NaN/Inf
        state[np.isnan(state)] = 0.0
        state[np.isinf(state)] = 0.0

        return state

    def submit_order(self, qty, stock, side, resp):
        """Submit order to Alpaca (from finrl-papertrading-demo.ipynb).

        Args:
            qty: Number of shares; nothing is sent when ``qty <= 0``.
            stock: Ticker symbol.
            side: ``'buy'`` or ``'sell'``.
            resp: List that receives ``True`` on success (or no-op) and
                ``False`` when the API call raised.
        """
        if qty > 0:
            try:
                self.alpaca.submit_order(stock, qty, side, "market", "day")
                print(f"   ‚úì {side.upper()} {qty} {stock}")
                resp.append(True)
            except Exception as e:
                # Best effort: one rejected order must not abort the cycle.
                print(f"   ‚úó {side.upper()} {qty} {stock} failed: {e}")
                resp.append(False)
        else:
            resp.append(True)

    def _submit_orders_concurrently(self, orders):
        """Submit ``(qty, symbol, side)`` orders on one thread each and wait.

        Arguments are bound through ``Thread(args=...)`` rather than a closure,
        so every thread reports into the same, explicitly passed result list.

        Returns:
            List of success flags, one per order (completion order).
        """
        results = []
        threads = [
            threading.Thread(target=self.submit_order, args=(qty, symbol, side, results))
            for qty, symbol, side in orders
        ]
        for t in threads:
            t.start()
        for t in threads:
            t.join()
        return results

    @staticmethod
    def _closing_orders(positions):
        """Build the ``(qty, symbol, side)`` orders that flatten ``positions``."""
        return [
            (
                abs(int(float(position.qty))),
                position.symbol,
                'sell' if position.side == 'long' else 'buy',
            )
            for position in positions
        ]

    def trade(self):
        """
        Execute trading decision using SB3 PPO model.
        From finrl-papertrading-demo.ipynb trade() logic.

        Appends one entry to ``self.trading_history``. The logged
        ``portfolio_value`` and ``cash`` are the pre-trade snapshot taken
        right after ``get_state()``, so cash and holdings describe the same
        instant.
        """
        state = self.get_state()

        # Snapshot now: after sells, refreshed cash would include proceeds of
        # shares that self.stocks still counts, double-counting them.
        cash_snapshot = self.cash
        portfolio_value = cash_snapshot + np.sum(self.stocks * self.price)

        # Get action from SB3 PPO model
        action = self.model.predict(state.reshape(1, -1), deterministic=True)[0]
        action = action.flatten()

        # Scale actions
        action = (action * self.config['HMAX']).astype(int)

        # Log action statistics
        print(f"\nüìä Action Stats:")
        print(f"   Range: [{np.min(action):.2f}, {np.max(action):.2f}]")
        print(f"   Mean(abs): {np.mean(np.abs(action)):.2f}")
        print(f"   üíµ Cash: ${self.cash:,.2f}, Turbulence: {self.turbulence_bool}")

        # Display signals
        min_action = self.config['MIN_ACTION_THRESHOLD']
        sell_signals = [(self.tickers[i], action[i]) for i in range(len(action)) if action[i] < -min_action]
        buy_signals = [(self.tickers[i], action[i]) for i in range(len(action)) if action[i] > min_action]

        print(f"   üìâ SELL ({len(sell_signals)}): {sell_signals[:5]}")
        print(f"   üìà BUY ({len(buy_signals)}): {buy_signals[:5]}")

        # Execute trades (from finrl-papertrading-demo.ipynb)
        self.stocks_cd += 1

        if self.turbulence_bool == 0:
            # SELL orders: never sell more than is currently held
            sell_orders = []
            for index in np.where(action < -min_action)[0]:
                sell_num_shares = min(self.stocks[index], -action[index])
                sell_orders.append((abs(int(sell_num_shares)), self.tickers[index], 'sell'))
                self.stocks_cd[index] = 0
            self._submit_orders_concurrently(sell_orders)

            # Update cash
            self.cash = float(self.alpaca.get_account().cash)

            # BUY orders: budget against a running balance so the combined
            # buys of one cycle cannot exceed the available cash.
            available_cash = max(0, self.cash)
            buy_orders = []
            for index in np.where(action > min_action)[0]:
                unit_price = self.price[index]
                if np.isfinite(unit_price) and unit_price > 0:
                    qty = int(min(available_cash // unit_price, abs(int(action[index]))))
                else:
                    # No usable price (e.g. nothing fetched yet): skip the buy
                    qty = 0
                if qty > 0:
                    available_cash -= qty * unit_price
                buy_orders.append((qty, self.tickers[index], 'buy'))
                self.stocks_cd[index] = 0
            self._submit_orders_concurrently(buy_orders)

        else:
            # High turbulence - liquidate all positions
            print("‚ö†Ô∏è  HIGH TURBULENCE - Liquidating all positions")
            self._submit_orders_concurrently(
                self._closing_orders(self.alpaca.list_positions())
            )
            self.stocks_cd[:] = 0

        # Log trade
        trade_log = {
            'timestamp': datetime.utcnow(),
            'cycle': self.cycle,
            'portfolio_value': portfolio_value,
            'cash': cash_snapshot,
            'action': action.tolist(),
        }
        self.trading_history.append(trade_log)

    def check_and_finetune(self):
        """
        Check if it's time to fine-tune and execute if needed.
        From realtime_finetune.py fine-tuning logic.

        When a result is produced it is appended to ``self.finetune_history``
        and written to ``finetune_history.csv``; an accepted model is also
        saved as ``model_cycle_<cycle>.zip`` in the output directory.
        """
        current_time = datetime.utcnow()
        time_since_finetune = (current_time - self.last_finetune).total_seconds() / 3600

        if time_since_finetune >= self.config['FINETUNE_INTERVAL_HOURS']:
            print(f"\n‚è∞ Time to fine-tune (last: {time_since_finetune:.1f}h ago)")

            # NOTE(review): assumes the helper always returns a usable model
            # as the first element, even when the result is empty - confirm.
            self.model, ft_result = finetune_model_with_csv_data(
                self.model,
                self.config['DATA_CSV'],
                self.config
            )

            if ft_result:
                self.finetune_history.append(ft_result)
                self.last_finetune = current_time

                # Save results
                output_dir = Path(self.config['OUTPUT_DIR'])
                output_dir.mkdir(parents=True, exist_ok=True)

                pd.DataFrame(self.finetune_history).to_csv(
                    output_dir / 'finetune_history.csv',
                    index=False
                )

                if ft_result['accepted']:
                    self.finetune_count += 1
                    self.model_version = f'finetuned_v{self.finetune_count}'
                    model_path = output_dir / f'model_cycle_{self.cycle}.zip'
                    self.model.save(str(model_path))
                    print(f"üíæ Saved fine-tuned model: {model_path}")

    def square_off_all_positions(self):
        """Liquidate all positions before market close.

        Long positions are sold and short positions bought back. Success is
        only reported when every closing order was accepted.
        """
        print("\nüîö Squaring off all positions before market close...")
        positions = self.alpaca.list_positions()

        if not positions:
            print("   No positions to square off")
            return

        orders = self._closing_orders(positions)
        for qty, symbol, side in orders:
            print(f"   Closing {qty} {symbol} ({side})")

        results = self._submit_orders_concurrently(orders)
        failed = results.count(False)
        if failed:
            print(f"   {failed} closing order(s) failed - positions may remain open")
        else:
            print("‚úì All positions squared off")

    def run(self):
        """
        Main trading loop with fine-tuning.
        From finrl-papertrading-demo.ipynb + realtime_finetune.py.

        Waits for the market to open, waits a further 15 minutes, then runs
        one trade/fine-tune cycle per minute until fewer than 15 minutes
        remain before the close (or the market is found closed, or the user
        interrupts), and finally prints a session summary.
        """
        # Wait for market to open
        clock = self.alpaca.get_clock()
        if not clock.is_open:
            time_to_open = (
                clock.next_open.replace(tzinfo=timezone.utc)
                - clock.timestamp.replace(tzinfo=timezone.utc)
            ).total_seconds()
            print(f"‚è∞ Market closed - waiting {int(time_to_open/60)} minutes for market open...")
            # time.sleep() rejects negative values
            time.sleep(max(0.0, time_to_open))

        # Wait 15 minutes after market open before first trade
        print("‚úÖ Market opened - waiting 15 minutes before first trade...")
        time.sleep(15 * 60)  # Wait 15 minutes

        print(f"‚úÖ Starting paper trading with fine-tuning (Model: {self.model_version})")

        output_dir = Path(self.config['OUTPUT_DIR'])
        output_dir.mkdir(parents=True, exist_ok=True)

        try:
            while True:
                self.cycle += 1

                # Check market status
                clock = self.alpaca.get_clock()
                if not clock.is_open:
                    # next_close now refers to the NEXT session, so the
                    # time-to-close test below would never trigger.
                    print("Market is closed - stopping trading")
                    break

                closing_time = clock.next_close.replace(tzinfo=timezone.utc).timestamp()
                curr_time = clock.timestamp.replace(tzinfo=timezone.utc).timestamp()
                time_to_close = closing_time - curr_time

                # Square off positions 15 minutes before market close
                if time_to_close < (15 * 60):  # 15 minutes = 900 seconds
                    self.square_off_all_positions()
                    print("üîö Market closing in <15 mins - stopping trading")
                    break

                print(f"\n{'='*80}")
                print(f"CYCLE {self.cycle} - {datetime.utcnow().strftime('%Y-%m-%d %H:%M:%S')}")
                print(f"Model: {self.model_version} | Time to close: {int(time_to_close/60)} mins")
                print(f"{'='*80}")

                # Execute trade
                self.trade()

                # Check and fine-tune if needed
                self.check_and_finetune()

                # Save trading history
                pd.DataFrame(self.trading_history).to_csv(
                    output_dir / 'trading_history.csv',
                    index=False
                )

                # Wait for next interval (60 seconds)
                time.sleep(60)

        except KeyboardInterrupt:
            print("\n‚ö†Ô∏è  Interrupted by user")

        # Final summary
        print("\n" + "="*80)
        print("TRADING SESSION SUMMARY")
        print("="*80)
        print(f"Total cycles: {self.cycle}")
        print(f"Trading decisions: {len(self.trading_history)}")
        print(f"Fine-tuning sessions: {len(self.finetune_history)}")

        if self.trading_history:
            final_value = self.trading_history[-1]['portfolio_value']
            initial_value = self.trading_history[0]['portfolio_value']
            print(f"\nüí∞ Portfolio Performance:")
            print(f"   Initial: ${initial_value:,.2f}")
            print(f"   Final: ${final_value:,.2f}")
            if initial_value:
                total_return = (final_value - initial_value) / initial_value * 100
                print(f"   Return: {total_return:+.2f}%")
            else:
                # A percentage return is undefined for a zero starting value
                print("   Return: n/a (initial value is zero)")

        if self.finetune_history:
            accepted = sum(1 for r in self.finetune_history if r['accepted'])
            print(f"\nüîÑ Fine-tuning:")
            print(f"   Accepted: {accepted}/{len(self.finetune_history)}")
            avg_improvement = np.mean([r['improvement'] for r in self.finetune_history])
            print(f"   Avg improvement: {avg_improvement:.2f}%")

        print(f"\n‚úì Results saved to: {output_dir}")


# Confirmation message printed once the class definition above has run.
print("‚úì AlpacaPaperTradingWithFineTuning class defined")

# Part 7: Run Paper Trading Loop

Initialize the trading system and start the paper trading loop with automatic fine-tuning.

In [None]:
# Build the trader: the constructor connects to Alpaca, loads the model
# and prepares the data CSV.
trader = AlpacaPaperTradingWithFineTuning(CONFIG, CONFIG['TRAINED_MODEL'])

# Echo the fine-tuning settings that govern this session
print("\nüöÄ Starting Alpaca paper trading with real-time fine-tuning")
print(f"   Fine-tune interval: {CONFIG['FINETUNE_INTERVAL_HOURS']} hours")
print(f"   Lookback window: {CONFIG['FINETUNE_LOOKBACK_HOURS']} hours")
print(f"   Acceptance threshold: {CONFIG['VALIDATION_THRESHOLD']*100}%")

# Blocks until the session ends (near market close or on interrupt)
trader.run()

# Part 8: Performance Analysis

Visualize trading performance and fine-tuning results.

In [None]:
# Locate the CSV artifacts written by the trading loop
output_dir = Path(CONFIG['OUTPUT_DIR'])
trading_history_path = output_dir / 'trading_history.csv'
finetune_history_path = output_dir / 'finetune_history.csv'

# ---- Trading performance -------------------------------------------------
if not trading_history_path.exists():
    print("‚ö†Ô∏è  No trading history found")
else:
    df_trading = pd.read_csv(trading_history_path)
    df_trading['timestamp'] = pd.to_datetime(df_trading['timestamp'])
    # Whatever is not cash is held in positions
    df_trading['holdings_value'] = df_trading['portfolio_value'] - df_trading['cash']

    fig, axes = plt.subplots(2, 1, figsize=(14, 10))
    value_ax, split_ax = axes

    # Top panel: portfolio value over time
    value_ax.plot(df_trading['timestamp'], df_trading['portfolio_value'], linewidth=2, color='blue')

    # Bottom panel: cash vs holdings value
    split_ax.plot(df_trading['timestamp'], df_trading['cash'], label='Cash', linewidth=2, color='green')
    split_ax.plot(df_trading['timestamp'], df_trading['holdings_value'], label='Holdings', linewidth=2, color='orange')
    split_ax.legend(fontsize=12)

    # Decoration shared by both time-series panels
    for ax, title, ylabel in (
        (value_ax, 'Portfolio Value Over Time', 'Portfolio Value ($)'),
        (split_ax, 'Cash vs Holdings Value', 'Value ($)'),
    ):
        ax.set_title(title, fontsize=16, fontweight='bold')
        ax.set_xlabel('Time', fontsize=12)
        ax.set_ylabel(ylabel, fontsize=12)
        ax.grid(True, alpha=0.3)
        ax.xaxis.set_major_formatter(plt.matplotlib.dates.DateFormatter('%H:%M'))

    plt.tight_layout()
    plt.savefig(output_dir / 'trading_performance.png', dpi=150, bbox_inches='tight')
    plt.show()

    # Headline metrics derived from the portfolio value series
    portfolio_series = df_trading['portfolio_value']
    initial_value = portfolio_series.iloc[0]
    final_value = portfolio_series.iloc[-1]
    total_return = (final_value - initial_value) / initial_value * 100
    max_value = portfolio_series.max()
    min_value = portfolio_series.min()
    # Per-minute returns scaled by 252 trading days * 6.5 hours * 60 minutes
    volatility = portfolio_series.pct_change().std() * np.sqrt(252 * 6.5 * 60)  # Annualized

    print("\n" + "="*60)
    print("PERFORMANCE METRICS")
    print("="*60)
    print(f"Initial Portfolio Value: ${initial_value:,.2f}")
    print(f"Final Portfolio Value:   ${final_value:,.2f}")
    print(f"Total Return:            {total_return:+.2f}%")
    print(f"Max Value:               ${max_value:,.2f}")
    print(f"Min Value:               ${min_value:,.2f}")
    print(f"Volatility (annualized): {volatility:.2%}")
    print(f"Total Cycles:            {len(df_trading)}")
    print("="*60)

# ---- Fine-tuning results -------------------------------------------------
if not finetune_history_path.exists():
    print("‚ö†Ô∏è  No fine-tuning history found")
else:
    df_finetune = pd.read_csv(finetune_history_path)
    df_finetune['timestamp'] = pd.to_datetime(df_finetune['timestamp'])

    fig, axes = plt.subplots(2, 1, figsize=(14, 10))
    improvement_ax, rate_ax = axes

    # Top panel: one bar per session, green when accepted and red when rejected
    colors = [('red', 'green')[bool(flag)] for flag in df_finetune['accepted']]
    improvement_ax.bar(range(len(df_finetune)), df_finetune['improvement'], color=colors, alpha=0.7)
    improvement_ax.axhline(y=0, color='black', linestyle='--', linewidth=1)
    improvement_ax.set_title('Fine-Tuning Performance Improvement', fontsize=16, fontweight='bold')
    improvement_ax.set_xlabel('Fine-Tuning Session', fontsize=12)
    improvement_ax.set_ylabel('Improvement (%)', fontsize=12)
    improvement_ax.grid(True, alpha=0.3, axis='y')

    # Bottom panel: running share of accepted sessions
    cumulative_accepted = df_finetune['accepted'].cumsum()
    cumulative_total = range(1, len(df_finetune) + 1)
    acceptance_rate = [
        accepted_so_far / session * 100
        for session, accepted_so_far in enumerate(cumulative_accepted, start=1)
    ]

    rate_ax.plot(cumulative_total, acceptance_rate, marker='o', linewidth=2, markersize=8, color='purple')
    rate_ax.set_title('Cumulative Fine-Tuning Acceptance Rate', fontsize=16, fontweight='bold')
    rate_ax.set_xlabel('Fine-Tuning Session', fontsize=12)
    rate_ax.set_ylabel('Acceptance Rate (%)', fontsize=12)
    rate_ax.grid(True, alpha=0.3)
    rate_ax.set_ylim(0, 100)

    plt.tight_layout()
    plt.savefig(output_dir / 'finetune_performance.png', dpi=150, bbox_inches='tight')
    plt.show()

    # Fine-tuning summary
    total_count = len(df_finetune)
    accepted_count = df_finetune['accepted'].sum()
    avg_improvement = df_finetune['improvement'].mean()
    max_improvement = df_finetune['improvement'].max()

    print("\n" + "="*60)
    print("FINE-TUNING SUMMARY")
    print("="*60)
    print(f"Total Sessions:       {total_count}")
    print(f"Accepted:             {accepted_count} ({accepted_count/total_count*100:.1f}%)")
    print(f"Rejected:             {total_count - accepted_count}")
    print(f"Avg Improvement:      {avg_improvement:+.2f}%")
    print(f"Max Improvement:      {max_improvement:+.2f}%")
    print("="*60)

print(f"\n‚úì All results saved to: {output_dir}")