# Part 1: Setup and Configuration

In [None]:
# System setup
import sys
import os
from pathlib import Path

# Resolve paths relative to wherever this notebook (or an exported script) lives.
# `__file__` only exists when the code runs as a script; inside a Jupyter kernel
# the current working directory is used instead.
notebook_dir = os.path.dirname(os.path.abspath(__file__)) if '__file__' in locals() else os.getcwd()
project_root = os.path.abspath(os.path.join(notebook_dir, '..'))

# Later cells build paths with the `/` operator (PROJECT_ROOT / 'FinRL',
# PROJECT_ROOT / '.env'), which needs a pathlib.Path rather than a string.
PROJECT_ROOT = Path(project_root)

# Put the project root first on sys.path exactly once, so re-running the cell
# does not keep prepending duplicates.
if project_root not in sys.path:
    sys.path.insert(0, project_root)

print(f"‚úì Project root: {project_root}")

‚úì Project root: /Users/ayushraj/Documents/Python/FinRL


In [None]:
# Core imports
import numpy as np
import pandas as pd
import time
import warnings
import threading
import tempfile
from datetime import datetime, timedelta, timezone
from tqdm import tqdm

warnings.filterwarnings('ignore')

# Add FinRL to path
finrl_path = PROJECT_ROOT / 'FinRL'
if str(finrl_path) not in sys.path:
    sys.path.append(str(finrl_path))

# FinRL imports
from finrl.config import INDICATORS
from finrl.meta.env_stock_trading.env_stocktrading import StockTradingEnv
from finrl.meta.data_processors.processor_alpaca import AlpacaProcessor

# DRL imports
from stable_baselines3 import PPO
from stable_baselines3.common.vec_env import DummyVecEnv

# Alpaca imports
import alpaca_trade_api as tradeapi

# Explainability imports
sys.path.append(str(PROJECT_ROOT))
from explainable_drl.explainable_agent import ExplainableAgent

print("‚úì All imports successful")

‚úì All imports successful


# Part 2: Configuration

In [12]:
# Load API credentials from .env
from dotenv import load_dotenv

load_dotenv(PROJECT_ROOT / '.env')

API_KEY = os.getenv('ALPACA_API_KEY')
API_SECRET = os.getenv('ALPACA_API_SECRET')
# Falls back to the paper-trading endpoint when no base URL is configured.
API_BASE_URL = os.getenv('ALPACA_API_BASE_URL', 'https://paper-api.alpaca.markets')

# DOW 30 tickers (excluding VIXY)
TICKERS = [
    'AAPL', 'AMGN', 'AXP', 'BA', 'CAT', 'CRM', 'CSCO', 'CVX', 'DIS', 'DOW',
    'GS', 'HD', 'HON', 'IBM', 'INTC', 'JNJ', 'JPM', 'KO', 'MCD', 'MMM',
    'MRK', 'MSFT', 'NKE', 'PG', 'TRV', 'UNH', 'V', 'VZ', 'WBA', 'WMT'
]

TECH_INDICATORS = INDICATORS

# os.getenv returns None for unset variables, so only report success when both
# secrets were actually found; otherwise name the ones that are missing.
_missing_credentials = [
    env_name
    for env_name, env_value in (('ALPACA_API_KEY', API_KEY), ('ALPACA_API_SECRET', API_SECRET))
    if not env_value
]
if _missing_credentials:
    print(f"‚ö†Ô∏è  Missing API credentials: {', '.join(_missing_credentials)}")
else:
    print(f"‚úì API credentials loaded")
print(f"‚úì Tickers: {len(TICKERS)}")
print(f"‚úì Technical indicators: {len(TECH_INDICATORS)}")

‚úì API credentials loaded
‚úì Tickers: 30
‚úì Technical indicators: 8


In [None]:
# Main configuration: every tunable used by the trading, fine-tuning and
# explainability code lives in this one dictionary.
CONFIG = dict(
    # Alpaca API
    API_KEY=API_KEY,
    API_SECRET=API_SECRET,
    API_BASE_URL=API_BASE_URL,

    # Tickers
    TICKERS=TICKERS,
    STOCK_DIM=len(TICKERS),

    # Model paths
    TRAINED_MODEL='trained_models/agent_ppo.zip',
    OUTPUT_DIR='production_paper_trading_results',

    # Trading parameters
    INITIAL_CASH=1_000_000,
    HMAX=100,
    MAX_STOCK=100,
    TRANSACTION_COST_PCT=0.001,
    REWARD_SCALING=1e-4,
    TURBULENCE_THRESHOLD=500,
    MIN_ACTION_THRESHOLD=10,

    # Trading timing
    TIME_INTERVAL_MIN=1,  # Trade every minute
    INITIAL_TRADE_DELAY_MIN=15,  # Wait 15 min after market open

    # Fine-tuning parameters
    FINETUNE_INTERVAL_HOURS=2,
    FINETUNE_LOOKBACK_HOURS=48,
    FINETUNE_LR=1e-5,
    FINETUNE_STEPS=2000,
    VALIDATION_SPLIT=0.2,
    ROLLBACK_THRESHOLD=0.95,

    # Data storage
    DATA_CSV='production_paper_trading_data.csv',

    # Explainability
    ENABLE_EXPLANATIONS=True,
    SHAP_BACKGROUND_SAMPLES=500,
)

# State layout: one cash slot, then a price and a holding per ticker, then every
# technical indicator for every ticker. With 30 tickers and 8 indicators that is
# 1 + 30 + 30 + 240 = 301. One action is produced per ticker.
action_dim = CONFIG['STOCK_DIM']
state_dim = 1 + action_dim * (2 + len(TECH_INDICATORS))

CONFIG.update(state_dim=state_dim, action_dim=action_dim)

# Single print call; sep="\n" puts each entry on its own line.
print(
    "\nüìã CONFIGURATION",
    "=" * 80,
    f"Model: {CONFIG['TRAINED_MODEL']}",
    f"Output: {CONFIG['OUTPUT_DIR']}",
    f"Data CSV: {CONFIG['DATA_CSV']}",
    f"State dim: {state_dim}, Action dim: {action_dim}",
    f"Initial cash: ${CONFIG['INITIAL_CASH']:,}",
    f"Fine-tune: Every {CONFIG['FINETUNE_INTERVAL_HOURS']}h",
    f"Lookback: {CONFIG['FINETUNE_LOOKBACK_HOURS']}h",
    f"Explainability: {CONFIG['ENABLE_EXPLANATIONS']}",
    "=" * 80,
    sep="\n",
)


üìã CONFIGURATION
Model: trained_models/agent_ppo.zip
Output: production_paper_trading_results
Data CSV: production_paper_trading_data.csv
State dim: 301, Action dim: 30
Initial cash: $1,000,000
Fine-tune: Every 2h
Lookback: 48h
Explainability: True


# Part 3: Data Management Functions

CSV-based data storage with historical backfill and real-time updates.

In [14]:
def init_data_csv(csv_path):
    """Ensure the data-collection CSV exists, creating a header-only file if not.

    Args:
        csv_path: Location of the CSV file.

    A missing file is created with the OHLCV columns followed by every name in
    TECH_INDICATORS. An existing file is left untouched; it is only read so its
    row count can be reported.
    """
    if Path(csv_path).exists():
        row_count = len(pd.read_csv(csv_path))
        print(f"‚úì CSV exists: {csv_path} ({row_count:,} records)")
        return

    # Header-only frame: fixes the column order later appends rely on.
    header = ['date', 'tic', 'open', 'high', 'low', 'close', 'volume'] + TECH_INDICATORS
    pd.DataFrame(columns=header).to_csv(csv_path, index=False)
    print(f"‚úì Initialized CSV: {csv_path}")


def fetch_historical_data_to_csv(alpaca_api, csv_path, required_days=2):
    """
    Fetch historical 1-min data from Alpaca and populate CSV.

    The download window ends yesterday (UTC) and reaches back `required_days`
    calendar days. The fetch is skipped when the CSV already spans at least
    12 hours and its newest row is under 24 hours old. On success the CSV is
    overwritten with the cleaned bars plus the TECH_INDICATORS columns. Any
    failure is printed with a traceback and the function returns normally.

    Args:
        alpaca_api: Not used; an AlpacaProcessor is built from CONFIG instead.
            Kept so existing callers keep working.
        csv_path: CSV file to check and (re)write.
        required_days: Number of calendar days of history to request.

    Returns:
        None.
    """
    print(f"\nüì• Fetching historical data for {required_days} trading days...")

    # Naive UTC "now", matching the naive UTC timestamps stored in the CSV.
    # datetime.utcnow() is deprecated, hence the explicit timezone round-trip.
    now_utc = datetime.now(timezone.utc).replace(tzinfo=None)

    # Check if CSV already has sufficient data
    if Path(csv_path).exists():
        existing_df = pd.read_csv(csv_path)
        if len(existing_df) > 0:
            existing_df['date'] = pd.to_datetime(existing_df['date'])
            span_hours = (existing_df['date'].max() - existing_df['date'].min()).total_seconds() / 3600
            age_hours = (now_utc - existing_df['date'].max()).total_seconds() / 3600

            if span_hours >= 12 and age_hours < 24:  # At least 12h of recent data
                print(f"‚úì CSV has {span_hours:.1f}h of data ({age_hours:.1f}h old)")
                print(f"  Skipping historical fetch")
                return

    try:
        # Calculate date range. The lookback now honours `required_days`
        # (it used to be hard-coded to 2 regardless of the argument).
        # NOTE(review): these are calendar days, so a window that falls on a
        # weekend or holiday may return little or no data — confirm acceptable.
        today = now_utc.date()
        end_date = today - timedelta(days=1)  # Yesterday
        start_date = end_date - timedelta(days=required_days)

        print(f"  Download range: {start_date} to {end_date}")

        # Initialize Alpaca processor
        alpaca_processor = AlpacaProcessor(
            API_KEY=CONFIG['API_KEY'],
            API_SECRET=CONFIG['API_SECRET'],
            API_BASE_URL=CONFIG['API_BASE_URL']
        )

        # Download raw data
        print(f"  Downloading...")
        df_raw = alpaca_processor.download_data(
            start_date=start_date.strftime('%Y-%m-%d'),
            end_date=end_date.strftime('%Y-%m-%d'),
            ticker_list=TICKERS,
            time_interval='1Min'
        )

        if df_raw is None or len(df_raw) == 0:
            print("  ‚ö†Ô∏è  No data returned from Alpaca")
            return

        print(f"  ‚úì Downloaded: {len(df_raw):,} records")

        # The cleaning steps below work on a 'timestamp' column.
        if 'date' in df_raw.columns:
            df_raw.rename(columns={'date': 'timestamp'}, inplace=True)

        df_raw['timestamp'] = pd.to_datetime(df_raw['timestamp'], utc=True, errors='coerce')

        # Set processor attributes for clean_data
        alpaca_processor.start = start_date.strftime('%Y-%m-%d')
        alpaca_processor.end = end_date.strftime('%Y-%m-%d')
        alpaca_processor.time_interval = '1Min'

        # Clean and add indicators
        df_clean = alpaca_processor.clean_data(df_raw)
        df_clean = df_clean.sort_values(by=['timestamp', 'tic']).reset_index(drop=True)
        df_clean = alpaca_processor.add_technical_indicator(df_clean, TECH_INDICATORS)
        df_clean = df_clean.ffill().bfill()

        print(f"  ‚úì Processed: {len(df_clean):,} records")

        # Convert to timezone-naive so values round-trip through the CSV and
        # compare cleanly with the naive UTC datetimes used elsewhere.
        df_clean['timestamp'] = pd.to_datetime(df_clean['timestamp'])
        if df_clean['timestamp'].dt.tz is not None:
            df_clean['timestamp'] = df_clean['timestamp'].dt.tz_localize(None)

        df_clean.rename(columns={'timestamp': 'date'}, inplace=True)

        # Save to CSV (replaces any previous contents)
        required_cols = ['date', 'tic', 'open', 'high', 'low', 'close', 'volume'] + TECH_INDICATORS
        df_clean = df_clean[required_cols]
        df_clean.to_csv(csv_path, index=False)

        print(f"‚úÖ Saved {len(df_clean):,} records to {csv_path}")
        print(f"   Date range: {df_clean['date'].min()} to {df_clean['date'].max()}")

    except Exception as e:
        # Best-effort backfill: report the problem but let the caller continue.
        print(f"‚úó Failed to fetch historical data: {e}")
        import traceback
        traceback.print_exc()


def append_latest_data_to_csv(alpaca_processor, csv_path):
    """
    Pull the newest 1-min snapshot from Alpaca and append one row per ticker.

    Only the latest price is available, so it is written to open/high/low/close
    alike and volume is stored as 0. Rows are stamped with the current UTC time
    truncated to the minute. Nothing is written when the CSV already holds a
    timestamp at or after that minute.

    Args:
        alpaca_processor: Object whose `fetch_latest_data` returns
            (price, tech, turbulence).
        csv_path: CSV file to append to (no header is written).

    Returns: DataFrame of new data (or None if failed/duplicate)
    """
    try:
        price, tech, turbulence = alpaca_processor.fetch_latest_data(
            ticker_list=TICKERS,
            time_interval='1Min',
            tech_indicator_list=TECH_INDICATORS
        )

        if price is None:
            print("‚ö†Ô∏è  No data fetched")
            return None

        # Minute-resolution stamp shared by every row of this snapshot.
        current_time = datetime.utcnow().replace(second=0, microsecond=0)
        indicators_per_ticker = len(TECH_INDICATORS)

        def indicator_at(flat_idx):
            # `tech` is read as one flat sequence, ticker by ticker; positions
            # past its end fall back to 0.
            return tech[flat_idx] if flat_idx < len(tech) else 0

        rows = []
        for pos, symbol in enumerate(TICKERS):
            last_price = price[pos]
            row = {
                'date': current_time,
                'tic': symbol,
                'open': last_price,  # Using close as proxy
                'high': last_price,
                'low': last_price,
                'close': last_price,
                'volume': 0,
            }
            offset = pos * indicators_per_ticker
            for shift, indicator_name in enumerate(TECH_INDICATORS):
                row[indicator_name] = indicator_at(offset + shift)
            rows.append(row)

        df_new = pd.DataFrame(rows)

        # Skip the write when this minute (or a later one) is already stored.
        if Path(csv_path).exists():
            stored = pd.read_csv(csv_path)
            if len(stored) > 0:
                newest_stored = pd.to_datetime(stored['date']).max()
                if current_time <= newest_stored:
                    print(f"  ‚ö†Ô∏è  Data already exists for {current_time}")
                    return None

        df_new.to_csv(csv_path, mode='a', header=False, index=False)
        print(f"  üíæ Appended {len(df_new)} records at {current_time}")

        return df_new

    except Exception as e:
        # Best-effort: report the failure and let the trading loop carry on.
        print(f"‚úó Error appending data: {e}")
        import traceback
        traceback.print_exc()
        return None


def load_recent_data_from_csv(csv_path, hours=48):
    """Load the last `hours` hours of rows from the CSV.

    Rows are kept only if their ticker is in TICKERS and their timestamp has
    exactly CONFIG['STOCK_DIM'] rows. A 0-based `day` column is added that
    numbers the surviving timestamps in chronological order.

    Args:
        csv_path: CSV file to read.
        hours: Size of the lookback window, counted back from the current UTC time.

    Returns:
        The filtered DataFrame, or None when the file is missing, empty, or
        holds no complete timestamp inside the window.
    """
    if not Path(csv_path).exists():
        print(f"‚ö†Ô∏è  CSV not found: {csv_path}")
        return None

    frame = pd.read_csv(csv_path)
    if len(frame) == 0:
        print("‚ö†Ô∏è  CSV is empty")
        return None

    frame['date'] = pd.to_datetime(frame['date'])

    # Window filter first, then restrict to the tickers being traded.
    window_start = datetime.utcnow() - timedelta(hours=hours)
    recent = frame[frame['date'] >= window_start].copy()
    recent = recent[recent['tic'].isin(TICKERS)]

    # Keep only timestamps with one row for every expected ticker.
    recent = recent.sort_values(['date', 'tic']).reset_index(drop=True)
    rows_per_stamp = recent.groupby('date')['tic'].count()
    full_stamps = rows_per_stamp[rows_per_stamp == CONFIG['STOCK_DIM']].index
    recent = recent[recent['date'].isin(full_stamps)]

    if len(recent) == 0:
        print("‚ö†Ô∏è  No complete timestamps found")
        return None

    # Sequential step index over the remaining timestamps.
    ordered_stamps = sorted(recent['date'].unique())
    step_of = {stamp: step for step, stamp in enumerate(ordered_stamps)}
    recent['day'] = recent['date'].map(step_of)

    covered_hours = (recent['date'].max() - recent['date'].min()).total_seconds() / 3600
    print(f"‚úì Loaded {len(recent):,} rows ({len(ordered_stamps)} timestamps, {covered_hours:.1f}h)")

    return recent


print("‚úì Data management functions defined")

‚úì Data management functions defined


# Part 4: Trading Helper Functions

In [15]:
def sigmoid_sign(ary, thresh):
    """Squash `ary` with a zero-centred sigmoid scaled by `thresh`.

    Computes (1 / (1 + exp(-e * ary / thresh)) - 0.5) * thresh, so an input of 0
    maps to 0 and outputs stay within (-thresh / 2, thresh / 2).

    Args:
        ary: Scalar or NumPy array of values to squash.
        thresh: Non-zero scale applied before and after the sigmoid.

    Returns:
        The transformed value(s), same shape as `ary`.
    """
    normalised = ary / thresh
    centred = 1 / (1 + np.exp(-normalised * np.e)) - 0.5
    return centred * thresh


def submit_order(alpaca, qty, stock, side, resp):
    """Place a market day-order and record whether it went through.

    Args:
        alpaca: Client whose `submit_order` is called positionally with
            (stock, qty, side, "market", "day").
        qty: Number of shares; anything not greater than 0 is treated as a no-op.
        stock: Ticker symbol.
        side: Order side string; printed upper-cased.
        resp: List that receives True (submitted or no-op) or False (failed).
    """
    if not qty > 0:
        # Nothing to trade; still record an entry so every call leaves a result.
        resp.append(True)
        return

    try:
        alpaca.submit_order(stock, qty, side, "market", "day")
        print(f"    ‚úì {side.upper()} {qty} {stock}")
        resp.append(True)
    except Exception as e:
        # A rejected order must not abort the other orders of the cycle.
        print(f"    ‚úó {side.upper()} {qty} {stock} failed: {e}")
        resp.append(False)


def get_state_from_alpaca(alpaca_processor):
    """
    Assemble the model's observation vector from live Alpaca data.

    Layout: [cash] + [prices] + [holdings] + [technical indicators]
    (1 + 30 + 30 + 240 = 301 values with the configured 30 tickers and
    8 indicators). Turbulence is returned separately and is NOT part of the
    state vector.

    Scaling applied before concatenation: cash * 2**-12, prices and holdings
    * 2**-6, indicators * 2**-7. NaN and infinite entries are replaced by 0.

    Args:
        alpaca_processor: Object whose `fetch_latest_data` returns
            (price, tech, turbulence).

    Returns:
        state, price, stocks, cash, turbulence, turbulence_bool, tech
        (`tech` is the unscaled indicator array).
    """
    price, tech, turbulence = alpaca_processor.fetch_latest_data(
        ticker_list=TICKERS,
        time_interval='1Min',
        tech_indicator_list=TECH_INDICATORS
    )

    # 1 when turbulence reaches the configured threshold, else 0.
    turbulence_bool = 1 if turbulence >= CONFIG['TURBULENCE_THRESHOLD'] else 0

    tech_scaled = tech * 2 ** -7

    # Holdings and cash come from the brokerage account itself.
    rest_client = tradeapi.REST(
        CONFIG['API_KEY'],
        CONFIG['API_SECRET'],
        CONFIG['API_BASE_URL'],
        'v2'
    )

    open_positions = rest_client.list_positions()
    stocks = np.zeros(CONFIG['STOCK_DIM'])
    for held in open_positions:
        if held.symbol not in TICKERS:
            continue  # positions outside the traded universe are ignored
        # qty arrives as a string; the magnitude is stored regardless of sign.
        stocks[TICKERS.index(held.symbol)] = abs(int(float(held.qty)))

    cash = float(rest_client.get_account().cash)

    # Build state vector (NO turbulence!)
    amount = np.array(cash * (2 ** -12), dtype=np.float32)
    scale = np.array(2 ** -6, dtype=np.float32)
    pieces = (amount, price * scale, stocks * scale, tech_scaled)
    state = np.hstack(pieces).astype(np.float32)

    # Zero out anything non-finite (NaN, +inf, -inf).
    state[~np.isfinite(state)] = 0.0

    return state, price, stocks, cash, turbulence, turbulence_bool, tech


print("‚úì Trading helper functions defined")

‚úì Trading helper functions defined


# Part 5: Fine-Tuning Functions

In [16]:
def create_env(df, config):
    """Build a single-environment DummyVecEnv around StockTradingEnv.

    Args:
        df: DataFrame with at least `day` and `tic` columns; it is copied,
            sorted by (day, tic) and indexed by `day` before use.
        config: Mapping providing STOCK_DIM, HMAX, INITIAL_CASH,
            TRANSACTION_COST_PCT and REWARD_SCALING.

    Returns:
        DummyVecEnv wrapping one StockTradingEnv that starts with no shares.
    """
    n_stocks = config['STOCK_DIM']
    fee = config['TRANSACTION_COST_PCT']

    # cash + (price, holding) per stock + every indicator per stock
    state_space = 1 + n_stocks * (2 + len(TECH_INDICATORS))

    frame = df.copy().sort_values(['day', 'tic']).set_index('day')

    trading_env = StockTradingEnv(
        df=frame,
        stock_dim=n_stocks,
        hmax=config['HMAX'],
        initial_amount=config['INITIAL_CASH'],
        num_stock_shares=[0] * n_stocks,
        buy_cost_pct=[fee] * n_stocks,
        sell_cost_pct=[fee] * n_stocks,
        reward_scaling=config['REWARD_SCALING'],
        state_space=state_space,
        action_space=n_stocks,
        tech_indicator_list=TECH_INDICATORS,
        print_verbosity=100000,
    )

    return DummyVecEnv([lambda: trading_env])


def evaluate_model_on_df(model, df, config):
    """Run one deterministic episode over `df` and return the summed reward.

    Args:
        model: Object with `predict(obs, deterministic=True)` returning
            (action, state).
        df: Data passed to `create_env`.
        config: Configuration mapping passed to `create_env`.

    Returns:
        Sum of the first environment's reward over every step until it
        reports done.
    """
    vec_env = create_env(df, config)
    observation = vec_env.reset()
    cumulative_reward = 0

    while True:
        chosen_action, _ = model.predict(observation, deterministic=True)
        observation, rewards, finished, _ = vec_env.step(chosen_action)
        cumulative_reward += rewards[0]
        if finished:
            break

    return cumulative_reward


def finetune_model_with_validation(model, csv_path, config):
    """
    Fine-tune a copy of `model` on recent CSV data and keep it only if it validates.

    Steps: load the last FINETUNE_LOOKBACK_HOURS of data, split its timestamps
    chronologically into train/validation, score the current model on the
    validation part, fine-tune a clone on the training part, score the clone,
    and accept it when its score is within the ROLLBACK_THRESHOLD tolerance of
    the original score.

    Args:
        model: Model to fine-tune; must support `save(path)` and
            `predict(obs, deterministic=True)`.
        csv_path: CSV file read through `load_recent_data_from_csv`.
        config: Mapping providing FINETUNE_LOOKBACK_HOURS, VALIDATION_SPLIT,
            FINETUNE_STEPS, FINETUNE_LR, ROLLBACK_THRESHOLD and the keys
            `create_env` needs.

    Returns: (model, result_dict)
        The fine-tuned clone when accepted, otherwise the `model` passed in.
        `result_dict` is None when there was not enough data to attempt a run.
    """
    print(f"\n{'='*80}")
    print("FINE-TUNING MODEL")
    print(f"{'='*80}")

    # Load data
    df = load_recent_data_from_csv(csv_path, hours=config['FINETUNE_LOOKBACK_HOURS'])

    if df is None or len(df) < 100:
        print("‚úó Insufficient data for fine-tuning")
        return model, None

    # Get unique dates
    unique_dates = sorted(df['date'].unique())

    # Need at least 10 timestamps for meaningful split
    if len(unique_dates) < 10:
        print(f"‚úó Need at least 10 timestamps (have {len(unique_dates)})")
        return model, None

    # Split train/validation chronologically: hold out the newest
    # VALIDATION_SPLIT share of timestamps, but never fewer than 2 and never
    # the whole set. (The previous max() always left exactly 2 validation
    # timestamps, so VALIDATION_SPLIT had no effect.)
    n_dates = len(unique_dates)
    split_idx = min(n_dates - 2, int(n_dates * (1 - config['VALIDATION_SPLIT'])))
    split_idx = max(1, split_idx)

    train_df = df[df['date'].isin(unique_dates[:split_idx])].copy()
    val_df = df[df['date'].isin(unique_dates[split_idx:])].copy()

    # Reset day indices so each subset is numbered from 0 on its own.
    train_dates = sorted(train_df['date'].unique())
    val_dates = sorted(val_df['date'].unique())

    train_date_to_day = {date: idx for idx, date in enumerate(train_dates)}
    val_date_to_day = {date: idx for idx, date in enumerate(val_dates)}

    train_df['day'] = train_df['date'].map(train_date_to_day)
    val_df['day'] = val_df['date'].map(val_date_to_day)

    print(f"  Train: {len(train_df):,} rows ({len(train_dates)} timestamps)")
    print(f"  Val: {len(val_df):,} rows ({len(val_dates)} timestamps)")

    # Evaluate original
    print(f"  Evaluating original model...")
    original_score = evaluate_model_on_df(model, val_df, config)
    print(f"  Original score: {original_score:.2f}")

    # Clone the model through a save/load round-trip so the original is never
    # modified. The handle is closed before saving so the path can be reopened,
    # and the file is removed even if save or load raises.
    tmp = tempfile.NamedTemporaryFile(suffix='.zip', delete=False)
    tmp_path = tmp.name
    tmp.close()
    try:
        model.save(tmp_path)
        # The learning rate has to be overridden at load time: PPO builds its
        # learning-rate schedule while loading, so assigning `learning_rate`
        # on the loaded model afterwards would leave the saved rate in use.
        model_ft = PPO.load(
            tmp_path,
            custom_objects={'learning_rate': config['FINETUNE_LR']}
        )
    finally:
        os.remove(tmp_path)

    # Fine-tune
    print(f"  Fine-tuning ({config['FINETUNE_STEPS']} steps, lr={config['FINETUNE_LR']})...")
    ft_env = create_env(train_df, config)
    model_ft.set_env(ft_env)
    model_ft.learn(
        total_timesteps=config['FINETUNE_STEPS'],
        reset_num_timesteps=False,
        progress_bar=False
    )

    # Evaluate fine-tuned
    print(f"  Evaluating fine-tuned model...")
    finetuned_score = evaluate_model_on_df(model_ft, val_df, config)
    print(f"  Fine-tuned score: {finetuned_score:.2f}")

    # Decision. The tolerance is a fraction of the score's magnitude so it
    # also works for negative scores: for a positive score this equals
    # original_score * ROLLBACK_THRESHOLD, and for a negative one the
    # candidate may still be slightly worse instead of having to be better.
    tolerance = abs(original_score) * (1 - config['ROLLBACK_THRESHOLD'])
    threshold = original_score - tolerance
    accepted = bool(finetuned_score >= threshold)
    # Relative change measured against the magnitude, so the sign of the
    # percentage always matches the direction of the change.
    improvement = (
        (finetuned_score - original_score) / abs(original_score) * 100
    ) if original_score != 0 else 0

    result = {
        # Naive UTC timestamp (datetime.utcnow() is deprecated).
        'timestamp': datetime.now(timezone.utc).replace(tzinfo=None),
        'original_score': original_score,
        'finetuned_score': finetuned_score,
        'threshold': threshold,
        'accepted': accepted,
        'improvement_pct': improvement,
        'train_records': len(train_df),
        'val_records': len(val_df),
    }

    if accepted:
        # `+` format flag: an accepted-but-slightly-worse run prints "-1.00%"
        # rather than "+-1.00%".
        print(f"‚úÖ ACCEPTED ({improvement:+.2f}%)")
        return model_ft, result
    else:
        print(f"‚ùå REJECTED ({improvement:.2f}%)")
        return model, result


print("‚úì Fine-tuning functions defined")

‚úì Fine-tuning functions defined


# Part 6: Main Trading Class

In [17]:
class ProductionPaperTrading:
    """
    Production paper trading with explainability and fine-tuning.
    """
    
    def __init__(self, config, model_path):
        """Connect to Alpaca, load the PPO model and prepare the data CSV.

        Args:
            config: Configuration mapping (API credentials, STOCK_DIM, TICKERS,
                MAX_STOCK, DATA_CSV, ENABLE_EXPLANATIONS,
                FINETUNE_INTERVAL_HOURS, ...).
            model_path: Path handed to `PPO.load`.
        """
        self.config = config

        api_key = config['API_KEY']
        api_secret = config['API_SECRET']
        base_url = config['API_BASE_URL']

        # Two Alpaca handles: the FinRL processor for market data and the REST
        # client for account, position and order calls.
        self.alpaca_processor = AlpacaProcessor(
            API_KEY=api_key,
            API_SECRET=api_secret,
            API_BASE_URL=base_url
        )
        self.alpaca = tradeapi.REST(api_key, api_secret, base_url, 'v2')

        print(f"ü§ñ Loading PPO model: {model_path}")
        ppo_model = PPO.load(model_path)
        print("‚úì PPO model loaded")

        explanations_enabled = config['ENABLE_EXPLANATIONS']
        if not explanations_enabled:
            self.model = ppo_model
            print("‚úì Using PPO without explanations")
        else:
            self.model = ExplainableAgent(
                ppo_model,
                stock_dim=config['STOCK_DIM'],
                hmax=config['MAX_STOCK']
            )
            print("‚úì Wrapped with ExplainableAgent")

        # Per-ticker bookkeeping
        self.tickers = config['TICKERS']
        self.stocks_cd = np.zeros(config['STOCK_DIM'])

        # Run bookkeeping. `last_finetune` is back-dated by one full interval,
        # so the first fine-tune check is already due.
        interval = timedelta(hours=config['FINETUNE_INTERVAL_HOURS'])
        self.last_finetune = datetime.utcnow() - interval
        self.finetune_history = []
        self.trading_history = []
        self.cycle = 0
        self.model_version = 'original'
        self.finetune_count = 0

        # Make sure the CSV exists, then backfill it with recent history.
        print(f"\nüìä Initializing data collection...")
        data_csv = config['DATA_CSV']
        init_data_csv(data_csv)
        fetch_historical_data_to_csv(self.alpaca, data_csv, required_days=2)

        if explanations_enabled:
            self._train_explainers()

        print("‚úì ProductionPaperTrading initialized")
    
    def _train_explainers(self):
        """Train SHAP and LIME explainers on historical data.

        Loads the last FINETUNE_LOOKBACK_HOURS of CSV data, writes it to a
        temporary file under OUTPUT_DIR and passes that file to
        `self.model.train_explainers`. With fewer than 100 rows available the
        training is skipped. Every failure is caught and reported, so this
        method never raises.
        """
        print(f"\nüîç Training SHAP + LIME explainers...")

        try:
            # Load historical data for explainer training
            df_hist = load_recent_data_from_csv(
                self.config['DATA_CSV'],
                hours=self.config['FINETUNE_LOOKBACK_HOURS']
            )

            if df_hist is None or len(df_hist) < 100:
                print("‚ö†Ô∏è  Insufficient data for explainer training")
                print("   Will train explainers after collecting more data")
                return

            # The explainer API takes a file path, so stage the frame on disk.
            temp_path = Path(self.config['OUTPUT_DIR']) / 'temp_explainer_data.csv'
            temp_path.parent.mkdir(parents=True, exist_ok=True)

            try:
                df_hist.to_csv(temp_path, index=False)

                # Train explainers
                self.model.train_explainers(
                    train_data_path=str(temp_path),
                    n_samples=self.config['SHAP_BACKGROUND_SAMPLES']
                )
            finally:
                # Remove the staging file even when training fails, so a bad
                # run does not leave a stale copy of the data behind.
                if temp_path.exists():
                    temp_path.unlink()

            print("‚úì SHAP + LIME explainers trained")

        except Exception as e:
            # Explainability is optional; trading continues without it.
            print(f"‚ö†Ô∏è  Failed to train explainers: {e}")
            print("   Will continue without explainability")
    
    def execute_trade(self):
        """
        Run one trading cycle: read state, query the model, place orders, log data.

        Below the turbulence threshold, sells are submitted first and joined,
        cash is refreshed, then buys are submitted; actions whose magnitude
        does not exceed MIN_ACTION_THRESHOLD are recorded as HOLD. At or above
        the threshold every open position is closed instead. Finally the
        latest market snapshot is appended to the data CSV.

        Returns:
            dict with keys 'decisions' (list of per-ticker dicts),
            'portfolio_value' (current account equity), 'cash', 'turbulence',
            'turbulence_bool' and 'explanations' (None when no explanation
            was produced).
        """
        # Get current state (301 features with the configured tickers/indicators)
        state, price, stocks, cash, turbulence, turbulence_bool, _ = get_state_from_alpaca(
            self.alpaca_processor
        )
        observation = state.reshape(1, -1)

        # Explanations are used only when enabled, supported by the model
        # object, and both explainers have been trained. `and` short-circuits,
        # so the explainer attributes are read only on an explainable model.
        explainers_ready = (
            self.config['ENABLE_EXPLANATIONS']
            and hasattr(self.model, 'predict_with_explanation')
            and self.model.shap_explainer is not None
            and self.model.lime_explainer is not None
        )

        if explainers_ready:
            result = self.model.predict_with_explanation(
                observation,
                explain_method='all'
            )
            action = result['action']  # Already scaled
            explanations = result['methods']
        else:
            # Plain prediction: scale the raw action by MAX_STOCK to get
            # whole-share quantities.
            action, _ = self.model.predict(observation, deterministic=True)
            action = (action[0] * self.config['MAX_STOCK']).astype(int)
            explanations = None

        def launch_order(qty, symbol, side):
            """Submit one order on a background thread and return the started thread."""
            worker = threading.Thread(
                target=submit_order,
                args=(self.alpaca, qty, symbol, side, [])
            )
            worker.start()
            return worker

        # Update cooldown
        self.stocks_cd += 1

        # Execute trades
        decisions = []

        if turbulence_bool == 0:
            # Normal trading
            min_action = self.config['MIN_ACTION_THRESHOLD']

            # SELL orders: never sell more shares than are currently held.
            sell_threads = []
            for index in np.where(action < -min_action)[0]:
                qty = abs(int(min(stocks[index], -action[index])))
                sell_threads.append(launch_order(qty, self.tickers[index], 'sell'))
                self.stocks_cd[index] = 0

                if qty > 0:
                    decisions.append({
                        'ticker': self.tickers[index],
                        'action': 'SELL',
                        'qty': qty,
                        'price': price[index],
                    })

            # Wait for sells
            for worker in sell_threads:
                worker.join()

            # Re-read cash after the sell orders were submitted.
            cash = float(self.alpaca.get_account().cash)

            # BUY orders
            # NOTE(review): every buy is sized against the same cash figure, so
            # the combined orders can exceed available cash — confirm intended.
            buy_threads = []
            for index in np.where(action > min_action)[0]:
                affordable = max(0, cash) // price[index]
                buy_num_shares = min(affordable, abs(int(action[index])))
                qty = abs(int(buy_num_shares)) if not np.isnan(buy_num_shares) else 0
                buy_threads.append(launch_order(qty, self.tickers[index], 'buy'))
                self.stocks_cd[index] = 0

                if qty > 0:
                    decisions.append({
                        'ticker': self.tickers[index],
                        'action': 'BUY',
                        'qty': qty,
                        'price': price[index],
                    })

            # Wait for buys
            for worker in buy_threads:
                worker.join()

            # HOLD
            hold_indices = np.where((action >= -min_action) & (action <= min_action))[0]
            for index in hold_indices:
                decisions.append({
                    'ticker': self.tickers[index],
                    'action': 'HOLD',
                    'qty': 0,
                    'price': price[index],
                })

        else:
            # High turbulence - liquidate all
            print("  ‚ö†Ô∏è  HIGH TURBULENCE - Liquidating all positions")
            close_threads = []

            for position in self.alpaca.list_positions():
                # Long positions are sold, anything else is bought back.
                side = 'sell' if position.side == 'long' else 'buy'
                qty = abs(int(float(position.qty)))
                close_threads.append(launch_order(qty, position.symbol, side))

                decisions.append({
                    'ticker': position.symbol,
                    'action': 'SELL_TURBULENCE',
                    'qty': qty,
                    'price': 0,
                })

            for worker in close_threads:
                worker.join()

            self.stocks_cd[:] = 0

        # Get final values from a single account snapshot. `equity` is the
        # current account value; `last_equity` (used before) is the value at
        # the previous trading day's close and does not change intraday.
        account = self.alpaca.get_account()
        cash = float(account.cash)
        portfolio_value = float(account.equity)

        # Append new data to CSV
        append_latest_data_to_csv(self.alpaca_processor, self.config['DATA_CSV'])

        return {
            'decisions': decisions,
            'portfolio_value': portfolio_value,
            'cash': cash,
            'turbulence': turbulence,
            'turbulence_bool': turbulence_bool,
            'explanations': explanations,
        }
    
    def check_and_finetune(self):
        """Fine-tune the model when FINETUNE_INTERVAL_HOURS have elapsed.

        When a fine-tuning run is attempted, its result is appended to
        `finetune_history` and written to OUTPUT_DIR/finetune_history.csv.
        An accepted model is saved as OUTPUT_DIR/model_cycle_<cycle>.zip and
        becomes the active model. If explanations are enabled it is wrapped in
        ExplainableAgent again and the explainers are retrained.

        When there is too little data for a run, `last_finetune` is left
        unchanged, so the check fires again on the next call.
        """
        # Naive UTC "now", comparable with the naive `last_finetune` value
        # (datetime.utcnow() is deprecated).
        current_time = datetime.now(timezone.utc).replace(tzinfo=None)
        time_since_finetune = (current_time - self.last_finetune).total_seconds() / 3600

        if time_since_finetune < self.config['FINETUNE_INTERVAL_HOURS']:
            return

        print(f"\n‚è∞ Time to fine-tune (last: {time_since_finetune:.1f}h ago)")

        self.model, ft_result = finetune_model_with_validation(
            self.model,
            self.config['DATA_CSV'],
            self.config
        )

        if not ft_result:
            return

        self.finetune_history.append(ft_result)
        self.last_finetune = current_time

        # Save results
        output_dir = Path(self.config['OUTPUT_DIR'])
        output_dir.mkdir(parents=True, exist_ok=True)

        pd.DataFrame(self.finetune_history).to_csv(
            output_dir / 'finetune_history.csv',
            index=False
        )

        if not ft_result['accepted']:
            return

        self.finetune_count += 1
        self.model_version = f'finetuned_v{self.finetune_count}'

        # Save fine-tuned model
        model_path = output_dir / f'model_cycle_{self.cycle}.zip'
        self.model.save(str(model_path))
        print(f"üíæ Saved fine-tuned model: {model_path}")

        if self.config['ENABLE_EXPLANATIONS']:
            # An accepted candidate comes back as a bare PPO (it is created
            # with PPO.load), which has no explanation methods. Wrap it the
            # same way __init__ does; otherwise explanations stop and
            # explainer training can no longer succeed.
            if not hasattr(self.model, 'predict_with_explanation'):
                self.model = ExplainableAgent(
                    self.model,
                    stock_dim=self.config['STOCK_DIM'],
                    hmax=self.config['MAX_STOCK']
                )

            # Retrain explainers on updated model
            print("  Re-training explainers on fine-tuned model...")
            self._train_explainers()
    
    def square_off_all_positions(self):
        """Liquidate all positions before market close.

        One market order per open position is submitted on its own thread
        (sell for long positions, buy otherwise) and all threads are joined
        before returning. Success is reported only when no order failed.
        """
        print("\nüîö Squaring off all positions...")
        positions = self.alpaca.list_positions()

        if len(positions) == 0:
            print("   No positions to square off")
            return

        # One shared list collects a True/False outcome from every order
        # thread; it is read only after all threads have been joined.
        outcomes = []
        workers = []
        for position in positions:
            side = 'sell' if position.side == 'long' else 'buy'
            qty = abs(int(float(position.qty)))

            worker = threading.Thread(
                target=submit_order,
                args=(self.alpaca, qty, position.symbol, side, outcomes)
            )
            worker.start()
            workers.append(worker)

        for worker in workers:
            worker.join()

        # Report failures instead of claiming success unconditionally.
        failed = outcomes.count(False)
        if failed:
            print(f"‚ö†Ô∏è  {failed} of {len(positions)} square-off orders failed")
        else:
            print("‚úì All positions squared off")
    
    def run(self):
        """Run one paper-trading session, from market open to square-off.

        Steps:
            1. If the Alpaca clock reports the market closed, sleep until the
               reported next open.
            2. Sleep ``config['INITIAL_TRADE_DELAY_MIN']`` minutes.
            3. Repeat: execute a trade, append a row to
               ``self.trading_history``, call ``check_and_finetune()``, rewrite
               ``trading_history.csv`` in ``config['OUTPUT_DIR']``, then sleep
               for at most ``config['TIME_INTERVAL_MIN']`` minutes.
            4. Once fewer than 15 minutes remain before the close, square off
               all positions and leave the loop.
            5. Print the session summary.

        A ``KeyboardInterrupt`` raised inside the trading loop ends the session
        without squaring off; the summary is still printed.

        ``self.cycle`` is incremented only for iterations that reach the
        trading step, so the terminating close check is not counted as a cycle.
        """
        square_off_window_sec = 15 * 60

        def _epoch_seconds(ts):
            """Return ``ts`` as POSIX seconds, treating a naive value as UTC."""
            # Only a naive timestamp gets UTC attached. Overwriting the offset
            # of an aware timestamp would move the instant it denotes and skew
            # the open/close arithmetic whenever two values carry different
            # offsets (e.g. across a DST change).
            # NOTE(review): assumes the Alpaca clock fields are datetime-like;
            # confirm whether the client returns them offset-aware.
            if ts.tzinfo is None:
                ts = ts.replace(tzinfo=timezone.utc)
            return ts.timestamp()

        def _utc_now():
            """Naive UTC 'now', same value datetime.utcnow() gave (deprecated in 3.12)."""
            return datetime.now(timezone.utc).replace(tzinfo=None)

        # Wait for market to open
        clock = self.alpaca.get_clock()
        if not clock.is_open:
            # Clamp at zero: time.sleep() raises ValueError on a negative delay.
            time_to_open = max(
                0.0,
                _epoch_seconds(clock.next_open) - _epoch_seconds(clock.timestamp),
            )
            print(f"‚è∞ Market closed - waiting {int(time_to_open/60)} minutes...")
            time.sleep(time_to_open)

        # Wait initial delay after market open
        print(f"‚úÖ Market opened - waiting {self.config['INITIAL_TRADE_DELAY_MIN']} minutes...")
        time.sleep(self.config['INITIAL_TRADE_DELAY_MIN'] * 60)

        print(f"\nüöÄ Starting paper trading (Model: {self.model_version})")
        print("="*80)

        output_dir = Path(self.config['OUTPUT_DIR'])
        output_dir.mkdir(parents=True, exist_ok=True)

        try:
            while True:
                # Check market status
                clock = self.alpaca.get_clock()
                cycle_started = time.monotonic()
                time_to_close = (_epoch_seconds(clock.next_close)
                                 - _epoch_seconds(clock.timestamp))

                # Square off 15 min before close
                if time_to_close < square_off_window_sec:
                    self.square_off_all_positions()
                    print("üîö Market closing soon - stopping trading")
                    break

                # Incremented after the close check so that the iteration which
                # only squares off does not inflate the cycle count.
                self.cycle += 1

                print(f"\n{'='*80}")
                print(f"CYCLE {self.cycle} - {_utc_now().strftime('%Y-%m-%d %H:%M:%S')}")
                print(f"Model: {self.model_version} | Time to close: {int(time_to_close/60)} mins")
                print(f"{'='*80}")

                # Execute trade
                trade_result = self.execute_trade()

                # Log trade
                trade_log = {
                    'timestamp': _utc_now(),
                    'cycle': self.cycle,
                    'portfolio_value': trade_result['portfolio_value'],
                    'cash': trade_result['cash'],
                    'turbulence': trade_result['turbulence'],
                    'num_trades': sum(
                        1 for d in trade_result['decisions'] if d['action'] != 'HOLD'
                    ),
                }
                self.trading_history.append(trade_log)

                # Check and fine-tune
                self.check_and_finetune()

                # Save trading history (rewritten in full every cycle)
                pd.DataFrame(self.trading_history).to_csv(
                    output_dir / 'trading_history.csv',
                    index=False
                )

                # Wait for next interval, but never sleep past the square-off
                # point: with a long interval the next clock read could
                # otherwise happen after the close, where next_close already
                # refers to the following session and the loop would keep
                # trading. The extra second makes the next check land inside
                # the square-off window rather than exactly on its edge.
                interval_sec = self.config['TIME_INTERVAL_MIN'] * 60
                spent_sec = time.monotonic() - cycle_started
                until_window_sec = (time_to_close - square_off_window_sec
                                    - spent_sec + 1.0)
                time.sleep(max(0.0, min(interval_sec, until_window_sec)))

        except KeyboardInterrupt:
            print("\n‚ö†Ô∏è  Interrupted by user")

        # Final summary
        self._print_summary()
    
    def _print_summary(self):
        """Print final trading summary."""
        print(f"\n{'='*80}")
        print("TRADING SESSION SUMMARY")
        print(f"{'='*80}")
        print(f"Total cycles: {self.cycle}")
        print(f"Trading decisions: {len(self.trading_history)}")
        print(f"Fine-tuning sessions: {len(self.finetune_history)}")
        
        if self.trading_history:
            final_value = self.trading_history[-1]['portfolio_value']
            initial_value = self.trading_history[0]['portfolio_value']
            total_return = (final_value - initial_value) / initial_value * 100
            
            print(f"\nüí∞ Portfolio Performance:")
            print(f"   Initial: ${initial_value:,.2f}")
            print(f"   Final: ${final_value:,.2f}")
            print(f"   Return: {total_return:+.2f}%")
        
        if self.finetune_history:
            accepted = sum(1 for r in self.finetune_history if r['accepted'])
            avg_improvement = np.mean([r['improvement_pct'] for r in self.finetune_history])
            
            print(f"\nüîÑ Fine-tuning:")
            print(f"   Accepted: {accepted}/{len(self.finetune_history)}")
            print(f"   Avg improvement: {avg_improvement:.2f}%")
        
        print(f"\n‚úì Results saved to: {self.config['OUTPUT_DIR']}")
        print("="*80)


# Cell-completion marker: this line is reached only if the class body above
# executed without raising.
print("‚úì ProductionPaperTrading class defined")

‚úì ProductionPaperTrading class defined


# Part 7: Run Paper Trading

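Execute the paper trading loop with explainability and fine-tuning. This cell blocks until the loop squares off all positions about 15 minutes before market close; interrupt the kernel to stop it earlier (an interrupt does not square off open positions).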

In [18]:
# Initialize paper trading system
# The trader receives the whole CONFIG plus the trained-model path taken from it.
# NOTE(review): judging by this cell's recorded output, construction loads the
# model and downloads recent market data, so it may take a while — confirm in
# ProductionPaperTrading.__init__.
trader = ProductionPaperTrading(
    config=CONFIG,
    model_path=CONFIG['TRAINED_MODEL']
)

# Start trading loop
# Echo the settings that drive the session before handing control to the loop.
print("\nüöÄ Starting production paper trading")
print(f"   Trade interval: {CONFIG['TIME_INTERVAL_MIN']} minute(s)")
print(f"   Fine-tune interval: {CONFIG['FINETUNE_INTERVAL_HOURS']} hour(s)")
print(f"   Explainability: {CONFIG['ENABLE_EXPLANATIONS']}")
print(f"   State dimension: {CONFIG['state_dim']} (Production format)")

# Blocking call: returns after the loop squares off near market close, or after
# a KeyboardInterrupt raised inside the trading loop.
trader.run()

ü§ñ Loading PPO model: trained_models/agent_ppo.zip
‚úì PPO model loaded
‚úì ExplainableAgent initialized (SHAP + LIME only)
‚úì Wrapped with ExplainableAgent

üìä Initializing data collection...
‚úì CSV exists: production_paper_trading_data.csv (22,620 records)

üì• Fetching historical data for 2 trading days...
  Download range: 2026-01-08 to 2026-01-10
  Downloading...
empty
  ‚úì Downloaded: 22,584 records
Data cleaning started
align start and end dates
produce full timestamp index
Start processing tickers
The price of the first row for ticker AAPL is NaN. It will be filled with the first valid price.
The price of the first row for ticker AMGN is NaN. It will be filled with the first valid price.
The price of the first row for ticker AXP is NaN. It will be filled with the first valid price.
The price of the first row for ticker BA is NaN. It will be filled with the first valid price.
The price of the first row for ticker CAT is NaN. It will be filled with the first valid price.


KeyboardInterrupt: 

# Part 8: Performance Analysis

In [None]:
# Visualization imports
import matplotlib.dates as mdates
import matplotlib.pyplot as plt
import matplotlib.ticker as ticker


def _plot_intraday_series(ax, timestamps, values, *, title, ylabel, color):
    """Draw one time series on ``ax`` using the styling shared by both panels.

    Args:
        ax: Matplotlib axes to draw on.
        timestamps: x values (datetime-like).
        values: y values, same length as ``timestamps``.
        title: Axes title.
        ylabel: Label for the y axis.
        color: Line and marker colour.
    """
    ax.plot(timestamps, values, linewidth=2, color=color, marker='o', markersize=4)
    ax.set_title(title, fontsize=16, fontweight='bold')
    ax.set_xlabel('Time', fontsize=12)
    ax.set_ylabel(ylabel, fontsize=12)
    ax.grid(True, alpha=0.3)
    # Tick labels show clock time only (HH:MM). The trading loop logs its
    # timestamps in UTC, so these are UTC times.
    ax.xaxis.set_major_formatter(mdates.DateFormatter('%H:%M'))


# Load results
output_dir = Path(CONFIG['OUTPUT_DIR'])
trading_history_path = output_dir / 'trading_history.csv'
finetune_history_path = output_dir / 'finetune_history.csv'

if trading_history_path.exists():
    df_trading = pd.read_csv(trading_history_path)
    df_trading['timestamp'] = pd.to_datetime(df_trading['timestamp'])

    # Two stacked panels: portfolio value on top, cash balance below.
    fig, axes = plt.subplots(2, 1, figsize=(14, 10))

    _plot_intraday_series(
        axes[0], df_trading['timestamp'], df_trading['portfolio_value'],
        title='Portfolio Value Over Time', ylabel='Portfolio Value ($)', color='blue',
    )
    _plot_intraday_series(
        axes[1], df_trading['timestamp'], df_trading['cash'],
        title='Cash Balance Over Time', ylabel='Cash ($)', color='green',
    )

    fig.tight_layout()
    fig.savefig(output_dir / 'trading_performance.png', dpi=150, bbox_inches='tight')
    plt.show()

    # Performance metrics (first logged cycle vs. last logged cycle)
    initial_value = df_trading['portfolio_value'].iloc[0]
    final_value = df_trading['portfolio_value'].iloc[-1]
    total_return = (final_value - initial_value) / initial_value * 100
    max_value = df_trading['portfolio_value'].max()
    min_value = df_trading['portfolio_value'].min()

    print("\n" + "="*60)
    print("PERFORMANCE METRICS")
    print("="*60)
    print(f"Initial Portfolio Value: ${initial_value:,.2f}")
    print(f"Final Portfolio Value:   ${final_value:,.2f}")
    print(f"Total Return:            {total_return:+.2f}%")
    print(f"Max Value:               ${max_value:,.2f}")
    print(f"Min Value:               ${min_value:,.2f}")
    print(f"Total Cycles:            {len(df_trading)}")
    print("="*60)
else:
    print("‚ö†Ô∏è  No trading history found")

# Fine-tuning analysis
if finetune_history_path.exists():
    df_finetune = pd.read_csv(finetune_history_path)
    df_finetune['timestamp'] = pd.to_datetime(df_finetune['timestamp'])

    fig, ax = plt.subplots(figsize=(14, 6))

    # Performance improvement bar chart: green = accepted, red = rejected
    colors = ['green' if x else 'red' for x in df_finetune['accepted']]
    bars = ax.bar(range(len(df_finetune)), df_finetune['improvement_pct'],
                  color=colors, alpha=0.7, edgecolor='black')
    ax.axhline(y=0, color='black', linestyle='--', linewidth=1)
    ax.set_title('Fine-Tuning Performance Improvement', fontsize=16, fontweight='bold')
    ax.set_xlabel('Fine-Tuning Session', fontsize=12)
    ax.set_ylabel('Improvement (%)', fontsize=12)
    ax.grid(True, alpha=0.3, axis='y')

    # Add labels: mark above positive bars, below the rest
    for bar, acc in zip(bars, df_finetune['accepted']):
        height = bar.get_height()
        label = '‚úì' if acc else '‚úó'
        ax.text(bar.get_x() + bar.get_width()/2., height,
                label, ha='center', va='bottom' if height > 0 else 'top',
                fontsize=14, fontweight='bold')

    fig.tight_layout()
    fig.savefig(output_dir / 'finetune_performance.png', dpi=150, bbox_inches='tight')
    plt.show()

    # Fine-tuning summary
    accepted_count = df_finetune['accepted'].sum()
    total_count = len(df_finetune)
    avg_improvement = df_finetune['improvement_pct'].mean()

    print("\n" + "="*60)
    print("FINE-TUNING SUMMARY")
    print("="*60)
    print(f"Total Sessions:       {total_count}")
    print(f"Accepted:             {accepted_count} ({accepted_count/total_count*100:.1f}%)")
    print(f"Rejected:             {total_count - accepted_count}")
    print(f"Avg Improvement:      {avg_improvement:+.2f}%")
    print("="*60)
else:
    print("‚ö†Ô∏è  No fine-tuning history found")

print(f"\n‚úì All results saved to: {output_dir}")

# Part 9: Explainability Analysis

Check whether SHAP/LIME explanation data was recorded in the trading history, then summarize trade counts and high-turbulence cycles.

In [None]:
# Resolve the history file here as well, so this cell does not depend on the
# plotting cell having been run first.
trading_history_path = Path(CONFIG['OUTPUT_DIR']) / 'trading_history.csv'

# Check if we have explanation data in trading history
if trading_history_path.exists():
    df_trading = pd.read_csv(trading_history_path)

    print("\nüìä EXPLAINABILITY SUMMARY")
    print("="*60)

    if 'explanations' in df_trading.columns:
        # Count only the cycles that actually carry an explanation; rows where
        # the column is empty must not be reported as explained.
        explained_cycles = int(df_trading['explanations'].notna().sum())
        print("‚úì Explanation data available")
        print(f"  Total trading cycles with explanations: {explained_cycles}")
        print("\nNote: Detailed SHAP/LIME feature importance is logged during trading.")
        print("      Check console output for decision justifications.")
    else:
        print("‚ö†Ô∏è  No explanation data found in trading history")
        print("   This is normal - explanations are logged separately during trading")

    print("="*60)

    # Summary statistics
    print(f"\nüìà TRADING STATISTICS")
    print("="*60)

    if 'num_trades' in df_trading.columns:
        total_trades = df_trading['num_trades'].sum()
        avg_trades_per_cycle = df_trading['num_trades'].mean()
        max_trades = df_trading['num_trades'].max()

        print(f"Total trades executed: {total_trades}")
        print(f"Avg trades per cycle: {avg_trades_per_cycle:.1f}")
        print(f"Max trades in one cycle: {max_trades}")

    if 'turbulence' in df_trading.columns:
        # A cycle counts as high-turbulence when it meets or exceeds the threshold.
        high_turbulence_cycles = (df_trading['turbulence'] >= CONFIG['TURBULENCE_THRESHOLD']).sum()
        print(f"\nHigh turbulence events: {high_turbulence_cycles}")

    print("="*60)
else:
    print("‚ö†Ô∏è  No trading history available for analysis")