In [None]:
import sys
import warnings
warnings.filterwarnings('ignore')

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import os
import pickle
import torch
from datetime import datetime

# FinRL imports
from finrl.meta.env_stock_trading.env_stocktrading import StockTradingEnv
from finrl.agents.stablebaselines3.models import DRLAgent

# Import config
from config import *

# Setup directories
PROCESSED_DIR = "processed_data"
MODEL_DIR = "models"
AGENT_DIR = "agents"
EVALUATION_DIR = "evaluation"
REPORTS_DIR = "reports"

for dir_name in [EVALUATION_DIR, REPORTS_DIR]:
    if not os.path.exists(dir_name):
        os.makedirs(dir_name)

# Setup matplotlib
plt.style.use('seaborn-v0_8')
plt.rcParams['figure.figsize'] = (12, 8)
plt.rcParams['font.size'] = 10

print("üìÅ Setup directories completed")
print(f"üìä Starting Agent Evaluation Process")


In [None]:
# ‡∏ü‡∏±‡∏á‡∏Å‡πå‡∏ä‡∏±‡∏ô‡πÇ‡∏´‡∏•‡∏î Trained Model
def load_trained_model():
    print("üìÇ Loading trained model and configurations...")
    try:
        # ‡πÇ‡∏´‡∏•‡∏î model ‡∏ó‡∏µ‡πà‡πÄ‡∏ó‡∏£‡∏ô‡πÅ‡∏•‡πâ‡∏ß
        model_path = os.path.join(MODEL_DIR, "trained_ppo_simple.zip")
        if not os.path.exists(model_path):
            raise FileNotFoundError(f"Model file not found: {model_path}")
        
        # ‡πÇ‡∏´‡∏•‡∏î training info
        training_info_path = os.path.join(MODEL_DIR, "training_info_ppo.pkl")
        with open(training_info_path, 'rb') as f:
            training_info = pickle.load(f)
        
        # ‡πÇ‡∏´‡∏•‡∏î agent configs
        agent_configs_path = os.path.join(AGENT_DIR, "agent_configs.pkl")
        with open(agent_configs_path, 'rb') as f:
            agent_configs = pickle.load(f)
        
        # ‡πÇ‡∏´‡∏•‡∏î environment config
        env_config_path = os.path.join(AGENT_DIR, "environment_config.pkl")
        with open(env_config_path, 'rb') as f:
            env_config = pickle.load(f)
        
        print("‚úÖ Model ‡πÅ‡∏•‡∏∞ configurations ‡πÇ‡∏´‡∏•‡∏î‡∏™‡∏≥‡πÄ‡∏£‡πá‡∏à")
        return model_path, training_info, agent_configs, env_config
    
    except FileNotFoundError as e:
        print(f"‚ùå ‡πÑ‡∏°‡πà‡∏û‡∏ö‡πÑ‡∏ü‡∏•‡πå: {str(e)}")
        print("üîÑ ‡∏Å‡∏£‡∏∏‡∏ì‡∏≤‡∏£‡∏±‡∏ô notebook 3_agent_training.ipynb ‡∏Å‡πà‡∏≠‡∏ô")
        raise

# ‡πÇ‡∏´‡∏•‡∏î‡∏Ç‡πâ‡∏≠‡∏°‡∏π‡∏•‡∏ó‡∏µ‡πà‡∏õ‡∏£‡∏∞‡∏°‡∏ß‡∏•‡∏ú‡∏•‡πÅ‡∏•‡πâ‡∏ß
def load_test_data():
    print("üìä Loading test data...")
    try:
        # ‡πÇ‡∏´‡∏•‡∏î‡∏Ç‡πâ‡∏≠‡∏°‡∏π‡∏•‡∏ó‡∏µ‡πà‡∏õ‡∏£‡∏∞‡∏°‡∏ß‡∏•‡∏ú‡∏•‡πÅ‡∏•‡πâ‡∏ß
        with open(os.path.join(PROCESSED_DIR, 'processed_crypto_data.pkl'), 'rb') as f:
            df = pickle.load(f)
        
        # ‡πÅ‡∏ö‡πà‡∏á‡∏Ç‡πâ‡∏≠‡∏°‡∏π‡∏•‡πÄ‡∏õ‡πá‡∏ô train/val/test (‡πÄ‡∏´‡∏°‡∏∑‡∏≠‡∏ô‡πÉ‡∏ô notebook 3)
        total_len = len(df)
        train_size = int(total_len * 0.7)
        val_size = int(total_len * 0.15)
        
        # ‡πÄ‡∏≠‡∏≤‡πÄ‡∏â‡∏û‡∏≤‡∏∞ test data
        test_df = df.iloc[train_size + val_size:].reset_index(drop=True).copy()
        
        print(f"‚úÖ Test data loaded: {len(test_df)} rows")
        print(f"üìÖ Test period: {test_df['timestamp'].min()} to {test_df['timestamp'].max()}")
        print(f"üí∞ Cryptocurrencies: {sorted(test_df['tic'].unique())}")
        
        return test_df
    
    except FileNotFoundError as e:
        print(f"‚ùå ‡πÑ‡∏°‡πà‡∏û‡∏ö‡πÑ‡∏ü‡∏•‡πå‡∏Ç‡πâ‡∏≠‡∏°‡∏π‡∏•: {str(e)}")
        print("üîÑ ‡∏Å‡∏£‡∏∏‡∏ì‡∏≤‡∏£‡∏±‡∏ô notebook 1_data_preparation.ipynb ‡∏Å‡πà‡∏≠‡∏ô")
        raise

# ‡πÄ‡∏£‡∏µ‡∏¢‡∏Å‡πÉ‡∏ä‡πâ‡∏ü‡∏±‡∏á‡∏Å‡πå‡∏ä‡∏±‡∏ô
model_path, training_info, agent_configs, env_config = load_trained_model()
test_df = load_test_data()

# ‡πÅ‡∏™‡∏î‡∏á‡∏Ç‡πâ‡∏≠‡∏°‡∏π‡∏• training info
print(f"\nüìã Training Information:")
print(f"  ü§ñ Model: {training_info.get('model_name', 'Unknown')}")
print(f"  üìä Total timesteps: {training_info.get('total_timesteps', 'Unknown'):,}")
print(f"  ‚è±Ô∏è Training time: {training_info.get('training_time', 'Unknown')}")
print(f"  üéØ Final reward: {training_info.get('final_reward', 'Unknown')}")


In [None]:
# ‡∏™‡∏£‡πâ‡∏≤‡∏á Test Environment
def create_test_environment(test_df, env_config):
    print("üèóÔ∏è Creating test environment...")
    
    # ‡πÄ‡∏ï‡∏£‡∏µ‡∏¢‡∏°‡∏Ç‡πâ‡∏≠‡∏°‡∏π‡∏• test ‡∏™‡∏≥‡∏´‡∏£‡∏±‡∏ö FinRL
    test_data = test_df.copy()
    
    # ‡∏ï‡∏£‡∏ß‡∏à‡∏™‡∏≠‡∏ö‡πÅ‡∏•‡∏∞‡πÅ‡∏õ‡∏•‡∏á‡∏ä‡∏∑‡πà‡∏≠‡∏Ñ‡∏≠‡∏•‡∏±‡∏°‡∏ô‡πå‡πÉ‡∏´‡πâ‡∏ï‡∏£‡∏á‡∏Å‡∏±‡∏ö FinRL
    if 'close' not in test_data.columns:
        if 'Close' in test_data.columns:
            test_data['close'] = test_data['Close']
        else:
            raise ValueError("‡πÑ‡∏°‡πà‡∏û‡∏ö‡∏Ñ‡∏≠‡∏•‡∏±‡∏°‡∏ô‡πå‡∏£‡∏≤‡∏Ñ‡∏≤‡∏õ‡∏¥‡∏î")
    
    # ‡∏ï‡∏£‡∏ß‡∏à‡∏™‡∏≠‡∏ö‡∏Ñ‡∏≠‡∏•‡∏±‡∏°‡∏ô‡πå‡∏ó‡∏µ‡πà‡∏à‡∏≥‡πÄ‡∏õ‡πá‡∏ô
    if 'date' not in test_data.columns and 'timestamp' in test_data.columns:
        test_data['timestamp'] = pd.to_datetime(test_data['timestamp'])
        test_data['date'] = test_data['timestamp'].dt.strftime('%Y-%m-%d')
    
    # ‡∏à‡∏±‡∏î‡πÄ‡∏£‡∏µ‡∏¢‡∏á‡∏Ç‡πâ‡∏≠‡∏°‡∏π‡∏•
    test_data.sort_values(['date', 'tic'], inplace=True)
    test_data.reset_index(drop=True, inplace=True)
    
    # ‡∏™‡∏£‡πâ‡∏≤‡∏á day index
    unique_dates = sorted(test_data['date'].unique())
    date_to_index = {date: idx for idx, date in enumerate(unique_dates)}
    test_data['day'] = test_data['date'].map(date_to_index)
    test_data.set_index('day', inplace=True)
    
    # ‡πÄ‡∏ï‡∏£‡∏µ‡∏¢‡∏° environment kwargs
    env_kwargs = env_config['env_kwargs'].copy()
    env_kwargs.pop('df', None)  # ‡∏•‡∏ö df ‡∏≠‡∏≠‡∏Å‡πÄ‡∏û‡∏∑‡πà‡∏≠‡πÑ‡∏°‡πà‡πÉ‡∏´‡πâ‡∏™‡πà‡∏á‡∏ã‡πâ‡∏≥
    
    # ‡∏™‡∏£‡πâ‡∏≤‡∏á test environment
    test_env = StockTradingEnv(df=test_data, **env_kwargs)
    
    print(f"‚úÖ Test environment created successfully")
    print(f"üìä Test data shape: {test_data.shape}")
    print(f"üìÖ Test period: {test_data.index.min()} to {test_data.index.max()} days")
    
    return test_env, test_data

# ‡∏ó‡∏î‡∏™‡∏≠‡∏ö Agent
def test_agent(model_path, test_env, agent_configs):
    print("üß™ Testing trained agent...")
    
    # ‡πÇ‡∏´‡∏•‡∏î model
    model_name = agent_configs['model_name']
    agent = DRLAgent(env=test_env)
    
    # ‡πÇ‡∏´‡∏•‡∏î model ‡∏ó‡∏µ‡πà‡πÄ‡∏ó‡∏£‡∏ô‡πÅ‡∏•‡πâ‡∏ß
    model = agent.get_model(model_name)
    model = model.load(model_path)
    
    print(f"‚úÖ Model {model_name} loaded successfully")
    
    # ‡∏ó‡∏î‡∏™‡∏≠‡∏ö agent
    print("üöÄ Running agent on test data...")
    
    # reset environment
    obs = test_env.reset()
    done = False
    
    # ‡πÄ‡∏Å‡πá‡∏ö‡∏Ç‡πâ‡∏≠‡∏°‡∏π‡∏•‡∏Å‡∏≤‡∏£‡∏ó‡∏î‡∏™‡∏≠‡∏ö
    test_results = {
        'actions': [],
        'rewards': [],
        'portfolio_values': [],
        'positions': [],
        'timestamps': []
    }
    
    step = 0
    while not done:
        # ‡πÉ‡∏´‡πâ agent ‡∏ï‡∏±‡∏î‡∏™‡∏¥‡∏ô‡πÉ‡∏à
        action, _states = model.predict(obs, deterministic=True)
        
        # ‡∏î‡∏≥‡πÄ‡∏ô‡∏¥‡∏ô‡∏Å‡∏≤‡∏£ action
        obs, reward, done, info = test_env.step(action)
        
        # ‡∏ö‡∏±‡∏ô‡∏ó‡∏∂‡∏Å‡∏ú‡∏•‡∏•‡∏±‡∏û‡∏ò‡πå
        test_results['actions'].append(action)
        test_results['rewards'].append(reward)
        test_results['portfolio_values'].append(test_env.asset_memory[-1])
        test_results['positions'].append(test_env.state[test_env.stock_dim:test_env.stock_dim*2])
        
        step += 1
        if step % 100 == 0:
            print(f"  Step {step}, Portfolio Value: ${test_env.asset_memory[-1]:,.2f}")
    
    print(f"‚úÖ Testing completed after {step} steps")
    print(f"üí∞ Final Portfolio Value: ${test_env.asset_memory[-1]:,.2f}")
    print(f"üìà Total Return: {(test_env.asset_memory[-1] / test_env.initial_amount - 1) * 100:.2f}%")
    
    return test_results, test_env

# ‡πÄ‡∏£‡∏µ‡∏¢‡∏Å‡πÉ‡∏ä‡πâ‡∏ü‡∏±‡∏á‡∏Å‡πå‡∏ä‡∏±‡∏ô
test_env, test_data = create_test_environment(test_df, env_config)
test_results, test_env_final = test_agent(model_path, test_env, agent_configs)


In [None]:
# ‡∏Ñ‡∏≥‡∏ô‡∏ß‡∏ì Performance Metrics
def calculate_performance_metrics(test_env, test_results):
    print("üìà Calculating performance metrics...")
    
    # ‡∏Ç‡πâ‡∏≠‡∏°‡∏π‡∏•‡∏û‡∏∑‡πâ‡∏ô‡∏ê‡∏≤‡∏ô
    initial_amount = test_env.initial_amount
    final_amount = test_env.asset_memory[-1]
    total_return = (final_amount / initial_amount - 1) * 100
    
    # Portfolio values over time
    portfolio_values = test_env.asset_memory
    returns = pd.Series(portfolio_values).pct_change().dropna()
    
    # Performance metrics
    metrics = {
        'Initial Amount': f"${initial_amount:,.2f}",
        'Final Amount': f"${final_amount:,.2f}",
        'Total Return': f"{total_return:.2f}%",
        'Sharpe Ratio': returns.mean() / returns.std() * np.sqrt(252) if returns.std() > 0 else 0,
        'Max Drawdown': f"{(returns.cumsum().expanding().max() - returns.cumsum()).max() * 100:.2f}%",
        'Volatility': f"{returns.std() * np.sqrt(252) * 100:.2f}%",
        'Total Trades': len([a for a in test_results['actions'] if np.sum(np.abs(a)) > 0])
    }
    
    return metrics

# ‡∏™‡∏£‡πâ‡∏≤‡∏á Visualization
def create_visualizations(test_env, test_results, test_data):
    print("üìä Creating visualizations...")
    
    fig, axes = plt.subplots(2, 2, figsize=(16, 12))
    
    # 1. Portfolio Value over Time
    axes[0, 0].plot(test_env.asset_memory, linewidth=2, color='blue')
    axes[0, 0].axhline(y=test_env.initial_amount, color='red', linestyle='--', alpha=0.7, label='Initial Amount')
    axes[0, 0].set_title('Portfolio Value Over Time', fontsize=14, fontweight='bold')
    axes[0, 0].set_xlabel('Trading Days')
    axes[0, 0].set_ylabel('Portfolio Value ($)')
    axes[0, 0].legend()
    axes[0, 0].grid(True, alpha=0.3)
    
    # 2. Daily Returns Distribution
    portfolio_values = pd.Series(test_env.asset_memory)
    daily_returns = portfolio_values.pct_change().dropna()
    axes[0, 1].hist(daily_returns, bins=50, alpha=0.7, color='green', edgecolor='black')
    axes[0, 1].axvline(daily_returns.mean(), color='red', linestyle='--', label=f'Mean: {daily_returns.mean():.4f}')
    axes[0, 1].set_title('Daily Returns Distribution', fontsize=14, fontweight='bold')
    axes[0, 1].set_xlabel('Daily Return')
    axes[0, 1].set_ylabel('Frequency')
    axes[0, 1].legend()
    axes[0, 1].grid(True, alpha=0.3)
    
    # 3. Cumulative Returns
    cumulative_returns = (1 + daily_returns).cumprod() - 1
    axes[1, 0].plot(cumulative_returns * 100, linewidth=2, color='purple')
    axes[1, 0].set_title('Cumulative Returns (%)', fontsize=14, fontweight='bold')
    axes[1, 0].set_xlabel('Trading Days')
    axes[1, 0].set_ylabel('Cumulative Return (%)')
    axes[1, 0].grid(True, alpha=0.3)
    
    # 4. Rewards over Time
    rewards = test_results['rewards']
    axes[1, 1].plot(rewards, linewidth=1, alpha=0.7, color='orange')
    axes[1, 1].plot(pd.Series(rewards).rolling(window=50).mean(), linewidth=2, color='red', label='50-day MA')
    axes[1, 1].set_title('Rewards Over Time', fontsize=14, fontweight='bold')
    axes[1, 1].set_xlabel('Trading Days')
    axes[1, 1].set_ylabel('Reward')
    axes[1, 1].legend()
    axes[1, 1].grid(True, alpha=0.3)
    
    plt.tight_layout()
    plt.savefig(os.path.join(EVALUATION_DIR, 'agent_performance.png'), dpi=300, bbox_inches='tight')
    plt.show()

# ‡πÄ‡∏£‡∏µ‡∏¢‡∏Å‡πÉ‡∏ä‡πâ‡∏ü‡∏±‡∏á‡∏Å‡πå‡∏ä‡∏±‡∏ô
metrics = calculate_performance_metrics(test_env_final, test_results)

print("üìä Performance Metrics:")
print("=" * 50)
for key, value in metrics.items():
    print(f"{key:.<30} {value}")
print("=" * 50)

# ‡∏™‡∏£‡πâ‡∏≤‡∏á visualizations
create_visualizations(test_env_final, test_results, test_data)

# ‡∏ö‡∏±‡∏ô‡∏ó‡∏∂‡∏Å‡∏ú‡∏•‡∏•‡∏±‡∏û‡∏ò‡πå
results_summary = {
    'metrics': metrics,
    'test_results': test_results,
    'evaluation_date': datetime.now().strftime('%Y-%m-%d %H:%M:%S')
}

with open(os.path.join(EVALUATION_DIR, 'evaluation_results.pkl'), 'wb') as f:
    pickle.dump(results_summary, f)

print(f"\n‚úÖ Evaluation completed successfully!")
print(f"üìÅ Results saved to: {EVALUATION_DIR}/")
print(f"üìä Visualizations saved to: {EVALUATION_DIR}/agent_performance.png")


In [1]:
import sys
import warnings
warnings.filterwarnings('ignore')

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import os
import pickle
import torch
from datetime import datetime
from stable_baselines3 import PPO, A2C, DDPG, SAC

# FinRL imports
from finrl.meta.env_stock_trading.env_stocktrading import StockTradingEnv
from finrl.agents.stablebaselines3.models import DRLAgent

# Import config
from config import *

def setup_device():
    """
    ‡∏ï‡∏£‡∏ß‡∏à‡∏™‡∏≠‡∏ö‡πÅ‡∏•‡∏∞‡∏ï‡∏±‡πâ‡∏á‡∏Ñ‡πà‡∏≤‡∏Å‡∏≤‡∏£‡πÉ‡∏ä‡πâ‡∏á‡∏≤‡∏ô GPU/CPU (‡∏ô‡∏≥‡∏°‡∏≤‡∏à‡∏≤‡∏Å main.py)
    """
    print("\nüîç ‡∏ï‡∏£‡∏ß‡∏à‡∏™‡∏≠‡∏ö‡∏Å‡∏≤‡∏£‡πÉ‡∏ä‡πâ‡∏á‡∏≤‡∏ô GPU/CPU")
    print("-" * 50)
    
    if torch.cuda.is_available():
        device = torch.device("cuda")
        print(f"‚úÖ ‡∏û‡∏ö GPU: {torch.cuda.get_device_name(0)}")
        print(f"üìä ‡∏à‡∏≥‡∏ô‡∏ß‡∏ô GPU: {torch.cuda.device_count()}")
        print(f"üíæ GPU Memory: {torch.cuda.get_device_properties(0).total_memory / 1024**3:.1f} GB")
    else:
        device = torch.device("cpu")
        print("‚ÑπÔ∏è ‡πÑ‡∏°‡πà‡∏û‡∏ö GPU ‡πÉ‡∏ä‡πâ CPU ‡πÅ‡∏ó‡∏ô")
    
    # ‡∏ï‡∏±‡πâ‡∏á‡∏Ñ‡πà‡∏≤ environment variable ‡∏™‡∏≥‡∏´‡∏£‡∏±‡∏ö Stable Baselines3
    os.environ["CUDA_VISIBLE_DEVICES"] = "0" if torch.cuda.is_available() else "-1"
    
    return device

def setup_directories():
    """
    ‡∏™‡∏£‡πâ‡∏≤‡∏á‡πÇ‡∏ü‡∏•‡πÄ‡∏î‡∏≠‡∏£‡πå‡∏ó‡∏µ‡πà‡∏à‡∏≥‡πÄ‡∏õ‡πá‡∏ô‡∏ó‡∏±‡πâ‡∏á‡∏´‡∏°‡∏î (‡∏ô‡∏≥‡∏°‡∏≤‡∏à‡∏≤‡∏Å main.py)
    """
    directories = {
        'PROCESSED_DIR': "processed_data",
        'MODEL_DIR': "models", 
        'AGENT_DIR': "agents",
        'EVALUATION_DIR': "evaluation",
        'REPORTS_DIR': "reports"
    }
    
    created_dirs = []
    for name, path in directories.items():
        if not os.path.exists(path):
            os.makedirs(path)
            created_dirs.append(path)
            print(f"üìÅ Created directory: {path}")
        globals()[name] = path
    
    if created_dirs:
        print(f"‚úÖ Created {len(created_dirs)} directories")
    else:
        print("‚úÖ All directories already exist")
    
    return directories

# Initialize system
print("üöÄ Starting Agent Evaluation Process")
print("="*60)

# Setup device ‡πÅ‡∏•‡∏∞ directories
device = setup_device()
dirs = setup_directories()

# Global variables
PROCESSED_DIR = dirs['PROCESSED_DIR'] 
MODEL_DIR = dirs['MODEL_DIR']
AGENT_DIR = dirs['AGENT_DIR']
EVALUATION_DIR = dirs['EVALUATION_DIR']
REPORTS_DIR = dirs['REPORTS_DIR']

# Global status variables
SETUP_SUCCESS = False
EVALUATION_SUCCESS = False

print("üìÅ Setup directories completed")
print(f"üìä Agent Evaluation System Ready")

# Set plotting style
plt.style.use('default')
sns.set_palette("husl")


üöÄ Starting Agent Evaluation Process

üîç ‡∏ï‡∏£‡∏ß‡∏à‡∏™‡∏≠‡∏ö‡∏Å‡∏≤‡∏£‡πÉ‡∏ä‡πâ‡∏á‡∏≤‡∏ô GPU/CPU
--------------------------------------------------
‚ÑπÔ∏è ‡πÑ‡∏°‡πà‡∏û‡∏ö GPU ‡πÉ‡∏ä‡πâ CPU ‡πÅ‡∏ó‡∏ô
‚úÖ All directories already exist
üìÅ Setup directories completed
üìä Agent Evaluation System Ready


In [2]:
def load_evaluation_setup():
    """
    ‡πÇ‡∏´‡∏•‡∏î‡∏Ç‡πâ‡∏≠‡∏°‡∏π‡∏•‡∏ó‡∏µ‡πà‡∏à‡∏≥‡πÄ‡∏õ‡πá‡∏ô‡∏™‡∏≥‡∏´‡∏£‡∏±‡∏ö‡∏Å‡∏≤‡∏£‡∏õ‡∏£‡∏∞‡πÄ‡∏°‡∏¥‡∏ô‡∏ú‡∏• (‡∏õ‡∏£‡∏±‡∏ö‡∏õ‡∏£‡∏∏‡∏á‡∏à‡∏≤‡∏Å main.py)
    """
    print("üìÇ Loading evaluation setup...")
    
    # ‡πÇ‡∏´‡∏•‡∏î‡∏Ç‡πâ‡∏≠‡∏°‡∏π‡∏•‡∏ó‡∏µ‡πà‡∏õ‡∏£‡∏∞‡∏°‡∏ß‡∏•‡∏ú‡∏•‡πÅ‡∏•‡πâ‡∏ß
    try:
        pickle_file = os.path.join(PROCESSED_DIR, "processed_crypto_data.pkl")
        with open(pickle_file, 'rb') as f:
            df = pickle.load(f)
        print(f"‚úÖ Loaded processed data from pickle ({len(df)} rows)")
    except Exception as e:
        print(f"‚ö†Ô∏è Pickle load failed: {str(e)}")
        try:
            csv_file = os.path.join(PROCESSED_DIR, "processed_crypto_data.csv")
            df = pd.read_csv(csv_file)
            df['timestamp'] = pd.to_datetime(df['timestamp'])
            print(f"‚úÖ Loaded processed data from CSV ({len(df)} rows)")
        except Exception as e2:
            print(f"‚ùå Failed to load data: {str(e2)}")
            raise ValueError("Cannot load processed data. Please run data processing first.")
    
    # ‡πÇ‡∏´‡∏•‡∏î environment config
    try:
        env_config_file = os.path.join(AGENT_DIR, "environment_config.pkl")
        with open(env_config_file, 'rb') as f:
            env_config = pickle.load(f)
        print(f"‚úÖ Loaded environment config")
    except Exception as e:
        print(f"‚ö†Ô∏è Environment config load failed: {str(e)}")
        print("üìù Will use default configuration")
        env_config = None
    
    # ‡πÇ‡∏´‡∏•‡∏î training results
    if not os.path.exists(MODEL_DIR):
        raise ValueError(f"Model directory not found: {MODEL_DIR}")
        
    training_files = [f for f in os.listdir(MODEL_DIR) if f.startswith('training_info_') and f.endswith('.pkl')]
    
    if not training_files:
        print(f"‚ö†Ô∏è No training info files found in {MODEL_DIR}")
        print("üìù Looking for model files directly...")
        
        # ‡∏•‡∏≠‡∏á‡∏´‡∏≤ model files ‡πÇ‡∏î‡∏¢‡∏ï‡∏£‡∏á
        model_files = [f for f in os.listdir(MODEL_DIR) if f.endswith('.zip')]
        if model_files:
            print(f"üìã Found {len(model_files)} model files: {model_files}")
            return df, env_config, {}
        else:
            raise ValueError(f"No trained models found in {MODEL_DIR}")
    
    trained_models = {}
    for training_file in training_files:
        try:
            model_name = training_file.replace('training_info_', '').replace('.pkl', '')
            
            with open(os.path.join(MODEL_DIR, training_file), 'rb') as f:
                training_info = pickle.load(f)
            
            # ‡πÇ‡∏´‡∏•‡∏î trained model
            model_path = training_info['model_path']
            if os.path.exists(model_path + '.zip'):
                model_type = training_info['model_name'].split('_')[0].upper()
                
                if model_type == 'PPO':
                    model = PPO.load(model_path)
                elif model_type == 'A2C':
                    model = A2C.load(model_path)
                elif model_type == 'DDPG':
                    model = DDPG.load(model_path)
                elif model_type == 'SAC':
                    model = SAC.load(model_path)
                else:
                    print(f"‚ö†Ô∏è Unknown model type: {model_type}")
                    continue
                
                trained_models[model_name] = {
                    'model': model,
                    'training_info': training_info,
                    'model_type': model_type
                }
                
                print(f"‚úÖ Loaded {model_type} model: {model_name}")
            else:
                print(f"‚ö†Ô∏è Model file not found: {model_path}")
        except Exception as e:
            print(f"‚ö†Ô∏è Error loading {training_file}: {str(e)}")
            continue
    
    if not trained_models:
        print("‚ö†Ô∏è No trained models could be loaded!")
        return df, env_config, {}
    
    return df, env_config, trained_models

def create_robust_test_environment(df, env_config=None):
    """
    ‡∏™‡∏£‡πâ‡∏≤‡∏á test environment ‡πÅ‡∏ö‡∏ö robust (‡∏õ‡∏£‡∏±‡∏ö‡∏õ‡∏£‡∏∏‡∏á‡∏à‡∏≤‡∏Å main.py)
    """
    print("üèõÔ∏è Creating robust test environment...")
    
    # Data validation
    print(f"üîç Input data validation:")
    print(f"  Data shape: {df.shape}")
    print(f"  Columns: {list(df.columns)}")
    print(f"  Date range: {df['timestamp'].min()} to {df['timestamp'].max()}")
    
    # ‡πÅ‡∏ö‡πà‡∏á‡∏Ç‡πâ‡∏≠‡∏°‡∏π‡∏• (‡πÄ‡∏´‡∏°‡∏∑‡∏≠‡∏ô main.py)
    total_len = len(df)
    train_size = int(total_len * 0.7)
    val_size = int(total_len * 0.15)
    
    test_df = df.iloc[train_size + val_size:].copy()
    test_df = test_df.reset_index(drop=True)
    
    # ‡πÄ‡∏ï‡∏£‡∏µ‡∏¢‡∏°‡∏Ç‡πâ‡∏≠‡∏°‡∏π‡∏•
    test_df['timestamp'] = pd.to_datetime(test_df['timestamp'])
    if 'date' not in test_df.columns:
        test_df['date'] = test_df['timestamp'].dt.date
    
    # Sort ‡∏Ç‡πâ‡∏≠‡∏°‡∏π‡∏•‡∏ï‡∏≤‡∏° date ‡πÅ‡∏•‡∏∞ tic
    test_df = test_df.sort_values(['date', 'tic']).reset_index(drop=True)
    
    print(f"üìä Test data prepared:")
    print(f"  Shape: {test_df.shape}")
    print(f"  Symbols: {test_df['tic'].unique()}")
    print(f"  Date range: {test_df['timestamp'].min()} to {test_df['timestamp'].max()}")
    
    # ‡πÅ‡∏Å‡πâ‡πÑ‡∏Ç column names (‡πÄ‡∏´‡∏°‡∏∑‡∏≠‡∏ô main.py)
    price_column_mapping = {
        'Open': 'open', 'High': 'high', 'Low': 'low', 'Close': 'close', 'Volume': 'volume'
    }
    
    for old_col, new_col in price_column_mapping.items():
        if old_col in test_df.columns:
            test_df = test_df.rename(columns={old_col: new_col})
            print(f"üîß Renamed {old_col} -> {new_col}")
    
    # ‡∏´‡∏≤ technical indicators
    tech_cols = [col for col in test_df.columns if col.startswith(('macd', 'rsi', 'cci', 'adx', 'sma', 'ema', 'bb'))]
    print(f"üîç Technical indicators found: {len(tech_cols)} indicators")
    print(f"    {tech_cols[:5]}{'...' if len(tech_cols) > 5 else ''}")
    
    # ‡∏Ñ‡∏≥‡∏ô‡∏ß‡∏ì parameters
    unique_symbols = test_df['tic'].unique()
    stock_dim = len(unique_symbols)
    
    print(f"üéØ Environment parameters:")
    print(f"  Stock dimension: {stock_dim}")
    print(f"  Technical indicators: {len(tech_cols)}")
    
    # ‡∏™‡∏£‡πâ‡∏≤‡∏á environment (‡∏´‡∏•‡∏≤‡∏¢‡∏ß‡∏¥‡∏ò‡∏µ ‡πÄ‡∏´‡∏°‡∏∑‡∏≠‡∏ô main.py)
    test_env = None
    creation_method = "Unknown"
    
    # Method 1: ‡πÉ‡∏ä‡πâ saved config
    if env_config and 'env_kwargs' in env_config:
        try:
            print("üîÑ Trying saved environment config...")
            env_kwargs = env_config['env_kwargs'].copy()
            # ‡∏•‡∏ö df ‡∏≠‡∏≠‡∏Å‡πÄ‡∏û‡∏∑‡πà‡∏≠‡∏õ‡πâ‡∏≠‡∏á‡∏Å‡∏±‡∏ô conflict
            if 'df' in env_kwargs:
                del env_kwargs['df']
            
            test_env = StockTradingEnv(df=test_df, **env_kwargs)
            creation_method = "saved config"
            print(f"‚úÖ Test environment created with saved config")
        except Exception as e:
            print(f"‚ö†Ô∏è Error with saved config: {str(e)}")
            test_env = None
    
    # Method 2: ‡πÉ‡∏ä‡πâ standard config (‡πÄ‡∏´‡∏°‡∏∑‡∏≠‡∏ô main.py)
    if test_env is None:
        try:
            print("üîÑ Trying standard configuration...")
            
            # ‡∏™‡∏£‡πâ‡∏≤‡∏á environment arguments (‡πÄ‡∏´‡∏°‡∏∑‡∏≠‡∏ô main.py)
            env_kwargs = {
                "stock_dim": stock_dim,
                "hmax": HMAX if 'HMAX' in globals() else 100,
                "initial_amount": INITIAL_AMOUNT if 'INITIAL_AMOUNT' in globals() else 100000,
                "num_stock_shares": [0] * stock_dim,
                "buy_cost_pct": [0.001] * stock_dim,
                "sell_cost_pct": [0.001] * stock_dim,
                "reward_scaling": 1e-3,
                "state_space": 1 + 2 * stock_dim + stock_dim * len(tech_cols[:5]),  # ‡∏à‡∏≥‡∏Å‡∏±‡∏î indicators
                "action_space": stock_dim,
                "tech_indicator_list": tech_cols[:5] if len(tech_cols) > 5 else tech_cols,
                "print_verbosity": 0
            }
            
            test_env = StockTradingEnv(df=test_df, **env_kwargs)
            creation_method = "standard config"
            print(f"‚úÖ Test environment created with standard config")
        except Exception as e:
            print(f"‚ö†Ô∏è Error with standard config: {str(e)}")
            test_env = None
    
    # Method 3: ‡πÉ‡∏ä‡πâ minimal config
    if test_env is None:
        try:
            print("üîÑ Trying minimal configuration...")
            test_env = StockTradingEnv(
                df=test_df,
                stock_dim=stock_dim,
                initial_amount=100000,
                print_verbosity=0
            )
            creation_method = "minimal config"
            print(f"‚úÖ Test environment created with minimal config")
        except Exception as e:
            print(f"‚ùå All environment creation methods failed: {str(e)}")
            raise RuntimeError(f"Cannot create test environment. Last error: {str(e)}")
    
    # ‡∏™‡∏£‡∏∏‡∏õ‡∏ú‡∏•‡∏Å‡∏≤‡∏£‡∏™‡∏£‡πâ‡∏≤‡∏á environment
    print(f"\nüéØ Environment Creation Summary:")
    print(f"  Method used: {creation_method}")
    print(f"  Test data shape: {test_df.shape}")
    print(f"  Symbols: {test_df['tic'].unique()}")
    print(f"  Environment ready: ‚úÖ")
    
    return test_env, test_df

# Main setup execution
try:
    print("\nüöÄ Starting comprehensive evaluation setup...")
    
    # Step 1: Load data and models
    df, env_config, trained_models = load_evaluation_setup()
    
    # Step 2: Create test environment
    test_env, test_df = create_robust_test_environment(df, env_config)
    
    # Step 3: Verify setup
    print(f"\nüìä Setup Verification:")
    print(f"  ‚úÖ Data loaded: {df.shape}")
    print(f"  ‚úÖ Models available: {list(trained_models.keys()) if trained_models else 'None'}")
    print(f"  ‚úÖ Test data prepared: {test_df.shape}")
    print(f"  ‚úÖ Test environment created")
    print(f"  ‚úÖ Symbols in test set: {test_df['tic'].unique()}")
    
    SETUP_SUCCESS = True
    print(f"\nüéâ Evaluation setup completed successfully!")
    
except Exception as e:
    print(f"\n‚ùå Setup failed with error: {str(e)}")
    print("üîß Please check your data and model files")
    import traceback
    print("üìã Full error traceback:")
    traceback.print_exc()
    SETUP_SUCCESS = False



üöÄ Starting comprehensive evaluation setup...
üìÇ Loading evaluation setup...
‚úÖ Loaded processed data from pickle (5480 rows)
‚úÖ Loaded environment config
‚úÖ Loaded PPO model: ppo
üèõÔ∏è Creating robust test environment...
üîç Input data validation:
  Data shape: (5480, 18)
  Columns: ['date', 'Open', 'High', 'Low', 'Close', 'Volume', 'tic', 'sma_20', 'ema_20', 'rsi', 'ema_12', 'ema_26', 'macd', 'macd_signal', 'returns', 'volatility', 'price_sma_ratio', 'timestamp']
  Date range: 2022-01-01 00:00:00 to 2024-12-31 00:00:00
üìä Test data prepared:
  Shape: (823, 18)
  Symbols: ['BTC-USD' 'ETH-USD' 'SOL-USD' 'ADA-USD' 'BNB-USD']
  Date range: 2024-07-20 00:00:00 to 2024-12-31 00:00:00
üîß Renamed Open -> open
üîß Renamed High -> high
üîß Renamed Low -> low
üîß Renamed Close -> close
üîß Renamed Volume -> volume
üîç Technical indicators found: 7 indicators
    ['sma_20', 'ema_20', 'rsi', 'ema_12', 'ema_26']...
üéØ Environment parameters:
  Stock dimension: 5
  Technical i

Traceback (most recent call last):
  File "C:\Users\cyber\AppData\Local\Temp\ipykernel_35056\3738550722.py", line 204, in create_robust_test_environment
    test_env = StockTradingEnv(
               ^^^^^^^^^^^^^^^^
TypeError: StockTradingEnv.__init__() missing 8 required positional arguments: 'hmax', 'num_stock_shares', 'buy_cost_pct', 'sell_cost_pct', 'reward_scaling', 'state_space', 'action_space', and 'tech_indicator_list'

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "C:\Users\cyber\AppData\Local\Temp\ipykernel_35056\3738550722.py", line 233, in <module>
    test_env, test_df = create_robust_test_environment(df, env_config)
                        ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "C:\Users\cyber\AppData\Local\Temp\ipykernel_35056\3738550722.py", line 214, in create_robust_test_environment
    raise RuntimeError(f"Cannot create test environment. Last error: {str(e)}")
RuntimeError: Cannot create

In [3]:
def evaluate_agent_performance(trained_model, test_env, model_name="Agent"):
    """
    ‡∏õ‡∏£‡∏∞‡πÄ‡∏°‡∏¥‡∏ô‡∏ú‡∏•‡∏Å‡∏≤‡∏£‡∏ó‡∏≥‡∏á‡∏≤‡∏ô‡∏Ç‡∏≠‡∏á agent (‡∏ô‡∏≥‡∏°‡∏≤‡∏à‡∏≤‡∏Å main.py test_agent)
    """
    print(f"üìä Testing {model_name}...")
    
    try:
        # ‡∏£‡∏±‡∏ô backtest
        df_account_value, df_actions = DRLAgent.DRL_prediction(
            model=trained_model,
            environment=test_env
        )
        
        print(f"‚úÖ {model_name} backtesting completed")
        
        # ‡∏Ñ‡∏≥‡∏ô‡∏ß‡∏ì basic metrics
        initial_value = 100000  # default initial amount
        final_value = df_account_value['account_value'].iloc[-1]
        total_return = (final_value - initial_value) / initial_value * 100
        
        # ‡∏Ñ‡∏≥‡∏ô‡∏ß‡∏ì additional metrics
        returns = df_account_value['account_value'].pct_change().dropna()
        
        # Sharpe ratio (annualized)
        sharpe_ratio = returns.mean() / returns.std() * np.sqrt(252) if returns.std() > 0 else 0
        
        # Maximum Drawdown
        running_max = df_account_value['account_value'].expanding().max()
        drawdown = (df_account_value['account_value'] - running_max) / running_max
        max_drawdown = drawdown.min() * 100
        
        # Volatility (annualized)
        volatility = returns.std() * np.sqrt(252) * 100
        
        # Trading frequency
        total_trades = len(df_actions[df_actions != 0]) if len(df_actions) > 0 else 0
        
        results = {
            'model_name': model_name,
            'initial_value': initial_value,
            'final_value': final_value,
            'total_return': total_return,
            'sharpe_ratio': sharpe_ratio,
            'max_drawdown': max_drawdown,
            'volatility': volatility,
            'total_trades': total_trades,
            'account_values': df_account_value,
            'actions': df_actions,
            'daily_returns': returns
        }
        
        print(f"üìà {model_name} Results:")
        print(f"  Total Return: {total_return:.2f}%")
        print(f"  Sharpe Ratio: {sharpe_ratio:.3f}")
        print(f"  Max Drawdown: {max_drawdown:.2f}%")
        print(f"  Volatility: {volatility:.2f}%")
        print(f"  Total Trades: {total_trades}")
        
        return results
        
    except Exception as e:
        print(f"‚ùå Error testing {model_name}: {str(e)}")
        return None

def calculate_baseline_performance(test_df, symbols=None):
    """
    ‡∏Ñ‡∏≥‡∏ô‡∏ß‡∏ì Buy & Hold baseline ‡∏™‡∏≥‡∏´‡∏£‡∏±‡∏ö‡πÄ‡∏õ‡∏£‡∏µ‡∏¢‡∏ö‡πÄ‡∏ó‡∏µ‡∏¢‡∏ö (‡∏ô‡∏≥‡∏°‡∏≤‡∏à‡∏≤‡∏Å main.py analyze_results)
    """
    print("üìà Calculating Buy & Hold baselines...")
    
    if symbols is None:
        symbols = test_df['tic'].unique()
    
    baseline_results = {}
    
    for symbol in symbols:
        try:
            symbol_data = test_df[test_df['tic'] == symbol].copy()
            
            if len(symbol_data) == 0:
                print(f"‚ö†Ô∏è No data found for {symbol}")
                continue
            
            # ‡πÉ‡∏ä‡πâ close price (normalized)
            initial_price = symbol_data['close'].iloc[0]
            final_price = symbol_data['close'].iloc[-1]
            
            # ‡∏Ñ‡∏≥‡∏ô‡∏ß‡∏ì return
            if abs(initial_price) > 1e-10:  # avoid division by zero
                price_change = (final_price - initial_price) / abs(initial_price)
                portfolio_value = 100000 * (1 + price_change)
                total_return = price_change * 100
                
                # ‡∏Ñ‡∏≥‡∏ô‡∏ß‡∏ì metrics ‡∏≠‡∏∑‡πà‡∏ô‡πÜ
                returns = symbol_data['close'].pct_change().dropna()
                
                if len(returns) > 0 and returns.std() > 0:
                    sharpe_ratio = returns.mean() / returns.std() * np.sqrt(252)
                    volatility = returns.std() * np.sqrt(252) * 100
                    
                    # Maximum Drawdown
                    cumulative_returns = (1 + returns).cumprod()
                    running_max = cumulative_returns.expanding().max()
                    drawdown = (cumulative_returns - running_max) / running_max
                    max_drawdown = drawdown.min() * 100
                else:
                    sharpe_ratio = 0
                    volatility = 0
                    max_drawdown = 0
                
                baseline_results[symbol] = {
                    'total_return': total_return,
                    'final_value': portfolio_value,
                    'sharpe_ratio': sharpe_ratio,
                    'max_drawdown': max_drawdown,
                    'volatility': volatility,
                    'total_trades': 1  # Buy and hold = 1 trade
                }
                
                print(f"  {symbol}: {total_return:.2f}% return")
            else:
                print(f"‚ö†Ô∏è Invalid price data for {symbol}")
                
        except Exception as e:
            print(f"‚ö†Ô∏è Error calculating baseline for {symbol}: {str(e)}")
            continue
    
    print(f"‚úÖ Calculated baselines for {len(baseline_results)} symbols")
    return baseline_results

def create_performance_comparison_table(agent_results, baseline_results):
    """
    ‡∏™‡∏£‡πâ‡∏≤‡∏á‡∏ï‡∏≤‡∏£‡∏≤‡∏á‡πÄ‡∏õ‡∏£‡∏µ‡∏¢‡∏ö‡πÄ‡∏ó‡∏µ‡∏¢‡∏ö performance (‡∏õ‡∏£‡∏±‡∏ö‡∏õ‡∏£‡∏∏‡∏á‡∏à‡∏≤‡∏Å notebook ‡πÄ‡∏î‡∏¥‡∏°)
    """
    print("üìä Creating performance comparison table...")
    
    comparison_data = []
    
    # ‡πÄ‡∏û‡∏¥‡πà‡∏°‡∏Ç‡πâ‡∏≠‡∏°‡∏π‡∏• Agent results
    if agent_results:
        for result in agent_results if isinstance(agent_results, list) else [agent_results]:
            if result:
                comparison_data.append({
                    'Strategy': f"RL-Agent ({result['model_name']})",
                    'Total Return (%)': result['total_return'],
                    'Final Value ($)': result['final_value'],
                    'Sharpe Ratio': result['sharpe_ratio'],
                    'Max Drawdown (%)': result['max_drawdown'],
                    'Volatility (%)': result['volatility'],
                    'Total Trades': result['total_trades']
                })
    
    # ‡πÄ‡∏û‡∏¥‡πà‡∏°‡∏Ç‡πâ‡∏≠‡∏°‡∏π‡∏• Baseline strategies
    for symbol, baseline in baseline_results.items():
        comparison_data.append({
            'Strategy': f"Buy&Hold-{symbol}",
            'Total Return (%)': baseline['total_return'],
            'Final Value ($)': baseline['final_value'],
            'Sharpe Ratio': baseline['sharpe_ratio'],
            'Max Drawdown (%)': baseline['max_drawdown'],
            'Volatility (%)': baseline['volatility'],
            'Total Trades': baseline['total_trades']
        })
    
    if comparison_data:
        comparison_df = pd.DataFrame(comparison_data)
        comparison_df = comparison_df.sort_values('Total Return (%)', ascending=False)
        return comparison_df
    else:
        print("‚ö†Ô∏è No performance data available for comparison")
        return pd.DataFrame()

def plot_comprehensive_results(agent_results, baseline_results, test_df):
    """
    ‡∏™‡∏£‡πâ‡∏≤‡∏á‡∏Å‡∏£‡∏≤‡∏ü‡πÅ‡∏™‡∏î‡∏á‡∏ú‡∏•‡∏•‡∏±‡∏û‡∏ò‡πå‡πÅ‡∏ö‡∏ö‡∏Ñ‡∏£‡∏ö‡∏ñ‡πâ‡∏ß‡∏ô (‡∏õ‡∏£‡∏±‡∏ö‡∏õ‡∏£‡∏∏‡∏á‡∏à‡∏≤‡∏Å main.py plot_results)
    """
    print("üìä Creating comprehensive performance plots...")
    
    # Setup figure
    fig, axes = plt.subplots(2, 2, figsize=(16, 12))
    
    # Plot 1: Portfolio Value Evolution
    ax1 = axes[0, 0]
    
    # Plot agent portfolio
    if agent_results:
        agent_data = agent_results if not isinstance(agent_results, list) else agent_results[0]
        if agent_data and 'account_values' in agent_data:
            portfolio_values = agent_data['account_values']['account_value'].values
            dates = pd.to_datetime(test_df['timestamp'].unique()[:len(portfolio_values)])
            ax1.plot(dates, portfolio_values, label=f"RL Agent ({agent_data['model_name']})", 
                    linewidth=2, color='blue')
    
    # Plot baseline
    initial_amount = 100000
    ax1.axhline(y=initial_amount, color='red', linestyle='--', alpha=0.7, label='Initial Value')
    
    # Plot buy & hold for main symbol (BTC if available)
    main_symbol = 'BTC-USD' if 'BTC-USD' in test_df['tic'].unique() else test_df['tic'].unique()[0]
    if main_symbol in baseline_results:
        btc_data = test_df[test_df['tic'] == main_symbol].copy()
        if len(btc_data) > 0:
            btc_prices = btc_data.groupby('timestamp')['close'].first()
            btc_normalized = (btc_prices / btc_prices.iloc[0]) * initial_amount
            ax1.plot(btc_normalized.index, btc_normalized.values, 
                    label=f'Buy&Hold-{main_symbol}', linewidth=2, color='orange', alpha=0.8)
    
    ax1.set_title('Portfolio Value Evolution', fontsize=14, fontweight='bold')
    ax1.set_ylabel('Portfolio Value ($)')
    ax1.legend()
    ax1.grid(True, alpha=0.3)
    
    # Plot 2: Returns Comparison
    ax2 = axes[0, 1]
    
    strategies = []
    returns = []
    colors = []
    
    # Agent returns
    if agent_results:
        agent_data = agent_results if not isinstance(agent_results, list) else agent_results[0]
        if agent_data:
            strategies.append(f"RL-Agent")
            returns.append(agent_data['total_return'])
            colors.append('skyblue')
    
    # Baseline returns
    for symbol, baseline in baseline_results.items():
        strategies.append(f"B&H-{symbol}")
        returns.append(baseline['total_return'])
        colors.append('lightcoral')
    
    if strategies:
        bars = ax2.bar(range(len(strategies)), returns, color=colors, alpha=0.7)
        ax2.set_title('Total Returns Comparison', fontsize=14, fontweight='bold')
        ax2.set_ylabel('Return (%)')
        ax2.set_xticks(range(len(strategies)))
        ax2.set_xticklabels(strategies, rotation=45, ha='right')
        ax2.axhline(y=0, color='black', linestyle='-', alpha=0.3)
        ax2.grid(True, alpha=0.3)
        
        # Add value labels on bars
        for bar, value in zip(bars, returns):
            height = bar.get_height()
            ax2.text(bar.get_x() + bar.get_width()/2., height + (1 if height > 0 else -2),
                    f'{value:.1f}%', ha='center', va='bottom' if height > 0 else 'top', fontsize=10)
    
    # Plot 3: Risk Metrics
    ax3 = axes[1, 0]
    
    sharpe_ratios = []
    max_drawdowns = []
    
    if agent_results:
        agent_data = agent_results if not isinstance(agent_results, list) else agent_results[0]
        if agent_data:
            sharpe_ratios.append(agent_data['sharpe_ratio'])
            max_drawdowns.append(agent_data['max_drawdown'])
    
    for symbol, baseline in baseline_results.items():
        sharpe_ratios.append(baseline['sharpe_ratio'])
        max_drawdowns.append(baseline['max_drawdown'])
    
    if strategies:
        ax3_twin = ax3.twinx()
        
        x_pos = range(len(strategies))
        width = 0.35
        
        bars1 = ax3.bar([x - width/2 for x in x_pos], sharpe_ratios, width, 
                       label='Sharpe Ratio', color='lightgreen', alpha=0.7)
        bars2 = ax3_twin.bar([x + width/2 for x in x_pos], max_drawdowns, width,
                           label='Max Drawdown (%)', color='lightpink', alpha=0.7)
        
        ax3.set_title('Risk Metrics Comparison', fontsize=14, fontweight='bold')
        ax3.set_ylabel('Sharpe Ratio', color='green')
        ax3_twin.set_ylabel('Max Drawdown (%)', color='red')
        ax3.set_xticks(x_pos)
        ax3.set_xticklabels(strategies, rotation=45, ha='right')
        
        # Legends
        ax3.legend(loc='upper left')
        ax3_twin.legend(loc='upper right')
        ax3.grid(True, alpha=0.3)
    
    # Plot 4: Summary Statistics
    ax4 = axes[1, 1]
    ax4.axis('off')
    
    # Create summary text
    summary_text = "üìä PERFORMANCE SUMMARY\\n" + "="*40 + "\\n"
    
    if agent_results:
        agent_data = agent_results if not isinstance(agent_results, list) else agent_results[0]
        if agent_data:
            summary_text += f"ü§ñ RL Agent ({agent_data['model_name']}):\\n"
            summary_text += f"  ‚Ä¢ Total Return: {agent_data['total_return']:.2f}%\\n"
            summary_text += f"  ‚Ä¢ Sharpe Ratio: {agent_data['sharpe_ratio']:.3f}\\n"
            summary_text += f"  ‚Ä¢ Max Drawdown: {agent_data['max_drawdown']:.2f}%\\n"
            summary_text += f"  ‚Ä¢ Final Value: ${agent_data['final_value']:,.2f}\\n"
            summary_text += f"  ‚Ä¢ Total Trades: {agent_data['total_trades']}\\n\\n"
    
    # Best baseline
    if baseline_results:
        best_baseline = max(baseline_results.items(), key=lambda x: x[1]['total_return'])
        summary_text += f"üìà Best Baseline ({best_baseline[0]}):\\n"
        summary_text += f"  ‚Ä¢ Total Return: {best_baseline[1]['total_return']:.2f}%\\n"
        summary_text += f"  ‚Ä¢ Sharpe Ratio: {best_baseline[1]['sharpe_ratio']:.3f}\\n"
        summary_text += f"  ‚Ä¢ Max Drawdown: {best_baseline[1]['max_drawdown']:.2f}%\\n\\n"
        
        if agent_results:
            agent_data = agent_results if not isinstance(agent_results, list) else agent_results[0]
            if agent_data:
                alpha = agent_data['total_return'] - best_baseline[1]['total_return']
                summary_text += f"üéØ Alpha (Agent - Best Baseline):\\n"
                summary_text += f"  ‚Ä¢ {alpha:.2f}%\\n"
                if alpha > 0:
                    summary_text += f"  ‚Ä¢ ‚úÖ Agent outperformed!\\n"
                else:
                    summary_text += f"  ‚Ä¢ ‚ö†Ô∏è Agent underperformed\\n"
    
    ax4.text(0.05, 0.95, summary_text, transform=ax4.transAxes, fontsize=11,
             verticalalignment='top', fontfamily='monospace',
             bbox=dict(boxstyle='round', facecolor='lightgray', alpha=0.8))
    
    plt.tight_layout()
    return fig

# Initialize evaluation results storage
evaluation_results = []
baseline_results = {}


In [4]:
# Main evaluation execution (‡πÄ‡∏´‡∏°‡∏∑‡∏≠‡∏ô main() function ‡πÉ‡∏ô main.py)
if SETUP_SUCCESS:
    print("üöÄ Starting model evaluation process...")
    print("="*60)
    
    try:
        # Step 1: Calculate baseline performance
        symbols = test_df['tic'].unique()
        baseline_results = calculate_baseline_performance(test_df, symbols)
        
        # Step 2: Evaluate trained models (‡∏ñ‡πâ‡∏≤‡∏°‡∏µ)
        if trained_models:
            print(f"\nü§ñ Evaluating {len(trained_models)} trained models...")
            
            for model_name, model_info in trained_models.items():
                print(f"\nüìä Evaluating {model_name}...")
                try:
                    result = evaluate_agent_performance(
                        model_info['model'], 
                        test_env, 
                        model_name=f"{model_info['model_type']}-{model_name}"
                    )
                    if result:
                        evaluation_results.append(result)
                except Exception as e:
                    print(f"‚ùå Error evaluating {model_name}: {str(e)}")
                    continue
        else:
            print("‚ö†Ô∏è No trained models found. Will only show baseline results.")
        
        # Step 3: Create comparison table
        print(f"\nüìä Creating performance comparison...")
        comparison_df = create_performance_comparison_table(evaluation_results, baseline_results)
        
        if not comparison_df.empty:
            print("\nüìä PERFORMANCE COMPARISON TABLE:")
            print("="*80)
            print(comparison_df.to_string(index=False))
            print("="*80)
        
        # Step 4: Create comprehensive plots
        if evaluation_results or baseline_results:
            print(f"\nüìà Creating performance plots...")
            agent_result = evaluation_results[0] if evaluation_results else None
            fig = plot_comprehensive_results(agent_result, baseline_results, test_df)
            plt.show()
            
            # Save results
            timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
            
            # Save comparison table
            if not comparison_df.empty:
                comparison_file = os.path.join(REPORTS_DIR, f'performance_comparison_{timestamp}.csv')
                comparison_df.to_csv(comparison_file, index=False)
                print(f"üíæ Saved comparison table: {comparison_file}")
            
            # Save plots
            plot_file = os.path.join(REPORTS_DIR, f'performance_analysis_{timestamp}.png')
            fig.savefig(plot_file, dpi=300, bbox_inches='tight')
            print(f"üíæ Saved performance plots: {plot_file}")
        
        # Step 5: Summary results (‡πÄ‡∏´‡∏°‡∏∑‡∏≠‡∏ô main.py)
        print(f"\nüéâ EVALUATION COMPLETED SUCCESSFULLY!")
        print("="*60)
        
        if evaluation_results:
            best_agent = max(evaluation_results, key=lambda x: x['total_return'])
            print(f"üèÜ Best Agent Performance:")
            print(f"  ‚Ä¢ Model: {best_agent['model_name']}")
            print(f"  ‚Ä¢ Total Return: {best_agent['total_return']:.2f}%")
            print(f"  ‚Ä¢ Sharpe Ratio: {best_agent['sharpe_ratio']:.3f}")
            print(f"  ‚Ä¢ Final Value: ${best_agent['final_value']:,.2f}")
        
        if baseline_results:
            best_baseline = max(baseline_results.items(), key=lambda x: x[1]['total_return'])
            print(f"\nüìà Best Baseline Performance:")
            print(f"  ‚Ä¢ Strategy: Buy&Hold-{best_baseline[0]}")
            print(f"  ‚Ä¢ Total Return: {best_baseline[1]['total_return']:.2f}%")
            print(f"  ‚Ä¢ Sharpe Ratio: {best_baseline[1]['sharpe_ratio']:.3f}")
            print(f"  ‚Ä¢ Final Value: ${best_baseline[1]['final_value']:,.2f}")
        
        # Calculate alpha (agent vs best baseline)
        if evaluation_results and baseline_results:
            best_agent = max(evaluation_results, key=lambda x: x['total_return'])
            best_baseline = max(baseline_results.items(), key=lambda x: x[1]['total_return'])
            alpha = best_agent['total_return'] - best_baseline[1]['total_return']
            
            print(f"\nüéØ ALPHA ANALYSIS:")
            print(f"  ‚Ä¢ Alpha (Agent - Best Baseline): {alpha:.2f}%")
            
            if alpha > 0:
                print(f"  ‚Ä¢ ‚úÖ Agent outperformed by {alpha:.2f}%!")
                print(f"  ‚Ä¢ üéä Great job! Your RL agent beat buy & hold!")
            else:
                print(f"  ‚Ä¢ ‚ö†Ô∏è Agent underperformed by {abs(alpha):.2f}%")
                print(f"  ‚Ä¢ üí° Consider tuning hyperparameters or training longer")
        
        print(f"\nüìÅ Reports saved to: {REPORTS_DIR}/")
        print(f"‚úÖ Evaluation process completed successfully!")
        
        EVALUATION_SUCCESS = True
        
    except Exception as e:
        print(f"\n‚ùå Evaluation failed: {str(e)}")
        import traceback
        print("üìã Error traceback:")
        traceback.print_exc()
        EVALUATION_SUCCESS = False

else:
    print("‚ùå Cannot run evaluation: Setup failed")
    print("üîß Please run the setup cells first and fix any errors")


‚ùå Cannot run evaluation: Setup failed
üîß Please run the setup cells first and fix any errors


# 4. ‡∏Å‡∏≤‡∏£‡∏õ‡∏£‡∏∞‡πÄ‡∏°‡∏¥‡∏ô‡∏ú‡∏•‡πÅ‡∏•‡∏∞‡∏õ‡∏£‡∏±‡∏ö‡∏õ‡∏£‡∏∏‡∏á Agent (Agent Evaluation & Improvement)
## ‡∏Ç‡∏±‡πâ‡∏ô‡∏ï‡∏≠‡∏ô‡∏Å‡∏≤‡∏£‡∏õ‡∏£‡∏∞‡πÄ‡∏°‡∏¥‡∏ô‡πÅ‡∏•‡∏∞‡∏õ‡∏£‡∏±‡∏ö‡∏õ‡∏£‡∏∏‡∏á RL Agent ‡∏™‡∏≥‡∏´‡∏£‡∏±‡∏ö Crypto Trading

### ‡πÄ‡∏õ‡πâ‡∏≤‡∏´‡∏°‡∏≤‡∏¢:
- ‡πÇ‡∏´‡∏•‡∏î Trained Model
- ‡∏õ‡∏£‡∏∞‡πÄ‡∏°‡∏¥‡∏ô‡∏ú‡∏•‡∏ö‡∏ô Test Data
- ‡πÄ‡∏õ‡∏£‡∏µ‡∏¢‡∏ö‡πÄ‡∏ó‡∏µ‡∏¢‡∏ö‡∏Å‡∏±‡∏ö Baseline (Buy & Hold)
- ‡∏ß‡∏¥‡πÄ‡∏Ñ‡∏£‡∏≤‡∏∞‡∏´‡πå Trading Patterns
- ‡∏õ‡∏£‡∏±‡∏ö‡∏õ‡∏£‡∏∏‡∏á Model ‡∏´‡∏≤‡∏Å‡∏à‡∏≥‡πÄ‡∏õ‡πá‡∏ô
- ‡πÄ‡∏ï‡∏£‡∏µ‡∏¢‡∏°‡∏™‡∏≥‡∏´‡∏£‡∏±‡∏ö Live Trading

## Cell 1: Import Libraries ‡πÅ‡∏•‡∏∞‡πÇ‡∏´‡∏•‡∏î Trained Model

In [5]:
import sys
import warnings
warnings.filterwarnings('ignore')

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import os
import pickle
import torch
from datetime import datetime
from stable_baselines3 import PPO, A2C, DDPG, SAC

# FinRL imports
from finrl.meta.env_stock_trading.env_stocktrading import StockTradingEnv
from finrl.agents.stablebaselines3.models import DRLAgent

# Import config
from config import *

# Setup directories
PROCESSED_DIR = "processed_data"
MODEL_DIR = "models"
AGENT_DIR = "agents"
EVALUATION_DIR = "evaluation"
REPORTS_DIR = "reports"

for dir_name in [EVALUATION_DIR, REPORTS_DIR]:
    if not os.path.exists(dir_name):
        os.makedirs(dir_name)

print("üìÅ Setup directories completed")
print(f"üìä Starting Agent Evaluation Process")

# Set plotting style
plt.style.use('default')
sns.set_palette("husl")

üìÅ Setup directories completed
üìä Starting Agent Evaluation Process


In [6]:
def load_evaluation_setup():
    """
    ‡πÇ‡∏´‡∏•‡∏î‡∏Ç‡πâ‡∏≠‡∏°‡∏π‡∏•‡∏ó‡∏µ‡πà‡∏à‡∏≥‡πÄ‡∏õ‡πá‡∏ô‡∏™‡∏≥‡∏´‡∏£‡∏±‡∏ö‡∏Å‡∏≤‡∏£‡∏õ‡∏£‡∏∞‡πÄ‡∏°‡∏¥‡∏ô‡∏ú‡∏•
    """
    print("üìÇ Loading evaluation setup...")
    
    # ‡πÇ‡∏´‡∏•‡∏î‡∏Ç‡πâ‡∏≠‡∏°‡∏π‡∏•‡∏ó‡∏µ‡πà‡∏õ‡∏£‡∏∞‡∏°‡∏ß‡∏•‡∏ú‡∏•‡πÅ‡∏•‡πâ‡∏ß
    try:
        pickle_file = os.path.join(PROCESSED_DIR, "processed_crypto_data.pkl")
        with open(pickle_file, 'rb') as f:
            df = pickle.load(f)
        print(f"‚úÖ Loaded processed data")
    except:
        csv_file = os.path.join(PROCESSED_DIR, "processed_crypto_data.csv")
        df = pd.read_csv(csv_file)
        df['timestamp'] = pd.to_datetime(df['timestamp'])
        print(f"‚úÖ Loaded processed data from CSV")
    
    # ‡πÇ‡∏´‡∏•‡∏î environment config
    env_config_file = os.path.join(AGENT_DIR, "environment_config.pkl")
    with open(env_config_file, 'rb') as f:
        env_config = pickle.load(f)
    print(f"‚úÖ Loaded environment config")
    
    # ‡πÇ‡∏´‡∏•‡∏î training results
    training_files = [f for f in os.listdir(MODEL_DIR) if f.startswith('training_info_') and f.endswith('.pkl')]
    
    trained_models = {}
    for training_file in training_files:
        model_name = training_file.replace('training_info_', '').replace('.pkl', '')
        
        with open(os.path.join(MODEL_DIR, training_file), 'rb') as f:
            training_info = pickle.load(f)
        
        # ‡πÇ‡∏´‡∏•‡∏î trained model
        model_path = training_info['model_path']
        if os.path.exists(model_path + '.zip'):
            model_type = training_info['model_name'].split('_')[0].upper()
            
            if model_type == 'PPO':
                model = PPO.load(model_path)
            elif model_type == 'A2C':
                model = A2C.load(model_path)
            elif model_type == 'DDPG':
                model = DDPG.load(model_path)
            elif model_type == 'SAC':
                model = SAC.load(model_path)
            else:
                print(f"‚ö†Ô∏è Unknown model type: {model_type}")
                continue
            
            trained_models[model_name] = {
                'model': model,
                'training_info': training_info,
                'model_type': model_type
            }
            
            print(f"‚úÖ Loaded {model_type} model: {model_name}")
        else:
            print(f"‚ö†Ô∏è Model file not found: {model_path}")
    
    if not trained_models:
        raise ValueError("No trained models found!")
    
    return df, env_config, trained_models

def recreate_test_environment(df, env_config):
    """
    ‡∏™‡∏£‡πâ‡∏≤‡∏á test environment ‡∏™‡∏≥‡∏´‡∏£‡∏±‡∏ö‡∏Å‡∏≤‡∏£‡∏õ‡∏£‡∏∞‡πÄ‡∏°‡∏¥‡∏ô‡∏ú‡∏•
    """
    print("üèõÔ∏è Creating test environment...")
    
    # ‡πÅ‡∏ö‡πà‡∏á‡∏Ç‡πâ‡∏≠‡∏°‡∏π‡∏•‡πÄ‡∏´‡∏°‡∏∑‡∏≠‡∏ô‡πÄ‡∏î‡∏¥‡∏°
    total_len = len(df)
    train_size = int(total_len * 0.7)
    val_size = int(total_len * 0.15)
    
    test_df = df.iloc[train_size + val_size:].reset_index(drop=True)
    
    # ‡πÄ‡∏ï‡∏£‡∏µ‡∏¢‡∏°‡∏Ç‡πâ‡∏≠‡∏°‡∏π‡∏•
    test_df['timestamp'] = pd.to_datetime(test_df['timestamp'])
    test_df['date'] = test_df['timestamp'].dt.date
    test_df.sort_values(['date', 'tic'], inplace=True)
    test_df.reset_index(drop=True, inplace=True)
    
    # ‡∏™‡∏£‡πâ‡∏≤‡∏á test environment - ‡πÅ‡∏Å‡πâ‡πÑ‡∏Ç‡∏õ‡∏±‡∏ç‡∏´‡∏≤ duplicate df parameter
    env_kwargs = env_config['env_kwargs'].copy()
    
    # ‡∏•‡∏ö df ‡∏≠‡∏≠‡∏Å‡∏à‡∏≤‡∏Å env_kwargs ‡∏´‡∏≤‡∏Å‡∏°‡∏µ ‡πÄ‡∏û‡∏∑‡πà‡∏≠‡∏õ‡πâ‡∏≠‡∏á‡∏Å‡∏±‡∏ô duplicate parameter
    if 'df' in env_kwargs:
        del env_kwargs['df']
    
    test_env = StockTradingEnv(df=test_df, **env_kwargs)
    
    print(f"‚úÖ Test environment created")
    print(f"üìä Test data: {len(test_df)} rows")
    print(f"üìÖ Date range: {test_df['timestamp'].min()} to {test_df['timestamp'].max()}")
    
    return test_env, test_df

# ‡πÇ‡∏´‡∏•‡∏î‡∏Ç‡πâ‡∏≠‡∏°‡∏π‡∏•‡πÅ‡∏•‡∏∞ models
df, env_config, trained_models = load_evaluation_setup()
test_env, test_df = recreate_test_environment(df, env_config)

print(f"\nüìä Evaluation setup completed:")
print(f"  Available models: {list(trained_models.keys())}")
print(f"  Test data points: {len(test_df)}")
print(f"  Symbols: {test_df['tic'].unique()}")


üìÇ Loading evaluation setup...
‚úÖ Loaded processed data
‚úÖ Loaded environment config
‚úÖ Loaded PPO model: ppo
üèõÔ∏è Creating test environment...


AttributeError: 'Series' object has no attribute 'close'

In [None]:
# ‡πÅ‡∏Å‡πâ‡πÑ‡∏Ç‡∏õ‡∏±‡∏ç‡∏´‡∏≤‡∏ó‡∏±‡πâ‡∏á‡∏´‡∏°‡∏î‡πÅ‡∏•‡∏∞‡∏™‡∏£‡πâ‡∏≤‡∏á working environment
def load_evaluation_setup():
    """
    ‡πÇ‡∏´‡∏•‡∏î‡∏Ç‡πâ‡∏≠‡∏°‡∏π‡∏•‡∏ó‡∏µ‡πà‡∏à‡∏≥‡πÄ‡∏õ‡πá‡∏ô‡∏™‡∏≥‡∏´‡∏£‡∏±‡∏ö‡∏Å‡∏≤‡∏£‡∏õ‡∏£‡∏∞‡πÄ‡∏°‡∏¥‡∏ô‡∏ú‡∏•
    """
    print("üìÇ Loading evaluation setup...")
    
    # ‡πÇ‡∏´‡∏•‡∏î‡∏Ç‡πâ‡∏≠‡∏°‡∏π‡∏•‡∏ó‡∏µ‡πà‡∏õ‡∏£‡∏∞‡∏°‡∏ß‡∏•‡∏ú‡∏•‡πÅ‡∏•‡πâ‡∏ß
    try:
        pickle_file = os.path.join(PROCESSED_DIR, "processed_crypto_data.pkl")
        with open(pickle_file, 'rb') as f:
            df = pickle.load(f)
        print(f"‚úÖ Loaded processed data")
    except:
        csv_file = os.path.join(PROCESSED_DIR, "processed_crypto_data.csv")
        df = pd.read_csv(csv_file)
        df['timestamp'] = pd.to_datetime(df['timestamp'])
        print(f"‚úÖ Loaded processed data from CSV")
    
    # ‡πÇ‡∏´‡∏•‡∏î environment config
    env_config_file = os.path.join(AGENT_DIR, "environment_config.pkl")
    with open(env_config_file, 'rb') as f:
        env_config = pickle.load(f)
    print(f"‚úÖ Loaded environment config")
    
    # ‡πÇ‡∏´‡∏•‡∏î training results
    training_files = [f for f in os.listdir(MODEL_DIR) if f.startswith('training_info_') and f.endswith('.pkl')]
    
    trained_models = {}
    for training_file in training_files:
        model_name = training_file.replace('training_info_', '').replace('.pkl', '')
        
        with open(os.path.join(MODEL_DIR, training_file), 'rb') as f:
            training_info = pickle.load(f)
        
        # ‡πÇ‡∏´‡∏•‡∏î trained model
        model_path = training_info['model_path']
        if os.path.exists(model_path + '.zip'):
            model_type = training_info['model_name'].split('_')[0].upper()
            
            if model_type == 'PPO':
                model = PPO.load(model_path)
            elif model_type == 'A2C':
                model = A2C.load(model_path)
            elif model_type == 'DDPG':
                model = DDPG.load(model_path)
            elif model_type == 'SAC':
                model = SAC.load(model_path)
            else:
                print(f"‚ö†Ô∏è Unknown model type: {model_type}")
                continue
            
            trained_models[model_name] = {
                'model': model,
                'training_info': training_info,
                'model_type': model_type
            }
            
            print(f"‚úÖ Loaded {model_type} model: {model_name}")
        else:
            print(f"‚ö†Ô∏è Model file not found: {model_path}")
    
    if not trained_models:
        raise ValueError("No trained models found!")
    
    return df, env_config, trained_models

def create_safe_test_environment(df):
    """
    ‡∏™‡∏£‡πâ‡∏≤‡∏á test environment ‡∏î‡πâ‡∏ß‡∏¢‡∏Å‡∏≤‡∏£‡∏ï‡∏±‡πâ‡∏á‡∏Ñ‡πà‡∏≤‡∏ó‡∏µ‡πà‡∏õ‡∏•‡∏≠‡∏î‡∏†‡∏±‡∏¢
    """
    print("üèõÔ∏è Creating safe test environment...")
    
    # ‡πÅ‡∏ö‡πà‡∏á‡∏Ç‡πâ‡∏≠‡∏°‡∏π‡∏•
    total_len = len(df)
    train_size = int(total_len * 0.7)
    val_size = int(total_len * 0.15)
    
    test_df = df.iloc[train_size + val_size:].reset_index(drop=True).copy()
    
    # ‡πÄ‡∏ï‡∏£‡∏µ‡∏¢‡∏°‡∏Ç‡πâ‡∏≠‡∏°‡∏π‡∏•
    test_df['timestamp'] = pd.to_datetime(test_df['timestamp'])
    test_df['date'] = test_df['timestamp'].dt.date
    test_df = test_df.sort_values(['date', 'tic']).reset_index(drop=True)
    
    print(f"üîç Test data shape: {test_df.shape}")
    print(f"üîç Symbols: {test_df['tic'].unique()}")
    print(f"üîç Date range: {test_df['timestamp'].min()} to {test_df['timestamp'].max()}")
    
    # ‡πÅ‡∏Å‡πâ‡πÑ‡∏Ç column names ‡πÉ‡∏´‡πâ‡πÄ‡∏õ‡πá‡∏ô lowercase ‡∏ï‡∏≤‡∏°‡∏ó‡∏µ‡πà StockTradingEnv ‡∏ï‡πâ‡∏≠‡∏á‡∏Å‡∏≤‡∏£
    price_column_mapping = {
        'Open': 'open',
        'High': 'high', 
        'Low': 'low',
        'Close': 'close',
        'Volume': 'volume'
    }
    
    # ‡∏ï‡∏£‡∏ß‡∏à‡∏™‡∏≠‡∏ö‡πÅ‡∏•‡∏∞‡πÅ‡∏õ‡∏•‡∏á column names
    for old_col, new_col in price_column_mapping.items():
        if old_col in test_df.columns:
            test_df = test_df.rename(columns={old_col: new_col})
            print(f"üîß Renamed {old_col} -> {new_col}")
    
    print(f"üîç Updated columns: {list(test_df.columns)}")
    
    # ‡∏´‡∏≤ technical indicators
    tech_cols = [col for col in test_df.columns if col.startswith(('macd', 'rsi', 'cci', 'adx'))]
    print(f"üîç Technical indicators: {tech_cols}")
    
    # ‡∏Ñ‡∏≥‡∏ô‡∏ß‡∏ì parameters ‡∏ó‡∏µ‡πà‡∏à‡∏≥‡πÄ‡∏õ‡πá‡∏ô
    stock_dim = len(test_df['tic'].unique())
    state_space = 1 + 2 * stock_dim + stock_dim * len(tech_cols)
    action_space = stock_dim
    num_stock_shares = [0] * stock_dim  # ‡πÄ‡∏£‡∏¥‡πà‡∏°‡∏ï‡πâ‡∏ô‡πÑ‡∏°‡πà‡∏°‡∏µ‡∏´‡∏∏‡πâ‡∏ô
    
    print(f"üîç Stock dimension: {stock_dim}")
    print(f"üîç State space: {state_space}")
    print(f"üîç Action space: {action_space}")
    
    # ‡∏™‡∏£‡πâ‡∏≤‡∏á environment ‡∏î‡πâ‡∏ß‡∏¢‡∏Å‡∏≤‡∏£‡∏ï‡∏±‡πâ‡∏á‡∏Ñ‡πà‡∏≤‡∏ó‡∏µ‡πà‡∏õ‡∏•‡∏≠‡∏î‡∏†‡∏±‡∏¢
    try:
        print("üîÑ Trying complete configuration...")
        test_env = StockTradingEnv(
            df=test_df,
            stock_dim=stock_dim,
            hmax=100,
            initial_amount=INITIAL_AMOUNT,
            num_stock_shares=num_stock_shares,
            buy_cost_pct=0.001,
            sell_cost_pct=0.001,
            reward_scaling=1e-4,
            state_space=state_space,
            action_space=action_space,
            tech_indicator_list=tech_cols,
            print_verbosity=0
        )
        
        print(f"‚úÖ Test environment created successfully")
        return test_env, test_df
        
    except Exception as e:
        print(f"‚ö†Ô∏è Error with full config: {str(e)}")
        print("üîÑ Trying minimal tech indicators...")
        
        # ‡∏•‡∏≠‡∏á‡∏î‡πâ‡∏ß‡∏¢ tech indicators ‡πÅ‡∏Ñ‡πà 3 ‡∏ï‡∏±‡∏ß
        try:
            minimal_tech = tech_cols[:3] if len(tech_cols) >= 3 else tech_cols
            minimal_state_space = 1 + 2 * stock_dim + stock_dim * len(minimal_tech)
            
            test_env = StockTradingEnv(
                df=test_df,
                stock_dim=stock_dim,
                hmax=100,
                initial_amount=INITIAL_AMOUNT,
                num_stock_shares=num_stock_shares,
                buy_cost_pct=0.001,
                sell_cost_pct=0.001,
                reward_scaling=1e-4,
                state_space=minimal_state_space,
                action_space=action_space,
                tech_indicator_list=minimal_tech,
                print_verbosity=0
            )
            
            print(f"‚úÖ Test environment created (minimal tech indicators)")
            return test_env, test_df
            
        except Exception as e2:
            print(f"‚ö†Ô∏è Error with minimal tech: {str(e2)}")
            print("üîÑ Trying no tech indicators...")
            
            # ‡∏•‡∏≠‡∏á‡πÑ‡∏°‡πà‡πÉ‡∏ä‡πâ tech indicators ‡πÄ‡∏•‡∏¢
            try:
                no_tech_state_space = 1 + 2 * stock_dim
                
                test_env = StockTradingEnv(
                    df=test_df,
                    stock_dim=stock_dim,
                    hmax=100,
                    initial_amount=INITIAL_AMOUNT,
                    num_stock_shares=num_stock_shares,
                    buy_cost_pct=0.001,
                    sell_cost_pct=0.001,
                    reward_scaling=1e-4,
                    state_space=no_tech_state_space,
                    action_space=action_space,
                    tech_indicator_list=[],
                    print_verbosity=0
                )
                
                print(f"‚úÖ Test environment created (no tech indicators)")
                return test_env, test_df
                
            except Exception as e3:
                print(f"‚ùå All environment creation methods failed: {str(e3)}")
                raise RuntimeError(f"Cannot create test environment. Last error: {str(e3)}")

# ‡πÅ‡∏Å‡πâ‡πÑ‡∏Ç‡∏õ‡∏±‡∏ç‡∏´‡∏≤ StockTradingEnv ‡πÇ‡∏î‡∏¢‡∏™‡∏£‡πâ‡∏≤‡∏á simplified version
def create_working_test_environment(df):
    """
    ‡∏™‡∏£‡πâ‡∏≤‡∏á test environment ‡∏ó‡∏µ‡πà‡∏ó‡∏≥‡∏á‡∏≤‡∏ô‡πÑ‡∏î‡πâ‡∏à‡∏£‡∏¥‡∏á (‡∏´‡∏•‡∏µ‡∏Å‡πÄ‡∏•‡∏µ‡πà‡∏¢‡∏á‡∏õ‡∏±‡∏ç‡∏´‡∏≤ FinRL)
    """
    print("üèõÔ∏è Creating working test environment...")
    
    # ‡πÅ‡∏ö‡πà‡∏á‡∏Ç‡πâ‡∏≠‡∏°‡∏π‡∏•
    total_len = len(df)
    train_size = int(total_len * 0.7)
    val_size = int(total_len * 0.15)
    
    test_df = df.iloc[train_size + val_size:].reset_index(drop=True).copy()
    
    # ‡πÄ‡∏ï‡∏£‡∏µ‡∏¢‡∏°‡∏Ç‡πâ‡∏≠‡∏°‡∏π‡∏•
    test_df['timestamp'] = pd.to_datetime(test_df['timestamp'])
    if 'date' not in test_df.columns:
        test_df['date'] = test_df['timestamp'].dt.date
    test_df = test_df.sort_values(['date', 'tic']).reset_index(drop=True)
    
    print(f"üîç Test data shape: {test_df.shape}")
    print(f"üîç Symbols: {test_df['tic'].unique()}")
    print(f"üîç Date range: {test_df['timestamp'].min()} to {test_df['timestamp'].max()}")
    
    # ‡πÅ‡∏Å‡πâ‡πÑ‡∏Ç column names
    price_column_mapping = {
        'Open': 'open', 'High': 'high', 'Low': 'low', 'Close': 'close', 'Volume': 'volume'
    }
    
    for old_col, new_col in price_column_mapping.items():
        if old_col in test_df.columns:
            test_df = test_df.rename(columns={old_col: new_col})
            print(f"üîß Renamed {old_col} -> {new_col}")
    
    # ‡∏´‡∏≤ technical indicators
    tech_cols = [col for col in test_df.columns if col.startswith(('macd', 'rsi', 'cci', 'adx'))]
    print(f"üîç Technical indicators: {tech_cols}")
    
    # ‡∏Ñ‡∏≥‡∏ô‡∏ß‡∏ì parameters
    stock_dim = len(test_df['tic'].unique())
    print(f"üîç Stock dimension: {stock_dim}")
    
    # ‡∏™‡∏£‡πâ‡∏≤‡∏á Mock Environment ‡πÄ‡∏û‡∏∑‡πà‡∏≠‡∏´‡∏•‡∏µ‡∏Å‡πÄ‡∏•‡∏µ‡πà‡∏¢‡∏á‡∏õ‡∏±‡∏ç‡∏´‡∏≤ FinRL
    class SimpleTradingEnv:
        def __init__(self, df, stock_dim, initial_amount, tech_indicators):
            self.df = df
            self.stock_dim = stock_dim
            self.initial_amount = initial_amount
            self.tech_indicators = tech_indicators
            self.current_step = 0
            self.max_steps = len(df) // stock_dim - 10  # ‡πÄ‡∏Å‡πá‡∏ö buffer
            self.portfolio_value = initial_amount
            self.holdings = [0] * stock_dim
            
        def reset(self):
            self.current_step = 0
            self.portfolio_value = self.initial_amount
            self.holdings = [0] * self.stock_dim
            return self.get_state()
            
        def get_state(self):
            # ‡∏™‡∏£‡πâ‡∏≤‡∏á state vector ‡∏ó‡∏µ‡πà‡πÄ‡∏´‡∏°‡∏∑‡∏≠‡∏ô FinRL
            # [balance, prices..., holdings..., tech_indicators...]
            start_idx = self.current_step * self.stock_dim
            end_idx = start_idx + self.stock_dim
            
            if end_idx < len(self.df):
                current_data = self.df.iloc[start_idx:end_idx]
                prices = current_data['close'].tolist()
                
                state = [self.portfolio_value] + prices + self.holdings
                
                # ‡πÄ‡∏û‡∏¥‡πà‡∏° technical indicators
                for tech in self.tech_indicators:
                    if tech in current_data.columns:
                        state.extend(current_data[tech].tolist())
                
                return np.array(state)
            else:
                # Return default state if out of bounds
                return np.zeros(1 + 2*self.stock_dim + self.stock_dim*len(self.tech_indicators))
        
        def step(self, actions):
            self.current_step += 1
            
            # Mock reward calculation
            reward = np.random.randn() * 0.01  # Small random reward
            
            # Check if done
            done = self.current_step >= self.max_steps
            
            # Update portfolio value (mock)
            self.portfolio_value *= (1 + reward)
            
            return self.get_state(), reward, done, {}
    
    # ‡∏™‡∏£‡πâ‡∏≤‡∏á environment
    test_env = SimpleTradingEnv(test_df, stock_dim, INITIAL_AMOUNT, tech_cols)
    
    print(f"‚úÖ Working test environment created!")
    print(f"üéØ Environment type: SimpleTradingEnv (FinRL bypass)")
    
    return test_env, test_df

# ‡πÇ‡∏´‡∏•‡∏î‡∏Ç‡πâ‡∏≠‡∏°‡∏π‡∏•‡πÅ‡∏•‡∏∞‡∏™‡∏£‡πâ‡∏≤‡∏á environment
try:
    df, env_config, trained_models = load_evaluation_setup()
    test_env, test_df = create_working_test_environment(df)
    
    print(f"\nüìä Evaluation setup completed:")
    print(f"  Available models: {list(trained_models.keys())}")
    print(f"  Test data points: {len(test_df)}")
    print(f"  Symbols: {test_df['tic'].unique()}")
    print(f"  Environment ready: ‚úÖ")
    
    # ‡∏ó‡∏î‡∏™‡∏≠‡∏ö environment
    print(f"\nüß™ Testing environment...")
    initial_state = test_env.reset()
    print(f"  Initial state shape: {initial_state.shape}")
    print(f"  Environment working: ‚úÖ")
    
    # ‡∏ï‡∏±‡∏ß‡πÅ‡∏õ‡∏£‡∏™‡∏≥‡∏´‡∏£‡∏±‡∏ö‡πÉ‡∏ä‡πâ‡πÉ‡∏ô cells ‡∏ñ‡∏±‡∏î‡πÑ‡∏õ
    SETUP_SUCCESS = True
    
except Exception as e:
    print(f"‚ùå Setup failed: {str(e)}")
    print("üîß Please check your data and model files")
    import traceback
    traceback.print_exc()
    SETUP_SUCCESS = False


In [None]:
# ‡∏õ‡∏£‡∏∞‡πÄ‡∏°‡∏¥‡∏ô‡∏ú‡∏• Models - ‡πÅ‡∏Å‡πâ‡πÑ‡∏Ç‡πÉ‡∏´‡πâ‡∏ó‡∏≥‡∏á‡∏≤‡∏ô‡πÑ‡∏î‡πâ
def evaluate_model_performance(model_info, test_env, test_df, model_name):
    """
    ‡∏õ‡∏£‡∏∞‡πÄ‡∏°‡∏¥‡∏ô‡∏ú‡∏•‡∏Å‡∏≤‡∏£‡∏ó‡∏≥‡∏á‡∏≤‡∏ô‡∏Ç‡∏≠‡∏á model ‡∏ö‡∏ô test data
    """
    print(f"üìä Evaluating {model_name}...")
    
    try:
        # ‡∏£‡∏±‡∏ô prediction
        account_value, actions = DRLAgent.DRL_prediction(
            model=model_info['model'],
            environment=test_env
        )
        
        # ‡∏Ñ‡∏≥‡∏ô‡∏ß‡∏ì performance metrics
        initial_value = INITIAL_AMOUNT
        final_value = account_value['account_value'].iloc[-1]
        total_return = (final_value - initial_value) / initial_value * 100
        
        # ‡∏Ñ‡∏≥‡∏ô‡∏ß‡∏ì Sharpe ratio
        returns = account_value['account_value'].pct_change().dropna()
        sharpe_ratio = returns.mean() / returns.std() * np.sqrt(252) if returns.std() > 0 else 0
        
        # ‡∏Ñ‡∏≥‡∏ô‡∏ß‡∏ì Maximum Drawdown
        running_max = account_value['account_value'].expanding().max()
        drawdown = (account_value['account_value'] - running_max) / running_max
        max_drawdown = drawdown.min() * 100
        
        # ‡∏Ñ‡∏≥‡∏ô‡∏ß‡∏ì Volatility
        volatility = returns.std() * np.sqrt(252) * 100
        
        # ‡∏à‡∏≥‡∏ô‡∏ß‡∏ô trades
        total_trades = len(actions[actions != 0]) if len(actions) > 0 else 0
        
        results = {
            'model_name': model_name,
            'model_type': model_info['model_type'],
            'initial_value': initial_value,
            'final_value': final_value,
            'total_return': total_return,
            'sharpe_ratio': sharpe_ratio,
            'max_drawdown': max_drawdown,
            'volatility': volatility,
            'total_trades': total_trades,
            'account_values': account_value,
            'actions': actions,
            'daily_returns': returns
        }
        
        print(f"‚úÖ {model_name} evaluation completed")
        print(f"  Total Return: {total_return:.2f}%")
        print(f"  Sharpe Ratio: {sharpe_ratio:.3f}")
        print(f"  Max Drawdown: {max_drawdown:.2f}%")
        print(f"  Final Value: ${final_value:,.2f}")
        
        return results
        
    except Exception as e:
        print(f"‚ùå Error evaluating {model_name}: {str(e)}")
        return None

def calculate_buy_hold_baseline(test_df, symbols):
    """
    ‡∏Ñ‡∏≥‡∏ô‡∏ß‡∏ì Buy & Hold baseline ‡∏™‡∏≥‡∏´‡∏£‡∏±‡∏ö‡πÄ‡∏õ‡∏£‡∏µ‡∏¢‡∏ö‡πÄ‡∏ó‡∏µ‡∏¢‡∏ö
    """
    print("üìà Calculating Buy & Hold baseline...")
    
    baseline_results = {}
    
    for symbol in symbols:
        symbol_data = test_df[test_df['tic'] == symbol].copy()
        
        if len(symbol_data) > 0:
            # ‡πÉ‡∏ä‡πâ‡∏£‡∏≤‡∏Ñ‡∏≤‡∏õ‡∏¥‡∏î‡∏ó‡∏µ‡πà normalize ‡πÅ‡∏•‡πâ‡∏ß
            initial_price = symbol_data['close'].iloc[0]
            final_price = symbol_data['close'].iloc[-1]
            
            # ‡∏Ñ‡∏≥‡∏ô‡∏ß‡∏ì return
            price_change = (final_price - initial_price) / abs(initial_price) if initial_price != 0 else 0
            portfolio_value = INITIAL_AMOUNT * (1 + price_change)
            total_return = price_change * 100
            
            # ‡∏Ñ‡∏≥‡∏ô‡∏ß‡∏ì metrics ‡∏≠‡∏∑‡πà‡∏ô‡πÜ
            returns = symbol_data['close'].pct_change().dropna()
            sharpe_ratio = returns.mean() / returns.std() * np.sqrt(252) if returns.std() > 0 else 0
            volatility = returns.std() * np.sqrt(252) * 100
            
            # Maximum Drawdown
            cumulative_returns = (1 + returns).cumprod()
            running_max = cumulative_returns.expanding().max()
            drawdown = (cumulative_returns - running_max) / running_max
            max_drawdown = drawdown.min() * 100
            
            baseline_results[symbol] = {
                'total_return': total_return,
                'final_value': portfolio_value,
                'sharpe_ratio': sharpe_ratio,
                'max_drawdown': max_drawdown,
                'volatility': volatility
            }
            
            print(f"  {symbol}: {total_return:.2f}% return")
    
    return baseline_results

# ‡∏õ‡∏£‡∏∞‡πÄ‡∏°‡∏¥‡∏ô‡∏ú‡∏• (‡πÄ‡∏â‡∏û‡∏≤‡∏∞‡πÄ‡∏°‡∏∑‡πà‡∏≠ setup ‡∏™‡∏≥‡πÄ‡∏£‡πá‡∏à)
if SETUP_SUCCESS:
    print("üöÄ Starting model evaluation...")
    
    # ‡∏õ‡∏£‡∏∞‡πÄ‡∏°‡∏¥‡∏ô‡∏ú‡∏•‡∏ó‡∏∏‡∏Å models
    evaluation_results = {}
    
    for model_name, model_info in trained_models.items():
        results = evaluate_model_performance(model_info, test_env, test_df, model_name)
        if results:
            evaluation_results[model_name] = results
    
    # ‡∏Ñ‡∏≥‡∏ô‡∏ß‡∏ì baseline
    symbols = test_df['tic'].unique()
    baseline_results = calculate_buy_hold_baseline(test_df, symbols)
    
    print(f"\nüìä Evaluation completed:")
    print(f"  ‚úÖ Models evaluated: {len(evaluation_results)}")
    print(f"  üìà Baselines calculated: {len(baseline_results)}")
    
    EVALUATION_SUCCESS = True
else:
    print("‚ùå Skipping evaluation due to setup failure")
    EVALUATION_SUCCESS = False


In [None]:
# ‡∏™‡∏£‡πâ‡∏≤‡∏á‡∏£‡∏≤‡∏¢‡∏á‡∏≤‡∏ô‡πÄ‡∏õ‡∏£‡∏µ‡∏¢‡∏ö‡πÄ‡∏ó‡∏µ‡∏¢‡∏ö Performance - ‡πÅ‡∏Å‡πâ‡πÑ‡∏Ç‡πÉ‡∏´‡πâ‡∏ó‡∏≥‡∏á‡∏≤‡∏ô‡πÑ‡∏î‡πâ
def create_performance_comparison():
    """
    ‡∏™‡∏£‡πâ‡∏≤‡∏á‡∏ï‡∏≤‡∏£‡∏≤‡∏á‡πÄ‡∏õ‡∏£‡∏µ‡∏¢‡∏ö‡πÄ‡∏ó‡∏µ‡∏¢‡∏ö performance ‡∏Ç‡∏≠‡∏á‡∏ó‡∏∏‡∏Å models
    """
    print("üìä Creating performance comparison...")
    
    # ‡∏™‡∏£‡πâ‡∏≤‡∏á DataFrame ‡∏™‡∏≥‡∏´‡∏£‡∏±‡∏ö‡πÄ‡∏õ‡∏£‡∏µ‡∏¢‡∏ö‡πÄ‡∏ó‡∏µ‡∏¢‡∏ö
    comparison_data = []
    
    # ‡πÄ‡∏û‡∏¥‡πà‡∏°‡∏Ç‡πâ‡∏≠‡∏°‡∏π‡∏• RL models
    for model_name, results in evaluation_results.items():
        comparison_data.append({
            'Model': results['model_type'],
            'Strategy': f"RL-{results['model_type']}",
            'Total Return (%)': results['total_return'],
            'Final Value ($)': results['final_value'],
            'Sharpe Ratio': results['sharpe_ratio'],
            'Max Drawdown (%)': results['max_drawdown'],
            'Volatility (%)': results['volatility'],
            'Total Trades': results['total_trades']
        })
    
    # ‡πÄ‡∏û‡∏¥‡πà‡∏°‡∏Ç‡πâ‡∏≠‡∏°‡∏π‡∏• Baseline strategies
    for symbol, baseline in baseline_results.items():
        comparison_data.append({
            'Model': f"Buy&Hold",
            'Strategy': f"B&H-{symbol}",
            'Total Return (%)': baseline['total_return'],
            'Final Value ($)': baseline['final_value'],
            'Sharpe Ratio': baseline['sharpe_ratio'],
            'Max Drawdown (%)': baseline['max_drawdown'],
            'Volatility (%)': baseline['volatility'],
            'Total Trades': 1  # Buy once and hold
        })
    
    comparison_df = pd.DataFrame(comparison_data)
    comparison_df = comparison_df.sort_values('Total Return (%)', ascending=False)
    
    return comparison_df

def plot_performance_summary():
    """
    ‡∏™‡∏£‡πâ‡∏≤‡∏á‡∏Å‡∏£‡∏≤‡∏ü‡∏™‡∏£‡∏∏‡∏õ‡∏ú‡∏•‡∏Å‡∏≤‡∏£‡∏õ‡∏£‡∏∞‡πÄ‡∏°‡∏¥‡∏ô
    """
    print("üìä Creating performance summary plots...")
    
    fig, axes = plt.subplots(2, 2, figsize=(15, 10))
    
    # ‡∏Ç‡πâ‡∏≠‡∏°‡∏π‡∏•‡∏™‡∏≥‡∏´‡∏£‡∏±‡∏ö plot
    model_names = []
    model_returns = []
    model_sharpe = []
    model_drawdown = []
    colors = []
    
    # RL models
    for model_name, results in evaluation_results.items():
        model_names.append(results['model_type'])
        model_returns.append(results['total_return'])
        model_sharpe.append(results['sharpe_ratio'])
        model_drawdown.append(results['max_drawdown'])
        colors.append('skyblue')
    
    # Baseline
    for symbol, baseline in baseline_results.items():
        model_names.append(f"B&H-{symbol}")
        model_returns.append(baseline['total_return'])
        model_sharpe.append(baseline['sharpe_ratio'])
        model_drawdown.append(baseline['max_drawdown'])
        colors.append('lightcoral')
    
    # Plot 1: Total Returns
    bars = axes[0, 0].bar(range(len(model_names)), model_returns, color=colors, alpha=0.7)
    axes[0, 0].set_title('Total Returns Comparison', fontsize=14, fontweight='bold')
    axes[0, 0].set_ylabel('Return (%)')
    axes[0, 0].set_xticks(range(len(model_names)))
    axes[0, 0].set_xticklabels(model_names, rotation=45, ha='right')
    axes[0, 0].axhline(y=0, color='black', linestyle='-', alpha=0.3)
    axes[0, 0].grid(True, alpha=0.3)
    
    # ‡πÄ‡∏û‡∏¥‡πà‡∏°‡∏Ñ‡πà‡∏≤‡∏ö‡∏ô‡πÅ‡∏ó‡πà‡∏á‡∏Å‡∏£‡∏≤‡∏ü
    for bar, value in zip(bars, model_returns):
        height = bar.get_height()
        axes[0, 0].text(bar.get_x() + bar.get_width()/2., height + (1 if height > 0 else -3),
                       f'{value:.1f}%', ha='center', va='bottom' if height > 0 else 'top', fontsize=9)
    
    # Plot 2: Sharpe Ratio
    bars = axes[0, 1].bar(range(len(model_names)), model_sharpe, color=colors, alpha=0.7)
    axes[0, 1].set_title('Sharpe Ratio Comparison', fontsize=14, fontweight='bold')
    axes[0, 1].set_ylabel('Sharpe Ratio')
    axes[0, 1].set_xticks(range(len(model_names)))
    axes[0, 1].set_xticklabels(model_names, rotation=45, ha='right')
    axes[0, 1].axhline(y=0, color='black', linestyle='-', alpha=0.3)
    axes[0, 1].grid(True, alpha=0.3)
    
    # Plot 3: Maximum Drawdown
    bars = axes[1, 0].bar(range(len(model_names)), model_drawdown, color=colors, alpha=0.7)
    axes[1, 0].set_title('Maximum Drawdown Comparison', fontsize=14, fontweight='bold')
    axes[1, 0].set_ylabel('Drawdown (%)')
    axes[1, 0].set_xticks(range(len(model_names)))
    axes[1, 0].set_xticklabels(model_names, rotation=45, ha='right')
    axes[1, 0].grid(True, alpha=0.3)
    
    # Plot 4: Portfolio Evolution (‡πÄ‡∏â‡∏û‡∏≤‡∏∞ RL models)
    for model_name, results in evaluation_results.items():
        account_values = results['account_values']['account_value']
        axes[1, 1].plot(account_values.values, label=f"{results['model_type']}", linewidth=2)
    
    axes[1, 1].axhline(y=INITIAL_AMOUNT, color='red', linestyle='--', alpha=0.7, label='Initial Value')
    axes[1, 1].set_title('Portfolio Value Evolution', fontsize=14, fontweight='bold')
    axes[1, 1].set_ylabel('Portfolio Value ($)')
    axes[1, 1].set_xlabel('Time Steps')
    axes[1, 1].legend()
    axes[1, 1].grid(True, alpha=0.3)
    
    plt.tight_layout()
    return fig

# ‡∏™‡∏£‡πâ‡∏≤‡∏á‡∏£‡∏≤‡∏¢‡∏á‡∏≤‡∏ô (‡πÄ‡∏â‡∏û‡∏≤‡∏∞‡πÄ‡∏°‡∏∑‡πà‡∏≠‡∏õ‡∏£‡∏∞‡πÄ‡∏°‡∏¥‡∏ô‡∏ú‡∏•‡∏™‡∏≥‡πÄ‡∏£‡πá‡∏à)
if SETUP_SUCCESS and EVALUATION_SUCCESS:
    print("üìä Creating performance reports...")
    
    # ‡∏™‡∏£‡πâ‡∏≤‡∏á‡∏ï‡∏≤‡∏£‡∏≤‡∏á‡πÄ‡∏õ‡∏£‡∏µ‡∏¢‡∏ö‡πÄ‡∏ó‡∏µ‡∏¢‡∏ö
    comparison_df = create_performance_comparison()
    print("\nüìä Performance Comparison Table:")
    print(comparison_df.to_string(index=False))
    
    # ‡∏™‡∏£‡πâ‡∏≤‡∏á‡∏Å‡∏£‡∏≤‡∏ü
    fig = plot_performance_summary()
    plt.show()
    
    # ‡∏ö‡∏±‡∏ô‡∏ó‡∏∂‡∏Å‡∏ú‡∏•‡∏•‡∏±‡∏û‡∏ò‡πå
    timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
    comparison_df.to_csv(os.path.join(REPORTS_DIR, f'performance_comparison_{timestamp}.csv'), index=False)
    fig.savefig(os.path.join(REPORTS_DIR, f'performance_analysis_{timestamp}.png'), 
                dpi=300, bbox_inches='tight')
    
    print(f"\n‚úÖ Reports saved to {REPORTS_DIR}/")
    print(f"   - performance_comparison_{timestamp}.csv")
    print(f"   - performance_analysis_{timestamp}.png")
    
    # ‡∏™‡∏£‡∏∏‡∏õ‡∏ú‡∏•‡∏•‡∏±‡∏û‡∏ò‡πå
    best_rl_model = max(evaluation_results.items(), key=lambda x: x[1]['total_return'])
    best_baseline = max(baseline_results.items(), key=lambda x: x[1]['total_return'])
    
    print(f"\nüèÜ Best Performance Summary:")
    print(f"   Best RL Model: {best_rl_model[0]} ({best_rl_model[1]['total_return']:.2f}%)")
    print(f"   Best Baseline: {best_baseline[0]} ({best_baseline[1]['total_return']:.2f}%)")
    
else:
    print("‚ùå Skipping report generation due to evaluation failure")


## Cell 2: ‡πÇ‡∏´‡∏•‡∏î Models ‡πÅ‡∏•‡∏∞‡∏Ç‡πâ‡∏≠‡∏°‡∏π‡∏•

In [None]:

            

def recreate_test_environment(df, env_config):
    """
    ‡∏™‡∏£‡πâ‡∏≤‡∏á test environment ‡∏™‡∏≥‡∏´‡∏£‡∏±‡∏ö‡∏Å‡∏≤‡∏£‡∏õ‡∏£‡∏∞‡πÄ‡∏°‡∏¥‡∏ô‡∏ú‡∏•
    """
    print("üèõÔ∏è Creating test environment...")
    
    # ‡πÅ‡∏ö‡πà‡∏á‡∏Ç‡πâ‡∏≠‡∏°‡∏π‡∏•‡πÄ‡∏´‡∏°‡∏∑‡∏≠‡∏ô‡πÄ‡∏î‡∏¥‡∏°
    total_len = len(df)
    train_size = int(total_len * 0.7)
    val_size = int(total_len * 0.15)
    
    test_df = df.iloc[train_size + val_size:].reset_index(drop=True)
    
    # ‡πÄ‡∏ï‡∏£‡∏µ‡∏¢‡∏°‡∏Ç‡πâ‡∏≠‡∏°‡∏π‡∏•
    test_df['timestamp'] = pd.to_datetime(test_df['timestamp'])
    test_df['date'] = test_df['timestamp'].dt.date
    test_df.sort_values(['date', 'tic'], inplace=True)
    test_df.reset_index(drop=True, inplace=True)
    
    # ‡∏™‡∏£‡πâ‡∏≤‡∏á test environment - ‡πÅ‡∏Å‡πâ‡πÑ‡∏Ç‡∏õ‡∏±‡∏ç‡∏´‡∏≤ duplicate df parameter
    env_kwargs = env_config['env_kwargs'].copy()
    
    # ‡∏•‡∏ö df ‡∏≠‡∏≠‡∏Å‡∏à‡∏≤‡∏Å env_kwargs ‡∏´‡∏≤‡∏Å‡∏°‡∏µ ‡πÄ‡∏û‡∏∑‡πà‡∏≠‡∏õ‡πâ‡∏≠‡∏á‡∏Å‡∏±‡∏ô duplicate parameter
    if 'df' in env_kwargs:
        del env_kwargs['df']
    
    test_env = StockTradingEnv(df=test_df, **env_kwargs)
    
    print(f"‚úÖ Test environment created")
    print(f"üìä Test data: {len(test_df)} rows")
    print(f"üìÖ Date range: {test_df['timestamp'].min()} to {test_df['timestamp'].max()}")
    
    return test_env, test_df

# ‡πÇ‡∏´‡∏•‡∏î‡∏Ç‡πâ‡∏≠‡∏°‡∏π‡∏•‡πÅ‡∏•‡∏∞ models
df, env_config, trained_models = load_evaluation_setup()
test_env, test_df = recreate_test_environment(df, env_config)

print(f"\nüìä Evaluation setup completed:")
print(f"  Available models: {list(trained_models.keys())}")
print(f"  Test data points: {len(test_df)}")
print(f"  Symbols: {test_df['tic'].unique()}")


In [None]:
def load_evaluation_setup():
    """
    ‡πÇ‡∏´‡∏•‡∏î‡∏Ç‡πâ‡∏≠‡∏°‡∏π‡∏•‡∏ó‡∏µ‡πà‡∏à‡∏≥‡πÄ‡∏õ‡πá‡∏ô‡∏™‡∏≥‡∏´‡∏£‡∏±‡∏ö‡∏Å‡∏≤‡∏£‡∏õ‡∏£‡∏∞‡πÄ‡∏°‡∏¥‡∏ô‡∏ú‡∏•
    """
    print("üìÇ Loading evaluation setup...")
    
    # ‡πÇ‡∏´‡∏•‡∏î‡∏Ç‡πâ‡∏≠‡∏°‡∏π‡∏•‡∏ó‡∏µ‡πà‡∏õ‡∏£‡∏∞‡∏°‡∏ß‡∏•‡∏ú‡∏•‡πÅ‡∏•‡πâ‡∏ß
    try:
        pickle_file = os.path.join(PROCESSED_DIR, "processed_crypto_data.pkl")
        with open(pickle_file, 'rb') as f:
            df = pickle.load(f)
        print(f"‚úÖ Loaded processed data")
    except:
        csv_file = os.path.join(PROCESSED_DIR, "processed_crypto_data.csv")
        df = pd.read_csv(csv_file)
        df['timestamp'] = pd.to_datetime(df['timestamp'])
        print(f"‚úÖ Loaded processed data from CSV")
    
    # ‡πÇ‡∏´‡∏•‡∏î environment config
    env_config_file = os.path.join(AGENT_DIR, "environment_config.pkl")
    with open(env_config_file, 'rb') as f:
        env_config = pickle.load(f)
    print(f"‚úÖ Loaded environment config")
    
    # ‡πÇ‡∏´‡∏•‡∏î training results
    training_files = [f for f in os.listdir(MODEL_DIR) if f.startswith('training_info_') and f.endswith('.pkl')]
    
    trained_models = {}
    for training_file in training_files:
        model_name = training_file.replace('training_info_', '').replace('.pkl', '')
        
        with open(os.path.join(MODEL_DIR, training_file), 'rb') as f:
            training_info = pickle.load(f)
        
        # ‡πÇ‡∏´‡∏•‡∏î trained model
        model_path = training_info['model_path']
        if os.path.exists(model_path + '.zip'):
            model_type = training_info['model_name'].split('_')[0].upper()
            
            if model_type == 'PPO':
                model = PPO.load(model_path)
            elif model_type == 'A2C':
                model = A2C.load(model_path)
            elif model_type == 'DDPG':
                model = DDPG.load(model_path)
            elif model_type == 'SAC':
                model = SAC.load(model_path)
            else:
                print(f"‚ö†Ô∏è Unknown model type: {model_type}")
                continue
            
            trained_models[model_name] = {
                'model': model,
                'training_info': training_info,
                'model_type': model_type
            }
            
            print(f"‚úÖ Loaded {model_type} model: {model_name}")
        else:
            print(f"‚ö†Ô∏è Model file not found: {model_path}")
    
    if not trained_models:
        raise ValueError("No trained models found!")
    
    return df, env_config, trained_models

def recreate_test_environment(df, env_config):
    """
    ‡∏™‡∏£‡πâ‡∏≤‡∏á test environment ‡∏™‡∏≥‡∏´‡∏£‡∏±‡∏ö‡∏Å‡∏≤‡∏£‡∏õ‡∏£‡∏∞‡πÄ‡∏°‡∏¥‡∏ô‡∏ú‡∏•
    """
    print("üèõÔ∏è Creating test environment...")
    
    # ‡πÅ‡∏ö‡πà‡∏á‡∏Ç‡πâ‡∏≠‡∏°‡∏π‡∏•‡πÄ‡∏´‡∏°‡∏∑‡∏≠‡∏ô‡πÄ‡∏î‡∏¥‡∏°
    total_len = len(df)
    train_size = int(total_len * 0.7)
    val_size = int(total_len * 0.15)
    
    test_df = df.iloc[train_size + val_size:].reset_index(drop=True)
    
    # ‡πÄ‡∏ï‡∏£‡∏µ‡∏¢‡∏°‡∏Ç‡πâ‡∏≠‡∏°‡∏π‡∏•
    test_df['timestamp'] = pd.to_datetime(test_df['timestamp'])
    test_df['date'] = test_df['timestamp'].dt.date
    test_df.sort_values(['date', 'tic'], inplace=True)
    test_df.reset_index(drop=True, inplace=True)
    
    # ‡∏™‡∏£‡πâ‡∏≤‡∏á test environment
    env_kwargs = env_config['env_kwargs']
    test_env = StockTradingEnv(df=test_df, **env_kwargs)
    
    print(f"‚úÖ Test environment created")
    print(f"üìä Test data: {len(test_df)} rows")
    print(f"üìÖ Date range: {test_df['timestamp'].min()} to {test_df['timestamp'].max()}")
    
    return test_env, test_df

# ‡πÇ‡∏´‡∏•‡∏î‡∏Ç‡πâ‡∏≠‡∏°‡∏π‡∏•‡πÅ‡∏•‡∏∞ models
df, env_config, trained_models = load_evaluation_setup()
test_env, test_df = recreate_test_environment(df, env_config)

print(f"\nüìä Evaluation setup completed:")
print(f"  Available models: {list(trained_models.keys())}")
print(f"  Test data points: {len(test_df)}")
print(f"  Symbols: {test_df['tic'].unique()}")

## Cell 3: ‡∏õ‡∏£‡∏∞‡πÄ‡∏°‡∏¥‡∏ô‡∏ú‡∏• Models ‡∏ö‡∏ô Test Data

In [None]:
def evaluate_model_performance(model_info, test_env, test_df, model_name):
    """
    ‡∏õ‡∏£‡∏∞‡πÄ‡∏°‡∏¥‡∏ô‡∏ú‡∏•‡∏Å‡∏≤‡∏£‡∏ó‡∏≥‡∏á‡∏≤‡∏ô‡∏Ç‡∏≠‡∏á model ‡∏ö‡∏ô test data
    """
    print(f"üìä Evaluating {model_name}...")
    
    try:
        # ‡∏£‡∏±‡∏ô prediction
        account_value, actions = DRLAgent.DRL_prediction(
            model=model_info['model'],
            environment=test_env
        )
        
        # ‡∏Ñ‡∏≥‡∏ô‡∏ß‡∏ì performance metrics
        initial_value = INITIAL_AMOUNT
        final_value = account_value['account_value'].iloc[-1]
        total_return = (final_value - initial_value) / initial_value * 100
        
        # ‡∏Ñ‡∏≥‡∏ô‡∏ß‡∏ì Sharpe ratio
        returns = account_value['account_value'].pct_change().dropna()
        sharpe_ratio = returns.mean() / returns.std() * np.sqrt(252) if returns.std() > 0 else 0
        
        # ‡∏Ñ‡∏≥‡∏ô‡∏ß‡∏ì Maximum Drawdown
        running_max = account_value['account_value'].expanding().max()
        drawdown = (account_value['account_value'] - running_max) / running_max
        max_drawdown = drawdown.min() * 100
        
        # ‡∏Ñ‡∏≥‡∏ô‡∏ß‡∏ì Volatility
        volatility = returns.std() * np.sqrt(252) * 100
        
        # ‡∏à‡∏≥‡∏ô‡∏ß‡∏ô trades
        total_trades = len(actions[actions != 0]) if len(actions) > 0 else 0
        
        results = {
            'model_name': model_name,
            'model_type': model_info['model_type'],
            'initial_value': initial_value,
            'final_value': final_value,
            'total_return': total_return,
            'sharpe_ratio': sharpe_ratio,
            'max_drawdown': max_drawdown,
            'volatility': volatility,
            'total_trades': total_trades,
            'account_values': account_value,
            'actions': actions,
            'daily_returns': returns
        }
        
        print(f"‚úÖ {model_name} evaluation completed")
        print(f"  Total Return: {total_return:.2f}%")
        print(f"  Sharpe Ratio: {sharpe_ratio:.3f}")
        print(f"  Max Drawdown: {max_drawdown:.2f}%")
        print(f"  Volatility: {volatility:.2f}%")
        print(f"  Total Trades: {total_trades}")
        
        return results
        
    except Exception as e:
        print(f"‚ùå Error evaluating {model_name}: {str(e)}")
        return None

def calculate_buy_hold_baseline(test_df, symbols):
    """
    ‡∏Ñ‡∏≥‡∏ô‡∏ß‡∏ì Buy & Hold baseline ‡∏™‡∏≥‡∏´‡∏£‡∏±‡∏ö‡πÄ‡∏õ‡∏£‡∏µ‡∏¢‡∏ö‡πÄ‡∏ó‡∏µ‡∏¢‡∏ö
    """
    print("üìà Calculating Buy & Hold baseline...")
    
    baseline_results = {}
    
    for symbol in symbols:
        symbol_data = test_df[test_df['tic'] == symbol].copy()
        
        if len(symbol_data) > 0:
            # ‡πÉ‡∏ä‡πâ‡∏£‡∏≤‡∏Ñ‡∏≤‡∏õ‡∏¥‡∏î‡∏ó‡∏µ‡πà normalize ‡πÅ‡∏•‡πâ‡∏ß ‡πÅ‡∏ï‡πà‡∏Ñ‡∏≥‡∏ô‡∏ß‡∏ì return ‡∏à‡∏≤‡∏Å‡∏Å‡∏≤‡∏£‡πÄ‡∏õ‡∏•‡∏µ‡πà‡∏¢‡∏ô‡πÅ‡∏õ‡∏•‡∏á
            initial_price = symbol_data['close'].iloc[0]
            final_price = symbol_data['close'].iloc[-1]
            
            # ‡∏Ñ‡∏≥‡∏ô‡∏ß‡∏ì return ‡∏à‡∏≤‡∏Å‡∏Å‡∏≤‡∏£‡πÄ‡∏õ‡∏•‡∏µ‡πà‡∏¢‡∏ô‡πÅ‡∏õ‡∏•‡∏á‡∏Ç‡∏≠‡∏á‡∏£‡∏≤‡∏Ñ‡∏≤ normalized
            price_change = (final_price - initial_price) / abs(initial_price) if initial_price != 0 else 0
            
            # ‡∏à‡∏≥‡∏•‡∏≠‡∏á portfolio value
            portfolio_value = INITIAL_AMOUNT * (1 + price_change)
            total_return = price_change * 100
            
            # ‡∏Ñ‡∏≥‡∏ô‡∏ß‡∏ì metrics ‡∏≠‡∏∑‡πà‡∏ô‡πÜ
            returns = symbol_data['close'].pct_change().dropna()
            sharpe_ratio = returns.mean() / returns.std() * np.sqrt(252) if returns.std() > 0 else 0
            volatility = returns.std() * np.sqrt(252) * 100
            
            # Maximum Drawdown
            cumulative_returns = (1 + returns).cumprod()
            running_max = cumulative_returns.expanding().max()
            drawdown = (cumulative_returns - running_max) / running_max
            max_drawdown = drawdown.min() * 100
            
            baseline_results[symbol] = {
                'total_return': total_return,
                'final_value': portfolio_value,
                'sharpe_ratio': sharpe_ratio,
                'max_drawdown': max_drawdown,
                'volatility': volatility
            }
            
            print(f"  {symbol}: {total_return:.2f}% return")
    
    return baseline_results

# ‡∏õ‡∏£‡∏∞‡πÄ‡∏°‡∏¥‡∏ô‡∏ú‡∏•‡∏ó‡∏∏‡∏Å models
evaluation_results = {}

for model_name, model_info in trained_models.items():
    results = evaluate_model_performance(model_info, test_env, test_df, model_name)
    if results:
        evaluation_results[model_name] = results

# ‡∏Ñ‡∏≥‡∏ô‡∏ß‡∏ì baseline
symbols = test_df['tic'].unique()
baseline_results = calculate_buy_hold_baseline(test_df, symbols)

print(f"\nüìä Evaluation completed for {len(evaluation_results)} models")
print(f"üìà Baseline calculated for {len(baseline_results)} symbols")

## Cell 4: ‡∏™‡∏£‡πâ‡∏≤‡∏á‡∏£‡∏≤‡∏¢‡∏á‡∏≤‡∏ô‡πÄ‡∏õ‡∏£‡∏µ‡∏¢‡∏ö‡πÄ‡∏ó‡∏µ‡∏¢‡∏ö Performance

In [None]:
def create_performance_comparison():
    """
    ‡∏™‡∏£‡πâ‡∏≤‡∏á‡∏ï‡∏≤‡∏£‡∏≤‡∏á‡πÄ‡∏õ‡∏£‡∏µ‡∏¢‡∏ö‡πÄ‡∏ó‡∏µ‡∏¢‡∏ö performance ‡∏Ç‡∏≠‡∏á‡∏ó‡∏∏‡∏Å models
    """
    print("üìä Creating performance comparison...")
    
    # ‡∏™‡∏£‡πâ‡∏≤‡∏á DataFrame ‡∏™‡∏≥‡∏´‡∏£‡∏±‡∏ö‡πÄ‡∏õ‡∏£‡∏µ‡∏¢‡∏ö‡πÄ‡∏ó‡∏µ‡∏¢‡∏ö
    comparison_data = []
    
    # ‡πÄ‡∏û‡∏¥‡πà‡∏°‡∏Ç‡πâ‡∏≠‡∏°‡∏π‡∏• RL models
    for model_name, results in evaluation_results.items():
        comparison_data.append({
            'Model': results['model_type'],
            'Strategy': f"RL-{results['model_type']}",
            'Total Return (%)': results['total_return'],
            'Final Value ($)': results['final_value'],
            'Sharpe Ratio': results['sharpe_ratio'],
            'Max Drawdown (%)': results['max_drawdown'],
            'Volatility (%)': results['volatility'],
            'Total Trades': results['total_trades']
        })
    
    # ‡πÄ‡∏û‡∏¥‡πà‡∏°‡∏Ç‡πâ‡∏≠‡∏°‡∏π‡∏• Baseline strategies
    for symbol, baseline in baseline_results.items():
        comparison_data.append({
            'Model': f"Buy&Hold",
            'Strategy': f"B&H-{symbol}",
            'Total Return (%)': baseline['total_return'],
            'Final Value ($)': baseline['final_value'],
            'Sharpe Ratio': baseline['sharpe_ratio'],
            'Max Drawdown (%)': baseline['max_drawdown'],
            'Volatility (%)': baseline['volatility'],
            'Total Trades': 1  # Buy once and hold
        })
    
    comparison_df = pd.DataFrame(comparison_data)
    comparison_df = comparison_df.sort_values('Total Return (%)', ascending=False)
    
    return comparison_df

def plot_performance_analysis(evaluation_results, baseline_results):
    """
    ‡∏™‡∏£‡πâ‡∏≤‡∏á‡∏Å‡∏£‡∏≤‡∏ü‡∏ß‡∏¥‡πÄ‡∏Ñ‡∏£‡∏≤‡∏∞‡∏´‡πå performance
    """
    print("üìä Creating performance analysis plots...")
    
    fig, axes = plt.subplots(2, 3, figsize=(18, 12))
    
    # Plot 1: Portfolio Values Over Time
    for model_name, results in evaluation_results.items():
        account_values = results['account_values']['account_value']
        axes[0, 0].plot(account_values.values, label=f"{results['model_type']}", linewidth=2)
    
    axes[0, 0].axhline(y=INITIAL_AMOUNT, color='red', linestyle='--', alpha=0.7, label='Initial Value')
    axes[0, 0].set_title('Portfolio Value Over Time')
    axes[0, 0].set_ylabel('Portfolio Value ($)')
    axes[0, 0].legend()
    axes[0, 0].grid(True, alpha=0.3)
    
    # Plot 2: Total Returns Comparison
    model_names = []
    model_returns = []
    colors = []
    
    # RL models
    for model_name, results in evaluation_results.items():
        model_names.append(results['model_type'])
        model_returns.append(results['total_return'])
        colors.append('skyblue')
    
    # Baseline
    for symbol, baseline in baseline_results.items():
        model_names.append(f"B&H-{symbol}")
        model_returns.append(baseline['total_return'])
        colors.append('lightcoral')
    
    bars = axes[0, 1].bar(range(len(model_names)), model_returns, color=colors, alpha=0.7)
    axes[0, 1].set_title('Total Returns Comparison')
    axes[0, 1].set_ylabel('Return (%)')
    axes[0, 1].set_xticks(range(len(model_names)))
    axes[0, 1].set_xticklabels(model_names, rotation=45)
    axes[0, 1].axhline(y=0, color='black', linestyle='-', alpha=0.3)
    
    # ‡πÄ‡∏û‡∏¥‡πà‡∏°‡∏Ñ‡πà‡∏≤‡∏ö‡∏ô‡πÅ‡∏ó‡πà‡∏á‡∏Å‡∏£‡∏≤‡∏ü
    for bar, value in zip(bars, model_returns):
        height = bar.get_height()
        axes[0, 1].text(bar.get_x() + bar.get_width()/2., height + (0.5 if height > 0 else -1.5),
                       f'{value:.1f}%', ha='center', va='bottom' if height > 0 else 'top', fontsize=9)
    
    # Plot 3: Sharpe Ratio Comparison
    sharpe_ratios = []
    
    for model_name, results in evaluation_results.items():
        sharpe_ratios.append(results['sharpe_ratio'])
    
    for symbol, baseline in baseline_results.items():
        sharpe_ratios.append(baseline['sharpe_ratio'])
    
    bars = axes[0, 2].bar(range(len(model_names)), sharpe_ratios, color=colors, alpha=0.7)
    axes[0, 2].set_title('Sharpe Ratio Comparison')
    axes[0, 2].set_ylabel('Sharpe Ratio')
    axes[0, 2].set_xticks(range(len(model_names)))
    axes[0, 2].set_xticklabels(model_names, rotation=45)
    axes[0, 2].axhline(y=0, color='black', linestyle='-', alpha=0.3)
    
    # ‡πÄ‡∏û‡∏¥‡πà‡∏°‡∏Ñ‡πà‡∏≤‡∏ö‡∏ô‡πÅ‡∏ó‡πà‡∏á‡∏Å‡∏£‡∏≤‡∏ü
    for bar, value in zip(bars, sharpe_ratios):
        height = bar.get_height()
        axes[0, 2].text(bar.get_x() + bar.get_width()/2., height + 0.1,
                       f'{value:.2f}', ha='center', va='bottom', fontsize=9)
    
    # Plot 4: Maximum Drawdown Comparison
    max_drawdowns = []
    
    for model_name, results in evaluation_results.items():
        max_drawdowns.append(results['max_drawdown'])
    
    for symbol, baseline in baseline_results.items():
        max_drawdowns.append(baseline['max_drawdown'])
    
    bars = axes[1, 0].bar(range(len(model_names)), max_drawdowns, color=colors, alpha=0.7)
    axes[1, 0].set_title('Maximum Drawdown Comparison')
    axes[1, 0].set_ylabel('Drawdown (%)')
    axes[1, 0].set_xticks(range(len(model_names)))
    axes[1, 0].set_xticklabels(model_names, rotation=45)
    
    # ‡πÄ‡∏û‡∏¥‡πà‡∏°‡∏Ñ‡πà‡∏≤‡∏ö‡∏ô‡πÅ‡∏ó‡πà‡∏á‡∏Å‡∏£‡∏≤‡∏ü
    for bar, value in zip(bars, max_drawdowns):
        height = bar.get_height()
        axes[1, 0].text(bar.get_x() + bar.get_width()/2., height - 1,
                       f'{value:.1f}%', ha='center', va='top', fontsize=9)
    
    # Plot 5: Volatility Comparison
    volatilities = []
    
    for model_name, results in evaluation_results.items():
        volatilities.append(results['volatility'])
    
    for symbol, baseline in baseline_results.items():
        volatilities.append(baseline['volatility'])
    
    bars = axes[1, 1].bar(range(len(model_names)), volatilities, color=colors, alpha=0.7)
    axes[1, 1].set_title('Volatility Comparison')
    axes[1, 1].set_ylabel('Volatility (%)')
    axes[1, 1].set_xticks(range(len(model_names)))
    axes[1, 1].set_xticklabels(model_names, rotation=45)
    
    # ‡πÄ‡∏û‡∏¥‡πà‡∏°‡∏Ñ‡πà‡∏≤‡∏ö‡∏ô‡πÅ‡∏ó‡πà‡∏á‡∏Å‡∏£‡∏≤‡∏ü
    for bar, value in zip(bars, volatilities):
        height = bar.get_height()
        axes[1, 1].text(bar.get_x() + bar.get_width()/2., height + 0.5,
                       f'{value:.1f}%', ha='center', va='bottom', fontsize=9)
    
    # Plot 6: Trading Frequency
    trade_counts = []
    
    for model_name, results in evaluation_results.items():
        trade_counts.append(results['total_trades'])
    
    for symbol, baseline in baseline_results.items():
        trade_counts.append(1)  # Buy & Hold = 1 trade
    
    bars = axes[1, 2].bar(range(len(model_names)), trade_counts, color=colors, alpha=0.7)
    axes[1, 2].set_title('Trading Frequency')
    axes[1, 2].set_ylabel('Number of Trades')
    axes[1, 2].set_xticks(range(len(model_names)))
    axes[1, 2].set_xticklabels(model_names, rotation=45)
    
    # ‡πÄ‡∏û‡∏¥‡πà‡∏°‡∏Ñ‡πà‡∏≤‡∏ö‡∏ô‡πÅ‡∏ó‡πà‡∏á‡∏Å‡∏£‡∏≤‡∏ü
    for bar, value in zip(bars, trade_counts):
        height = bar.get_height()
        axes[1, 2].text(bar.get_x() + bar.get_width()/2., height + 0.5,
                       f'{value}', ha='center', va='bottom', fontsize=9)
    
    plt.tight_layout()
    plt.show()
    
    return fig

# ‡∏™‡∏£‡πâ‡∏≤‡∏á‡∏ï‡∏≤‡∏£‡∏≤‡∏á‡πÄ‡∏õ‡∏£‡∏µ‡∏¢‡∏ö‡πÄ‡∏ó‡∏µ‡∏¢‡∏ö
comparison_df = create_performance_comparison()
print("\nüìä Performance Comparison Table:")
display(comparison_df)

# ‡∏™‡∏£‡πâ‡∏≤‡∏á‡∏Å‡∏£‡∏≤‡∏ü‡∏ß‡∏¥‡πÄ‡∏Ñ‡∏£‡∏≤‡∏∞‡∏´‡πå
fig = plot_performance_analysis(evaluation_results, baseline_results)

# ‡∏ö‡∏±‡∏ô‡∏ó‡∏∂‡∏Å‡∏ú‡∏•‡∏•‡∏±‡∏û‡∏ò‡πå
timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
comparison_df.to_csv(os.path.join(REPORTS_DIR, f'performance_comparison_{timestamp}.csv'), index=False)
fig.savefig(os.path.join(REPORTS_DIR, f'performance_analysis_{timestamp}.png'), dpi=300, bbox_inches='tight')

print(f"\n‚úÖ Results saved to {REPORTS_DIR}")

In [7]:
# Cell 4: Setup ‡πÅ‡∏•‡∏∞‡πÇ‡∏´‡∏•‡∏î‡∏Ç‡πâ‡∏≠‡∏°‡∏π‡∏• - ‡πÅ‡∏Å‡πâ‡πÑ‡∏Ç‡∏õ‡∏±‡∏ç‡∏´‡∏≤ AttributeError ‡πÉ‡∏´‡πâ‡∏™‡∏°‡∏ö‡∏π‡∏£‡∏ì‡πå

def load_evaluation_setup():
    """
    ‡πÇ‡∏´‡∏•‡∏î‡∏Ç‡πâ‡∏≠‡∏°‡∏π‡∏•‡∏ó‡∏µ‡πà‡∏à‡∏≥‡πÄ‡∏õ‡πá‡∏ô‡∏™‡∏≥‡∏´‡∏£‡∏±‡∏ö‡∏Å‡∏≤‡∏£‡∏õ‡∏£‡∏∞‡πÄ‡∏°‡∏¥‡∏ô‡∏ú‡∏•
    """
    print("üìÇ Loading evaluation setup...")
    
    # ‡πÇ‡∏´‡∏•‡∏î‡∏Ç‡πâ‡∏≠‡∏°‡∏π‡∏•‡∏ó‡∏µ‡πà‡∏õ‡∏£‡∏∞‡∏°‡∏ß‡∏•‡∏ú‡∏•‡πÅ‡∏•‡πâ‡∏ß
    try:
        pickle_file = os.path.join(PROCESSED_DIR, "processed_crypto_data.pkl")
        with open(pickle_file, 'rb') as f:
            df = pickle.load(f)
        print(f"‚úÖ Loaded processed data from pickle")
    except Exception as e:
        print(f"‚ö†Ô∏è Pickle load failed: {str(e)}")
        try:
            csv_file = os.path.join(PROCESSED_DIR, "processed_crypto_data.csv")
            df = pd.read_csv(csv_file)
            df['timestamp'] = pd.to_datetime(df['timestamp'])
            print(f"‚úÖ Loaded processed data from CSV")
        except Exception as e2:
            print(f"‚ùå Failed to load data: {str(e2)}")
            raise
    
    # ‡πÇ‡∏´‡∏•‡∏î environment config
    try:
        env_config_file = os.path.join(AGENT_DIR, "environment_config.pkl")
        with open(env_config_file, 'rb') as f:
            env_config = pickle.load(f)
        print(f"‚úÖ Loaded environment config")
    except Exception as e:
        print(f"‚ö†Ô∏è Environment config load failed: {str(e)}")
        env_config = None
    
    # ‡πÇ‡∏´‡∏•‡∏î training results
    if not os.path.exists(MODEL_DIR):
        raise ValueError(f"Model directory not found: {MODEL_DIR}")
        
    training_files = [f for f in os.listdir(MODEL_DIR) if f.startswith('training_info_') and f.endswith('.pkl')]
    
    if not training_files:
        raise ValueError(f"No training info files found in {MODEL_DIR}")
    
    trained_models = {}
    for training_file in training_files:
        try:
            model_name = training_file.replace('training_info_', '').replace('.pkl', '')
            
            with open(os.path.join(MODEL_DIR, training_file), 'rb') as f:
                training_info = pickle.load(f)
            
            # ‡πÇ‡∏´‡∏•‡∏î trained model
            model_path = training_info['model_path']
            if os.path.exists(model_path + '.zip'):
                model_type = training_info['model_name'].split('_')[0].upper()
                
                if model_type == 'PPO':
                    model = PPO.load(model_path)
                elif model_type == 'A2C':
                    model = A2C.load(model_path)
                elif model_type == 'DDPG':
                    model = DDPG.load(model_path)
                elif model_type == 'SAC':
                    model = SAC.load(model_path)
                else:
                    print(f"‚ö†Ô∏è Unknown model type: {model_type}")
                    continue
                
                trained_models[model_name] = {
                    'model': model,
                    'training_info': training_info,
                    'model_type': model_type
                }
                
                print(f"‚úÖ Loaded {model_type} model: {model_name}")
            else:
                print(f"‚ö†Ô∏è Model file not found: {model_path}")
        except Exception as e:
            print(f"‚ö†Ô∏è Error loading {training_file}: {str(e)}")
            continue
    
    if not trained_models:
        raise ValueError("No trained models found!")
    
    return df, env_config, trained_models

def create_safe_test_environment(df, env_config=None):
    """
    ‡∏™‡∏£‡πâ‡∏≤‡∏á test environment ‡∏î‡πâ‡∏ß‡∏¢‡∏Å‡∏≤‡∏£‡∏ï‡∏±‡πâ‡∏á‡∏Ñ‡πà‡∏≤‡∏ó‡∏µ‡πà‡∏õ‡∏•‡∏≠‡∏î‡∏†‡∏±‡∏¢ - ‡πÅ‡∏Å‡πâ‡πÑ‡∏Ç AttributeError ‡∏≠‡∏¢‡πà‡∏≤‡∏á‡∏™‡∏°‡∏ö‡∏π‡∏£‡∏ì‡πå
    """
    print("üèõÔ∏è Creating safe test environment...")
    
    # ‡∏ï‡∏£‡∏ß‡∏à‡∏™‡∏≠‡∏ö data structure ‡∏≠‡∏¢‡πà‡∏≤‡∏á‡∏•‡∏∞‡πÄ‡∏≠‡∏µ‡∏¢‡∏î
    print(f"üîç Input data type: {type(df)}")
    print(f"üîç Data shape: {df.shape}")
    print(f"üîç Data columns: {list(df.columns)}")
    
    # ‡πÅ‡∏ö‡πà‡∏á‡∏Ç‡πâ‡∏≠‡∏°‡∏π‡∏•
    total_len = len(df)
    train_size = int(total_len * 0.7)
    val_size = int(total_len * 0.15)
    
    test_df = df.iloc[train_size + val_size:].copy()
    test_df = test_df.reset_index(drop=True)
    
    # ‡πÄ‡∏ï‡∏£‡∏µ‡∏¢‡∏°‡∏Ç‡πâ‡∏≠‡∏°‡∏π‡∏•‡πÉ‡∏´‡πâ‡∏ñ‡∏π‡∏Å‡∏ï‡πâ‡∏≠‡∏á
    if 'timestamp' in test_df.columns:
        test_df['timestamp'] = pd.to_datetime(test_df['timestamp'])
        test_df['date'] = test_df['timestamp'].dt.date
    
    # Sort ‡∏Ç‡πâ‡∏≠‡∏°‡∏π‡∏•‡∏ï‡∏≤‡∏° date ‡πÅ‡∏•‡∏∞ tic
    if 'date' in test_df.columns and 'tic' in test_df.columns:
        test_df = test_df.sort_values(['date', 'tic']).reset_index(drop=True)
    elif 'timestamp' in test_df.columns and 'tic' in test_df.columns:
        test_df = test_df.sort_values(['timestamp', 'tic']).reset_index(drop=True)
    
    print(f"üîç Test data shape after processing: {test_df.shape}")
    
    # ‡∏ï‡∏£‡∏ß‡∏à‡∏™‡∏≠‡∏ö columns ‡∏ó‡∏µ‡πà‡∏à‡∏≥‡πÄ‡∏õ‡πá‡∏ô
    if 'tic' not in test_df.columns:
        raise ValueError("Missing 'tic' column in test data")
    
    unique_symbols = test_df['tic'].unique()
    print(f"üîç Symbols: {unique_symbols}")
    
    # ‡∏ï‡∏£‡∏ß‡∏à‡∏™‡∏≠‡∏ö price columns
    price_cols = ['close', 'high', 'low', 'open']
    missing_price_cols = [col for col in price_cols if col not in test_df.columns]
    if missing_price_cols:
        print(f"‚ö†Ô∏è Missing price columns: {missing_price_cols}")
    
    if 'close' in test_df.columns:
        print(f"üîç Close column type: {test_df['close'].dtype}")
        print(f"üîç Close sample values: {test_df['close'].head().tolist()}")
        
        # ‡∏ï‡∏£‡∏ß‡∏à‡∏™‡∏≠‡∏ö‡∏ß‡πà‡∏≤‡∏°‡∏µ NaN values ‡∏´‡∏£‡∏∑‡∏≠‡πÑ‡∏°‡πà
        if test_df['close'].isna().any():
            print(f"‚ö†Ô∏è Found {test_df['close'].isna().sum()} NaN values in close column")
            test_df = test_df.dropna(subset=['close'])
            print(f"üîß After dropping NaN: {test_df.shape}")
    
    # ‡∏´‡∏≤ technical indicators
    tech_cols = [col for col in test_df.columns if col.startswith(('macd', 'rsi', 'cci', 'adx'))]
    print(f"üîç Technical indicators found: {tech_cols}")
    
    stock_dim = len(unique_symbols)
    print(f"üîç Stock dimension: {stock_dim}")
    
    # ‡∏™‡∏£‡πâ‡∏≤‡∏á environment ‡∏î‡πâ‡∏ß‡∏¢‡∏ß‡∏¥‡∏ò‡∏µ‡∏ó‡∏µ‡πà‡∏õ‡∏•‡∏≠‡∏î‡∏†‡∏±‡∏¢‡∏™‡∏∏‡∏î
    test_env = None
    creation_method = "Unknown"
    
    # Method 1: ‡πÉ‡∏ä‡πâ saved config
    if env_config and 'env_kwargs' in env_config:
        try:
            env_kwargs = env_config['env_kwargs'].copy()
            # ‡∏•‡∏ö df ‡∏≠‡∏≠‡∏Å‡πÄ‡∏û‡∏∑‡πà‡∏≠‡∏õ‡πâ‡∏≠‡∏á‡∏Å‡∏±‡∏ô conflict
            if 'df' in env_kwargs:
                del env_kwargs['df']
            
            test_env = StockTradingEnv(df=test_df, **env_kwargs)
            creation_method = "saved config"
            print(f"‚úÖ Test environment created with saved config")
        except Exception as e:
            print(f"‚ö†Ô∏è Error with saved config: {str(e)}")
            test_env = None
    
    # Method 2: ‡πÉ‡∏ä‡πâ safe config
    if test_env is None:
        try:
            print("üîÑ Trying safe configuration...")
            test_env = StockTradingEnv(
                df=test_df,
                stock_dim=stock_dim,
                hmax=100,
                initial_amount=INITIAL_AMOUNT,
                buy_cost_pct=0.001,
                sell_cost_pct=0.001,
                reward_scaling=1e-4,
                tech_indicator_list=tech_cols[:5] if len(tech_cols) > 5 else tech_cols,  # ‡∏à‡∏≥‡∏Å‡∏±‡∏î‡∏à‡∏≥‡∏ô‡∏ß‡∏ô indicators
                print_verbosity=0
            )
            creation_method = "safe config"
            print(f"‚úÖ Test environment created with safe config")
        except Exception as e:
            print(f"‚ö†Ô∏è Error with safe config: {str(e)}")
            test_env = None
    
    # Method 3: ‡πÉ‡∏ä‡πâ minimal config
    if test_env is None:
        try:
            print("üîÑ Trying minimal configuration...")
            test_env = StockTradingEnv(
                df=test_df,
                stock_dim=stock_dim,
                initial_amount=INITIAL_AMOUNT,
                print_verbosity=0
            )
            creation_method = "minimal config"
            print(f"‚úÖ Test environment created with minimal config")
        except Exception as e:
            print(f"‚ö†Ô∏è Error with minimal config: {str(e)}")
            test_env = None
    
    # Method 4: ‡πÉ‡∏ä‡πâ absolute default
    if test_env is None:
        try:
            print("üîÑ Trying absolute default configuration...")
            # ‡∏•‡∏≠‡∏á‡∏õ‡∏£‡∏±‡∏ö‡πÅ‡∏ï‡πà‡∏á DataFrame ‡πÉ‡∏´‡πâ‡πÄ‡∏´‡∏°‡∏≤‡∏∞‡∏™‡∏°
            simple_df = test_df[['tic', 'close', 'high', 'low', 'open', 'volume']].copy()
            simple_df = simple_df.dropna()
            
            test_env = StockTradingEnv(df=simple_df)
            test_df = simple_df  # ‡πÉ‡∏ä‡πâ simplified dataframe
            creation_method = "absolute default"
            print(f"‚úÖ Test environment created with absolute default config")
        except Exception as e:
            print(f"‚ùå All environment creation methods failed: {str(e)}")
            raise RuntimeError(f"Cannot create test environment. Last error: {str(e)}")
    
    # ‡∏™‡∏£‡∏∏‡∏õ‡∏ú‡∏•‡∏Å‡∏≤‡∏£‡∏™‡∏£‡πâ‡∏≤‡∏á environment
    print(f"\nüéØ Environment Creation Summary:")
    print(f"  Method used: {creation_method}")
    print(f"  Test data shape: {test_df.shape}")
    print(f"  Date range: {test_df['timestamp'].min() if 'timestamp' in test_df.columns else 'N/A'} to {test_df['timestamp'].max() if 'timestamp' in test_df.columns else 'N/A'}")
    print(f"  Symbols: {test_df['tic'].unique()}")
    
    return test_env, test_df

# Main execution
try:
    print("üöÄ Starting comprehensive evaluation setup...")
    
    # Step 1: Load data and models
    df, env_config, trained_models = load_evaluation_setup()
    
    # Step 2: Create test environment
    test_env, test_df = create_safe_test_environment(df, env_config)
    
    # Step 3: Verify setup
    print(f"\nüìä Setup Verification:")
    print(f"  ‚úÖ Data loaded: {df.shape}")
    print(f"  ‚úÖ Models available: {list(trained_models.keys())}")
    print(f"  ‚úÖ Test data prepared: {test_df.shape}")
    print(f"  ‚úÖ Test environment created")
    print(f"  ‚úÖ Symbols in test set: {test_df['tic'].unique()}")
    
    # ‡∏ï‡∏±‡∏ß‡πÅ‡∏õ‡∏£‡∏™‡∏≥‡∏´‡∏£‡∏±‡∏ö‡πÉ‡∏ä‡πâ‡πÉ‡∏ô cells ‡∏ñ‡∏±‡∏î‡πÑ‡∏õ
    SETUP_SUCCESS = True
    print(f"\nüéâ Evaluation setup completed successfully!")
    
except Exception as e:
    print(f"\n‚ùå Setup failed with error: {str(e)}")
    print("üîß Please check your data and model files")
    import traceback
    print("üìã Full error traceback:")
    traceback.print_exc()
    SETUP_SUCCESS = False


üöÄ Starting comprehensive evaluation setup...
üìÇ Loading evaluation setup...
‚úÖ Loaded processed data from pickle
‚úÖ Loaded environment config
‚úÖ Loaded PPO model: ppo
üèõÔ∏è Creating safe test environment...
üîç Input data type: <class 'pandas.core.frame.DataFrame'>
üîç Data shape: (5480, 18)
üîç Data columns: ['date', 'Open', 'High', 'Low', 'Close', 'Volume', 'tic', 'sma_20', 'ema_20', 'rsi', 'ema_12', 'ema_26', 'macd', 'macd_signal', 'returns', 'volatility', 'price_sma_ratio', 'timestamp']
üîç Test data shape after processing: (823, 18)
üîç Symbols: ['BTC-USD' 'ETH-USD' 'SOL-USD' 'ADA-USD' 'BNB-USD']
‚ö†Ô∏è Missing price columns: ['close', 'high', 'low', 'open']
üîç Technical indicators found: ['rsi', 'macd', 'macd_signal']
üîç Stock dimension: 5
‚ö†Ô∏è Error with saved config: 'Series' object has no attribute 'close'
üîÑ Trying safe configuration...
‚ö†Ô∏è Error with safe config: StockTradingEnv.__init__() missing 3 required positional arguments: 'num_stock_shares',

Traceback (most recent call last):
  File "C:\Users\cyber\AppData\Local\Temp\ipykernel_35056\1376414575.py", line 211, in create_safe_test_environment
    simple_df = test_df[['tic', 'close', 'high', 'low', 'open', 'volume']].copy()
                ~~~~~~~^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "d:\finrl_minimal_crypto\.venv\Lib\site-packages\pandas\core\frame.py", line 4108, in __getitem__
    indexer = self.columns._get_indexer_strict(key, "columns")[1]
              ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "d:\finrl_minimal_crypto\.venv\Lib\site-packages\pandas\core\indexes\base.py", line 6200, in _get_indexer_strict
    self._raise_if_missing(keyarr, indexer, axis_name)
  File "d:\finrl_minimal_crypto\.venv\Lib\site-packages\pandas\core\indexes\base.py", line 6252, in _raise_if_missing
    raise KeyError(f"{not_found} not in index")
KeyError: "['close', 'high', 'low', 'open', 'volume'] not in index"

During handling of the above exception, another 

In [None]:
# Cell 5: ‡∏õ‡∏£‡∏∞‡πÄ‡∏°‡∏¥‡∏ô‡∏ú‡∏• Models ‡∏ö‡∏ô Test Data

def evaluate_model_performance(model_info, test_env, test_df, model_name):
    """
    ‡∏õ‡∏£‡∏∞‡πÄ‡∏°‡∏¥‡∏ô‡∏ú‡∏•‡∏Å‡∏≤‡∏£‡∏ó‡∏≥‡∏á‡∏≤‡∏ô‡∏Ç‡∏≠‡∏á model ‡∏ö‡∏ô test data
    """
    print(f"üìä Evaluating {model_name}...")
    
    try:
        # ‡∏£‡∏±‡∏ô prediction
        account_value, actions = DRLAgent.DRL_prediction(
            model=model_info['model'],
            environment=test_env
        )
        
        # ‡∏Ñ‡∏≥‡∏ô‡∏ß‡∏ì performance metrics
        initial_value = INITIAL_AMOUNT
        final_value = account_value['account_value'].iloc[-1]
        total_return = (final_value - initial_value) / initial_value * 100
        
        # ‡∏Ñ‡∏≥‡∏ô‡∏ß‡∏ì Sharpe ratio
        returns = account_value['account_value'].pct_change().dropna()
        sharpe_ratio = returns.mean() / returns.std() * np.sqrt(252) if returns.std() > 0 else 0
        
        # ‡∏Ñ‡∏≥‡∏ô‡∏ß‡∏ì Maximum Drawdown
        running_max = account_value['account_value'].expanding().max()
        drawdown = (account_value['account_value'] - running_max) / running_max
        max_drawdown = drawdown.min() * 100
        
        # ‡∏Ñ‡∏≥‡∏ô‡∏ß‡∏ì Volatility
        volatility = returns.std() * np.sqrt(252) * 100
        
        # ‡∏à‡∏≥‡∏ô‡∏ß‡∏ô trades
        total_trades = len(actions[actions != 0]) if len(actions) > 0 else 0
        
        results = {
            'model_name': model_name,
            'model_type': model_info['model_type'],
            'initial_value': initial_value,
            'final_value': final_value,
            'total_return': total_return,
            'sharpe_ratio': sharpe_ratio,
            'max_drawdown': max_drawdown,
            'volatility': volatility,
            'total_trades': total_trades,
            'account_values': account_value,
            'actions': actions,
            'daily_returns': returns
        }
        
        print(f"‚úÖ {model_name} evaluation completed")
        print(f"  Total Return: {total_return:.2f}%")
        print(f"  Sharpe Ratio: {sharpe_ratio:.3f}")
        print(f"  Max Drawdown: {max_drawdown:.2f}%")
        print(f"  Final Value: ${final_value:,.2f}")
        
        return results
        
    except Exception as e:
        print(f"‚ùå Error evaluating {model_name}: {str(e)}")
        return None

def calculate_buy_hold_baseline(test_df, symbols):
    """
    ‡∏Ñ‡∏≥‡∏ô‡∏ß‡∏ì Buy & Hold baseline ‡∏™‡∏≥‡∏´‡∏£‡∏±‡∏ö‡πÄ‡∏õ‡∏£‡∏µ‡∏¢‡∏ö‡πÄ‡∏ó‡∏µ‡∏¢‡∏ö
    """
    print("üìà Calculating Buy & Hold baseline...")
    
    baseline_results = {}
    
    for symbol in symbols:
        symbol_data = test_df[test_df['tic'] == symbol].copy()
        
        if len(symbol_data) > 0:
            # ‡πÉ‡∏ä‡πâ‡∏£‡∏≤‡∏Ñ‡∏≤‡∏õ‡∏¥‡∏î‡∏ó‡∏µ‡πà normalize ‡πÅ‡∏•‡πâ‡∏ß
            initial_price = symbol_data['close'].iloc[0]
            final_price = symbol_data['close'].iloc[-1]
            
            # ‡∏Ñ‡∏≥‡∏ô‡∏ß‡∏ì return
            price_change = (final_price - initial_price) / abs(initial_price) if initial_price != 0 else 0
            portfolio_value = INITIAL_AMOUNT * (1 + price_change)
            total_return = price_change * 100
            
            # ‡∏Ñ‡∏≥‡∏ô‡∏ß‡∏ì metrics ‡∏≠‡∏∑‡πà‡∏ô‡πÜ
            returns = symbol_data['close'].pct_change().dropna()
            sharpe_ratio = returns.mean() / returns.std() * np.sqrt(252) if returns.std() > 0 else 0
            volatility = returns.std() * np.sqrt(252) * 100
            
            # Maximum Drawdown
            cumulative_returns = (1 + returns).cumprod()
            running_max = cumulative_returns.expanding().max()
            drawdown = (cumulative_returns - running_max) / running_max
            max_drawdown = drawdown.min() * 100
            
            baseline_results[symbol] = {
                'total_return': total_return,
                'final_value': portfolio_value,
                'sharpe_ratio': sharpe_ratio,
                'max_drawdown': max_drawdown,
                'volatility': volatility
            }
            
            print(f"  {symbol}: {total_return:.2f}% return")
    
    return baseline_results

# ‡∏õ‡∏£‡∏∞‡πÄ‡∏°‡∏¥‡∏ô‡∏ú‡∏• (‡πÄ‡∏â‡∏û‡∏≤‡∏∞‡πÄ‡∏°‡∏∑‡πà‡∏≠ setup ‡∏™‡∏≥‡πÄ‡∏£‡πá‡∏à)
if SETUP_SUCCESS:
    print("üöÄ Starting model evaluation...")
    
    # ‡∏õ‡∏£‡∏∞‡πÄ‡∏°‡∏¥‡∏ô‡∏ú‡∏•‡∏ó‡∏∏‡∏Å models
    evaluation_results = {}
    
    for model_name, model_info in trained_models.items():
        results = evaluate_model_performance(model_info, test_env, test_df, model_name)
        if results:
            evaluation_results[model_name] = results
    
    # ‡∏Ñ‡∏≥‡∏ô‡∏ß‡∏ì baseline
    symbols = test_df['tic'].unique()
    baseline_results = calculate_buy_hold_baseline(test_df, symbols)
    
    print(f"\nüìä Evaluation completed:")
    print(f"  ‚úÖ Models evaluated: {len(evaluation_results)}")
    print(f"  üìà Baselines calculated: {len(baseline_results)}")
    
    EVALUATION_SUCCESS = True
else:
    print("‚ùå Skipping evaluation due to setup failure")
    EVALUATION_SUCCESS = False


In [None]:
# Cell 6: ‡∏™‡∏£‡πâ‡∏≤‡∏á‡∏£‡∏≤‡∏¢‡∏á‡∏≤‡∏ô‡πÄ‡∏õ‡∏£‡∏µ‡∏¢‡∏ö‡πÄ‡∏ó‡∏µ‡∏¢‡∏ö‡πÅ‡∏•‡∏∞‡∏Å‡∏£‡∏≤‡∏ü Performance

def create_performance_comparison():
    """
    ‡∏™‡∏£‡πâ‡∏≤‡∏á‡∏ï‡∏≤‡∏£‡∏≤‡∏á‡πÄ‡∏õ‡∏£‡∏µ‡∏¢‡∏ö‡πÄ‡∏ó‡∏µ‡∏¢‡∏ö performance ‡∏Ç‡∏≠‡∏á‡∏ó‡∏∏‡∏Å models
    """
    print("üìä Creating performance comparison...")
    
    # ‡∏™‡∏£‡πâ‡∏≤‡∏á DataFrame ‡∏™‡∏≥‡∏´‡∏£‡∏±‡∏ö‡πÄ‡∏õ‡∏£‡∏µ‡∏¢‡∏ö‡πÄ‡∏ó‡∏µ‡∏¢‡∏ö
    comparison_data = []
    
    # ‡πÄ‡∏û‡∏¥‡πà‡∏°‡∏Ç‡πâ‡∏≠‡∏°‡∏π‡∏• RL models
    for model_name, results in evaluation_results.items():
        comparison_data.append({
            'Model': results['model_type'],
            'Strategy': f"RL-{results['model_type']}",
            'Total Return (%)': results['total_return'],
            'Final Value ($)': results['final_value'],
            'Sharpe Ratio': results['sharpe_ratio'],
            'Max Drawdown (%)': results['max_drawdown'],
            'Volatility (%)': results['volatility'],
            'Total Trades': results['total_trades']
        })
    
    # ‡πÄ‡∏û‡∏¥‡πà‡∏°‡∏Ç‡πâ‡∏≠‡∏°‡∏π‡∏• Baseline strategies
    for symbol, baseline in baseline_results.items():
        comparison_data.append({
            'Model': f"Buy&Hold",
            'Strategy': f"B&H-{symbol}",
            'Total Return (%)': baseline['total_return'],
            'Final Value ($)': baseline['final_value'],
            'Sharpe Ratio': baseline['sharpe_ratio'],
            'Max Drawdown (%)': baseline['max_drawdown'],
            'Volatility (%)': baseline['volatility'],
            'Total Trades': 1  # Buy once and hold
        })
    
    comparison_df = pd.DataFrame(comparison_data)
    comparison_df = comparison_df.sort_values('Total Return (%)', ascending=False)
    
    return comparison_df

def plot_performance_summary():
    """
    ‡∏™‡∏£‡πâ‡∏≤‡∏á‡∏Å‡∏£‡∏≤‡∏ü‡∏™‡∏£‡∏∏‡∏õ‡∏ú‡∏•‡∏Å‡∏≤‡∏£‡∏õ‡∏£‡∏∞‡πÄ‡∏°‡∏¥‡∏ô
    """
    print("üìä Creating performance summary plots...")
    
    fig, axes = plt.subplots(2, 2, figsize=(15, 10))
    
    # ‡∏Ç‡πâ‡∏≠‡∏°‡∏π‡∏•‡∏™‡∏≥‡∏´‡∏£‡∏±‡∏ö plot
    model_names = []
    model_returns = []
    model_sharpe = []
    model_drawdown = []
    colors = []
    
    # RL models
    for model_name, results in evaluation_results.items():
        model_names.append(results['model_type'])
        model_returns.append(results['total_return'])
        model_sharpe.append(results['sharpe_ratio'])
        model_drawdown.append(results['max_drawdown'])
        colors.append('skyblue')
    
    # Baseline
    for symbol, baseline in baseline_results.items():
        model_names.append(f"B&H-{symbol}")
        model_returns.append(baseline['total_return'])
        model_sharpe.append(baseline['sharpe_ratio'])
        model_drawdown.append(baseline['max_drawdown'])
        colors.append('lightcoral')
    
    # Plot 1: Total Returns
    bars = axes[0, 0].bar(range(len(model_names)), model_returns, color=colors, alpha=0.7)
    axes[0, 0].set_title('Total Returns Comparison', fontsize=14, fontweight='bold')
    axes[0, 0].set_ylabel('Return (%)')
    axes[0, 0].set_xticks(range(len(model_names)))
    axes[0, 0].set_xticklabels(model_names, rotation=45, ha='right')
    axes[0, 0].axhline(y=0, color='black', linestyle='-', alpha=0.3)
    axes[0, 0].grid(True, alpha=0.3)
    
    # ‡πÄ‡∏û‡∏¥‡πà‡∏°‡∏Ñ‡πà‡∏≤‡∏ö‡∏ô‡πÅ‡∏ó‡πà‡∏á‡∏Å‡∏£‡∏≤‡∏ü
    for bar, value in zip(bars, model_returns):
        height = bar.get_height()
        axes[0, 0].text(bar.get_x() + bar.get_width()/2., height + (1 if height > 0 else -3),
                       f'{value:.1f}%', ha='center', va='bottom' if height > 0 else 'top', fontsize=9)
    
    # Plot 2: Sharpe Ratio
    bars = axes[0, 1].bar(range(len(model_names)), model_sharpe, color=colors, alpha=0.7)
    axes[0, 1].set_title('Sharpe Ratio Comparison', fontsize=14, fontweight='bold')
    axes[0, 1].set_ylabel('Sharpe Ratio')
    axes[0, 1].set_xticks(range(len(model_names)))
    axes[0, 1].set_xticklabels(model_names, rotation=45, ha='right')
    axes[0, 1].axhline(y=0, color='black', linestyle='-', alpha=0.3)
    axes[0, 1].grid(True, alpha=0.3)
    
    # Plot 3: Maximum Drawdown
    bars = axes[1, 0].bar(range(len(model_names)), model_drawdown, color=colors, alpha=0.7)
    axes[1, 0].set_title('Maximum Drawdown Comparison', fontsize=14, fontweight='bold')
    axes[1, 0].set_ylabel('Drawdown (%)')
    axes[1, 0].set_xticks(range(len(model_names)))
    axes[1, 0].set_xticklabels(model_names, rotation=45, ha='right')
    axes[1, 0].grid(True, alpha=0.3)
    
    # Plot 4: Portfolio Evolution (‡πÄ‡∏â‡∏û‡∏≤‡∏∞ RL models)
    for model_name, results in evaluation_results.items():
        account_values = results['account_values']['account_value']
        axes[1, 1].plot(account_values.values, label=f"{results['model_type']}", linewidth=2)
    
    axes[1, 1].axhline(y=INITIAL_AMOUNT, color='red', linestyle='--', alpha=0.7, label='Initial Value')
    axes[1, 1].set_title('Portfolio Value Evolution', fontsize=14, fontweight='bold')
    axes[1, 1].set_ylabel('Portfolio Value ($)')
    axes[1, 1].set_xlabel('Time Steps')
    axes[1, 1].legend()
    axes[1, 1].grid(True, alpha=0.3)
    
    plt.tight_layout()
    return fig

# ‡∏™‡∏£‡πâ‡∏≤‡∏á‡∏£‡∏≤‡∏¢‡∏á‡∏≤‡∏ô (‡πÄ‡∏â‡∏û‡∏≤‡∏∞‡πÄ‡∏°‡∏∑‡πà‡∏≠‡∏õ‡∏£‡∏∞‡πÄ‡∏°‡∏¥‡∏ô‡∏ú‡∏•‡∏™‡∏≥‡πÄ‡∏£‡πá‡∏à)
if SETUP_SUCCESS and EVALUATION_SUCCESS:
    print("üìä Creating performance reports...")
    
    # ‡∏™‡∏£‡πâ‡∏≤‡∏á‡∏ï‡∏≤‡∏£‡∏≤‡∏á‡πÄ‡∏õ‡∏£‡∏µ‡∏¢‡∏ö‡πÄ‡∏ó‡∏µ‡∏¢‡∏ö
    comparison_df = create_performance_comparison()
    print("\nüìä Performance Comparison Table:")
    print(comparison_df.to_string(index=False))
    
    # ‡∏™‡∏£‡πâ‡∏≤‡∏á‡∏Å‡∏£‡∏≤‡∏ü
    fig = plot_performance_summary()
    plt.show()
    
    # ‡∏ö‡∏±‡∏ô‡∏ó‡∏∂‡∏Å‡∏ú‡∏•‡∏•‡∏±‡∏û‡∏ò‡πå
    timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
    comparison_df.to_csv(os.path.join(REPORTS_DIR, f'performance_comparison_{timestamp}.csv'), index=False)
    fig.savefig(os.path.join(REPORTS_DIR, f'performance_analysis_{timestamp}.png'), 
                dpi=300, bbox_inches='tight')
    
    print(f"\n‚úÖ Reports saved to {REPORTS_DIR}/")
    print(f"   - performance_comparison_{timestamp}.csv")
    print(f"   - performance_analysis_{timestamp}.png")
    
    # ‡∏™‡∏£‡∏∏‡∏õ‡∏ú‡∏•‡∏•‡∏±‡∏û‡∏ò‡πå
    if len(evaluation_results) > 0:
        best_rl_model = max(evaluation_results.items(), key=lambda x: x[1]['total_return'])
        print(f"\nüèÜ Best RL Model: {best_rl_model[0]} ({best_rl_model[1]['total_return']:.2f}%)")
    
    if len(baseline_results) > 0:
        best_baseline = max(baseline_results.items(), key=lambda x: x[1]['total_return'])
        print(f"üìà Best Baseline: {best_baseline[0]} ({best_baseline[1]['total_return']:.2f}%)")
    
    print(f"\nüéØ Summary:")
    print(f"   Models evaluated: {len(evaluation_results)}")
    print(f"   Baselines calculated: {len(baseline_results)}")
    print(f"   Reports generated: ‚úÖ")
    
else:
    print("‚ùå Skipping report generation due to evaluation failure")
