# 4. การประเมินผลและปรับปรุง Agent (Agent Evaluation & Improvement)
## ขั้นตอนการประเมินและปรับปรุง RL Agent สำหรับ Crypto Trading

### เป้าหมาย:
- โหลด Trained Model
- ประเมินผลบน Test Data
- เปรียบเทียบกับ Baseline (Buy & Hold)
- วิเคราะห์ Trading Patterns
- ปรับปรุง Model หากจำเป็น
- เตรียมสำหรับ Live Trading

## Cell 1: Import Libraries และโหลด Trained Model

In [None]:
import sys
import warnings
warnings.filterwarnings('ignore')

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import os
import pickle
import torch
from datetime import datetime
from stable_baselines3 import PPO, A2C, DDPG, SAC

# FinRL imports
from finrl.meta.env_stock_trading.env_stocktrading import StockTradingEnv
from finrl.agents.stablebaselines3.models import DRLAgent

# Import config
from config import *

# Directory layout used by this notebook
PROCESSED_DIR = "processed_data"
MODEL_DIR = "models"
AGENT_DIR = "agents"
EVALUATION_DIR = "evaluation"
REPORTS_DIR = "reports"

# Only the output directories are created here; the input directories are
# read from later and are expected to exist already.
for dir_name in [EVALUATION_DIR, REPORTS_DIR]:
    # exist_ok avoids the check-then-create race of os.path.exists + makedirs
    os.makedirs(dir_name, exist_ok=True)

print("📁 Setup directories completed")
print("📊 Starting Agent Evaluation Process")

# Set plotting style
plt.style.use('default')
sns.set_palette("husl")

## Cell 2: โหลด Models และข้อมูล

In [None]:
def load_evaluation_setup():
    """
    Load everything needed to evaluate the trained agents.

    Reads the processed dataset (pickle first, CSV as a fallback), the pickled
    environment configuration from ``AGENT_DIR``, and every
    ``training_info_*.pkl`` file in ``MODEL_DIR`` together with the
    Stable-Baselines3 model each one points to.

    Returns:
        tuple: ``(df, env_config, trained_models)``. ``trained_models`` maps a
        model name (derived from the training-info file name) to a dict with
        the keys ``'model'``, ``'training_info'`` and ``'model_type'``.

    Raises:
        ValueError: If no trained model could be loaded.
    """
    print("📂 Loading evaluation setup...")

    # NOTE(security): pickle.load executes arbitrary code from the file.
    # Only load artefacts produced by your own training pipeline.

    # Load the processed dataset; fall back to CSV when the pickle is
    # missing or cannot be unpickled (e.g. library version mismatch).
    pickle_file = os.path.join(PROCESSED_DIR, "processed_crypto_data.pkl")
    try:
        with open(pickle_file, 'rb') as f:
            df = pickle.load(f)
        print(f"✅ Loaded processed data")
    except (OSError, pickle.UnpicklingError, AttributeError, EOFError,
            ImportError, IndexError) as exc:
        print(f"⚠️ Could not load {pickle_file} ({exc!r}); falling back to CSV")
        csv_file = os.path.join(PROCESSED_DIR, "processed_crypto_data.csv")
        df = pd.read_csv(csv_file)
        df['timestamp'] = pd.to_datetime(df['timestamp'])
        print(f"✅ Loaded processed data from CSV")

    # Load the environment config
    env_config_file = os.path.join(AGENT_DIR, "environment_config.pkl")
    with open(env_config_file, 'rb') as f:
        env_config = pickle.load(f)
    print(f"✅ Loaded environment config")

    # Discover training results; sorted so the model order (and therefore the
    # order of tables and plots built from it) is reproducible across runs.
    training_files = sorted(
        f for f in os.listdir(MODEL_DIR)
        if f.startswith('training_info_') and f.endswith('.pkl')
    )

    # Algorithm prefix -> Stable-Baselines3 class used to restore the model
    loaders = {'PPO': PPO, 'A2C': A2C, 'DDPG': DDPG, 'SAC': SAC}

    trained_models = {}
    for training_file in training_files:
        model_name = training_file.replace('training_info_', '').replace('.pkl', '')

        with open(os.path.join(MODEL_DIR, training_file), 'rb') as f:
            training_info = pickle.load(f)

        # Accept a stored path both with and without the '.zip' suffix
        model_path = training_info['model_path']
        has_archive = os.path.exists(model_path + '.zip') or (
            model_path.endswith('.zip') and os.path.exists(model_path)
        )
        if not has_archive:
            print(f"⚠️ Model file not found: {model_path}")
            continue

        # The algorithm is encoded as the prefix of the stored model name
        model_type = training_info['model_name'].split('_')[0].upper()
        loader = loaders.get(model_type)
        if loader is None:
            print(f"⚠️ Unknown model type: {model_type}")
            continue

        trained_models[model_name] = {
            'model': loader.load(model_path),
            'training_info': training_info,
            'model_type': model_type
        }

        print(f"✅ Loaded {model_type} model: {model_name}")

    if not trained_models:
        raise ValueError("No trained models found!")

    return df, env_config, trained_models

def recreate_test_environment(df, env_config):
    """
    Build the held-out test environment used for evaluation.

    The test split is the tail of ``df`` that follows the first 70% (train)
    and the next 15% (validation) of the rows. The slice gets a ``date``
    column derived from ``timestamp`` and is ordered by ``date`` then ``tic``
    before being handed to ``StockTradingEnv`` together with
    ``env_config['env_kwargs']``.

    Returns:
        tuple: ``(test_env, test_df)``.
    """
    print("🏛️ Creating test environment...")

    # Row offset at which the test split begins (after train + validation)
    n_rows = len(df)
    test_start = int(n_rows * 0.7) + int(n_rows * 0.15)
    test_df = df.iloc[test_start:].reset_index(drop=True)

    # Normalise the timestamp column and derive the calendar date from it
    test_df['timestamp'] = pd.to_datetime(test_df['timestamp'])
    test_df['date'] = test_df['timestamp'].dt.date
    test_df = test_df.sort_values(['date', 'tic']).reset_index(drop=True)

    # Instantiate the environment with the stored keyword arguments
    test_env = StockTradingEnv(df=test_df, **env_config['env_kwargs'])

    first_ts = test_df['timestamp'].min()
    last_ts = test_df['timestamp'].max()
    print(f"✅ Test environment created")
    print(f"📊 Test data: {len(test_df)} rows")
    print(f"📅 Date range: {first_ts} to {last_ts}")

    return test_env, test_df

# Load the data and trained models, then rebuild the test environment
df, env_config, trained_models = load_evaluation_setup()
test_env, test_df = recreate_test_environment(df, env_config)

# One-shot summary of what is available for evaluation
print(
    f"\n📊 Evaluation setup completed:",
    f"  Available models: {list(trained_models.keys())}",
    f"  Test data points: {len(test_df)}",
    f"  Symbols: {test_df['tic'].unique()}",
    sep="\n",
)

## Cell 3: ประเมินผล Models บน Test Data

In [None]:
def _count_trades(actions):
    """Return how many non-zero numeric entries the action log contains."""
    if actions is None or len(actions) == 0:
        return 0
    if isinstance(actions, pd.DataFrame):
        # Drop non-numeric columns (e.g. a date column) so they are not
        # counted as trades.
        # NOTE(review): assumes one numeric column per traded symbol — confirm
        # against the DataFrame DRL_prediction actually returns.
        actions = actions.select_dtypes(include=[np.number])
    return int(np.count_nonzero(np.asarray(actions)))


def evaluate_model_performance(model_info, test_env, test_df, model_name):
    """
    Run a trained model over the test environment and compute its metrics.

    Args:
        model_info: Dict with at least the keys ``'model'`` and ``'model_type'``.
        test_env: Environment passed to ``DRLAgent.DRL_prediction``.
        test_df: Test data; not used by this function, kept for interface
            compatibility.
        model_name: Label used in log output and in the result dict.

    Returns:
        dict | None: Metrics (total return, Sharpe ratio, max drawdown,
        volatility, trade count) plus the raw account values, actions and
        per-step returns. ``None`` if anything raised during evaluation.
    """
    print(f"📊 Evaluating {model_name}...")

    try:
        # Run the prediction roll-out
        account_value, actions = DRLAgent.DRL_prediction(
            model=model_info['model'],
            environment=test_env
        )

        equity = account_value['account_value']

        # Total return relative to the configured starting capital
        initial_value = INITIAL_AMOUNT
        final_value = equity.iloc[-1]
        total_return = (final_value - initial_value) / initial_value * 100

        # Sharpe ratio and volatility.
        # NOTE(review): sqrt(252) annualises assuming one row per trading day;
        # crypto trades every day and may be sampled intraday — confirm.
        returns = equity.pct_change().dropna()
        returns_std = returns.std()
        annualization = np.sqrt(252)
        sharpe_ratio = returns.mean() / returns_std * annualization if returns_std > 0 else 0
        volatility = returns_std * annualization * 100

        # Maximum drawdown relative to the running peak of the equity curve
        running_max = equity.expanding().max()
        drawdown = (equity - running_max) / running_max
        max_drawdown = drawdown.min() * 100

        # Number of trades. The previous ``len(actions[actions != 0])`` masked
        # a DataFrame without dropping rows, so it always returned the row
        # count; count the non-zero actions instead.
        total_trades = _count_trades(actions)

        results = {
            'model_name': model_name,
            'model_type': model_info['model_type'],
            'initial_value': initial_value,
            'final_value': final_value,
            'total_return': total_return,
            'sharpe_ratio': sharpe_ratio,
            'max_drawdown': max_drawdown,
            'volatility': volatility,
            'total_trades': total_trades,
            'account_values': account_value,
            'actions': actions,
            'daily_returns': returns
        }

        print(f"✅ {model_name} evaluation completed")
        print(f"  Total Return: {total_return:.2f}%")
        print(f"  Sharpe Ratio: {sharpe_ratio:.3f}")
        print(f"  Max Drawdown: {max_drawdown:.2f}%")
        print(f"  Volatility: {volatility:.2f}%")
        print(f"  Total Trades: {total_trades}")

        return results

    except Exception as e:
        # Best-effort boundary: one failing model must not abort the others
        print(f"❌ Error evaluating {model_name}: {str(e)}")
        return None

def calculate_buy_hold_baseline(test_df, symbols):
    """
    คำนวณ Buy & Hold baseline สำหรับเปรียบเทียบ
    """
    print("📈 Calculating Buy & Hold baseline...")
    
    baseline_results = {}
    
    for symbol in symbols:
        symbol_data = test_df[test_df['tic'] == symbol].copy()
        
        if len(symbol_data) > 0:
            # ใช้ราคาปิดที่ normalize แล้ว แต่คำนวณ return จากการเปลี่ยนแปลง
            initial_price = symbol_data['close'].iloc[0]
            final_price = symbol_data['close'].iloc[-1]
            
            # คำนวณ return จากการเปลี่ยนแปลงของราคา normalized
            price_change = (final_price - initial_price) / abs(initial_price) if initial_price != 0 else 0
            
            # จำลอง portfolio value
            portfolio_value = INITIAL_AMOUNT * (1 + price_change)
            total_return = price_change * 100
            
            # คำนวณ metrics อื่นๆ
            returns = symbol_data['close'].pct_change().dropna()
            sharpe_ratio = returns.mean() / returns.std() * np.sqrt(252) if returns.std() > 0 else 0
            volatility = returns.std() * np.sqrt(252) * 100
            
            # Maximum Drawdown
            cumulative_returns = (1 + returns).cumprod()
            running_max = cumulative_returns.expanding().max()
            drawdown = (cumulative_returns - running_max) / running_max
            max_drawdown = drawdown.min() * 100
            
            baseline_results[symbol] = {
                'total_return': total_return,
                'final_value': portfolio_value,
                'sharpe_ratio': sharpe_ratio,
                'max_drawdown': max_drawdown,
                'volatility': volatility
            }
            
            print(f"  {symbol}: {total_return:.2f}% return")
    
    return baseline_results

# Evaluate every loaded model; failed evaluations are left out
evaluation_results = {}

for model_name, model_info in trained_models.items():
    results = evaluate_model_performance(model_info, test_env, test_df, model_name)
    if not results:
        continue
    evaluation_results[model_name] = results

# Buy & Hold baseline for each symbol present in the test data
symbols = test_df['tic'].unique()
baseline_results = calculate_buy_hold_baseline(test_df, symbols)

print(
    f"\n📊 Evaluation completed for {len(evaluation_results)} models",
    f"📈 Baseline calculated for {len(baseline_results)} symbols",
    sep="\n",
)

## Cell 4: สร้างรายงานเปรียบเทียบ Performance

In [None]:
def create_performance_comparison():
    """
    Build a table comparing every evaluated model with the baselines.

    Reads the module-level ``evaluation_results`` and ``baseline_results``
    dicts and returns one row per RL model followed by one row per Buy & Hold
    symbol, sorted by total return in descending order.
    """
    print("📊 Creating performance comparison...")

    # One row per RL model
    rl_rows = [
        {
            'Model': res['model_type'],
            'Strategy': f"RL-{res['model_type']}",
            'Total Return (%)': res['total_return'],
            'Final Value ($)': res['final_value'],
            'Sharpe Ratio': res['sharpe_ratio'],
            'Max Drawdown (%)': res['max_drawdown'],
            'Volatility (%)': res['volatility'],
            'Total Trades': res['total_trades'],
        }
        for res in evaluation_results.values()
    ]

    # One row per Buy & Hold symbol (a single purchase, then hold)
    baseline_rows = [
        {
            'Model': "Buy&Hold",
            'Strategy': f"B&H-{ticker}",
            'Total Return (%)': stats['total_return'],
            'Final Value ($)': stats['final_value'],
            'Sharpe Ratio': stats['sharpe_ratio'],
            'Max Drawdown (%)': stats['max_drawdown'],
            'Volatility (%)': stats['volatility'],
            'Total Trades': 1,
        }
        for ticker, stats in baseline_results.items()
    ]

    table = pd.DataFrame(rl_rows + baseline_rows)
    return table.sort_values('Total Return (%)', ascending=False)

def plot_performance_analysis(evaluation_results, baseline_results):
    """
    Draw a 2x3 grid of charts comparing the RL models with Buy & Hold.

    Panels: portfolio value over time, total return, Sharpe ratio, maximum
    drawdown, volatility and trade count. RL bars are drawn in sky blue,
    baseline bars in light coral.

    Returns:
        The matplotlib figure holding all six panels.
    """
    print("📊 Creating performance analysis plots...")

    fig, axes = plt.subplots(2, 3, figsize=(18, 12))

    rl_runs = list(evaluation_results.values())
    baselines = list(baseline_results.values())

    # Shared x-axis labels and colours: RL models first, then baselines
    tick_labels = [run['model_type'] for run in rl_runs]
    tick_labels += [f"B&H-{ticker}" for ticker in baseline_results]
    bar_colors = ['skyblue'] * len(rl_runs) + ['lightcoral'] * len(baselines)
    positions = range(len(tick_labels))

    def gather(metric):
        """Collect one metric across RL models and then baselines."""
        return [run[metric] for run in rl_runs] + [base[metric] for base in baselines]

    def bar_panel(ax, values, title, ylabel, describe, zero_line=False):
        """Draw one labelled bar chart; ``describe`` places each value label."""
        bars = ax.bar(positions, values, color=bar_colors, alpha=0.7)
        ax.set_title(title)
        ax.set_ylabel(ylabel)
        ax.set_xticks(positions)
        ax.set_xticklabels(tick_labels, rotation=45)
        if zero_line:
            ax.axhline(y=0, color='black', linestyle='-', alpha=0.3)

        # Write each value next to its bar
        for bar, value in zip(bars, values):
            y_pos, text, v_align = describe(bar.get_height(), value)
            ax.text(bar.get_x() + bar.get_width()/2., y_pos,
                    text, ha='center', va=v_align, fontsize=9)

    # Plot 1: Portfolio Values Over Time
    value_ax = axes[0, 0]
    for run in rl_runs:
        curve = run['account_values']['account_value']
        value_ax.plot(curve.values, label=f"{run['model_type']}", linewidth=2)

    value_ax.axhline(y=INITIAL_AMOUNT, color='red', linestyle='--', alpha=0.7, label='Initial Value')
    value_ax.set_title('Portfolio Value Over Time')
    value_ax.set_ylabel('Portfolio Value ($)')
    value_ax.legend()
    value_ax.grid(True, alpha=0.3)

    # Plot 2: Total Returns Comparison (label above gains, below losses)
    bar_panel(
        axes[0, 1], gather('total_return'),
        'Total Returns Comparison', 'Return (%)',
        lambda height, value: (
            height + (0.5 if height > 0 else -1.5),
            f'{value:.1f}%',
            'bottom' if height > 0 else 'top',
        ),
        zero_line=True,
    )

    # Plot 3: Sharpe Ratio Comparison
    bar_panel(
        axes[0, 2], gather('sharpe_ratio'),
        'Sharpe Ratio Comparison', 'Sharpe Ratio',
        lambda height, value: (height + 0.1, f'{value:.2f}', 'bottom'),
        zero_line=True,
    )

    # Plot 4: Maximum Drawdown Comparison (labels hang below the bar end)
    bar_panel(
        axes[1, 0], gather('max_drawdown'),
        'Maximum Drawdown Comparison', 'Drawdown (%)',
        lambda height, value: (height - 1, f'{value:.1f}%', 'top'),
    )

    # Plot 5: Volatility Comparison
    bar_panel(
        axes[1, 1], gather('volatility'),
        'Volatility Comparison', 'Volatility (%)',
        lambda height, value: (height + 0.5, f'{value:.1f}%', 'bottom'),
    )

    # Plot 6: Trading Frequency (Buy & Hold counts as a single trade)
    trade_counts = [run['total_trades'] for run in rl_runs] + [1] * len(baselines)
    bar_panel(
        axes[1, 2], trade_counts,
        'Trading Frequency', 'Number of Trades',
        lambda height, value: (height + 0.5, f'{value}', 'bottom'),
    )

    plt.tight_layout()
    plt.show()

    return fig

# Build and display the comparison table
comparison_df = create_performance_comparison()
print("\n📊 Performance Comparison Table:")
display(comparison_df)

# Render the analysis charts
fig = plot_performance_analysis(evaluation_results, baseline_results)

# Persist the table and the figure under one shared timestamp
timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
comparison_df.to_csv(
    os.path.join(REPORTS_DIR, f'performance_comparison_{timestamp}.csv'),
    index=False,
)
fig.savefig(
    os.path.join(REPORTS_DIR, f'performance_analysis_{timestamp}.png'),
    dpi=300,
    bbox_inches='tight',
)

print(f"\n✅ Results saved to {REPORTS_DIR}")