In [1]:
import sys
import warnings
warnings.filterwarnings('ignore')

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import os
import pickle
import torch

# FinRL imports
from finrl.meta.env_stock_trading.env_stocktrading import StockTradingEnv
from finrl.agents.stablebaselines3.models import DRLAgent

# Import config
from config import *

# Setup directories
PROCESSED_DIR = "processed_data"
AGENT_DIR = "agents"
for dir_name in [AGENT_DIR]:
    if not os.path.exists(dir_name):
        os.makedirs(dir_name)

print("📁 Setup directories completed")
print(f"🤖 Starting Agent Creation Process")


📁 Setup directories completed
🤖 Starting Agent Creation Process


In [2]:
# ฟังก์ชันโหลดข้อมูลที่ประมวลผลแล้ว
def load_processed_data():
    print("📂 Loading processed data...")
    try:
        # ลองโหลดจากไฟล์ pickle ก่อน (เร็วกว่า)
        pickle_file = os.path.join(PROCESSED_DIR, "processed_crypto_data.pkl")
        with open(pickle_file, 'rb') as f:
            df = pickle.load(f)
        print(f"✅ Loaded processed data from {pickle_file}")
    except:
        # ถ้าไม่มีไฟล์ pickle ให้โหลดจาก CSV
        csv_file = os.path.join(PROCESSED_DIR, "processed_crypto_data.csv")
        df = pd.read_csv(csv_file)
        # แปลง timestamp เป็น datetime
        if 'timestamp' in df.columns:
            df['timestamp'] = pd.to_datetime(df['timestamp'])
        elif 'date' in df.columns:
            df['timestamp'] = pd.to_datetime(df['date'])
        print(f"✅ Loaded processed data from {csv_file}")
    
    return df

# โหลดข้อมูล
df = load_processed_data()
print(f"📊 Data shape: {df.shape}")
print(f"🗓️ Date range: {df['timestamp'].min()} to {df['timestamp'].max()}")
print(f"💰 Cryptocurrencies: {sorted(df['tic'].unique())}")

# ตรวจสอบ columns ที่มี
print(f"\n📋 Available columns:")
for i, col in enumerate(df.columns):
    print(f"  {i+1:2d}. {col}")

# แสดงตัวอย่างข้อมูล
print(f"\n📈 Sample data:")
display_cols = ['timestamp', 'tic', 'Close', 'Volume']
if 'sma_20' in df.columns:
    display_cols.extend(['sma_20', 'rsi', 'macd'])
print(df[display_cols].head())


📂 Loading processed data...
✅ Loaded processed data from processed_data\processed_crypto_data.pkl
📊 Data shape: (5480, 18)
🗓️ Date range: 2022-01-01 00:00:00 to 2024-12-31 00:00:00
💰 Cryptocurrencies: ['ADA-USD', 'BNB-USD', 'BTC-USD', 'ETH-USD', 'SOL-USD']

📋 Available columns:
   1. date
   2. Open
   3. High
   4. Low
   5. Close
   6. Volume
   7. tic
   8. sma_20
   9. ema_20
  10. rsi
  11. ema_12
  12. ema_26
  13. macd
  14. macd_signal
  15. returns
  16. volatility
  17. price_sma_ratio
  18. timestamp

📈 Sample data:
   timestamp      tic         Close        Volume        sma_20   rsi  macd
0 2022-01-01  ADA-USD      1.266366  4.006706e+06      1.266366  50.0   0.0
1 2022-01-01  BNB-USD    527.162613  8.115936e+06    527.162613  50.0   0.0
2 2022-01-01  BTC-USD  47466.911304  1.522753e+06  47466.911304  50.0   0.0
3 2022-01-01  ETH-USD   3750.907040  2.805082e+06   3750.907040  50.0   0.0
4 2022-01-01  SOL-USD    168.478966  1.148705e+06    168.478966  50.0   0.0


In [3]:
# แบ่งข้อมูลเป็น Train/Validation/Test
def split_data(df, train_ratio=0.7, val_ratio=0.15):
    """
    แบ่งข้อมูลเป็น 3 ส่วน:
    - Training: 70%
    - Validation: 15% 
    - Test: 15%
    """
    print("📊 Splitting data...")
    
    # เรียงข้อมูลตามวันที่และ cryptocurrency
    df = df.copy()
    
    # ตรวจสอบและแปลง timestamp
    if 'date' not in df.columns:
        df['date'] = df['timestamp'].dt.date
    
    # เรียงข้อมูลตามวันที่และ cryptocurrency
    df.sort_values(['date', 'tic'], inplace=True)
    df.reset_index(drop=True, inplace=True)
    
    # คำนวณขนาดของแต่ละส่วน
    total_len = len(df)
    train_size = int(total_len * train_ratio)
    val_size = int(total_len * val_ratio)
    
    # แบ่งข้อมูล
    train_df = df.iloc[:train_size].reset_index(drop=True)
    val_df = df.iloc[train_size:train_size + val_size].reset_index(drop=True)
    test_df = df.iloc[train_size + val_size:].reset_index(drop=True)
    
    print(f"✅ Data split completed:")
    print(f"  📈 Train: {len(train_df):,} rows ({len(train_df)/total_len*100:.1f}%)")
    print(f"  📊 Validation: {len(val_df):,} rows ({len(val_df)/total_len*100:.1f}%)")
    print(f"  📉 Test: {len(test_df):,} rows ({len(test_df)/total_len*100:.1f}%)")
    
    return train_df, val_df, test_df

# สร้าง Trading Environment
def create_trading_environment(df, initial_amount=INITIAL_AMOUNT, 
                              transaction_cost_pct=TRANSACTION_COST_PCT):
    """
    สร้าง FinRL Trading Environment
    """
    print(f"🏛️ Creating trading environment...")
    
    # เตรียมข้อมูลสำหรับ environment
    df_processed = df.copy()
    
    # ตรวจสอบและจัดการ date columns
    if 'timestamp' in df_processed.columns:
        df_processed['timestamp'] = pd.to_datetime(df_processed['timestamp'])
        if 'date' not in df_processed.columns:
            df_processed['date'] = df_processed['timestamp'].dt.strftime('%Y-%m-%d')
    elif 'date' in df_processed.columns:
        df_processed['date'] = pd.to_datetime(df_processed['date']).dt.strftime('%Y-%m-%d')
    
    # เรียงข้อมูลให้แน่ใจ
    df_processed.sort_values(['date', 'tic'], inplace=True)
    df_processed.reset_index(drop=True, inplace=True)
    
    # แปลงชื่อคอลัมน์เป็นตัวเล็กสำหรับ FinRL (required)
    column_mapping = {
        'Open': 'open',
        'High': 'high', 
        'Low': 'low',
        'Close': 'close',
        'Volume': 'volume'
    }
    df_processed = df_processed.rename(columns=column_mapping)
    
    # ตรวจสอบว่ามี feature columns ที่จำเป็น
    required_cols = ['open', 'high', 'low', 'close', 'volume']
    for col in required_cols:
        if col not in df_processed.columns:
            raise ValueError(f"Missing required column: {col}")
    
    # เติมข้อมูลที่ขาดหาย (forward fill แล้ว backward fill)
    for col in required_cols:
        df_processed[col] = df_processed[col].ffill().bfill()
    
    # ตรวจสอบข้อมูลที่ขาดหาย
    print(f"📋 Data shape after processing: {df_processed.shape}")
    print(f"🔍 Missing values in OHLCV: {df_processed[required_cols].isnull().sum().sum()}")
    
    # เตรียม feature columns สำหรับ agent
    feature_cols = []
    for col in df_processed.columns:
        if col not in ['date', 'timestamp', 'tic'] and not col.startswith('Adj'):
            feature_cols.append(col)
    
    print(f"📋 All Feature columns: {feature_cols}")
    
    # ตรวจสอบข้อมูลสำคัญ
    print(f"📊 Date range: {df_processed['date'].min()} to {df_processed['date'].max()}")
    print(f"💰 Stocks: {sorted(df_processed['tic'].unique())}")
    print(f"📈 Sample close prices: {df_processed['close'].head().tolist()}")
    
    # ตรวจสอบว่าข้อมูลมีการกระจายที่ดี
    for tic in df_processed['tic'].unique()[:2]:  # ตรวจสอบ 2 stocks แรก
        tic_data = df_processed[df_processed['tic'] == tic]
        print(f"🔍 {tic}: {len(tic_data)} rows, close range: {tic_data['close'].min():.2f} - {tic_data['close'].max():.2f}")
    
    # ตรวจสอบข้อมูลก่อนสร้าง environment
    stock_list = sorted(df_processed['tic'].unique())
    stock_count = len(stock_list)
    
    print(f"📈 Stocks: {stock_list}")
    print(f"📊 Stock count: {stock_count}")
    print(f"🔧 Feature count: {len(feature_cols)}")
    
    # เอาเฉพาะ technical indicators ที่ไม่ใช่ OHLCV
    tech_indicators = []
    for col in feature_cols:
        if col not in ['open', 'high', 'low', 'close', 'volume']:
            tech_indicators.append(col)
    
    print(f"🔧 Technical indicators: {tech_indicators}")
    
    # คำนวณ state space
    # state = [balance] + [stock_prices] + [stock_shares] + [technical_indicators]
    state_space = 1 + stock_count + stock_count + (stock_count * len(tech_indicators))
    
    # ตรวจสอบข้อมูลให้ตรงกับ FinRL format
    # FinRL ต้องการข้อมูลที่เป็น pivot format (แต่ละ date มี stock ทั้งหมด)
    unique_dates = sorted(df_processed['date'].unique())
    print(f"📅 Total dates: {len(unique_dates)}")
    print(f"📅 Date sample: {unique_dates[:3]}")
    
    # ตรวจสอบว่าแต่ละวันมีข้อมูลครบทุก stock หรือไม่
    for date in unique_dates[:3]:  # ตรวจ 3 วันแรก
        date_data = df_processed[df_processed['date'] == date]
        stocks_in_date = sorted(date_data['tic'].unique())
        print(f"📊 {date}: {len(stocks_in_date)} stocks - {stocks_in_date}")
        if len(stocks_in_date) != stock_count:
            print(f"⚠️ Warning: Date {date} missing some stocks!")
    
    # แก้ไขข้อมูลให้ครบถ้วน - เติมข้อมูลที่ขาดหาย
    # สร้าง complete date-stock combinations
    complete_combinations = []
    for date in unique_dates:
        for stock in stock_list:
            complete_combinations.append({'date': date, 'tic': stock})
    
    complete_df = pd.DataFrame(complete_combinations)
    df_processed = complete_df.merge(df_processed, on=['date', 'tic'], how='left')
    
    # เติมข้อมูลที่หายไปด้วยการ forward fill และ backward fill
    for col in ['open', 'high', 'low', 'close', 'volume'] + tech_indicators:
        if col in df_processed.columns:
            # Group by tic และ fill ข้อมูลที่หายไป
            df_processed[col] = df_processed.groupby('tic')[col].ffill().bfill()
            # หากยังมีข้อมูลที่หายไป ให้ใช้ค่าเฉลี่ย
            if df_processed[col].isnull().any():
                mean_val = df_processed[col].mean()
                df_processed[col] = df_processed[col].fillna(mean_val)
    
    # เรียงข้อมูลอีกครั้งหลังจาก merge
    df_processed.sort_values(['date', 'tic'], inplace=True)
    df_processed.reset_index(drop=True, inplace=True)
    
    # ตรวจสอบข้อมูลหลังแก้ไข
    print(f"🔧 After data completion:")
    print(f"  📊 Shape: {df_processed.shape}")
    print(f"  🔍 Missing values: {df_processed[['open', 'high', 'low', 'close', 'volume']].isnull().sum().sum()}")
    
    # ตรวจสอบอีกครั้งว่าแต่ละวันมีข้อมูลครบแล้ว
    for date in unique_dates[:3]:
        date_data = df_processed[df_processed['date'] == date]
        print(f"📊 {date}: {len(date_data)} rows (should be {stock_count})")
    
    # ตรวจสอบว่า tech indicators มีค่าที่เหมาะสม
    for indicator in tech_indicators[:3]:  # ตรวจสอบ 3 indicators แรก
        if indicator in df_processed.columns:
            print(f"🔧 {indicator}: min={df_processed[indicator].min():.4f}, max={df_processed[indicator].max():.4f}")
    
    # ปรับปรุง state space calculation
    state_space = 1 + stock_count + stock_count + (stock_count * len(tech_indicators))
    print(f"📊 Final state space: {state_space} (1 + {stock_count} + {stock_count} + {stock_count}*{len(tech_indicators)})")
    
    # === สำคัญ: ปรับข้อมูลให้ตรงกับ FinRL format ===
    # FinRL คาดหวังว่า df.loc[day, :] จะให้ DataFrame ที่มีหลาย stocks
    # ดังนั้นเราต้องสร้าง MultiIndex หรือปรับโครงสร้างข้อมูล
    
    # สร้าง date index ที่เป็น integer
    unique_dates_sorted = sorted(df_processed['date'].unique())
    date_to_index = {date: idx for idx, date in enumerate(unique_dates_sorted)}
    
    # เพิ่ม day column เป็น integer index
    df_processed['day'] = df_processed['date'].map(date_to_index)
    
    # เซ็ต index เป็น day เพื่อให้ FinRL สามารถเข้าถึงข้อมูลได้
    df_processed = df_processed.set_index('day')
    
    print(f"🔧 FinRL format conversion:")
    print(f"  📅 Total trading days: {len(unique_dates_sorted)}")
    print(f"  📊 Index type: {type(df_processed.index)}")
    
    # ตรวจสอบว่าการเข้าถึงข้อมูลทำงานได้ถูกต้อง
    sample_day_data = df_processed.loc[0]
    if isinstance(sample_day_data, pd.DataFrame):
        print(f"✅ Day 0 data: {len(sample_day_data)} stocks")
        print(f"📈 Sample stocks: {sample_day_data['tic'].tolist()}")
    else:
        print(f"⚠️ Day 0 data is Series - this may cause issues")
        print(f"📈 Stock: {sample_day_data['tic']}")
    
    # สร้าง environment configuration
    env_kwargs = {
        'df': df_processed,
        'stock_dim': stock_count,
        'hmax': 100,  # maximum shares ที่สามารถถือได้
        'initial_amount': initial_amount,
        'num_stock_shares': [0] * stock_count,
        'buy_cost_pct': [transaction_cost_pct] * stock_count,
        'sell_cost_pct': [transaction_cost_pct] * stock_count,
        'reward_scaling': 1e-4,
        'state_space': state_space,
        'action_space': stock_count,
        'tech_indicator_list': tech_indicators,  # ใช้เฉพาะ tech indicators
        'print_verbosity': 1
    }
    
    print(f"🔧 Environment configuration:")
    print(f"  📊 DataFrame shape: {df_processed.shape}")
    print(f"  🏪 Stock dimension: {stock_count}")
    print(f"  📈 State space: {state_space}")
    print(f"  🎯 Action space: {stock_count}")
    print(f"  🔧 Tech indicators: {tech_indicators}")
    
    # สร้าง environment
    try:
        print(f"🔧 Creating StockTradingEnv with parameters:")
        print(f"  📊 DataFrame shape: {df_processed.shape}")
        print(f"  🏪 Stock dimension: {env_kwargs['stock_dim']}")
        print(f"  📈 State space: {env_kwargs['state_space']}")
        print(f"  🎯 Action space: {env_kwargs['action_space']}")
        print(f"  🔧 Tech indicators: {len(env_kwargs['tech_indicator_list'])}")
        
        # Debug: ตรวจสอบข้อมูลก่อนสร้าง env
        print(f"📋 DataFrame columns: {list(df_processed.columns)}")
        print(f"📊 Required columns check:")
        for col in ['date', 'tic', 'open', 'high', 'low', 'close', 'volume']:
            if col in df_processed.columns:
                print(f"  ✅ {col}: OK ({df_processed[col].dtype})")
            else:
                print(f"  ❌ {col}: MISSING")
        
        # ตรวจสอบข้อมูลตัวอย่าง
        print(f"📈 Sample data:")
        sample_cols = ['date', 'tic', 'close', 'volume']
        if len(tech_indicators) > 0:
            sample_cols.append(tech_indicators[0])
        print(df_processed[sample_cols].head(3))
        
        env = StockTradingEnv(**env_kwargs)
        
        print(f"✅ Environment created successfully:")
        print(f"  💰 Initial amount: ${initial_amount:,.2f}")
        print(f"  💸 Transaction cost: {transaction_cost_pct*100:.3f}%")
        print(f"  📊 State space: {env_kwargs['state_space']}")
        print(f"  🎯 Action space: {env_kwargs['action_space']}")
        print(f"  🏪 Stock dimension: {env_kwargs['stock_dim']}")
        print(f"  🔧 Tech indicators: {len(env_kwargs['tech_indicator_list'])}")
        
        return env, env_kwargs
        
    except Exception as e:
        print(f"❌ Error creating environment: {str(e)}")
        print(f"📋 Available columns: {list(df_processed.columns)}")
        print(f"📊 DataFrame info:")
        print(f"  Shape: {df_processed.shape}")
        print(f"  Date range: {df_processed['date'].min()} to {df_processed['date'].max()}")
        print(f"  Unique tickers: {df_processed['tic'].unique()}")
        print(f"  Sample row:")
        print(df_processed.iloc[0].to_dict())
        raise e

# แบ่งข้อมูล
train_df, val_df, test_df = split_data(df)

# สร้าง environments
train_env, train_env_kwargs = create_trading_environment(train_df)
val_env, val_env_kwargs = create_trading_environment(val_df)
test_env, test_env_kwargs = create_trading_environment(test_df)

print(f"\n✅ All environments created successfully!")


📊 Splitting data...
✅ Data split completed:
  📈 Train: 3,835 rows (70.0%)
  📊 Validation: 822 rows (15.0%)
  📉 Test: 823 rows (15.0%)
🏛️ Creating trading environment...
📋 Data shape after processing: (3835, 18)
🔍 Missing values in OHLCV: 0
📋 All Feature columns: ['open', 'high', 'low', 'close', 'volume', 'sma_20', 'ema_20', 'rsi', 'ema_12', 'ema_26', 'macd', 'macd_signal', 'returns', 'volatility', 'price_sma_ratio']
📊 Date range: 2022-01-01 to 2024-02-06
💰 Stocks: ['ADA-USD', 'BNB-USD', 'BTC-USD', 'ETH-USD', 'SOL-USD']
📈 Sample close prices: [1.2663657360618603, 527.1626125433695, 47466.91130383056, 3750.9070399318057, 168.47896597285185]
🔍 ADA-USD: 767 rows, close range: 0.89 - 2.19
🔍 BNB-USD: 767 rows, close range: 240.00 - 533.59
📈 Stocks: ['ADA-USD', 'BNB-USD', 'BTC-USD', 'ETH-USD', 'SOL-USD']
📊 Stock count: 5
🔧 Feature count: 15
🔧 Technical indicators: ['sma_20', 'ema_20', 'rsi', 'ema_12', 'ema_26', 'macd', 'macd_signal', 'returns', 'volatility', 'price_sma_ratio']
📅 Total dates: 

In [4]:
# กำหนด configurations สำหรับ RL Agents ต่างๆ
def setup_agent_configurations():
    """
    กำหนด hyperparameters สำหรับ RL algorithms ต่างๆ
    """
    print("🔧 Setting up agent configurations...")
    
    # ตรวจสอบ device ที่ใช้
    if torch.cuda.is_available():
        device = torch.device("cuda")
        print(f"✅ Using GPU: {torch.cuda.get_device_name(0)}")
        print(f"💾 GPU Memory: {torch.cuda.get_device_properties(0).total_memory / 1024**3:.1f} GB")
    else:
        device = torch.device("cpu")
        print("ℹ️ Using CPU")
    
    # Agent configurations
    agent_configs = {
        # PPO (Proximal Policy Optimization) - เหมาะสำหรับ continuous action spaces
        'PPO': {
            'learning_rate': 3e-4,
            'n_steps': 2048,
            'batch_size': 64,
            'n_epochs': 10,
            'gamma': 0.99,
            'gae_lambda': 0.95,
            'clip_range': 0.2,
            'ent_coef': 0.01,
            'vf_coef': 0.5,
            'max_grad_norm': 0.5,
            'policy_kwargs': dict(net_arch=[256, 256]),
            'device': device,
            'verbose': 1
        },
        
        # A2C (Advantage Actor-Critic) - เร็วแต่อาจไม่เสถียรเท่า PPO
        'A2C': {
            'learning_rate': 7e-4,
            'n_steps': 5,
            'gamma': 0.99,
            'gae_lambda': 1.0,
            'ent_coef': 0.01,
            'vf_coef': 0.25,
            'max_grad_norm': 0.5,
            'policy_kwargs': dict(net_arch=[256, 256]),
            'device': device,
            'verbose': 1
        },
        
        # DDPG (Deep Deterministic Policy Gradient) - สำหรับ continuous actions
        'DDPG': {
            'learning_rate': 1e-3,
            'buffer_size': 1000000,
            'learning_starts': 100,
            'batch_size': 256,
            'tau': 0.005,
            'gamma': 0.99,
            'policy_kwargs': dict(net_arch=[256, 256]),
            'device': device,
            'verbose': 1
        },
        
        # SAC (Soft Actor-Critic) - มีประสิทธิภาพสูงสำหรับ continuous control
        'SAC': {
            'learning_rate': 3e-4,
            'buffer_size': 1000000,
            'learning_starts': 100,
            'batch_size': 256,
            'tau': 0.005,
            'gamma': 0.99,
            'ent_coef': 'auto',
            'policy_kwargs': dict(net_arch=[256, 256]),
            'device': device,
            'verbose': 1
        },
        
        # Training configuration
        'TRAINING': {
            'total_timesteps': 100000,  # จำนวน timesteps ในการเทรน
            'tb_log_name': 'crypto_trading',
            'eval_freq': 5000,
            'n_eval_episodes': 5,
            'save_freq': 10000
        }
    }
    
    return agent_configs, device

# สร้าง agent configurations
agent_configs, device = setup_agent_configurations()

# เลือก model ที่จะใช้ (สามารถเปลี่ยนได้)
MODEL_NAME = 'PPO'  # เปลี่ยนเป็น 'A2C', 'DDPG', หรือ 'SAC' ได้

print(f"\n🤖 Selected Model: {MODEL_NAME}")
print(f"🔧 Model Configuration:")
for key, value in agent_configs[MODEL_NAME].items():
    print(f"  {key}: {value}")

print(f"\n📊 Training Configuration:")
for key, value in agent_configs['TRAINING'].items():
    print(f"  {key}: {value}")


🔧 Setting up agent configurations...
✅ Using GPU: NVIDIA GeForce RTX 3060
💾 GPU Memory: 12.0 GB

🤖 Selected Model: PPO
🔧 Model Configuration:
  learning_rate: 0.0003
  n_steps: 2048
  batch_size: 64
  n_epochs: 10
  gamma: 0.99
  gae_lambda: 0.95
  clip_range: 0.2
  ent_coef: 0.01
  vf_coef: 0.5
  max_grad_norm: 0.5
  policy_kwargs: {'net_arch': [256, 256]}
  device: cuda
  verbose: 1

📊 Training Configuration:
  total_timesteps: 100000
  tb_log_name: crypto_trading
  eval_freq: 5000
  n_eval_episodes: 5
  save_freq: 10000


In [5]:
# สร้าง RL Agent
def create_agent(env, model_name, agent_configs):
    """
    สร้าง RL Agent ด้วย FinRL
    """
    print(f"🤖 Creating {model_name} agent...")
    
    try:
        # สร้าง DRLAgent
        agent = DRLAgent(env=env)
        
        # ดึง model parameters และปรับแต่งสำหรับ FinRL
        model_params = agent_configs[model_name].copy()
        
        # แยกเอา policy_kwargs ออกมาก่อนส่งไปให้ FinRL
        policy_kwargs = model_params.pop('policy_kwargs', None)
        
        # ลบ device ออกเพราะ FinRL จัดการเอง
        model_params.pop('device', None)
        
        print(f"🔧 Model parameters: {model_params}")
        if policy_kwargs:
            print(f"🔧 Policy kwargs: {policy_kwargs}")
        
        # สร้าง model โดยส่ง policy_kwargs แยกต่างหาก
        if policy_kwargs:
            model = agent.get_model(
                model_name.lower(), 
                model_kwargs=model_params,
                policy_kwargs=policy_kwargs
            )
        else:
            model = agent.get_model(
                model_name.lower(), 
                model_kwargs=model_params
            )
        
        print(f"✅ {model_name} agent created successfully!")
        print(f"📋 Model summary:")
        print(f"  Algorithm: {model_name}")
        print(f"  Policy: {type(model.policy).__name__}")
        print(f"  Device: {model.device}")
        
        return agent, model
        
    except Exception as e:
        print(f"❌ Error creating {model_name} agent: {str(e)}")
        print(f"📋 Available methods: {[method for method in dir(agent) if not method.startswith('_')]}")
        
        # ลองสร้างด้วยวิธีอื่น
        try:
            print(f"🔄 Trying alternative approach...")
            
            # ลองใช้ stable_baselines3 โดยตรง
            from stable_baselines3 import PPO, A2C, DDPG, SAC
            
            # เลือก model class ตาม model_name
            model_classes = {
                'PPO': PPO,
                'A2C': A2C, 
                'DDPG': DDPG,
                'SAC': SAC
            }
            
            if model_name in model_classes:
                ModelClass = model_classes[model_name]
                
                # เตรียม parameters สำหรับ stable_baselines3
                sb3_params = agent_configs[model_name].copy()
                sb3_params.pop('device', None)  # ลบ device ออก
                sb3_params.pop('verbose', None)  # ลบ verbose ออกเพื่อไม่ให้ซ้ำ
                
                # สร้าง model โดยตรง
                model = ModelClass(
                    policy="MlpPolicy",
                    env=env,
                    verbose=1,  # ใส่ verbose แยกต่างหาก
                    **sb3_params
                )
                
                print(f"✅ {model_name} agent created with stable_baselines3 directly!")
                return agent, model
            else:
                print(f"❌ Unsupported model: {model_name}")
                return None, None
                
        except Exception as e2:
            print(f"❌ Alternative approach also failed: {str(e2)}")
            return None, None

# สร้าง agent
agent, model = create_agent(train_env, MODEL_NAME, agent_configs)

if model is not None:
    print(f"\n🎯 Agent ready for training!")
    print(f"📊 Environment observation space: {train_env.observation_space}")
    print(f"🎮 Environment action space: {train_env.action_space}")
else:
    print(f"❌ Failed to create agent. Please check configurations.")


🤖 Creating PPO agent...
🔧 Model parameters: {'learning_rate': 0.0003, 'n_steps': 2048, 'batch_size': 64, 'n_epochs': 10, 'gamma': 0.99, 'gae_lambda': 0.95, 'clip_range': 0.2, 'ent_coef': 0.01, 'vf_coef': 0.5, 'max_grad_norm': 0.5, 'verbose': 1}
🔧 Policy kwargs: {'net_arch': [256, 256]}
{'learning_rate': 0.0003, 'n_steps': 2048, 'batch_size': 64, 'n_epochs': 10, 'gamma': 0.99, 'gae_lambda': 0.95, 'clip_range': 0.2, 'ent_coef': 0.01, 'vf_coef': 0.5, 'max_grad_norm': 0.5, 'verbose': 1}
❌ Error creating PPO agent: stable_baselines3.ppo.ppo.PPO() got multiple values for keyword argument 'verbose'
📋 Available methods: ['DRL_prediction', 'DRL_prediction_load_from_file', 'env', 'get_model', 'train_model']
🔄 Trying alternative approach...
Using cuda device
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.
✅ PPO agent created with stable_baselines3 directly!

🎯 Agent ready for training!
📊 Environment observation space: Box(-inf, inf, (61,), float32)
🎮 Environment actio

In [6]:
# บันทึกข้อมูลสำหรับใช้ในขั้นตอนต่อไป
def save_agent_setup(df, train_env_kwargs, agent_configs, model_name, device):
    """
    บันทึกข้อมูล setup ทั้งหมดสำหรับใช้ในขั้นตอนต่อไป
    """
    print("💾 Saving agent setup...")
    
    try:
        # บันทึกข้อมูลที่ประมวลผลแล้ว
        with open(os.path.join(PROCESSED_DIR, "processed_crypto_data.pkl"), 'wb') as f:
            pickle.dump(df, f)
        print("✅ Saved processed data as pickle file")
        
        # บันทึก environment configuration
        env_config = {
            'env_kwargs': train_env_kwargs,
            'model_name': model_name,
            'device': str(device)
        }
        with open(os.path.join(AGENT_DIR, "environment_config.pkl"), 'wb') as f:
            pickle.dump(env_config, f)
        print("✅ Saved environment configuration")
        
        # บันทึก agent configurations
        with open(os.path.join(AGENT_DIR, "agent_configs.pkl"), 'wb') as f:
            pickle.dump(agent_configs, f)
        print("✅ Saved agent configurations")
        
        # บันทึกข้อมูล agent info
        from datetime import datetime
        agent_info = {
            'model_name': model_name,
            'device': str(device),
            'creation_time': datetime.now().strftime('%Y-%m-%d %H:%M:%S'),
            'data_shape': df.shape,
            'cryptocurrencies': sorted(df['tic'].unique().tolist()),
            'feature_columns': train_env_kwargs['tech_indicator_list']
        }
        with open(os.path.join(AGENT_DIR, "agent_info.pkl"), 'wb') as f:
            pickle.dump(agent_info, f)
        print("✅ Saved agent information")
        
        print(f"\n📋 Agent Setup Summary:")
        print(f"  🤖 Model: {model_name}")
        print(f"  💻 Device: {device}")
        print(f"  📊 Data shape: {df.shape}")
        print(f"  💰 Cryptocurrencies: {len(df['tic'].unique())}")
        print(f"  🔧 Features: {len(train_env_kwargs['tech_indicator_list'])}")
        print(f"  💰 Initial amount: ${train_env_kwargs['initial_amount']:,.2f}")
        print(f"  💸 Transaction cost: {train_env_kwargs['buy_cost_pct'][0]*100:.3f}%")
        
        return True
        
    except Exception as e:
        print(f"❌ Error saving agent setup: {str(e)}")
        return False

# ทดสอบ environment
def test_environment(env, steps=5):
    """
    ทดสอบ environment ว่าทำงานได้ปกติ
    """
    print(f"🧪 Testing environment...")
    
    try:
        reset_result = env.reset()
        
        # ตรวจสอบว่า reset ส่งคืน tuple หรือ array
        if isinstance(reset_result, tuple):
            obs = reset_result[0]  # เอาเฉพาะ observation
            print(f"✅ Environment reset successful (tuple format)")
        else:
            obs = reset_result  # observation โดยตรง
            print(f"✅ Environment reset successful (array format)")
            
        print(f"📊 Initial observation shape: {obs.shape}")
        print(f"📊 Initial observation type: {type(obs)}")
        
        total_reward = 0
        for step in range(steps):
            # สุ่ม action
            action = env.action_space.sample()
            step_result = env.step(action)
            
            # ตรวจสอบ format ของ step result
            if len(step_result) == 4:
                obs, reward, done, info = step_result
            elif len(step_result) == 5:
                obs, reward, terminated, truncated, info = step_result
                done = terminated or truncated
            else:
                print(f"⚠️ Unexpected step result format: {len(step_result)} elements")
                obs, reward, done = step_result[0], step_result[1], step_result[2]
                info = step_result[-1] if len(step_result) > 3 else {}
            
            total_reward += reward
            
            if done:
                print(f"🏁 Episode finished at step {step+1}")
                break
                
        print(f"✅ Environment test completed")
        print(f"🎯 Total reward from {steps} random steps: {total_reward:.4f}")
        return True
        
    except Exception as e:
        print(f"❌ Environment test failed: {str(e)}")
        print(f"📋 Reset result type: {type(reset_result) if 'reset_result' in locals() else 'Unknown'}")
        if 'reset_result' in locals():
            print(f"📋 Reset result: {reset_result}")
        return False

# บันทึกข้อมูลและทดสอบ
if model is not None:
    # บันทึกข้อมูล
    save_success = save_agent_setup(df, train_env_kwargs, agent_configs, MODEL_NAME, device)
    
    # ทดสอบ environment
    test_success = test_environment(train_env)
    
    if save_success and test_success:
        print(f"\n🎉 Agent creation completed successfully!")
        print(f"📂 All files saved in: {AGENT_DIR}")
        print(f"🚀 Ready to proceed to agent training!")
    else:
        print(f"\n⚠️ Some issues occurred during setup. Please check the logs.")
else:
    print(f"\n❌ Agent creation failed. Cannot proceed to saving.")


💾 Saving agent setup...
✅ Saved processed data as pickle file
✅ Saved environment configuration
✅ Saved agent configurations
✅ Saved agent information

📋 Agent Setup Summary:
  🤖 Model: PPO
  💻 Device: cuda
  📊 Data shape: (5480, 18)
  💰 Cryptocurrencies: 5
  🔧 Features: 10
  💰 Initial amount: $100,000.00
  💸 Transaction cost: 0.100%
🧪 Testing environment...
✅ Environment reset successful (tuple format)
❌ Environment test failed: 'list' object has no attribute 'shape'
📋 Reset result type: <class 'tuple'>
📋 Reset result: ([100000, 1.2663657360618603, 527.1626125433695, 47466.91130383056, 3750.9070399318057, 168.47896597285185, 0, 0, 0, 0, 0, 1.2663657360618603, 527.1626125433695, 47466.91130383056, 3750.9070399318057, 168.47896597285185, 1.2663657360618603, 527.1626125433695, 47466.91130383056, 3750.9070399318057, 168.47896597285185, 50.0, 50.0, 50.0, 50.0, 50.0, 1.2663657360618603, 527.1626125433695, 47466.91130383056, 3750.9070399318057, 168.47896597285185, 1.2663657360618603, 527.162