In [None]:
import sys
import warnings
warnings.filterwarnings('ignore')

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import os
import pickle
import torch

# FinRL imports
from finrl.meta.env_stock_trading.env_stocktrading import StockTradingEnv
from finrl.agents.stablebaselines3.models import DRLAgent

# Import config
from config import *

# Setup directories
# Directory holding the preprocessed dataset (read by later cells).
PROCESSED_DIR = "processed_data"
# Directory where agent configuration artifacts are written.
AGENT_DIR = "agents"
for dir_name in [AGENT_DIR]:
    # exist_ok avoids the check-then-create race of os.path.exists + makedirs
    os.makedirs(dir_name, exist_ok=True)

print("📁 Setup directories completed")
print(f"🤖 Starting Agent Creation Process")


In [None]:
# ฟังก์ชันโหลดข้อมูลที่ประมวลผลแล้ว
def load_processed_data():
    """Load the preprocessed dataset from PROCESSED_DIR.

    The pickle file is tried first; if it is missing or unreadable the CSV
    file is used instead. Whichever source is used, a ``timestamp`` column is
    normalised to datetime (derived from ``date`` when ``timestamp`` is
    absent), so both sources yield the same schema.

    Returns:
        pandas.DataFrame: the loaded data.

    Raises:
        Whatever ``pandas.read_csv`` raises when the CSV fallback also fails.
    """
    print("📂 Loading processed data...")
    pickle_file = os.path.join(PROCESSED_DIR, "processed_crypto_data.pkl")
    csv_file = os.path.join(PROCESSED_DIR, "processed_crypto_data.csv")

    df = None
    source = pickle_file
    try:
        # NOTE(security): pickle.load can execute arbitrary code; only load
        # files produced by this project's own preprocessing step.
        with open(pickle_file, 'rb') as f:
            df = pickle.load(f)
    except FileNotFoundError:
        # Expected when no pickle has been written yet: fall back silently.
        pass
    except Exception as exc:
        # Corrupt/incompatible pickle: report it, then fall back to CSV.
        # (Unlike a bare ``except``, this lets KeyboardInterrupt propagate.)
        print(f"⚠️ Could not read {pickle_file} ({exc}); falling back to CSV")

    if df is None:
        df = pd.read_csv(csv_file)
        source = csv_file

    # Normalise the time column for both sources, not only for the CSV path.
    if 'timestamp' in df.columns:
        df['timestamp'] = pd.to_datetime(df['timestamp'])
    elif 'date' in df.columns:
        df['timestamp'] = pd.to_datetime(df['date'])

    print(f"✅ Loaded processed data from {source}")
    return df

# โหลดข้อมูล
df = load_processed_data()
print(f"📊 Data shape: {df.shape}")
print(f"🗓️ Date range: {df['timestamp'].min()} to {df['timestamp'].max()}")
print(f"💰 Cryptocurrencies: {sorted(df['tic'].unique())}")

# List every available column with a 1-based index
print(f"\n📋 Available columns:")
for i, col in enumerate(df.columns):
    print(f"  {i+1:2d}. {col}")

# Preview a few rows. Each optional indicator is added only when it actually
# exists, so a dataset with 'sma_20' but without 'rsi'/'macd' no longer
# raises KeyError.
print(f"\n📈 Sample data:")
display_cols = ['timestamp', 'tic', 'Close', 'Volume']
display_cols.extend(
    indicator for indicator in ('sma_20', 'rsi', 'macd')
    if indicator in df.columns
)
print(df[display_cols].head())


In [None]:
# แบ่งข้อมูลเป็น Train/Validation/Test
def _snap_to_date_start(dates, cut):
    """Move a row offset back to the first row of the date it falls inside.

    ``dates`` is the date column (sorted ascending) as a NumPy array. Offsets
    that are already on a date boundary, or at either end, are returned as is.
    """
    if cut <= 0 or cut >= len(dates):
        return cut
    if dates[cut] != dates[cut - 1]:
        return cut
    # argmax on a boolean array yields the position of the first True.
    return int((dates == dates[cut]).argmax())


def split_data(df, train_ratio=0.7, val_ratio=0.15):
    """Split the dataset chronologically into train / validation / test.

    Rows are sorted by calendar date and ticker. The cut points are computed
    from the row-count ratios and then moved back to the nearest date
    boundary, so all rows that share a date land in the same partition
    (a date is never divided between two partitions). Actual partition sizes
    may therefore be slightly smaller/larger than the nominal ratios; the
    remainder after train and validation always goes to test.

    Args:
        df: DataFrame with a datetime ``timestamp`` column and a ``tic``
            column. It is not modified.
        train_ratio: nominal fraction of rows for training.
        val_ratio: nominal fraction of rows for validation.

    Returns:
        tuple: ``(train_df, val_df, test_df)``, each with a fresh 0-based
        index and an added ``date`` column.
    """
    print("📊 Splitting data...")

    # Work on a sorted copy so the caller's frame is left untouched
    df = df.copy()
    df['date'] = df['timestamp'].dt.date
    df.sort_values(['date', 'tic'], inplace=True)
    df.reset_index(drop=True, inplace=True)

    # Nominal cut points (same arithmetic as a plain row-ratio split)
    total_len = len(df)
    train_size = int(total_len * train_ratio)
    val_size = int(total_len * val_ratio)

    # Align both cut points to date boundaries
    dates = df['date'].to_numpy()
    train_end = _snap_to_date_start(dates, train_size)
    val_end = _snap_to_date_start(dates, train_size + val_size)

    train_df = df.iloc[:train_end].reset_index(drop=True)
    val_df = df.iloc[train_end:val_end].reset_index(drop=True)
    test_df = df.iloc[val_end:].reset_index(drop=True)

    # Guard the percentage report against an empty input frame
    denom = total_len if total_len else 1
    print(f"✅ Data split completed:")
    print(f"  📈 Train: {len(train_df):,} rows ({len(train_df)/denom*100:.1f}%)")
    print(f"  📊 Validation: {len(val_df):,} rows ({len(val_df)/denom*100:.1f}%)")
    print(f"  📉 Test: {len(test_df):,} rows ({len(test_df)/denom*100:.1f}%)")

    return train_df, val_df, test_df

# สร้าง Trading Environment
def create_trading_environment(df, initial_amount=INITIAL_AMOUNT,
                               transaction_cost_pct=TRANSACTION_COST_PCT):
    """Build a FinRL ``StockTradingEnv`` for the given data partition.

    Args:
        df: DataFrame containing at least ``Open``, ``High``, ``Low``,
            ``Close``, ``Volume`` and ``tic`` columns.
        initial_amount: starting cash passed to the environment.
        transaction_cost_pct: proportional trading cost passed to the
            environment.

    Returns:
        tuple: ``(env, env_kwargs)`` where ``env_kwargs`` is the dict of
        keyword arguments the environment was constructed with.

    Raises:
        ValueError: if one of the required OHLCV columns is missing.
    """
    print(f"🏛️ Creating trading environment...")

    # Work on a copy so the caller's frame is not shared with the environment
    df_processed = df.copy()

    # Fail early when a required price/volume column is absent
    required_cols = ['Open', 'High', 'Low', 'Close', 'Volume']
    for col in required_cols:
        if col not in df_processed.columns:
            raise ValueError(f"Missing required column: {col}")

    # Every column except identifiers/time and 'Adj*' is exposed as a feature
    non_feature_cols = ('date', 'timestamp', 'tic')
    feature_cols = [
        col for col in df_processed.columns
        if col not in non_feature_cols and not col.startswith('Adj')
    ]

    print(f"📋 Feature columns: {feature_cols}")

    stock_dim = len(df_processed['tic'].unique())
    # FinRL's state vector is [cash] + prices (stock_dim) + holdings
    # (stock_dim) + one value per indicator per asset, so the observation
    # size is not simply the number of feature columns.
    state_space = 1 + 2 * stock_dim + len(feature_cols) * stock_dim

    # NOTE(review): recent FinRL releases appear to take buy_cost_pct /
    # sell_cost_pct / num_stock_shares rather than transaction_cost_pct —
    # confirm against the installed version.
    env_kwargs = {
        'df': df_processed,
        'stock_dim': stock_dim,
        'hmax': 100,  # per FinRL docs: max number of shares traded per action
        'initial_amount': initial_amount,
        'transaction_cost_pct': transaction_cost_pct,
        'reward_scaling': 1e-4,
        'state_space': state_space,
        'action_space': stock_dim,
        'tech_indicator_list': feature_cols,
        'print_verbosity': 1
    }

    env = StockTradingEnv(**env_kwargs)

    print(f"✅ Environment created successfully:")
    print(f"  💰 Initial amount: ${initial_amount:,.2f}")
    print(f"  💸 Transaction cost: {transaction_cost_pct*100:.3f}%")
    print(f"  📊 State space: {env_kwargs['state_space']}")
    print(f"  🎯 Action space: {env_kwargs['action_space']}")
    print(f"  🏪 Stock dimension: {env_kwargs['stock_dim']}")

    return env, env_kwargs

# แบ่งข้อมูล
train_df, val_df, test_df = split_data(df)

# Build one environment per partition, in train -> validation -> test order
(
    (train_env, train_env_kwargs),
    (val_env, val_env_kwargs),
    (test_env, test_env_kwargs),
) = [
    create_trading_environment(partition)
    for partition in (train_df, val_df, test_df)
]

print(f"\n✅ All environments created successfully!")


In [None]:
# กำหนด configurations สำหรับ RL Agents ต่างๆ
def setup_agent_configurations():
    """Assemble hyperparameters for the supported RL algorithms.

    Picks CUDA when ``torch.cuda.is_available()`` and CPU otherwise, then
    builds one parameter dict each for PPO, A2C, DDPG and SAC plus a
    ``TRAINING`` section with run-level settings.

    Returns:
        tuple: ``(agent_configs, device)`` — the configuration dict and the
        selected ``torch.device``.
    """
    print("🔧 Setting up agent configurations...")

    # Select the compute device and report it
    gpu_available = torch.cuda.is_available()
    device = torch.device("cuda" if gpu_available else "cpu")
    if gpu_available:
        print(f"✅ Using GPU: {torch.cuda.get_device_name(0)}")
        print(f"💾 GPU Memory: {torch.cuda.get_device_properties(0).total_memory / 1024**3:.1f} GB")
    else:
        print("ℹ️ Using CPU")

    def with_common(algo_params):
        # Append the entries every algorithm shares; a fresh policy_kwargs
        # dict is created per call so configs never alias each other.
        return {
            **algo_params,
            'policy_kwargs': dict(net_arch=[256, 256]),
            'device': device,
            'verbose': 1,
        }

    # PPO (Proximal Policy Optimization)
    ppo_params = with_common({
        'learning_rate': 3e-4,
        'n_steps': 2048,
        'batch_size': 64,
        'n_epochs': 10,
        'gamma': 0.99,
        'gae_lambda': 0.95,
        'clip_range': 0.2,
        'ent_coef': 0.01,
        'vf_coef': 0.5,
        'max_grad_norm': 0.5,
    })

    # A2C (Advantage Actor-Critic)
    a2c_params = with_common({
        'learning_rate': 7e-4,
        'n_steps': 5,
        'gamma': 0.99,
        'gae_lambda': 1.0,
        'ent_coef': 0.01,
        'vf_coef': 0.25,
        'max_grad_norm': 0.5,
    })

    # DDPG (Deep Deterministic Policy Gradient)
    ddpg_params = with_common({
        'learning_rate': 1e-3,
        'buffer_size': 1000000,
        'learning_starts': 100,
        'batch_size': 256,
        'tau': 0.005,
        'gamma': 0.99,
    })

    # SAC (Soft Actor-Critic)
    sac_params = with_common({
        'learning_rate': 3e-4,
        'buffer_size': 1000000,
        'learning_starts': 100,
        'batch_size': 256,
        'tau': 0.005,
        'gamma': 0.99,
        'ent_coef': 'auto',
    })

    # Run-level settings shared by whichever algorithm is trained
    training_params = {
        'total_timesteps': 100000,  # number of environment steps to train for
        'tb_log_name': 'crypto_trading',
        'eval_freq': 5000,
        'n_eval_episodes': 5,
        'save_freq': 10000,
    }

    agent_configs = {
        'PPO': ppo_params,
        'A2C': a2c_params,
        'DDPG': ddpg_params,
        'SAC': sac_params,
        'TRAINING': training_params,
    }

    return agent_configs, device

# สร้าง agent configurations
agent_configs, device = setup_agent_configurations()

# Algorithm to instantiate; can be switched to 'A2C', 'DDPG', or 'SAC'
MODEL_NAME = 'PPO'

print(f"\n🤖 Selected Model: {MODEL_NAME}")

# Print the chosen algorithm's parameters, then the training settings
for heading, section in (
    (f"🔧 Model Configuration:", MODEL_NAME),
    (f"\n📊 Training Configuration:", 'TRAINING'),
):
    print(heading)
    for key, value in agent_configs[section].items():
        print(f"  {key}: {value}")


In [None]:
# สร้าง RL Agent
def create_agent(env, model_name, agent_configs):
    """Create a FinRL ``DRLAgent`` and its underlying model.

    ``policy_kwargs`` and ``verbose`` are taken out of the algorithm's config
    and handed to ``DRLAgent.get_model`` through its dedicated parameters.
    ``get_model`` forwards those two explicitly to the algorithm constructor
    and also expands ``model_kwargs`` into it, so leaving them inside
    ``model_kwargs`` makes the constructor receive each keyword twice.

    Args:
        env: trading environment the agent acts in.
        model_name: key into ``agent_configs`` (e.g. ``'PPO'``); lower-cased
            before being passed to FinRL.
        agent_configs: mapping of algorithm name to hyperparameter dict.

    Returns:
        tuple: ``(agent, model)`` on success, ``(None, None)`` when anything
        fails (the error is printed, not raised).
    """
    print(f"🤖 Creating {model_name} agent...")

    try:
        agent = DRLAgent(env=env)

        # Shallow copy so the pops below do not alter the shared config
        model_params = dict(agent_configs[model_name])
        policy_kwargs = model_params.pop('policy_kwargs', None)
        verbose = model_params.pop('verbose', 1)

        model = agent.get_model(
            model_name.lower(),
            policy_kwargs=policy_kwargs,
            model_kwargs=model_params,
            verbose=verbose,
        )

        print(f"✅ {model_name} agent created successfully!")
        print(f"📋 Model summary:")
        print(f"  Algorithm: {model_name}")
        print(f"  Policy: {type(model.policy).__name__}")
        print(f"  Device: {model.device}")

        return agent, model

    except Exception as e:
        # Callers test ``model is not None``, so report and return sentinels
        print(f"❌ Error creating {model_name} agent: {str(e)}")
        return None, None

# สร้าง agent
agent, model = create_agent(train_env, MODEL_NAME, agent_configs)

# Report the outcome; on success also show the environment's spaces
if model is None:
    print(f"❌ Failed to create agent. Please check configurations.")
else:
    print(f"\n🎯 Agent ready for training!")
    print(f"📊 Environment observation space: {train_env.observation_space}")
    print(f"🎮 Environment action space: {train_env.action_space}")


In [None]:
# บันทึกข้อมูลสำหรับใช้ในขั้นตอนต่อไป
def save_agent_setup(df, train_env_kwargs, agent_configs, model_name, device):
    """Persist the data and configuration needed by the next pipeline step.

    Writes four pickle files: the dataset into PROCESSED_DIR, and the
    environment configuration, the agent configurations and a summary
    (``agent_info``) into AGENT_DIR. Both directories are created if absent.

    Args:
        df: the processed dataset (must contain a ``tic`` column).
        train_env_kwargs: environment kwargs; ``tech_indicator_list``,
            ``initial_amount`` and ``transaction_cost_pct`` are read from it.
        agent_configs: algorithm configuration mapping to store.
        model_name: name of the selected algorithm.
        device: compute device; stored as ``str(device)``.

    Returns:
        bool: True when everything was written, False if any step failed
        (the error is printed, not raised).
    """
    # Imported here because the notebook's import cell does not provide it;
    # without it the creation timestamp below raises NameError.
    from datetime import datetime

    print("💾 Saving agent setup...")

    def _dump(obj, directory, filename):
        # Serialize obj to directory/filename with pickle
        with open(os.path.join(directory, filename), 'wb') as f:
            pickle.dump(obj, f)

    try:
        os.makedirs(PROCESSED_DIR, exist_ok=True)
        os.makedirs(AGENT_DIR, exist_ok=True)

        # Processed dataset (overwrites any existing pickle)
        _dump(df, PROCESSED_DIR, "processed_crypto_data.pkl")
        print("✅ Saved processed data as pickle file")

        # Environment configuration
        env_config = {
            'env_kwargs': train_env_kwargs,
            'model_name': model_name,
            'device': str(device)
        }
        _dump(env_config, AGENT_DIR, "environment_config.pkl")
        print("✅ Saved environment configuration")

        # Agent configurations
        _dump(agent_configs, AGENT_DIR, "agent_configs.pkl")
        print("✅ Saved agent configurations")

        # Human-readable summary of this setup
        tickers = df['tic'].unique()
        agent_info = {
            'model_name': model_name,
            'device': str(device),
            'creation_time': datetime.now().strftime('%Y-%m-%d %H:%M:%S'),
            'data_shape': df.shape,
            'cryptocurrencies': sorted(tickers.tolist()),
            'feature_columns': train_env_kwargs['tech_indicator_list']
        }
        _dump(agent_info, AGENT_DIR, "agent_info.pkl")
        print("✅ Saved agent information")

        print(f"\n📋 Agent Setup Summary:")
        print(f"  🤖 Model: {model_name}")
        print(f"  💻 Device: {device}")
        print(f"  📊 Data shape: {df.shape}")
        print(f"  💰 Cryptocurrencies: {len(tickers)}")
        print(f"  🔧 Features: {len(train_env_kwargs['tech_indicator_list'])}")
        print(f"  💰 Initial amount: ${train_env_kwargs['initial_amount']:,.2f}")
        print(f"  💸 Transaction cost: {train_env_kwargs['transaction_cost_pct']*100:.3f}%")

        return True

    except Exception as e:
        # Callers branch on the boolean result, so report instead of raising
        print(f"❌ Error saving agent setup: {str(e)}")
        return False

# ทดสอบ environment
def test_environment(env, steps=5):
    """Smoke-test an environment by taking a few random actions.

    Accepts both environment API styles: ``reset()`` returning either the
    observation or an ``(observation, info)`` tuple, and ``step()`` returning
    either ``(obs, reward, done, info)`` or
    ``(obs, reward, terminated, truncated, info)``.

    Args:
        env: environment exposing ``reset()``, ``step(action)`` and
            ``action_space.sample()``.
        steps: maximum number of random steps to take.

    Returns:
        bool: True if reset and all steps ran without raising, else False
        (the error is printed, not raised).
    """
    print(f"🧪 Testing environment...")

    try:
        reset_result = env.reset()
        # Newer API: reset() -> (obs, info); older API: reset() -> obs
        obs = reset_result[0] if isinstance(reset_result, tuple) else reset_result
        print(f"✅ Environment reset successful")
        # np.shape also handles observations returned as plain lists
        print(f"📊 Initial observation shape: {np.shape(obs)}")

        total_reward = 0
        steps_taken = 0
        for step in range(steps):
            # Sample a random action
            action = env.action_space.sample()
            step_result = env.step(action)
            if len(step_result) == 5:
                obs, reward, terminated, truncated, info = step_result
                done = terminated or truncated
            else:
                obs, reward, done, info = step_result
            total_reward += reward
            steps_taken += 1

            if done:
                print(f"🏁 Episode finished at step {step+1}")
                break

        print(f"✅ Environment test completed")
        # Report the steps actually executed, which may be fewer than requested
        print(f"🎯 Total reward from {steps_taken} random steps: {total_reward:.4f}")
        return True

    except Exception as e:
        print(f"❌ Environment test failed: {str(e)}")
        return False


# This is a diagnostic helper, not a pytest test case: opt out of collection
# in case this code is ever imported by a test module.
test_environment.__test__ = False

# บันทึกข้อมูลและทดสอบ
if model is None:
    print(f"\n❌ Agent creation failed. Cannot proceed to saving.")
else:
    # Persist the setup first, then smoke-test the training environment
    save_success = save_agent_setup(df, train_env_kwargs, agent_configs, MODEL_NAME, device)
    test_success = test_environment(train_env)

    if not (save_success and test_success):
        print(f"\n⚠️ Some issues occurred during setup. Please check the logs.")
    else:
        print(f"\n🎉 Agent creation completed successfully!")
        print(f"📂 All files saved in: {AGENT_DIR}")
        print(f"🚀 Ready to proceed to agent training!")
