In [None]:
import sys
import warnings
warnings.filterwarnings('ignore')

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import os
import pickle
import torch

# FinRL imports
from finrl.meta.env_stock_trading.env_stocktrading import StockTradingEnv
from finrl.agents.stablebaselines3.models import DRLAgent

# Import config
from config import *

# Directory names used by this notebook
PROCESSED_DIR = "processed_data"
AGENT_DIR = "agents"

# Only the agent output directory is created here, and only when it is absent.
for dir_name in (AGENT_DIR,):
    if os.path.exists(dir_name):
        continue
    os.makedirs(dir_name)

print("üìÅ Setup directories completed")
print("ü§ñ Starting Agent Creation Process")


In [None]:
# Function that loads the already-processed data
def load_processed_data():
    """Load the processed crypto dataset from ``PROCESSED_DIR``.

    The pickle file is tried first (faster). If it cannot be read for any
    reason, the CSV export is loaded instead and a ``timestamp`` column is
    parsed to datetime, built from the ``date`` column when ``timestamp``
    is absent.

    Returns:
        The loaded object (a ``pandas.DataFrame`` when read from CSV).

    Raises:
        Whatever ``pandas.read_csv`` raises when the CSV fallback cannot be
        read either.
    """
    print("üìÇ Loading processed data...")
    pickle_file = os.path.join(PROCESSED_DIR, "processed_crypto_data.pkl")
    try:
        # NOTE: unpickling can execute arbitrary code; only load files that
        # were produced by this project.
        with open(pickle_file, 'rb') as f:
            df = pickle.load(f)
    except Exception as exc:
        # `Exception` rather than a bare `except` so that KeyboardInterrupt
        # and SystemExit still propagate; the reason for the fallback is
        # reported instead of being silently dropped.
        print(f"Could not load {pickle_file} ({type(exc).__name__}: {exc}); falling back to CSV")
        csv_file = os.path.join(PROCESSED_DIR, "processed_crypto_data.csv")
        df = pd.read_csv(csv_file)
        # CSV stores timestamps as text, so convert them back to datetime
        if 'timestamp' in df.columns:
            df['timestamp'] = pd.to_datetime(df['timestamp'])
        elif 'date' in df.columns:
            df['timestamp'] = pd.to_datetime(df['date'])
        print(f"‚úÖ Loaded processed data from {csv_file}")
    else:
        print(f"‚úÖ Loaded processed data from {pickle_file}")

    return df

# Load the data and print a short overview of it
df = load_processed_data()
timestamps = df['timestamp']
tickers = sorted(df['tic'].unique())
print(f"üìä Data shape: {df.shape}")
print(f"üóìÔ∏è Date range: {timestamps.min()} to {timestamps.max()}")
print(f"üí∞ Cryptocurrencies: {tickers}")

# List every available column, numbered from 1
print("\nüìã Available columns:")
for i, col in enumerate(df.columns):
    print(f"  {i + 1:2d}. {col}")

# Show the first rows; indicator columns are added when 'sma_20' is present
print("\nüìà Sample data:")
display_cols = ['timestamp', 'tic', 'Close', 'Volume']
if 'sma_20' in df.columns:
    display_cols += ['sma_20', 'rsi', 'macd']
preview = df[display_cols].head()
print(preview)


In [None]:
# Split the data into Train/Validation/Test
def split_data(df, train_ratio=0.7, val_ratio=0.15):
    """Split the data chronologically into train / validation / test sets.

    Rows are ordered by calendar date and ticker. The cut points are placed
    on date boundaries, so every row of a given date ends up in exactly one
    subset (cutting by raw row count could leave one date's tickers spread
    over two subsets).

    Args:
        df: DataFrame with a datetime ``timestamp`` column and a ``tic``
            column. It is not modified; a copy is used.
        train_ratio: Fraction of the distinct dates used for training.
        val_ratio: Fraction of the distinct dates used for validation. The
            remaining dates form the test set.

    Returns:
        ``(train_df, val_df, test_df)``, each with a fresh 0-based index and
        an added ``date`` column derived from ``timestamp``.
    """
    print("üìä Splitting data...")

    # Order the rows by date and cryptocurrency
    df = df.copy()
    df['date'] = df['timestamp'].dt.date
    df.sort_values(['date', 'tic'], inplace=True)
    df.reset_index(drop=True, inplace=True)

    total_len = len(df)

    # The frame is sorted by date, so factorize numbers the distinct dates
    # 0, 1, 2, ... in chronological order.
    date_codes, unique_dates = pd.factorize(df['date'])
    n_dates = len(unique_dates)
    train_end = int(n_dates * train_ratio)
    val_end = train_end + int(n_dates * val_ratio)

    # Rows with a missing date get code -1; they are sorted last and are kept
    # in the final (test) subset.
    has_date = date_codes >= 0
    in_train = has_date & (date_codes < train_end)
    in_val = has_date & (date_codes >= train_end) & (date_codes < val_end)
    in_test = ~(in_train | in_val)

    train_df = df[in_train].reset_index(drop=True)
    val_df = df[in_val].reset_index(drop=True)
    test_df = df[in_test].reset_index(drop=True)

    # Guard the percentage report against an empty input frame
    denom = max(total_len, 1)
    print(f"‚úÖ Data split completed:")
    print(f"  üìà Train: {len(train_df):,} rows ({len(train_df)/denom*100:.1f}%)")
    print(f"  üìä Validation: {len(val_df):,} rows ({len(val_df)/denom*100:.1f}%)")
    print(f"  üìâ Test: {len(test_df):,} rows ({len(test_df)/denom*100:.1f}%)")

    return train_df, val_df, test_df

# Create the trading environment
def create_trading_environment(df, initial_amount=INITIAL_AMOUNT,
                              transaction_cost_pct=TRANSACTION_COST_PCT):
    """Create a FinRL ``StockTradingEnv`` for the given data.

    Every column except ``date``, ``timestamp``, ``tic`` and those starting
    with ``Adj`` is handed to the environment as an indicator/feature.

    Args:
        df: Market data with at least ``Open``, ``High``, ``Low``, ``Close``,
            ``Volume`` and ``tic`` columns. It is copied, not modified.
        initial_amount: Starting cash balance of the simulated account.
        transaction_cost_pct: Proportional transaction cost.

    Returns:
        ``(env, env_kwargs)``: the environment and the keyword arguments it
        was built with.

    Raises:
        ValueError: If one of the required OHLCV columns is missing.
    """
    print(f"üèõÔ∏è Creating trading environment...")

    # Work on a copy so the caller's frame is untouched
    df_processed = df.copy()

    # Make sure the required price/volume columns exist
    required_cols = ['Open', 'High', 'Low', 'Close', 'Volume']
    for col in required_cols:
        if col not in df_processed.columns:
            raise ValueError(f"Missing required column: {col}")

    # Feature columns handed to the agent
    non_feature_cols = ('date', 'timestamp', 'tic')
    feature_cols = [
        col for col in df_processed.columns
        if col not in non_feature_cols and not col.startswith('Adj')
    ]

    print(f"üìã Feature columns: {feature_cols}")

    stock_dim = len(df_processed['tic'].unique())

    # FinRL's StockTradingEnv state is
    #   [cash] + [price per asset] + [holding per asset] + [each indicator per asset],
    # so the observation length must follow this formula rather than just
    # the number of feature columns.
    state_space = 1 + 2 * stock_dim + len(feature_cols) * stock_dim

    # NOTE(review): recent FinRL releases of StockTradingEnv appear to take
    # `buy_cost_pct` / `sell_cost_pct` (and `num_stock_shares`) instead of
    # `transaction_cost_pct` -- confirm against the installed version.
    # NOTE(review): FinRL tutorials index the frame by day
    # (`df.index = df.date.factorize()[0]`) and the env seems to read a
    # lowercase `close` column -- verify the frame passed here matches.
    env_kwargs = {
        'df': df_processed,
        'stock_dim': stock_dim,
        'hmax': 100,  # share cap used by the env -- presumably per trade; TODO confirm
        'initial_amount': initial_amount,
        'transaction_cost_pct': transaction_cost_pct,
        'reward_scaling': 1e-4,
        'state_space': state_space,
        'action_space': stock_dim,
        'tech_indicator_list': feature_cols,
        'print_verbosity': 1
    }

    # Build the environment
    env = StockTradingEnv(**env_kwargs)

    print(f"‚úÖ Environment created successfully:")
    print(f"  üí∞ Initial amount: ${initial_amount:,.2f}")
    print(f"  üí∏ Transaction cost: {transaction_cost_pct*100:.3f}%")
    print(f"  üìä State space: {env_kwargs['state_space']}")
    print(f"  üéØ Action space: {env_kwargs['action_space']}")
    print(f"  üè™ Stock dimension: {env_kwargs['stock_dim']}")

    return env, env_kwargs

# Split the data chronologically
data_splits = split_data(df)
train_df, val_df, test_df = data_splits

# Build one environment per split, in train / validation / test order
(
    (train_env, train_env_kwargs),
    (val_env, val_env_kwargs),
    (test_env, test_env_kwargs),
) = (create_trading_environment(split) for split in data_splits)

print("\n‚úÖ All environments created successfully!")


In [None]:
# Define the configurations for the various RL agents
def setup_agent_configurations():
    """
    Build the hyperparameter sets for the supported RL algorithms.

    Returns a tuple ``(agent_configs, device)``. ``agent_configs`` maps
    'PPO', 'A2C', 'DDPG' and 'SAC' to their model keyword arguments and
    'TRAINING' to the shared training settings; ``device`` is the torch
    device (CUDA when available, otherwise CPU) embedded in each model
    configuration.
    """
    print("üîß Setting up agent configurations...")

    # Pick the compute device
    gpu_available = torch.cuda.is_available()
    device = torch.device("cuda" if gpu_available else "cpu")
    if gpu_available:
        print(f"‚úÖ Using GPU: {torch.cuda.get_device_name(0)}")
        print(f"üíæ GPU Memory: {torch.cuda.get_device_properties(0).total_memory / 1024**3:.1f} GB")
    else:
        print("‚ÑπÔ∏è Using CPU")

    def shared_settings():
        # Trailing entries common to every algorithm; built fresh on each
        # call so no two configurations share the same policy_kwargs object.
        return {
            'policy_kwargs': {'net_arch': [256, 256]},
            'device': device,
            'verbose': 1,
        }

    # PPO (Proximal Policy Optimization) - suited to continuous action spaces
    ppo_settings = {
        'learning_rate': 3e-4,
        'n_steps': 2048,
        'batch_size': 64,
        'n_epochs': 10,
        'gamma': 0.99,
        'gae_lambda': 0.95,
        'clip_range': 0.2,
        'ent_coef': 0.01,
        'vf_coef': 0.5,
        'max_grad_norm': 0.5,
    }

    # A2C (Advantage Actor-Critic) - fast but possibly less stable than PPO
    a2c_settings = {
        'learning_rate': 7e-4,
        'n_steps': 5,
        'gamma': 0.99,
        'gae_lambda': 1.0,
        'ent_coef': 0.01,
        'vf_coef': 0.25,
        'max_grad_norm': 0.5,
    }

    # DDPG (Deep Deterministic Policy Gradient) - for continuous actions
    ddpg_settings = {
        'learning_rate': 1e-3,
        'buffer_size': 1000000,
        'learning_starts': 100,
        'batch_size': 256,
        'tau': 0.005,
        'gamma': 0.99,
    }

    # SAC (Soft Actor-Critic) - high performance for continuous control
    sac_settings = {
        'learning_rate': 3e-4,
        'buffer_size': 1000000,
        'learning_starts': 100,
        'batch_size': 256,
        'tau': 0.005,
        'gamma': 0.99,
        'ent_coef': 'auto',
    }

    # Training configuration
    training_settings = {
        'total_timesteps': 100000,  # number of timesteps to train for
        'tb_log_name': 'crypto_trading',
        'eval_freq': 5000,
        'n_eval_episodes': 5,
        'save_freq': 10000,
    }

    agent_configs = {
        'PPO': {**ppo_settings, **shared_settings()},
        'A2C': {**a2c_settings, **shared_settings()},
        'DDPG': {**ddpg_settings, **shared_settings()},
        'SAC': {**sac_settings, **shared_settings()},
        'TRAINING': training_settings,
    }

    return agent_configs, device

# Build the agent configurations
agent_configs, device = setup_agent_configurations()

# Choose which model to use (can be changed)
MODEL_NAME = 'PPO'  # may be switched to 'A2C', 'DDPG', or 'SAC'

print(f"\nü§ñ Selected Model: {MODEL_NAME}")

# Print the chosen model's settings, then the shared training settings
for heading, section in (
    ("üîß Model Configuration:", MODEL_NAME),
    ("\nüìä Training Configuration:", 'TRAINING'),
):
    print(heading)
    for key, value in agent_configs[section].items():
        print(f"  {key}: {value}")


In [None]:
# Create the RL agent
def create_agent(env, model_name, agent_configs):
    """Create an RL agent and model through FinRL's ``DRLAgent``.

    Args:
        env: The trading environment the model will act in.
        model_name: Key into ``agent_configs`` (e.g. ``'PPO'``); its
            lower-cased form is passed to ``DRLAgent.get_model``.
        agent_configs: Mapping from model name to its keyword arguments.

    Returns:
        ``(agent, model)`` on success, ``(None, None)`` if anything fails
        (the error is printed, not raised).
    """
    print(f"ü§ñ Creating {model_name} agent...")

    try:
        # Create the DRLAgent wrapper
        agent = DRLAgent(env=env)

        # Copy so the shared configuration dict is not mutated below
        model_params = agent_configs[model_name].copy()

        # DRLAgent.get_model forwards its own `policy_kwargs` and `verbose`
        # arguments alongside **model_kwargs; leaving those keys inside
        # model_kwargs would pass them twice ("got multiple values for
        # keyword argument"), so they are handed over explicitly instead.
        policy_kwargs = model_params.pop('policy_kwargs', None)
        verbose = model_params.pop('verbose', 1)

        # Create the model
        model = agent.get_model(
            model_name.lower(),
            policy_kwargs=policy_kwargs,
            model_kwargs=model_params,
            verbose=verbose,
        )

        print(f"‚úÖ {model_name} agent created successfully!")
        print(f"üìã Model summary:")
        print(f"  Algorithm: {model_name}")
        print(f"  Policy: {type(model.policy).__name__}")
        print(f"  Device: {model.device}")

        return agent, model

    except Exception as e:
        # Best-effort by design: report the failure and let the caller check for None
        print(f"‚ùå Error creating {model_name} agent: {str(e)}")
        return None, None

# Create the agent for the selected model on the training environment
agent, model = create_agent(train_env, MODEL_NAME, agent_configs)

if model is None:
    print("‚ùå Failed to create agent. Please check configurations.")
else:
    print("\nüéØ Agent ready for training!")
    print(f"üìä Environment observation space: {train_env.observation_space}")
    print(f"üéÆ Environment action space: {train_env.action_space}")


In [None]:
# Save the data needed by the next step
def _dump_pickle(path, obj):
    """Write ``obj`` to ``path`` as a pickle file."""
    with open(path, 'wb') as f:
        pickle.dump(obj, f)


def save_agent_setup(df, train_env_kwargs, agent_configs, model_name, device):
    """Persist everything the next pipeline step needs.

    Writes the processed data to ``PROCESSED_DIR`` (overwriting
    ``processed_crypto_data.pkl``) and the environment configuration, agent
    configurations and a small agent-info record to ``AGENT_DIR``.

    Args:
        df: The processed data; must have a ``tic`` column.
        train_env_kwargs: Environment keyword arguments; the keys
            ``tech_indicator_list``, ``initial_amount`` and
            ``transaction_cost_pct`` are read here.
        agent_configs: Agent configuration mapping, pickled as-is.
        model_name: Name of the selected model.
        device: Compute device; stored as ``str(device)``.

    Returns:
        ``True`` when every file was written, ``False`` if any step failed
        (the error is printed, not raised).
    """
    # Imported locally: nothing visible at file level imports `datetime`, so
    # the creation timestamp below would otherwise fail with a NameError.
    from datetime import datetime

    print("üíæ Saving agent setup...")

    try:
        # Save the processed data
        _dump_pickle(os.path.join(PROCESSED_DIR, "processed_crypto_data.pkl"), df)
        print("‚úÖ Saved processed data as pickle file")

        # Save the environment configuration
        env_config = {
            'env_kwargs': train_env_kwargs,
            'model_name': model_name,
            'device': str(device)
        }
        _dump_pickle(os.path.join(AGENT_DIR, "environment_config.pkl"), env_config)
        print("‚úÖ Saved environment configuration")

        # Save the agent configurations
        _dump_pickle(os.path.join(AGENT_DIR, "agent_configs.pkl"), agent_configs)
        print("‚úÖ Saved agent configurations")

        # Save the agent info record
        agent_info = {
            'model_name': model_name,
            'device': str(device),
            'creation_time': datetime.now().strftime('%Y-%m-%d %H:%M:%S'),
            'data_shape': df.shape,
            'cryptocurrencies': sorted(df['tic'].unique().tolist()),
            'feature_columns': train_env_kwargs['tech_indicator_list']
        }
        _dump_pickle(os.path.join(AGENT_DIR, "agent_info.pkl"), agent_info)
        print("‚úÖ Saved agent information")

        print(f"\nüìã Agent Setup Summary:")
        print(f"  ü§ñ Model: {model_name}")
        print(f"  üíª Device: {device}")
        print(f"  üìä Data shape: {df.shape}")
        print(f"  üí∞ Cryptocurrencies: {len(df['tic'].unique())}")
        print(f"  üîß Features: {len(train_env_kwargs['tech_indicator_list'])}")
        print(f"  üí∞ Initial amount: ${train_env_kwargs['initial_amount']:,.2f}")
        print(f"  üí∏ Transaction cost: {train_env_kwargs['transaction_cost_pct']*100:.3f}%")

        return True

    except Exception as e:
        # Best-effort by design: report the failure and signal it via the return value
        print(f"‚ùå Error saving agent setup: {str(e)}")
        return False

# Test the environment
def test_environment(env, steps=5):
    """Smoke-test an environment by taking a few random actions.

    Both API styles are accepted: ``reset()`` may return the observation
    alone or an ``(observation, info)`` tuple, and ``step()`` may return
    four values (with ``done``) or five (with ``terminated`` and
    ``truncated``). Observations may be arrays or plain lists.

    Args:
        env: Object exposing ``reset()``, ``step(action)`` and
            ``action_space.sample()``.
        steps: Maximum number of random steps to take.

    Returns:
        ``True`` if the run finished without an exception, ``False``
        otherwise (the error is printed, not raised).
    """
    print(f"üß™ Testing environment...")

    try:
        reset_result = env.reset()
        # A tuple here means reset() returned (observation, info)
        obs = reset_result[0] if isinstance(reset_result, tuple) else reset_result
        print(f"‚úÖ Environment reset successful")
        # np.shape also works when the observation is a plain list
        print(f"üìä Initial observation shape: {np.shape(obs)}")

        total_reward = 0
        steps_taken = 0
        for step in range(steps):
            # Sample a random action
            action = env.action_space.sample()
            step_result = env.step(action)
            if len(step_result) == 5:
                obs, reward, terminated, truncated, info = step_result
                done = terminated or truncated
            else:
                obs, reward, done, info = step_result
            total_reward += reward
            steps_taken += 1

            if done:
                print(f"üèÅ Episode finished at step {step+1}")
                break

        print(f"‚úÖ Environment test completed")
        # Report the steps actually executed, which is fewer than `steps`
        # when the episode ended early.
        print(f"üéØ Total reward from {steps_taken} random steps: {total_reward:.4f}")
        return True

    except Exception as e:
        # Best-effort by design: report the failure and signal it via the return value
        print(f"‚ùå Environment test failed: {str(e)}")
        return False


# The name starts with "test_", so tell pytest not to collect this helper
# as a test case if the module is ever imported by a test run.
test_environment.__test__ = False

# Save the setup and smoke-test the environment
if model is None:
    print("\n‚ùå Agent creation failed. Cannot proceed to saving.")
else:
    # Persist the setup
    save_success = save_agent_setup(df, train_env_kwargs, agent_configs, MODEL_NAME, device)

    # Exercise the training environment
    test_success = test_environment(train_env)

    if not (save_success and test_success):
        print("\n‚ö†Ô∏è Some issues occurred during setup. Please check the logs.")
    else:
        print("\nüéâ Agent creation completed successfully!")
        print(f"üìÇ All files saved in: {AGENT_DIR}")
        print("üöÄ Ready to proceed to agent training!")
