# Reinforcement Learning Training Example

This notebook demonstrates how to set up and train an RL agent for Bitcoin futures trading.

In [None]:
import os

import gymnasium as gym
import numpy as np
import pandas as pd
import torch

from don.data.binance import BinanceDataCollector
from don.rl.actions import DiscreteActionSpace
from don.rl.env import TradingEnvironment
from don.rl.rewards import SharpeReward

## Load Training Data

First, we'll load historical data for training.

In [None]:
# Load training data.
# Credentials come from the BINANCE_API_KEY / BINANCE_API_SECRET environment
# variables so real keys never have to be typed into (and committed with) the
# notebook. When a variable is unset, the original placeholder string is used.
collector = BinanceDataCollector(
    symbol='BTCUSDT',
    api_key=os.environ.get('BINANCE_API_KEY', 'your_api_key'),
    api_secret=os.environ.get('BINANCE_API_SECRET', 'your_api_secret')
)

# Request the most recent 90 days (roughly three months) of hourly data.
# pd.Timestamp.now() without a tz argument is timezone-naive local time.
# NOTE(review): confirm whether get_historical_data expects local or UTC times.
end_time = pd.Timestamp.now()
start_time = end_time - pd.Timedelta(days=90)
training_data = collector.get_historical_data(
    start_time=start_time,
    end_time=end_time,
    interval='1h'
)

## Set Up Trading Environment

Create the trading environment with discrete actions and Sharpe ratio reward.

In [None]:
# Discrete choices handed to the action space (presumably target position
# sizes, from full short to full long -- verify against DiscreteActionSpace).
position_levels = [-1.0, -0.5, 0.0, 0.5, 1.0]
action_space = DiscreteActionSpace(position_levels)

# Reward calculator configured with a window of 20.
sharpe_reward = SharpeReward(window=20)

# Assemble the environment from the loaded data, the action space and the
# reward calculator.
env = TradingEnvironment(
    data=training_data,
    action_space=action_space,
    reward_calculator=sharpe_reward,
    window_size=10,
)

# Smoke test: reset once and show what the agent will see and can do.
observation, info = env.reset()
print("Observation shape:", observation.shape)
print("Action space:", env.action_space)

## Implement Simple DQN Agent

Define a small fully connected Q-network (one output per discrete action) and the Adam optimizer used to train it.

In [None]:
class DQN(torch.nn.Module):
    """Fully connected network that maps an input vector to one value per action.

    The network is a stack of ``Linear`` + ``ReLU`` pairs followed by a final
    ``Linear`` output layer with no activation.

    Args:
        input_dim: Number of features in each (flattened) input vector.
        output_dim: Number of outputs, i.e. one Q-value per action.
        hidden_dims: Widths of the hidden layers, in order. The default
            ``(64, 32)`` reproduces the original fixed architecture, including
            its ``state_dict`` key names. An empty sequence yields a single
            linear layer.
    """

    def __init__(self, input_dim, output_dim, hidden_dims=(64, 32)):
        super().__init__()
        layers = []
        in_features = input_dim
        # Layers are created in input-to-output order so that, for a fixed
        # seed, the default configuration initialises exactly as before.
        for width in hidden_dims:
            layers.append(torch.nn.Linear(in_features, width))
            layers.append(torch.nn.ReLU())
            in_features = width
        layers.append(torch.nn.Linear(in_features, output_dim))
        self.network = torch.nn.Sequential(*layers)

    def forward(self, x):
        """Return the network output for input batch ``x``."""
        return self.network(x)

# Initialize DQN
# The network is fed the flattened observation, so its input width is the
# product of every observation dimension. Using np.prod keeps this correct
# for observations of any rank, not only 2-D ones.
input_dim = int(np.prod(env.observation_space.shape))
output_dim = len(action_space.positions)  # one Q-value per discrete position
dqn = DQN(input_dim, output_dim)
optimizer = torch.optim.Adam(dqn.parameters())

## Training Loop

Train the agent for a few episodes.

In [None]:
def train_episode(gamma=0.99, epsilon=0.1):
    """Run one episode in ``env`` and update ``dqn`` after every step.

    Actions are chosen epsilon-greedily from the network's Q-values. Each
    transition is used immediately for a one-step temporal-difference update
    applied through ``optimizer`` (no replay buffer, no target network).

    Args:
        gamma: Discount factor applied to the bootstrapped next-state value.
        epsilon: Probability of taking a uniformly random action instead of
            the greedy one.

    Returns:
        The sum of the rewards collected during the episode.
    """
    obs, info = env.reset()
    done = False
    total_reward = 0

    while not done:
        # Flatten the observation into a (1, features) batch for the network.
        obs_tensor = torch.as_tensor(obs, dtype=torch.float32).reshape(1, -1)

        # Gradients are kept here: the chosen action's Q-value is trained below.
        q_values = dqn(obs_tensor)
        if torch.rand(1).item() < epsilon:
            action = int(torch.randint(q_values.shape[-1], (1,)).item())
        else:
            action = int(q_values.argmax().item())

        # Take action in environment. An episode ends when the environment
        # reports either termination or truncation; checking only the first
        # flag would loop forever on a truncated episode.
        next_obs, reward, terminated, truncated, info = env.step(action)
        done = terminated or truncated
        total_reward += reward

        # One-step TD target. Bootstrapping is skipped only on termination;
        # a truncated episode still has a meaningful next-state value.
        with torch.no_grad():
            if terminated:
                next_value = 0.0
            else:
                next_tensor = torch.as_tensor(
                    next_obs, dtype=torch.float32
                ).reshape(1, -1)
                next_value = dqn(next_tensor).max().item()
        td_target = torch.tensor(
            float(reward) + gamma * next_value, dtype=torch.float32
        )

        # Regress the taken action's Q-value towards the target.
        loss = torch.nn.functional.mse_loss(q_values[0, action], td_target)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # Move to next observation
        obs = next_obs

    return total_reward

# Run ten episodes back to back, reporting each one's cumulative reward
# under a 1-based episode number.
num_episodes = 10
for episode in range(num_episodes):
    reward = train_episode()
    print(f"Episode {episode + 1}, Total Reward: {reward:.2f}")