In [59]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, Dataset
from torchvision.transforms import ToTensor
from torchvision import transforms
import os
from PIL import Image
import torch.nn.functional as F
import random
from collections import deque
import math

In [60]:
# Report the PyTorch build and the accelerators visible to this kernel.
cuda_ready = torch.cuda.is_available()
gpu_count = torch.cuda.device_count()

print("PyTorch version:", torch.__version__)
print("CUDA available:", cuda_ready)
print("CUDA version:", torch.version.cuda)
print("Number of GPUs:", gpu_count)
if cuda_ready:
    for gpu_index in range(gpu_count):
        print(f"GPU {gpu_index}: {torch.cuda.get_device_name(gpu_index)}")

# Prefer the GPU when CUDA is usable, otherwise fall back to the CPU.
device = torch.device('cuda' if cuda_ready else 'cpu')
print("Device being used:", device)

PyTorch version: 2.5.1+cu118
CUDA available: True
CUDA version: 11.8
Number of GPUs: 1
GPU 0: NVIDIA GeForce GTX 1660 Ti
Device being used: cuda


In [61]:
class OHLCVDataset(Dataset):
    """Sliding-window dataset over the OHLCV columns of a CSV file.

    The CSV must contain a ``datetime`` column plus ``open``, ``high``,
    ``low``, ``close`` and ``volume``.  Each column is min-max scaled using
    statistics taken over the *entire* file, and item ``i`` is the window of
    rows ``i .. i + window_size - 1`` of that scaled table.

    Args:
        file_path: Path of the CSV file to load.
        window_size: Number of consecutive rows in one window.

    Attributes:
        data: The full DataFrame as read from disk.
        raw_data: Unscaled OHLCV values, shape ``(n_rows, 5)``.
        normalized_data: Min-max scaled copy of ``raw_data``.
    """

    def __init__(self, file_path, window_size=20):
        self.window_size = window_size
        # Load CSV and extract relevant columns
        self.data = pd.read_csv(file_path, parse_dates=["datetime"])
        self.raw_data = self.data[["open", "high", "low", "close", "volume"]].values
        # Normalize data
        self.normalized_data = self._normalize(self.raw_data)

    def _normalize(self, data):
        """Min-max scale every column of ``data`` (epsilon guards constant columns)."""
        min_vals = data.min(axis=0)
        max_vals = data.max(axis=0)
        return (data - min_vals) / (max_vals - min_vals + 1e-6)

    def __len__(self):
        """Number of complete windows; 0 when the file is shorter than one window."""
        # len() must never be negative, otherwise Python raises ValueError.
        return max(0, len(self.raw_data) - self.window_size + 1)

    def __getitem__(self, idx):
        """Return window ``idx`` as a float32 tensor of shape ``(1, 1, 5, window_size)``.

        Negative indices count from the end.

        Raises:
            IndexError: If ``idx`` does not address a complete window.
        """
        n_windows = len(self)
        if idx < 0:
            idx += n_windows
        if not 0 <= idx < n_windows:
            # Fail loudly instead of silently returning a truncated window.
            raise IndexError(f"window index {idx} out of range for {n_windows} windows")

        # Get normalized state
        state = self.normalized_data[idx : idx + self.window_size]  # Shape: (window_size, 5)
        state_tensor = torch.tensor(state, dtype=torch.float32).transpose(0, 1)  # Shape: (5, window_size)
        # Leading batch and channel axes so a single item can be fed to the network directly.
        state_tensor = state_tensor.unsqueeze(0).unsqueeze(0)  # Shape: (1, 1, 5, window_size)
        return state_tensor

In [62]:
class SingleScale1D(nn.Module):
    """1-D convolution over the time axis followed by a ReLU.

    Args:
        input_channels: Channels of the incoming ``(batch, channels, time)`` tensor.
        output_channels: Number of filters produced by the convolution.
        kernel_size: Temporal extent of each filter (no padding is applied,
            so the time axis shrinks by ``kernel_size - 1``).
    """

    def __init__(self, input_channels=5, output_channels=5, kernel_size=3):
        super().__init__()
        self.conv1d = nn.Conv1d(input_channels, output_channels, kernel_size)
        self.relu = nn.ReLU()

    def forward(self, x):
        """Apply convolution then ReLU to ``x``."""
        return self.relu(self.conv1d(x))

In [63]:
class ThreeByThreeConv2D(nn.Module):
    """Size-preserving 2-D convolution (3x3 by default) followed by a ReLU.

    Args:
        input_channels: Channels of the incoming ``(batch, C, H, W)`` tensor.
        output_channels: Number of filters produced.
        kernel_size: Side length of the square kernel.  Padding is derived as
            ``kernel_size // 2`` so that any odd kernel keeps ``H`` and ``W``
            unchanged (the default of 3 yields the former fixed padding of 1).
    """

    def __init__(self, input_channels=1, output_channels=2, kernel_size=3):
        super().__init__()
        # Derive the padding from the kernel instead of hard-coding the value
        # that is only correct for kernel_size == 3.
        self.conv2d = nn.Conv2d(
            in_channels=input_channels,
            out_channels=output_channels,
            kernel_size=kernel_size,
            padding=kernel_size // 2,
        )
        self.relu = nn.ReLU()

    def forward(self, x):
        """Apply convolution then ReLU to ``x``."""
        x = self.conv2d(x)
        x = self.relu(x)
        return x

In [64]:
class FiveByFiveConv2D(nn.Module):
    """Size-preserving 2-D convolution (5x5 by default) followed by a ReLU.

    Args:
        input_channels: Channels of the incoming ``(batch, C, H, W)`` tensor.
        output_channels: Number of filters produced.
        kernel_size: Side length of the square kernel.  Padding is derived as
            ``kernel_size // 2`` so that any odd kernel keeps ``H`` and ``W``
            unchanged (the default of 5 yields the former fixed padding of 2).
    """

    def __init__(self, input_channels=1, output_channels=1, kernel_size=5):
        super().__init__()
        # Derive the padding from the kernel instead of hard-coding the value
        # that is only correct for kernel_size == 5.
        self.conv2d = nn.Conv2d(
            in_channels=input_channels,
            out_channels=output_channels,
            kernel_size=kernel_size,
            padding=kernel_size // 2,
        )
        self.relu = nn.ReLU()

    def forward(self, x):
        """Apply convolution then ReLU to ``x``."""
        x = self.conv2d(x)
        x = self.relu(x)
        return x

In [65]:
class MultiScaleNet(nn.Module):
    """Extract features at three scales and stack them along the channel axis.

    The input is a single-channel "image" of shape ``(batch, 1, height, width)``
    (rows are features, columns are time steps).  Three branches run in parallel:

    * a 1-D convolution over time that treats each row as a channel, globally
      averaged and broadcast back to ``height x width`` (5 channels),
    * a 3x3 2-D convolution (2 channels),
    * a 5x5 2-D convolution (1 channel).

    Args:
        height: Number of rows of the input; used as the channel count of the
            1-D branch.
        width: Number of columns of the input.  Stored for reference only; the
            forward pass reads the actual width from the tensor.
    """

    def __init__(self, height=5, width=20):
        super().__init__()
        # The 1-D branch sees each input row as a channel, so its input
        # channel count must follow `height` rather than a fixed 5.
        self.single_scale_1d = SingleScale1D(input_channels=height, output_channels=5, kernel_size=3)
        self.three_by_three = ThreeByThreeConv2D(input_channels=1, output_channels=2, kernel_size=3)
        self.five_by_five = FiveByFiveConv2D(input_channels=1, output_channels=1, kernel_size=5)
        self.height = height
        self.width = width

    def forward(self, x):
        """Return the concatenated feature maps, shape ``(batch, 8, height, width)``."""
        # x shape: (batch_size, 1, height, width)
        batch_size, channels, height, width = x.size()

        # Drop the singleton channel axis for the 1-D branch.  reshape() also
        # accepts non-contiguous inputs, which view() may reject.
        x_1d = x.reshape(batch_size, height, width)  # (batch_size, height, width)
        features_1d = self.single_scale_1d(x_1d)  # (batch_size, 5, new_time_length)

        # Global average over time, then broadcast the per-channel summary to
        # the spatial size of the 2-D branches so the maps can be concatenated.
        features_1d = features_1d.mean(dim=2, keepdim=True)  # (batch_size, 5, 1)
        features_1d = features_1d.unsqueeze(-1)  # (batch_size, 5, 1, 1)
        features_1d = F.interpolate(features_1d, size=(height, width), mode='bilinear', align_corners=False)

        # Apply 3x3 and 5x5 Convolutions
        features_3x3 = self.three_by_three(x)  # Output shape: (batch_size, 2, height, width)
        features_5x5 = self.five_by_five(x)    # Output shape: (batch_size, 1, height, width)

        # Concatenate along the channel dimension
        combined_features = torch.cat((features_1d, features_3x3, features_5x5), dim=1)

        return combined_features  # Shape: (batch_size, 8, height, width)

In [66]:
class ECABlock(nn.Module):
    """Efficient Channel Attention: re-weight channels with a 1-D conv over channel means.

    The kernel size of the 1-D convolution is derived from the channel count:
    ``t = int(|(log2(channels) + b) / gamma|)``, rounded up to the next odd
    number so that symmetric padding keeps the channel axis length unchanged.

    Args:
        channels: Number of channels of the feature map this block will see.
        gamma: Divisor in the kernel-size formula.
        b: Offset in the kernel-size formula.
    """

    def __init__(self, channels, gamma=2, b=1):
        super().__init__()
        # math.log2 is more accurate than math.log(x, 2), which divides two
        # rounded natural logs; the result is truncated by int() below.
        t = int(abs((math.log2(channels) + b) / gamma))
        k = t if t % 2 else t + 1  # Ensure k is odd
        self.avg_pool = nn.AdaptiveAvgPool2d(1)
        self.conv = nn.Conv1d(1, 1, kernel_size=k, padding=(k - 1) // 2, bias=False)
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        """Scale each channel of ``x`` by its attention weight in (0, 1)."""
        # x: (batch_size, channels, height, width)
        y = self.avg_pool(x)  # (batch_size, channels, 1, 1)
        # Treat the channel axis as a 1-D sequence with a single feature.
        y = y.squeeze(-1).transpose(-1, -2)  # (batch_size, 1, channels)
        y = self.conv(y)
        y = self.sigmoid(y)
        # Back to (batch_size, channels, 1, 1) so it broadcasts over H and W.
        y = y.transpose(-1, -2).unsqueeze(-1)
        return x * y.expand_as(x)

In [67]:
class Backbone(nn.Module):
    """Two conv/batch-norm/ReLU stages with a 2x2 max-pool in between, then ECA attention.

    Args:
        input_channels: Channels of the incoming feature map.
    """

    def __init__(self, input_channels=8):
        super().__init__()
        hidden = 64  # channel width shared by both stages and the attention block
        self.conv1 = nn.Conv2d(input_channels, hidden, kernel_size=3, padding=1)
        self.bn1 = nn.BatchNorm2d(hidden)
        self.maxpool = nn.MaxPool2d(kernel_size=2, stride=2)
        self.conv2 = nn.Conv2d(hidden, hidden, kernel_size=3, padding=1)
        self.bn2 = nn.BatchNorm2d(hidden)
        self.eca = ECABlock(hidden)

    def forward(self, x):
        """Run both stages and return the attention-weighted feature map."""
        stage_one = F.relu(self.bn1(self.conv1(x)))
        downsampled = self.maxpool(stage_one)
        stage_two = F.relu(self.bn2(self.conv2(downsampled)))
        return self.eca(stage_two)

In [68]:
class MSNetWithBackbone(nn.Module):
    """Multi-scale feature extractor chained into the attention backbone."""

    def __init__(self):
        super().__init__()
        self.multi_scale_net = MultiScaleNet()
        self.backbone = Backbone(input_channels=8)

    def forward(self, x):
        """Feed ``x`` through the multi-scale net, then through the backbone."""
        multi_scale_features = self.multi_scale_net(x)
        return self.backbone(multi_scale_features)

In [69]:
class MSNetWithQValue(nn.Module):
    """Q-network: multi-scale backbone, one more 3x3 conv, then a two-layer MLP head.

    Args:
        num_actions: Number of Q-values produced per input sample.
        input_shape: ``(channels, height, width)`` of one input sample, used
            only to size the first linear layer.

    The forward pass maps ``(batch, *input_shape)`` to ``(batch, num_actions)``.
    """

    def __init__(self, num_actions=3, input_shape=(1, 5, 20)):
        super().__init__()
        self.multi_scale_with_backbone = MSNetWithBackbone()

        # Additional Conv3x3 layer before the linear layers
        self.conv_final = nn.Conv2d(64, 64, kernel_size=3, padding=1)
        self.flatten = nn.Flatten()

        # Determine the input size for the fully connected layers with a dummy
        # forward pass.  The probe runs in eval mode and without autograd so it
        # neither updates the BatchNorm running statistics nor builds a graph.
        extractor = self.multi_scale_with_backbone
        was_training = extractor.training
        extractor.eval()
        with torch.no_grad():
            probe = torch.zeros(1, *input_shape)
            feature_map_size = self.conv_final(extractor(probe)).numel()
        extractor.train(was_training)

        self.fc1 = nn.Linear(feature_map_size, 128)
        self.relu = nn.ReLU()
        self.fc2 = nn.Linear(128, num_actions)

    def forward(self, x):
        """Return the Q-values for every action, shape ``(batch, num_actions)``."""
        x = self.multi_scale_with_backbone(x)
        x = self.conv_final(x)
        x = F.relu(x)
        x = self.flatten(x)
        x = self.relu(self.fc1(x))
        x = self.fc2(x)
        return x

In [70]:
# Smoke test: one random 5x20 sample must map to one row of three Q-values.
model = MSNetWithQValue(num_actions=3)
test_input = torch.randn(1, 1, 5, 20)
output = model(test_input)
print(f"Output shape: {output.shape}")

Output shape: torch.Size([1, 3])


In [71]:
class ReplayBuffer:
    """Fixed-capacity FIFO store of ``(state, action, reward, next_state, done)`` transitions.

    Args:
        capacity: Maximum number of transitions kept; the oldest one is dropped
            once the buffer is full.
    """

    def __init__(self, capacity):
        self.buffer = deque(maxlen=capacity)

    def push(self, state, action, reward, next_state, done):
        """Append one transition.  ``next_state`` may be ``None`` for a terminal step."""
        self.buffer.append((state, action, reward, next_state, done))

    def sample(self, batch_size):
        """Draw ``batch_size`` distinct transitions uniformly at random.

        Returns:
            Tuple ``(states, actions, rewards, next_states, dones)`` where the
            state tensors are concatenated along dim 0, ``actions`` is int64
            and ``rewards`` / ``dones`` are float32.

        Raises:
            ValueError: If ``batch_size`` exceeds the number of stored
                transitions (raised by ``random.sample``).
        """
        batch = random.sample(self.buffer, batch_size)
        states, actions, rewards, next_states, dones = zip(*batch)

        # A terminal transition may carry no successor state.  Substitute a
        # zero tensor of the right shape so the batch can still be stacked; the
        # accompanying done flag lets the caller mask its bootstrap term.
        next_states = [
            torch.zeros_like(current) if successor is None else successor
            for current, successor in zip(states, next_states)
        ]

        state_batch = torch.cat(states, dim=0)  # Concatenate along batch dimension
        next_state_batch = torch.cat(next_states, dim=0)
        return (
            state_batch,
            torch.tensor(actions, dtype=torch.long),
            torch.tensor(rewards, dtype=torch.float32),
            next_state_batch,
            torch.tensor(dones, dtype=torch.float32),
        )

    def __len__(self):
        """Number of transitions currently stored."""
        return len(self.buffer)

In [72]:
class DDQNAgent:
    """Double-DQN agent with an epsilon-greedy policy and a hard-synced target network.

    Args:
        buffer_capacity: Maximum number of transitions kept in the replay buffer.
        gamma: Discount factor applied to the bootstrapped next-state value.
        lr: Learning rate of the Adam optimiser.
        target_update_freq: Copy the online weights into the target network
            every this many training steps.
        num_actions: Size of the discrete action space.
        device: Device (or device string) on which both networks live and
            batches are processed; ``None`` keeps everything on the CPU.
    """

    def __init__(self, buffer_capacity, gamma=0.99, lr=1e-3, target_update_freq=100,
                 num_actions=3, device=None):
        self.device = torch.device(device) if device is not None else torch.device("cpu")
        self.num_actions = num_actions

        self.actor = MSNetWithQValue(num_actions=num_actions).to(self.device)
        self.target = MSNetWithQValue(num_actions=num_actions).to(self.device)
        self.target.load_state_dict(self.actor.state_dict())
        # The target network is only ever evaluated, never trained directly.
        self.target.eval()

        self.buffer = ReplayBuffer(buffer_capacity)
        self.optimizer = optim.Adam(self.actor.parameters(), lr=lr)
        self.gamma = gamma
        self.target_update_freq = target_update_freq
        self.steps = 0
        self.epsilon = 1.0
        self.epsilon_min = 0.1
        self.epsilon_decay = 0.995

    def select_action(self, state):
        """Pick an action for ``state`` using the epsilon-greedy rule.

        Args:
            state: Tensor accepted by the Q-network with a batch size of one.

        Returns:
            The chosen action index as a Python ``int``.
        """
        if random.random() < self.epsilon:
            return random.randrange(self.num_actions)

        # Evaluate in eval mode: with a single sample, train-mode BatchNorm
        # would normalise by that sample's own statistics and also fold them
        # into the running averages.
        was_training = self.actor.training
        self.actor.eval()
        try:
            with torch.no_grad():
                q_values = self.actor(state.to(self.device))
        finally:
            self.actor.train(was_training)
        return q_values.argmax(dim=1).item()

    def train(self, batch_size):
        """Run one Double-DQN update; does nothing until the buffer holds ``batch_size`` items."""
        if len(self.buffer) < batch_size:
            return

        state_batch, action_batch, reward_batch, next_state_batch, done_batch = (
            tensor.to(self.device) for tensor in self.buffer.sample(batch_size)
        )
        rows = torch.arange(batch_size, device=self.device)

        # Compute target Q-values: the online network chooses the next action,
        # the target network evaluates it (Double DQN).
        with torch.no_grad():
            next_actions = self.actor(next_state_batch).argmax(dim=1)
            next_q = self.target(next_state_batch)[rows, next_actions]
            # (1 - done) removes the bootstrap term for terminal transitions.
            target_q = reward_batch + self.gamma * (1 - done_batch) * next_q

        current_q = self.actor(state_batch)[rows, action_batch]

        loss = F.mse_loss(current_q, target_q)

        self.optimizer.zero_grad()
        loss.backward()
        self.optimizer.step()

        # Update target network
        if self.steps % self.target_update_freq == 0:
            self.target.load_state_dict(self.actor.state_dict())

        # Decay epsilon
        self.epsilon = max(self.epsilon_min, self.epsilon * self.epsilon_decay)
        self.steps += 1

    def store_transition(self, state, action, reward, next_state, done):
        """Add one transition to the replay buffer."""
        self.buffer.push(state, action, reward, next_state, done)

In [None]:
class StockTradingEnvWithFeatures:
    """Single-asset, all-in/all-out trading environment over a windowed price dataset.

    Actions: ``0`` = hold, ``1`` = buy as many whole shares as the cash allows,
    ``2`` = sell every share held.  Trades execute at the close price of the
    current step.  The reward is a short-horizon Sharpe ratio of the next ``k``
    returns, granted only while a position is held after the action.

    Args:
        dataset: Object exposing ``window_size``, ``raw_data`` (close price in
            column 3), ``data["datetime"]``, ``len()`` and integer indexing
            that returns the state for a step.
        k: Number of future steps used by the reward.
    """

    def __init__(self, dataset, k=5):
        self.dataset = dataset
        self.current_step = 0
        self.done = False
        self.num_steps = len(dataset)
        self.price_history = []
        # Created here as well as in reset() so step() works on a fresh env.
        self.trade_log = []
        self.initial_cash = 500000  # Starting with $500,000
        self.cash_balance = self.initial_cash
        self.shares_held = 0
        self.total_asset_value = self.cash_balance
        self.previous_total_asset_value = self.total_asset_value
        self.k = k
        self.cost_basis = 0.0

        # Step i corresponds to the window ending at row i + window_size - 1,
        # so prices and timestamps are aligned to the last row of each window.
        self.raw_close_prices = self.dataset.raw_data[self.dataset.window_size - 1 :, 3]
        assert len(self.raw_close_prices) == self.num_steps, "Length of raw_close_prices does not match num_steps"

        self.datetimes = self.dataset.data["datetime"].values[self.dataset.window_size - 1 :]
        assert len(self.datetimes) == self.num_steps, "Length of datetimes does not match num_steps"

    def reset(self):
        """Restore the initial portfolio, clear the logs and return the first state."""
        self.current_step = 0
        self.done = False
        self.price_history = []
        self.cash_balance = self.initial_cash
        self.shares_held = 0
        self.total_asset_value = self.cash_balance
        self.previous_total_asset_value = self.total_asset_value
        self.trade_log = []
        self.cost_basis = 0.0
        return self._get_state()

    def _get_state(self):
        """Return the state at the current step, or ``None`` once the data is exhausted."""
        if self.current_step >= self.num_steps:
            self.done = True
            return None

        state = self.dataset[self.current_step]  # Returns the normalized state tensor
        current_close_price = self.raw_close_prices[self.current_step]
        self.price_history.append(current_close_price)
        return state

    def step(self, action):
        """Execute ``action`` at the current close price and advance one step.

        Returns:
            ``(next_state, reward, done, info)``.  After the last usable step
            the call returns ``(None, 0, True, {})`` without changing the
            portfolio.
        """
        if self.current_step + 1 >= self.num_steps:
            # No successor state is available any more.
            self.done = True
            return None, 0, True, {}

        current_close_price = self.raw_close_prices[self.current_step]
        next_state = self.dataset[self.current_step + 1]
        timestamp = self.datetimes[self.current_step]

        # Handle actions
        if action == 1:  # Buy
            if current_close_price == 0:
                num_shares = 0  # Avoid division by zero
            else:
                num_shares = int(self.cash_balance / current_close_price)
            total_cost = num_shares * current_close_price
            self.cash_balance -= total_cost
            self.shares_held += num_shares
            self.cost_basis += total_cost

        elif action == 2:  # Sell
            total_proceeds = self.shares_held * current_close_price
            self.cash_balance += total_proceeds
            self.shares_held = 0
            self.cost_basis = 0

        # Update total asset value (position marked at the current close)
        self.previous_total_asset_value = self.total_asset_value
        self.total_asset_value = self.cash_balance + self.shares_held * current_close_price

        # Profit is the change in total asset value since the previous step.
        profit = self.total_asset_value - self.previous_total_asset_value

        # Reward depends on the position held *after* the action.
        reward = self.calculate_reward(action, current_close_price)

        action_mapping = {0: 'Hold', 1: 'Buy', 2: 'Sell'}
        # Log the trade
        self.trade_log.append({
            'datetime': timestamp,
            'action': action_mapping[action],
            'reward': reward,
            'profit': profit,
            'total_asset_value': self.total_asset_value,
            'cash_balance': self.cash_balance,
            'shares_held': self.shares_held
        })

        self.current_step += 1
        done = self.current_step >= self.num_steps - 1
        # Keep the attribute in sync with the flag handed back to the caller.
        self.done = done
        return next_state, reward, done, {}

    def calculate_reward(self, action, current_close_price):
        """
        Calculates the reward based on the agent's action and future price movements.
        Implements the short-term Sharpe ratio as the reward function.

        The returns of the next ``k`` closes relative to ``current_close_price``
        are collected (fewer near the end of the data); the reward is their
        mean divided by their population standard deviation, multiplied by 1
        if shares are currently held and by 0 otherwise.  It is 0 when fewer
        than two returns are available or when they have zero spread.
        ``action`` is accepted for interface symmetry and is not used.
        """
        # Determine POS_t
        position = 1 if self.shares_held > 0 else 0

        # Returns over the next k steps; slicing truncates at the end of data.
        first_future = self.current_step + 1
        future_prices = self.raw_close_prices[first_future : first_future + self.k]
        if current_close_price == 0:
            future_returns = np.zeros(len(future_prices))  # Avoid division by zero
        else:
            future_returns = (future_prices - current_close_price) / current_close_price

        if len(future_returns) < 2:
            # Cannot compute Sharpe ratio with less than 2 data points
            sharpe = 0
        else:
            mean_return = np.mean(future_returns)
            std_return = np.std(future_returns)
            # Avoid division by zero
            sharpe = 0 if std_return == 0 else mean_return / std_return

        return position * sharpe

In [74]:
# Seed every RNG involved (exploration, replay sampling, weight init) before
# anything stochastic is created, so that a re-run reproduces the same results.
SEED = 42
random.seed(SEED)
np.random.seed(SEED)
torch.manual_seed(SEED)

file_path = "./labeling/TSLA_minute_data_cleaned_labeled.csv"
dataset = OHLCVDataset(file_path)
env = StockTradingEnvWithFeatures(dataset)
agent = DDQNAgent(buffer_capacity=10000)

num_episodes = 100
batch_size = 32

for episode in range(num_episodes):
    state = env.reset()
    done = False
    total_reward = 0

    # One pass over the price series: act, record the transition, learn.
    while not done and state is not None:
        action = agent.select_action(state)
        next_state, reward, done, _ = env.step(action)

        agent.store_transition(state, action, reward, next_state, done)
        agent.train(batch_size)

        state = next_state
        total_reward += reward

    # Update total_asset_value at the end of the episode
    # NOTE(review): env.current_step - 1 is the step whose price env.step()
    # already used for the last valuation, so this leaves the value unchanged.
    # Confirm whether the final bar's price was intended instead.
    if env.shares_held > 0:
        last_close_price = env.raw_close_prices[env.current_step - 1]
        env.total_asset_value = env.cash_balance + env.shares_held * last_close_price

    total_profit = env.total_asset_value - env.initial_cash
    print(f"Episode {episode + 1}/{num_episodes}, Total Reward: {total_reward}, Total Profit: {total_profit}")

    # Access and process the trade log
    trade_log = env.trade_log

    # Save the trade log of this episode to its own CSV file
    trade_log_df = pd.DataFrame(trade_log)
    trade_log_df.to_csv(f"trade_log_episode_{episode + 1}.csv", index=False)

KeyboardInterrupt: 