In [1]:
from dataset import load_all_stock_data
import pandas as pd
from dataset import ConditionalStockDataset
import torch
import torch.nn as nn
from torch.utils.data import DataLoader, Dataset
import pandas as pd
from sklearn.model_selection import train_test_split
from matplotlib import pyplot as plt
from sklearn.preprocessing import MinMaxScaler
from multi_stock_input_modules import ResidualMLPWithExtraBlock as ResidualMLP
from multi_stock_input_modules import forward_diffusion_sample
from multi_stock_input_modules import reverse_diffusion_sample
import torch.nn.functional as F
from multi_stock_input_modules import Context_Encoder
from multi_stock_input_modules import cosine_beta_schedule
from multi_stock_input_modules import get_time_embedding
from multi_stock_input_modules import TickerEmbedding
import torch

# Data Loading

In [2]:
data_path = "../../../price/raw/"
data = load_all_stock_data(data_path)

Loaded and processed data for 88 stocks


In [3]:
context_len = 10
feature_columns = ["Open","High","Low","Volume","Return","Diff","HL_Diff","MA5","Return_MA5"]
target_column = ["Close"]
train_data = ConditionalStockDataset(data,context_len,feature_columns,target_column,split='train')
test_data = ConditionalStockDataset(data,context_len,feature_columns,target_column,split='test')

In [4]:
for ticker,context,target in train_data :
    print(ticker,context,target)
    break

BA tensor([[3.0266e-04, 3.0252e-04, 3.0409e-04, 6.4080e-03, 7.3007e-01, 5.8009e-01,
         4.5562e-05, 3.0576e-04, 6.3183e-01],
        [3.0371e-04, 3.0257e-04, 3.0256e-04, 7.1370e-03, 7.1718e-01, 5.8004e-01,
         7.7105e-05, 3.0611e-04, 6.4068e-01],
        [3.0042e-04, 3.0392e-04, 3.0097e-04, 8.6181e-03, 7.4949e-01, 5.8016e-01,
         1.3581e-04, 3.0527e-04, 6.2808e-01],
        [3.0630e-04, 3.0584e-04, 3.0344e-04, 1.0590e-02, 7.1100e-01, 5.8002e-01,
         1.2530e-04, 3.0418e-04, 6.3388e-01],
        [3.0082e-04, 2.9882e-04, 2.9856e-04, 1.1218e-02, 7.0810e-01, 5.8001e-01,
         8.1486e-05, 3.0276e-04, 6.2449e-01],
        [2.9683e-04, 2.9725e-04, 2.9711e-04, 8.9039e-03, 7.4232e-01, 5.8013e-01,
         7.8858e-05, 3.0175e-04, 6.2894e-01],
        [3.0069e-04, 2.9847e-04, 2.9851e-04, 8.4224e-03, 7.0833e-01, 5.8001e-01,
         7.5353e-05, 3.0103e-04, 6.3546e-01],
        [2.9459e-04, 2.9463e-04, 2.9478e-04, 7.9062e-03, 7.4025e-01, 5.8012e-01,
         7.2724e-05, 3.0009

In [5]:
print(len(train_data),len(test_data))

85767 21487


In [6]:
train_loader = DataLoader(train_data, batch_size = 16, shuffle = True)
test_laoder = DataLoader(test_data, batch_size =16, shuffle = False)

In [7]:
for sample in train_loader :
    print(f" Shape of context: {sample[1].shape} \n Shape of x0: {sample[2].shape}")
    break

 Shape of context: torch.Size([16, 10, 9]) 
 Shape of x0: torch.Size([16, 1])


In [None]:
# Hyperparameters
num_diffusion_steps = 100  # Total diffusion steps
num_epochs = 200
batch_size = 16
learning_rate = 1e-4

# Create a beta schedule: linearly spaced between 0.0001 and 0.01
# betas = torch.linspace(0.0001, 0.01, num_diffusion_steps)
betas = cosine_beta_schedule(num_diffusion_steps, s=0.008)


# Since our targets are scalars, dim = 1
dim = 1
embedding_dim = 32  # As used in get_time_embedding
#hidden_size for context = context_embedding size
context_embedding_size = 32
ticker_embedding_dim = 32

# Denoise net parameters : dim, embedding_dim, context_embedding_size, hidden_size=512, num_chunks=8, attn_heads=4, dropout_prob=0.1

denoise_net = ResidualMLP(dim=1, embedding_dim=32, context_embedding_size=32, ticker_embedding_size=ticker_embedding_dim, hidden_size=512, num_chunks=8, attn_heads=4,dropout_prob=0.1)

#input size = num of features

input_size = 9
context_input_dim = 9
context_hidden_dim = 32


# Context encoding parameters : input_dim, hidden_dim, kernel_size=3, dilation_rates=[1, 2, 4], num_heads=4
context_net = Context_Encoder(input_dim=context_input_dim, hidden_dim=context_hidden_dim, kernel_size=3, dilation_rates=[1,2,4], num_heads=4)
# Define the optimizer
optimizer = torch.optim.Adam(denoise_net.parameters(), lr=learning_rate)
scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=10, gamma=0.5)

# To generate the ticker embeddings, we need to get a list of all the inique tickers in the dataset

ticker_list =[tickers for tickers,_,_ in train_data]
unique_tickers = [ticker for ticker in set(ticker_list)]


ticker_emb_layer = TickerEmbedding(ticker_list = unique_tickers, ticker_embedding_dim=32)


for epoch in range(num_epochs):
    for ticker,context,x0 in train_loader:
        # Again, ticker is name of stock
        # Context is a tensor of [batch_size, context_len=10, feature_dim=9]
        # x0 is a tensor of [batch_size,dim=1]

        # Ensure x0 is shaped as [batch_size, dim]
        # context is of shape [batch_size, context_len, num_features]
        
        # Sample a random diffusion timestep for each sample in the batch
        batch_size = x0.shape[0]
        t = torch.randint(0, num_diffusion_steps, (batch_size,), dtype=torch.long)
        
        # Generate the noisy sample and the true noise using forward diffusion
        x_t, true_noise = forward_diffusion_sample(x0, t, betas)
        # print(f"x_t shape : {x_t.shape}")
        
        # Compute time embedding for the sampled timesteps
        time_embedding = get_time_embedding(t, embedding_dim)
        # print(f"time_embedding shape : {time_embedding.shape}")
        
        # Concatenate x_t with the time embedding to form the input to the denoising network
        x_combined = torch.cat([x_t, time_embedding], dim=-1)
        # x_combined shape is [batch_size, dim + embedding_dim]
        # print(f"x_combined shape : {x_combined.shape}")
        
        # Get context embedding
        context_embedding = context_net(context)
        # print(f"context_embedding shape :{context_embedding.shape}")

        x_combined = torch.cat([x_combined, context_embedding], dim=-1)
        # print(f"x_combined shape : {x_combined.shape}")

        ticker_emb = ticker_emb_layer(ticker) # Should be shape [num_tickers (should be batch_size),embedding_dim]
        # print(f"ticker embedding shape : {ticker_emb.shape}")

        x_combined = torch.cat([x_combined, ticker_emb], dim = -1)
        # print(f"x_combined after ticker emb : {x_combined.shape}")
        
        # Predict the noise using the denoising network directly
        predicted_noise = denoise_net(x_combined)
        
        # Compute the loss between predicted noise and true noise
        loss = F.mse_loss(predicted_noise, true_noise)
        
        optimizer.zero_grad()
        loss.backward()
        # torch.nn.utils.clip_grad_norm_(denoise_net.parameters(), max_norm=1.0)
        optimizer.step()
    
    scheduler.step()
    current_lr = optimizer.param_groups[0]['lr']
    print(f"Epoch {epoch+1}, Loss: {loss.item()}, LR: {current_lr}")

Epoch 1, Loss: 0.019402045756578445, LR: 0.0001
Epoch 2, Loss: 0.006481352727860212, LR: 0.0001
Epoch 3, Loss: 0.004343767650425434, LR: 0.0001
Epoch 4, Loss: 0.0061234901659190655, LR: 0.0001
Epoch 5, Loss: 0.0016276149544864893, LR: 0.0001
Epoch 6, Loss: 0.03131469339132309, LR: 0.0001
Epoch 7, Loss: 0.012462134473025799, LR: 0.0001
Epoch 8, Loss: 0.0020250563975423574, LR: 0.0001
Epoch 9, Loss: 0.0037357774563133717, LR: 0.0001
Epoch 10, Loss: 0.0014304263750091195, LR: 5e-05
Epoch 11, Loss: 0.000854013953357935, LR: 5e-05
Epoch 12, Loss: 0.009855100885033607, LR: 5e-05
