In [1]:
import torch
import torch.nn.functional as F
from torch.utils.data import DataLoader
from dataset import load_all_stock_data, ConditionalStockDataset
from modules import cosine_beta_schedule, get_time_embedding, Context_Encoder, TickerEmbedding, ResidualMLPWithExtraBlock, forward_diffusion_sample, reverse_diffusion_sample


In [2]:
# Compute device: the GPU when CUDA is available, otherwise the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

# Raw price data: location on disk and the loaded result.
data_path = "../price/raw/"
data = load_all_stock_data(data_path)

# Windowing and column selection handed to the dataset.
context_len = 10
target_column = ["Close"]
feature_columns = [
    "Open", "High", "Low", "Volume",
    "Return", "Diff", "HL_Diff", "MA5", "Return_MA5",
]

# Build the train and test datasets with identical settings; only `split` differs.
# `device` is forwarded to the dataset so that (per the dataset's contract — see
# dataset.py) the tensors it yields are already on the target device.
train_data, test_data = (
    ConditionalStockDataset(
        data,
        context_len,
        feature_columns,
        target_column,
        split=split_name,
        device=device,
    )
    for split_name in ("train", "test")
)

# Only the training loader shuffles; the test loader keeps dataset order.
train_loader = DataLoader(dataset=train_data, batch_size=16, shuffle=True)
test_loader = DataLoader(dataset=test_data, batch_size=16, shuffle=False)

Loaded and processed data for 88 stocks


KeyboardInterrupt: 

In [None]:

# Hyperparameters
seed = 42
num_diffusion_steps = 100
num_epochs = 50
batch_size = 16  # NOTE(review): the DataLoaders are built in the data-loading cell with their own batch size
learning_rate = 1e-4

# Seed torch's global RNG so weight initialisation, timestep sampling and
# loader shuffling are repeatable when this cell and the training loop are re-run.
torch.manual_seed(seed)

# Create beta schedule and move it to device
betas = cosine_beta_schedule(num_diffusion_steps, s=0.008).to(device)

# Model hyperparameters (single source of truth; the constructors below use
# these names rather than repeating the literals).
dim = 1
embedding_dim = 32
context_embedding_size = 32
ticker_embedding_dim = 32
context_input_dim = 9  # presumably len(feature_columns) — TODO confirm against the dataset
context_hidden_dim = 32

# Instantiate models and move them to device.
denoise_net = ResidualMLPWithExtraBlock(
    dim=dim,
    embedding_dim=embedding_dim,
    context_embedding_size=context_embedding_size,
    ticker_embedding_dim=ticker_embedding_dim,
    hidden_size=512,
    num_chunks=8,
    attn_heads=4,
    dropout_prob=0.1,
).to(device)

context_net = Context_Encoder(
    input_dim=context_input_dim,
    hidden_dim=context_hidden_dim,
    kernel_size=3,
    dilation_rates=[1, 2, 4],
    num_heads=4,
).to(device)

# To generate ticker embeddings, get the list of unique tickers from training data.
# Sorting gives a stable order: plain set iteration order for strings changes
# between Python processes, which would make the ticker list passed to the
# embedding layer differ from run to run.
ticker_list = [tick for tick, _, _ in train_data]
unique_tickers = sorted(set(ticker_list))
ticker_emb_layer = TickerEmbedding(
    ticker_list=unique_tickers,
    ticker_embedding_dim=ticker_embedding_dim,
).to(device)

# Define optimizer and scheduler.
# All three modules contribute to the denoiser's input, so all of their
# parameters must be optimised; passing only denoise_net.parameters() would
# leave the context encoder and ticker embedding at their initial weights.
trainable_params = [
    *denoise_net.parameters(),
    *context_net.parameters(),
    *ticker_emb_layer.parameters(),
]
optimizer = torch.optim.Adam(trainable_params, lr=learning_rate)
# Halve the learning rate every 10 scheduler steps.
scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=10, gamma=0.5)

# Clip exactly the parameters the optimizer updates, whatever set it was built with.
clip_params = [p for group in optimizer.param_groups for p in group["params"]]

# Schedule-derived constants used by the reverse process; they depend only on
# `betas`, so they are computed once instead of at every sampling step.
alphas = 1 - betas
alpha_bars = torch.cumprod(alphas, dim=0)
sqrt_alphas = torch.sqrt(alphas)
noise_coefs = betas / torch.sqrt(1 - alpha_bars)

for epoch in range(num_epochs):
    # ---- Training pass ----
    # Evaluation below switches the modules to eval mode, so train mode is
    # restored once at the start of every epoch.
    denoise_net.train()
    context_net.train()
    ticker_emb_layer.train()

    all_losses = []
    for ticker, context, x0 in train_loader:
        # ticker remains a list of strings; context and x0 are already on device.
        # A local name is used so the `batch_size` hyperparameter is not overwritten.
        n_samples = x0.shape[0]
        t = torch.randint(0, num_diffusion_steps, (n_samples,), dtype=torch.long, device=device)

        # Generate noisy sample and the noise that was added (the regression target).
        x_t, true_noise = forward_diffusion_sample(x0, t, betas)

        # Denoiser input: noisy sample + time, context and ticker embeddings,
        # concatenated along the last dimension.
        time_embedding = get_time_embedding(t, embedding_dim)
        context_embedding = context_net(context)
        ticker_emb = ticker_emb_layer(ticker)
        x_combined = torch.cat([x_t, time_embedding, context_embedding, ticker_emb], dim=-1)

        predicted_noise = denoise_net(x_combined)
        loss = F.mse_loss(predicted_noise, true_noise)
        all_losses.append(loss.item())

        optimizer.zero_grad()
        loss.backward()
        torch.nn.utils.clip_grad_norm_(clip_params, max_norm=1.0)
        optimizer.step()

    # ---- Evaluation on test data ----
    denoise_net.eval()
    context_net.eval()
    ticker_emb_layer.eval()
    all_losses_eval = []
    with torch.no_grad():
        for tickers, context, x0 in test_loader:
            n_samples = x0.shape[0]
            # Start the reverse process from standard Gaussian noise.
            x_t = torch.randn(n_samples, dim, device=device)

            # The conditioning embeddings do not depend on the diffusion step,
            # so they are computed once per batch rather than once per step
            # (assumes the modules are deterministic in eval mode).
            context_embedding = context_net(context)
            ticker_embedding = ticker_emb_layer(tickers)

            for t_val in reversed(range(num_diffusion_steps)):
                t_tensor = torch.full((n_samples,), t_val, dtype=torch.long, device=device)
                time_embedding = get_time_embedding(t_tensor, embedding_dim)
                x_combined = torch.cat([x_t, time_embedding, context_embedding, ticker_embedding], dim=-1)
                predicted_noise = denoise_net(x_combined)

                # x_{t-1} = (x_t - beta_t / sqrt(1 - alpha_bar_t) * eps) / sqrt(alpha_t)
                # NOTE(review): this update adds no noise term at any step, and the
                # imported reverse_diffusion_sample helper is unused — confirm that a
                # mean-only sampler is intended for validation.
                x_t = (x_t - noise_coefs[t_val] * predicted_noise) / sqrt_alphas[t_val]

            x0_pred = x_t
            loss_eval = F.mse_loss(x0_pred, x0)
            all_losses_eval.append(loss_eval.item())

    scheduler.step()
    current_lr = optimizer.param_groups[0]['lr']
    # Report NaN instead of raising ZeroDivisionError when a loader yields no batches.
    train_loss = sum(all_losses) / len(all_losses) if all_losses else float("nan")
    val_loss = sum(all_losses_eval) / len(all_losses_eval) if all_losses_eval else float("nan")
    print(f"Epoch {epoch+1}, Loss: {train_loss}, LR: {current_lr}\nValidation Loss: {val_loss}")
