In [1]:
import pickle
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.utils.data as Data
import torch.nn.functional as F
from torch_geometric.nn import GATConv, SAGPooling,global_mean_pool , global_max_pool 
from sklearn.metrics import mean_absolute_error
from torch import optim

In [28]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

## 导入数据

In [12]:
# a_i
with open('./datasets/a_i.pkl', "rb") as f:
    a_data = pickle.load(f)
# 提取字典的值（数组）到一个列表中
a_data_list = list(a_data.values())

# 转换列表为NumPy数组
a_i = np.array(a_data_list)

# 检查结果数组的形状是否正确
print(a_i.shape)  # 应输出 (390, 480, 64)

(390, 480, 64)


In [13]:
# g_i
with open('./datasets/g_i.pkl', "rb") as f:
    g_data = pickle.load(f)
# 提取字典的值（数组）到一个列表中
g_data_list = list(g_data.values())

# 转换列表为NumPy数组
g_i = np.array(g_data_list)

# 检查结果数组的形状是否正确
print(g_i.shape)  # 应输出 (390, 480, 8)

(390, 480, 8)


In [17]:
with open('./datasets/sp500_data.pkl', "rb") as f:
    data = pickle.load(f)
test_y = data['test']['y_return ratio']
test_y.shape

(390, 480)

## 形成a_i长期序列

In [14]:
import torch
from torch.utils.data import Dataset, DataLoader
import torch.nn as nn
import torch.nn.functional as F
from torch.optim import Adam

class StockDataset(Dataset):
    def __init__(self, x, y):
        self.x = x
        self.y = y

    def __len__(self):
        return len(self.x)

    def __getitem__(self, idx):
        return self.x[idx], self.y[idx]

def create_dataloader(x, y, batch_size):
    dataset = StockDataset(x, y)
    dataloader = DataLoader(dataset, batch_size=batch_size, shuffle=True)
    return dataloader

In [32]:
class AttentionBlock(nn.Module):
    def __init__(self,time_step,dim):
        super(AttentionBlock, self).__init__()
        self.attention_matrix = nn.Linear(time_step, time_step)

    def forward(self, inputs):
        inputs_t = torch.transpose(inputs,2,1) # (batch_size, input_dim, time_step)
        attention_weight = self.attention_matrix(inputs_t)
        attention_probs = F.softmax(attention_weight,dim=-1)
        attention_probs = torch.transpose(attention_probs,2,1)
        attention_vec = torch.mul(attention_probs, inputs)
        attention_vec = torch.sum(attention_vec,dim=1)
        return attention_vec, attention_probs

In [25]:
dataloader_ai = create_dataloader(a_i, test_y, batch_size=16)

In [26]:
for batch_idx, (seq, target) in enumerate(dataloader_ai):
    print(batch_idx)
    print(seq.shape)
    print(target.shape)
    print("=========================")

0
torch.Size([16, 480, 64])
torch.Size([16, 480])
1
torch.Size([16, 480, 64])
torch.Size([16, 480])
2
torch.Size([16, 480, 64])
torch.Size([16, 480])
3
torch.Size([16, 480, 64])
torch.Size([16, 480])
4
torch.Size([16, 480, 64])
torch.Size([16, 480])
5
torch.Size([16, 480, 64])
torch.Size([16, 480])
6
torch.Size([16, 480, 64])
torch.Size([16, 480])
7
torch.Size([16, 480, 64])
torch.Size([16, 480])
8
torch.Size([16, 480, 64])
torch.Size([16, 480])
9
torch.Size([16, 480, 64])
torch.Size([16, 480])
10
torch.Size([16, 480, 64])
torch.Size([16, 480])
11
torch.Size([16, 480, 64])
torch.Size([16, 480])
12
torch.Size([16, 480, 64])
torch.Size([16, 480])
13
torch.Size([16, 480, 64])
torch.Size([16, 480])
14
torch.Size([16, 480, 64])
torch.Size([16, 480])
15
torch.Size([16, 480, 64])
torch.Size([16, 480])
16
torch.Size([16, 480, 64])
torch.Size([16, 480])
17
torch.Size([16, 480, 64])
torch.Size([16, 480])
18
torch.Size([16, 480, 64])
torch.Size([16, 480])
19
torch.Size([16, 480, 64])
torch.Size([

In [37]:
# 建立
ai_attmodel = AttentionBlock(time_step=20, dim=32)

# 定义损失和优化
criterion = nn.MSELoss()  # 因为是回归问题，所以我们使用均方误差损失
optimizer = Adam(ai_attmodel.parameters(), lr=0.001)

In [38]:
# 5. 训练模型
def train_model(model, dataloader, epochs):
    min_loss = float('inf')  # 初始最小损失设为正无穷大
    model = model.to(device)
    model.train()
    for epoch in range(epochs):
        for batch_idx, (seq, target) in enumerate(dataloader):
            seq = seq.to(device).float()
            target = target.to(device).float()
            optimizer.zero_grad()
            seq = seq.view(-1, 20, 64)  # Reshape to (batch_size * num_stocks, time_step, input_dim)
            print(seq.shape)  #  [5984, 7, 30]
            target = target.view(-1, 20, 1)  # Flatten target to have shape (batch_size * num_stocks)
            print(target.shape)  #  [5984, 1]
            attention_vec, attention_probs = model(seq)
            attention_vec = attention_vec.squeeze()  # Remove extra dimensions to match target shape
            loss = criterion(attention_vec, target)
            loss.backward()
            optimizer.step()
            if batch_idx % 20 == 0:
                print(f'Epoch {epoch}, Batch {batch_idx}, Loss {loss.item()}')
        if loss.item() < min_loss:  # 当前损失小于记录的最小损失就保存
            min_loss = loss.item() 
            torch.save(model, './output/attention_ai.pkl') 
            print('save!')

# 开始训练
train_model(ai_attmodel, dataloader_ai, epochs=50)

torch.Size([384, 20, 64])
torch.Size([384, 20, 1])


RuntimeError: mat1 and mat2 shapes cannot be multiplied (7680x64 and 20x20)