In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np

# Add & Norm layer
class AddNorm(nn.Module):
    """Residual connection followed by layer normalisation.

    Args:
        hidden_dim: Size of the last dimension that ``nn.LayerNorm`` normalises.
    """

    def __init__(self, hidden_dim):
        super().__init__()
        self.norm = nn.LayerNorm(hidden_dim)

    def forward(self, x, residual):
        """Return ``LayerNorm(x + residual)``."""
        summed = x + residual
        return self.norm(summed)

# Feed-forward layer
class FeedForward(nn.Module):
    """Two linear maps with a ReLU in between: hidden_dim -> ff_dim -> hidden_dim.

    Args:
        hidden_dim: Input and output feature size.
        ff_dim: Feature size of the intermediate projection.
    """

    def __init__(self, hidden_dim, ff_dim):
        super().__init__()
        self.linear1 = nn.Linear(hidden_dim, ff_dim)
        self.linear2 = nn.Linear(ff_dim, hidden_dim)
        self.relu = nn.ReLU()

    def forward(self, x):
        """Apply ``linear2(relu(linear1(x)))``."""
        expanded = self.relu(self.linear1(x))
        return self.linear2(expanded)

# Multi-head attention layer
class MultiHeadAttention(nn.Module):
    """Scaled dot-product attention computed independently over several heads.

    Args:
        hidden_dim: Size of the last dimension of the inputs and of the output.
        num_heads: Number of attention heads; must divide ``hidden_dim`` evenly.

    Raises:
        ValueError: If ``num_heads`` is not positive or does not divide
            ``hidden_dim``. Without this check the ``view`` calls in
            ``forward`` could silently infer a wrong sequence length.
    """

    def __init__(self, hidden_dim, num_heads):
        super(MultiHeadAttention, self).__init__()
        if num_heads <= 0 or hidden_dim % num_heads != 0:
            raise ValueError(
                f"hidden_dim ({hidden_dim}) must be divisible by a positive num_heads ({num_heads})"
            )
        self.hidden_dim = hidden_dim
        self.num_heads = num_heads
        self.head_dim = hidden_dim // num_heads

        self.query = nn.Linear(hidden_dim, hidden_dim)
        self.key = nn.Linear(hidden_dim, hidden_dim)
        self.value = nn.Linear(hidden_dim, hidden_dim)

        self.fc = nn.Linear(hidden_dim, hidden_dim)

    def forward(self, query, key, value, mask=None):
        """Attend from ``query`` to ``key``/``value``.

        Args:
            query, key, value: Tensors whose first dimension is the batch and
                whose last dimension is ``hidden_dim``.
            mask: Optional tensor broadcastable to the score tensor of shape
                (batch, num_heads, query_len, key_len). Positions where
                ``mask == 0`` receive (numerically) zero attention weight.

        Returns:
            Tensor of shape (batch, query_len, hidden_dim).
        """
        batch_size = query.size(0)

        def split_heads(projected):
            # (batch, len, hidden) -> (batch, heads, len, head_dim)
            return projected.view(batch_size, -1, self.num_heads, self.head_dim).transpose(1, 2)

        Q = split_heads(self.query(query))
        K = split_heads(self.key(key))
        V = split_heads(self.value(value))

        energy = torch.matmul(Q, K.transpose(-2, -1)) / (self.head_dim ** 0.5)

        if mask is not None:
            # Use the most negative finite value of the score dtype: a fixed
            # -1e10 is not representable in half precision.
            energy = energy.masked_fill(mask == 0, torch.finfo(energy.dtype).min)

        attention = torch.softmax(energy, dim=-1)

        # Merge the heads back: (batch, heads, len, head_dim) -> (batch, len, hidden)
        x = torch.matmul(attention, V).transpose(1, 2).contiguous().view(batch_size, -1, self.hidden_dim)
        return self.fc(x)

# Encoder layer
class EncoderLayer(nn.Module):
    """Self-attention sub-layer followed by a feed-forward sub-layer.

    Each sub-layer output is added to its input and layer-normalised through
    an ``AddNorm`` module.

    Args:
        hidden_dim: Feature size of the layer input and output.
        num_heads: Number of attention heads.
        ff_dim: Inner size of the feed-forward sub-layer.
    """

    def __init__(self, hidden_dim, num_heads, ff_dim):
        super().__init__()
        self.self_attn = MultiHeadAttention(hidden_dim, num_heads)
        self.ff = FeedForward(hidden_dim, ff_dim)
        self.norm1 = AddNorm(hidden_dim)
        self.norm2 = AddNorm(hidden_dim)

    def forward(self, x, mask=None):
        """Run both sub-layers on ``x``; ``mask`` is forwarded to the attention."""
        attended = self.norm1(self.self_attn(x, x, x, mask), x)
        return self.norm2(self.ff(attended), attended)

# Decoder layer
class DecoderLayer(nn.Module):
    """Self-attention, cross-attention over encoder outputs, then feed-forward.

    Each of the three sub-layers is wrapped in its own ``AddNorm`` module.

    Args:
        hidden_dim: Feature size of the layer input and output.
        num_heads: Number of attention heads in both attention sub-layers.
        ff_dim: Inner size of the feed-forward sub-layer.
    """

    def __init__(self, hidden_dim, num_heads, ff_dim):
        super().__init__()
        self.self_attn = MultiHeadAttention(hidden_dim, num_heads)
        self.cross_attn = MultiHeadAttention(hidden_dim, num_heads)
        self.ff = FeedForward(hidden_dim, ff_dim)
        self.norm1 = AddNorm(hidden_dim)
        self.norm2 = AddNorm(hidden_dim)
        self.norm3 = AddNorm(hidden_dim)

    def forward(self, x, enc_outputs, src_mask=None, tgt_mask=None):
        """Decode ``x`` against ``enc_outputs``.

        ``tgt_mask`` is passed to the self-attention and ``src_mask`` to the
        cross-attention, whose keys and values are ``enc_outputs``.
        """
        self_attended = self.norm1(self.self_attn(x, x, x, tgt_mask), x)
        cross_attended = self.norm2(
            self.cross_attn(self_attended, enc_outputs, enc_outputs, src_mask),
            self_attended,
        )
        return self.norm3(self.ff(cross_attended), cross_attended)
class PositionalEncoding(nn.Module):
    """Fixed sinusoidal position encoding that is added to the input.

    Args:
        hidden_dim: Feature size of the encoding (even or odd).
        max_len: Number of positions that are precomputed.
        batch_first: When False (default, the original behaviour) positions
            run along dim 0 of the input. When True positions run along dim 1,
            i.e. the input is laid out as (batch, seq_len, hidden_dim).

    The table is stored in the buffer ``pe`` with shape (max_len, 1, hidden_dim).
    """

    def __init__(self, hidden_dim, max_len=5000, batch_first=False):
        super(PositionalEncoding, self).__init__()
        self.batch_first = batch_first
        pe = torch.zeros(max_len, hidden_dim)
        position = torch.arange(0, max_len, dtype=torch.float).unsqueeze(1)
        div_term = torch.exp(torch.arange(0, hidden_dim, 2).float() * (-np.log(10000.0) / hidden_dim))
        angles = position * div_term
        pe[:, 0::2] = torch.sin(angles)
        # With an odd hidden_dim there is one cosine column fewer than sine
        # columns, so trim the angles to the number of odd-indexed slots.
        pe[:, 1::2] = torch.cos(angles[:, : hidden_dim // 2])
        pe = pe.unsqueeze(0).transpose(0, 1)
        self.register_buffer('pe', pe)

    def forward(self, x):
        """Return ``x`` plus the encodings of its first positions."""
        if self.batch_first:
            # (seq_len, 1, hidden) -> (1, seq_len, hidden) so it broadcasts over the batch.
            return x + self.pe[:x.size(1)].transpose(0, 1)
        return x + self.pe[:x.size(0), :]
class Transformer(nn.Module):
    """Encoder/decoder stack that maps an input window to one output vector.

    The input is projected to ``hidden_dim`` and passed through the encoder
    layers. The decoder stack starts from the encoder output and also attends
    to it. The decoder state at the last index of dim 1 is then projected to
    ``output_dim``.

    Args:
        input_dim: Feature size of the input.
        hidden_dim: Feature size used inside the encoder and decoder layers.
        output_dim: Feature size of the returned tensor.
        num_heads: Number of attention heads per attention module.
        num_layers: Number of encoder layers and of decoder layers.
        ff_dim: Inner size of the feed-forward sub-layers.
    """

    def __init__(self, input_dim, hidden_dim, output_dim, num_heads, num_layers, ff_dim):
        super().__init__()
        self.input_embedding = nn.Linear(input_dim, hidden_dim)
        self.output_embedding = nn.Linear(hidden_dim, output_dim)

        encoders = [EncoderLayer(hidden_dim, num_heads, ff_dim) for _ in range(num_layers)]
        self.encoder_layers = nn.ModuleList(encoders)
        decoders = [DecoderLayer(hidden_dim, num_heads, ff_dim) for _ in range(num_layers)]
        self.decoder_layers = nn.ModuleList(decoders)

    def forward(self, src, tgt_mask=None):
        """Return the projection of the final decoder state at the last position.

        ``tgt_mask`` is forwarded to every decoder layer; the encoder layers
        are called without a mask.
        """
        memory = self.input_embedding(src)
        for layer in self.encoder_layers:
            memory = layer(memory)

        decoded = memory
        for layer in self.decoder_layers:
            decoded = layer(decoded, memory, tgt_mask=tgt_mask)

        last_step = decoded[:, -1, :]
        return self.output_embedding(last_step)
# NOTE: the triple-quoted string below is a disabled variant of Transformer that
# applies PositionalEncoding right after the input projection. It is a bare
# string expression, so nothing inside it is defined or executed; the active
# Transformer is the class defined above.
# NOTE(review): PositionalEncoding.forward slices its table by x.size(0), while
# this model reads the time step from dim 1 (dec_outputs[:, -1, :]) — confirm
# the tensor layout before re-enabling this variant.
'''
# 定义Transformer模型
class Transformer(nn.Module):
    def __init__(self, input_dim, hidden_dim, output_dim, num_heads, num_layers, ff_dim):
        super(Transformer, self).__init__()
        self.input_embedding = nn.Linear(input_dim, hidden_dim)
        self.pos_encoding = PositionalEncoding(hidden_dim)
        self.output_embedding = nn.Linear(hidden_dim, output_dim)

        self.encoder_layers = nn.ModuleList([EncoderLayer(hidden_dim, num_heads, ff_dim) for _ in range(num_layers)])
        self.decoder_layers = nn.ModuleList([DecoderLayer(hidden_dim, num_heads, ff_dim) for _ in range(num_layers)])

    def forward(self, src, tgt_mask=None):
        src_embed = self.input_embedding(src)
        src_embed = self.pos_encoding(src_embed)

        enc_outputs = src_embed
        for enc_layer in self.encoder_layers:
            enc_outputs = enc_layer(enc_outputs)

        dec_outputs = enc_outputs
        for dec_layer in self.decoder_layers:
            dec_outputs = dec_layer(dec_outputs, enc_outputs, tgt_mask=tgt_mask)

        outputs = self.output_embedding(dec_outputs[:, -1, :])
        return outputs'''

'\n# 定义Transformer模型\nclass Transformer(nn.Module):\n    def __init__(self, input_dim, hidden_dim, output_dim, num_heads, num_layers, ff_dim):\n        super(Transformer, self).__init__()\n        self.input_embedding = nn.Linear(input_dim, hidden_dim)\n        self.pos_encoding = PositionalEncoding(hidden_dim)\n        self.output_embedding = nn.Linear(hidden_dim, output_dim)\n\n        self.encoder_layers = nn.ModuleList([EncoderLayer(hidden_dim, num_heads, ff_dim) for _ in range(num_layers)])\n        self.decoder_layers = nn.ModuleList([DecoderLayer(hidden_dim, num_heads, ff_dim) for _ in range(num_layers)])\n\n    def forward(self, src, tgt_mask=None):\n        src_embed = self.input_embedding(src)\n        src_embed = self.pos_encoding(src_embed)\n\n        enc_outputs = src_embed\n        for enc_layer in self.encoder_layers:\n            enc_outputs = enc_layer(enc_outputs)\n\n        dec_outputs = enc_outputs\n        for dec_layer in self.decoder_layers:\n            dec_outp

In [2]:
import pandas as pd
# Load the raw table and keep only the modelling columns.
data = pd.read_csv('tmp2.csv')
# Example of restricting the table to a single stock:
#data_stock=data[data.name=='万科A']

# Column order matters downstream: the windowing code uses every column except
# the last as model input and the last column ('change_ratio') as the target.
features = ['company_id','open', 'close', 'high', 'low', 'volume', 'money_netflow', 'money_inflow', 'money_outflow',
       'net_inflow_rate', 'list_sector', 'CPI', '无风险利率',
       'total_market_cap', 'float_market_cap', 'pe_ttm', 'pb',
       'dividend_yield_ratio', 'major_id', 'minor_id',
       'change_ratio']
# Take an explicit copy: a later cell rescales columns of this frame in place,
# and that should act on an independent frame rather than on a selection of
# the raw table.
data = data[features].copy()

In [3]:
import numpy as np

def create_dataset(data, look_back=5):
    """Cut a 2-D array into sliding windows and next-step targets.

    Args:
        data: 2-D array-like of shape (n_rows, n_columns). All columns except
            the last are inputs; the last column is the target.
        look_back: Number of consecutive rows in each input window.

    Returns:
        Tuple ``(X, Y)``. ``X`` has shape (n_windows, look_back, n_columns - 1)
        and ``Y`` has shape (n_windows,), where ``Y[i]`` is the last-column
        value of the row that immediately follows window ``i``.
        ``n_windows`` is ``max(n_rows - look_back, 0)``; the arrays keep these
        shapes even when there are no windows, so results from several calls
        can always be concatenated.
    """
    data = np.asarray(data)
    # Every start index i with i + look_back <= n_rows - 1 yields a valid
    # target row, which gives n_rows - look_back windows.
    n_windows = max(len(data) - look_back, 0)
    n_inputs = max(data.shape[1] - 1, 0)
    X = np.empty((n_windows, look_back, n_inputs), dtype=data.dtype)
    Y = np.empty((n_windows,), dtype=data.dtype)
    for i in range(n_windows):
        X[i] = data[i:i + look_back, :-1]
        Y[i] = data[i + look_back, -1]
    return X, Y

In [4]:
def create_all_dataset(data, look_back=5):
    """Build sliding-window samples separately for every company and stack them.

    Args:
        data: DataFrame with a ``company_id`` column. Its values are passed
            as-is to ``create_dataset``, so the column order of the frame
            decides which columns are inputs and which is the target.
        look_back: Window length forwarded to ``create_dataset``.

    Returns:
        Tuple ``(X, Y)`` with the per-company results concatenated along the
        first axis, in ascending ``company_id`` order.

    Raises:
        ValueError: If no company has enough rows to produce a window.
    """
    X, Y = [], []
    # Group on the ids that actually occur instead of assuming they are
    # exactly 0..n-1; windows never span two companies.
    # NOTE(review): rows of each company are used in their existing order —
    # presumably chronological; confirm against how the CSV is produced.
    for _, company_rows in data.groupby('company_id', sort=True):
        x, y = create_dataset(company_rows.values, look_back)
        if len(x) == 0:
            # Too few rows for a single window; skip so the concatenation
            # below only sees arrays with real samples.
            continue
        X.append(x)
        Y.append(y)
    if not X:
        raise ValueError("no company has enough rows to build a look_back window")
    return np.concatenate(X), np.concatenate(Y)

In [5]:
from sklearn.preprocessing import MinMaxScaler
# Columns rescaled to [0, 1]. The identifier / categorical columns
# (company_id, list_sector, major_id, minor_id) are left unscaled.
scale_need_cols = [ 'open', 'close', 'high', 'low', 'volume', 'money_netflow', 'money_inflow', 'money_outflow',
       'net_inflow_rate', 'CPI', '无风险利率',
       'total_market_cap', 'float_market_cap', 'pe_ttm', 'pb',
       'dividend_yield_ratio',
       'change_ratio']
# Create the MinMaxScaler
scaler = MinMaxScaler(feature_range=(0, 1))
# NOTE(review): the scaler is fitted on the whole table, before the train/test
# split performed in a later cell, so test rows influence the scaling, and the
# target column 'change_ratio' is rescaled too — confirm this is intended.
# Assign whole columns instead of writing through .loc[:, cols]: the scaled
# values are floats, and writing them into a column that was parsed with an
# integer dtype relies on implicit upcasting.
data[scale_need_cols] = scaler.fit_transform(data[scale_need_cols])

In [6]:
# Number of consecutive rows in each input window.
look_back = 5
# X holds the input windows and Y the matching targets, stacked over all companies.
X, Y = create_all_dataset(data, look_back)

In [7]:
from sklearn.model_selection import train_test_split

# Hold out 30% of the windows for evaluation; random_state fixes the split.
# NOTE(review): train_test_split is called without shuffle=False here, and
# consecutive windows overlap in time (they start one row apart), so held-out
# windows can share rows with training windows — confirm this is acceptable
# for the evaluation.
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.3, random_state=42)

In [8]:
# Use the GPU when one is available, otherwise the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Model hyper-parameters
# Take the number of input features from the prepared windows rather than
# hard-coding it, so that editing the feature list cannot leave it out of sync.
input_dim = X_train.shape[-1]
hidden_dim = 128
output_dim = 1
num_heads = 8
num_layers = 6
ff_dim = 512

# Build the model and move it to the selected device
model = Transformer(input_dim, hidden_dim, output_dim, num_heads, num_layers, ff_dim)
model.to(device)

Transformer(
  (input_embedding): Linear(in_features=20, out_features=128, bias=True)
  (output_embedding): Linear(in_features=128, out_features=1, bias=True)
  (encoder_layers): ModuleList(
    (0): EncoderLayer(
      (self_attn): MultiHeadAttention(
        (query): Linear(in_features=128, out_features=128, bias=True)
        (key): Linear(in_features=128, out_features=128, bias=True)
        (value): Linear(in_features=128, out_features=128, bias=True)
        (fc): Linear(in_features=128, out_features=128, bias=True)
      )
      (ff): FeedForward(
        (linear1): Linear(in_features=128, out_features=512, bias=True)
        (linear2): Linear(in_features=512, out_features=128, bias=True)
        (relu): ReLU()
      )
      (norm1): AddNorm(
        (norm): LayerNorm((128,), eps=1e-05, elementwise_affine=True)
      )
      (norm2): AddNorm(
        (norm): LayerNorm((128,), eps=1e-05, elementwise_affine=True)
      )
    )
    (1): EncoderLayer(
      (self_attn): MultiHeadAtt

In [9]:
def _to_numpy(values):
    """Return ``values`` as a NumPy array, detaching tensor-like objects and moving them to the CPU first."""
    if hasattr(values, 'detach'):
        # torch tensors on a GPU or with requires_grad=True cannot be handed to NumPy directly.
        values = values.detach().cpu().numpy()
    return np.asarray(values)


def directional_accuracy(actual, predicted):
    """Fraction of consecutive steps whose direction of change matches.

    The sign of each element-to-element difference in ``actual`` is compared
    with the sign of the corresponding difference in ``predicted``.

    Args:
        actual: Sequence, NumPy array or torch tensor of reference values.
        predicted: Values of the same length as ``actual``.

    Returns:
        Share of matching signs as a float in [0, 1], or ``nan`` when fewer
        than two values are given (there is no difference to compare).
    """
    actual_diff = np.diff(_to_numpy(actual))
    predicted_diff = np.diff(_to_numpy(predicted))
    if actual_diff.size == 0:
        return float('nan')
    return float(np.mean(np.sign(actual_diff) == np.sign(predicted_diff)))

In [10]:
def train_model(model, optimizer, criterion, train_src, train_tgt, val_src, val_tgt, num_epochs, batch_size, patience, convergence_threshold):
    """Train ``model`` with mini-batches and report metrics after every epoch.

    After each epoch the model is evaluated on the full training and
    validation tensors (one forward pass each) and the loss, MAE, RMSE and
    directional accuracy are printed.

    Args:
        model: Module called as ``model(batch_src)``.
        optimizer: Optimizer over the model parameters.
        criterion: Loss called as ``criterion(prediction, target)`` on
            flattened tensors.
        train_src, train_tgt: Training inputs and targets, sliced along dim 0.
        val_src, val_tgt: Validation inputs and targets.
        num_epochs: Maximum number of epochs.
        batch_size: Number of samples per optimisation step.
        patience: Number of most recent epochs inspected for convergence.
        convergence_threshold: Training stops once more than ``patience``
            epochs have run and the spread (max - min) of the last
            ``patience`` training losses is below this value.

    Returns:
        Tuple ``(train_losses, val_losses)`` with one float per completed epoch.
    """
    train_losses = []
    val_losses = []

    for epoch in range(num_epochs):
        # Training mode
        model.train()

        for start in range(0, len(train_src), batch_size):
            batch_src = train_src[start:start + batch_size]
            batch_tgt = train_tgt[start:start + batch_size]

            optimizer.zero_grad()
            outputs = model(batch_src)
            loss = criterion(outputs.view(-1), batch_tgt.view(-1))
            loss.backward()
            optimizer.step()

        # Evaluation mode
        model.eval()
        with torch.no_grad():
            train_pred = model(train_src).view(-1)
            val_pred = model(val_src).view(-1)
            train_loss = criterion(train_pred, train_tgt.view(-1))
            val_loss = criterion(val_pred, val_tgt.view(-1))

            train_mae = torch.mean(torch.abs(train_tgt - train_pred))
            val_mae = torch.mean(torch.abs(val_tgt - val_pred))
            train_rmse = torch.sqrt(torch.mean((train_tgt - train_pred)**2))
            val_rmse = torch.sqrt(torch.mean((val_tgt - val_pred)**2))
            # The directional metric is NumPy-based, so hand it host arrays;
            # passing GPU tensors directly would fail.
            train_da = directional_accuracy(train_tgt.detach().cpu().numpy(), train_pred.detach().cpu().numpy())
            val_da = directional_accuracy(val_tgt.detach().cpu().numpy(), val_pred.detach().cpu().numpy())

        train_losses.append(train_loss.item())
        val_losses.append(val_loss.item())

        print(f"Epoch [{epoch+1}/{num_epochs}], Train Loss: {train_loss.item():.8f}, Train MAE: {train_mae.item():.8f}, Train RMSE: {train_rmse.item():.8f}, Train DA: {train_da:.8f}, Val Loss: {val_loss.item():.8f}, Val MAE: {val_mae.item():.8f}, Val RMSE: {val_rmse.item():.8f}, Val DA: {val_da:.8f}")

        if len(train_losses) > patience:
            recent_train_losses = train_losses[-patience:]
            if max(recent_train_losses) - min(recent_train_losses) < convergence_threshold:
                print(f"Training stopped at epoch {epoch+1} due to loss convergence.")
                break

    # Hand the loss curves back so they can be inspected or plotted.
    return train_losses, val_losses


In [None]:

# Prepare the training and validation data
train_src_data = X_train
train_tgt_data = Y_train
val_src_data = X_test
val_tgt_data = Y_test

# Copy the arrays into float32 tensors placed on the selected device.
train_src_tensor = torch.tensor(train_src_data,dtype=torch.float32,device=device)
#train_src_tensor = train_src_data.clone().detach().to(device)

train_tgt_tensor = torch.tensor(train_tgt_data,dtype=torch.float32,device=device)
val_src_tensor = torch.tensor(val_src_data,dtype=torch.float32,device=device)
#val_src_tensor = val_src_data.clone().detach().to(device)

val_tgt_tensor = torch.tensor(val_tgt_data,dtype=torch.float32,device=device)

# Stopping parameters: train_model stops early once the training loss of the
# last `patience` epochs varies by less than the convergence threshold.
patience = 1000
num_epochs = 2000
# Optimizer and loss function
optimizer = optim.Adam(model.parameters())
criterion = nn.MSELoss()
# Train the model
train_model(model, optimizer, criterion, train_src_tensor, train_tgt_tensor, val_src_tensor, val_tgt_tensor, num_epochs=num_epochs, batch_size=8, patience=patience, convergence_threshold=1e-8)