In [1]:
import nbimporter
from ModelBase.Transformer_LSTM import*


import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import torch
import torch.nn as nn
#from torch.nn.utils.rnn import pack_padded_sequence, pad_packed_sequence
from torch.utils.data import DataLoader, Dataset 
import tqdm
from torch.autograd import Variable
import argparse
import math
import torch.nn.functional as F
import os 
from sklearn.model_selection import train_test_split
import torch.optim as optim 

In [4]:
def l2_loss(pred, label):
    loss = torch.nn.functional.mse_loss(pred, label, size_average=True)
    return loss

def train_once(encoder, decoder, dataloader, encoder_optim, decoder_optim, device):
    encoder.train()
    decoder.train()
    loader = tqdm.tqdm(dataloader, desc="Training")
    loss_epoch = 0
    
    for data, label in loader:
        data = data.float().to(device)  # 转换数据类型为 float 并移动到 GPU
        label = label.float().to(device)  # 转换标签类型为 float 并移动到 GPU

        encoder_optim.zero_grad()
        decoder_optim.zero_grad()
        
        # 数据传入编码器
        encoded_data = encoder(data)
        
        # 解码器输入初始化，这里使用了全零张量
        decoder_input = torch.zeros_like(label).to(device)  # 确保在 GPU 上
        
        print(encoded_data.shape)
        print(decoder_input.shape)
        # 数据传入解码器
        decoded_output = decoder(encoded_data, decoder_input)
        
        # 计算损失
        loss = l2_loss(decoded_output, label)
        loss.backward()
        
        # 更新优化器
        encoder_optim.step()
        decoder_optim.step()
        
        # 统计损失值
        loss_epoch += loss.item()
        
        # 更新进度条显示当前批次的损失
        loader.set_postfix(loss=loss.item())
    
    # 返回平均损失
    return loss_epoch / len(dataloader)

def eval_once(encoder, decoder, dataloader, device):
    encoder.eval()
    decoder.eval()
    loader = tqdm.tqdm(dataloader, desc="Evaluating")
    loss_epoch = 0
    preds = []
    labels = []
    for idx, (data, label) in enumerate(loader):
        data = data.float().to(device)
        label = label.float().to(device)
        decoder_input = torch.zeros_like(label).to(device)
        
        with torch.no_grad():
            encoded_data = encoder(data)
            decoded_output = decoder(encoded_data, decoder_input).squeeze(1)
        
        loss = l2_loss(decoded_output, label)
        loss_epoch += loss.item()
        preds += decoded_output.tolist()
        labels += label.tolist()
        loader.set_postfix(loss=loss.item())
    loss_epoch /= len(loader)
    return loss_epoch

def eval_plot(encoder, decoder, dataloader, train_losses, val_losses, device):
    dataloader.shuffle = False
    preds = []
    labels = []
    encoder.eval()
    decoder.eval()
    loader = tqdm.tqdm(dataloader, desc="Plotting")
    for idx, (data, label) in enumerate(loader):
        data = data.float().to(device)
        label = label.float().to(device)
        decoder_input = torch.zeros_like(label).to(device)
        
        with torch.no_grad():
            encoded_data = encoder(data)
            decoded_output = decoder(encoded_data, decoder_input)
        
        preds += decoded_output.tolist()
        labels += label.tolist()
    
    fig, ax = plt.subplots()
    data_x = list(range(len(preds)))
    ax.plot(data_x, preds, label='predict', color='red')
    ax.plot(data_x, labels, label='ground truth', color='blue')
    ax.plot(train_losses, label='train loss', color='green')  # 添加训练损失曲线
    ax.plot(val_losses, label='val loss', color='orange')  # 添加评估损失曲线
    plt.savefig('shangzheng-tran-lstm.png')
    plt.legend()
    plt.show()

def main():
    # Load data
    input_data = np.load("Preprocessing/sample.npy")
    target_data = np.load("Preprocessing/target.npy")

    input_data = torch.tensor(input_data, dtype=torch.float32)
    target_data = torch.tensor(target_data, dtype=torch.float32)

    # Split data into train and validation sets
    X_train, X_val, y_train, y_val = train_test_split(input_data, target_data, test_size=0.2, random_state=42)
    # Create DataLoader
    train_dataset = torch.utils.data.TensorDataset(X_train, y_train)
    train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
    val_dataset = torch.utils.data.TensorDataset(X_val, y_val)
    val_loader = DataLoader(val_dataset, batch_size=32, shuffle=False)

    # Initialize model and optimizers
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    print(f"Running on device: {device}")
    encoder = TransAm().to(device)
    decoder = AttnDecoder(code_hidden_size=88, hidden_size=64, time_step=24).to(device)  # Adjust time_step according to your data
    encoder_optim = optim.Adam(encoder.parameters(), lr=0.001)
    decoder_optim = optim.Adam(decoder.parameters(), lr=0.001)

    total_epoch = 201
    train_losses = []
    val_losses = []

    for epoch_idx in range(total_epoch):
        train_loss = train_once(encoder, decoder, train_loader, encoder_optim, decoder_optim, device)
        train_losses.append(train_loss)
        print(f"Epoch: {epoch_idx}, Train Loss: {train_loss}")

        if epoch_idx % 5 == 0:
            val_loss = eval_once(encoder, decoder, val_loader, device)
            val_losses.append(val_loss)
            print(f"Epoch: {epoch_idx}, Validation Loss: {val_loss}")
            eval_plot(encoder, decoder, val_loader, train_losses, val_losses, device)

In [8]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import torch
import torch.nn as nn
from torch.utils.data import DataLoader, Dataset 
import tqdm
from torch.autograd import Variable
import math
import torch.nn.functional as F
from sklearn.model_selection import train_test_split
import torch.optim as optim 

class PositionalEncoding(nn.Module):
    def __init__(self, d_model, max_len=44):  # max_len设置为44，因为有44个特征
        super(PositionalEncoding, self).__init__()       
        pe = torch.zeros(max_len, d_model)
        position = torch.arange(0, max_len, dtype=torch.float).unsqueeze(1)
        div_term = torch.exp(torch.arange(0, d_model, 2).float() * (-math.log(10000.0) / d_model))
        pe[:, 0::2] = torch.sin(position * div_term)
        pe[:, 1::2] = torch.cos(position * div_term)
        pe = pe.unsqueeze(0).transpose(0, 1)  # 变为 (max_len, 1, d_model)
        self.register_buffer('pe', pe)

    def forward(self, x):
        """
        Args:
            x: Tensor, shape [batch_size, 44, d_model]
        """
        x = x + self.pe[:x.size(0), :]
        return x

class TransAm(nn.Module):
    def __init__(self, feature_size=64, num_layers=6, dropout=0.1):
        super(TransAm, self).__init__()
        self.model_type = 'Transformer'
        self.src_mask = None
        self.pos_encoder = PositionalEncoding(feature_size, max_len=44)
        self.encoder_layer = nn.TransformerEncoderLayer(d_model=feature_size, nhead=8, dropout=dropout)
        self.transformer_encoder = nn.TransformerEncoder(self.encoder_layer, num_layers=num_layers)
        self.decoder = nn.Linear(feature_size, 1)
        self.init_weights()

    def init_weights(self):
        initrange = 0.1    
        self.decoder.bias.data.zero_()
        self.decoder.weight.data.uniform_(-initrange, initrange)

    def forward(self, src):
        """
        Args:
            src: Tensor, shape [batch_size, 44]
        """
        src = src.unsqueeze(-1)  # 变为 [batch_size, 44, 1]
        src = self.pos_encoder(src)
        src = src.permute(1, 0, 2)  # 变为 [44, batch_size, 1]
        if self.src_mask is None or self.src_mask.size(0) != src.size(0):
            device = src.device
            mask = self._generate_square_subsequent_mask(src.size(0)).to(device)
            self.src_mask = mask
        output = self.transformer_encoder(src, self.src_mask)
        output = self.decoder(output)
        output = output.permute(1, 0, 2).squeeze(-1)  # 变为 [batch_size, 44]
        return output

    def _generate_square_subsequent_mask(self, sz):
        mask = (torch.triu(torch.ones(sz, sz)) == 1).transpose(0, 1)
        mask = mask.float().masked_fill(mask == 0, float('-inf')).masked_fill(mask == 1, float(0.0))
        return mask

class AttnDecoder(nn.Module):
    def __init__(self, code_hidden_size, hidden_size, time_step):
        super(AttnDecoder, self).__init__()
        self.code_hidden_size = code_hidden_size
        self.hidden_size = hidden_size
        self.T = time_step

        self.attn1 = nn.Linear(in_features=hidden_size + 44, out_features=code_hidden_size)
        self.attn2 = nn.Linear(in_features=code_hidden_size, out_features=code_hidden_size)
        self.tanh = nn.Tanh()
        self.attn3 = nn.Linear(in_features=code_hidden_size, out_features=1)
        self.lstm = nn.LSTM(input_size=1, hidden_size=self.hidden_size, num_layers=1)
        self.tilde = nn.Linear(in_features=self.code_hidden_size + 1, out_features=1)
        self.fc1 = nn.Linear(in_features=hidden_size + 44, out_features=hidden_size)
        self.fc2 = nn.Linear(in_features=hidden_size, out_features=1)

    def forward(self, h, y_seq):
        """
        Args:
            h: Tensor, shape [batch_size, 44]
            y_seq: Tensor, shape [batch_size, 1]
        """
        device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        batch_size = h.size(0)
        seq_len = h.size(1)
        feature_size = h.size(2)
        d = self.init_variable(1, batch_size, self.hidden_size).to(device)
        s = self.init_variable(1, batch_size, self.hidden_size).to(device)
        h = h.unsqueeze(-1)  # 变为 [batch_size, 44, 1]

        outputs = []

        for t in range(self.T):
            h_t = h[:, t, :].unsqueeze(0)  # 从 h 中取出第 t 个时间步，变为 [1, batch_size, feature_size]
            x = torch.cat((d, h_t), dim=2)  # 拼接 d 和 h_t，变为 [1, batch_size, hidden_size + feature_size]
            h1 = self.attn1(x)
            h1 = h1.squeeze(0)  # 去掉第一维度，变为 [batch_size, code_hidden_size]
            y_t = y_seq[:, t].unsqueeze(1).unsqueeze(0)  # 从 y_seq 中取出第 t 个时间步，变为 [1, batch_size, 1]
            _, states = self.lstm(y_t, (d, s))  # 使用d和s作为LSTM的初始状态
            d = states[0]
            s = states[1]
            y_res = self.fc2(self.fc1(torch.cat((d.squeeze(0), h_t.squeeze(0)), dim=1)))  # 确保维度匹配
            outputs.append(y_res)

        outputs = torch.stack(outputs, dim=1).squeeze(2)  # 变为 [batch_size, 1]
        return outputs

    def init_variable(self, *args):
        zero_tensor = torch.zeros(*args)
        return Variable(zero_tensor)

# 训练部分代码
def l2_loss(pred, label):
    loss = torch.nn.functional.mse_loss(pred, label, size_average=True)
    return loss

def train_once(encoder, decoder, dataloader, encoder_optim, decoder_optim, device):
    encoder.train()
    decoder.train()
    loader = tqdm.tqdm(dataloader, desc="Training")
    loss_epoch = 0
    
    for data, label in loader:
        data = data.float().to(device)  # 转换数据类型为 float 并移动到 GPU
        label = label.float().to(device)  # 转换标签类型为 float 并移动到 GPU

        encoder_optim.zero_grad()
        decoder_optim.zero_grad()
        
        # 数据传入编码器
        encoded_data = encoder(data)
        
        # 解码器输入初始化，这里使用了全零张量
        decoder_input = torch.zeros_like(label).to(device)  # 确保在 GPU 上
        
        print(encoded_data.shape)
        print(decoder_input.shape)
        # 数据传入解码器
        decoded_output = decoder(encoded_data, decoder_input)
        
        # 计算损失
        loss = l2_loss(decoded_output, label)
        loss.backward()
        
        # 更新优化器
        encoder_optim.step()
        decoder_optim.step()
        
        # 统计损失值
        loss_epoch += loss.item()
        
        # 更新进度条显示当前批次的损失
        loader.set_postfix(loss=loss.item())
    
    # 返回平均损失
    return loss_epoch / len(dataloader)

def eval_once(encoder, decoder, dataloader, device):
    encoder.eval()
    decoder.eval()
    loader = tqdm.tqdm(dataloader, desc="Evaluating")
    loss_epoch = 0
    preds = []
    labels = []
    for idx, (data, label) in enumerate(loader):
        data = data.float().to(device)
        label = label.float().to(device)
        decoder_input = torch.zeros_like(label).to(device)
        
        with torch.no_grad():
            encoded_data = encoder(data)
            decoded_output = decoder(encoded_data, decoder_input).squeeze(1)
        
        loss = l2_loss(decoded_output, label)
        loss_epoch += loss.item()
        preds += decoded_output.tolist()
        labels += label.tolist()
        loader.set_postfix(loss=loss.item())
    loss_epoch /= len(loader)
    return loss_epoch

def eval_plot(encoder, decoder, dataloader, train_losses, val_losses, device):
    dataloader.shuffle = False
    preds = []
    labels = []
    encoder.eval()
    decoder.eval()
    loader = tqdm.tqdm(dataloader, desc="Plotting")
    for idx, (data, label) in enumerate(loader):
        data = data.float().to(device)
        label = label.float().to(device)
        decoder_input = torch.zeros_like(label).to(device)
        
        with torch.no_grad():
            encoded_data = encoder(data)
            decoded_output = decoder(encoded_data, decoder_input)
        
        preds += decoded_output.tolist()
        labels += label.tolist()
    
    fig, ax = plt.subplots()
    data_x = list(range(len(preds)))
    ax.plot(data_x, preds, label='predict', color='red')
    ax.plot(data_x, labels, label='ground truth', color='blue')
    ax.plot(train_losses, label='train loss', color='green')  # 添加训练损失曲线
    ax.plot(val_losses, label='val loss', color='orange')  # 添加评估损失曲线
    plt.savefig('shangzheng-tran-lstm.png')
    plt.legend()
    plt.show()

def main():
    # 加载数据
    input_data = np.load("Preprocessing/sample.npy")
    target_data = np.load("Preprocessing/target.npy")

    input_data = torch.tensor(input_data, dtype=torch.float32)
    target_data = torch.tensor(target_data, dtype=torch.float32)

    # 将数据划分为训练集和验证集
    X_train, X_val, y_train, y_val = train_test_split(input_data, target_data, test_size=0.2, random_state=42)
    # 创建DataLoader
    train_dataset = torch.utils.data.TensorDataset(X_train, y_train)
    train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
    val_dataset = torch.utils.data.TensorDataset(X_val, y_val)
    val_loader = DataLoader(val_dataset, batch_size=32, shuffle=False)

    # 初始化模型和优化器
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    print(f"Running on device: {device}")
    encoder = TransAm().to(device)
    decoder = AttnDecoder(code_hidden_size=88, hidden_size=64, time_step=44).to(device)  # Adjust time_step according to your data
    encoder_optim = optim.Adam(encoder.parameters(), lr=0.001)
    decoder_optim = optim.Adam(decoder.parameters(), lr=0.001)

    total_epoch = 201
    train_losses = []
    val_losses = []

    for epoch_idx in range(total_epoch):
        train_loss = train_once(encoder, decoder, train_loader, encoder_optim, decoder_optim, device)
        train_losses.append(train_loss)
        print(f"Epoch: {epoch_idx}, Train Loss: {train_loss}")

        if epoch_idx % 5 == 0:
            val_loss = eval_once(encoder, decoder, val_loader, device)
            val_losses.append(val_loss)
            print(f"Epoch: {epoch_idx}, Validation Loss: {val_loss}")
            eval_plot(encoder, decoder, val_loader, train_losses, val_losses, device)

if __name__ == "__main__":
    main()


Running on device: cuda


Training:   0%|                                                                                 | 0/73 [00:00<?, ?it/s]

Encoded data shape: torch.Size([32, 44])
Decoder input shape: torch.Size([32, 44])


  loss = torch.nn.functional.mse_loss(pred, label, reduction='mean')  # size_average=True 已弃用，改用 reduction='mean'
Training:   0%|                                                                                 | 0/73 [00:00<?, ?it/s]


RuntimeError: The size of tensor a (44) must match the size of tensor b (32) at non-singleton dimension 1