# In [7]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import torch
import torch.nn as nn
#from torch.nn.utils.rnn import pack_padded_sequence, pad_packed_sequence
from torch.utils.data import DataLoader, Dataset 
import tqdm
from torch.autograd import Variable
import argparse
import math
import torch.nn.functional as F
import os 
from sklearn.model_selection import train_test_split
import torch.optim as optim 

class PositionalEncoding(nn.Module):
    """Add fixed sinusoidal positional encodings along the time axis.

    A table of shape ``(max_len, 1, d_model)`` is precomputed once and stored
    as the non-trainable buffer ``pe``. Even feature columns hold sines and
    odd feature columns hold cosines of the time-step index.

    Args:
        d_model: Number of feature channels in the encoding (odd values are
            supported).
        max_len: Number of time steps for which encodings are precomputed.
    """

    def __init__(self, d_model, max_len=24):  # max_len defaults to 24 time steps
        super(PositionalEncoding, self).__init__()
        pe = torch.zeros(max_len, d_model)
        position = torch.arange(0, max_len, dtype=torch.float).unsqueeze(1)
        div_term = torch.exp(torch.arange(0, d_model, 2).float() * (-math.log(10000.0) / d_model))
        pe[:, 0::2] = torch.sin(position * div_term)
        # With an odd d_model there is one fewer cosine column than sine
        # column, so trim div_term to the number of odd-indexed columns.
        pe[:, 1::2] = torch.cos(position * div_term[: d_model // 2])
        pe = pe.unsqueeze(1)  # -> (max_len, 1, d_model)
        self.register_buffer('pe', pe)

    def forward(self, x):
        """Return ``x`` with the positional encoding added.

        Args:
            x: Tensor of shape ``[batch_size, num_nodes, seq_len, features]``.
                ``features`` must broadcast against ``d_model``; a trailing
                dimension of size 1 is expanded to ``d_model`` by the
                addition.

        Returns:
            Tensor of shape ``[batch_size, num_nodes, seq_len, d_model]``.
            It is a permuted view of the time-major sum, so it is generally
            not contiguous in memory.
        """
        x = x.permute(2, 0, 1, 3)  # -> (seq_len, batch_size, num_nodes, features)
        # pe slice is (seq_len, 1, 1, d_model) and broadcasts over batch/nodes.
        x = x + self.pe[:x.size(0), :, :].unsqueeze(2)
        return x.permute(1, 2, 0, 3)  # -> (batch_size, num_nodes, seq_len, d_model)

class TransAm(nn.Module):
    """Causal Transformer encoder applied independently to each node's series.

    Every (batch, node) pair is treated as a separate sequence of scalar
    values. The sequence is positionally encoded, passed through a
    Transformer encoder under a causal (subsequent-position) mask, and
    projected back to one value per time step.

    Args:
        feature_size: Model width (``d_model``); must be divisible by
            ``nhead``.
        num_layers: Number of layers in the encoder (and in the decoder
            stack, which is constructed but not used by ``forward``).
        dropout: Dropout probability inside the Transformer layers.
        nhead: Number of attention heads.
    """

    def __init__(self, feature_size=64, num_layers=6, dropout=0.1, nhead=8):
        super(TransAm, self).__init__()
        self.model_type = 'Transformer'

        # Cached causal mask; rebuilt when sequence length or device changes.
        self.src_mask = None
        self.pos_encoder = PositionalEncoding(feature_size)
        self.encoder_layer = nn.TransformerEncoderLayer(d_model=feature_size, nhead=nhead, dropout=dropout)
        self.transformer_encoder = nn.TransformerEncoder(self.encoder_layer, num_layers=num_layers)
        # The decoder stack is not used in forward(); it is kept so that
        # existing checkpoints (state_dict keys) continue to load.
        self.decoder_layer = nn.TransformerDecoderLayer(d_model=feature_size, nhead=nhead, dropout=dropout)
        self.transformer_decoder = nn.TransformerDecoder(self.decoder_layer, num_layers=num_layers)

        self.decoder = nn.Linear(feature_size, 1)
        self.init_weights()

    def init_weights(self):
        """Zero the output projection bias and draw its weights from U(-0.1, 0.1)."""
        initrange = 0.1
        self.decoder.bias.data.zero_()
        self.decoder.weight.data.uniform_(-initrange, initrange)

    def forward(self, src):
        """Run the causal encoder over every node's time series.

        Args:
            src: Tensor of shape ``[batch_size, num_nodes, seq_len]``.

        Returns:
            Tensor of shape ``[batch_size, num_nodes, seq_len]``.
        """
        src = src.unsqueeze(-1)  # -> [batch_size, num_nodes, seq_len, 1]
        src = self.pos_encoder(src)  # -> [batch_size, num_nodes, seq_len, d_model]

        batch_size, num_nodes, seq_len, d_model = src.shape
        # The encoder expects time-major input (seq_len, N, d_model). The
        # axes must be permuted explicitly: a bare view() would either fail
        # on the non-contiguous tensor or silently mix batch/node/time axes.
        src = src.permute(2, 0, 1, 3).reshape(seq_len, batch_size * num_nodes, d_model)

        if (
            self.src_mask is None
            or self.src_mask.size(0) != seq_len
            or self.src_mask.device != src.device
        ):
            self.src_mask = self._generate_square_subsequent_mask(seq_len).to(src.device)

        output = self.transformer_encoder(src, self.src_mask)
        output = output.reshape(seq_len, batch_size, num_nodes, d_model)
        output = output.permute(1, 2, 0, 3)  # -> [batch_size, num_nodes, seq_len, d_model]
        output = self.decoder(output)  # -> [batch_size, num_nodes, seq_len, 1]
        return output.squeeze(-1)  # -> [batch_size, num_nodes, seq_len]

    def _generate_square_subsequent_mask(self, sz):
        """Return an ``(sz, sz)`` float mask: 0 on/below the diagonal, -inf above."""
        return torch.triu(torch.full((sz, sz), float('-inf')), diagonal=1)
    
      
    
class AttnDecoder(nn.Module):
    """LSTM decoder that combines a target history with per-step node features.

    At each time step the previous target value is fed through a one-layer
    LSTM; the resulting hidden state is concatenated with that step's node
    features and mapped to a scalar by two linear layers.

    Args:
        code_hidden_size: Width of the attention projection layers.
        hidden_size: Hidden size of the LSTM.
        time_step: Number of time steps ``T`` to decode.
        num_nodes: Size of the node/feature axis of ``h`` (118 by default).

    Note:
        ``attn1``/``attn2``/``attn3``/``tanh``/``tilde`` are constructed but
        do not contribute to the output of ``forward``; they are kept so the
        parameter set (and saved checkpoints) stay unchanged.
    """

    def __init__(self, code_hidden_size, hidden_size, time_step, num_nodes=118):
        super(AttnDecoder, self).__init__()
        self.code_hidden_size = code_hidden_size
        self.hidden_size = hidden_size
        self.T = time_step
        self.num_nodes = num_nodes

        self.attn1 = nn.Linear(in_features=hidden_size + num_nodes, out_features=code_hidden_size)
        self.attn2 = nn.Linear(in_features=code_hidden_size, out_features=code_hidden_size)
        self.tanh = nn.Tanh()
        self.attn3 = nn.Linear(in_features=code_hidden_size, out_features=1)
        self.lstm = nn.LSTM(input_size=1, hidden_size=self.hidden_size, num_layers=1)
        self.tilde = nn.Linear(in_features=self.code_hidden_size + 1, out_features=1)
        self.fc1 = nn.Linear(in_features=hidden_size + num_nodes, out_features=hidden_size)
        self.fc2 = nn.Linear(in_features=hidden_size, out_features=1)

    def forward(self, h, y_seq):
        """Decode ``T`` scalar outputs per batch element.

        Args:
            h: Tensor of shape ``[batch_size, num_nodes, T]``.
            y_seq: Tensor of shape ``[batch_size, T]``.

        Returns:
            Tensor of shape ``[batch_size, T]``.
        """
        # Follow the input's device rather than probing for CUDA, so a CPU
        # model keeps working on a machine that also has a GPU.
        device = h.device
        batch_size = h.size(0)
        d = self.init_variable(1, batch_size, self.hidden_size).to(device)  # LSTM hidden state
        s = self.init_variable(1, batch_size, self.hidden_size).to(device)  # LSTM cell state
        h = h.transpose(1, 2)  # -> [batch_size, T, num_nodes]

        outputs = []
        for t in range(self.T):
            h_t = h[:, t, :]  # node features at step t: [batch_size, num_nodes]
            y_t = y_seq[:, t].unsqueeze(1).unsqueeze(0)  # -> [1, batch_size, 1]
            # Advance the LSTM one step, carrying (d, s) across iterations.
            _, (d, s) = self.lstm(y_t, (d, s))
            y_res = self.fc2(self.fc1(torch.cat((d.squeeze(0), h_t), dim=1)))  # [batch_size, 1]
            outputs.append(y_res)

        return torch.stack(outputs, dim=1).squeeze(2)  # -> [batch_size, T]

    def init_variable(self, *args):
        """Return a zero tensor whose shape is given by ``args``."""
        return torch.zeros(*args)