In [1]:
import numpy as np
import matplotlib.pyplot as plt
import os
import pickle
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader, random_split
from torch.nn import init
import torch.nn.functional as F

In [2]:
def open_data(file_path):
    file = open(file_path,"rb")
    raw_data = pickle.load(file)  
    return raw_data

In [3]:
def save_data(data, file_path):
    with open(file_path , 'wb') as f:
        pickle.dump(data,f)
        f.close()

In [4]:
class TimeSeriesDataset(Dataset):
    def __init__(self, inputs, targets, dist, dist_arg, masks, min_vals, max_vals, mask_dist_max, class_labels):
        self.inputs = inputs
        self.targets = targets
        self.dist = dist
        self.dist_arg = dist_arg
        self.masks = masks
        self.min_vals = min_vals
        self.max_vals = max_vals
        self.mask_dist_max = mask_dist_max
        self.class_labels = class_labels
    
    def __len__(self):
        return len(self.inputs)

    def __getitem__(self, idx):
        # 获取输入和对应的目标数据
        x = self.inputs[idx]
        y = self.targets[idx]
        d_ = self.dist[idx]
        d_arg = self.dist_arg[idx]
        mask = self.masks[idx]
        min_ = self.min_vals[idx]
        max_ = self.max_vals[idx]
        mask_d_ = self.mask_dist_max[idx]
        class_ = self.class_labels[idx]
        return x, y, d_, d_arg, mask, min_, max_, mask_d_, class_

In [36]:
# 定义 LSTM 模型
class LSTMModel(nn.Module):
    def __init__(self, input_size, hidden_size, output_size, output_seq_len, drop_out = True):
        super(LSTMModel, self).__init__()
        self.hidden_size = hidden_size
        self.lstm = nn.LSTM(input_size, hidden_size, num_layers = 2, batch_first=True)
        self.fc = nn.Linear(hidden_size, output_size * output_seq_len)
        self.output_seq_len = output_seq_len
        self.output_size = output_size
        self.acti_func = nn.ReLU()
        self.drop_out = drop_out
        for m in self.modules():
            if isinstance(m, nn.Linear):
                #m.weight.data = init.xavier_normal_(m.weight.data, nonlinearity='relu')
                m.weight.data = init.kaiming_normal_(m.weight.data, nonlinearity="relu")
                if m.bias is not None:
                    m.bias.data = init.constant_(m.bias.data, 0.0) 

    
    def forward(self, x):
        # x 的形状为 (batch_size, seq_len, input_size)
        lstm_out, _ = self.lstm(x)  # lstm_out: (batch_size, seq_len, hidden_size)
        
        lstm_out = lstm_out[:, -1, :]  # 取最后一个时间步的输出 (batch_size, hidden_size)
        if self.drop_out:
            lstm_out = F.dropout(lstm_out, training=self.training, p=0.2)
        out = self.fc(lstm_out)# 全连接层 (batch_size, output_size * output_seq_len)
        out= self.acti_func(out)
        out = out.view(-1, self.output_seq_len, self.output_size)  # 重塑为 (batch_size, output_seq_len, num_nodes * num_features)
        return out

In [37]:
train_dataset = torch.load("D:/ThesisData/processed data/SourceDomain/NEW/train_data_14days.h5")
vali_bangkok = torch.load("D:/ThesisData/processed data/SourceDomain/NEW/Bangkok_vali_data_14days.h5")
vali_antwerp = torch.load("D:/ThesisData/processed data/SourceDomain/NEW/Antwerp_vali_data_14days.h5")

In [38]:
test_bangkok = torch.load("D:/ThesisData/processed data/SourceDomain/NEW/Bangkok_test_data_14days.h5")
test_antwerp = torch.load("D:/ThesisData/processed data/SourceDomain/NEW/Antwerp_test_data_14days.h5")

In [39]:
batch_size = 16
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
Bangkok_vali_loader = DataLoader(vali_bangkok, batch_size=batch_size, shuffle=False)
Antwerp_vali_loader = DataLoader(vali_antwerp, batch_size=batch_size, shuffle=False)
Bangkok_test_loader = DataLoader(test_bangkok, batch_size=batch_size, shuffle=False)
Antwerp_test_loader = DataLoader(test_antwerp, batch_size=batch_size, shuffle=False)

In [40]:
seq_len = 28
num_nodes = 1466
num_features = 1
input_size = num_nodes * num_features  # LSTM 输入的维度
hidden_size = 256   # LSTM 隐藏层的维度
output_seq_len = 14  # 预测14天
output_size = num_nodes * num_features  # 每个时间步预测的数据量

In [41]:
model = LSTMModel(input_size=input_size, hidden_size=hidden_size, output_size=output_size, output_seq_len=output_seq_len)
criterion_MSE = nn.MSELoss(reduction='none')  # 使用均方误差作为损失函数
criterion_MAE = nn.L1Loss(reduction='none')
optimizer = optim.NAdam(model.parameters(), lr=4e-4, weight_decay=1e-5)

In [42]:
epoch_num = 40

In [43]:
for h in range(epoch_num):
    model.train()
    for batch_input, batch_labels, batch_dist, batch_dist_arg, batch_masks, batch_min, batch_max, batch_dist_mask, _ in train_loader:
        
        batch_input, batch_labels, batch_masks = batch_input.squeeze(), batch_labels.squeeze(), batch_masks.squeeze()
        batch_min, batch_max = batch_min.squeeze(), batch_max.squeeze()
  
        batch_outputs = model(batch_input)

        
        loss_mse = criterion_MSE(batch_outputs, batch_labels) * batch_masks
        loss_mae = criterion_MAE(batch_outputs, batch_labels) * batch_masks
        loss_mse = loss_mse.sum() / batch_masks.sum()
        loss_mae = loss_mae.sum() / batch_masks.sum()
        
        optimizer.zero_grad()
        loss_mse.backward()
        optimizer.step()

        del loss_mse
        del loss_mae
        
        batch_outputs = batch_outputs * (batch_max - batch_min) + batch_min
        batch_labels = batch_labels * (batch_max - batch_min) + batch_min
        loss_mse = criterion_MSE(batch_outputs, batch_labels) * batch_masks
        loss_mae = criterion_MAE(batch_outputs, batch_labels) * batch_masks
        loss_mse = loss_mse.sum() / batch_masks.sum()
        loss_mae = loss_mae.sum() / batch_masks.sum()

    
    print(f"epoch {h + 1}, Train: MSE={loss_mse.item():.0f}, MAE={loss_mae.item():.0f}")

    model.eval()

    for batch_input, batch_labels, batch_dist, batch_dist_arg, batch_masks, batch_min, batch_max, batch_dist_mask, _ in Antwerp_vali_loader:
        batch_input, batch_labels, batch_masks = batch_input.squeeze(), batch_labels.squeeze(), batch_masks.squeeze()
        batch_min, batch_max = batch_min.squeeze(), batch_max.squeeze()
        
        batch_outputs = model(batch_input)
        Antwerp_vali_outputs = batch_outputs * (batch_max - batch_min) + batch_min
        Antwerp_vali_labels = batch_labels * (batch_max - batch_min) + batch_min
        loss_mse = criterion_MSE(Antwerp_vali_outputs, Antwerp_vali_labels) * batch_masks
        loss_mae = criterion_MAE(Antwerp_vali_outputs, Antwerp_vali_labels) * batch_masks
        loss_mse = loss_mse.sum() / batch_masks.sum()
        loss_mae = loss_mae.sum() / batch_masks.sum()

        
    print(f"--Anterp Vali set: MSE={loss_mse.item():.0f}, MAE={loss_mae.item():.0f}")  


    for batch_input, batch_labels, batch_dist, batch_dist_arg, batch_masks, batch_min, batch_max, batch_dist_mask, _ in Antwerp_test_loader:
        batch_input, batch_labels, batch_masks = batch_input.squeeze(), batch_labels.squeeze(), batch_masks.squeeze()
        batch_min, batch_max = batch_min.squeeze(), batch_max.squeeze()
        
        batch_outputs = model(batch_input)
        Antwerp_test_outputs = batch_outputs * (batch_max - batch_min) + batch_min
        Antwerp_test_labels = batch_labels * (batch_max - batch_min) + batch_min
        loss_mse = criterion_MSE(Antwerp_test_outputs, Antwerp_test_labels) * batch_masks
        loss_mae = criterion_MAE(Antwerp_test_outputs, Antwerp_test_labels) * batch_masks
        loss_mse = loss_mse.sum() / batch_masks.sum()
        loss_mae = loss_mae.sum() / batch_masks.sum()

        
    print(f"--Anterp Test set: MSE={loss_mse.item():.0f}, MAE={loss_mae.item():.0f}") 


    


    for batch_input, batch_labels, batch_dist, batch_dist_arg, batch_masks, batch_min, batch_max, batch_dist_mask, _ in Bangkok_vali_loader:
        batch_input, batch_labels, batch_masks = batch_input.squeeze(), batch_labels.squeeze(), batch_masks.squeeze()
        batch_min, batch_max = batch_min.squeeze(), batch_max.squeeze()

        batch_outputs = model(batch_input)
        Bangkok_vali_outputs = batch_outputs * (batch_max - batch_min) + batch_min
        Bangkok_vali_labels = batch_labels * (batch_max - batch_min) + batch_min
        loss_mse = criterion_MSE(Bangkok_vali_outputs, Bangkok_vali_labels) * batch_masks
        loss_mae = criterion_MAE(Bangkok_vali_outputs, Bangkok_vali_labels) * batch_masks
        loss_mse = loss_mse.sum() / batch_masks.sum()
        loss_mae = loss_mae.sum() / batch_masks.sum()

    
    print(f"--Bangkok Vali set: MSE={loss_mse.item():.0f}, MAE={loss_mae.item():.0f}")     


    for batch_input, batch_labels, batch_dist, batch_dist_arg, batch_masks, batch_min, batch_max, batch_dist_mask, _ in Bangkok_test_loader:
        batch_input, batch_labels, batch_masks = batch_input.squeeze(), batch_labels.squeeze(), batch_masks.squeeze()
        batch_min, batch_max = batch_min.squeeze(), batch_max.squeeze()

        batch_outputs = model(batch_input)
        Bangkok_test_outputs = batch_outputs * (batch_max - batch_min) + batch_min
        Bangkok_test_labels = batch_labels * (batch_max - batch_min) + batch_min
        loss_mse = criterion_MSE(Bangkok_test_outputs, Bangkok_test_labels) * batch_masks
        loss_mae = criterion_MAE(Bangkok_test_outputs, Bangkok_test_labels) * batch_masks
        loss_mse = loss_mse.sum() / batch_masks.sum()
        loss_mae = loss_mae.sum() / batch_masks.sum()

    
    print(f"--Bangkok Test set: MSE={loss_mse.item():.0f}, MAE={loss_mae.item():.0f}")    


    if h == epoch_num-1:
        lstm_results = {"Antwerp_vali": Antwerp_vali_outputs.detach().numpy(), "Antwerp_vali_label": Antwerp_vali_labels.detach().numpy(), "Bangkok_vali": Bangkok_vali_outputs.detach().numpy(), "Bangkok_vali_label": Bangkok_vali_labels.detach().numpy(),
                    "Antwerp_test": Antwerp_test_outputs.detach().numpy(), "Antwerp_test_label": Antwerp_test_labels.detach().numpy(), "Bangkok_test": Bangkok_test_outputs.detach().numpy(), "Bangkok_test_label": Bangkok_test_labels.detach().numpy(),
                     "Antwerp_node_num": 1466,  "Bangkok_node_num": 1137}
        
        save_data(lstm_results, "D:/ThesisData/processed data/SourceDomain/NEW/results/lstm_results.h5")

epoch 1, Train: MSE=18028, MAE=104
--Anterp Vali set: MSE=5228, MAE=47
--Anterp Test set: MSE=8563, MAE=64
--Bangkok Vali set: MSE=6089, MAE=56
--Bangkok Test set: MSE=9133, MAE=72
epoch 2, Train: MSE=10891, MAE=75
--Anterp Vali set: MSE=4918, MAE=44
--Anterp Test set: MSE=8061, MAE=60
--Bangkok Vali set: MSE=5264, MAE=51
--Bangkok Test set: MSE=7938, MAE=65
epoch 3, Train: MSE=7154, MAE=57
--Anterp Vali set: MSE=4408, MAE=43
--Anterp Test set: MSE=7003, MAE=57
--Bangkok Vali set: MSE=4348, MAE=46
--Bangkok Test set: MSE=6257, MAE=57
epoch 4, Train: MSE=10262, MAE=76
--Anterp Vali set: MSE=4171, MAE=42
--Anterp Test set: MSE=6491, MAE=55
--Bangkok Vali set: MSE=4423, MAE=46
--Bangkok Test set: MSE=6551, MAE=59
epoch 5, Train: MSE=9302, MAE=71
--Anterp Vali set: MSE=3951, MAE=43
--Anterp Test set: MSE=6015, MAE=55
--Bangkok Vali set: MSE=3592, MAE=44
--Bangkok Test set: MSE=4508, MAE=49
epoch 6, Train: MSE=7606, MAE=64
--Anterp Vali set: MSE=3865, MAE=41
--Anterp Test set: MSE=5883, MAE

In [15]:
#Save Model Para
torch.save({
    'lstm_state_dict': model.state_dict(),
    'lstm_optimizer_state_dict': optimizer.state_dict(),
}, 'D:/ThesisData/processed data/ModelPara/source_lstm_14days.pth')

In [20]:
checkpoint = torch.load('D:/ThesisData/processed data/ModelPara/source_lstm_14days.pth')
model.load_state_dict(checkpoint['lstm_state_dict'])
#optimizer.load_state_dict(checkpoint['lstm_optimizer_state_dict'])

<All keys matched successfully>

In [11]:
target_train = torch.load("D:/ThesisData/processed data/TargetDomain/NEW/train_data_14days.h5")
vali_barcelona = torch.load("D:/ThesisData/processed data/TargetDomain/NEW/Barcelona_vali_data_14days.h5")
test_barcelona = torch.load("D:/ThesisData/processed data/TargetDomain/NEW/Barcelona_test_data_14days.h5")

In [49]:
batch_size = 14
train_loader = DataLoader(target_train, batch_size=batch_size, shuffle=True)
Barcelona_vali_loader = DataLoader(vali_barcelona, batch_size=batch_size, shuffle=False)
Barcelona_test_loader = DataLoader(test_barcelona, batch_size=batch_size, shuffle=False)

In [50]:
optimizer = optim.NAdam(model.parameters(), lr=2e-4, weight_decay=1e-5)
lstm_scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=11, gamma=0.65)

In [51]:
epoch_num = 40

In [52]:
for h in range(epoch_num):
    model.train()
    for batch_input, batch_labels, batch_dist, batch_dist_arg, batch_masks, batch_min, batch_max, batch_dist_mask, _ in train_loader:
        
        batch_input, batch_labels, batch_masks = batch_input.squeeze(), batch_labels.squeeze(), batch_masks.squeeze()
        batch_min, batch_max = batch_min.squeeze(), batch_max.squeeze()
        
        
        batch_outputs = model(batch_input)

        
        loss_mse = criterion_MSE(batch_outputs, batch_labels) * batch_masks
        loss_mae = criterion_MAE(batch_outputs, batch_labels) * batch_masks
        loss_mse = loss_mse.sum() / batch_masks.sum()
        loss_mae = loss_mae.sum() / batch_masks.sum()
        
        optimizer.zero_grad()
        loss_mse.backward()
        optimizer.step()

        del loss_mse
        del loss_mae
        
        batch_outputs = batch_outputs * (batch_max - batch_min) + batch_min
        batch_labels = batch_labels * (batch_max - batch_min) + batch_min
        loss_mse = criterion_MSE(batch_outputs, batch_labels) * batch_masks
        loss_mae = criterion_MAE(batch_outputs, batch_labels) * batch_masks
        loss_mse = loss_mse.sum() / batch_masks.sum()
        loss_mae = loss_mae.sum() / batch_masks.sum()

    
    print(f"epoch {h + 1}, Train: MSE={loss_mse.item():.0f}, MAE={loss_mae.item():.0f}")

    model.eval()

    for batch_input, batch_labels, batch_dist, batch_dist_arg, batch_masks, batch_min, batch_max, batch_dist_mask, _ in Barcelona_vali_loader:
        batch_input, batch_labels, batch_masks = batch_input.squeeze(), batch_labels.squeeze(), batch_masks.squeeze()
        batch_min, batch_max = batch_min.squeeze(), batch_max.squeeze()
        
        batch_outputs = model(batch_input)
        Barcelona_vali_outputs = batch_outputs * (batch_max - batch_min) + batch_min
        Barcelona_vali_labels = batch_labels * (batch_max - batch_min) + batch_min
        loss_mse = criterion_MSE(Barcelona_vali_outputs, Barcelona_vali_labels) * batch_masks
        loss_mae = criterion_MAE(Barcelona_vali_outputs, Barcelona_vali_labels) * batch_masks
        loss_mse = loss_mse.sum() / batch_masks.sum()
        loss_mae = loss_mae.sum() / batch_masks.sum()

        
    print(f"--Barcelona Vali set: MSE={loss_mse.item():.0f}, MAE={loss_mae.item():.0f}")  


    for batch_input, batch_labels, batch_dist, batch_dist_arg, batch_masks, batch_min, batch_max, batch_dist_mask, _ in Barcelona_test_loader:
        batch_input, batch_labels, batch_masks = batch_input.squeeze(), batch_labels.squeeze(), batch_masks.squeeze()
        batch_min, batch_max = batch_min.squeeze(), batch_max.squeeze()
        
        batch_outputs = model(batch_input)
        Barcelona_test_outputs = batch_outputs * (batch_max - batch_min) + batch_min
        Barcelona_test_labels = batch_labels * (batch_max - batch_min) + batch_min
        loss_mse = criterion_MSE(Barcelona_test_outputs, Barcelona_test_labels) * batch_masks
        loss_mae = criterion_MAE(Barcelona_test_outputs, Barcelona_test_labels) * batch_masks
        loss_mse = loss_mse.sum() / batch_masks.sum()
        loss_mae = loss_mae.sum() / batch_masks.sum()

        
    print(f"--Barcelona Test set: MSE={loss_mse.item():.0f}, MAE={loss_mae.item():.0f}") 

    lstm_scheduler.step()
    #if h == epoch_num-1: 
        #lstm_results = {"Barcelona_vali": Barcelona_vali_outputs.detach().numpy(), "Barcelona_vali_label": Barcelona_vali_labels.detach().numpy(),
                            #"Barcelona_test": Barcelona_test_outputs.detach().numpy(), "Barcelona_test_label": Barcelona_test_labels.detach().numpy(),
                            #"Barcelona_node_num": 1273}
        
        #save_data(lstm_results, "D:/ThesisData/processed data/TargetDomain/NEW/results/lstm_results.h5")
        



epoch 1, Train: MSE=3259, MAE=46
--Barcelona Vali set: MSE=2881, MAE=46
--Barcelona Test set: MSE=3846, MAE=53
epoch 2, Train: MSE=3190, MAE=45
--Barcelona Vali set: MSE=2871, MAE=45
--Barcelona Test set: MSE=3827, MAE=53
epoch 3, Train: MSE=3311, MAE=46
--Barcelona Vali set: MSE=2553, MAE=42
--Barcelona Test set: MSE=3389, MAE=49
epoch 4, Train: MSE=3073, MAE=44
--Barcelona Vali set: MSE=2961, MAE=46
--Barcelona Test set: MSE=3920, MAE=53
epoch 5, Train: MSE=3191, MAE=45
--Barcelona Vali set: MSE=2838, MAE=45
--Barcelona Test set: MSE=3724, MAE=51
epoch 6, Train: MSE=3019, MAE=43
--Barcelona Vali set: MSE=2953, MAE=45
--Barcelona Test set: MSE=3860, MAE=52
epoch 7, Train: MSE=3141, MAE=44
--Barcelona Vali set: MSE=3003, MAE=46
--Barcelona Test set: MSE=3913, MAE=53
epoch 8, Train: MSE=2977, MAE=43
--Barcelona Vali set: MSE=2892, MAE=45
--Barcelona Test set: MSE=3769, MAE=51
epoch 9, Train: MSE=3075, MAE=43
--Barcelona Vali set: MSE=2824, MAE=44
--Barcelona Test set: MSE=3681, MAE=51
e