In [1]:
import numpy as np
import matplotlib.pyplot as plt
import os
import pickle
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader, random_split
from torch.nn import init
import torch.nn.functional as F

In [2]:
def open_data(file_path):
    file = open(file_path,"rb")
    raw_data = pickle.load(file)  
    return raw_data

In [3]:
def save_data(data, file_path):
    with open(file_path , 'wb') as f:
        pickle.dump(data,f)
        f.close()

In [4]:
class TimeSeriesDataset(Dataset):
    def __init__(self, inputs, targets, dist, dist_arg, masks, min_vals, max_vals, mask_dist_max, class_labels):
        self.inputs = inputs
        self.targets = targets
        self.dist = dist
        self.dist_arg = dist_arg
        self.masks = masks
        self.min_vals = min_vals
        self.max_vals = max_vals
        self.mask_dist_max = mask_dist_max
        self.class_labels = class_labels
    
    def __len__(self):
        return len(self.inputs)

    def __getitem__(self, idx):
        # 获取输入和对应的目标数据
        x = self.inputs[idx]
        y = self.targets[idx]
        d_ = self.dist[idx]
        d_arg = self.dist_arg[idx]
        mask = self.masks[idx]
        min_ = self.min_vals[idx]
        max_ = self.max_vals[idx]
        mask_d_ = self.mask_dist_max[idx]
        class_ = self.class_labels[idx]
        return x, y, d_, d_arg, mask, min_, max_, mask_d_, class_

In [5]:
# 定义 RNN 模型
class RNNModel(nn.Module):
    def __init__(self, input_size, hidden_size, output_size, output_seq_len, drop_out = True):
        super(RNNModel, self).__init__()
        self.hidden_size = hidden_size
        self.rnn = nn.RNN(input_size, hidden_size, num_layers=2, batch_first=True)
        self.fc = nn.Linear(hidden_size, output_size * output_seq_len)
        self.output_seq_len = output_seq_len
        self.output_size = output_size
        self.acti_func = nn.ReLU()
        self.drop_out = drop_out
        for m in self.modules():
            if isinstance(m, nn.Linear):
                #m.weight.data = init.xavier_normal_(m.weight.data, nonlinearity='relu')
                m.weight.data = init.kaiming_normal_(m.weight.data, nonlinearity="relu")
                if m.bias is not None:
                    m.bias.data = init.constant_(m.bias.data, 0.0) 

    
    def forward(self, x):
        # x 的形状为 (batch_size, seq_len, input_size)
        lstm_out, _ = self.rnn(x)  # lstm_out: (batch_size, seq_len, hidden_size)
        lstm_out = lstm_out[:, -1, :]  # 取最后一个时间步的输出 (batch_size, hidden_size)
        if self.drop_out:
            lstm_out = F.dropout(lstm_out, training=self.training, p=0.2)
        out = self.fc(lstm_out)# 全连接层 (batch_size, output_size * output_seq_len)
        out= self.acti_func(out)
        out = out.view(-1, self.output_seq_len, self.output_size)  # 重塑为 (batch_size, output_seq_len, num_nodes * num_features)
        return out

In [6]:
train_dataset = torch.load("D:/ThesisData/processed data/SourceDomain/NEW/train_data_14days.h5")
vali_bangkok = torch.load("D:/ThesisData/processed data/SourceDomain/NEW/Bangkok_vali_data_14days.h5")
vali_antwerp = torch.load("D:/ThesisData/processed data/SourceDomain/NEW/Antwerp_vali_data_14days.h5")

In [7]:
test_bangkok = torch.load("D:/ThesisData/processed data/SourceDomain/NEW/Bangkok_test_data_14days.h5")
test_antwerp = torch.load("D:/ThesisData/processed data/SourceDomain/NEW/Antwerp_test_data_14days.h5")

In [8]:
batch_size = 16
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
Bangkok_vali_loader = DataLoader(vali_bangkok, batch_size=batch_size, shuffle=False)
Antwerp_vali_loader = DataLoader(vali_antwerp, batch_size=batch_size, shuffle=False)
Bangkok_test_loader = DataLoader(test_bangkok, batch_size=batch_size, shuffle=False)
Antwerp_test_loader = DataLoader(test_antwerp, batch_size=batch_size, shuffle=False)

In [9]:
seq_len = 28
num_nodes = 1466
num_features = 1
input_size = num_nodes * num_features  # LSTM 输入的维度
hidden_size = 256   # RNN 隐藏层的维度
output_seq_len = 14  # 预测14天
output_size = num_nodes * num_features  # 每个时间步预测的数据量

In [10]:
model = RNNModel(input_size=input_size, hidden_size=hidden_size, output_size=output_size, output_seq_len=output_seq_len)
criterion_MSE = nn.MSELoss(reduction='none')  # 使用均方误差作为损失函数
criterion_MAE = nn.L1Loss(reduction='none')
optimizer = optim.NAdam(model.parameters(), lr=4e-4, weight_decay=1e-5)

In [11]:
epoch_num = 40

In [12]:
for h in range(epoch_num):
    model.train()
    for batch_input, batch_labels, batch_dist, batch_dist_arg, batch_masks, batch_min, batch_max, batch_dist_mask, _ in train_loader:
        
        batch_input, batch_labels, batch_masks = batch_input.squeeze(), batch_labels.squeeze(), batch_masks.squeeze()
        batch_min, batch_max = batch_min.squeeze(), batch_max.squeeze()
  
        batch_outputs = model(batch_input)

        
        loss_mse = criterion_MSE(batch_outputs, batch_labels) * batch_masks
        loss_mae = criterion_MAE(batch_outputs, batch_labels) * batch_masks
        loss_mse = loss_mse.sum() / batch_masks.sum()
        loss_mae = loss_mae.sum() / batch_masks.sum()
        
        optimizer.zero_grad()
        loss_mse.backward()
        optimizer.step()

        del loss_mse
        del loss_mae
        
        batch_outputs = batch_outputs * (batch_max - batch_min) + batch_min
        batch_labels = batch_labels * (batch_max - batch_min) + batch_min
        loss_mse = criterion_MSE(batch_outputs, batch_labels) * batch_masks
        loss_mae = criterion_MAE(batch_outputs, batch_labels) * batch_masks
        loss_mse = loss_mse.sum() / batch_masks.sum()
        loss_mae = loss_mae.sum() / batch_masks.sum()

    
    print(f"epoch {h + 1}, Train: MSE={loss_mse.item():.0f}, MAE={loss_mae.item():.0f}")

    model.eval()

    for batch_input, batch_labels, batch_dist, batch_dist_arg, batch_masks, batch_min, batch_max, batch_dist_mask, _ in Antwerp_vali_loader:
        batch_input, batch_labels, batch_masks = batch_input.squeeze(), batch_labels.squeeze(), batch_masks.squeeze()
        batch_min, batch_max = batch_min.squeeze(), batch_max.squeeze()
        
        batch_outputs = model(batch_input)
        Antwerp_vali_outputs = batch_outputs * (batch_max - batch_min) + batch_min
        Antwerp_vali_labels = batch_labels * (batch_max - batch_min) + batch_min
        loss_mse = criterion_MSE(Antwerp_vali_outputs, Antwerp_vali_labels) * batch_masks
        loss_mae = criterion_MAE(Antwerp_vali_outputs, Antwerp_vali_labels) * batch_masks
        loss_mse = loss_mse.sum() / batch_masks.sum()
        loss_mae = loss_mae.sum() / batch_masks.sum()

        
    print(f"--Anterp Vali set: MSE={loss_mse.item():.0f}, MAE={loss_mae.item():.0f}")  


    for batch_input, batch_labels, batch_dist, batch_dist_arg, batch_masks, batch_min, batch_max, batch_dist_mask, _ in Antwerp_test_loader:
        batch_input, batch_labels, batch_masks = batch_input.squeeze(), batch_labels.squeeze(), batch_masks.squeeze()
        batch_min, batch_max = batch_min.squeeze(), batch_max.squeeze()
        
        batch_outputs = model(batch_input)
        Antwerp_test_outputs = batch_outputs * (batch_max - batch_min) + batch_min
        Antwerp_test_labels = batch_labels * (batch_max - batch_min) + batch_min
        loss_mse = criterion_MSE(Antwerp_test_outputs, Antwerp_test_labels) * batch_masks
        loss_mae = criterion_MAE(Antwerp_test_outputs, Antwerp_test_labels) * batch_masks
        loss_mse = loss_mse.sum() / batch_masks.sum()
        loss_mae = loss_mae.sum() / batch_masks.sum()

        
    print(f"--Anterp Test set: MSE={loss_mse.item():.0f}, MAE={loss_mae.item():.0f}") 


    


    for batch_input, batch_labels, batch_dist, batch_dist_arg, batch_masks, batch_min, batch_max, batch_dist_mask, _ in Bangkok_vali_loader:
        batch_input, batch_labels, batch_masks = batch_input.squeeze(), batch_labels.squeeze(), batch_masks.squeeze()
        batch_min, batch_max = batch_min.squeeze(), batch_max.squeeze()

        batch_outputs = model(batch_input)
        Bangkok_vali_outputs = batch_outputs * (batch_max - batch_min) + batch_min
        Bangkok_vali_labels = batch_labels * (batch_max - batch_min) + batch_min
        loss_mse = criterion_MSE(Bangkok_vali_outputs, Bangkok_vali_labels) * batch_masks
        loss_mae = criterion_MAE(Bangkok_vali_outputs, Bangkok_vali_labels) * batch_masks
        loss_mse = loss_mse.sum() / batch_masks.sum()
        loss_mae = loss_mae.sum() / batch_masks.sum()

    
    print(f"--Bangkok Vali set: MSE={loss_mse.item():.0f}, MAE={loss_mae.item():.0f}")     


    for batch_input, batch_labels, batch_dist, batch_dist_arg, batch_masks, batch_min, batch_max, batch_dist_mask, _ in Bangkok_test_loader:
        batch_input, batch_labels, batch_masks = batch_input.squeeze(), batch_labels.squeeze(), batch_masks.squeeze()
        batch_min, batch_max = batch_min.squeeze(), batch_max.squeeze()

        batch_outputs = model(batch_input)
        Bangkok_test_outputs = batch_outputs * (batch_max - batch_min) + batch_min
        Bangkok_test_labels = batch_labels * (batch_max - batch_min) + batch_min
        loss_mse = criterion_MSE(Bangkok_test_outputs, Bangkok_test_labels) * batch_masks
        loss_mae = criterion_MAE(Bangkok_test_outputs, Bangkok_test_labels) * batch_masks
        loss_mse = loss_mse.sum() / batch_masks.sum()
        loss_mae = loss_mae.sum() / batch_masks.sum()

    
    print(f"--Bangkok Test set: MSE={loss_mse.item():.0f}, MAE={loss_mae.item():.0f}")    


    #if h == epoch_num-1:
        #lstm_results = {"Antwerp_vali": Antwerp_vali_outputs.detach().numpy(), "Antwerp_vali_label": Antwerp_vali_labels.detach().numpy(), "Bangkok_vali": Bangkok_vali_outputs.detach().numpy(), "Bangkok_vali_label": Bangkok_vali_labels.detach().numpy(),
                    #"Antwerp_test": Antwerp_test_outputs.detach().numpy(), "Antwerp_test_label": Antwerp_test_labels.detach().numpy(), "Bangkok_test": Bangkok_test_outputs.detach().numpy(), "Bangkok_test_label": Bangkok_test_labels.detach().numpy(),
                     #"Antwerp_node_num": 1466,  "Bangkok_node_num": 1137}
        
        #save_data(lstm_results, "D:/ThesisData/processed data/SourceDomain/NEW/results/rnn_results.h5")

epoch 1, Train: MSE=17242, MAE=100
--Anterp Vali set: MSE=6148, MAE=53
--Anterp Test set: MSE=9670, MAE=69
--Bangkok Vali set: MSE=6921, MAE=61
--Bangkok Test set: MSE=10289, MAE=77
epoch 2, Train: MSE=12974, MAE=84
--Anterp Vali set: MSE=5944, MAE=52
--Anterp Test set: MSE=9405, MAE=67
--Bangkok Vali set: MSE=6693, MAE=60
--Bangkok Test set: MSE=9731, MAE=75
epoch 3, Train: MSE=13480, MAE=88
--Anterp Vali set: MSE=5952, MAE=52
--Anterp Test set: MSE=9291, MAE=67
--Bangkok Vali set: MSE=6271, MAE=57
--Bangkok Test set: MSE=8921, MAE=71
epoch 4, Train: MSE=11287, MAE=75
--Anterp Vali set: MSE=5574, MAE=50
--Anterp Test set: MSE=8450, MAE=64
--Bangkok Vali set: MSE=5078, MAE=51
--Bangkok Test set: MSE=7040, MAE=62
epoch 5, Train: MSE=7917, MAE=63
--Anterp Vali set: MSE=5434, MAE=49
--Anterp Test set: MSE=8241, MAE=63
--Bangkok Vali set: MSE=5140, MAE=52
--Bangkok Test set: MSE=7077, MAE=63
epoch 6, Train: MSE=8563, MAE=68
--Anterp Vali set: MSE=4945, MAE=47
--Anterp Test set: MSE=7181, M

In [17]:
#Save Model Para
torch.save({
    'rnn_state_dict': model.state_dict(),
    'rnn_optimizer_state_dict': optimizer.state_dict(),
}, 'D:/ThesisData/processed data/ModelPara/source_rnn_14days.pth')

In [57]:
checkpoint = torch.load('D:/ThesisData/processed data/ModelPara/source_rnn.pth')
model.load_state_dict(checkpoint['rnn_state_dict'])
#optimizer.load_state_dict(checkpoint['rnn_optimizer_state_dict'])

<All keys matched successfully>

In [21]:
target_train = torch.load("D:/ThesisData/processed data/TargetDomain/NEW/train_data_14days.h5")
vali_barcelona = torch.load("D:/ThesisData/processed data/TargetDomain/NEW/Barcelona_vali_data_14days.h5")
test_barcelona = torch.load("D:/ThesisData/processed data/TargetDomain/NEW/Barcelona_test_data_14days.h5")

In [23]:
batch_size = 14
train_loader = DataLoader(target_train, batch_size=batch_size, shuffle=True)
Barcelona_vali_loader = DataLoader(vali_barcelona, batch_size=batch_size, shuffle=False)
Barcelona_test_loader = DataLoader(test_barcelona, batch_size=batch_size, shuffle=False)

In [24]:
optimizer = optim.NAdam(model.parameters(), lr=2e-4,weight_decay=1e-5)
#lstm_scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=11, gamma=0.65)

In [25]:
epoch_num = 40

In [26]:
for h in range(epoch_num):
    model.train()
    for batch_input, batch_labels, batch_dist, batch_dist_arg, batch_masks, batch_min, batch_max, batch_dist_mask, _ in train_loader:
        
        batch_input, batch_labels, batch_masks = batch_input.squeeze(), batch_labels.squeeze(), batch_masks.squeeze()
        batch_min, batch_max = batch_min.squeeze(), batch_max.squeeze()
  
        batch_outputs = model(batch_input)

        
        loss_mse = criterion_MSE(batch_outputs, batch_labels) * batch_masks
        loss_mae = criterion_MAE(batch_outputs, batch_labels) * batch_masks
        loss_mse = loss_mse.sum() / batch_masks.sum()
        loss_mae = loss_mae.sum() / batch_masks.sum()
        
        optimizer.zero_grad()
        loss_mse.backward()
        optimizer.step()

        del loss_mse
        del loss_mae
        
        batch_outputs = batch_outputs * (batch_max - batch_min) + batch_min
        batch_labels = batch_labels * (batch_max - batch_min) + batch_min
        loss_mse = criterion_MSE(batch_outputs, batch_labels) * batch_masks
        loss_mae = criterion_MAE(batch_outputs, batch_labels) * batch_masks
        loss_mse = loss_mse.sum() / batch_masks.sum()
        loss_mae = loss_mae.sum() / batch_masks.sum()

    
    print(f"epoch {h + 1}, Train: MSE={loss_mse.item():.0f}, MAE={loss_mae.item():.0f}")

    model.eval()

    for batch_input, batch_labels, batch_dist, batch_dist_arg, batch_masks, batch_min, batch_max, batch_dist_mask, _ in Barcelona_vali_loader:
        batch_input, batch_labels, batch_masks = batch_input.squeeze(), batch_labels.squeeze(), batch_masks.squeeze()
        batch_min, batch_max = batch_min.squeeze(), batch_max.squeeze()
        
        batch_outputs = model(batch_input)
        Barcelona_vali_outputs = batch_outputs * (batch_max - batch_min) + batch_min
        Barcelona_vali_labels = batch_labels * (batch_max - batch_min) + batch_min
        loss_mse = criterion_MSE(Barcelona_vali_outputs, Barcelona_vali_labels) * batch_masks
        loss_mae = criterion_MAE(Barcelona_vali_outputs, Barcelona_vali_labels) * batch_masks
        loss_mse = loss_mse.sum() / batch_masks.sum()
        loss_mae = loss_mae.sum() / batch_masks.sum()

        
    print(f"--Barcelona Vali set: MSE={loss_mse.item():.0f}, MAE={loss_mae.item():.0f}")  


    for batch_input, batch_labels, batch_dist, batch_dist_arg, batch_masks, batch_min, batch_max, batch_dist_mask, _ in Barcelona_test_loader:
        batch_input, batch_labels, batch_masks = batch_input.squeeze(), batch_labels.squeeze(), batch_masks.squeeze()
        batch_min, batch_max = batch_min.squeeze(), batch_max.squeeze()
        
        batch_outputs = model(batch_input)
        Barcelona_test_outputs = batch_outputs * (batch_max - batch_min) + batch_min
        Barcelona_test_labels = batch_labels * (batch_max - batch_min) + batch_min
        loss_mse = criterion_MSE(Barcelona_test_outputs, Barcelona_test_labels) * batch_masks
        loss_mae = criterion_MAE(Barcelona_test_outputs, Barcelona_test_labels) * batch_masks
        loss_mse = loss_mse.sum() / batch_masks.sum()
        loss_mae = loss_mae.sum() / batch_masks.sum()

        
    print(f"--Barcelona Test set: MSE={loss_mse.item():.0f}, MAE={loss_mae.item():.0f}") 

    #lstm_scheduler.step()
    if h == epoch_num-1 or loss_mse <=4000: 
        lstm_results = {"Barcelona_vali": Barcelona_vali_outputs.detach().numpy(), "Barcelona_vali_label": Barcelona_vali_labels.detach().numpy(),
                            "Barcelona_test": Barcelona_test_outputs.detach().numpy(), "Barcelona_test_label": Barcelona_test_labels.detach().numpy(),
                            "Barcelona_node_num": 1273}
        
        save_data(lstm_results, "D:/ThesisData/processed data/TargetDomain/NEW/results/rnn_results.h5")
     

epoch 1, Train: MSE=7379, MAE=71
--Barcelona Vali set: MSE=2642, MAE=37
--Barcelona Test set: MSE=4308, MAE=51
epoch 2, Train: MSE=6746, MAE=67
--Barcelona Vali set: MSE=2780, MAE=38
--Barcelona Test set: MSE=4469, MAE=53
epoch 3, Train: MSE=6256, MAE=64
--Barcelona Vali set: MSE=2820, MAE=39
--Barcelona Test set: MSE=4485, MAE=54
epoch 4, Train: MSE=5932, MAE=62
--Barcelona Vali set: MSE=2986, MAE=41
--Barcelona Test set: MSE=4775, MAE=56
epoch 5, Train: MSE=5509, MAE=60
--Barcelona Vali set: MSE=2940, MAE=41
--Barcelona Test set: MSE=4536, MAE=55
epoch 6, Train: MSE=4924, MAE=57
--Barcelona Vali set: MSE=2861, MAE=41
--Barcelona Test set: MSE=4278, MAE=53
epoch 7, Train: MSE=4539, MAE=55
--Barcelona Vali set: MSE=2999, MAE=42
--Barcelona Test set: MSE=4473, MAE=55
epoch 8, Train: MSE=4267, MAE=53
--Barcelona Vali set: MSE=3072, MAE=43
--Barcelona Test set: MSE=4566, MAE=56
epoch 9, Train: MSE=3906, MAE=51
--Barcelona Vali set: MSE=2953, MAE=42
--Barcelona Test set: MSE=4291, MAE=54
e