In [1]:
import numpy as np
import matplotlib.pyplot as plt
import os
import pickle
import torch
import torch.nn as nn
from torch_geometric.data import Data, Dataset
import networkx as nx
from torch_geometric.loader import DataLoader

In [2]:
def open_data(file_path):
    """Load and return the object pickled in ``file_path``.

    Args:
        file_path: Path of a pickle file (the notebook uses a ``.h5`` suffix,
            but the content is read with ``pickle``, not HDF5).

    Returns:
        Whatever object was pickled in the file.

    Raises:
        OSError: If the file cannot be opened.
        pickle.UnpicklingError: If the content is not a valid pickle.
    """
    # NOTE(security): pickle.load can execute arbitrary code embedded in the
    # file, so only open files produced by a trusted pipeline.
    # The context manager guarantees the handle is closed even if loading fails.
    with open(file_path, "rb") as file:
        return pickle.load(file)

In [3]:
class TimeSeriesDataset(torch.utils.data.Dataset):
    """Map-style dataset over nine parallel, index-aligned containers.

    Indexing the dataset indexes every container with the same ``idx`` and
    returns the results as one tuple, in constructor-argument order.
    """

    def __init__(self, inputs, targets, dist, dist_arg, masks, min_vals, max_vals, mask_dist_max, class_labels):
        # Attribute names are part of the pickled state when an instance is
        # written with torch.save, so they are kept stable.
        self.inputs = inputs
        self.targets = targets
        self.dist = dist
        self.dist_arg = dist_arg
        self.masks = masks
        self.min_vals = min_vals
        self.max_vals = max_vals
        self.mask_dist_max = mask_dist_max
        self.class_labels = class_labels

    def __len__(self):
        # The length is taken from the inputs container only.
        return len(self.inputs)

    def __getitem__(self, idx):
        # Fetch the input and its matching target data, followed by the
        # remaining per-sample fields.
        per_sample_fields = (
            self.inputs,
            self.targets,
            self.dist,
            self.dist_arg,
            self.masks,
            self.min_vals,
            self.max_vals,
            self.mask_dist_max,
            self.class_labels,
        )
        return tuple(field[idx] for field in per_sample_fields)

In [16]:
def train_data_prepare(city_names, total_ratio = 0.8 ,train_ratio = 0.6, max_node_num = 1466):
    """Load, node-pad and concatenate the training samples of several cities.

    For every city the pickled dictionary at
    ``D:/ThesisData/processed data/<city>/NEW/<city>_data_14days.h5`` is read
    with ``open_data``. The leading samples of ``input_result`` and
    ``target_result`` are kept (with the first entry of the last axis
    dropped), every tensor is zero-padded along dim 2 to ``max_node_num``,
    and the per-city results are concatenated along dim 0.

    The number of samples is derived from the FIRST city only:
    ``sample_num = int(n_samples_first_city * total_ratio * train_ratio)``.
    "Antwerp" and "Bangkok" contribute ``sample_num`` samples with class
    label 1; any other city contributes ``int(sample_num * 0.5)`` samples
    with class label 0.

    Args:
        city_names: Non-empty sequence of city names (an empty sequence
            raises ``IndexError``).
        total_ratio: Fraction of the first city's samples considered usable.
        train_ratio: Fraction of the usable samples used for training.
        max_node_num: Size dim 2 is padded to. It must be at least each
            city's own size along that axis, otherwise the padding
            assignment raises ``RuntimeError``.

    Returns:
        Tuple of nine tensors, all concatenated over cities along dim 0:
        ``(input, label, dist_max, dist_argmax, label_mask, min_vals,
        max_vals, dist_mask, class_label)``. ``dist_argmax`` is ``int32``;
        ``label_mask`` and ``dist_mask`` are 1 where real data was written
        and 0 in the padded region.
    """
    source_cities = ("Antwerp", "Bangkok")

    def _load_city(name):
        # Read the pickled per-city dictionary.
        return open_data(f"D:/ThesisData/processed data/{name}/NEW/{name}_data_14days.h5")

    # The first city defines the sample budget; keep its dictionary so the
    # file is not read a second time inside the loop.
    first_city_dict = _load_city(city_names[0])
    sample_num = int(first_city_dict["input_result"].shape[0] * total_ratio * train_ratio)

    # Per-city pieces are collected and concatenated once at the end instead
    # of re-concatenating the growing result on every iteration.
    inputs, targets, class_labels = [], [], []
    dist_maxes, dist_argmaxes = [], []
    target_masks, dist_masks = [], []
    min_vals_list, max_vals_list = [], []

    for i, city in enumerate(city_names):
        city_dict = first_city_dict if i == 0 else _load_city(city)
        is_source = city in source_cities
        total_scale = sample_num if is_source else int(sample_num * 0.5)

        # Keep the leading samples and drop entry 0 of the last axis.
        input_ = torch.tensor(city_dict["input_result"], dtype=torch.float)[:total_scale,:,:,1:]
        target_ = torch.tensor(city_dict["target_result"], dtype=torch.float)[:total_scale,:,:,1:]
        n_samples = input_.shape[0]

        make_label = torch.ones if is_source else torch.zeros
        class_labels.append(make_label((n_samples, 1), dtype=torch.float))

        # Broadcast the per-city normalisation bounds to the padded target layout.
        min_vals = torch.tensor(city_dict["min_vals"], dtype = torch.float).unsqueeze(0).unsqueeze(0).unsqueeze(0)
        max_vals = torch.tensor(city_dict["max_vals"], dtype = torch.float).unsqueeze(0).unsqueeze(0).unsqueeze(0)
        min_vals_list.append(min_vals.repeat(target_.shape[0], target_.shape[1], max_node_num, 1))
        max_vals_list.append(max_vals.repeat(target_.shape[0], target_.shape[1], max_node_num, 1))

        # Replicate the static distance tables for every sample and input step.
        dist_max = torch.tensor(city_dict["dist_max"], dtype=torch.float).unsqueeze(0).unsqueeze(0).repeat(n_samples, input_.shape[1], 1, 1)
        dist_argmax = torch.tensor(city_dict["dist_argmax"]).unsqueeze(0).unsqueeze(0).repeat(n_samples, input_.shape[1], 1, 1)

        # Zero-pad everything along dim 2 up to max_node_num.
        padded_input = torch.zeros(n_samples, input_.shape[1], max_node_num, input_.shape[3])
        padded_input[:, :, :input_.shape[2], :] = input_
        padded_target = torch.zeros(target_.shape[0], target_.shape[1], max_node_num, target_.shape[3])
        padded_target[:, :, :target_.shape[2], :] = target_
        padded_dist_max = torch.zeros(n_samples, dist_max.shape[1], max_node_num, dist_max.shape[3])
        padded_dist_max[:, :, :dist_max.shape[2], :] = dist_max
        padded_dist_argmax = torch.zeros_like(padded_dist_max)
        padded_dist_argmax[:, :, :dist_argmax.shape[2], :] = dist_argmax

        # Validity masks: 1 where real data was written, 0 in the padding.
        padded_mask_target = torch.zeros_like(padded_target)
        padded_mask_target[:, :, :target_.shape[2], :] = 1
        padded_mask_dist_max = torch.zeros_like(padded_dist_max)
        padded_mask_dist_max[:, :, :dist_max.shape[2], :] = 1

        inputs.append(padded_input)
        targets.append(padded_target)
        dist_maxes.append(padded_dist_max)
        dist_argmaxes.append(padded_dist_argmax)
        target_masks.append(padded_mask_target)
        # Fix: the mask (not the padded distances) is what gets accumulated here.
        dist_masks.append(padded_mask_dist_max)

    # The padded region is already zero and the masks are 1 everywhere data
    # was written, so multiplying by the masks would be an identity; the
    # padded tensors are returned directly.
    train_input = torch.cat(inputs, dim=0)
    train_label = torch.cat(targets, dim=0)
    train_dist_max = torch.cat(dist_maxes, dim=0)
    train_dist_argmax = torch.cat(dist_argmaxes, dim=0).type(torch.int32)
    train_mask_label = torch.cat(target_masks, dim=0)
    train_min_vals = torch.cat(min_vals_list, dim = 0)
    train_max_vals = torch.cat(max_vals_list, dim = 0)
    combined_mask_dist_max = torch.cat(dist_masks, dim=0)
    combined_class_label = torch.cat(class_labels, dim=0)

    print(f"train set output shape:")
    print(f"input and label: {train_input.shape}, {train_label.shape}")
    print(f"train_dist_max and train_dist_argmax: {train_dist_max.shape}, {train_dist_argmax.shape}")
    print(f"train_mask_label shape: {train_mask_label.shape}")
    print(f"train_min_vals and train_max_vals: {train_min_vals.shape}")
    print(f"combined_mask_dist_max shape: {combined_mask_dist_max.shape}")
    print(f"combined_class_label shape: {combined_class_label.shape}")

    return train_input, train_label, train_dist_max, train_dist_argmax, train_mask_label, train_min_vals, train_max_vals, combined_mask_dist_max, combined_class_label 






In [17]:
def vali_test_data_prepare(city_name, purpose, total_ratio = 0.8 , train_ratio = 0.6, vali_ratio=0.2, max_node_num = 1466):
    """Build the node-padded validation or test tensors of a single city.

    The pickled dictionary at
    ``D:/ThesisData/processed data/<city>/NEW/<city>_data_14days.h5`` is read
    with ``open_data``. With ``total_scale = int(n_samples * total_ratio)``,
    ``train_num = int(total_scale * train_ratio)`` and
    ``vali_num = int(total_scale * (train_ratio + vali_ratio))`` the selected
    samples are ``[train_num:vali_num]`` for ``"vali"`` and
    ``[vali_num:total_scale]`` for ``"test"``. The first entry of the last
    axis is dropped and every tensor is zero-padded along dim 2 to
    ``max_node_num``.

    Args:
        city_name: City to load. "Antwerp" and "Bangkok" get class label 1;
            any other city gets class label 0 (same rule as
            ``train_data_prepare``).
        purpose: Either ``"vali"`` or ``"test"``.
        total_ratio: Fraction of the city's samples considered usable.
        train_ratio: Fraction of the usable samples reserved for training.
        vali_ratio: Fraction of the usable samples used for validation.
        max_node_num: Size dim 2 is padded to. It must be at least the
            city's own size along that axis, otherwise the padding
            assignment raises ``RuntimeError``.

    Returns:
        Tuple of nine tensors ``(input, label, dist_max, dist_argmax,
        label_mask, min_vals, max_vals, dist_mask, class_label)``.
        ``dist_argmax`` is ``int32``; the masks are 1 where real data was
        written and 0 in the padded region.

    Raises:
        ValueError: If ``purpose`` is neither ``"vali"`` nor ``"test"``.
    """
    # Fail early with a clear message instead of an UnboundLocalError later,
    # and before the (potentially large) file is read.
    if purpose not in ("vali", "test"):
        raise ValueError(f"purpose must be 'vali' or 'test', got {purpose!r}")

    city_dict = open_data(f"D:/ThesisData/processed data/{city_name}/NEW/{city_name}_data_14days.h5")
    total_scale = int(city_dict["input_result"].shape[0] * total_ratio)
    train_num = int(total_scale * train_ratio)
    vali_num =  int(total_scale * (train_ratio + vali_ratio))

    if purpose == "vali":
        start, stop = train_num, vali_num
    else:
        start, stop = vali_num, total_scale

    # Select the split and drop entry 0 of the last axis.
    input_ = torch.tensor(city_dict["input_result"], dtype=torch.float)[start:stop, :,:,1:]
    target_ = torch.tensor(city_dict["target_result"], dtype=torch.float)[start:stop, :,:,1:]
    n_samples = input_.shape[0]

    # Label 1 for the two source cities, 0 for every other city.
    if city_name == "Antwerp" or city_name == "Bangkok":
        class_label = torch.ones((n_samples, 1), dtype=torch.float)
    else:
        class_label = torch.zeros((n_samples, 1), dtype=torch.float)

    # Broadcast the per-city normalisation bounds to the padded target layout.
    min_vals = torch.tensor(city_dict["min_vals"], dtype = torch.float).unsqueeze(0).unsqueeze(0).unsqueeze(0)
    max_vals = torch.tensor(city_dict["max_vals"], dtype = torch.float).unsqueeze(0).unsqueeze(0).unsqueeze(0)
    min_vals = min_vals.repeat(target_.shape[0], target_.shape[1], max_node_num, 1)
    max_vals = max_vals.repeat(target_.shape[0], target_.shape[1], max_node_num, 1)

    # Replicate the static distance tables for every sample and input step.
    dist_max = torch.tensor(city_dict["dist_max"], dtype=torch.float).unsqueeze(0).unsqueeze(0).repeat(n_samples, input_.shape[1], 1, 1)
    dist_argmax = torch.tensor(city_dict["dist_argmax"]).unsqueeze(0).unsqueeze(0).repeat(n_samples, input_.shape[1], 1, 1)

    # Zero-pad everything along dim 2 up to max_node_num.
    padded_input = torch.zeros(n_samples, input_.shape[1], max_node_num, input_.shape[3])
    padded_input[:, :, :input_.shape[2], :] = input_
    padded_target = torch.zeros(target_.shape[0], target_.shape[1], max_node_num, target_.shape[3])
    padded_target[:, :, :target_.shape[2], :] = target_
    padded_dist_max = torch.zeros(n_samples, dist_max.shape[1], max_node_num, dist_max.shape[3])
    padded_dist_max[:, :, :dist_max.shape[2], :] = dist_max
    padded_dist_argmax = torch.zeros_like(padded_dist_max)
    padded_dist_argmax[:, :, :dist_argmax.shape[2], :] = dist_argmax

    # Validity masks: 1 where real data was written, 0 in the padding.
    mask_label = torch.zeros_like(padded_target)
    mask_label[:, :, :target_.shape[2], :] = 1
    combined_mask_dist_max = torch.zeros_like(padded_dist_max)
    combined_mask_dist_max[:, :, :dist_max.shape[2], :] = 1

    # The padded region is already zero, so multiplying by the 0/1 masks
    # would be an identity; only the int32 cast of the indices remains.
    split_dist_argmax = padded_dist_argmax.type(torch.int32)

    print(f"{purpose} set output shape:")
    print(f"input and label: {padded_input.shape}, {padded_target.shape}")
    print(f"{purpose}_dist_max and {purpose}_dist_argmax: {padded_dist_max.shape}, {split_dist_argmax.shape}")
    print(f"{purpose}_mask_label shape: {mask_label.shape}")
    print(f"{purpose}_min_vals and {purpose}_max_vals: {min_vals.shape}")
    print(f"combined_mask_dist_max shape: {combined_mask_dist_max.shape}")
    print(f"combined_class_label shape: {class_label.shape}")

    return padded_input, padded_target, padded_dist_max, split_dist_argmax, mask_label, min_vals, max_vals, combined_mask_dist_max, class_label

In [18]:

# Domain names iterated by the training-export loop; each one selects a city
# list and the "<domain>Domain" output folder.
domains = ["Source","Target"]

In [19]:
# Build and save one training dataset per domain.
# Cities whose samples make up each domain's training set.
domain_city_map = {
    "Source": ["Antwerp", "Bangkok"],
    "Target": ["Barcelona"],
}

for domain in domains:
    # An unrecognised domain leaves city_names at its previous value.
    if domain in domain_city_map:
        city_names = domain_city_map[domain]

    (
        train_input,
        train_label,
        train_dist_max,
        train_dist_argmax,
        train_mask_label,
        train_min_vals,
        train_max_vals,
        combined_mask_dist_max,
        combined_class_label,
    ) = train_data_prepare(city_names)

    train_data_set = TimeSeriesDataset(
        train_input,
        train_label,
        train_dist_max,
        train_dist_argmax,
        train_mask_label,
        train_min_vals,
        train_max_vals,
        combined_mask_dist_max,
        combined_class_label,
    )

    # The whole dataset object is serialised; loading it later requires the
    # TimeSeriesDataset class to be importable under the same name.
    torch.save(train_data_set, f"D:/ThesisData/processed data/{domain}Domain/NEW/train_data_14days.h5")

train set output shape:
input and label: torch.Size([134, 28, 1466, 1]), torch.Size([134, 14, 1466, 1])
train_dist_max and train_dist_argmax: torch.Size([134, 28, 1466, 50]), torch.Size([134, 28, 1466, 50])
train_mask_label shape: torch.Size([134, 14, 1466, 1])
train_min_vals and train_max_vals: torch.Size([134, 14, 1466, 1])
combined_mask_dist_max shape: torch.Size([134, 28, 1466, 50])
combined_class_label shape: torch.Size([134, 1])
train set output shape:
input and label: torch.Size([33, 28, 1466, 1]), torch.Size([33, 14, 1466, 1])
train_dist_max and train_dist_argmax: torch.Size([33, 28, 1466, 50]), torch.Size([33, 28, 1466, 50])
train_mask_label shape: torch.Size([33, 14, 1466, 1])
train_min_vals and train_max_vals: torch.Size([33, 14, 1466, 1])
combined_mask_dist_max shape: torch.Size([33, 28, 1466, 50])
combined_class_label shape: torch.Size([33, 1])


In [22]:
# Splits to export and the cities to export them for (consumed by the
# validation/test export loop).
purposes=["vali","test"]
city_names = ["Antwerp","Barcelona","Bangkok"]

In [23]:
# Build and save the validation and test datasets of every city.
# Barcelona is written under TargetDomain, every other city under SourceDomain.
for city_name in city_names:
    for purpose in purposes:
        (
            train_input,
            train_label,
            train_dist_max,
            train_dist_argmax,
            train_mask_label,
            train_min_vals,
            train_max_vals,
            combined_mask_dist_max,
            combined_class_label,
        ) = vali_test_data_prepare(city_name, purpose)

        # The variable names still say "train", but the tensors hold the
        # split selected by `purpose`.
        train_data_set = TimeSeriesDataset(
            train_input,
            train_label,
            train_dist_max,
            train_dist_argmax,
            train_mask_label,
            train_min_vals,
            train_max_vals,
            combined_mask_dist_max,
            combined_class_label,
        )

        if city_name == "Barcelona":
            torch.save(train_data_set, f"D:/ThesisData/processed data/TargetDomain/NEW/{city_name}_{purpose}_data_14days.h5")
        else:
            torch.save(train_data_set, f"D:/ThesisData/processed data/SourceDomain/NEW/{city_name}_{purpose}_data_14days.h5")

vali set output shape:
input and label: torch.Size([22, 28, 1466, 1]), torch.Size([22, 14, 1466, 1])
vali_dist_max and vali_dist_argmax: torch.Size([22, 28, 1466, 50]), torch.Size([22, 28, 1466, 50])
vali_mask_label shape: torch.Size([22, 14, 1466, 1])
vali_min_vals and vali_max_vals: torch.Size([22, 14, 1466, 1])
combined_mask_dist_max shape: torch.Size([22, 28, 1466, 50])
combined_class_label shape: torch.Size([22, 1])
test set output shape:
input and label: torch.Size([23, 28, 1466, 1]), torch.Size([23, 14, 1466, 1])
test_dist_max and test_dist_argmax: torch.Size([23, 28, 1466, 50]), torch.Size([23, 28, 1466, 50])
test_mask_label shape: torch.Size([23, 14, 1466, 1])
test_min_vals and test_max_vals: torch.Size([23, 14, 1466, 1])
combined_mask_dist_max shape: torch.Size([23, 28, 1466, 50])
combined_class_label shape: torch.Size([23, 1])
vali set output shape:
input and label: torch.Size([22, 28, 1466, 1]), torch.Size([22, 14, 1466, 1])
vali_dist_max and vali_dist_argmax: torch.Size([2

In [32]:
# Extract each city's "coord_list" entry and save it on its own.
# NOTE(review): every city, including Barcelona, is written under
# SourceDomain here, unlike the dataset export which uses TargetDomain for
# Barcelona - confirm this is intended.
# NOTE(review): the recorded output shows a single count, so city_names was
# presumably rebound to one city in a cell not shown - verify on a fresh run.
for city in city_names:
    coord_source_path = f"D:/ThesisData/processed data/{city}/NEW/{city}_data_NEW.h5"
    coord_target_path = f"D:/ThesisData/processed data/SourceDomain/NEW/{city}_node_coord_NEW.h5"

    dictionary = open_data(coord_source_path)
    city_node_coord = dictionary["coord_list"]

    # Report how many keys the coordinate mapping holds.
    print(len(city_node_coord.keys()))

    torch.save(city_node_coord, coord_target_path)

671
