In [1]:
import numpy as np
import matplotlib.pyplot as plt
import os
import pickle
import torch
import torch.nn as nn
from torch_geometric.data import Data, Dataset
import networkx as nx
from torch_geometric.loader import DataLoader

  backends.update(_get_backends("networkx.backends"))


In [2]:
def open_data(file_path):
    """Load and return the object stored in a pickle file.

    Args:
        file_path: Path of the pickle file to read (opened in binary mode).

    Returns:
        Whatever object was pickled into the file.

    Raises:
        OSError: If the file cannot be opened.
        pickle.UnpicklingError: If the content is not a valid pickle stream.
    """
    # NOTE(security): unpickling can execute arbitrary code, so only use this
    # on files produced by a trusted pipeline.
    # The context manager guarantees the handle is closed, even when
    # unpickling fails part-way through.
    with open(file_path, "rb") as file:
        return pickle.load(file)

In [3]:
class CustomGraphData(Data):
    """Graph sample that carries three extra attributes next to the usual fields.

    Args:
        x, edge_index, edge_attr, y: Forwarded unchanged to the parent ``Data``
            constructor.
        class_label: Extra attribute stored on the object as ``class_label``.
        min_vals: Extra attribute stored on the object as ``min_vals``.
        max_vals: Extra attribute stored on the object as ``max_vals``.

    Every argument defaults to ``None``, so the class can also be instantiated
    without arguments.
    """

    def __init__(self, x=None, edge_index=None, edge_attr=None, y=None,
                 class_label=None, min_vals=None, max_vals=None):
        # Let the parent constructor handle the standard graph fields.
        super().__init__(x=x, edge_index=edge_index, edge_attr=edge_attr, y=y)

        # Attach the custom attributes, keeping the original assignment order.
        extras = (
            ("class_label", class_label),
            ("min_vals", min_vals),
            ("max_vals", max_vals),
        )
        for attr_name, attr_value in extras:
            setattr(self, attr_name, attr_value)

In [9]:
class CustomGraphDataset(Dataset):
    """Dataset backed by an in-memory list of graph objects."""

    def __init__(self, data_list):
        """Keep a reference to the given list of samples.

        Args:
            data_list (list): List of ``CustomGraphData`` objects.
        """
        super().__init__()
        self.data_list = data_list

    def len(self):
        """Return how many samples the dataset holds."""
        sample_count = len(self.data_list)
        return sample_count

    def get(self, idx):
        """Return the sample stored at position ``idx``."""
        sample = self.data_list[idx]
        return sample

In [11]:
def create_train_graph_data(city_names, total_ratio=0.8, train_ratio=0.6):
    """Build the training dataset from the leading samples of every given city.

    For each city the pickled dictionary is loaded with ``open_data``, the nodes
    of its ``"biggest_subgraph"`` are relabelled to ``0..N-1`` in sorted order,
    and one ``CustomGraphData`` is created per training sample.

    Args:
        city_names: City names; each one selects the data file to load.
        total_ratio: Fraction of all samples that is considered at all. It is
            additionally halved for every city other than "Antwerp"/"Bangkok".
        train_ratio: Fraction of the considered samples used for training.

    Returns:
        CustomGraphDataset: All training graphs of all cities, in input order.
    """
    collected = []

    for city in city_names:
        city_data = open_data(f"D:/ThesisData/processed data/{city}/NEW/{city}_data_14days.h5")
        is_source_city = city in ("Antwerp", "Bangkok")

        # Relabel the nodes with consecutive integers following their sorted order.
        graph = city_data["biggest_subgraph"]
        relabel = {old: new for new, old in enumerate(sorted(graph.nodes()))}
        graph = nx.relabel_nodes(graph, relabel)

        # Edge endpoints as a (2, PAIR_NUM) array, edge weights as (PAIR_NUM,).
        edges = list(graph.edges(data=True))
        pair_array = np.transpose(np.array([[u, v] for u, v, _ in edges]))
        weight_array = np.array([attrs['weight'] for _, _, attrs in edges])

        # Antwerp/Bangkok use the full ratio, every other city only half of it
        # (multiplying by 1.0 leaves the product unchanged).
        share = 1.0 if is_source_city else 0.5
        total_scale = int(total_ratio * city_data["input_result"].shape[0] * share)
        train_scale = int(train_ratio * total_scale)

        inputs = city_data["input_result"][:train_scale]
        targets = city_data["target_result"][:train_scale]
        print(f"train length: {train_scale}")

        label_value = 1 if is_source_city else 0
        min_source = city_data["min_vals"]
        max_source = city_data["max_vals"]

        for idx in range(inputs.shape[0]):
            # Drop feature column 0 and put the node axis first; the recorded
            # run prints (Node_num, 28) for the resulting input.
            x = torch.tensor(inputs[idx], dtype=torch.float)[:, :, 1:]
            x = x.permute(1, 0, 2).squeeze(-1)
            if idx == 1:
                print(f"City {city} train input Shape: {x.shape}")

            # Same treatment for the target; the recorded run prints (Node_num, 14).
            y = torch.tensor(targets[idx], dtype=torch.float)[:, :, 1:]
            y = y.permute(1, 0, 2).squeeze(-1)
            if idx == 1:
                print(f"City {city} train target Shape: {y.shape}")

            # City-level value bounds with two trailing singleton axes appended.
            min_vals = torch.tensor(min_source, dtype=torch.float).unsqueeze(-1).unsqueeze(-1)
            max_vals = torch.tensor(max_source, dtype=torch.float).unsqueeze(-1).unsqueeze(-1)

            # Every sample receives its own copy of the graph structure.
            edge_index = torch.tensor(pair_array, dtype=torch.long)
            edge_attr = torch.tensor(weight_array, dtype=torch.float)

            # Label of shape (1, 1): 1 for Antwerp/Bangkok, 0 for any other city.
            class_label = torch.tensor([label_value], dtype=torch.float).unsqueeze(-1)

            collected.append(
                CustomGraphData(
                    x=x,
                    edge_index=edge_index,
                    edge_attr=edge_attr,
                    y=y,
                    class_label=class_label,
                    min_vals=min_vals,
                    max_vals=max_vals,
                )
            )

    dataset = CustomGraphDataset(collected)
    print(f"train dataset is done")

    return dataset

In [12]:
def create_test_vali_graph_data(city_name, purpose, total_ratio = 0.8 ,train_ratio = 0.6, vali_ratio = 0.2):
    """Build the validation or test dataset of a single city.

    With ``n`` samples in ``"input_result"`` the boundaries are::

        total_scale = int(total_ratio * n)
        train_scale = int(train_ratio * total_scale)
        vali_scale  = int((vali_ratio + train_ratio) * total_scale)

    ``"vali"`` uses samples ``[train_scale:vali_scale]`` and ``"test"`` uses
    samples ``[vali_scale:total_scale]``.

    Args:
        city_name: City whose data file is loaded via ``open_data``.
        purpose: Either ``"vali"`` or ``"test"``.
        total_ratio: Fraction of all samples that is considered at all.
        train_ratio: Fraction of the considered samples reserved for training.
        vali_ratio: Fraction of the considered samples used for validation.

    Returns:
        CustomGraphDataset: One ``CustomGraphData`` per sample of the selected
        split; empty when the split contains no samples.

    Raises:
        ValueError: If ``purpose`` is neither ``"vali"`` nor ``"test"``.
    """
    # Reject an unknown purpose before any file is read; otherwise no split
    # would be selected and the failure would only show up later as an
    # unrelated unbound-variable error.
    if purpose not in ("vali", "test"):
        raise ValueError(f"purpose must be 'vali' or 'test', got {purpose!r}")

    graphs_list = list()

    city_dictionary = open_data(f"D:/ThesisData/processed data/{city_name}/NEW/{city_name}_data_14days.h5")

    # Relabel the nodes with consecutive integers following their sorted order.
    subgraph = city_dictionary["biggest_subgraph"]
    sorted_nodes = sorted(subgraph.nodes())
    mapping = {old_label: new_label for new_label, old_label in enumerate(sorted_nodes, start=0)}
    subgraph = nx.relabel_nodes(subgraph, mapping)

    # Edge endpoints as a (2, PAIR_NUM) array, edge weights as (PAIR_NUM,).
    edge_pairs = []
    edge_weights = []
    for u, v, data in subgraph.edges(data=True):
        edge_pairs.append([u, v])
        edge_weights.append(data['weight'])
    edge_pairs, edge_weights = np.transpose(np.array(edge_pairs)), np.array(edge_weights)

    # Split boundaries (see the docstring).
    total_scale = int(total_ratio * city_dictionary["input_result"].shape[0])
    train_scale = int(train_ratio * total_scale)
    vali_scale = int((vali_ratio+train_ratio) * total_scale)

    if purpose == "vali":
        start, stop = train_scale, vali_scale
    else:  # purpose == "test", guaranteed by the check above
        start, stop = vali_scale, total_scale

    input_ = city_dictionary["input_result"][start:stop]
    target_ = city_dictionary["target_result"][start:stop]
    print(f"{purpose} length: {stop - start}")

    for j in range(input_.shape[0]):
        # Drop feature column 0 and put the node axis first; the recorded run
        # prints (Node_num, 28) for the resulting input.
        x = torch.tensor(input_[j], dtype= torch.float)[:,:,1:]
        x = x.permute(1, 0, 2).squeeze(-1)
        if j == 1:
            print(f"City {city_name} {purpose} input Shape: {x.shape}")

        # Same treatment for the target; the recorded run prints (Node_num, 14).
        y = torch.tensor(target_[j], dtype= torch.float)[:,:,1:]
        y = y.permute(1, 0, 2).squeeze(-1)
        if j == 1:
            print(f"City {city_name} {purpose} target Shape: {y.shape}")

        # City-level value bounds with two trailing singleton axes appended.
        min_vals = torch.tensor(city_dictionary["min_vals"], dtype=torch.float).unsqueeze(-1).unsqueeze(-1)
        max_vals = torch.tensor(city_dictionary["max_vals"], dtype=torch.float).unsqueeze(-1).unsqueeze(-1)

        # Every sample receives its own copy of the graph structure.
        edge_index = torch.tensor(edge_pairs, dtype= torch.long)
        edge_attr = torch.tensor(edge_weights, dtype=torch.float)

        # Label of shape (1, 1): 1 for Antwerp/Bangkok, 0 for any other city.
        if city_name=="Antwerp" or city_name=="Bangkok":
            class_label = torch.tensor([1], dtype=torch.float).unsqueeze(-1)
        else:
            class_label = torch.tensor([0], dtype=torch.float).unsqueeze(-1)

        one_graph = CustomGraphData(x=x, edge_index=edge_index, edge_attr=edge_attr, y=y, class_label=class_label, min_vals=min_vals, max_vals=max_vals)
        graphs_list.append(one_graph)

    # Wrap the list once, after the loop: this avoids rebuilding the dataset
    # for every sample and keeps ``dataset`` defined when the split is empty.
    dataset = CustomGraphDataset(graphs_list)

    print(f"{purpose} dataset is done")

    return dataset

In [13]:
# Domains processed by the generation loop, in this order.
domains = ["Target", "Source"]
# Held-out splits that are built for every city (the generation loop assigns
# the same list again before using it).
purposes = ["vali","test"]

In [14]:
# Build and save the train / validation / test datasets of every domain.
# NOTE: the files carry an ".h5" suffix but are written with torch.save.
purposes = ["vali","test"]
for domain in domains:
    # Cities that make up the current domain.
    if domain == "Source":
        city_names = ["Bangkok", "Antwerp"]
    elif domain == "Target":
        city_names = ["Barcelona"]

    output_dir = f"D:/ThesisData/processed data/{domain}Domain/NEW"

    # One training file per domain, covering all of its cities.
    dataset = create_train_graph_data(city_names)
    torch.save(dataset, f"{output_dir}/train_GCN_data_14days.h5")

    # One validation file and one test file per city.
    for city_name in city_names:
        for purpose in purposes:
            dataset = create_test_vali_graph_data(city_name, purpose)
            torch.save(dataset, f"{output_dir}/{city_name}_{purpose}_GCN_data_14days.h5")

train length: 33
City Barcelona train input Shape: torch.Size([1273, 28])
City Barcelona train target Shape: torch.Size([1273, 14])
train dataset is done
vali length: 22
City Barcelona vali input Shape: torch.Size([1273, 28])
City Barcelona vali target Shape: torch.Size([1273, 14])
vali dataset is done
test length: 23
City Barcelona test input Shape: torch.Size([1273, 28])
City Barcelona test target Shape: torch.Size([1273, 14])
test dataset is done
train length: 67
City Bangkok train input Shape: torch.Size([1137, 28])
City Bangkok train target Shape: torch.Size([1137, 14])
train length: 67
City Antwerp train input Shape: torch.Size([1466, 28])
City Antwerp train target Shape: torch.Size([1466, 14])
train dataset is done
vali length: 22
City Bangkok vali input Shape: torch.Size([1137, 28])
City Bangkok vali target Shape: torch.Size([1137, 14])
vali dataset is done
test length: 23
City Bangkok test input Shape: torch.Size([1137, 28])
City Bangkok test target Shape: torch.Size([1137, 14