In [1]:
import numpy as np
import pandas as pd
import scipy.sparse as sp
import torch
import igraph as ig
from pathlib import Path
import pickle
from src.config import INTERIM_DATA_DIR, PROCESSED_DATA_DIR

In [2]:
# 1. Load the network data.
g_path = INTERIM_DATA_DIR / "Texas7k_Gas/merged_graph.pkl"
g = ig.Graph.Read_Pickle(g_path)

# Load the matrices.
# `distances_upper` is densified and added to its own transpose to obtain
# `distances` (presumably the file stores only the upper triangle -- TODO confirm
# against the code that wrote it).
distances_upper = sp.load_npz(INTERIM_DATA_DIR / "Texas7k_Gas/distances.npz").toarray()
distances = np.add(distances_upper, distances_upper.T)

# The remaining operators stay sparse; they are read in the order adj, B1, L1_tilde.
adj_sparse, B1, L1_tilde = (
    sp.load_npz(INTERIM_DATA_DIR / matrix_file)
    for matrix_file in (
        "Texas7k_Gas/adj_sparse.npz",
        "Texas7k_Gas/B1.npz",
        "Texas7k_Gas/L1_tilde.npz",
    )
)

# Load the features.
# NOTE(review): allow_pickle=True unpickles arbitrary objects from these files;
# only use it on files produced by this project's own pipeline.
edge_features, node_features = (
    np.load(INTERIM_DATA_DIR / feature_file, allow_pickle=True)
    for feature_file in (
        "Texas7k_Gas/edge_features.npy",
        "Texas7k_Gas/node_features.npy",
    )
)

# Wrap the raw arrays in DataFrames whose column labels come from the saved name arrays.
edge_features_df = pd.DataFrame(
    data=edge_features,
    columns=np.load(INTERIM_DATA_DIR / "Texas7k_Gas/edge_feature_names.npy", allow_pickle=True),
)
node_features_df = pd.DataFrame(
    data=node_features,
    columns=np.load(INTERIM_DATA_DIR / "Texas7k_Gas/node_feature_names.npy", allow_pickle=True),
)

In [3]:
# 2. Simulate time-series features (load, flow) and append them to the static features
def generate_time_series_features(n_nodes, node_features, n_timesteps=600, seed=42):
    """Build a (n_nodes, n_timesteps, 25) array of static plus simulated features.

    For every timestep the first 23 channels are a copy of the first 23 columns
    of ``node_features``. Channels 23 and 24 are simulated series obtained by
    scaling column 3 and column 8 of ``node_features`` (annotated in the
    original notebook as ``pd`` and ``gas_load_p``) with
    ``1 + 0.2 * sin(2 * pi * time / 12) + 0.1 * noise``, where ``noise`` is
    standard-normal and drawn independently per node, timestep and channel.

    Parameters
    ----------
    n_nodes : int
        Number of nodes; must be broadcast-compatible with the number of rows
        of ``node_features``.
    node_features : numpy.ndarray
        2-D array with at least 23 columns of numeric values.
    n_timesteps : int, default 600
        Number of time samples, spread evenly over ``[0, 600]``.
    seed : int, default 42
        Seed for the private random generator.

    Returns
    -------
    numpy.ndarray
        float64 array of shape ``(n_nodes, n_timesteps, 25)``.

    Notes
    -----
    The values are the same as those produced by seeding NumPy's global
    generator with ``seed`` and drawing per timestep, but the global random
    state is no longer modified by this function.
    """
    n_static = 23              # leading columns copied unchanged into every timestep
    load_col, flow_col = 3, 8  # base columns for the two simulated channels

    # Private legacy generator: same stream as np.random.seed(seed) followed by
    # np.random.randn(...), without clobbering the caller's global RNG state.
    rng = np.random.RandomState(seed)

    time = np.linspace(0, 50 * 12, n_timesteps)
    seasonal = 1 + 0.2 * np.sin(2 * np.pi * time / 12)  # shape (n_timesteps,)

    base_load = np.asarray(node_features[:, load_col], dtype=float)
    base_flow = np.asarray(node_features[:, flow_col], dtype=float)

    # One draw of shape (n_timesteps, 2, n_nodes) is filled in C order, so
    # noise[t, 0] / noise[t, 1] are exactly the load / flow draws a
    # per-timestep loop would have produced, in the same order.
    noise = rng.randn(n_timesteps, 2, n_nodes)

    dynamic_features = np.zeros((n_nodes, n_timesteps, n_static + 2))
    # Static part: broadcast the same row of features across the time axis.
    dynamic_features[:, :, :n_static] = node_features[:, None, :n_static]
    # Dynamic part: base value times (seasonal factor + 10 % Gaussian noise).
    dynamic_features[:, :, n_static] = base_load[:, None] * (seasonal[None, :] + 0.1 * noise[:, 0, :].T)
    dynamic_features[:, :, n_static + 1] = base_flow[:, None] * (seasonal[None, :] + 0.1 * noise[:, 1, :].T)
    return dynamic_features

n_timesteps = 600
# Take the node count from the feature matrix itself rather than a hard-coded
# 9168, so this cell keeps working if the dataset is swapped.
dynamic_features = generate_time_series_features(node_features.shape[0], node_features, n_timesteps)

In [4]:
# 3. Generate disaster exposure
def generate_disaster_exposure(n_nodes, coordinates, seed=42):
    """Draw a synthetic (n_nodes, 3) matrix of per-node hazard intensities.

    Columns, in order: earthquake, flood, hurricane. The original notebook
    annotates them as peak ground acceleration in g, flood depth in metres and
    wind speed in m/s.

    Parameters
    ----------
    n_nodes : int
        Number of rows to generate.
    coordinates : array-like
        Accepted for interface compatibility but not used: the draws do not
        depend on node location.
    seed : int, default 42
        Seed for the private random generator.

    Returns
    -------
    numpy.ndarray
        float64 array of shape ``(n_nodes, 3)``.

    Notes
    -----
    The values are the same as those produced by seeding NumPy's global
    generator with ``seed``, but the global random state is no longer modified
    by this function.
    """
    # Private legacy generator: same stream as np.random.seed(seed) followed by
    # the module-level samplers, without clobbering the caller's global RNG.
    rng = np.random.RandomState(seed)

    earthquake_pga = rng.lognormal(mean=-2, sigma=0.5, size=n_nodes)  # median exp(-2) ~= 0.135
    flood_depth = rng.lognormal(mean=0, sigma=0.5, size=n_nodes)  # median exp(0) = 1
    # Weibull with shape 2 and scale 20: the mean is 20 * Gamma(1.5) ~= 17.7, not 20.
    hurricane_speed = rng.weibull(a=2, size=n_nodes) * 20
    return np.stack([earthquake_pga, flood_depth, hurricane_speed], axis=1)

# One exposure row per row of the node table (instead of a hard-coded 9168).
disaster_exposure = generate_disaster_exposure(
    len(node_features_df),
    node_features_df[['latitude', 'longitude']].values,
)
print(disaster_exposure.shape)
disaster_exposure

(9168, 3)


array([[ 0.17348868,  1.14604485, 12.44690953],
       [ 0.12629534,  1.18662458, 37.23573177],
       [ 0.18709183,  0.5773309 , 21.01843341],
       ...,
       [ 0.13108112,  0.62042229,  8.15923209],
       [ 0.0722942 ,  0.87521462, 10.12665919],
       [ 0.08212866,  0.78196935, 11.60030175]])

In [5]:
# 4. Build the temporal hypergraph
def build_temporal_hypergraph(n_nodes, n_edges, dynamic_features, disaster_exposure, n_timesteps, graph=None):
    """Return, for every timestep, the list of hyperedges of the network.

    Each timestep's list contains, in order:

    1. one two-node hyperedge ``[source, target]`` for each of the first
       ``n_edges`` edges of the graph, and
    2. for each of the three columns of ``disaster_exposure``, one hyperedge
       holding every node whose exposure is strictly above that column's 90th
       percentile, provided at least two nodes qualify.

    None of this depends on the timestep, so the hyperedges are computed once
    and each timestep receives its own independent copy.

    Parameters
    ----------
    n_nodes : int
        Unused; kept for interface compatibility.
    n_edges : int
        Number of leading edges of the graph to include.
    dynamic_features : Any
        Unused; kept for interface compatibility.
    disaster_exposure : numpy.ndarray
        2-D array with at least three columns, one row per node.
    n_timesteps : int
        Number of timesteps to emit.
    graph : optional
        Object exposing ``graph.es[i].source`` / ``.target``. When omitted, the
        notebook-level graph ``g`` is used, as in the original implementation.

    Returns
    -------
    list[list[list[int]]]
        ``n_timesteps`` lists of hyperedges; each hyperedge is a list of node
        indices.
    """
    if n_timesteps <= 0:
        return []

    if graph is None:
        graph = g  # fall back to the graph loaded at notebook level

    # Pairwise hyperedges: read each edge from the graph exactly once.
    edge_seq = graph.es
    base_hyperedges = []
    for e in range(n_edges):
        edge = edge_seq[e]
        base_hyperedges.append([edge.source, edge.target])

    # Hazard hyperedges: columns 0, 1, 2 are earthquake, flood, hurricane.
    for disaster_idx in range(3):
        exposure = disaster_exposure[:, disaster_idx]
        high_risk_nodes = np.where(exposure > np.percentile(exposure, 90))[0]
        if len(high_risk_nodes) > 1:
            base_hyperedges.append(high_risk_nodes.tolist())

    # Copy per timestep so that mutating one timestep never affects another.
    return [[list(hyperedge) for hyperedge in base_hyperedges] for _ in range(n_timesteps)]

# Take the sizes from the loaded graph instead of hard-coding 9168 / 11667.
# NOTE(review): assumes `g` has exactly the 11667 edges the original call used -- confirm.
temporal_hyperedges = build_temporal_hypergraph(g.vcount(), g.ecount(), dynamic_features, disaster_exposure, n_timesteps)

In [6]:
# 5. Prepare the GNN inputs
def prepare_gnn_inputs(node_features, edge_features, adj_sparse, B1, L1_tilde):
    """Convert feature arrays and sparse operators into dense float32 tensors.

    Parameters
    ----------
    node_features, edge_features : numpy.ndarray
        Feature arrays; cast to float32 before conversion.
    adj_sparse, B1, L1_tilde :
        Matrices accepted by ``scipy.sparse.csr_matrix``; each one is fully
        densified, so memory grows with rows * columns.

    Returns
    -------
    tuple of torch.Tensor
        ``(X_n, X_e, A_tilde, B1_tilde, L1_tilde)``, all of dtype float32, in
        the same order as the arguments. Shapes follow the inputs; the
        original notebook annotated them as [9168, 25], [11667, 9],
        [9168, 9168], [9168, 11667] and [11667, 11667], which is not checked
        here.
    """
    def _to_float_tensor(array):
        # Single place where the target dtype is fixed.
        return torch.tensor(array, dtype=torch.float32)

    def _densify(matrix):
        # Normalise to CSR first, then expand to a dense ndarray.
        return sp.csr_matrix(matrix).toarray()

    node_tensor = _to_float_tensor(node_features.astype(np.float32))
    edge_tensor = _to_float_tensor(edge_features.astype(np.float32))
    adjacency_tensor = _to_float_tensor(_densify(adj_sparse))
    incidence_tensor = _to_float_tensor(_densify(B1))
    laplacian_tensor = _to_float_tensor(_densify(L1_tilde))
    return node_tensor, edge_tensor, adjacency_tensor, incidence_tensor, laplacian_tensor

# NOTE: `L1_tilde` is rebound here from the loaded sparse matrix to a dense
# tensor, so re-running this cell alone passes the tensor back into the function.
X_n, X_e, A_tilde, B1_tilde, L1_tilde = prepare_gnn_inputs(
    node_features=node_features,
    edge_features=edge_features,
    adj_sparse=adj_sparse,
    B1=B1,
    L1_tilde=L1_tilde,
)

In [7]:
# Save the data
output_path = PROCESSED_DATA_DIR / "temporal_hypergraph.pkl"
# Create the output directory first so the dump does not fail with
# FileNotFoundError on a fresh checkout where it does not exist yet.
output_path.parent.mkdir(parents=True, exist_ok=True)

# Everything downstream needs is bundled into a single pickle.
with open(output_path, "wb") as f:
    pickle.dump({
        'temporal_hyperedges': temporal_hyperedges,
        'dynamic_features': dynamic_features,
        'disaster_exposure': disaster_exposure,
        'X_n': X_n,
        'X_e': X_e,
        'A_tilde': A_tilde,
        'B1_tilde': B1_tilde,
        'L1_tilde': L1_tilde
    }, f)

print("数据准备完成，保存至 temporal_hypergraph.pkl")

数据准备完成，保存至 temporal_hypergraph.pkl
