In [8]:
import pandas as pd
import numpy as np
import scipy.sparse as sp
import torch
import os

cwd = os.getcwd()
data = os.path.join(cwd, '..', 'database', 'data')
eta_path = os.path.join(cwd, '..', 'database', 'ETA')
rel_path = os.path.join(data, 'rel.csv')


def encode_onehot(labels):
    classes = sorted(list(set(labels)))
    classes_dict = {c: i for i, c in enumerate(classes)}
    labels_indices = np.array(list(map(classes_dict.get, labels)), dtype=np.int32)
    num_classes = len(classes)
    labels_onehot = sp.coo_matrix((np.ones_like(labels_indices), (np.arange(len(labels_indices)), labels_indices)), shape=(len(labels), num_classes), dtype=np.int32)
    return labels_onehot

def normalize_adj(mx):
    """Symmetrically normalize adjacency matrix."""
    rowsum = np.array(mx.sum(1))
    r_inv_sqrt = np.power(rowsum, -0.5).flatten()
    r_inv_sqrt[np.isinf(r_inv_sqrt)] = 0.
    r_mat_inv_sqrt = sp.diags(r_inv_sqrt)
    return mx.dot(r_mat_inv_sqrt).transpose().dot(r_mat_inv_sqrt)


def normalize_features(mx):
    """Row-normalize feature matrix."""
    rowsum = np.array(mx.sum(1))
    r_inv = np.power(rowsum, -1).flatten()
    r_inv[np.isinf(r_inv)] = 0.
    r_mat_inv = sp.diags(r_inv)
    mx = r_mat_inv.dot(mx)
    return mx


def accuracy(output, labels):
    preds = output.max(1)[1].type_as(labels)
    correct = preds.eq(labels).double()
    correct = correct.sum()
    return correct / len(labels)


In [9]:
path = os.path.join(eta_path, 'road_features.csv')
print('Loading {} dataset...', path)

# Load features and labels
df = pd.read_csv(path)
# Extract header
header = df.columns
# Extract coordinates
coordinates = df['coordinates'].apply(eval).values
# Extract features excluding ID and coordinates
features = sp.csr_matrix(df.iloc[:, 2:].values.astype(np.float32))
labels = encode_onehot(df['id'].values.astype(np.int32))
# Print the results
print("Header:", header)
print("Coordinates:", coordinates)
print("Features:\n", features)
labels

Loading {} dataset... /mnt/f/codes/Python/BUAA-DM23/GAT-zjy/../database/ETA/road_features.csv
Header: Index(['id', 'coordinates', 'highway', 'length', 'lanes', 'tunnel', 'bridge',
       'maxspeed', 'width', 'alley', 'roundabout', '0', '1', '2', '3', '4',
       '5', '6', '7', '8', '9', '10', '11', '12', '13', '14', '15', '16', '17',
       '18', '19', '20', '21', '22', '23', '24', '25', '26', '27', '28', '29',
       '30'],
      dtype='object')
Coordinates: [list([[116.3894407, 39.9062721], [116.3894463, 39.9060115]])
 list([[116.3894407, 39.9062721], [116.386428, 39.9061687], [116.3856338, 39.9061421]])
 list([[116.3930703, 39.906394], [116.3894407, 39.9062721]]) ...
 list([[116.406357, 39.8311645], [116.4062083, 39.8313723], [116.4061092, 39.8315409], [116.4060903, 39.8321389], [116.4060871, 39.8328152], [116.406125, 39.833363], [116.4059676, 39.8338401]])
 list([[116.4059676, 39.8338401], [116.406125, 39.833363], [116.4060871, 39.8328152], [116.4060903, 39.8321389], [116.4061092, 

<38027x38027 sparse matrix of type '<class 'numpy.int32'>'
	with 38027 stored elements in COOrdinate format>

In [10]:
# Load graph edges from rel.csv
edges_df = pd.read_csv(rel_path)
edges = edges_df[['origin_id', 'destination_id']].values

# Build graph adjacency matrix
adj = sp.coo_matrix((np.ones(edges.shape[0]), (edges[:, 0], edges[:, 1])), shape=(labels.shape[0], labels.shape[0]), dtype=np.float32)

# Build symmetric adjacency matrix
adj = adj + adj.T.multiply(adj.T > adj) - adj.multiply(adj.T > adj)

# Normalize features and adjacency matrix
features = normalize_features(features)
adj = normalize_adj(adj + sp.eye(adj.shape[0]))

内存不够了。。。一个38027*38027的矩阵需要10.8GB，改成稀疏图试试

In [12]:
idx_train = range(140)
idx_val = range(200, 500)
idx_test = range(500, 1500)

adj_coo = torch.sparse_coo_tensor(torch.LongTensor(np.vstack(adj.nonzero())),
                                torch.FloatTensor(adj.data),
                                torch.Size(adj.shape))

# 创建稀疏特征矩阵
features_coo = torch.sparse_coo_tensor(torch.LongTensor(np.vstack(features.nonzero())),
                                    torch.FloatTensor(features.data),
                                    torch.Size(features.shape))

# 转换标签为 LongTensor
labels = torch.LongTensor(labels.nonzero()[1])


# 转换索引为 LongTensor
idx_train = torch.LongTensor(idx_train)
idx_val = torch.LongTensor(idx_val)
idx_test = torch.LongTensor(idx_test)

# 返回稀疏张量和其他数据

In [16]:
df = pd.read_csv(os.path.join(eta_path, 'road_features.csv'))
df = df.sample(frac=0.25, random_state=42)

# Extract features excluding ID and coordinates
features = df.iloc[:, 2:].values.astype(np.float32)
labels = encode_onehot(df['id'].values.astype(np.int32))

# Load edges
edges_df = pd.read_csv(rel_path)
edges = edges_df[['origin_id', 'destination_id']].values

# Filter edges based on existing nodes in df
existing_nodes = set(df['id'].values)
filter_condition = np.logical_and(np.isin(edges[:, 0].astype(int), list(existing_nodes)),
                                    np.isin(edges[:, 1].astype(int), list(existing_nodes)))
edges = edges[filter_condition]

# Update node indices after filtering edges
existing_nodes = sorted(list(existing_nodes))
node_mapping = {node: index for index, node in enumerate(existing_nodes)}
edges[:, 0] = np.vectorize(node_mapping.get)(edges[:, 0])
edges[:, 1] = np.vectorize(node_mapping.get)(edges[:, 1])

# Build graph adjacency matrix
adj = sp.coo_matrix((np.ones(edges.shape[0]), (edges[:, 0], edges[:, 1])),
                    shape=(len(existing_nodes), len(existing_nodes)), dtype=np.float32)

# Build symmetric adjacency matrix
adj = adj + adj.T.multiply(adj.T > adj) - adj.multiply(adj.T > adj)

# Normalize features and adjacency matrix
features = normalize_features(features)
adj = normalize_adj(adj + sp.eye(adj.shape[0]))

idx_train = range(140)
idx_val = range(200, 500)
idx_test = range(500, 1500)

adj = torch.FloatTensor(np.array(adj.todense()))
features = torch.FloatTensor(np.array(features.todense()))
labels = torch.LongTensor(np.where(labels)[1])

idx_train = torch.LongTensor(idx_train)
idx_val = torch.LongTensor(idx_val)
idx_test = torch.LongTensor(idx_test)

AttributeError: 'numpy.ndarray' object has no attribute 'todense'

In [1]:
import numpy as np

d = np.load('../best_trained_features.npy')

d[0]

array([-6.610317 , -6.619588 , -6.7440476, -6.5666285, -6.5571613,
       -6.5735803, -6.5147667, -6.607243 , -6.543721 , -6.863328 ,
       -6.5021844, -6.693005 , -6.6466837, -6.6682286, -6.625781 ,
       -6.738177 , -6.838445 , -6.5230417, -6.779375 , -6.5782595,
       -6.7530584, -6.6754155, -6.849143 , -6.590446 , -6.7190514,
       -6.6515937, -6.684066 , -6.5523114, -6.7098274, -6.4798226,
       -6.759654 , -6.5024033, -6.5905075, -6.4912114, -6.5761843,
       -6.7280235, -6.5115356, -6.5697923, -6.6816063, -6.8083644,
       -6.5362964, -6.529802 , -6.7243047, -6.5610714, -6.5827994,
       -6.524435 , -6.744207 , -6.6814365, -6.800146 , -6.679621 ,
       -6.464323 , -6.7614603, -6.8334184, -6.6638284, -6.5655737,
       -6.709212 , -6.626978 , -6.6673293, -6.803067 , -6.6987977,
       -6.670445 , -6.4813514, -6.5685577, -6.613683 , -6.671551 ,
       -6.725924 , -6.81005  , -6.588813 , -6.699696 , -6.525244 ,
       -6.7935305, -6.528277 , -6.468602 , -6.711407 , -6.7440