# 使用普通神经网络包含节点特征

In [1]:
from torch_geometric.datasets import Planetoid

In [2]:
# Load the Cora dataset through Planetoid, using the current directory as the dataset root.
dataset = Planetoid(root=".", name="Cora")

In [3]:
# Take the first graph of the dataset, then print a short summary, one value
# per line: the dataset itself, the number of graphs, the number of rows in
# the node-feature matrix, the feature dimension and the number of classes.
data = dataset[0]
print(
    dataset,
    len(dataset),
    data.x.shape[0],
    dataset.num_features,
    dataset.num_classes,
    sep="\n",
)


Cora()
1
2708
1433
7


In [4]:
# Manual loading of FacebookPagePage dataset to avoid HTTP 404 error
import pandas as pd
import json
import torch
from torch_geometric.data import Data
import numpy as np
from sklearn.preprocessing import LabelEncoder

def load_facebook_dataset(root_path="FacebookPagePage"):
    """
    Manually load the Facebook Page-Page dataset from local files.

    Three files are read from ``{root_path}/raw``:

    * ``musae_facebook_edges.csv``    - edge list, one edge per row
    * ``musae_facebook_target.csv``   - one row per node, with a ``page_type`` column
    * ``musae_facebook_features.json`` - mapping of node id -> list of active
      feature indices

    Progress and basic statistics are printed while loading.

    Args:
        root_path: Directory containing the ``raw`` sub-directory.

    Returns:
        A ``Data`` object with a dense 0/1 feature matrix ``x``, ``edge_index``,
        integer labels ``y``, plus the extra attributes ``num_classes``,
        ``num_features`` and ``class_names``.
    """
    # Load edges; the transpose turns the per-row edge list into the
    # column-per-edge layout used for edge_index (assumes two id columns).
    edges_df = pd.read_csv(f"{root_path}/raw/musae_facebook_edges.csv")
    edge_index = torch.tensor(edges_df.values.T, dtype=torch.long)

    # Load targets (labels)
    targets_df = pd.read_csv(f"{root_path}/raw/musae_facebook_target.csv")

    # Encode page_type as numerical labels
    le = LabelEncoder()
    y = torch.tensor(le.fit_transform(targets_df['page_type']), dtype=torch.long)

    print(f"Classes: {list(le.classes_)}")
    print(f"Number of nodes: {len(targets_df)}")
    print(f"Number of edges: {len(edges_df)}")

    # Load features (this might take a moment due to file size).
    # The encoding is given explicitly so the result does not depend on the
    # platform's default text encoding.
    print("Loading node features...")
    with open(f"{root_path}/raw/musae_facebook_features.json", 'r', encoding="utf-8") as f:
        features_dict = json.load(f)

    # Features are stored in sparse form (list of active indices per node);
    # they are expanded into a dense matrix below.
    num_nodes = len(targets_df)

    # Feature dimension = largest feature index + 1. The maximum is taken node
    # by node instead of first copying every index into one big temporary list.
    max_feature_idx = max(
        (max(indices) for indices in features_dict.values() if indices),
        default=0,
    )
    feature_dim = max_feature_idx + 1

    # Create dense feature matrix
    x = torch.zeros((num_nodes, feature_dim), dtype=torch.float)

    for node_id, feature_indices in features_dict.items():
        # JSON object keys are strings; each key is used as the node's row index
        # (assumes ids lie in [0, num_nodes) - TODO confirm against the target file).
        x[int(node_id), feature_indices] = 1.0

    print(f"Feature dimension: {feature_dim}")

    # Create PyTorch Geometric Data object
    data = Data(x=x, edge_index=edge_index, y=y)

    # Add dataset info
    data.num_classes = len(le.classes_)
    data.num_features = feature_dim
    data.class_names = list(le.classes_)

    return data



In [5]:
# Build the Facebook Page-Page graph from the local raw files (default root_path).
data = load_facebook_dataset()

Classes: ['company', 'government', 'politician', 'tvshow']
Number of nodes: 22470
Number of edges: 171002
Loading node features...
Feature dimension: 4714


In [6]:
# Train / validation / test splits as contiguous, non-overlapping index ranges.
# A range's end is exclusive, so each split starts exactly where the previous
# one stops; starting at 18001 / 20001 would leave nodes 18000 and 20000 out
# of every split.
data.train_mask = range(18000)
data.val_mask = range(18000, 20000)
data.test_mask = range(20000, 22470)



In [7]:
import torch_geometric.transforms as T
# Switch back to Cora: `data` is rebound here, so the code below runs on the
# Cora graph rather than on the Facebook graph loaded earlier.
dataset = Planetoid(root=".", name="Cora")
data = dataset[0]

In [8]:
import pandas as pd
# Tabular view of the data: one row per row of data.x, one column per feature,
# plus a 'label' column taken from data.y.
df_x = pd.DataFrame(data.x.numpy())
# Convert the labels to NumPy explicitly (as done for the features) instead of
# wrapping the tensor in a DataFrame and assigning that frame to a column.
df_x['label'] = data.y.numpy()


In [9]:
def accuracy(y_pred, y_true):
    """Return the fraction of entries of ``y_pred`` equal to ``y_true`` (as a tensor)."""
    n_correct = (y_pred == y_true).sum()
    return n_correct / len(y_true)

In [10]:
import torch
from torch.nn import Linear
import torch.nn.functional as F

In [11]:
class MLP(torch.nn.Module):
    """Two-layer perceptron that classifies each node from its features alone.

    No graph structure is used: every row of the feature matrix is
    classified independently.
    """

    def __init__(self, dim_in, dim_h, dim_out):
        """
        Args:
            dim_in: Size of each input feature vector.
            dim_h: Size of the hidden layer.
            dim_out: Number of output classes.
        """
        super().__init__()
        self.linear1 = Linear(dim_in, dim_h)
        self.linear2 = Linear(dim_h, dim_out)

    def forward(self, x):
        """Return log-probabilities over the classes for every row of ``x``."""
        x = self.linear1(x)
        x = torch.relu(x)
        x = self.linear2(x)
        return F.log_softmax(x, dim=1)

    def fit(self, data, epochs):
        """Train on ``data.train_mask`` and print metrics every 20 epochs.

        Args:
            data: Object exposing ``x``, ``y``, ``train_mask`` and ``val_mask``.
            epochs: Index of the last epoch; the loop runs ``epochs + 1`` steps
                so that the final epoch is also reported.
        """
        # forward() already returns log-probabilities. CrossEntropyLoss applies
        # log-softmax once more, which leaves log-probabilities unchanged, so
        # the loss is the same as the negative log-likelihood of the output.
        criterion = torch.nn.CrossEntropyLoss()
        optimizer = torch.optim.Adam(self.parameters(), lr=0.01, weight_decay=5e-4)
        self.train()
        for epoch in range(epochs + 1):
            optimizer.zero_grad()
            out = self(data.x)
            loss = criterion(out[data.train_mask], data.y[data.train_mask])
            acc = accuracy(out[data.train_mask].argmax(dim=1), data.y[data.train_mask])
            loss.backward()
            optimizer.step()
            if epoch % 20 == 0:
                # Validation metrics reuse this epoch's forward pass (weights
                # before the update); no gradients are needed for them.
                with torch.no_grad():
                    val_loss = criterion(out[data.val_mask], data.y[data.val_mask])
                    val_acc = accuracy(out[data.val_mask].argmax(dim=1), data.y[data.val_mask])
                print(f'Epoch {epoch:03d}, Loss: {loss:.4f}, Acc: {acc:.4f}, Val Loss: {val_loss:.4f}, Val Acc: {val_acc:.4f}')

    @torch.no_grad()
    def test(self, data):
        """Return the accuracy on ``data.test_mask`` (evaluation mode, no autograd)."""
        self.eval()
        out = self(data.x)
        acc = accuracy(out.argmax(dim=1)[data.test_mask], data.y[data.test_mask])
        return acc
    
    
            

In [12]:
# MLP with a 16-unit hidden layer, sized from the dataset's feature and class counts.
mlp = MLP(dataset.num_features, 16, dataset.num_classes)
print(mlp)

MLP(
  (linear1): Linear(in_features=1433, out_features=16, bias=True)
  (linear2): Linear(in_features=16, out_features=7, bias=True)
)


In [13]:
# Train the MLP; fit() prints training and validation metrics every 20 epochs.
mlp.fit(data, epochs=100)

Epoch 000, Loss: 1.9583, Acc: 0.1429, Val Loss: 1.9919, Val Acc: 0.1140
Epoch 020, Loss: 0.0922, Acc: 1.0000, Val Loss: 1.5961, Val Acc: 0.4580
Epoch 040, Loss: 0.0104, Acc: 1.0000, Val Loss: 1.7649, Val Acc: 0.4600
Epoch 060, Loss: 0.0061, Acc: 1.0000, Val Loss: 1.7156, Val Acc: 0.4480
Epoch 080, Loss: 0.0072, Acc: 1.0000, Val Loss: 1.5707, Val Acc: 0.4740
Epoch 100, Loss: 0.0082, Acc: 1.0000, Val Loss: 1.4812, Val Acc: 0.4940


In [14]:
# Evaluate the trained MLP on the test split and report its accuracy.
acc = mlp.test(data)
print(f'Test Accuracy: {acc:.4f}')

Test Accuracy: 0.4930


In [25]:
class VanillaGNNLayer(torch.nn.Module):
    """One vanilla GNN layer: a bias-free linear map followed by a product with the adjacency matrix."""

    def __init__(self, dim_in, dim_out):
        super().__init__()
        self.linear = Linear(dim_in, dim_out, bias=False)

    def forward(self, x, adjacency):
        # Linear transformation of the input features
        transformed = self.linear(x)
        # Multiply the adjacency matrix by the transformed feature matrix
        return torch.sparse.mm(adjacency, transformed)

In [26]:
from torch_geometric.utils import to_dense_adj

# Dense adjacency matrix built from the edge list; [0] takes the first entry
# along the leading dimension of the tensor returned by to_dense_adj.
adjacency = to_dense_adj(data.edge_index)[0]
# Add the identity matrix so that every node is also connected to itself.
adjacency += torch.eye(len(adjacency))
adjacency

tensor([[1., 0., 0.,  ..., 0., 0., 0.],
        [0., 1., 1.,  ..., 0., 0., 0.],
        [0., 1., 1.,  ..., 0., 0., 0.],
        ...,
        [0., 0., 0.,  ..., 1., 0., 0.],
        [0., 0., 0.,  ..., 0., 1., 1.],
        [0., 0., 0.,  ..., 0., 1., 1.]])

In [27]:
class VanillaGNN(torch.nn.Module):
    """Two-layer vanilla GNN for node classification.

    Each layer applies a linear map and then multiplies by the adjacency
    matrix, so a node's output depends on its neighbours' features.
    """

    def __init__(self, dim_in, dim_h, dim_out):
        """
        Args:
            dim_in: Size of each input feature vector.
            dim_h: Size of the hidden representation.
            dim_out: Number of output classes.
        """
        super().__init__()
        self.gnn1 = VanillaGNNLayer(dim_in, dim_h)
        self.gnn2 = VanillaGNNLayer(dim_h, dim_out)

    def forward(self, x, adjacency):
        """Return log-probabilities over the classes for every row of ``x``."""
        h = self.gnn1(x, adjacency)
        h = torch.relu(h)
        h = self.gnn2(h, adjacency)
        return F.log_softmax(h, dim=1)

    def fit(self, data, epochs, adj=None):
        """Train on ``data.train_mask`` and print metrics every 20 epochs.

        Args:
            data: Object exposing ``x``, ``y``, ``train_mask`` and ``val_mask``.
            epochs: Index of the last epoch; the loop runs ``epochs + 1`` steps
                so that the final epoch is also reported.
            adj: Adjacency matrix to use. When omitted, the module-level
                ``adjacency`` variable is used, as before.
        """
        if adj is None:
            # Backward-compatible default: the matrix defined at module level.
            adj = adjacency
        # forward() already returns log-probabilities. CrossEntropyLoss applies
        # log-softmax once more, which leaves log-probabilities unchanged.
        criterion = torch.nn.CrossEntropyLoss()
        optimizer = torch.optim.Adam(self.parameters(), lr=0.01, weight_decay=5e-4)
        self.train()
        for epoch in range(epochs + 1):
            optimizer.zero_grad()
            out = self(data.x, adj)
            loss = criterion(out[data.train_mask], data.y[data.train_mask])
            acc = accuracy(out[data.train_mask].argmax(dim=1), data.y[data.train_mask])
            loss.backward()
            optimizer.step()
            if epoch % 20 == 0:
                # Validation metrics reuse this epoch's forward pass (weights
                # before the update); no gradients are needed for them.
                with torch.no_grad():
                    val_loss = criterion(out[data.val_mask], data.y[data.val_mask])
                    val_acc = accuracy(out[data.val_mask].argmax(dim=1), data.y[data.val_mask])
                print(f'Epoch {epoch:03d}, Loss: {loss:.4f}, Acc: {acc:.4f}, Val Loss: {val_loss:.4f}, Val Acc: {val_acc:.4f}')

    @torch.no_grad()
    def test(self, data, adj=None):
        """Return the accuracy on ``data.test_mask`` (evaluation mode, no autograd).

        Args:
            data: Object exposing ``x``, ``y`` and ``test_mask``.
            adj: Adjacency matrix to use. When omitted, the module-level
                ``adjacency`` variable is used, as before.
        """
        if adj is None:
            adj = adjacency
        self.eval()
        out = self(data.x, adj)
        acc = accuracy(out.argmax(dim=1)[data.test_mask], data.y[data.test_mask])
        return acc



In [28]:
# Build the vanilla GNN with a 16-unit hidden layer, train it (metrics are
# printed every 20 epochs), then report its accuracy on the test split.
gnn = VanillaGNN(dataset.num_features, 16, dataset.num_classes)
print(gnn)
gnn.fit(data, epochs=100)
acc = gnn.test(data)
print(f'Test Accuracy: {acc:.4f}')

VanillaGNN(
  (gnn1): VanillaGNNLayer(
    (linear): Linear(in_features=1433, out_features=16, bias=False)
  )
  (gnn2): VanillaGNNLayer(
    (linear): Linear(in_features=16, out_features=7, bias=False)
  )
)
Epoch 000, Loss: 2.2500, Acc: 0.1357, Val Loss: 2.1049, Val Acc: 0.2380
Epoch 020, Loss: 0.1315, Acc: 1.0000, Val Loss: 1.4412, Val Acc: 0.6860
Epoch 040, Loss: 0.0218, Acc: 1.0000, Val Loss: 1.7630, Val Acc: 0.7340
Epoch 060, Loss: 0.0091, Acc: 1.0000, Val Loss: 1.9079, Val Acc: 0.7380
Epoch 080, Loss: 0.0057, Acc: 1.0000, Val Loss: 1.9354, Val Acc: 0.7420
Epoch 100, Loss: 0.0043, Acc: 1.0000, Val Loss: 1.9290, Val Acc: 0.7360
Test Accuracy: 0.7440
