In [2]:
!pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu118

!pip install pyg_lib torch_scatter torch_sparse torch_cluster torch_spline_conv -f https://data.pyg.org/whl/torch-2.0.0+cu118.html

!pip install torch_geometric


Looking in indexes: https://download.pytorch.org/whl/cu118
Looking in links: https://data.pyg.org/whl/torch-2.0.0+cu118.html
[31mERROR: Could not find a version that satisfies the requirement pyg_lib (from versions: none)[0m[31m
[0m[31mERROR: No matching distribution found for pyg_lib[0m[31m
[0mCollecting torch_geometric
  Downloading torch_geometric-2.7.0-py3-none-any.whl.metadata (63 kB)
[2K     [90m‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ[0m [32m63.7/63.7 kB[0m [31m3.0 MB/s[0m eta [36m0:00:00[0m
Downloading torch_geometric-2.7.0-py3-none-any.whl (1.3 MB)
[2K   [90m‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ[0m [32m1.3/1.3 MB[0m [31m30.9 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: torch_geometric
Successfully installed torch_geometric-2.7.0


In [15]:
# Save this as bench_dblp.py and run with: python bench_dblp.py

# First, install required dependencies:
# !pip install torch-scatter torch-sparse -f https://data.pyg.org/whl/torch-2.0.0+cpu.html
# !pip install pyg-lib -f https://data.pyg.org/whl/torch-2.0.0+cpu.html

import torch
import random
import numpy as np
from torch_geometric.datasets import DBLP
from torch_geometric.nn import HGTConv, RGCNConv, GATConv, HANConv, Linear
from torch_geometric.data import HeteroData
from torch.nn import functional as F
from sklearn.metrics import f1_score, accuracy_score
import time

# Set random seeds for reproducibility
torch.manual_seed(42)
np.random.seed(42)
random.seed(42)

# Load DBLP dataset
dataset = DBLP(root='/tmp/DBLP')
data = dataset[0]

print("Dataset info:")
print(f"Number of nodes: {data.num_nodes}")
print(f"Number of edges: {data.num_edges}")
print(f"Node types: {data.node_types}")
print(f"Edge types: {data.edge_types}")
print(f"Number of author classes: {len(set(data['author'].y.tolist()))}")

# Fix: Create consistent node features with same dimensions
print("Creating consistent node features...")
feature_dim = 128  # Consistent feature dimension for all node types

for node_type in data.node_types:
    num_nodes = data[node_type].num_nodes
    # Create random features with consistent dimension
    data[node_type].x = torch.randn(num_nodes, feature_dim, dtype=torch.float)

print("Node feature dimensions:")
for node_type in data.node_types:
    print(f"  {node_type}: {data[node_type].x.shape}")

# Define device
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f"Using device: {device}")

# Create train/val/test masks for author nodes (the target node type)
author_nodes = data['author'].num_nodes
train_mask = torch.zeros(author_nodes, dtype=torch.bool)
val_mask = torch.zeros(author_nodes, dtype=torch.bool)
test_mask = torch.zeros(author_nodes, dtype=torch.bool)

# Randomly split author nodes
indices = torch.randperm(author_nodes)
train_idx = indices[:int(0.6 * author_nodes)]
val_idx = indices[int(0.6 * author_nodes):int(0.8 * author_nodes)]
test_idx = indices[int(0.8 * author_nodes):]

train_mask[train_idx] = True
val_mask[val_idx] = True
test_mask[test_idx] = True

data['author'].train_mask = train_mask
data['author'].val_mask = val_mask
data['author'].test_mask = test_mask

# Define HGT Model
class HGT(torch.nn.Module):
    def __init__(self, hidden_channels, out_channels, num_heads, num_layers, metadata):
        super().__init__()
        self.lin_dict = torch.nn.ModuleDict()
        for node_type in metadata[0]:
            self.lin_dict[node_type] = Linear(-1, hidden_channels)

        self.convs = torch.nn.ModuleList()
        for _ in range(num_layers):
            conv = HGTConv(hidden_channels, hidden_channels, metadata, num_heads)
            self.convs.append(conv)

        self.lin = Linear(hidden_channels, out_channels)

    def forward(self, x_dict, edge_index_dict):
        x_dict = {
            node_type: self.lin_dict[node_type](x).relu()
            for node_type, x in x_dict.items()
        }

        for conv in self.convs:
            x_dict = conv(x_dict, edge_index_dict)

        return self.lin(x_dict['author'])

# Define RGCN Model
class RGCN(torch.nn.Module):
    def __init__(self, hidden_channels, out_channels, num_layers, num_relations):
        super().__init__()
        self.convs = torch.nn.ModuleList()
        self.convs.append(RGCNConv(-1, hidden_channels, num_relations=num_relations))
        for _ in range(num_layers - 1):
            self.convs.append(RGCNConv(hidden_channels, hidden_channels, num_relations=num_relations))

        self.lin = Linear(hidden_channels, out_channels)

    def forward(self, x, edge_index, edge_type):
        for conv in self.convs[:-1]:
            x = conv(x, edge_index, edge_type).relu()
        x = self.convs[-1](x, edge_index, edge_type)
        return self.lin(x)

# Define HAN Model
class HAN(torch.nn.Module):
    def __init__(self, hidden_channels, out_channels, num_heads, num_layers, metadata):
        super().__init__()
        self.convs = torch.nn.ModuleList()
        self.convs.append(HANConv(-1, hidden_channels, heads=num_heads,
                                 metadata=metadata, dropout=0.6))
        for _ in range(num_layers - 1):
            self.convs.append(HANConv(hidden_channels * num_heads, hidden_channels,
                                     heads=num_heads, metadata=metadata, dropout=0.6))

        self.lin = Linear(hidden_channels * num_heads, out_channels)

    def forward(self, x_dict, edge_index_dict):
        for conv in self.convs[:-1]:
            x_dict = conv(x_dict, edge_index_dict)
            x_dict = {key: F.elu(x) for key, x in x_dict.items()}
        x_dict = self.convs[-1](x_dict, edge_index_dict)
        return self.lin(x_dict['author'])

# Define GAT Model (homogeneous version)
class GAT(torch.nn.Module):
    def __init__(self, hidden_channels, out_channels, num_heads, num_layers):
        super().__init__()
        self.convs = torch.nn.ModuleList()
        self.convs.append(GATConv(-1, hidden_channels, heads=num_heads))
        for _ in range(num_layers - 1):
            self.convs.append(GATConv(hidden_channels * num_heads, hidden_channels,
                                     heads=num_heads))
        self.lin = Linear(hidden_channels * num_heads, out_channels)

    def forward(self, x, edge_index):
        for conv in self.convs[:-1]:
            x = conv(x, edge_index).relu()
        x = self.convs[-1](x, edge_index)
        return self.lin(x)

# Utility function to convert heterogeneous graph to homogeneous for GAT
def hetero_to_homo(hetero_data):
    """Convert heterogeneous graph to homogeneous for GAT"""
    # Create mapping from node type to global indices
    node_offsets = {}
    current_idx = 0
    for node_type in hetero_data.node_types:
        node_offsets[node_type] = current_idx
        current_idx += hetero_data[node_type].num_nodes

    # Combine all node features (they now have same dimension)
    x_list = []
    for node_type in hetero_data.node_types:
        x_list.append(hetero_data[node_type].x)
    x = torch.cat(x_list, dim=0)

    # Combine all edges with offsets
    edge_indices = []
    for edge_type in hetero_data.edge_types:
        src_type, rel_type, dst_type = edge_type
        edge_index = hetero_data[edge_type].edge_index
        edge_index = edge_index.clone()
        edge_index[0] += node_offsets[src_type]
        edge_index[1] += node_offsets[dst_type]
        edge_indices.append(edge_index)

    edge_index = torch.cat(edge_indices, dim=1)

    # Create author node mask for homogeneous graph
    author_mask = torch.zeros(x.size(0), dtype=torch.bool)
    author_start = node_offsets['author']
    author_end = author_start + hetero_data['author'].num_nodes
    author_mask[author_start:author_end] = True

    return x, edge_index, author_mask, node_offsets['author']

# Prepare data for RGCN
def prepare_rgcn_data(hetero_data):
    """Prepare data for RGCN with edge types"""
    x_list = []
    for node_type in hetero_data.node_types:
        x_list.append(hetero_data[node_type].x)
    x = torch.cat(x_list, dim=0)

    edge_indices = []
    edge_types = []
    for i, edge_type in enumerate(hetero_data.edge_types):
        edge_index = hetero_data[edge_type].edge_index
        edge_indices.append(edge_index)
        edge_types.append(torch.full((edge_index.size(1),), i, dtype=torch.long))

    edge_index = torch.cat(edge_indices, dim=1)
    edge_type = torch.cat(edge_types, dim=0)

    # Author mask
    author_mask = torch.zeros(x.size(0), dtype=torch.bool)
    current_idx = 0
    for node_type in hetero_data.node_types:
        if node_type == 'author':
            author_mask[current_idx:current_idx + hetero_data[node_type].num_nodes] = True
        current_idx += hetero_data[node_type].num_nodes

    return x, edge_index, edge_type, author_mask

# Convert data for GAT and RGCN
print("Preparing data for GAT and RGCN...")
x_homo, edge_index_homo, author_mask_homo, author_start_idx = hetero_to_homo(data)
x_rgcn, edge_index_rgcn, edge_type_rgcn, author_mask_rgcn = prepare_rgcn_data(data)

# Simple training function without NeighborLoader
def train_full_batch(model, data, optimizer, device, model_type='hetero', epochs=100):
    model.train()
    best_val_acc = 0

    for epoch in range(epochs):
        optimizer.zero_grad()

        if model_type == 'gat':
            out = model(data.x.to(device), data.edge_index.to(device))
            out = out[data.author_mask]
            y = data.y.to(device)
        elif model_type == 'rgcn':
            out = model(data.x.to(device), data.edge_index.to(device), data.edge_type.to(device))
            out = out[data.author_mask]
            y = data.y.to(device)
        else:  # hetero models (HGT, HAN)
            x_dict = {node_type: data[node_type].x.to(device) for node_type in data.node_types}
            edge_index_dict = {edge_type: data[edge_type].edge_index.to(device) for edge_type in data.edge_types}
            out = model(x_dict, edge_index_dict)
            y = data['author'].y.to(device)

        # Only use training nodes
        if model_type in ['gat', 'rgcn']:
            train_out = out[data.train_mask]
            train_y = y[data.train_mask]
        else:
            train_out = out[data['author'].train_mask]
            train_y = y[data['author'].train_mask]

        loss = F.cross_entropy(train_out, train_y)
        loss.backward()
        optimizer.step()

        if epoch % 20 == 0:
            # Validation
            model.eval()
            with torch.no_grad():
                if model_type in ['gat', 'rgcn']:
                    val_out = out[data.val_mask]
                    val_y = y[data.val_mask]
                else:
                    val_out = out[data['author'].val_mask]
                    val_y = y[data['author'].val_mask]

                val_pred = val_out.argmax(dim=1)
                val_acc = accuracy_score(val_y.cpu().numpy(), val_pred.cpu().numpy())
                val_f1 = f1_score(val_y.cpu().numpy(), val_pred.cpu().numpy(), average='macro')

                print(f'Epoch {epoch:03d}, Loss: {loss:.4f}, Val Acc: {val_acc:.4f}, Val F1: {val_f1:.4f}')

                if val_acc > best_val_acc:
                    best_val_acc = val_acc
                    # Save best model state
                    torch.save(model.state_dict(), f'best_{model_type}.pth')

            model.train()

    return best_val_acc

def evaluate_full_batch(model, data, device, model_type='hetero'):
    model.eval()
    with torch.no_grad():
        if model_type == 'gat':
            out = model(data.x.to(device), data.edge_index.to(device))
            out = out[data.author_mask]
            y = data.y.to(device)
            test_out = out[data.test_mask]
            test_y = y[data.test_mask]
        elif model_type == 'rgcn':
            out = model(data.x.to(device), data.edge_index.to(device), data.edge_type.to(device))
            out = out[data.author_mask]
            y = data.y.to(device)
            test_out = out[data.test_mask]
            test_y = y[data.test_mask]
        else:  # hetero models (HGT, HAN)
            x_dict = {node_type: data[node_type].x.to(device) for node_type in data.node_types}
            edge_index_dict = {edge_type: data[edge_type].edge_index.to(device) for edge_type in data.edge_types}
            out = model(x_dict, edge_index_dict)
            y = data['author'].y.to(device)
            test_out = out[data['author'].test_mask]
            test_y = y[data['author'].test_mask]

        test_pred = test_out.argmax(dim=1)
        test_acc = accuracy_score(test_y.cpu().numpy(), test_pred.cpu().numpy())
        test_f1 = f1_score(test_y.cpu().numpy(), test_pred.cpu().numpy(), average='macro')

    return test_acc, test_f1

# Benchmark different models
def benchmark_models():
    results = {}
    num_classes = len(set(data['author'].y.tolist()))

    # Create data objects for GAT and RGCN
    class HomoData:
        def __init__(self, x, edge_index, author_mask, author_start_idx, original_data):
            self.x = x
            self.edge_index = edge_index
            self.author_mask = author_mask
            self.author_start_idx = author_start_idx
            self.y = original_data['author'].y
            self.train_mask = original_data['author'].train_mask
            self.val_mask = original_data['author'].val_mask
            self.test_mask = original_data['author'].test_mask

    class RGCNData:
        def __init__(self, x, edge_index, edge_type, author_mask, original_data):
            self.x = x
            self.edge_index = edge_index
            self.edge_type = edge_type
            self.author_mask = author_mask
            self.y = original_data['author'].y
            self.train_mask = original_data['author'].train_mask
            self.val_mask = original_data['author'].val_mask
            self.test_mask = original_data['author'].test_mask

    gat_data = HomoData(x_homo, edge_index_homo, author_mask_homo, author_start_idx, data)
    rgcn_data = RGCNData(x_rgcn, edge_index_rgcn, edge_type_rgcn, author_mask_rgcn, data)

    # HGT Model
    print("\n" + "="*50)
    print("Training HGT Model...")
    print("="*50)
    hgt_model = HGT(hidden_channels=64, out_channels=num_classes, num_heads=4, num_layers=2, metadata=data.metadata())
    hgt_model = hgt_model.to(device)
    hgt_optimizer = torch.optim.Adam(hgt_model.parameters(), lr=0.005, weight_decay=5e-4)

    train_full_batch(hgt_model, data, hgt_optimizer, device, model_type='hetero', epochs=100)
    test_acc, test_f1 = evaluate_full_batch(hgt_model, data, device, model_type='hetero')
    results['HGT'] = {'accuracy': test_acc, 'f1_score': test_f1}
    print(f'HGT Test Results - Acc: {test_acc:.4f}, F1: {test_f1:.4f}')

    # RGCN Model
    print("\n" + "="*50)
    print("Training RGCN Model...")
    print("="*50)
    rgcn_model = RGCN(hidden_channels=64, out_channels=num_classes, num_layers=2, num_relations=len(data.edge_types))
    rgcn_model = rgcn_model.to(device)
    rgcn_optimizer = torch.optim.Adam(rgcn_model.parameters(), lr=0.005, weight_decay=5e-4)

    train_full_batch(rgcn_model, rgcn_data, rgcn_optimizer, device, model_type='rgcn', epochs=100)
    test_acc, test_f1 = evaluate_full_batch(rgcn_model, rgcn_data, device, model_type='rgcn')
    results['RGCN'] = {'accuracy': test_acc, 'f1_score': test_f1}
    print(f'RGCN Test Results - Acc: {test_acc:.4f}, F1: {test_f1:.4f}')

    # HAN Model
    print("\n" + "="*50)
    print("Training HAN Model...")
    print("="*50)
    han_model = HAN(hidden_channels=64, out_channels=num_classes, num_heads=4, num_layers=2, metadata=data.metadata())
    han_model = han_model.to(device)
    han_optimizer = torch.optim.Adam(han_model.parameters(), lr=0.005, weight_decay=5e-4)

    train_full_batch(han_model, data, han_optimizer, device, model_type='hetero', epochs=100)
    test_acc, test_f1 = evaluate_full_batch(han_model, data, device, model_type='hetero')
    results['HAN'] = {'accuracy': test_acc, 'f1_score': test_f1}
    print(f'HAN Test Results - Acc: {test_acc:.4f}, F1: {test_f1:.4f}')

    # GAT Model
    print("\n" + "="*50)
    print("Training GAT Model...")
    print("="*50)
    gat_model = GAT(hidden_channels=64, out_channels=num_classes, num_heads=4, num_layers=2)
    gat_model = gat_model.to(device)
    gat_optimizer = torch.optim.Adam(gat_model.parameters(), lr=0.005, weight_decay=5e-4)

    train_full_batch(gat_model, gat_data, gat_optimizer, device, model_type='gat', epochs=100)
    test_acc, test_f1 = evaluate_full_batch(gat_model, gat_data, device, model_type='gat')
    results['GAT'] = {'accuracy': test_acc, 'f1_score': test_f1}
    print(f'GAT Test Results - Acc: {test_acc:.4f}, F1: {test_f1:.4f}')

    return results

# Run benchmark
if __name__ == "__main__":
    results = benchmark_models()

    print("\n" + "="*60)
    print("FINAL BENCHMARK RESULTS:")
    print("="*60)
    for model_name, metrics in results.items():
        print(f"{model_name}:")
        print(f"  Accuracy: {metrics['accuracy']:.4f}")
        print(f"  F1-Score: {metrics['f1_score']:.4f}")
        print()

    # Find best model
    best_model = max(results.items(), key=lambda x: x[1]['accuracy'])
    print(f"üèÜ BEST MODEL: {best_model[0]} with accuracy {best_model[1]['accuracy']:.4f}")

Dataset info:
Number of nodes: 26128
Number of edges: 239566
Node types: ['author', 'paper', 'term', 'conference']
Edge types: [('author', 'to', 'paper'), ('paper', 'to', 'author'), ('paper', 'to', 'term'), ('paper', 'to', 'conference'), ('term', 'to', 'paper'), ('conference', 'to', 'paper')]
Number of author classes: 4
Creating consistent node features...
Node feature dimensions:
  author: torch.Size([4057, 128])
  paper: torch.Size([14328, 128])
  term: torch.Size([7723, 128])
  conference: torch.Size([20, 128])
Using device: cpu
Preparing data for GAT and RGCN...

Training HGT Model...
Epoch 000, Loss: 1.4116, Val Acc: 0.1677, Val F1: 0.0718
Epoch 020, Loss: 0.5421, Val Acc: 0.6708, Val F1: 0.6398
Epoch 040, Loss: 0.0360, Val Acc: 0.9285, Val F1: 0.9199
Epoch 060, Loss: 0.0020, Val Acc: 0.9309, Val F1: 0.9226
Epoch 080, Loss: 0.0023, Val Acc: 0.9322, Val F1: 0.9237
HGT Test Results - Acc: 0.9397, F1: 0.9323

Training RGCN Model...


RuntimeError: Trying to create tensor with negative dimension -1: [6, -1, 64]

In [18]:
# Save this as bench_dblp.py and run with: python bench_dblp.py

import torch
import random
import numpy as np
from torch_geometric.datasets import DBLP
from torch_geometric.nn import HGTConv, RGCNConv, GATConv, HANConv, Linear
from torch_geometric.data import HeteroData
from torch.nn import functional as F
from sklearn.metrics import f1_score, accuracy_score
import time

# Set random seeds for reproducibility
torch.manual_seed(42)
np.random.seed(42)
random.seed(42)

# Load DBLP dataset
dataset = DBLP(root='/tmp/DBLP')
data = dataset[0]

print("Dataset info:")
print(f"Number of nodes: {data.num_nodes}")
print(f"Number of edges: {data.num_edges}")
print(f"Node types: {data.node_types}")
print(f"Edge types: {data.edge_types}")
print(f"Number of author classes: {len(set(data['author'].y.tolist()))}")

# Fix: Create consistent node features with same dimensions
print("Creating consistent node features...")
feature_dim = 128  # Consistent feature dimension for all node types

for node_type in data.node_types:
    num_nodes = data[node_type].num_nodes
    # Create random features with consistent dimension
    data[node_type].x = torch.randn(num_nodes, feature_dim, dtype=torch.float)

print("Node feature dimensions:")
for node_type in data.node_types:
    print(f"  {node_type}: {data[node_type].x.shape}")

# Define device
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f"Using device: {device}")

# Create train/val/test masks for author nodes (the target node type)
author_nodes = data['author'].num_nodes
train_mask = torch.zeros(author_nodes, dtype=torch.bool)
val_mask = torch.zeros(author_nodes, dtype=torch.bool)
test_mask = torch.zeros(author_nodes, dtype=torch.bool)

# Randomly split author nodes
indices = torch.randperm(author_nodes)
train_idx = indices[:int(0.6 * author_nodes)]
val_idx = indices[int(0.6 * author_nodes):int(0.8 * author_nodes)]
test_idx = indices[int(0.8 * author_nodes):]

train_mask[train_idx] = True
val_mask[val_idx] = True
test_mask[test_idx] = True

data['author'].train_mask = train_mask
data['author'].val_mask = val_mask
data['author'].test_mask = test_mask

# Define HGT Model
class HGT(torch.nn.Module):
    def __init__(self, hidden_channels, out_channels, num_heads, num_layers, metadata):
        super().__init__()
        self.lin_dict = torch.nn.ModuleDict()
        for node_type in metadata[0]:
            self.lin_dict[node_type] = Linear(-1, hidden_channels)

        self.convs = torch.nn.ModuleList()
        for _ in range(num_layers):
            conv = HGTConv(hidden_channels, hidden_channels, metadata, num_heads)
            self.convs.append(conv)

        self.lin = Linear(hidden_channels, out_channels)

    def forward(self, x_dict, edge_index_dict):
        x_dict = {
            node_type: self.lin_dict[node_type](x).relu()
            for node_type, x in x_dict.items()
        }

        for conv in self.convs:
            x_dict = conv(x_dict, edge_index_dict)

        return self.lin(x_dict['author'])

# Define RGCN Model
class RGCN(torch.nn.Module):
    def __init__(self, in_channels, hidden_channels, out_channels, num_layers, num_relations):
        super().__init__()
        self.convs = torch.nn.ModuleList()
        # First layer with explicit input dimension
        self.convs.append(RGCNConv(in_channels, hidden_channels, num_relations=num_relations))
        # Middle layers
        for _ in range(num_layers - 2):
            self.convs.append(RGCNConv(hidden_channels, hidden_channels, num_relations=num_relations))
        # Last layer
        if num_layers > 1:
            self.convs.append(RGCNConv(hidden_channels, hidden_channels, num_relations=num_relations))

        self.lin = Linear(hidden_channels, out_channels)

    def forward(self, x, edge_index, edge_type):
        for conv in self.convs:
            x = conv(x, edge_index, edge_type).relu()
        return self.lin(x)

# Define HAN Model (Fixed)
class HAN(torch.nn.Module):
    def __init__(self, in_channels, hidden_channels, out_channels, num_heads, num_layers, metadata):
        super().__init__()
        self.convs = torch.nn.ModuleList()
        # First layer with explicit input dimension
        self.convs.append(HANConv(in_channels, hidden_channels, heads=num_heads,
                                 metadata=metadata, dropout=0.6))
        # Middle layers
        for _ in range(num_layers - 1):
            self.convs.append(HANConv(hidden_channels * num_heads, hidden_channels,
                                     heads=num_heads, metadata=metadata, dropout=0.6))

        self.lin = Linear(hidden_channels * num_heads, out_channels)

    def forward(self, x_dict, edge_index_dict):
        for conv in self.convs[:-1]:
            x_dict = conv(x_dict, edge_index_dict)
            x_dict = {key: F.elu(x) for key, x in x_dict.items()}
        x_dict = self.convs[-1](x_dict, edge_index_dict)
        return self.lin(x_dict['author'])

# Define GAT Model (homogeneous version)
class GAT(torch.nn.Module):
    def __init__(self, hidden_channels, out_channels, num_heads, num_layers):
        super().__init__()
        self.convs = torch.nn.ModuleList()
        self.convs.append(GATConv(-1, hidden_channels, heads=num_heads))
        for _ in range(num_layers - 1):
            self.convs.append(GATConv(hidden_channels * num_heads, hidden_channels,
                                     heads=num_heads))
        self.lin = Linear(hidden_channels * num_heads, out_channels)

    def forward(self, x, edge_index):
        for conv in self.convs[:-1]:
            x = conv(x, edge_index).relu()
        x = self.convs[-1](x, edge_index)
        return self.lin(x)

# Utility function to convert heterogeneous graph to homogeneous for GAT
def hetero_to_homo(hetero_data):
    """Convert heterogeneous graph to homogeneous for GAT"""
    # Create mapping from node type to global indices
    node_offsets = {}
    current_idx = 0
    for node_type in hetero_data.node_types:
        node_offsets[node_type] = current_idx
        current_idx += hetero_data[node_type].num_nodes

    # Combine all node features (they now have same dimension)
    x_list = []
    for node_type in hetero_data.node_types:
        x_list.append(hetero_data[node_type].x)
    x = torch.cat(x_list, dim=0)

    # Combine all edges with offsets
    edge_indices = []
    for edge_type in hetero_data.edge_types:
        src_type, rel_type, dst_type = edge_type
        edge_index = hetero_data[edge_type].edge_index
        edge_index = edge_index.clone()
        edge_index[0] += node_offsets[src_type]
        edge_index[1] += node_offsets[dst_type]
        edge_indices.append(edge_index)

    edge_index = torch.cat(edge_indices, dim=1)

    # Create author node mask for homogeneous graph
    author_mask = torch.zeros(x.size(0), dtype=torch.bool)
    author_start = node_offsets['author']
    author_end = author_start + hetero_data['author'].num_nodes
    author_mask[author_start:author_end] = True

    return x, edge_index, author_mask, node_offsets['author']

# Prepare data for RGCN
def prepare_rgcn_data(hetero_data):
    """Prepare data for RGCN with edge types"""
    x_list = []
    for node_type in hetero_data.node_types:
        x_list.append(hetero_data[node_type].x)
    x = torch.cat(x_list, dim=0)

    edge_indices = []
    edge_types = []
    for i, edge_type in enumerate(hetero_data.edge_types):
        edge_index = hetero_data[edge_type].edge_index
        edge_indices.append(edge_index)
        edge_types.append(torch.full((edge_index.size(1),), i, dtype=torch.long))

    edge_index = torch.cat(edge_indices, dim=1)
    edge_type = torch.cat(edge_types, dim=0)

    # Author mask
    author_mask = torch.zeros(x.size(0), dtype=torch.bool)
    current_idx = 0
    for node_type in hetero_data.node_types:
        if node_type == 'author':
            author_mask[current_idx:current_idx + hetero_data[node_type].num_nodes] = True
        current_idx += hetero_data[node_type].num_nodes

    return x, edge_index, edge_type, author_mask

# Convert data for GAT and RGCN
print("Preparing data for GAT and RGCN...")
x_homo, edge_index_homo, author_mask_homo, author_start_idx = hetero_to_homo(data)
x_rgcn, edge_index_rgcn, edge_type_rgcn, author_mask_rgcn = prepare_rgcn_data(data)

# Simple training function without NeighborLoader
def train_full_batch(model, data, optimizer, device, model_type='hetero', epochs=100):
    model.train()
    best_val_acc = 0

    for epoch in range(epochs):
        optimizer.zero_grad()

        if model_type == 'gat':
            out = model(data.x.to(device), data.edge_index.to(device))
            out = out[data.author_mask]
            y = data.y.to(device)
        elif model_type == 'rgcn':
            out = model(data.x.to(device), data.edge_index.to(device), data.edge_type.to(device))
            out = out[data.author_mask]
            y = data.y.to(device)
        else:  # hetero models (HGT, HAN)
            x_dict = {node_type: data[node_type].x.to(device) for node_type in data.node_types}
            edge_index_dict = {edge_type: data[edge_type].edge_index.to(device) for edge_type in data.edge_types}
            out = model(x_dict, edge_index_dict)
            y = data['author'].y.to(device)

        # Only use training nodes
        if model_type in ['gat', 'rgcn']:
            train_out = out[data.train_mask]
            train_y = y[data.train_mask]
        else:
            train_out = out[data['author'].train_mask]
            train_y = y[data['author'].train_mask]

        loss = F.cross_entropy(train_out, train_y)
        loss.backward()
        optimizer.step()

        if epoch % 20 == 0:
            # Validation
            model.eval()
            with torch.no_grad():
                if model_type in ['gat', 'rgcn']:
                    val_out = out[data.val_mask]
                    val_y = y[data.val_mask]
                else:
                    val_out = out[data['author'].val_mask]
                    val_y = y[data['author'].val_mask]

                val_pred = val_out.argmax(dim=1)
                val_acc = accuracy_score(val_y.cpu().numpy(), val_pred.cpu().numpy())
                val_f1 = f1_score(val_y.cpu().numpy(), val_pred.cpu().numpy(), average='macro')

                print(f'Epoch {epoch:03d}, Loss: {loss:.4f}, Val Acc: {val_acc:.4f}, Val F1: {val_f1:.4f}')

                if val_acc > best_val_acc:
                    best_val_acc = val_acc
                    # Save best model state
                    torch.save(model.state_dict(), f'best_{model_type}.pth')

            model.train()

    return best_val_acc

def evaluate_full_batch(model, data, device, model_type='hetero'):
    model.eval()
    with torch.no_grad():
        if model_type == 'gat':
            out = model(data.x.to(device), data.edge_index.to(device))
            out = out[data.author_mask]
            y = data.y.to(device)
            test_out = out[data.test_mask]
            test_y = y[data.test_mask]
        elif model_type == 'rgcn':
            out = model(data.x.to(device), data.edge_index.to(device), data.edge_type.to(device))
            out = out[data.author_mask]
            y = data.y.to(device)
            test_out = out[data.test_mask]
            test_y = y[data.test_mask]
        else:  # hetero models (HGT, HAN)
            x_dict = {node_type: data[node_type].x.to(device) for node_type in data.node_types}
            edge_index_dict = {edge_type: data[edge_type].edge_index.to(device) for edge_type in data.edge_types}
            out = model(x_dict, edge_index_dict)
            y = data['author'].y.to(device)
            test_out = out[data['author'].test_mask]
            test_y = y[data['author'].test_mask]

        test_pred = test_out.argmax(dim=1)
        test_acc = accuracy_score(test_y.cpu().numpy(), test_pred.cpu().numpy())
        test_f1 = f1_score(test_y.cpu().numpy(), test_pred.cpu().numpy(), average='macro')

    return test_acc, test_f1

# Benchmark different models
def benchmark_models():
    results = {}
    num_classes = len(set(data['author'].y.tolist()))

    # Create data objects for GAT and RGCN
    class HomoData:
        def __init__(self, x, edge_index, author_mask, author_start_idx, original_data):
            self.x = x
            self.edge_index = edge_index
            self.author_mask = author_mask
            self.author_start_idx = author_start_idx
            self.y = original_data['author'].y
            self.train_mask = original_data['author'].train_mask
            self.val_mask = original_data['author'].val_mask
            self.test_mask = original_data['author'].test_mask

    class RGCNData:
        def __init__(self, x, edge_index, edge_type, author_mask, original_data):
            self.x = x
            self.edge_index = edge_index
            self.edge_type = edge_type
            self.author_mask = author_mask
            self.y = original_data['author'].y
            self.train_mask = original_data['author'].train_mask
            self.val_mask = original_data['author'].val_mask
            self.test_mask = original_data['author'].test_mask

    gat_data = HomoData(x_homo, edge_index_homo, author_mask_homo, author_start_idx, data)
    rgcn_data = RGCNData(x_rgcn, edge_index_rgcn, edge_type_rgcn, author_mask_rgcn, data)

    # HGT Model
    print("\n" + "="*50)
    print("Training HGT Model...")
    print("="*50)
    hgt_model = HGT(hidden_channels=64, out_channels=num_classes, num_heads=4, num_layers=2, metadata=data.metadata())
    hgt_model = hgt_model.to(device)
    hgt_optimizer = torch.optim.Adam(hgt_model.parameters(), lr=0.005, weight_decay=5e-4)

    train_full_batch(hgt_model, data, hgt_optimizer, device, model_type='hetero', epochs=100)
    test_acc, test_f1 = evaluate_full_batch(hgt_model, data, device, model_type='hetero')
    results['HGT'] = {'accuracy': test_acc, 'f1_score': test_f1}
    print(f'HGT Test Results - Acc: {test_acc:.4f}, F1: {test_f1:.4f}')

    # RGCN Model
    print("\n" + "="*50)
    print("Training RGCN Model...")
    print("="*50)
    rgcn_model = RGCN(in_channels=feature_dim, hidden_channels=64, out_channels=num_classes,
                     num_layers=2, num_relations=len(data.edge_types))
    rgcn_model = rgcn_model.to(device)
    rgcn_optimizer = torch.optim.Adam(rgcn_model.parameters(), lr=0.005, weight_decay=5e-4)

    train_full_batch(rgcn_model, rgcn_data, rgcn_optimizer, device, model_type='rgcn', epochs=100)
    test_acc, test_f1 = evaluate_full_batch(rgcn_model, rgcn_data, device, model_type='rgcn')
    results['RGCN'] = {'accuracy': test_acc, 'f1_score': test_f1}
    print(f'RGCN Test Results - Acc: {test_acc:.4f}, F1: {test_f1:.4f}')

    # # HAN Model (Fixed)
    # print("\n" + "="*50)
    # print("Training HAN Model...")
    # print("="*50)
    # han_model = HAN(in_channels=feature_dim, hidden_channels=64, out_channels=num_classes,
    #                num_heads=4, num_layers=2, metadata=data.metadata())
    # han_model = han_model.to(device)
    # han_optimizer = torch.optim.Adam(han_model.parameters(), lr=0.005, weight_decay=5e-4)

    # train_full_batch(han_model, data, han_optimizer, device, model_type='hetero', epochs=100)
    # test_acc, test_f1 = evaluate_full_batch(han_model, data, device, model_type='hetero')
    # results['HAN'] = {'accuracy': test_acc, 'f1_score': test_f1}
    # print(f'HAN Test Results - Acc: {test_acc:.4f}, F1: {test_f1:.4f}')

    # GAT Model
    print("\n" + "="*50)
    print("Training GAT Model...")
    print("="*50)
    gat_model = GAT(hidden_channels=64, out_channels=num_classes, num_heads=4, num_layers=2)
    gat_model = gat_model.to(device)
    gat_optimizer = torch.optim.Adam(gat_model.parameters(), lr=0.005, weight_decay=5e-4)

    train_full_batch(gat_model, gat_data, gat_optimizer, device, model_type='gat', epochs=100)
    test_acc, test_f1 = evaluate_full_batch(gat_model, gat_data, device, model_type='gat')
    results['GAT'] = {'accuracy': test_acc, 'f1_score': test_f1}
    print(f'GAT Test Results - Acc: {test_acc:.4f}, F1: {test_f1:.4f}')

    return results

# Run benchmark
if __name__ == "__main__":
    results = benchmark_models()

    print("\n" + "="*60)
    print("FINAL BENCHMARK RESULTS:")
    print("="*60)
    for model_name, metrics in results.items():
        print(f"{model_name}:")
        print(f"  Accuracy: {metrics['accuracy']:.4f}")
        print(f"  F1-Score: {metrics['f1_score']:.4f}")
        print()

    # Find best model
    best_model = max(results.items(), key=lambda x: x[1]['accuracy'])
    print(f"üèÜ BEST MODEL: {best_model[0]} with accuracy {best_model[1]['accuracy']:.4f}")

Dataset info:
Number of nodes: 26128
Number of edges: 239566
Node types: ['author', 'paper', 'term', 'conference']
Edge types: [('author', 'to', 'paper'), ('paper', 'to', 'author'), ('paper', 'to', 'term'), ('paper', 'to', 'conference'), ('term', 'to', 'paper'), ('conference', 'to', 'paper')]
Number of author classes: 4
Creating consistent node features...
Node feature dimensions:
  author: torch.Size([4057, 128])
  paper: torch.Size([14328, 128])
  term: torch.Size([7723, 128])
  conference: torch.Size([20, 128])
Using device: cpu
Preparing data for GAT and RGCN...

Training HGT Model...
Epoch 000, Loss: 1.4116, Val Acc: 0.1677, Val F1: 0.0718
Epoch 020, Loss: 0.5421, Val Acc: 0.6708, Val F1: 0.6398
Epoch 040, Loss: 0.0360, Val Acc: 0.9285, Val F1: 0.9199
Epoch 060, Loss: 0.0020, Val Acc: 0.9309, Val F1: 0.9226
Epoch 080, Loss: 0.0023, Val Acc: 0.9322, Val F1: 0.9237
HGT Test Results - Acc: 0.9397, F1: 0.9323

Training RGCN Model...
Epoch 000, Loss: 1.7967, Val Acc: 0.2910, Val F1: 0.

In [25]:
# Save this as bench_dblp.py and run with: python bench_dblp.py

import torch
import random
import numpy as np
from torch_geometric.datasets import DBLP
from torch_geometric.nn import HGTConv, RGCNConv, GATConv, HANConv, Linear
from torch_geometric.data import HeteroData
from torch.nn import functional as F
from sklearn.metrics import f1_score, accuracy_score
import time

# Set random seeds for reproducibility
torch.manual_seed(42)
np.random.seed(42)
random.seed(42)

# Load DBLP dataset
dataset = DBLP(root='/tmp/DBLP')
data = dataset[0]

print("Dataset info:")
print(f"Number of nodes: {data.num_nodes}")
print(f"Number of edges: {data.num_edges}")
print(f"Node types: {data.node_types}")
print(f"Edge types: {data.edge_types}")
print(f"Number of author classes: {len(set(data['author'].y.tolist()))}")

# Fix: Create consistent node features with same dimensions
print("Creating consistent node features...")
feature_dim = 128  # Consistent feature dimension for all node types

for node_type in data.node_types:
    num_nodes = data[node_type].num_nodes
    # Create random features with consistent dimension
    data[node_type].x = torch.randn(num_nodes, feature_dim, dtype=torch.float)

print("Node feature dimensions:")
for node_type in data.node_types:
    print(f"  {node_type}: {data[node_type].x.shape}")

# Define device
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f"Using device: {device}")

# Create train/val/test masks for author nodes (the target node type)
author_nodes = data['author'].num_nodes
train_mask = torch.zeros(author_nodes, dtype=torch.bool)
val_mask = torch.zeros(author_nodes, dtype=torch.bool)
test_mask = torch.zeros(author_nodes, dtype=torch.bool)

# Randomly split author nodes
indices = torch.randperm(author_nodes)
train_idx = indices[:int(0.6 * author_nodes)]
val_idx = indices[int(0.6 * author_nodes):int(0.8 * author_nodes)]
test_idx = indices[int(0.8 * author_nodes):]

train_mask[train_idx] = True
val_mask[val_idx] = True
test_mask[test_idx] = True

data['author'].train_mask = train_mask
data['author'].val_mask = val_mask
data['author'].test_mask = test_mask

# Define HGT Model
class HGT(torch.nn.Module):
    def __init__(self, hidden_channels, out_channels, num_heads, num_layers, metadata):
        super().__init__()
        self.lin_dict = torch.nn.ModuleDict()
        for node_type in metadata[0]:
            self.lin_dict[node_type] = Linear(-1, hidden_channels)

        self.convs = torch.nn.ModuleList()
        for _ in range(num_layers):
            conv = HGTConv(hidden_channels, hidden_channels, metadata, num_heads)
            self.convs.append(conv)

        self.lin = Linear(hidden_channels, out_channels)

    def forward(self, x_dict, edge_index_dict):
        x_dict = {
            node_type: self.lin_dict[node_type](x).relu()
            for node_type, x in x_dict.items()
        }

        for conv in self.convs:
            x_dict = conv(x_dict, edge_index_dict)

        return self.lin(x_dict['author'])

# Define RGCN Model
class RGCN(torch.nn.Module):
    def __init__(self, in_channels, hidden_channels, out_channels, num_layers, num_relations):
        super().__init__()
        self.convs = torch.nn.ModuleList()
        # First layer with explicit input dimension
        self.convs.append(RGCNConv(in_channels, hidden_channels, num_relations=num_relations))
        # Middle layers
        for _ in range(num_layers - 2):
            self.convs.append(RGCNConv(hidden_channels, hidden_channels, num_relations=num_relations))
        # Last layer
        if num_layers > 1:
            self.convs.append(RGCNConv(hidden_channels, hidden_channels, num_relations=num_relations))

        self.lin = Linear(hidden_channels, out_channels)

    def forward(self, x, edge_index, edge_type):
        for conv in self.convs:
            x = conv(x, edge_index, edge_type).relu()
        return self.lin(x)

# Define HAN Model
class HAN(torch.nn.Module):
    def __init__(self, in_channels, hidden_channels, out_channels, num_heads, num_layers, metadata):
        super().__init__()
        self.convs = torch.nn.ModuleList()
        # First layer with explicit input dimension
        self.convs.append(HANConv(in_channels, hidden_channels, heads=num_heads,
                                 metadata=metadata, dropout=0.6))
        # Middle layers
        for _ in range(num_layers - 1):
            self.convs.append(HANConv(hidden_channels * num_heads, hidden_channels,
                                     heads=num_heads, metadata=metadata, dropout=0.6))

        self.lin = Linear(hidden_channels * num_heads, out_channels)

    def forward(self, x_dict, edge_index_dict):
        for conv in self.convs[:-1]:
            x_dict = conv(x_dict, edge_index_dict)
            x_dict = {key: F.elu(x) for key, x in x_dict.items()}
        x_dict = self.convs[-1](x_dict, edge_index_dict)
        return self.lin(x_dict['author'])

# Define GAT Model (homogeneous version)
class GAT(torch.nn.Module):
    def __init__(self, hidden_channels, out_channels, num_heads, num_layers):
        super().__init__()
        self.convs = torch.nn.ModuleList()
        self.convs.append(GATConv(-1, hidden_channels, heads=num_heads))
        for _ in range(num_layers - 1):
            self.convs.append(GATConv(hidden_channels * num_heads, hidden_channels,
                                     heads=num_heads))
        self.lin = Linear(hidden_channels * num_heads, out_channels)

    def forward(self, x, edge_index):
        for conv in self.convs[:-1]:
            x = conv(x, edge_index).relu()
        x = self.convs[-1](x, edge_index)
        return self.lin(x)

# Utility function to convert heterogeneous graph to homogeneous for GAT
def hetero_to_homo(hetero_data):
    """Convert heterogeneous graph to homogeneous for GAT"""
    # Create mapping from node type to global indices
    node_offsets = {}
    current_idx = 0
    for node_type in hetero_data.node_types:
        node_offsets[node_type] = current_idx
        current_idx += hetero_data[node_type].num_nodes

    # Combine all node features (they now have same dimension)
    x_list = []
    for node_type in hetero_data.node_types:
        x_list.append(hetero_data[node_type].x)
    x = torch.cat(x_list, dim=0)

    # Combine all edges with offsets
    edge_indices = []
    for edge_type in hetero_data.edge_types:
        src_type, rel_type, dst_type = edge_type
        edge_index = hetero_data[edge_type].edge_index
        edge_index = edge_index.clone()
        edge_index[0] += node_offsets[src_type]
        edge_index[1] += node_offsets[dst_type]
        edge_indices.append(edge_index)

    edge_index = torch.cat(edge_indices, dim=1)

    # Create author node mask for homogeneous graph
    author_mask = torch.zeros(x.size(0), dtype=torch.bool)
    author_start = node_offsets['author']
    author_end = author_start + hetero_data['author'].num_nodes
    author_mask[author_start:author_end] = True

    return x, edge_index, author_mask, node_offsets['author']

# Prepare data for RGCN
def prepare_rgcn_data(hetero_data):
    """Prepare data for RGCN with edge types"""
    x_list = []
    for node_type in hetero_data.node_types:
        x_list.append(hetero_data[node_type].x)
    x = torch.cat(x_list, dim=0)

    edge_indices = []
    edge_types = []
    for i, edge_type in enumerate(hetero_data.edge_types):
        edge_index = hetero_data[edge_type].edge_index
        edge_indices.append(edge_index)
        edge_types.append(torch.full((edge_index.size(1),), i, dtype=torch.long))

    edge_index = torch.cat(edge_indices, dim=1)
    edge_type = torch.cat(edge_types, dim=0)

    # Author mask
    author_mask = torch.zeros(x.size(0), dtype=torch.bool)
    current_idx = 0
    for node_type in hetero_data.node_types:
        if node_type == 'author':
            author_mask[current_idx:current_idx + hetero_data[node_type].num_nodes] = True
        current_idx += hetero_data[node_type].num_nodes

    return x, edge_index, edge_type, author_mask

# Convert data for GAT and RGCN
print("Preparing data for GAT and RGCN...")
x_homo, edge_index_homo, author_mask_homo, author_start_idx = hetero_to_homo(data)
x_rgcn, edge_index_rgcn, edge_type_rgcn, author_mask_rgcn = prepare_rgcn_data(data)

# Simple training function without NeighborLoader
def train_full_batch(model, data, optimizer, device, model_type='hetero', epochs=100):
    model.train()
    best_val_acc = 0

    for epoch in range(epochs):
        optimizer.zero_grad()

        if model_type == 'gat':
            out = model(data.x.to(device), data.edge_index.to(device))
            out = out[data.author_mask]
            y = data.y.to(device)
        elif model_type == 'rgcn':
            out = model(data.x.to(device), data.edge_index.to(device), data.edge_type.to(device))
            out = out[data.author_mask]
            y = data.y.to(device)
        else:  # hetero models (HGT, HAN)
            x_dict = {node_type: data[node_type].x.to(device) for node_type in data.node_types}
            edge_index_dict = {edge_type: data[edge_type].edge_index.to(device) for edge_type in data.edge_types}
            out = model(x_dict, edge_index_dict)
            y = data['author'].y.to(device)

        # Only use training nodes
        if model_type in ['gat', 'rgcn']:
            train_out = out[data.train_mask]
            train_y = y[data.train_mask]
        else:
            train_out = out[data['author'].train_mask]
            train_y = y[data['author'].train_mask]

        loss = F.cross_entropy(train_out, train_y)
        loss.backward()
        optimizer.step()

        if epoch % 20 == 0:
            # Validation
            model.eval()
            with torch.no_grad():
                if model_type in ['gat', 'rgcn']:
                    val_out = out[data.val_mask]
                    val_y = y[data.val_mask]
                else:
                    val_out = out[data['author'].val_mask]
                    val_y = y[data['author'].val_mask]

                val_pred = val_out.argmax(dim=1)
                val_acc = accuracy_score(val_y.cpu().numpy(), val_pred.cpu().numpy())
                val_f1 = f1_score(val_y.cpu().numpy(), val_pred.cpu().numpy(), average='macro')

                print(f'Epoch {epoch:03d}, Loss: {loss:.4f}, Val Acc: {val_acc:.4f}, Val F1: {val_f1:.4f}')

                if val_acc > best_val_acc:
                    best_val_acc = val_acc
                    # Save best model state
                    torch.save(model.state_dict(), f'best_{model_type}.pth')

            model.train()

    return best_val_acc

def evaluate_full_batch(model, data, device, model_type='hetero'):
    model.eval()
    with torch.no_grad():
        if model_type == 'gat':
            out = model(data.x.to(device), data.edge_index.to(device))
            out = out[data.author_mask]
            y = data.y.to(device)
            test_out = out[data.test_mask]
            test_y = y[data.test_mask]
        elif model_type == 'rgcn':
            out = model(data.x.to(device), data.edge_index.to(device), data.edge_type.to(device))
            out = out[data.author_mask]
            y = data.y.to(device)
            test_out = out[data.test_mask]
            test_y = y[data.test_mask]
        else:  # hetero models (HGT, HAN)
            x_dict = {node_type: data[node_type].x.to(device) for node_type in data.node_types}
            edge_index_dict = {edge_type: data[edge_type].edge_index.to(device) for edge_type in data.edge_types}
            out = model(x_dict, edge_index_dict)
            y = data['author'].y.to(device)
            test_out = out[data['author'].test_mask]
            test_y = y[data['author'].test_mask]

        test_pred = test_out.argmax(dim=1)
        test_acc = accuracy_score(test_y.cpu().numpy(), test_pred.cpu().numpy())
        test_f1 = f1_score(test_y.cpu().numpy(), test_pred.cpu().numpy(), average='macro')

    return test_acc, test_f1

# Benchmark different models
def benchmark_models():
    results = {}
    num_classes = len(set(data['author'].y.tolist()))

    # Create data objects for GAT and RGCN
    class HomoData:
        def __init__(self, x, edge_index, author_mask, author_start_idx, original_data):
            self.x = x
            self.edge_index = edge_index
            self.author_mask = author_mask
            self.author_start_idx = author_start_idx
            self.y = original_data['author'].y
            self.train_mask = original_data['author'].train_mask
            self.val_mask = original_data['author'].val_mask
            self.test_mask = original_data['author'].test_mask

    class RGCNData:
        def __init__(self, x, edge_index, edge_type, author_mask, original_data):
            self.x = x
            self.edge_index = edge_index
            self.edge_type = edge_type
            self.author_mask = author_mask
            self.y = original_data['author'].y
            self.train_mask = original_data['author'].train_mask
            self.val_mask = original_data['author'].val_mask
            self.test_mask = original_data['author'].test_mask

    gat_data = HomoData(x_homo, edge_index_homo, author_mask_homo, author_start_idx, data)
    rgcn_data = RGCNData(x_rgcn, edge_index_rgcn, edge_type_rgcn, author_mask_rgcn, data)

    # HGT Model
    print("\n" + "="*50)
    print("Training HGT Model...")
    print("="*50)
    hgt_model = HGT(hidden_channels=64, out_channels=num_classes, num_heads=4, num_layers=2, metadata=data.metadata())
    hgt_model = hgt_model.to(device)
    hgt_optimizer = torch.optim.Adam(hgt_model.parameters(), lr=0.005, weight_decay=5e-4)

    train_full_batch(hgt_model, data, hgt_optimizer, device, model_type='hetero', epochs=100)
    test_acc, test_f1 = evaluate_full_batch(hgt_model, data, device, model_type='hetero')
    results['HGT'] = {'accuracy': test_acc, 'f1_score': test_f1}
    print(f'HGT Test Results - Acc: {test_acc:.4f}, F1: {test_f1:.4f}')

    # RGCN Model
    print("\n" + "="*50)
    print("Training RGCN Model...")
    print("="*50)
    rgcn_model = RGCN(in_channels=feature_dim, hidden_channels=64, out_channels=num_classes,
                     num_layers=2, num_relations=len(data.edge_types))
    rgcn_model = rgcn_model.to(device)
    rgcn_optimizer = torch.optim.Adam(rgcn_model.parameters(), lr=0.005, weight_decay=5e-4)

    train_full_batch(rgcn_model, rgcn_data, rgcn_optimizer, device, model_type='rgcn', epochs=100)
    test_acc, test_f1 = evaluate_full_batch(rgcn_model, rgcn_data, device, model_type='rgcn')
    results['RGCN'] = {'accuracy': test_acc, 'f1_score': test_f1}
    print(f'RGCN Test Results - Acc: {test_acc:.4f}, F1: {test_f1:.4f}')

    # # HAN Model
    # print("\n" + "="*50)
    # print("Training HAN Model...")
    # print("="*50)
    # han_model = HAN(in_channels=feature_dim, hidden_channels=64, out_channels=num_classes,
    #                num_heads=4, num_layers=2, metadata=data.metadata())
    # han_model = han_model.to(device)
    # han_optimizer = torch.optim.Adam(han_model.parameters(), lr=0.005, weight_decay=5e-4)

    # train_full_batch(han_model, data, han_optimizer, device, model_type='hetero', epochs=100)
    # test_acc, test_f1 = evaluate_full_batch(han_model, data, device, model_type='hetero')
    # results['HAN'] = {'accuracy': test_acc, 'f1_score': test_f1}
    # print(f'HAN Test Results - Acc: {test_acc:.4f}, F1: {test_f1:.4f}')

    # GAT Model
    print("\n" + "="*50)
    print("Training GAT Model...")
    print("="*50)
    gat_model = GAT(hidden_channels=64, out_channels=num_classes, num_heads=4, num_layers=2)
    gat_model = gat_model.to(device)
    gat_optimizer = torch.optim.Adam(gat_model.parameters(), lr=0.005, weight_decay=5e-4)

    train_full_batch(gat_model, gat_data, gat_optimizer, device, model_type='gat', epochs=100)
    test_acc, test_f1 = evaluate_full_batch(gat_model, gat_data, device, model_type='gat')
    results['GAT'] = {'accuracy': test_acc, 'f1_score': test_f1}
    print(f'GAT Test Results - Acc: {test_acc:.4f}, F1: {test_f1:.4f}')

    return results

# Run benchmark
if __name__ == "__main__":
    print("Starting Node Classification Benchmark on DBLP Dataset")
    print("=" * 60)

    results = benchmark_models()

    print("\n" + "="*60)
    print("FINAL BENCHMARK RESULTS:")
    print("="*60)
    for model_name, metrics in results.items():
        print(f"{model_name}:")
        print(f"  Accuracy: {metrics['accuracy']:.4f}")
        print(f"  F1-Score: {metrics['f1_score']:.4f}")
        print()

    # Find best model
    best_model = max(results.items(), key=lambda x: x[1]['accuracy'])
    print(f"BEST MODEL: {best_model[0]} with accuracy {best_model[1]['accuracy']:.4f}")

Dataset info:
Number of nodes: 26128
Number of edges: 239566
Node types: ['author', 'paper', 'term', 'conference']
Edge types: [('author', 'to', 'paper'), ('paper', 'to', 'author'), ('paper', 'to', 'term'), ('paper', 'to', 'conference'), ('term', 'to', 'paper'), ('conference', 'to', 'paper')]
Number of author classes: 4
Creating consistent node features...
Node feature dimensions:
  author: torch.Size([4057, 128])
  paper: torch.Size([14328, 128])
  term: torch.Size([7723, 128])
  conference: torch.Size([20, 128])
Using device: cpu
Preparing data for GAT and RGCN...
Starting Node Classification Benchmark on DBLP Dataset

Training HGT Model...
Epoch 000, Loss: 1.4116, Val Acc: 0.1677, Val F1: 0.0718
Epoch 020, Loss: 0.5421, Val Acc: 0.6708, Val F1: 0.6398
Epoch 040, Loss: 0.0360, Val Acc: 0.9285, Val F1: 0.9199
Epoch 060, Loss: 0.0020, Val Acc: 0.9309, Val F1: 0.9226
Epoch 080, Loss: 0.0023, Val Acc: 0.9322, Val F1: 0.9237
HGT Test Results - Acc: 0.9397, F1: 0.9323

Training RGCN Model.

In [27]:
# Save this as bench_dblp.py and run with: python bench_dblp.py

import torch
import random
import numpy as np
from torch_geometric.datasets import DBLP
from torch_geometric.nn import HGTConv, RGCNConv, GATConv, HANConv, Linear
from torch_geometric.data import HeteroData
from torch.nn import functional as F
from sklearn.metrics import f1_score, accuracy_score
import time
import os

# Set random seeds for reproducibility
torch.manual_seed(42)
np.random.seed(42)
random.seed(42)

# Load DBLP dataset
dataset = DBLP(root='/tmp/DBLP')
data = dataset[0]

print("Dataset info:")
print(f"Number of nodes: {data.num_nodes}")
print(f"Number of edges: {data.num_edges}")
print(f"Node types: {data.node_types}")
print(f"Edge types: {data.edge_types}")
print(f"Number of author classes: {len(set(data['author'].y.tolist()))}")

# Fix: Create consistent node features with same dimensions
print("Creating consistent node features...")
feature_dim = 128  # Consistent feature dimension for all node types

for node_type in data.node_types:
    num_nodes = data[node_type].num_nodes
    # Create random features with consistent dimension
    data[node_type].x = torch.randn(num_nodes, feature_dim, dtype=torch.float)

print("Node feature dimensions:")
for node_type in data.node_types:
    print(f"  {node_type}: {data[node_type].x.shape}")

# Define device
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f"Using device: {device}")

# Create train/val/test masks for author nodes (the target node type)
author_nodes = data['author'].num_nodes
train_mask = torch.zeros(author_nodes, dtype=torch.bool)
val_mask = torch.zeros(author_nodes, dtype=torch.bool)
test_mask = torch.zeros(author_nodes, dtype=torch.bool)

# Randomly split author nodes
indices = torch.randperm(author_nodes)
train_idx = indices[:int(0.6 * author_nodes)]
val_idx = indices[int(0.6 * author_nodes):int(0.8 * author_nodes)]
test_idx = indices[int(0.8 * author_nodes):]

train_mask[train_idx] = True
val_mask[val_idx] = True
test_mask[test_idx] = True

data['author'].train_mask = train_mask
data['author'].val_mask = val_mask
data['author'].test_mask = test_mask

# Define HGT Model
class HGT(torch.nn.Module):
    def __init__(self, hidden_channels, out_channels, num_heads, num_layers, metadata):
        super().__init__()
        self.lin_dict = torch.nn.ModuleDict()
        for node_type in metadata[0]:
            self.lin_dict[node_type] = Linear(-1, hidden_channels)

        self.convs = torch.nn.ModuleList()
        for _ in range(num_layers):
            conv = HGTConv(hidden_channels, hidden_channels, metadata, num_heads)
            self.convs.append(conv)

        self.lin = Linear(hidden_channels, out_channels)
        self.dropout = torch.nn.Dropout(0.5)

    def forward(self, x_dict, edge_index_dict):
        x_dict = {
            node_type: self.lin_dict[node_type](x).relu()
            for node_type, x in x_dict.items()
        }

        for conv in self.convs:
            x_dict = conv(x_dict, edge_index_dict)
            x_dict = {key: self.dropout(x.relu()) for key, x in x_dict.items()}

        return self.lin(x_dict['author'])

# Define RGCN Model
class RGCN(torch.nn.Module):
    def __init__(self, in_channels, hidden_channels, out_channels, num_layers, num_relations):
        super().__init__()
        self.convs = torch.nn.ModuleList()
        # First layer with explicit input dimension
        self.convs.append(RGCNConv(in_channels, hidden_channels, num_relations=num_relations))
        # Middle layers
        for _ in range(num_layers - 2):
            self.convs.append(RGCNConv(hidden_channels, hidden_channels, num_relations=num_relations))
        # Last layer
        if num_layers > 1:
            self.convs.append(RGCNConv(hidden_channels, hidden_channels, num_relations=num_relations))

        self.lin = Linear(hidden_channels, out_channels)
        self.dropout = torch.nn.Dropout(0.5)

    def forward(self, x, edge_index, edge_type):
        for conv in self.convs:
            x = conv(x, edge_index, edge_type)
            x = self.dropout(x.relu())
        return self.lin(x)

# Define HAN Model
class HAN(torch.nn.Module):
    def __init__(self, in_channels, hidden_channels, out_channels, num_heads, num_layers, metadata):
        super().__init__()
        self.convs = torch.nn.ModuleList()
        # First layer with explicit input dimension
        self.convs.append(HANConv(in_channels, hidden_channels, heads=num_heads,
                                 metadata=metadata, dropout=0.6))
        # Middle layers
        for _ in range(num_layers - 1):
            self.convs.append(HANConv(hidden_channels * num_heads, hidden_channels,
                                     heads=num_heads, metadata=metadata, dropout=0.6))

        self.lin = Linear(hidden_channels * num_heads, out_channels)
        self.dropout = torch.nn.Dropout(0.5)

    def forward(self, x_dict, edge_index_dict):
        for conv in self.convs[:-1]:
            x_dict = conv(x_dict, edge_index_dict)
            x_dict = {key: self.dropout(F.elu(x)) for key, x in x_dict.items()}
        x_dict = self.convs[-1](x_dict, edge_index_dict)
        return self.lin(x_dict['author'])

# Define GAT Model (homogeneous version)
class GAT(torch.nn.Module):
    def __init__(self, hidden_channels, out_channels, num_heads, num_layers):
        super().__init__()
        self.convs = torch.nn.ModuleList()
        self.convs.append(GATConv(-1, hidden_channels, heads=num_heads, dropout=0.6))
        for _ in range(num_layers - 1):
            self.convs.append(GATConv(hidden_channels * num_heads, hidden_channels,
                                     heads=num_heads, dropout=0.6))
        self.lin = Linear(hidden_channels * num_heads, out_channels)
        self.dropout = torch.nn.Dropout(0.5)

    def forward(self, x, edge_index):
        for conv in self.convs[:-1]:
            x = conv(x, edge_index)
            x = self.dropout(F.elu(x))
        x = self.convs[-1](x, edge_index)
        return self.lin(x)

# Utility function to convert heterogeneous graph to homogeneous for GAT
def hetero_to_homo(hetero_data):
    """Convert heterogeneous graph to homogeneous for GAT"""
    # Create mapping from node type to global indices
    node_offsets = {}
    current_idx = 0
    for node_type in hetero_data.node_types:
        node_offsets[node_type] = current_idx
        current_idx += hetero_data[node_type].num_nodes

    # Combine all node features (they now have same dimension)
    x_list = []
    for node_type in hetero_data.node_types:
        x_list.append(hetero_data[node_type].x)
    x = torch.cat(x_list, dim=0)

    # Combine all edges with offsets
    edge_indices = []
    for edge_type in hetero_data.edge_types:
        src_type, rel_type, dst_type = edge_type
        edge_index = hetero_data[edge_type].edge_index
        edge_index = edge_index.clone()
        edge_index[0] += node_offsets[src_type]
        edge_index[1] += node_offsets[dst_type]
        edge_indices.append(edge_index)

    edge_index = torch.cat(edge_indices, dim=1)

    # Create author node mask for homogeneous graph
    author_mask = torch.zeros(x.size(0), dtype=torch.bool)
    author_start = node_offsets['author']
    author_end = author_start + hetero_data['author'].num_nodes
    author_mask[author_start:author_end] = True

    return x, edge_index, author_mask, node_offsets['author']

# Prepare data for RGCN
def prepare_rgcn_data(hetero_data):
    """Prepare data for RGCN with edge types"""
    x_list = []
    for node_type in hetero_data.node_types:
        x_list.append(hetero_data[node_type].x)
    x = torch.cat(x_list, dim=0)

    edge_indices = []
    edge_types = []
    for i, edge_type in enumerate(hetero_data.edge_types):
        edge_index = hetero_data[edge_type].edge_index
        edge_indices.append(edge_index)
        edge_types.append(torch.full((edge_index.size(1),), i, dtype=torch.long))

    edge_index = torch.cat(edge_indices, dim=1)
    edge_type = torch.cat(edge_types, dim=0)

    # Author mask
    author_mask = torch.zeros(x.size(0), dtype=torch.bool)
    current_idx = 0
    for node_type in hetero_data.node_types:
        if node_type == 'author':
            author_mask[current_idx:current_idx + hetero_data[node_type].num_nodes] = True
        current_idx += hetero_data[node_type].num_nodes

    return x, edge_index, edge_type, author_mask

# Convert data for GAT and RGCN
print("Preparing data for GAT and RGCN...")
x_homo, edge_index_homo, author_mask_homo, author_start_idx = hetero_to_homo(data)
x_rgcn, edge_index_rgcn, edge_type_rgcn, author_mask_rgcn = prepare_rgcn_data(data)

# Create data objects for GAT and RGCN
class HomoData:
    def __init__(self, x, edge_index, author_mask, author_start_idx, original_data):
        self.x = x
        self.edge_index = edge_index
        self.author_mask = author_mask
        self.author_start_idx = author_start_idx
        self.y = original_data['author'].y
        self.train_mask = original_data['author'].train_mask
        self.val_mask = original_data['author'].val_mask
        self.test_mask = original_data['author'].test_mask

class RGCNData:
    def __init__(self, x, edge_index, edge_type, author_mask, original_data):
        self.x = x
        self.edge_index = edge_index
        self.edge_type = edge_type
        self.author_mask = author_mask
        self.y = original_data['author'].y
        self.train_mask = original_data['author'].train_mask
        self.val_mask = original_data['author'].val_mask
        self.test_mask = original_data['author'].test_mask

gat_data = HomoData(x_homo, edge_index_homo, author_mask_homo, author_start_idx, data)
rgcn_data = RGCNData(x_rgcn, edge_index_rgcn, edge_type_rgcn, author_mask_rgcn, data)

# Improved training function with proper model reset
def train_model(model, data, optimizer, device, model_type='hetero', epochs=100, model_name=""):
    model.train()
    best_val_acc = 0
    best_model_state = None

    for epoch in range(epochs):
        optimizer.zero_grad()

        if model_type == 'gat':
            out = model(data.x.to(device), data.edge_index.to(device))
            out = out[data.author_mask]
            y = data.y.to(device)
        elif model_type == 'rgcn':
            out = model(data.x.to(device), data.edge_index.to(device), data.edge_type.to(device))
            out = out[data.author_mask]
            y = data.y.to(device)
        else:  # hetero models (HGT, HAN)
            x_dict = {node_type: data[node_type].x.to(device) for node_type in data.node_types}
            edge_index_dict = {edge_type: data[edge_type].edge_index.to(device) for edge_type in data.edge_types}
            out = model(x_dict, edge_index_dict)
            y = data['author'].y.to(device)

        # Only use training nodes
        if model_type in ['gat', 'rgcn']:
            train_out = out[data.train_mask]
            train_y = y[data.train_mask]
        else:
            train_out = out[data['author'].train_mask]
            train_y = y[data['author'].train_mask]

        loss = F.cross_entropy(train_out, train_y)
        loss.backward()
        torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)
        optimizer.step()

        if epoch % 20 == 0 or epoch == epochs - 1:
            # Validation
            model.eval()
            with torch.no_grad():
                if model_type in ['gat', 'rgcn']:
                    val_out = out[data.val_mask]
                    val_y = y[data.val_mask]
                else:
                    val_out = out[data['author'].val_mask]
                    val_y = y[data['author'].val_mask]

                val_pred = val_out.argmax(dim=1)
                val_acc = accuracy_score(val_y.cpu().numpy(), val_pred.cpu().numpy())
                val_f1 = f1_score(val_y.cpu().numpy(), val_pred.cpu().numpy(), average='macro')

                print(f'{model_name} Epoch {epoch:03d}, Loss: {loss:.4f}, Val Acc: {val_acc:.4f}, Val F1: {val_f1:.4f}')

                if val_acc > best_val_acc:
                    best_val_acc = val_acc
                    best_model_state = model.state_dict().copy()

            model.train()

    # Load best model state
    if best_model_state is not None:
        model.load_state_dict(best_model_state)

    return best_val_acc

def evaluate_model(model, data, device, model_type='hetero'):
    model.eval()
    with torch.no_grad():
        if model_type == 'gat':
            out = model(data.x.to(device), data.edge_index.to(device))
            out = out[data.author_mask]
            y = data.y.to(device)
            test_out = out[data.test_mask]
            test_y = y[data.test_mask]
        elif model_type == 'rgcn':
            out = model(data.x.to(device), data.edge_index.to(device), data.edge_type.to(device))
            out = out[data.author_mask]
            y = data.y.to(device)
            test_out = out[data.test_mask]
            test_y = y[data.test_mask]
        else:  # hetero models (HGT, HAN)
            x_dict = {node_type: data[node_type].x.to(device) for node_type in data.node_types}
            edge_index_dict = {edge_type: data[edge_type].edge_index.to(device) for edge_type in data.edge_types}
            out = model(x_dict, edge_index_dict)
            y = data['author'].y.to(device)
            test_out = out[data['author'].test_mask]
            test_y = y[data['author'].test_mask]

        test_pred = test_out.argmax(dim=1)
        test_acc = accuracy_score(test_y.cpu().numpy(), test_pred.cpu().numpy())
        test_f1 = f1_score(test_y.cpu().numpy(), test_pred.cpu().numpy(), average='macro')

    return test_acc, test_f1

# Benchmark different models with proper initialization
def benchmark_models():
    results = {}
    num_classes = len(set(data['author'].y.tolist()))

    # Clear any previous model states
    if os.path.exists('best_hgt.pth'): os.remove('best_hgt.pth')
    if os.path.exists('best_rgcn.pth'): os.remove('best_rgcn.pth')
    if os.path.exists('best_han.pth'): os.remove('best_han.pth')
    if os.path.exists('best_gat.pth'): os.remove('best_gat.pth')

    # HGT Model with different hyperparameters
    print("\n" + "="*50)
    print("Training HGT Model...")
    print("="*50)
    torch.manual_seed(42)  # Reset seed for HGT
    hgt_model = HGT(hidden_channels=128, out_channels=num_classes, num_heads=8, num_layers=3, metadata=data.metadata())
    hgt_model = hgt_model.to(device)
    hgt_optimizer = torch.optim.Adam(hgt_model.parameters(), lr=0.001, weight_decay=1e-4)

    train_model(hgt_model, data, hgt_optimizer, device, model_type='hetero', epochs=150, model_name='HGT')
    test_acc, test_f1 = evaluate_model(hgt_model, data, device, model_type='hetero')
    results['HGT'] = {'accuracy': test_acc, 'f1_score': test_f1}
    print(f'HGT Test Results - Acc: {test_acc:.4f}, F1: {test_f1:.4f}')

    # RGCN Model with different hyperparameters
    print("\n" + "="*50)
    print("Training RGCN Model...")
    print("="*50)
    torch.manual_seed(43)  # Different seed for RGCN
    rgcn_model = RGCN(in_channels=feature_dim, hidden_channels=96, out_channels=num_classes,
                     num_layers=3, num_relations=len(data.edge_types))
    rgcn_model = rgcn_model.to(device)
    rgcn_optimizer = torch.optim.Adam(rgcn_model.parameters(), lr=0.002, weight_decay=5e-4)

    train_model(rgcn_model, rgcn_data, rgcn_optimizer, device, model_type='rgcn', epochs=120, model_name='RGCN')
    test_acc, test_f1 = evaluate_model(rgcn_model, rgcn_data, device, model_type='rgcn')
    results['RGCN'] = {'accuracy': test_acc, 'f1_score': test_f1}
    print(f'RGCN Test Results - Acc: {test_acc:.4f}, F1: {test_f1:.4f}')

    # # HAN Model with different hyperparameters
    # print("\n" + "="*50)
    # print("Training HAN Model...")
    # print("="*50)
    # torch.manual_seed(44)  # Different seed for HAN
    # han_model = HAN(in_channels=feature_dim, hidden_channels=64, out_channels=num_classes,
    #                num_heads=6, num_layers=2, metadata=data.metadata())
    # han_model = han_model.to(device)
    # han_optimizer = torch.optim.Adam(han_model.parameters(), lr=0.003, weight_decay=1e-3)

    # train_model(han_model, data, han_optimizer, device, model_type='hetero', epochs=100, model_name='HAN')
    # test_acc, test_f1 = evaluate_model(han_model, data, device, model_type='hetero')
    # results['HAN'] = {'accuracy': test_acc, 'f1_score': test_f1}
    # print(f'HAN Test Results - Acc: {test_acc:.4f}, F1: {test_f1:.4f}')

    # GAT Model with different hyperparameters
    print("\n" + "="*50)
    print("Training GAT Model...")
    print("="*50)
    torch.manual_seed(45)  # Different seed for GAT
    gat_model = GAT(hidden_channels=80, out_channels=num_classes, num_heads=8, num_layers=2)
    gat_model = gat_model.to(device)
    gat_optimizer = torch.optim.Adam(gat_model.parameters(), lr=0.005, weight_decay=5e-4)

    train_model(gat_model, gat_data, gat_optimizer, device, model_type='gat', epochs=80, model_name='GAT')
    test_acc, test_f1 = evaluate_model(gat_model, gat_data, device, model_type='gat')
    results['GAT'] = {'accuracy': test_acc, 'f1_score': test_f1}
    print(f'GAT Test Results - Acc: {test_acc:.4f}, F1: {test_f1:.4f}')

    return results

# Run benchmark
if __name__ == "__main__":
    print("Starting Node Classification Benchmark on DBLP Dataset")
    print("=" * 60)

    results = benchmark_models()

    print("\n" + "="*60)
    print("FINAL BENCHMARK RESULTS:")
    print("="*60)
    for model_name, metrics in results.items():
        print(f"{model_name}:")
        print(f"  Accuracy: {metrics['accuracy']:.4f}")
        print(f"  F1-Score: {metrics['f1_score']:.4f}")
        print()

    # Find best model
    best_model = max(results.items(), key=lambda x: x[1]['accuracy'])
    print(f"BEST MODEL: {best_model[0]} with accuracy {best_model[1]['accuracy']:.4f}")

Dataset info:
Number of nodes: 26128
Number of edges: 239566
Node types: ['author', 'paper', 'term', 'conference']
Edge types: [('author', 'to', 'paper'), ('paper', 'to', 'author'), ('paper', 'to', 'term'), ('paper', 'to', 'conference'), ('term', 'to', 'paper'), ('conference', 'to', 'paper')]
Number of author classes: 4
Creating consistent node features...
Node feature dimensions:
  author: torch.Size([4057, 128])
  paper: torch.Size([14328, 128])
  term: torch.Size([7723, 128])
  conference: torch.Size([20, 128])
Using device: cpu
Preparing data for GAT and RGCN...
Starting Node Classification Benchmark on DBLP Dataset

Training HGT Model...
HGT Epoch 000, Loss: 1.3820, Val Acc: 0.2984, Val F1: 0.1191
HGT Epoch 020, Loss: 1.3742, Val Acc: 0.2984, Val F1: 0.1207
HGT Epoch 040, Loss: 1.1986, Val Acc: 0.3243, Val F1: 0.1948
HGT Epoch 060, Loss: 0.6809, Val Acc: 0.7411, Val F1: 0.6357
HGT Epoch 080, Loss: 0.3417, Val Acc: 0.8755, Val F1: 0.8655
HGT Epoch 100, Loss: 0.2273, Val Acc: 0.8952