In [11]:
import os.path as osp

from pathlib import Path

import torch
import torch.nn.functional as F

from torch_geometric.datasets import DBLP
from torch_geometric.nn import HeteroConv, Linear, SAGEConv

# TODO: add explicit L1/L2 regularisation, e.g. as described in https://stackoverflow.com/questions/42704283/l1-l2-regularization-in-pytorch


# The dataset root is given relative to the current working directory and is
# created up front (including missing parents) before DBLP is pointed at it.
path = Path("../data/datasets/DBLP")
path.mkdir(exist_ok=True, parents=True)
dataset = DBLP(root=str(path))
data = dataset[0]
print(data)

# "conference" nodes are given a single constant feature (a column of ones),
# one row per conference node.
data["conference"].x = torch.ones((data["conference"].num_nodes, 1))


class HeteroGNN(torch.nn.Module):
    """Stacked heterogeneous SAGE convolutions with a linear head on "author" nodes.

    Args:
        metadata: Graph metadata; ``metadata[1]`` is iterated as the collection
            of edge types, and each edge type gets its own ``SAGEConv``.
        hidden_channels: Output size of every ``SAGEConv`` and input size of
            the final linear layer.
        out_channels: Output size of the final linear layer.
        num_layers: Number of ``HeteroConv`` layers to stack.
    """

    def __init__(self, metadata, hidden_channels, out_channels, num_layers):
        super().__init__()

        edge_types = metadata[1]
        # One HeteroConv per layer, each holding a separate SAGEConv per edge
        # type. The (-1, -1) input sizes are left unspecified here; the script
        # runs a forward pass afterwards to initialize these lazy modules.
        self.convs = torch.nn.ModuleList(
            [
                HeteroConv(
                    {
                        edge_type: SAGEConv((-1, -1), hidden_channels)
                        for edge_type in edge_types
                    }
                )
                for _ in range(num_layers)
            ]
        )

        self.lin = Linear(hidden_channels, out_channels)

    def forward(self, x_dict, edge_index_dict):
        """Apply every conv layer with a leaky ReLU, then project "author" features.

        Args:
            x_dict: Mapping from node type to that type's feature tensor.
            edge_index_dict: Mapping passed unchanged to every conv layer.

        Returns:
            The output of the linear layer applied to the "author" entry.
        """
        hidden = x_dict
        for layer in self.convs:
            hidden = layer(hidden, edge_index_dict)
            # Activate every node type's features after each layer.
            activated = {}
            for node_type, feats in hidden.items():
                activated[node_type] = F.leaky_relu(feats)
            hidden = activated
        author_repr = hidden["author"]
        return self.lin(author_repr)


# Two conv layers, 64 hidden channels, 4 output channels.
model = HeteroGNN(
    metadata=data.metadata(),
    hidden_channels=64,
    out_channels=4,
    num_layers=2,
)

# Use the GPU when one is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
data = data.to(device)
model = model.to(device)

# A single gradient-free forward pass initializes the lazy modules before the
# optimizer is handed the model's parameters.
with torch.no_grad():
    out = model(data.x_dict, data.edge_index_dict)

optimizer = torch.optim.Adam(
    model.parameters(),
    weight_decay=0.001,
    lr=0.005,
)


def train():
    """Run one full-graph optimisation step and return the training loss.

    Uses the module-level ``model``, ``optimizer`` and ``data``. The loss is the
    cross-entropy over the "author" nodes selected by ``train_mask``.

    Returns:
        The loss of this step as a Python float.
    """
    model.train()
    optimizer.zero_grad()

    logits = model(data.x_dict, data.edge_index_dict)
    author = data["author"]
    train_mask = author.train_mask

    # Only the training authors contribute to the loss.
    loss = F.cross_entropy(logits[train_mask], author.y[train_mask])
    loss.backward()
    optimizer.step()
    return float(loss)


@torch.no_grad()
def test():
    """Evaluate the module-level ``model`` on the train, validation and test splits.

    Returns:
        A list of three floats: the accuracy on the "author" nodes selected by
        ``train_mask``, ``val_mask`` and ``test_mask``, in that order.
    """
    model.eval()
    author = data["author"]
    # Predicted class per author node: index of the largest output.
    pred = model(data.x_dict, data.edge_index_dict).argmax(dim=-1)

    def split_accuracy(split):
        # Fraction of the nodes selected by this mask that are predicted correctly.
        mask = author[split]
        hits = (pred[mask] == author.y[mask]).sum()
        return float(hits / mask.sum())

    return [
        split_accuracy(split)
        for split in ("train_mask", "val_mask", "test_mask")
    ]


# Train for 100 epochs, evaluating all three splits after every step and
# printing one summary line per epoch.
for epoch in range(1, 101):
    loss = train()
    train_acc, val_acc, test_acc = test()
    report = (
        f"Epoch: {epoch:03d}, Loss: {loss:.4f}, Train: {train_acc:.4f}, "
        f"Val: {val_acc:.4f}, Test: {test_acc:.4f}"
    )
    print(report)

Downloading https://www.dropbox.com/s/yh4grpeks87ugr2/DBLP_processed.zip?dl=1
Extracting ../data/datasets/DBLP/raw/DBLP_processed.zip
Processing...
Done!


HeteroData(
  [1mauthor[0m={
    x=[4057, 334],
    y=[4057],
    train_mask=[4057],
    val_mask=[4057],
    test_mask=[4057]
  },
  [1mpaper[0m={ x=[14328, 4231] },
  [1mterm[0m={ x=[7723, 50] },
  [1mconference[0m={ num_nodes=20 },
  [1m(author, to, paper)[0m={ edge_index=[2, 19645] },
  [1m(paper, to, author)[0m={ edge_index=[2, 19645] },
  [1m(paper, to, term)[0m={ edge_index=[2, 85810] },
  [1m(paper, to, conference)[0m={ edge_index=[2, 14328] },
  [1m(term, to, paper)[0m={ edge_index=[2, 85810] },
  [1m(conference, to, paper)[0m={ edge_index=[2, 14328] }
)
Epoch: 001, Loss: 1.3903, Train: 0.4100, Val: 0.3075, Test: 0.3491
Epoch: 002, Loss: 1.3038, Train: 0.5925, Val: 0.4400, Test: 0.4851
Epoch: 003, Loss: 1.2089, Train: 0.7450, Val: 0.5325, Test: 0.6045
Epoch: 004, Loss: 1.0839, Train: 0.8025, Val: 0.6050, Test: 0.6721
Epoch: 005, Loss: 0.9363, Train: 0.8925, Val: 0.6550, Test: 0.7025
Epoch: 006, Loss: 0.7749, Train: 0.9200, Val: 0.6750, Test: 0.7295
Epoch: 0