In [1]:
# This code works in Python 3.10.6
import matplotlib.pyplot as plt
import networkx as nx
import torch
import torch_geometric.utils
from torch_geometric.data import HeteroData
import torch_geometric.transforms as T
import torch.nn.functional as F
from torch_geometric.datasets.dblp import DBLP
from torch_geometric.nn import GCNConv
import time
from torch_geometric.logging import log
import os
from collections import Counter
import pandas as pd
import random

In [2]:
# Load the DBLP heterogeneous graph stored under ./dblp_data.
# The Constant transform is restricted to 'conference' nodes; in the summary
# printed by the next cell this shows up as a one-column feature matrix for
# that node type.
dataset = DBLP(
    root='./dblp_data',
    transform=T.Constant(node_types='conference'),
)
# The dataset is indexed at 0 to obtain the graph object used below.
hetero_data = dataset[0]

In [3]:
# Show the HeteroData summary: per-type node feature shapes and per-relation edge counts.
hetero_data

HeteroData(
  author={
    x=[4057, 334],
    y=[4057],
    train_mask=[4057],
    val_mask=[4057],
    test_mask=[4057],
  },
  paper={ x=[14328, 4231] },
  term={ x=[7723, 50] },
  conference={
    num_nodes=20,
    x=[20, 1],
  },
  (author, to, paper)={ edge_index=[2, 19645] },
  (paper, to, author)={ edge_index=[2, 19645] },
  (paper, to, term)={ edge_index=[2, 85810] },
  (paper, to, conference)={ edge_index=[2, 14328] },
  (term, to, paper)={ edge_index=[2, 85810] },
  (conference, to, paper)={ edge_index=[2, 14328] }
)

In [4]:
# This code works in torch-geometric==2.6.0
# Collapse the heterogeneous graph into one homogeneous graph object.
# add_edge_type=False: no per-edge type attribute is attached (the resulting
# object shown in the next cell carries node_type but no edge_type).
data = hetero_data.to_homogeneous(add_edge_type=False)

In [5]:
# Show the homogeneous graph summary (edge_index, x, y, masks, node_type shapes).
data

Data(edge_index=[2, 239566], x=[26128, 4231], y=[26128], train_mask=[26128], val_mask=[26128], test_mask=[26128], node_type=[26128])

In [6]:
# Per-node integer type id; used by the following cell to build one-hot features.
data.node_type

tensor([0, 0, 0,  ..., 3, 3, 3])

In [7]:
# Replace the node features with a one-hot encoding of each node's type id.
# The encoding width is max(node_type) + 1 rather than the number of distinct
# ids: F.one_hot requires every index to be smaller than num_classes, so
# counting distinct values would fail if some id in the range never occurs.
num_node_types = int(data.node_type.max()) + 1
data.x = F.one_hot(data.node_type, num_classes=num_node_types).float()

In [8]:
# Inspect the new feature matrix: one row per node, one column per node type.
data.x

tensor([[1., 0., 0., 0.],
        [1., 0., 0., 0.],
        [1., 0., 0., 0.],
        ...,
        [0., 0., 0., 1.],
        [0., 0., 0., 1.],
        [0., 0., 0., 1.]])

In [9]:
# Persist the homogeneous graph (with one-hot node-type features) to disk.
save_path = '../data/dblp/processed/data.pt'
# torch.save does not create missing parent directories, so make sure the
# target folder exists before writing (no-op when it is already there).
os.makedirs(os.path.dirname(save_path), exist_ok=True)
torch.save(data, save_path)

In [10]:
# Distinct label values present in y, in ascending order.
data.y.unique()

tensor([-1,  0,  1,  2,  3])

In [11]:
# Count how many nodes carry each label value.
# In the output below, the four non-negative labels sum to 4057 (the number of
# author nodes) and -1 covers the remaining 22071 nodes — presumably -1 is the
# placeholder for node types that have no label; confirm before training on y.
Counter(data.y.tolist())

Counter({-1: 22071, 0: 1197, 2: 1109, 3: 1006, 1: 745})

In [12]:
# Sanity check: report whether the graph contains any isolated nodes.
data.has_isolated_nodes()

False

In [13]:
# Sanity check: report whether the graph contains any self-loop edges.
data.has_self_loops()

False