In [1]:
# Install required packages.
# torch-scatter and torch-sparse are looked up in the wheel index named by the
# -f URL, which is the index for torch 1.10.0 + CUDA 11.3.
# NOTE(review): presumably the runtime's torch build must match that index — confirm.
!pip install -q torch-scatter -f https://pytorch-geometric.com/whl/torch-1.10.0+cu113.html
!pip install -q torch-sparse -f https://pytorch-geometric.com/whl/torch-1.10.0+cu113.html
# PyTorch Geometric is installed straight from the Git repository with no
# tag/commit pin, so a later re-run may install a different revision.
!pip install -q git+https://github.com/rusty1s/pytorch_geometric.git

In [84]:
import torch
from torch_geometric.datasets import OGB_MAG, AMiner

# Load OGB-MAG under ./data, requesting the "metapath2vec" preprocessing option.
dataset = OGB_MAG(root='./data', preprocess="metapath2vec")
# The dataset is indexed at 0 to obtain the graph object used by the rest of the notebook.
data = dataset[0]
# Persist the graph object to the current working directory.
# `torch` is imported in this cell so the save works on a fresh kernel.
torch.save(data, "OGB_MAG Heterogeneous Graph.pt")

In [63]:
# Print the graph object's summary (node and edge stores with their tensor shapes).
print(data)

HeteroData(
  [1mpaper[0m={
    x=[736389, 128],
    year=[736389],
    y=[736389],
    train_mask=[736389],
    val_mask=[736389],
    test_mask=[736389]
  },
  [1mauthor[0m={ x=[1134649, 128] },
  [1minstitution[0m={ x=[8740, 128] },
  [1mfield_of_study[0m={ x=[59965, 128] },
  [1m(author, affiliated_with, institution)[0m={ edge_index=[2, 1043998] },
  [1m(author, writes, paper)[0m={ edge_index=[2, 7145660] },
  [1m(paper, cites, paper)[0m={ edge_index=[2, 5416271] },
  [1m(paper, has_topic, field_of_study)[0m={ edge_index=[2, 7505078] }
)


In [82]:
import numpy as np

def dense_adj(data, verbose=True):
    """Build a dense 0/1 adjacency matrix for every edge type in ``data``.

    Args:
        data: Object exposing ``edge_index_dict`` (mapping ``(src, rel, dst)``
            keys to ``[2, num_edges]`` integer tensors) and ``num_nodes_dict``
            (mapping node-type names to node counts).
        verbose: When true (the default, matching the previous behaviour),
            print the shape of each matrix as it is built.

    Returns:
        dict mapping each edge-type key to a float64 NumPy array of shape
        ``(num_src_nodes, num_dst_nodes)`` holding 1 where an edge exists and
        0 elsewhere.

    Note:
        The matrices are fully dense, so memory grows with
        ``num_src_nodes * num_dst_nodes`` per edge type.
    """
    adj_dict = {}
    for key, edge_index in data.edge_index_dict.items():
        src_type, _, dst_type = key
        shape = (data.num_nodes_dict[src_type], data.num_nodes_dict[dst_type])
        if verbose:
            print(shape)
        adj = np.zeros(shape)
        # Move to host memory first so tensors living on a GPU are supported.
        rows, cols = edge_index.detach().cpu().numpy()
        # One vectorised assignment instead of a Python loop over every edge.
        adj[rows, cols] = 1
        adj_dict[key] = adj
    return adj_dict

In [8]:
import torch

# Use the GPU when CUDA is usable, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'
print(device)


cpu


In [9]:
import torch_geometric.transforms as T

# Apply the same four transforms, in the same order, as one composed pipeline.
# Note that this rebinds `data`, so re-running the cell transforms the graph again.
preprocess = T.Compose([
    T.ToUndirected(),
    T.AddSelfLoops(),
    T.NormalizeFeatures(),
    T.ToDevice(device),
])
data = preprocess(data)

In [21]:
import torch.nn as nn
import torch.nn.functional as F
import torch
from torch_geometric.nn import GCNConv, SAGEConv, to_hetero

class Encoder(nn.Module):
    """Two-layer SAGEConv encoder that outputs ReLU-activated node embeddings.

    Args:
        num_features: Source-side input size given to the first convolution.
        hidden_channels: Output size of both convolutions.
        dropout: Dropout probability applied between the two convolutions.
    """

    def __init__(self, num_features, hidden_channels, dropout):
        super().__init__()
        self.dropout = dropout
        # NOTE(review): -1 appears to be PyG's lazily-inferred size placeholder — confirm.
        self.conv1 = SAGEConv((num_features, -1), hidden_channels)
        self.conv2 = SAGEConv((-1, -1), hidden_channels)

    def forward(self, x, edge_index):
        """Apply conv1 -> ReLU -> dropout -> conv2 -> ReLU and return the result."""
        hidden = F.relu(self.conv1(x, edge_index))
        hidden = F.dropout(hidden, p=self.dropout, training=self.training)
        return F.relu(self.conv2(hidden, edge_index))

class Attribute_Decoder(nn.Module):
    """Two-layer SAGEConv decoder that maps embeddings back to feature space.

    Args:
        num_features: Output size of the second convolution (reconstructed features).
        hidden_channels: Output size of the first convolution.
        dropout: Dropout probability applied between the two convolutions.
    """

    def __init__(self, num_features, hidden_channels, dropout):
        super().__init__()
        self.dropout = dropout
        self.conv1 = SAGEConv((-1, -1), hidden_channels)
        self.conv2 = SAGEConv((-1, -1), num_features)

    def forward(self, x, adj):
        """Apply conv1 -> ReLU -> dropout -> conv2 -> ReLU and return the result.

        ``adj`` is forwarded unchanged as the second argument of both convolutions.
        """
        hidden = self.conv1(x, adj).relu()
        hidden = F.dropout(hidden, p=self.dropout, training=self.training)
        return self.conv2(hidden, adj).relu()

class Structure_Decoder(nn.Module):
    """Decode node embeddings into a dense structure reconstruction.

    One SAGEConv layer followed by ReLU and dropout yields a decoded
    embedding ``Z``; the reconstruction is the inner-product matrix ``Z @ Z^T``.

    Args:
        hidden_channels: Output size of the convolution.
        dropout: Dropout probability applied to the decoded embedding.
    """

    def __init__(self, hidden_channels, dropout):
        super().__init__()

        self.gc1 = SAGEConv((-1, -1), hidden_channels)
        self.dropout = dropout

    def forward(self, x, adj):
        """Return ``Z @ Z^T`` for the decoded embedding ``Z``.

        Args:
            x: Node embeddings, one row per node.
            adj: Graph connectivity, forwarded to the convolution.

        Returns:
            Square tensor with one row and one column per node in ``x``.
            It is dense, so memory grows quadratically with the node count.
        """
        # The previous code unpacked the activation into two names and
        # returned the dropout output, discarding the inner product.
        x = F.relu(self.gc1(x, adj))
        x = F.dropout(x, self.dropout, training=self.training)
        return x @ x.t()

class Dominant(nn.Module):
    """Autoencoder with one shared encoder and two decoders (attributes, structure).

    Each sub-network is wrapped with ``to_hetero`` using the supplied metadata
    and ``aggr='sum'``.

    Args:
        feat_size: Feature size passed to the encoder and the attribute decoder.
        hidden_size: Hidden size passed to all three sub-networks.
        dropout: Dropout probability passed to all three sub-networks.
        metadata: Graph metadata handed to ``to_hetero``.
    """

    def __init__(self, feat_size, hidden_size, dropout, metadata):
        super().__init__()

        def lift(module):
            # Wrap a sub-network with to_hetero, using the same settings for all three.
            return to_hetero(module, metadata, aggr='sum')

        self.shared_encoder = lift(Encoder(feat_size, hidden_size, dropout))
        self.attr_decoder = lift(Attribute_Decoder(feat_size, hidden_size, dropout))
        self.struct_decoder = lift(Structure_Decoder(hidden_size, dropout))

    def forward(self, x_dict, adj_dict):
        """Encode once, then decode attributes and structure from the embeddings.

        Returns:
            Tuple ``(structure_reconstruction, attribute_reconstruction)``.
        """
        embeddings = self.shared_encoder(x_dict, adj_dict)
        # Attributes are decoded before structure, as in the original ordering.
        attr_reconstruction = self.attr_decoder(embeddings, adj_dict)
        struct_reconstruction = self.struct_decoder(embeddings, adj_dict)
        return struct_reconstruction, attr_reconstruction

In [22]:
# Build the model from the graph's metadata, then place it on `device`.
model = Dominant(
    feat_size=128,
    hidden_size=16,
    dropout=0.3,
    metadata=data.metadata(),
)
model = model.to(device)

# Adam over all model parameters.
optimizer = torch.optim.Adam(model.parameters(), lr=5e-3)



In [23]:
# Check which device the author feature tensor is stored on.
data.x_dict['author'].device

device(type='cpu')

In [13]:
from torch_geometric.loader import NeighborLoader

# Seed nodes for sampling: the "paper" nodes selected by train_mask.
paper_train_mask = data['paper'].train_mask

train_loader = NeighborLoader(
    data,
    # Two sampling iterations, 15 neighbors per node and edge type in each.
    num_neighbors=[15, 15],
    # 128 seed "paper" nodes per mini-batch.
    batch_size=128,
    input_nodes=('paper', paper_train_mask),
)

In [83]:
# Pull the first mini-batch from the loader.
batch = next(iter(train_loader))

# Build a dense adjacency matrix for every edge type in the batch; the
# returned dict is the cell's displayed value.
dense_adj(batch)

(4301, 305)
(4301, 20569)
(20569, 20569)
(20569, 2610)
(305, 4301)
(20569, 4301)
(2610, 20569)


{('author',
  'affiliated_with',
  'institution'): array([[0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        ...,
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.]]),
 ('author',
  'writes',
  'paper'): array([[1., 0., 0., ..., 0., 0., 0.],
        [1., 0., 0., ..., 0., 0., 0.],
        [1., 0., 0., ..., 0., 0., 0.],
        ...,
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.]]),
 ('paper',
  'cites',
  'paper'): array([[1., 0., 0., ..., 0., 0., 0.],
        [0., 1., 0., ..., 0., 0., 0.],
        [0., 0., 1., ..., 0., 0., 0.],
        ...,
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.]]),
 ('paper',
  'has_topic',
  'field_of_study'): array([[0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        [0.

In [53]:
from torch_geometric.utils import to_dense_adj

# Densify the ('paper', 'cites', 'paper') edge index of the sampled batch.
to_dense_adj(batch.edge_index_dict[('paper', 'cites', 'paper')])

tensor([[[1., 0., 0.,  ..., 0., 0., 0.],
         [0., 1., 0.,  ..., 0., 0., 0.],
         [0., 0., 1.,  ..., 0., 0., 0.],
         ...,
         [0., 0., 0.,  ..., 0., 0., 0.],
         [0., 0., 0.,  ..., 0., 0., 0.],
         [0., 0., 0.,  ..., 0., 0., 0.]]])

In [24]:
# Switch to training mode and clear any accumulated gradients before the forward pass.
model.train()
optimizer.zero_grad()
# Dominant.forward returns (structure reconstruction, attribute reconstruction).
A_hat, X_hat = model(batch.x_dict, batch.edge_index_dict)

torch.Size([20588, 128])

In [49]:
from torch_geometric.utils import to_dense_adj

# A holds the sampled batch's edge indices; X holds the full graph's features.
A = batch.edge_index_dict
X = data.x_dict

# Skip the first edge type, then report each remaining edge index's shape
# next to the shape to_dense_adj produces from it.
for key, edge_index in list(A.items())[1:]:
    print(key)
    print(edge_index.shape)
    print(to_dense_adj(edge_index).shape)

('author', 'writes', 'paper')
torch.Size([2, 5770])
torch.Size([1, 4342, 4342])
('paper', 'cites', 'paper')
torch.Size([2, 12126])
torch.Size([1, 7962, 7962])
('paper', 'has_topic', 'field_of_study')
torch.Size([2, 10573])
torch.Size([1, 18032, 18032])
('institution', 'rev_affiliated_with', 'author')
torch.Size([2, 836])
torch.Size([1, 654, 654])
('paper', 'rev_writes', 'author')
torch.Size([2, 5532])
torch.Size([1, 20588, 20588])
('field_of_study', 'rev_has_topic', 'paper')
torch.Size([2, 10324])
torch.Size([1, 2601, 2601])


In [None]:
# Inspect the attribute reconstruction returned by the model.
X_hat

{'author': tensor([[0.0000, 0.0712, 0.3332,  ..., 1.0965, 0.0000, 0.0000],
         [0.0000, 0.0000, 0.0875,  ..., 0.9298, 0.0000, 0.0000],
         [0.0000, 0.0252, 0.4676,  ..., 0.9029, 0.0000, 0.0000],
         ...,
         [0.0000, 0.0000, 0.1883,  ..., 0.8712, 0.0000, 0.0000],
         [0.0259, 0.0000, 0.0970,  ..., 0.8344, 0.0000, 0.0000],
         [0.1243, 0.0000, 0.0000,  ..., 0.6018, 0.0000, 0.0000]],
        grad_fn=<ReluBackward0>),
 'field_of_study': tensor([[0.0000, 0.0192, 0.0000,  ..., 0.1130, 0.0000, 0.0000],
         [0.0000, 0.0153, 0.0000,  ..., 0.0000, 0.0072, 0.0000],
         [0.0590, 0.1485, 0.0000,  ..., 0.0696, 0.1130, 0.0369],
         ...,
         [0.0592, 0.0000, 0.0000,  ..., 0.0290, 0.2288, 0.1459],
         [0.0602, 0.0000, 0.0000,  ..., 0.0316, 0.2262, 0.1489],
         [0.0192, 0.0323, 0.0000,  ..., 0.0307, 0.2703, 0.1211]],
        grad_fn=<ReluBackward0>),
 'institution': tensor([[0.1092, 0.0000, 0.0000,  ..., 0.0000, 0.0000, 0.0000],
         [0.09

Training Dominant

In [17]:
def loss_func(attrs, X_hat):
    """Compute the attribute reconstruction error per row and its mean.

    Args:
        attrs: Original features; rows are reduced over dimension 1.
        X_hat: Reconstructed features, broadcast-compatible with ``attrs``.

    Returns:
        Tuple ``(cost, structure_cost, attribute_cost)`` where

        * ``cost`` is the per-row Euclidean distance between ``X_hat`` and
          ``attrs`` (one value per row, not reduced),
        * ``structure_cost`` is always ``0`` because no structure term is
          implemented,
        * ``attribute_cost`` is the mean of ``cost``.
    """
    # Row-wise L2 norm. The values equal sqrt(sum(diff ** 2, dim=1)), but the
    # gradient stays finite (zero) for rows reconstructed exactly, whereas the
    # explicit sqrt gives NaN gradients there.
    attribute_reconstruction_errors = torch.linalg.norm(X_hat - attrs, ord=2, dim=1)
    attribute_cost = torch.mean(attribute_reconstruction_errors)

    # Structure reconstruction is not part of the loss; the slot is kept so
    # callers can keep unpacking three values.
    structure_cost = 0

    cost = attribute_reconstruction_errors

    return cost, structure_cost, attribute_cost

In [20]:
from sklearn.metrics import roc_auc_score

epochs = 2

# NOTE: every epoch re-uses the single mini-batch fetched earlier;
# train_loader is not iterated here.
X = batch.x_dict
for epoch in range(epochs):
    model.train()
    optimizer.zero_grad()
    A_hat, X_hat = model(batch.x_dict, batch.edge_index_dict)
    # Only the 'author' attribute reconstruction contributes to the loss.
    loss, struct_loss, feat_loss = loss_func(X['author'], X_hat['author'])
    l = torch.mean(loss)
    l.backward()
    optimizer.step()
    print(f"Epoch: {epoch:04d} train_loss= {l.item():.5f} train/feat_loss= {feat_loss.item():.5f}")

    if epoch == epochs - 1:
        # After the last update, score each author row by its reconstruction error.
        model.eval()
        # No gradients are needed for scoring, so skip building the autograd graph.
        with torch.no_grad():
            A_hat, X_hat = model(batch.x_dict, batch.edge_index_dict)
            loss, struct_loss, feat_loss = loss_func(X['author'], X_hat['author'])
        score = loss.detach().cpu().numpy()
        print("Score = ", score)

Epoch: 0000 train_loss= 4.42346 train/feat_loss= 4.42346
Epoch: 0001 train_loss= 4.28383 train/feat_loss= 4.28383
Score =  [3.641211  3.63537   4.070783  ... 2.3870046 4.1165385 4.244906 ]


In [None]:
# Number of entries in the score array produced by the training cell.
len(score)

1134649

In [None]:
# Shape of the author feature matrix currently bound to X.
X['author'].shape

torch.Size([1134649, 128])