In [2]:
import torch
import torch_geometric

from torch_geometric.datasets import TUDataset
from torch_geometric.loader import DataLoader

In [3]:
import matplotlib as plt
import numpy as np
import networkx
from torch_geometric.data import Data
from sklearn.manifold import TSNE

In [4]:
from torch_geometric.utils import to_dense_adj
from torch.nn import CosineSimilarity

## Data Preparation on MUTAG

In [5]:
dataset = TUDataset(root="../dataset", name='MUTAG')

In [6]:
adj_o = to_dense_adj(dataset[2].edge_index)
adj_c = abs(to_dense_adj(dataset[2].edge_index) - 1) - torch.eye(len(dataset[2].x))

print("Original:\n", adj_o)
print("Complementary:\n", (adj_c))

Original:
 tensor([[[0., 1., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0.],
         [1., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
         [0., 1., 0., 1., 0., 0., 0., 1., 0., 0., 0., 0., 0.],
         [0., 0., 1., 0., 1., 0., 0., 0., 1., 0., 0., 0., 0.],
         [0., 0., 0., 1., 0., 1., 0., 0., 0., 0., 0., 0., 0.],
         [0., 0., 0., 0., 1., 0., 1., 0., 0., 0., 0., 0., 0.],
         [0., 0., 0., 0., 0., 1., 0., 1., 0., 0., 0., 0., 0.],
         [0., 0., 1., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0.],
         [0., 0., 0., 1., 0., 0., 0., 0., 0., 1., 1., 0., 0.],
         [1., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0.],
         [0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 1., 1.],
         [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0.],
         [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0.]]])
Complementary:
 tensor([[[0., 0., 1., 1., 1., 1., 1., 1., 1., 0., 1., 1., 1.],
         [0., 0., 0., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.],
         [1., 0., 0., 0., 

In [7]:
def toComplementary(g):
    c = abs(to_dense_adj(g.edge_index) - 1) - torch.eye(len(g.x))
    c = c[0].nonzero().t().contiguous()
    return c

In [8]:
dataset_c = []
for graph in dataset:
    edge_c = toComplementary(graph)
    dataset_c.append(Data(edge_index=edge_c, x=graph.x, y=graph.y))

In [9]:
dataset.y

tensor([1, 0, 0, 1, 0, 1, 0, 1, 0, 1, 1, 1, 1, 0, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 0, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 0, 0, 1, 1, 1, 0, 1, 1,
        0, 1, 1, 0, 0, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1,
        1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 0, 1, 0, 1, 1, 0, 0,
        1, 1, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 1, 0, 1, 1, 0, 0, 1, 0, 0, 0, 0,
        1, 1, 0, 1, 1, 0, 1, 1, 1, 0, 0, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0,
        1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0])

In [10]:
ys = []
for d in dataset_c:
    ys.append(d.y.item())
print(ys)

[1, 0, 0, 1, 0, 1, 0, 1, 0, 1, 1, 1, 1, 0, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 0, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 0, 0, 1, 1, 1, 0, 1, 1, 0, 1, 1, 0, 0, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 1, 0, 1, 1, 0, 0, 1, 0, 0, 0, 0, 1, 1, 0, 1, 1, 0, 1, 1, 1, 0, 0, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0]


In [15]:
# Train test split
ratio = 0.8
total = len(dataset)

# original graph
g_train = dataset[:round(ratio*total)]
g_test = dataset[round(ratio*total):]

# complementary graph
gc_train = dataset_c[:round(ratio*total)]
gc_test = dataset_c[round(ratio*total):]

In [16]:
print(f'g_train {g_train}')
print(f'g_test {g_test}')
print(f'gc_train {len(gc_train)}')
print(f'gc_test {len(gc_test)}')

g_train MUTAG(150)
g_test MUTAG(38)
gc_train 150
gc_test 38


In [19]:
print([x.y.item() for x in g_train])

[1, 0, 0, 1, 0, 1, 0, 1, 0, 1, 1, 1, 1, 0, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 0, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 0, 0, 1, 1, 1, 0, 1, 1, 0, 1, 1, 0, 0, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 1, 0, 1, 1, 0, 0, 1, 0, 0, 0, 0, 1, 1, 0, 1, 1, 0]


In [20]:
print([x.y.item() for x in gc_train])

[1, 0, 0, 1, 0, 1, 0, 1, 0, 1, 1, 1, 1, 0, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 0, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 0, 0, 1, 1, 1, 0, 1, 1, 0, 1, 1, 0, 0, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 1, 0, 1, 1, 0, 0, 1, 0, 0, 0, 0, 1, 1, 0, 1, 1, 0]


In [27]:
bs = 32
seed = 12345

g_train_loader = DataLoader(g_train, batch_size=bs, shuffle=False)
g_test_loader = DataLoader(g_test, batch_size=bs, shuffle=False)

gc_train_loader = DataLoader(gc_train, batch_size=bs, shuffle=False)
gc_test_loader = DataLoader(gc_test, batch_size=bs, shuffle=False)

In [28]:
for g in g_train_loader:
    print(g.y)
#     break

tensor([1, 0, 0, 1, 0, 1, 0, 1, 0, 1, 1, 1, 1, 0, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1])
tensor([1, 0, 1, 0, 1, 0, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1,
        1, 1, 1, 1, 1, 0, 1, 1])
tensor([0, 0, 1, 1, 1, 0, 1, 1, 0, 1, 1, 0, 0, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0,
        0, 1, 1, 1, 1, 1, 1, 1])
tensor([1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 0, 1, 0, 1, 1, 0, 0,
        1, 1, 0, 0, 1, 1, 1, 1])
tensor([0, 0, 0, 0, 0, 1, 0, 1, 1, 0, 0, 1, 0, 0, 0, 0, 1, 1, 0, 1, 1, 0])


In [29]:
for g in g_test_loader:
    print(g.y)
    break

tensor([1, 1, 1, 0, 0, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1,
        0, 1, 1, 1, 0, 1, 0, 0])


In [30]:
for g in gc_train_loader:
    print(g.y)
#     break

tensor([1, 0, 0, 1, 0, 1, 0, 1, 0, 1, 1, 1, 1, 0, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1])
tensor([1, 0, 1, 0, 1, 0, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1,
        1, 1, 1, 1, 1, 0, 1, 1])
tensor([0, 0, 1, 1, 1, 0, 1, 1, 0, 1, 1, 0, 0, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0,
        0, 1, 1, 1, 1, 1, 1, 1])
tensor([1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 0, 1, 0, 1, 1, 0, 0,
        1, 1, 0, 0, 1, 1, 1, 1])
tensor([0, 0, 0, 0, 0, 1, 0, 1, 1, 0, 0, 1, 0, 0, 0, 0, 1, 1, 0, 1, 1, 0])


In [25]:
for g in gc_test_loader:
    print(g.y)
    break

tensor([1, 1, 1, 0, 0, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1])


## Building model

In [26]:
from torch_geometric.nn import GCNConv
from torch.nn import Linear
from torch.nn import Linear
from torch_geometric.nn import global_mean_pool
from torch_geometric.nn import global_max_pool
from torch_geometric.nn import global_add_pool
import torch.nn.functional as F

In [48]:
class ComplementarySupCon(torch.nn.Module):
    def __init__(self, dataset, hidden_channels):
        super(ComplementarySupCon, self).__init__()
        
        # weight seed
        torch.manual_seed(42)
        self.conv1_o = GCNConv(dataset.num_node_features, hidden_channels)
        self.conv2_o = GCNConv(hidden_channels, hidden_channels)
        
        self.conv1_c = GCNConv(dataset.num_node_features, hidden_channels)
        self.conv2_c = GCNConv(hidden_channels, hidden_channels)
        
        # classification layer
        # self.lin1 = Linear(hidden_channels, hidden_channels)
        self.lin = Linear(hidden_channels, dataset.num_classes)

    def forward(self, x_o, x_c, edge_index_o, edge_index_c, batch_o):
        x_o = self.conv1_o(x_o, edge_index_o)
        x_o = x_o.relu()
        x_o = self.conv2_o(x_o, edge_index_o)
        
        x_c = self.conv1_c(x_c, edge_index_c)
        x_c = x_c.relu()
        x_c = self.conv2_c(x_c, edge_index_c)


        h = (x_o + x_c)/2
        h = global_add_pool(h, batch_o)
        
        # h.relu()
        h = self.lin(h)
        return h

Train

In [49]:
def train(model, g_loader, gc_loader, classification = False):
    optimizer = torch.optim.Adam(model.parameters(), lr=0.01)
    criterion = torch.nn.CrossEntropyLoss()
    model.train()
    
    for _, (g_o, g_c) in enumerate(zip(g_loader, gc_loader)):
        h = model(g_o.x, g_c.x, g_o.edge_index, g_c.edge_index, g_o.batch)
        loss = criterion(h, g_o.y)
        loss.backward()
        optimizer.step()
        optimizer.zero_grad()
    
    return h, loss

@torch.no_grad()
def test(model, g_loader, gc_loader):
    model.eval()
    correct = 0
    for _, (g_o, g_c) in enumerate(zip(g_loader, gc_loader)):
        z = model(g_o.x, g_c.x, g_o.edge_index, g_c.edge_index, g_o.batch)
        pred = z.argmax(dim=1)
        correct += int((pred == g_o.y).sum())

    return correct/len(g_loader.dataset)

Pretrain model

In [50]:
model = ComplementarySupCon(dataset, 64)

In [51]:
for epoch in range(0, 50):
    h, loss = train(model, g_train_loader, gc_train_loader)
    acc = test(model, g_test_loader, gc_test_loader)
    print(f"epoch: {epoch+1} loss: {loss:.4f} accuracy: {acc:.4f}")

epoch: 1 loss: 1.6808 accuracy: 0.6842
epoch: 2 loss: 0.6577 accuracy: 0.3158
epoch: 3 loss: 0.7700 accuracy: 0.3158
epoch: 4 loss: 0.9265 accuracy: 0.6842
epoch: 5 loss: 0.6739 accuracy: 0.3158
epoch: 6 loss: 0.9625 accuracy: 0.6842
epoch: 7 loss: 0.6392 accuracy: 0.3158
epoch: 8 loss: 1.0295 accuracy: 0.6842
epoch: 9 loss: 0.6415 accuracy: 0.6842
epoch: 10 loss: 0.6401 accuracy: 0.3158
epoch: 11 loss: 1.0641 accuracy: 0.6842
epoch: 12 loss: 0.6457 accuracy: 0.6842
epoch: 13 loss: 0.6627 accuracy: 0.3158
epoch: 14 loss: 1.0875 accuracy: 0.6842
epoch: 15 loss: 0.6765 accuracy: 0.5000
epoch: 16 loss: 0.8766 accuracy: 0.7105
epoch: 17 loss: 0.6953 accuracy: 0.3158
epoch: 18 loss: 1.1732 accuracy: 0.6842
epoch: 19 loss: 0.8668 accuracy: 0.6842
epoch: 20 loss: 0.7010 accuracy: 0.3158
epoch: 21 loss: 1.1579 accuracy: 0.7105
epoch: 22 loss: 0.7146 accuracy: 0.7632
epoch: 23 loss: 0.8840 accuracy: 0.7105
epoch: 24 loss: 0.8049 accuracy: 0.6316
epoch: 25 loss: 0.8800 accuracy: 0.7632
epoch: 26