In [26]:
from gcn import GCN
from graphsage import GraphSAGE, GraphSAGE2
from gat import MultiLayerGAT, GAT
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torch_geometric.datasets as tg_datasets
from torch_geometric.transforms import NormalizeFeatures
from torch_geometric.utils import to_dense_adj


In [27]:
def get_device():
    """Return the CUDA device when CUDA is available, otherwise the CPU device."""
    if torch.cuda.is_available():
        return torch.device('cuda')
    return torch.device('cpu')

In [5]:
# Load the Cora dataset through torch_geometric's Planetoid loader, with the
# NormalizeFeatures transform applied to every graph that is read from it.
cora_dataset = tg_datasets.Planetoid(root='.', name='Cora', transform=NormalizeFeatures())

# The first (index 0) graph of the dataset is the one used throughout the notebook.
data = cora_dataset[0]

# Node feature matrix and node labels
X = data.x
labels = data.y

# Boolean node masks selecting the train / validation / test nodes
train_mask = data.train_mask
val_mask = data.val_mask
test_mask = data.test_mask

# Dense adjacency matrix built from the edge list. max_num_nodes pins its size to
# the number of nodes so it always lines up with X: without it, to_dense_adj infers
# the size from the largest node index present in edge_index, which would be too
# small if the highest-numbered nodes had no edges.
adj = to_dense_adj(data.edge_index, max_num_nodes=data.num_nodes).squeeze(0)

# Add self-loops, then apply the symmetric normalisation D^-1/2 (A + I) D^-1/2.
# With the identity added every row sum is at least 1 (entries are non-negative
# counts), so the inverse square root below stays finite.
adj = adj + torch.eye(adj.size(0))
D = torch.diag(torch.sum(adj, dim=1)**(-0.5))
adj_normalized = torch.matmul(torch.matmul(D, adj), D)

device = get_device()

# Move everything the training loops read to the selected device
X = X.to(device)
labels = labels.to(device)
adj_normalized = adj_normalized.to(device)
adj = adj.to(device)
train_mask = train_mask.to(device)
val_mask = val_mask.to(device)
test_mask = test_mask.to(device)


In [15]:
def train(num_epochs, hidden_dim, num_layers, model_type, adj_matrix, aggregator=None,
          lr=0.01, weight_decay=5e-4, log_every=None):
    """Build a model of the requested type and train it full-batch on the training nodes.

    Reads the notebook-level ``X``, ``labels``, ``train_mask``, ``val_mask``,
    ``cora_dataset`` and ``device``.

    Args:
        num_epochs: Number of full-batch optimisation steps.
        hidden_dim: Hidden size passed to the model constructor.
        num_layers: Layer count; forwarded to ``GCN`` and to the fallback branch only.
        model_type: Model class to instantiate. ``GCN``, ``GraphSAGE2`` and ``GAT`` have
            dedicated constructor calls; any other class is called as
            ``model_type(input_dim, hidden_dim, output_dim, num_layers, aggregator)``.
        adj_matrix: Graph structure handed to the model as its second forward argument.
        aggregator: Forwarded to the fallback constructor only.
        lr: Adam learning rate.
        weight_decay: Adam weight decay.
        log_every: When set to a positive integer, print the training loss and the
            validation accuracy every ``log_every`` epochs. ``None`` (default) prints
            nothing and skips the validation pass entirely.

    Returns:
        The trained model, left in eval mode.
    """
    input_dim = X.size(1)
    output_dim = cora_dataset.num_classes
    if model_type is GCN:
        model = model_type(input_dim, hidden_dim, output_dim, num_layers).to(device)
    elif model_type is GraphSAGE2:
        model = model_type(input_dim, hidden_dim, output_dim).to(device)
    elif model_type is GAT:
        # NOTE(review): 0.5 / 0.2 / 8 are presumably dropout, LeakyReLU slope and
        # number of heads - confirm against gat.py.
        model = model_type(input_dim, hidden_dim, output_dim, 0.5, 0.2, 8).to(device)
    else:
        model = model_type(input_dim, hidden_dim, output_dim, num_layers, aggregator).to(device)
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=lr, weight_decay=weight_decay)

    for epoch in range(num_epochs):
        model.train()
        optimizer.zero_grad()
        output = model(X, adj_matrix)

        # Compute the loss only for the labeled (training) nodes
        loss = criterion(output[train_mask], labels[train_mask])

        # Backpropagation
        loss.backward()
        optimizer.step()

        if log_every and (epoch + 1) % log_every == 0:
            # Validation needs its own forward pass in eval mode: the training
            # output above was produced in train mode, before this epoch's update.
            model.eval()
            with torch.no_grad():
                val_pred = model(X, adj_matrix)[val_mask].argmax(dim=1)
            accuracy = (val_pred == labels[val_mask]).sum().item() / val_mask.sum().item()
            print(f"Epoch {epoch + 1}/{num_epochs}, Loss: {loss.item()}, Validation accuracy: {accuracy:.4f}")

    model.eval()
    return model


def eval(model, adj_matrix):
    """Print the model's accuracy on the test nodes.

    Reads the notebook-level ``X``, ``labels`` and ``test_mask``. The name shadows
    Python's built-in ``eval``; it is kept because other cells call it.

    Args:
        model: Module called as ``model(X, adj_matrix)``; it is switched to eval mode.
        adj_matrix: Graph structure handed to the model as its second forward argument.
    """
    model.eval()
    # Evaluation needs no gradients, so the forward pass itself runs under no_grad;
    # otherwise an autograd graph for the whole forward pass is built and kept alive.
    with torch.no_grad():
        output = model(X, adj_matrix)
        _, pred = torch.max(output[test_mask], dim=1)
    correct = (pred == labels[test_mask]).sum().item()
    accuracy = correct / test_mask.sum().item()
    print(f"Test accuracy: {accuracy:.4f}")


In [5]:
%%time

# GCN run: 200 epochs, 128 hidden units, 2 layers, on the normalised adjacency matrix.
model = train(
    num_epochs=200,
    hidden_dim=128,
    num_layers=2,
    model_type=GCN,
    adj_matrix=adj_normalized,
)
eval(model, adj_normalized)

Test accuracy: 0.8040
CPU times: user 1.07 s, sys: 1.13 s, total: 2.2 s
Wall time: 2.09 s


In [6]:
%%time

# GraphSAGE run with the 'mean' aggregator: 400 epochs, 64 hidden units, 2 layers,
# on the self-looped but un-normalised adjacency matrix.
model = train(
    num_epochs=400,
    hidden_dim=64,
    num_layers=2,
    model_type=GraphSAGE,
    adj_matrix=adj,
    aggregator='mean',
)
eval(model, adj)

Test accuracy: 0.7430
CPU times: user 1.09 s, sys: 85.3 ms, total: 1.17 s
Wall time: 932 ms


In [7]:
%%time

# GraphSAGE run with the 'pool' aggregator: 200 epochs, 64 hidden units, 2 layers,
# on the self-looped but un-normalised adjacency matrix.
model = train(
    num_epochs=200,
    hidden_dim=64,
    num_layers=2,
    model_type=GraphSAGE,
    adj_matrix=adj,
    aggregator='pool',
)
eval(model, adj)

Test accuracy: 0.7730
CPU times: user 1.42 s, sys: 83.4 ms, total: 1.51 s
Wall time: 1.33 s


In [8]:
# GraphSAGE2 run: 200 epochs, 32 hidden units. This model is given the edge index
# (moved to the device) instead of a dense adjacency matrix.
model = train(
    num_epochs=200,
    hidden_dim=32,
    num_layers=2,
    model_type=GraphSAGE2,
    adj_matrix=data.edge_index.to(device),
)
eval(model, data.edge_index.to(device))



Test accuracy: 0.7750


In [27]:
import torch.nn as nn
import random

def unsupervised_loss(embeddings, edge_index, num_neg_samples=5):
    """Negative-sampling link loss over the edges of a graph.

    For every edge the dot product of its two endpoint embeddings is pushed up
    (``-logsigmoid(score)``); for every edge source, ``num_neg_samples`` nodes are
    drawn uniformly at random (with replacement, true neighbours are not excluded)
    and their dot products with the source are pushed down (``-logsigmoid(-score)``).

    Args:
        embeddings: ``(num_nodes, dim)`` tensor of node embeddings.
        edge_index: ``(2, num_edges)`` integer tensor; row 0 holds sources, row 1 targets.
        num_neg_samples: Number of random negatives drawn per edge.

    Returns:
        Scalar tensor: mean positive loss plus mean negative loss.
    """
    src, dst = edge_index[0], edge_index[1]
    src_emb = embeddings[src]

    # Positive pairs: the two endpoints of each edge.
    pos_score = torch.sum(src_emb * embeddings[dst], dim=-1)
    pos_loss = F.logsigmoid(pos_score).mean()

    # Negatives are sampled on the embeddings' own device rather than on a
    # notebook-level `device`, so the function works wherever its inputs live.
    neg_nodes = torch.randint(
        0, embeddings.size(0), (edge_index.size(1), num_neg_samples),
        device=embeddings.device,
    )
    # Broadcast each source embedding against its num_neg_samples negatives:
    # (E, 1, dim) * (E, num_neg_samples, dim) -> summed over dim -> (E, num_neg_samples)
    neg_score = torch.sum(src_emb.unsqueeze(1) * embeddings[neg_nodes], dim=-1)
    neg_loss = F.logsigmoid(-neg_score).mean()

    return -pos_loss - neg_loss

def train(data, adj, alpha=0.5):
    """Run a single optimisation step on a blend of supervised and unsupervised loss.

    Operates on the notebook-level ``model`` and ``optimizer`` and moves inputs to the
    notebook-level ``device``. Note that this definition replaces any earlier
    ``train`` defined in the notebook.

    Args:
        data: Graph object providing ``x``, ``y``, ``edge_index`` and ``train_mask``.
        adj: Graph structure passed to the model as its second forward argument.
        alpha: Weight of the cross-entropy term; the unsupervised term gets ``1 - alpha``.
    """
    model.train()
    optimizer.zero_grad()

    # The model outputs double as class logits and as node embeddings.
    node_out = model(data.x.to(device), adj)

    targets = data.y[data.train_mask].to(device)
    ce_term = F.cross_entropy(node_out[data.train_mask], targets)
    link_term = unsupervised_loss(node_out, data.edge_index.to(device))

    total = alpha * ce_term + (1 - alpha) * link_term
    total.backward()
    optimizer.step()

def test(data):
    model.eval()
    embeddings = model(data.x.to(device), data.edge_index.to(device))
    pred = embeddings.argmax(dim=1)
    accs = []
    for mask in [data.train_mask, data.val_mask, data.test_mask]:
        correct = pred[mask].eq(data.y[mask].to(device)).sum().item()
        accs.append(correct / mask.sum().item())
    return accs


In [29]:
# GraphSAGE ('mean' aggregator, 2 layers, 32 hidden units) trained with the combined
# supervised + unsupervised step function. `model` and `optimizer` must be
# notebook-level names because that step function reads them as globals.
input_dim, output_dim = X.size(1), cora_dataset.num_classes
hidden_dim = 32
model = GraphSAGE(input_dim, hidden_dim, output_dim, 2, 'mean').to(device)
optimizer = optim.Adam(model.parameters(), weight_decay=5e-4, lr=0.01)

# 200 single-step updates on the dense (self-looped) adjacency matrix
for e in range(200):
    train(data, adj)

eval(model, adj)

Test accuracy: 0.7340


In [23]:
# Print the module tree of whichever model is currently bound to the notebook-level
# `model` (the result depends on which cell assigned it last).
print(model)

GAT(
  (attentions): ModuleList(
    (0-1): 2 x GraphAttentionLayer(
      (linear): Linear(in_features=1433, out_features=16, bias=True)
      (attention): Linear(in_features=32, out_features=1, bias=True)
      (leakyrelu): LeakyReLU(negative_slope=0.2)
    )
  )
  (out_att): GraphAttentionLayer(
    (linear): Linear(in_features=32, out_features=7, bias=True)
    (attention): Linear(in_features=14, out_features=1, bias=True)
    (leakyrelu): LeakyReLU(negative_slope=0.2)
  )
)


In [28]:
%%time

# Hand-rolled training loop for the dense-adjacency GAT from gat.py.
device = get_device()
input_dim, output_dim = X.size(1), cora_dataset.num_classes
hidden_dim = 16

# NOTE(review): 0.5 / 0.2 / 2 are presumably dropout, LeakyReLU slope and head
# count - confirm against gat.py.
model = GAT(input_dim, hidden_dim, output_dim, 0.5, 0.2, 2).to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), weight_decay=5e-4, lr=0.01)

for epoch in range(200):
    model.train()
    optimizer.zero_grad()

    # Full-batch forward pass; only the labeled training nodes contribute to the loss
    output = model(X, adj)
    loss = criterion(output[train_mask], labels[train_mask])

    # Backward pass and parameter update
    loss.backward()
    optimizer.step()


CPU times: user 23.3 s, sys: 73.9 ms, total: 23.4 s
Wall time: 23.2 s


In [29]:
# Test-set accuracy of the GAT trained in the previous cell.
model.eval()
# The forward pass runs inside no_grad as well: evaluation needs no gradients, and
# running it outside would build an autograd graph for the whole dense forward pass.
with torch.no_grad():
    output = model(X, adj)
    _, pred = torch.max(output[test_mask], dim=1)
    correct = (pred == labels[test_mask]).sum().item()
    accuracy = correct / test_mask.sum().item()
    print(f"Test accuracy: {accuracy:.4f}")


Test accuracy: 0.8170


In [30]:
import torch
import torch.nn.functional as F
from torch_geometric.nn import GATConv
from torch_geometric.nn import global_mean_pool
from torch_geometric.data import Data, DataLoader

class GAT_MPNN(torch.nn.Module):
    """Sequential stack of torch_geometric ``GATConv`` layers for node classification.

    ``n_layers - 1`` multi-head hidden layers (heads concatenated, each followed by
    ELU and dropout) feed a single-head output layer. ``forward`` returns per-node
    log-probabilities.

    Args:
        nfeat: Number of input features per node.
        nhid: Output size of each attention head in the hidden layers.
        nclass: Number of output classes.
        dropout: Dropout probability, used inside ``GATConv`` and between layers.
        alpha: Negative slope of the LeakyReLU used in the attention scores
            (forwarded as ``GATConv``'s ``negative_slope``).
        nheads: Number of attention heads in each hidden layer.
        n_layers: Total number of ``GATConv`` layers, including the output layer.
    """

    def __init__(self, nfeat, nhid, nclass, dropout, alpha, nheads, n_layers):
        super(GAT_MPNN, self).__init__()
        self.n_layers = n_layers
        self.dropout = dropout

        hidden_width = nhid * nheads
        n_hidden = max(n_layers - 1, 0)

        # Multi-head attention layers. Only the first one sees the raw features;
        # every later one consumes the concatenated heads of the layer before it.
        self.attentions = torch.nn.ModuleList([
            GATConv(nfeat if i == 0 else hidden_width, nhid, heads=nheads,
                    dropout=dropout, negative_slope=alpha)
            for i in range(n_hidden)
        ])

        # Output attention layer. With no hidden layers it reads the raw features.
        out_in = hidden_width if n_hidden else nfeat
        self.out_att = GATConv(out_in, nclass, heads=1, concat=False,
                               dropout=dropout, negative_slope=alpha)

    def forward(self, data):
        """Return ``(num_nodes, nclass)`` log-probabilities for ``data.x`` / ``data.edge_index``."""
        x, edge_index = data.x, data.edge_index

        # Message passing and node updates
        for attention_layer in self.attentions:
            x = F.elu(attention_layer(x, edge_index))
            x = F.dropout(x, p=self.dropout, training=self.training)

        # Readout function
        x = self.out_att(x, edge_index)
        return F.log_softmax(x, dim=1)


In [39]:
%%time
# Resolve the device first, then move the whole graph object onto it.
device = get_device()
data = data.to(device)

input_dim, output_dim = X.size(1), cora_dataset.num_classes
hidden_dim = 32
model = GAT_MPNN(
    nfeat=input_dim,
    nhid=hidden_dim,
    nclass=output_dim,
    dropout=0.5,
    alpha=0.2,
    nheads=4,
    n_layers=2,
).to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), weight_decay=5e-4, lr=0.01)

for epoch in range(200):
    model.train()
    optimizer.zero_grad()

    # Full-batch forward pass; only the labeled training nodes contribute to the loss
    output = model(data)
    loss = criterion(output[train_mask], labels[train_mask])

    # Backward pass and parameter update
    loss.backward()
    optimizer.step()


CPU times: user 983 ms, sys: 96.2 ms, total: 1.08 s
Wall time: 974 ms


In [40]:
# Test-set accuracy of the GAT_MPNN model trained in the previous cell.
model.eval()
# The forward pass runs inside no_grad as well: evaluation needs no gradients, so
# there is no reason to build an autograd graph for it.
with torch.no_grad():
    output = model(data)
    _, pred = torch.max(output[test_mask], dim=1)
    correct = (pred == labels[test_mask]).sum().item()
    accuracy = correct / test_mask.sum().item()
    print(f"Test accuracy: {accuracy:.4f}")


Test accuracy: 0.7970


In [42]:
# Shape of the edge list: 2 rows (source, target) by number of edge entries.
data.edge_index.shape

torch.Size([2, 10556])

In [49]:
# Node feature matrix (the display is abbreviated by torch's tensor repr).
data.x

tensor([[0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.],
        ...,
        [0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.]], device='cuda:0')

In [48]:
# Integer class label of every node.
data.y

tensor([3, 4, 4,  ..., 3, 3, 3], device='cuda:0')

In [56]:
# Sanity check: the dense adjacency built from the edge list is square (nodes x nodes).
to_dense_adj(data.edge_index).squeeze(0).shape

torch.Size([2708, 2708])

In [75]:
import torch
import torch.nn.functional as F
from torch_geometric.nn import GATConv
from torch_geometric.data import Data, DataLoader

class GAT_MPNN2(torch.nn.Module):
    """GAT node classifier written as explicit message / update / readout steps.

    Unlike a sequential stack, all ``n_layers - 1`` multi-head ``GATConv`` layers are
    applied in parallel to the same input features; their outputs are concatenated,
    passed through ELU and dropout, and fed to a single-head readout layer.

    Args:
        nfeat: Number of input features per node.
        nhid: Output size of each attention head in the parallel layers.
        nclass: Number of output classes.
        dropout: Dropout probability, used inside ``GATConv`` and in ``update``.
        alpha: Negative slope of the LeakyReLU used in the attention scores
            (forwarded as ``GATConv``'s ``negative_slope``).
        nheads: Number of attention heads in each parallel layer.
        n_layers: One more than the number of parallel attention layers.
    """

    def __init__(self, nfeat, nhid, nclass, dropout, alpha, nheads, n_layers):
        super(GAT_MPNN2, self).__init__()
        self.n_layers = n_layers
        self.dropout = dropout

        n_branches = max(n_layers - 1, 0)

        # Multi-head attention layers, all reading the raw node features
        self.attentions = torch.nn.ModuleList([
            GATConv(nfeat, nhid, heads=nheads, dropout=dropout, negative_slope=alpha)
            for _ in range(n_branches)
        ])

        # Output attention layer. update() concatenates the output of every branch,
        # so the readout input width scales with the number of branches; with no
        # branches the raw features are passed straight through.
        readout_in = nhid * nheads * n_branches if n_branches else nfeat
        self.out_att = GATConv(readout_in, nclass, heads=1, concat=False,
                               dropout=dropout, negative_slope=alpha)

    def message(self, x, edge_index):
        """Return the list of outputs of every attention layer applied to ``x``."""
        return [attention_layer(x, edge_index) for attention_layer in self.attentions]

    def update(self, x, messages):
        """Combine the messages into new node features.

        The messages are concatenated along the feature axis and passed through ELU
        (GATConv applies no activation of its own, so without it the hidden
        representation would go into the readout layer without any non-linearity),
        then dropout. With no messages, ``x`` itself goes through dropout.
        """
        if messages:
            x = F.elu(torch.cat(messages, dim=1))
        x = F.dropout(x, p=self.dropout, training=self.training)
        return x

    def readout(self, x, edge_index):
        """Map node features to per-node class log-probabilities."""
        x = F.elu(self.out_att(x, edge_index))
        return F.log_softmax(x, dim=1)

    def forward(self, data):
        """Return ``(num_nodes, nclass)`` log-probabilities for ``data.x`` / ``data.edge_index``."""
        x, edge_index = data.x, data.edge_index

        # Message passing
        messages = self.message(x, edge_index)

        # Node updates
        x = self.update(x, messages)

        # Readout function
        x = self.readout(x, edge_index)
        return x


In [76]:
%%time
# Resolve the device first, then move the whole graph object onto it.
device = get_device()
data = data.to(device)

input_dim, output_dim = X.size(1), cora_dataset.num_classes
hidden_dim = 32
model = GAT_MPNN2(
    nfeat=input_dim,
    nhid=hidden_dim,
    nclass=output_dim,
    dropout=0.5,
    alpha=0.2,
    nheads=4,
    n_layers=2,
).to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), weight_decay=5e-4, lr=0.01)

# NOTE(review): a single epoch only - this looks like a smoke test of the
# forward/backward pass rather than a real training run; confirm before comparing
# against the 200-epoch runs of the other models.
for epoch in range(1):
    model.train()
    optimizer.zero_grad()

    # Full-batch forward pass; only the labeled training nodes contribute to the loss
    output = model(data)
    loss = criterion(output[train_mask], labels[train_mask])

    # Backward pass and parameter update
    loss.backward()
    optimizer.step()


[tensor([[-5.4318e-03,  1.1499e-03, -4.9245e-03,  ...,  3.2788e-03,
          7.5543e-03,  8.8980e-03],
        [ 5.4508e-03, -9.2905e-04,  1.8591e-03,  ..., -5.8226e-03,
          1.7247e-03, -9.2903e-04],
        [-2.6059e-03,  2.4326e-04,  8.5951e-03,  ..., -6.8162e-03,
          5.2241e-03, -4.6661e-03],
        ...,
        [-9.5754e-03, -2.7873e-02, -6.4762e-03,  ..., -2.9698e-03,
          1.3995e-02,  3.2855e-03],
        [ 5.8869e-03, -1.3036e-03,  3.0231e-03,  ...,  1.6307e-04,
         -5.5354e-03, -9.3102e-03],
        [-4.7539e-03, -6.1458e-05,  4.8899e-03,  ...,  1.1061e-03,
         -5.1247e-03, -7.3336e-03]], device='cuda:0', grad_fn=<AddBackward0>)]
torch.Size([2708, 128])
CPU times: user 127 ms, sys: 0 ns, total: 127 ms
Wall time: 14.4 ms
