In [1]:
import torch
from torch_geometric.data import Data

# edge_index contains all edges; shape=(2, num_edges); dtype is always torch.long
edge_index = torch.tensor(
    [
        [0, 1, 1, 2],
        [1, 0, 2, 1]
    ],
    dtype=torch.long
)

# x contains node features
x = torch.tensor(
    [
        [-1],
        [0],
        [1]
    ],
    dtype=torch.float
)
data = Data(x=x, edge_index=edge_index)
data

Data(x=[3, 1], edge_index=[2, 4])

## Validating the Format

In [2]:
data.num_nodes

3

In [3]:
data.num_edges

4

In [4]:
data.num_node_features

1

In [5]:
data.has_isolated_nodes()

False

In [6]:
data.has_self_loops()

False

In [7]:
data.is_directed()

False

## Datasets

In [13]:
from torch_geometric.datasets import TUDataset
from torch_geometric.loader import DataLoader

dataset = TUDataset(root='/tmp/ENZYMES', name='ENZYMES', use_node_attr=True)
loader = DataLoader(dataset, batch_size=32, shuffle=True)

for batch in loader:
    print(batch)
    

DataBatch(edge_index=[2, 3902], x=[1036, 21], y=[32], batch=[1036], ptr=[33])
DataBatch(edge_index=[2, 4762], x=[1245, 21], y=[32], batch=[1245], ptr=[33])
DataBatch(edge_index=[2, 4030], x=[1121, 21], y=[32], batch=[1121], ptr=[33])
DataBatch(edge_index=[2, 4376], x=[1159, 21], y=[32], batch=[1159], ptr=[33])
DataBatch(edge_index=[2, 3680], x=[948, 21], y=[32], batch=[948], ptr=[33])
DataBatch(edge_index=[2, 3748], x=[950, 21], y=[32], batch=[950], ptr=[33])
DataBatch(edge_index=[2, 3670], x=[947, 21], y=[32], batch=[947], ptr=[33])
DataBatch(edge_index=[2, 4288], x=[1106, 21], y=[32], batch=[1106], ptr=[33])
DataBatch(edge_index=[2, 3524], x=[911, 21], y=[32], batch=[911], ptr=[33])
DataBatch(edge_index=[2, 4294], x=[1166, 21], y=[32], batch=[1166], ptr=[33])
DataBatch(edge_index=[2, 3556], x=[937, 21], y=[32], batch=[937], ptr=[33])
DataBatch(edge_index=[2, 3944], x=[1044, 21], y=[32], batch=[1044], ptr=[33])
DataBatch(edge_index=[2, 3680], x=[992, 21], y=[32], batch=[992], ptr=[33]

In [9]:
from torch_geometric.datasets import Planetoid
from torch_geometric.transforms import NormalizeFeatures

dataset = Planetoid(root='data/Planetoid', name='Cora', transform=NormalizeFeatures())

print()
print(f'Dataset: {dataset}:')
print('======================')
print(f'Number of graphs: {len(dataset)}')
print(f'Number of features: {dataset.num_features}')
print(f'Number of classes: {dataset.num_classes}')

data = dataset[0]  # Get the first graph object.

print()
print(data)
print('===========================================================================================================')

# Gather some statistics about the graph.
print(f'Number of nodes: {data.num_nodes}')
print(f'Number of edges: {data.num_edges}')
print(f'Average node degree: {data.num_edges / data.num_nodes:.2f}')
print(f'Number of training nodes: {data.train_mask.sum()}')
print(f'Training node label rate: {int(data.train_mask.sum()) / data.num_nodes:.2f}')
print(f'Has isolated nodes: {data.has_isolated_nodes()}')
print(f'Has self-loops: {data.has_self_loops()}')
print(f'Is undirected: {data.is_undirected()}')

6

In [15]:
import torch
from torch.nn import Linear
import torch.nn.functional as F
from torch_geometric.nn import GATConv


class GAT(torch.nn.Module):
    def __init__(self, hidden_channels, heads):
        super().__init__()
        torch.manual_seed(1234567)
        self.conv1 = GATConv(dataset.num_features, hidden_channels, heads=heads)  # TODO
        self.conv2 = GATConv(hidden_channels * heads, dataset.num_classes, heads=heads)  # TODO

    def forward(self, x, edge_index):
        x = F.dropout(x, p=0.6, training=self.training)
        x = self.conv1(x, edge_index)
        x = F.elu(x)
        x = F.dropout(x, p=0.6, training=self.training)
        # print(x.size())
        x = self.conv2(x, edge_index)
        return x

model = GAT(hidden_channels=8, heads=8)
print(model)

optimizer = torch.optim.Adam(model.parameters(), lr=0.005, weight_decay=5e-4)
criterion = torch.nn.CrossEntropyLoss()

def train():
    model.train()
    optimizer.zero_grad()  # Clear gradients.
    out = model(data.x, data.edge_index)  # Perform a single forward pass.
    loss = criterion(out[data.train_mask], data.y[data.train_mask])  # Compute the loss solely based on the training nodes.
    loss.backward()  # Derive gradients.
    optimizer.step()  # Update parameters based on gradients.
    return loss

def test(mask):
    model.eval()
    out = model(data.x, data.edge_index)
    pred = out.argmax(dim=1)  # Use the class with highest probability.
    correct = pred[mask] == data.y[mask]  # Check against ground-truth labels.
    acc = int(correct.sum()) / int(mask.sum())  # Derive ratio of correct predictions.
    return acc


for epoch in range(1, 201):
    loss = train()
    val_acc = test(data.val_mask)
    test_acc = test(data.test_mask)
    print(f'Epoch: {epoch:03d}, Loss: {loss:.4f}, Val: {val_acc:.4f}, Test: {test_acc:.4f}')

GAT(
  (conv1): GATConv(21, 8, heads=8)
  (conv2): GATConv(64, 6, heads=8)
)


RuntimeError: mat1 and mat2 shapes cannot be multiplied (3x1 and 21x64)