In [1]:
# Install required packages.
import os
import torch
os.environ['TORCH'] = torch.__version__
print(torch.__version__)

!pip install -q torch-scatter -f https://data.pyg.org/whl/torch-${TORCH}.html
!pip install -q torch-sparse -f https://data.pyg.org/whl/torch-${TORCH}.html
!pip install -q torch-cluster -f https://data.pyg.org/whl/torch-${TORCH}.html
!pip install -q git+https://github.com/pyg-team/pytorch_geometric.git

1.13.0


In [2]:
import torch
import torch.nn as nn
import torch.nn.functional as F

from torch_geometric.datasets import MNISTSuperpixels
from torch_geometric.loader import DataLoader
import torch_geometric.transforms as T
import torch_geometric.nn as gnn
from torch_geometric.utils import normalized_cut  

In [3]:
path = ".data/"
transform = T.Compose([
    T.GCNNorm(),
    T.Cartesian(cat=False),
    T.NormalizeScale(),
])

batch_size = 64

train_dataset = MNISTSuperpixels(path, True, transform=transform)
test_dataset = MNISTSuperpixels(path, False, transform=transform)
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, drop_last=True)
test_loader = DataLoader(test_dataset, batch_size=batch_size, drop_last=True)
d = train_dataset

print(f"Dataset:\n{d}")

sample = train_dataset[0]
print()
print(sample)
print('=============================================================')

# Gather some statistics about the first graph.
print(f"Number of features:  {sample.num_features}")
print(f"Number of node features: {sample.num_node_features}")
print(f'Number of nodes: {sample.num_nodes}')
print(f'Number of edges: {sample.num_edges}')
# print(f"Adjacency Matrix: {data.adj_t}")
print(f'Average node degree: {sample.num_edges / sample.num_nodes:.2f}')
print(f'Has isolated nodes: {sample.has_isolated_nodes()}')
print(f'Has self-loops: {sample.has_self_loops()}')
# print(f'Is undirected: {sample.is_undirected()}')
# print(f"Adjacency matrix: {sample.adj_t.size()}")
print(f"Edge weight min: {torch.min(sample.edge_weight)}, max: {torch.max(sample.edge_weight)}")


Dataset:
MNISTSuperpixels(60000)

Data(x=[75, 1], edge_index=[2, 1471], y=[1], pos=[75, 2], edge_weight=[1471], edge_attr=[1471, 2])
Number of features:  1
Number of node features: 1
Number of nodes: 75
Number of edges: 1471
Average node degree: 19.61
Has isolated nodes: False
Has self-loops: True
Edge weight min: 0.03333333134651184, max: 0.1249999925494194


In [4]:
test_batch = next(iter(train_loader))

print()
print(test_batch)
print('=============================================================')

# Gather some statistics about the first graph.
print(f"Number of features:  {test_batch.num_features}")
print(f"Number of node features: {test_batch.num_node_features}")
print(f'Number of nodes: {test_batch.num_nodes}')
print(f'Number of edges: {test_batch.num_edges}')
# print(f"Adjacency Matrix: {data.adj_t}")
print(f'Average node degree: {test_batch.num_edges / test_batch.num_nodes:.2f}')
print(f'Has isolated nodes: {test_batch.has_isolated_nodes()}')
print(f'Has self-loops: {test_batch.has_self_loops()}')
print(f'Is undirected: {test_batch.is_undirected()}')


DataBatch(x=[4800, 1], edge_index=[2, 93392], y=[64], pos=[4800, 2], edge_weight=[93392], edge_attr=[93392, 2], batch=[4800], ptr=[65])
Number of features:  1
Number of node features: 1
Number of nodes: 4800
Number of edges: 93392
Average node degree: 19.46
Has isolated nodes: False
Has self-loops: True
Is undirected: False


In [5]:
class GCNLayer(nn.Module):
  def __init__(self, conv, act, norm, dropout:float = 0.0):
    super().__init__()
    self.conv = conv
    self.act = act
    self.norm = norm
    self.dropout = dropout

  def forward(self, x, edge_index, edge_attr = None):
    x = shortcut = self.conv(x, edge_index, edge_attr)
    x = self.norm(x)
    x = self.act(x)
    x = x + shortcut
    return F.dropout(x, p=self.dropout, training=self.training)

class MLPLayer(nn.Module):
  def __init__(self, channels, hidden_channels, norm = "batch", act = "relu"):
    super().__init__()
    self.lin_in = nn.Linear(channels, hidden_channels)
    self.lin_out = nn.Linear(hidden_channels, channels)
    self.norms = None
    if norm == "batch":
      self.norms = nn.ModuleList([
          nn.BatchNorm1d(hidden_channels),
          nn.BatchNorm1d(channels),
      ])
    elif norm == "layer":
      self.norms = nn.ModuleList([
          nn.LayerNorm(hidden_channels),
          nn.LayerNorm(channels),
      ])
    self.acts = None
    if act == "relu":
      self.acts = nn.ModuleList([
          nn.ReLU(),
          nn.ReLU(),
      ])
    elif act == "gelu":
      self.acts = nn.ModuleList([
          nn.GELU(),
          nn.GELU(),
      ])
    elif act == "elu":
      self.acts = nn.ModuleList([
          nn.ELU(),
          nn.ELU(),
      ])
  def forward(self, x):
    shortcut = x
    
    x = self.acts[0](self.norms[0](self.lin_in(x)))
    x = self.norms[1](self.lin_out(x))

    return self.acts[1](x + shortcut)

def normalized_cut_2d(edge_index: torch.Tensor, pos: torch.Tensor):
  row, col = edge_index
  edge_attr = torch.norm(pos[row] - pos[col], p=2, dim=1)
  return normalized_cut(edge_index, edge_attr, num_nodes=pos.size(0))


class Net(nn.Module):
  def __init__(self, input_features, output_features, hidden_features, num_layers, use_cluster_pooling = False, dropout = 0.0):
    super().__init__()
    self.input_features = input_features
    self.output_features = output_features
    self.hidden_features = hidden_features
    self.num_layers = num_layers
    self.use_cluster_pooling = use_cluster_pooling
    self.dropout = dropout

    # self.edge_encoder = nn.Linear(test_batch.num_edges, self.hidden_features, bias=False)
    # self.attr_encoder = nn.Linear(sample.num_edge_features, self.hidden_features, bias=False)

    self.fc_in = nn.Linear(self.input_features, self.hidden_features)
    self.fc_hidden = MLPLayer(self.hidden_features, 2 * self.hidden_features, act="relu", norm="batch")
    self.fc_out = nn.Linear(self.hidden_features, self.output_features)

    self.conv_layers = nn.ModuleList()
    for i in range(num_layers):
      self.conv_layers.append(
          GCNLayer(
              # gnn.GCNConv(self.hidden_features, self.hidden_features, improved=True),
              # gnn.GCN2Conv(self.hidden_features, 0.1, 0.5, i+1, shared_weights=False, normalize=False),
              gnn.GENConv(self.hidden_features, self.hidden_features, learn_t=True, edge_dim=sample.edge_attr.size(-1)),
              # gnn.GeneralConv(self.hidden_features, self.hidden_features, in_edge_channels=sample.edge_attr.size(-1)),
              nn.BatchNorm1d(self.hidden_features),
              # nn.LayerNorm(self.hidden_features),
              # nn.ELU(inplace=True),
              # nn.GELU(),
              nn.ReLU(inplace=True),
          )
      )
    
  def forward(self, data):
    data.x = self.fc_in(data.x)
    # data.x = x_0 = self.fc_in(data.x)
    # data.edge_index = self.edge_encoder(data.edge_index)
    # data.edge_attr = self.attr_encoder(data.edge_attr)

    for idx, layer in enumerate(self.conv_layers):
      # data.x = layer(data.x, x_0, data.edge_index, data.edge_attr)
      data.x = layer(data.x, data.edge_index, data.edge_attr)
      # forward for GCNConv
      # data.x = layer(data.x, data.edge_index)

    x, batch = data.x, data.batch
    if self.use_cluster_pooling:
      data.edge_attr = None
      weight = normalized_cut_2d(data.edge_index, data.pos)
      cluster = gnn.graclus(data.edge_index, weight, data.x.size(0))
      x, batch = gnn.max_pool_x(cluster, x, batch)

    # Readout Layer
    x = gnn.global_mean_pool(x, batch)

    # x = F.gelu(x)
    x = F.relu(x)
    # x = F.elu(x)
    x = F.dropout(x, p=self.dropout, training=self.training)
    x = self.fc_hidden(x)
    x = F.dropout(x, p=self.dropout, training=self.training)
    return self.fc_out(x)


In [6]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = Net(d.num_features, d.num_classes, 64, 4, use_cluster_pooling=False).to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
criterion = nn.CrossEntropyLoss().to(device)

params = sum([p.numel() for p in model.parameters() if p.requires_grad])
print(f"There are {params:,} trainable Parameters.")
print()
print(model)

There are 85,326 trainable Parameters.

Net(
  (fc_in): Linear(in_features=1, out_features=64, bias=True)
  (fc_hidden): MLPLayer(
    (lin_in): Linear(in_features=64, out_features=128, bias=True)
    (lin_out): Linear(in_features=128, out_features=64, bias=True)
    (norms): ModuleList(
      (0): BatchNorm1d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (1): BatchNorm1d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
    (acts): ModuleList(
      (0): ReLU()
      (1): ReLU()
    )
  )
  (fc_out): Linear(in_features=64, out_features=10, bias=True)
  (conv_layers): ModuleList(
    (0): GCNLayer(
      (conv): GENConv(64, 64, aggr=softmax)
      (act): BatchNorm1d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (norm): ReLU(inplace=True)
    )
    (1): GCNLayer(
      (conv): GENConv(64, 64, aggr=softmax)
      (act): BatchNorm1d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (no

In [7]:
def train():
  model.train()

  epoch_loss = 0
  correct = 0
  for data in train_loader:
    data = data.to(device)

    optimizer.zero_grad()

    outputs = model(data)
    loss = criterion(outputs, data.y)
    loss.backward()

    optimizer.step()

    epoch_loss += loss.item()
    # preds = outputs.max(1)[1]
    # epoch_acc += preds.eq(data.y).sum().item()
    # print(f"outputs: {outputs.size()}")
    # print(f"data.y: {data.y.size()}")
    # print(f"preds: {outputs.argmax(1).eq(data.y).sum()}")
    # total += 1
    correct += outputs.argmax(1).eq(data.y).sum().item()
  return epoch_loss / len(train_loader), correct / len(train_loader)

def eval():
  model.eval()

  epoch_loss = 0
  correct = 0
  # total = 0
  for data in test_loader:
    data = data.to(device)

    outputs = model(data)
    loss = criterion(outputs, data.y)

    epoch_loss += loss.item()
    # preds = outputs.max(1)[1]
    # epoch_acc += preds.eq(data.y).sum().item()
    # total += 1
    # if outputs.argmax(1).item() == data.y.item():
    #   correct += 1
    correct += outputs.argmax(1).eq(data.y).sum().item()
  return epoch_loss / len(test_loader), correct / len(test_loader)


In [8]:
EPOCHS = 50

train_loss = []
train_acc = []
test_loss = []
test_acc = []

for epoch in range(1, EPOCHS + 1):
  print(f"Epoch: {epoch}")
  epoch_loss, epoch_acc = train()
  train_loss.append(epoch_loss)
  train_acc.append(epoch_acc)
  print(f"\tTrain Loss: {epoch_loss:.3f}, Train Acc: {epoch_acc:.2f}")
  epoch_loss, epoch_acc = eval()
  test_loss.append(epoch_loss)
  test_acc.append(epoch_acc)
  print(f"\tTest Loss: {epoch_loss:.3f}, Test Acc: {epoch_acc:.2f}")

Epoch: 1
	Train Loss: 0.380, Train Acc: 56.14
	Test Loss: 0.583, Test Acc: 51.08
Epoch: 2
	Train Loss: 0.165, Train Acc: 60.53
	Test Loss: 0.401, Test Acc: 54.99
Epoch: 3


KeyboardInterrupt: 

In [None]:
# import networkx as nx
import torch_geometric

loader = DataLoader(test_dataset, batch_size=1)

graph = next(iter(loader))
print(graph)
print(f"graph.y: {graph.y}")
# nx.draw(torch_geometric.utils.to_networkx(graph))
outputs = model(graph.to(device))
print(f"outputs: {outputs}")
print(f"prediction: {outputs.argmax(1)}")

In [None]:
correct = 0
total = 0

for idx, graph in enumerate(loader):
  outputs = model(graph.to(device))
  # print(f"Pred: {outputs.argmax(1).item()}, Target: {graph.y.item()}")
  # if idx >= 50:
  #   break
  if outputs.argmax(1).item() == graph.y.item():
    correct += 1
  total += 1

print(f"acc: {correct / total}")

## Test Setup

* Batch Size:    64
* Epochs:        50
* Optimizer: Adam
* Learning Rate: 0.001
* num layers: 4


## Results

* GCNConv (BN + ReLU): 0.4469 (BN + GELU): 0.5475 (BN + ELU): -
* GCNIIConv: (BN + ReLU): - (BN + GELU): - (BN + ELU): -
* GENConv: (BN + ReLU): - (BN + GELU): - (BN + ELU): - 
* GeneralConv: (BN + ReLU): 0.9002 (BN + GELU): - (BN + ELU): -


## Results With Clustering

* GCNConv (BN + ReLU): 0.6025 (BN + GELU): 0.5462 (BN + ELU): 0.5664
* GCNIIConv: (BN + ReLU): - (BN + GELU): - (BN + ELU): -
* GENConv: (BN + ReLU): - (BN + GELU): - (BN + ELU): - 
* GeneralConv: (BN + ReLU): - (BN + GELU): - (BN + ELU): -


## GENConv Additional Results

* hidden_size = 64, num_layers = 4, use_cluster_pooling: false ACC: 
* hidden_size = 64, num_layers = 4, use_cluster_pooling: True ACC: 
