In [1]:
# Install required packages.
import os
import torch
os.environ['TORCH'] = torch.__version__
print(torch.__version__)

!pip install -q torch-scatter -f https://data.pyg.org/whl/torch-${TORCH}.html
!pip install -q torch-sparse -f https://data.pyg.org/whl/torch-${TORCH}.html
!pip install -q torch-cluster -f https://data.pyg.org/whl/torch-${TORCH}.html
!pip install -q git+https://github.com/pyg-team/pytorch_geometric.git

1.13.0


In [2]:
import torch
import torch.nn as nn
import torch.nn.functional as F

from torch_geometric.datasets import MNISTSuperpixels
from torch_geometric.loader import DataLoader
import torch_geometric.transforms as T
import torch_geometric.nn as gnn
from torch_geometric.utils import normalized_cut  

In [3]:
path = ".data/"
transform = T.Compose([
    T.GCNNorm(),
    T.Cartesian(cat=False),
    T.NormalizeScale(),
    T.ToSparseTensor(),
])

batch_size = 64

train_dataset = MNISTSuperpixels(path, True, transform=transform)
test_dataset = MNISTSuperpixels(path, False, transform=transform)
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, drop_last=True)
test_loader = DataLoader(test_dataset, batch_size=batch_size, drop_last=True)
d = train_dataset

print(f"Dataset:\n{d}")

sample = train_dataset[0]
print()
print(sample)
print('=============================================================')

# Gather some statistics about the first graph.
print(f"Number of features:  {sample.num_features}")
print(f"Number of node features: {sample.num_node_features}")
print(f'Number of nodes: {sample.num_nodes}')
print(f'Number of edges: {sample.num_edges}')
# print(f"Adjacency Matrix: {data.adj_t}")
print(f'Average node degree: {sample.num_edges / sample.num_nodes:.2f}')
print(f'Has isolated nodes: {sample.has_isolated_nodes()}')
print(f'Has self-loops: {sample.has_self_loops()}')
# print(f'Is undirected: {sample.is_undirected()}')
# print(f"Adjacency matrix: {sample.adj_t.size()}")
# print(f"Edge weight min: {torch.min(sample.edge_weight)}, max: {torch.max(sample.edge_weight)}")


Dataset:
MNISTSuperpixels(60000)

Data(x=[75, 1], y=[1], pos=[75, 2], edge_attr=[1471, 2], adj_t=[75, 75, nnz=1471])
Number of features:  1
Number of node features: 1
Number of nodes: 75
Number of edges: 1471
Average node degree: 19.61
Has isolated nodes: False
Has self-loops: True


In [4]:
test_batch = next(iter(train_loader))

print()
print(test_batch)
print('=============================================================')

# Gather some statistics about the first graph.
print(f"Number of features:  {test_batch.num_features}")
print(f"Number of node features: {test_batch.num_node_features}")
print(f'Number of nodes: {test_batch.num_nodes}')
print(f'Number of edges: {test_batch.num_edges}')
# print(f"Adjacency Matrix: {data.adj_t}")
print(f'Average node degree: {test_batch.num_edges / test_batch.num_nodes:.2f}')
print(f'Has isolated nodes: {test_batch.has_isolated_nodes()}')
print(f'Has self-loops: {test_batch.has_self_loops()}')
print(f'Is undirected: {test_batch.is_undirected()}')


DataBatch(x=[4800, 1], y=[64], pos=[4800, 2], edge_attr=[93066, 2], adj_t=[4800, 4800, nnz=93066], batch=[4800], ptr=[65])
Number of features:  1
Number of node features: 1
Number of nodes: 4800
Number of edges: 93066
Average node degree: 19.39
Has isolated nodes: False
Has self-loops: True
Is undirected: True


In [5]:
class GCNLayer(nn.Module):
  def __init__(self, conv, act, norm, dropout:float = 0.0):
    super().__init__()
    self.conv = conv
    self.act = act
    self.norm = norm
    self.dropout = dropout

  # def forward(self, x, edge_index, edge_attr = None):
  # GCNII
  def forward(self, x, x_0, adj_t):
    x = shortcut = self.conv(x, x_0, adj_t)
    x = self.norm(x)
    x = self.act(x)
    x = x + shortcut
    return F.dropout(x, p=self.dropout, training=self.training)

class MLPLayer(nn.Module):
  def __init__(self, channels, hidden_channels, norm = "batch", act = "relu"):
    super().__init__()
    self.lin_in = nn.Linear(channels, hidden_channels)
    self.lin_out = nn.Linear(hidden_channels, channels)
    self.norms = None
    if norm == "batch":
      self.norms = nn.ModuleList([
          nn.BatchNorm1d(hidden_channels),
          nn.BatchNorm1d(channels),
      ])
    elif norm == "layer":
      self.norms = nn.ModuleList([
          nn.LayerNorm(hidden_channels),
          nn.LayerNorm(channels),
      ])
    self.acts = None
    if act == "relu":
      self.acts = nn.ModuleList([
          nn.ReLU(),
          nn.ReLU(),
      ])
    elif act == "gelu":
      self.acts = nn.ModuleList([
          nn.GELU(),
          nn.GELU(),
      ])
    elif act == "elu":
      self.acts = nn.ModuleList([
          nn.ELU(),
          nn.ELU(),
      ])
  def forward(self, x):
    shortcut = x
    
    x = self.acts[0](self.norms[0](self.lin_in(x)))
    x = self.norms[1](self.lin_out(x))

    return self.acts[1](x + shortcut)

def normalized_cut_2d(edge_index: torch.Tensor, pos: torch.Tensor):
  row, col = edge_index
  edge_attr = torch.norm(pos[row] - pos[col], p=2, dim=1)
  return normalized_cut(edge_index, edge_attr, num_nodes=pos.size(0))


class Net(nn.Module):
  def __init__(self, input_features, output_features, hidden_features, num_layers, use_cluster_pooling = False, dropout = 0.0):
    super().__init__()
    self.input_features = input_features
    self.output_features = output_features
    self.hidden_features = hidden_features
    self.num_layers = num_layers
    self.use_cluster_pooling = use_cluster_pooling
    self.dropout = dropout

    # self.edge_encoder = nn.Linear(test_batch.num_edges, self.hidden_features, bias=False)
    # self.attr_encoder = nn.Linear(sample.num_edge_features, self.hidden_features, bias=False)

    self.fc_in = nn.Linear(self.input_features, self.hidden_features)
    self.fc_hidden = MLPLayer(self.hidden_features, 2 * self.hidden_features, act="elu", norm="batch")
    self.fc_out = nn.Linear(self.hidden_features, self.output_features)

    self.conv_layers = nn.ModuleList()
    for i in range(num_layers):
      self.conv_layers.append(
          GCNLayer(
              # gnn.GCNConv(self.hidden_features, self.hidden_features, improved=True),
              gnn.GCN2Conv(self.hidden_features, 0.2, 0.5, i+1, shared_weights=False, normalize=True),
              # gnn.GENConv(self.hidden_features, self.hidden_features, learn_t=True, edge_dim=sample.edge_attr.size(-1)),
              # gnn.GeneralConv(self.hidden_features, self.hidden_features, in_edge_channels=sample.edge_attr.size(-1)),
              nn.BatchNorm1d(self.hidden_features),
              # nn.LayerNorm(self.hidden_features),
              nn.ELU(inplace=True),
              # nn.GELU(),
              # nn.ReLU(inplace=True),
          )
      )
    
  def forward(self, data):
    # data.x = self.fc_in(data.x)
    data.x = x_0 = self.fc_in(data.x)
    # data.edge_index = self.edge_encoder(data.edge_index)
    # data.edge_attr = self.attr_encoder(data.edge_attr)

    for idx, layer in enumerate(self.conv_layers):
      # GCNII
      data.x = layer(data.x, x_0, data.adj_t)
      # GENConv, GeneralConv
      # data.x = layer(data.x, data.edge_index, data.edge_attr)
      # forward for GCNConv
      # data.x = layer(data.x, data.edge_index)

    x, batch = data.x, data.batch
    if self.use_cluster_pooling:
      data.edge_attr = None
      weight = normalized_cut_2d(data.edge_index, data.pos)
      cluster = gnn.graclus(data.edge_index, weight, data.x.size(0))
      x, batch = gnn.max_pool_x(cluster, x, batch)

    # Readout Layer
    x = gnn.global_mean_pool(x, batch)

    # x = F.gelu(x)
    # x = F.relu(x)
    x = F.elu(x)
    x = F.dropout(x, p=self.dropout, training=self.training)
    x = self.fc_hidden(x)
    x = F.dropout(x, p=self.dropout, training=self.training)
    return self.fc_out(x)


In [6]:
from pathlib import Path

# TODO update for your location
save_loc = "../saved_models/"
# model_name = "GCN2_clus-false_BN_RELU.pt"
# model_name = "GCN2_clus-false_BN_GELU.pt"
model_name = "GCN2_clus-false_BN_ELU.pt"

file_path = save_loc + model_name
path = Path(file_path)
state_dict = None
if path.exists() and path.is_file():
    print("found save!")
    state_dict = torch.load(path)
else:
    print("new model")

new model


In [7]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = Net(d.num_features, d.num_classes, 64, 4, use_cluster_pooling=False).to(device)
if state_dict is not None:
    model.load_state_dict(state_dict)
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
criterion = nn.CrossEntropyLoss().to(device)

params = sum([p.numel() for p in model.parameters() if p.requires_grad])
print(f"There are {params:,} trainable Parameters.")
print()
print(model)

There are 51,018 trainable Parameters.

Net(
  (fc_in): Linear(in_features=1, out_features=64, bias=True)
  (fc_hidden): MLPLayer(
    (lin_in): Linear(in_features=64, out_features=128, bias=True)
    (lin_out): Linear(in_features=128, out_features=64, bias=True)
    (norms): ModuleList(
      (0): BatchNorm1d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (1): BatchNorm1d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
    (acts): ModuleList(
      (0): ELU(alpha=1.0)
      (1): ELU(alpha=1.0)
    )
  )
  (fc_out): Linear(in_features=64, out_features=10, bias=True)
  (conv_layers): ModuleList(
    (0): GCNLayer(
      (conv): GCN2Conv(64, alpha=0.2, beta=0.4054651081081644)
      (act): BatchNorm1d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (norm): ELU(alpha=1.0, inplace=True)
    )
    (1): GCNLayer(
      (conv): GCN2Conv(64, alpha=0.2, beta=0.22314355131420976)
      (act): BatchNorm1d(64, eps=1e-0

In [8]:
def train():
  model.train()

  epoch_loss = 0
  correct = 0
  for data in train_loader:
    data = data.to(device)

    optimizer.zero_grad()

    outputs = model(data)
    loss = criterion(outputs, data.y)
    loss.backward()

    optimizer.step()

    epoch_loss += loss.item()
    # preds = outputs.max(1)[1]
    # epoch_acc += preds.eq(data.y).sum().item()
    # print(f"outputs: {outputs.size()}")
    # print(f"data.y: {data.y.size()}")
    # print(f"preds: {outputs.argmax(1).eq(data.y).sum()}")
    # total += 1
    correct += outputs.argmax(1).eq(data.y).sum().item()
  return epoch_loss / len(train_loader), correct / len(train_loader)

def eval():
  model.eval()

  epoch_loss = 0
  correct = 0
  # total = 0
  for data in test_loader:
    data = data.to(device)

    outputs = model(data)
    loss = criterion(outputs, data.y)

    epoch_loss += loss.item()
    # preds = outputs.max(1)[1]
    # epoch_acc += preds.eq(data.y).sum().item()
    # total += 1
    # if outputs.argmax(1).item() == data.y.item():
    #   correct += 1
    correct += outputs.argmax(1).eq(data.y).sum().item()
  return epoch_loss / len(test_loader), correct / len(test_loader)


In [9]:
EPOCHS = 50

train_loss = []
train_acc = []
test_loss = []
test_acc = []
best_loss = float('inf')

for epoch in range(1, EPOCHS + 1):
  print(f"Epoch: {epoch}")
  epoch_loss, epoch_acc = train()
  train_loss.append(epoch_loss)
  train_acc.append(epoch_acc)
  print(f"\tTrain Loss: {epoch_loss:.3f}, Train Acc: {epoch_acc:.2f}")
  epoch_loss, epoch_acc = eval()
  test_loss.append(epoch_loss)
  test_acc.append(epoch_acc)
  print(f"\tTest Loss: {epoch_loss:.3f}, Test Acc: {epoch_acc:.2f}")
  if epoch_loss < best_loss:
    best_loss = epoch_loss
    torch.save(model.state_dict(), path)

Epoch: 1
	Train Loss: 1.618, Train Acc: 26.48
	Test Loss: 8.694, Test Acc: 7.98
Epoch: 2
	Train Loss: 1.401, Train Acc: 31.60
	Test Loss: 43.964, Test Acc: 6.24
Epoch: 3
	Train Loss: 1.314, Train Acc: 33.80
	Test Loss: 13.005, Test Acc: 7.26
Epoch: 4
	Train Loss: 1.250, Train Acc: 35.22
	Test Loss: 12.941, Test Acc: 10.41
Epoch: 5
	Train Loss: 1.184, Train Acc: 36.79
	Test Loss: 25.394, Test Acc: 13.47
Epoch: 6
	Train Loss: 1.132, Train Acc: 38.04
	Test Loss: 31.403, Test Acc: 9.27
Epoch: 7
	Train Loss: 1.093, Train Acc: 38.98
	Test Loss: 6.913, Test Acc: 14.31
Epoch: 8
	Train Loss: 1.062, Train Acc: 39.73
	Test Loss: 22.181, Test Acc: 6.54
Epoch: 9
	Train Loss: 1.036, Train Acc: 40.51
	Test Loss: 19.964, Test Acc: 6.24
Epoch: 10
	Train Loss: 1.013, Train Acc: 40.98
	Test Loss: 19.913, Test Acc: 7.26
Epoch: 11
	Train Loss: 0.993, Train Acc: 41.51
	Test Loss: 25.501, Test Acc: 7.33
Epoch: 12
	Train Loss: 0.985, Train Acc: 41.74
	Test Loss: 25.881, Test Acc: 6.30
Epoch: 13
	Train Loss: 0

In [10]:
# import networkx as nx
import torch_geometric

loader = DataLoader(test_dataset, batch_size=1)

graph = next(iter(loader))
print(graph)
print(f"graph.y: {graph.y}")
# nx.draw(torch_geometric.utils.to_networkx(graph))
model.load_state_dict(torch.load(path))
outputs = model(graph.to(device))
print(f"outputs: {outputs}")
print(f"prediction: {outputs.argmax(1)}")

DataBatch(x=[75, 1], y=[1], pos=[75, 2], edge_attr=[1479, 2], adj_t=[75, 75, nnz=1479], batch=[75], ptr=[2])
graph.y: tensor([7])
outputs: tensor([[-2.3027,  1.7251,  2.6850, -2.8008,  4.9331, -4.2960, -4.4903,  5.2800,
         -6.8988,  1.4461]], grad_fn=<AddmmBackward0>)
prediction: tensor([7])


In [11]:
correct = 0
total = 0

for idx, graph in enumerate(loader):
  outputs = model(graph.to(device))
  # print(f"Pred: {outputs.argmax(1).item()}, Target: {graph.y.item()}")
  # if idx >= 50:
  #   break
  if outputs.argmax(1).item() == graph.y.item():
    correct += 1
  total += 1

print(f"acc: {correct / total}")

acc: 0.4137


## Test Setup

* Batch Size:    64
* Epochs:        50
* Optimizer: Adam
* Learning Rate: 0.001
* num layers: 4


## Results

* GCNConv (BN + ReLU): 0.4469 (BN + GELU): 0.5475 (BN + ELU): 0.5589
* GCNIIConv: (BN + ReLU): 0.2339 (BN + GELU): 0.3716 (BN + ELU): 0.4317
* GENConv: (BN + ReLU): - (BN + GELU): - (BN + ELU): - 
* GeneralConv: (BN + ReLU): 0.9002 (BN + GELU): 0.6302 (BN + ELU): 0.9532


## Results With Clustering

* GCNConv (BN + ReLU): 0.6025 (BN + GELU): 0.5462 (BN + ELU): 0.5664
* GCNIIConv: (BN + ReLU): - (BN + GELU): - (BN + ELU): -
* GENConv: (BN + ReLU): 0.9424 (BN + GELU): 0.9817 (BN + ELU): - 
* GeneralConv: (BN + ReLU): - (BN + GELU): - (BN + ELU): *


## GENConv Additional Results

* hidden_size = 64, num_layers = 4, use_cluster_pooling: false ACC: 
* hidden_size = 64, num_layers = 4, use_cluster_pooling: True ACC: 
