Working through [this tutorial](https://graphneural.network/getting-started/)

# Getting Dataset


In [2]:
from spektral.datasets import TUDataset

2022-10-29 11:48:59.248463: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [3]:
dataset = TUDataset('PROTEINS')

Downloading PROTEINS dataset.


100%|█████████████████████████████████████████| 447k/447k [00:00<00:00, 591kB/s]


Successfully loaded PROTEINS.


In [4]:
dataset

TUDataset(n_graphs=1113)

In [8]:
dataset[0]
dataset.filter(lambda g: g.n_nodes < 500)

# Transforming Graphs 

In [9]:
# compute the maximum degree of the dataset, so that we know the size of the one-hot vectors
max_degree = dataset.map(lambda g: g.a.sum(-1).max(), reduce=max)
max_degree

12.0

In [11]:
# augment our node features with the one-hot-encoded degree
from spektral.transforms import Degree


In [13]:
dataset.apply(Degree(int(max_degree)))

In [14]:
dataset[0]

Graph(n_nodes=42, n_node_features=17, n_edge_features=None, n_labels=2)

In [17]:
# following this paper: https://arxiv.org/abs/1609.02907
# so do extra pre-processing of the adjacency matrix

from spektral.transforms import GCNFilter
dataset.apply(GCNFilter())

# every convolutional layer in Spektral has a preprocess(a) method that you can use to transform the adjacency matrix as needed.


# Creating a GNN

In [19]:
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Dense, Dropout
from spektral.layers import GCNConv, GlobalSumPool

In [20]:
class MyFirstGNN(Model):

    def __init__(self, n_hidden, n_labels):
        super().__init__()
        self.graph_conv = GCNConv(n_hidden)
        self.pool = GlobalSumPool()
        self.dropout = Dropout(0.5)
        self.dense = Dense(n_labels, 'softmax')

    def call(self, inputs):
        out = self.graph_conv(inputs)
        out = self.dropout(out)
        out = self.pool(out)
        out = self.dense(out)

        return out

In [21]:
model = MyFirstGNN(32, dataset.n_labels)
model.compile('adam', 'categorical_crossentropy')

2022-10-29 12:00:59.122595: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [26]:
import numpy as np 
np.random.shuffle(dataset)
split = int(0.8 * len(dataset))
dataset_train, dataset_test = dataset[:split], dataset[split:]

  np.random.shuffle(dataset)


In [27]:
#terating over a dataset in mini-batches is not trivial and we cannot simply use the model.fit() bc we can't resize graphs 
# user loader!!

from spektral.data import BatchLoader
loader = BatchLoader(dataset_train, batch_size=32)
model.fit(loader.load(), steps_per_epoch=loader.steps_per_epoch, epochs=10)


  np.random.shuffle(a)


Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x7fc6658e6b50>

# Evaluation


In [28]:
loader = BatchLoader(dataset_test, batch_size=32)

In [29]:
loss = model.evaluate(loader.load(), steps=loader.steps_per_epoch)

print('Test loss: {}'.format(loss))

Test loss: 1.1552215814590454
