In [77]:
!pip install spektral



In [78]:
import spektral
from spektral import datasets, transforms

In [79]:
# Load the PROTEINS dataset through Spektral's TUDataset wrapper.
dataset = datasets.TUDataset(name='PROTEINS')

Successfully loaded PROTEINS.


In [80]:
# Show the dataset's repr, which reports how many graphs it contains.
dataset

TUDataset(n_graphs=1113)

In [81]:
# For every graph take the largest row sum of its adjacency matrix, then keep
# the maximum of those values over the whole dataset.
max_degree = dataset.map(lambda graph: graph.a.sum(axis=-1).max(), reduce=max)
print("Max degree: ", max_degree)

Max degree:  25.0


In [82]:
# Inspect the first graph before any transform has been applied.
dataset[0]

Graph(n_nodes=42, n_node_features=4, n_edge_features=None, n_labels=2)

In [83]:
# Apply the Degree transform to every graph. apply() changes `dataset` in
# place, so running this cell a second time would apply the transform again.
dataset.apply(transforms.Degree(max_degree=int(max_degree)))

In [84]:
# Re-inspect the first graph: its node-feature count is now larger than it was
# before the Degree transform.
dataset[0]

Graph(n_nodes=42, n_node_features=30, n_edge_features=None, n_labels=2)

In [85]:
# Densify the first graph's adjacency matrix to view it before GCNFilter runs.
dataset[0].a.todense()

matrix([[0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 1.],
        [0., 0., 0., ..., 0., 0., 0.],
        ...,
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 1.],
        [0., 1., 0., ..., 0., 1., 0.]])

In [86]:
# Replace every graph's adjacency matrix with its GCN-filtered version.
# NOTE: apply() changes `dataset` in place, so re-running this cell would
# filter the already-filtered matrices again; reload the dataset first.
dataset.apply(transforms.GCNFilter())

In [87]:
# Same view after GCNFilter: the entries are no longer binary and the diagonal
# is populated.
dataset[0].a.todense()

matrix([[0.25      , 0.        , 0.        , ..., 0.        , 0.        ,
         0.        ],
        [0.        , 0.25      , 0.        , ..., 0.        , 0.        ,
         0.18898224],
        [0.        , 0.        , 0.25      , ..., 0.        , 0.        ,
         0.        ],
        ...,
        [0.        , 0.        , 0.        , ..., 0.2       , 0.        ,
         0.        ],
        [0.        , 0.        , 0.        , ..., 0.        , 0.25      ,
         0.18898224],
        [0.        , 0.18898224, 0.        , ..., 0.        , 0.18898224,
         0.14285714]])

In [88]:
# Mean entry of the first graph's filtered adjacency matrix.
dataset[0].a.todense().mean()

0.023547617650508366

In [89]:
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Dense, Dropout
from spektral.layers import GCNConv, GlobalSumPool

In [90]:
class MyFirstGNN(Model):
    """Minimal graph-level classifier: GCN layer -> dropout -> sum pooling -> softmax.

    Args:
        n_hidden: Number of output channels of the graph convolution.
        n_labels: Number of units of the final softmax layer (one per class).
        dropout_rate: Dropout rate applied after the convolution. Defaults to
            0.5, the value that used to be hard-coded.
    """

    def __init__(self, n_hidden, n_labels, dropout_rate=0.5):
        super().__init__()
        self.graph_conv = GCNConv(n_hidden)
        self.pool = GlobalSumPool()
        self.dropout = Dropout(dropout_rate)
        self.dense = Dense(n_labels, activation='softmax')

    def call(self, inputs):
        """Run the forward pass.

        Args:
            inputs: Either ``(x, a)`` as yielded by a batch-mode loader, or
                ``(x, a, i)`` as yielded by a disjoint-mode loader, where ``i``
                holds the graph index of every node.

        Returns:
            The softmax output of the final dense layer.
        """
        # Disjoint-mode loaders add a third tensor that the pooling layer needs
        # in order to know which nodes belong to which graph.
        if len(inputs) == 3:
            x, a, i = inputs
        else:
            x, a = inputs
            i = None

        out = self.graph_conv([x, a])
        out = self.dropout(out)
        out = self.pool(out if i is None else [out, i])
        return self.dense(out)

In [91]:
# Summarise the dataset size and the arrays stored on its first graph.
print(f"There are a total of {len(dataset):,} graphs in this dataset\n")

print(
    "In the first graph we have an adjacency matrix (A) of shape: [nodes, nodes]",
    dataset[0].a.shape,
    sep="\n",
)

print(
    "And we have the nodes features (X) of shape: [nodes, n_feat]",
    dataset[0].x.shape,
    sep="\n",
)

# The edge-feature attribute is printed as-is rather than through .shape.
print(
    "Lastly we have the edge features (E) of shape: [edges, e_feat]",
    dataset[0].e,
    sep="\n",
)

There are a total of 1,113 graphs in this dataset

In the first graph we have an adjacency matrix (A) of shape: [nodes, nodes]
(42, 42)
And we have the nodes features (X) of shape: [nodes, n_feat]
(42, 30)
Lastly we have the edge features (E) of shape: [edges, e_feat]
None


In [92]:
# Build a model with 32 hidden channels and one output unit per label, then
# compile it with the Adam optimizer and a categorical cross-entropy loss.
model = MyFirstGNN(n_hidden=32, n_labels=dataset.n_labels)
model.compile(optimizer='adam', loss='categorical_crossentropy')

In [98]:
from spektral.data import DisjointLoader

# Collate five graphs into one disjoint batch to look at the stacked node
# features. shuffle=True draws the graphs at random, so the printed shape can
# differ from one run to the next.
loader = DisjointLoader(dataset, batch_size=5, shuffle=True)
batch = next(loader)  # built-in next() instead of calling __next__ directly
inputs, target = batch
x, a, i = inputs
print(f"Combined graph now has shape (X) of: {x.shape}")

Combined graph now has shape (X) of: (198, 30)


In [119]:
from spektral.data import BatchLoader

# Fit the model for five epochs on batches of 32 graphs drawn from `dataset`.
loader = BatchLoader(dataset, batch_size=32)
model.fit(
    loader.load(),
    steps_per_epoch=loader.steps_per_epoch,
    epochs=5,
)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<tensorflow.python.keras.callbacks.History at 0x7fd8b0b9e890>

In [120]:
# The model was fitted on this very same `dataset`, so the value printed below
# is the loss on the training data, not a held-out test score.
# epochs=1 / shuffle=False make the loader yield each graph exactly once, in a
# fixed order, for the evaluation pass.
loader = BatchLoader(dataset, batch_size=32, epochs=1, shuffle=False)
loss = model.evaluate(loader.load(), steps=loader.steps_per_epoch)
print(f'Training-set loss: {loss}')

Test loss: 0.9085749387741089
