<a href="https://colab.research.google.com/github/ariahosseini/DeepML/blob/main/014_TensorFlow_Proj_Fourteen_GNN_Spektral_GraphLevel.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# install
# !pip install spektral
# !pip install ogb

In [None]:
# utils
import numpy as np
import scipy.sparse as sp
from tqdm import tqdm
from ogb.graphproppred import Evaluator, GraphPropPredDataset
# sklearn
from sklearn.metrics.cluster import (completeness_score,
                                     homogeneity_score,
                                     v_measure_score)
# tensorflow
import tensorflow as tf
from tensorflow.keras.layers import Dense, Input, Dropout
from tensorflow.keras.losses import (CategoricalCrossentropy,
                                     BinaryCrossentropy,
                                     MeanSquaredError,
                                     SparseCategoricalCrossentropy)
from tensorflow.keras.models import Model
from tensorflow.keras.metrics import categorical_accuracy, sparse_categorical_accuracy
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.backend import clear_session
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.regularizers import l2
# spektral
from spektral.data import Dataset, DisjointLoader, Graph, BatchLoader, MixedLoader
from spektral.datasets import TUDataset, OGB, QM9
from spektral.datasets.mnist import MNIST
from spektral.datasets.citation import Cora
from spektral.models import GeneralGNN
from spektral.layers import (GCSConv, ECCConv, GlobalSumPool, GlobalAvgPool,
                             GraphMasking, GINConv, MinCutPool, GCNConv)
# from spektral.layers.convolutional import GCSConv
# from spektral.layers.pooling import MinCutPool
from spektral.transforms.normalize_adj import NormalizeAdj
from spektral.utils.sparse import sp_matrix_to_sp_tensor
from spektral.utils.convolution import normalized_adjacency
# visual
import matplotlib.pyplot as plt

In [None]:
# physical_devices = tf.config.list_physical_devices("GPU")
# if len(physical_devices) > 0:
#     tf.config.experimental.set_memory_growth(physical_devices[0], True)

# PROTEINS Data General GNN

In [None]:
# config
batch_size = 16
learning_rate = 0.01
epochs = 10

In [None]:
# load data
data = TUDataset("PROTEINS")



Successfully loaded PROTEINS.




In [None]:
print(f"Data set name: {data.name}")
print(f"Data set type: {type(data)}")
print(f"No. of graphs: {data.n_graphs}")
print(f"No. of nodes: {data.n_nodes}")
print(f"No. of classes: {data.n_labels}")
print(f"No. of edge features: {data.n_edge_features}")
print(f"No. of node features: {data.n_node_features}")
print(f"Recap: {data.graphs}")

Data set name: PROTEINS
Data set type: <class 'spektral.datasets.tudataset.TUDataset'>
No. of graphs: 1113
No. of nodes: None
No. of classes: 2
No. of edge features: None
No. of node features: 4
Recap: [Graph(n_nodes=42, n_node_features=4, n_edge_features=None, n_labels=2), Graph(n_nodes=27, n_node_features=4, n_edge_features=None, n_labels=2), Graph(n_nodes=10, n_node_features=4, n_edge_features=None, n_labels=2), Graph(n_nodes=24, n_node_features=4, n_edge_features=None, n_labels=2), Graph(n_nodes=11, n_node_features=4, n_edge_features=None, n_labels=2), Graph(n_nodes=336, n_node_features=4, n_edge_features=None, n_labels=2), Graph(n_nodes=108, n_node_features=4, n_edge_features=None, n_labels=2), Graph(n_nodes=154, n_node_features=4, n_edge_features=None, n_labels=2), Graph(n_nodes=19, n_node_features=4, n_edge_features=None, n_labels=2), Graph(n_nodes=11, n_node_features=4, n_edge_features=None, n_labels=2), Graph(n_nodes=20, n_node_features=4, n_edge_features=None, n_labels=2), Gr

In [None]:
print(f"Graph nodes' features:\n{data[0].x}")
print(f"Graph nodes' size:\n{np.shape(data[0].x)}")
print(f"Graph edges' features:\n{data[0].e}")
print(f"Graph adjacency matrix:\n{data[0].a}")
print(f"Graph labels:\n{data[0].y}")
print(f"Graph labels size:\n{np.shape(data[0].y)}")

Graph nodes' features:
[[23.  1.  0.  0.]
 [10.  1.  0.  0.]
 [25.  1.  0.  0.]
 [ 7.  1.  0.  0.]
 [12.  1.  0.  0.]
 [11.  1.  0.  0.]
 [ 5.  1.  0.  0.]
 [ 7.  1.  0.  0.]
 [ 9.  1.  0.  0.]
 [ 3.  1.  0.  0.]
 [ 6.  1.  0.  0.]
 [22.  1.  0.  0.]
 [ 8.  1.  0.  0.]
 [26.  1.  0.  0.]
 [ 7.  1.  0.  0.]
 [12.  1.  0.  0.]
 [11.  1.  0.  0.]
 [ 5.  1.  0.  0.]
 [ 7.  1.  0.  0.]
 [ 8.  1.  0.  0.]
 [ 3.  1.  0.  0.]
 [ 6.  1.  0.  0.]
 [ 3.  0.  1.  0.]
 [ 9.  0.  1.  0.]
 [10.  0.  1.  0.]
 [ 7.  0.  1.  0.]
 [10.  0.  1.  0.]
 [ 8.  0.  1.  0.]
 [ 5.  0.  1.  0.]
 [ 4.  0.  1.  0.]
 [ 3.  0.  1.  0.]
 [ 3.  0.  1.  0.]
 [ 3.  0.  1.  0.]
 [ 9.  0.  1.  0.]
 [10.  0.  1.  0.]
 [ 7.  0.  1.  0.]
 [10.  0.  1.  0.]
 [ 8.  0.  1.  0.]
 [ 5.  0.  1.  0.]
 [ 4.  0.  1.  0.]
 [ 3.  0.  1.  0.]
 [ 3.  0.  1.  0.]]
Graph nodes' size:
(42, 4)
Graph edges' features:
None
Graph adjacency matrix:
  (0, 11)	1.0
  (0, 22)	1.0
  (0, 32)	1.0
  (1, 23)	1.0
  (1, 31)	1.0
  (1, 41)	1.0
  (2, 24)	1.0
 

In [None]:
# split data
# Draw a random permutation of graph indices and slice it, rather than calling
# np.random.shuffle on the dataset object itself: NumPy's in-place shuffle is meant
# for arrays/sequences and warns on other containers. Indexing a dataset with an
# index array is the same pattern the later sections of this notebook use.
idxs = np.random.permutation(len(data))
split = int(0.8 * len(data))  # 80% train / 20% test
idx_train, idx_test = np.split(idxs, [split])
data_train, data_test = data[idx_train], data[idx_test]

  np.random.shuffle(data)


In [None]:
# data loaders
loader_train = DisjointLoader(data_train, batch_size=batch_size, epochs=epochs)
loader_test = DisjointLoader(data_test, batch_size=batch_size)

In [None]:
# signatures
loader_train.tf_signature()

((TensorSpec(shape=(None, 4), dtype=tf.float64, name=None),
  SparseTensorSpec(TensorShape([None, None]), tf.float64),
  TensorSpec(shape=(None,), dtype=tf.int64, name=None)),
 TensorSpec(shape=(None, 2), dtype=tf.float64, name=None))

In [None]:
# build model
model = GeneralGNN(data.n_labels, activation="softmax")
optimizer = Adam(learning_rate)
loss_func = CategoricalCrossentropy()

In [None]:
# fit model
@tf.function(input_signature=loader_train.tf_signature(), experimental_relax_shapes=True)
def train_step(inputs, target):
    """Perform one gradient update on a single batch.

    Args:
        inputs: batch inputs as yielded by ``loader_train``.
        target: batch targets as yielded by ``loader_train``.

    Returns:
        Tuple ``(loss, acc)``: the batch loss (data term plus ``model.losses``)
        and the mean categorical accuracy of the batch.
    """
    with tf.GradientTape() as tape:
        y_hat = model(inputs, training=True)
        batch_loss = loss_func(target, y_hat) + sum(model.losses)
    # Read the variable list only after the forward pass has run.
    variables = model.trainable_variables
    grads = tape.gradient(batch_loss, variables)
    optimizer.apply_gradients(zip(grads, variables))
    batch_acc = tf.reduce_mean(categorical_accuracy(target, y_hat))
    return batch_loss, batch_acc

def evaluate(loader):
    """Evaluate ``model`` over one epoch's worth of batches from ``loader``.

    Args:
        loader: an iterator exposing ``steps_per_epoch`` and yielding ``(inputs, target)``.

    Returns:
        Array ``[loss, accuracy]`` averaged over the batches, each batch weighted
        by ``len(target)``; ``None`` if ``steps_per_epoch`` is not positive.
    """
    per_batch = []
    for _ in range(loader.steps_per_epoch):
        inputs, target = next(loader)
        pred = model(inputs, training=False)
        batch_loss = loss_func(target, pred)
        batch_acc = tf.reduce_mean(categorical_accuracy(target, pred))
        per_batch.append((batch_loss, batch_acc, len(target)))
    if not per_batch:
        return None
    per_batch = np.array(per_batch)
    # Last column holds the batch sizes used as averaging weights.
    return np.average(per_batch[:, :-1], 0, weights=per_batch[:, -1])
# Training loop: loader_train yields the batches of all epochs back to back,
# so epoch boundaries are detected by counting steps.
epoch = step = 0
results = []  # (loss, acc) of every batch in the current epoch
for batch in loader_train:
    step += 1
    loss, acc = train_step(*batch)
    results.append((loss, acc))
    # End of an epoch: evaluate on the test loader, report, reset the accumulators.
    if step == loader_train.steps_per_epoch:
        step = 0
        epoch += 1
        results_test = evaluate(loader_test)
        print("Ep. {} - Loss: {:.3f} - Acc: {:.3f} - Test loss: {:.3f} - Test acc: {:.3f}".format(epoch, *np.mean(results, 0), *results_test))
        results = []

  np.random.shuffle(a)


Ep. 1 - Loss: 0.606 - Acc: 0.678 - Test loss: 5.767 - Test acc: 0.453
Ep. 2 - Loss: 0.559 - Acc: 0.726 - Test loss: 1.211 - Test acc: 0.475
Ep. 3 - Loss: 0.544 - Acc: 0.718 - Test loss: 1.600 - Test acc: 0.457
Ep. 4 - Loss: 0.516 - Acc: 0.745 - Test loss: 0.590 - Test acc: 0.731
Ep. 5 - Loss: 0.532 - Acc: 0.741 - Test loss: 0.600 - Test acc: 0.673
Ep. 6 - Loss: 0.518 - Acc: 0.739 - Test loss: 0.876 - Test acc: 0.444
Ep. 7 - Loss: 0.522 - Acc: 0.745 - Test loss: 0.589 - Test acc: 0.691
Ep. 8 - Loss: 0.503 - Acc: 0.758 - Test loss: 0.592 - Test acc: 0.713
Ep. 9 - Loss: 0.509 - Acc: 0.756 - Test loss: 0.593 - Test acc: 0.682
Ep. 10 - Loss: 0.495 - Acc: 0.763 - Test loss: 0.606 - Test acc: 0.677


In [None]:
# evaluate model
results_test = evaluate(loader_test)
print("Final results - Loss: {:.3f} - Acc: {:.3f}".format(*results_test))

Final results - Loss: 0.606 - Acc: 0.677


# Custom Dataset

In [None]:
# config
learning_rate = 1e-2  # learning rate
epochs = 400  # number of training epochs
patience = 10  # patience for early stopping
batch_size = 32  # batch size

In [None]:
# load data
class MyDataset(Dataset):
    """
    A dataset of random colored graphs.
    The task is to classify each graph with the color which occurs the most in its nodes
    (ties are resolved by np.argmax, i.e. the lowest color index wins).
    The graphs have "n_colors" colors and between "n_min" (inclusive) and "n_max" (exclusive)
    nodes, since np.random.randint excludes its upper bound. Each ordered node pair is
    connected with probability "p" and the adjacency matrix is then symmetrized.
    """
    def __init__(self, n_samples, n_colors=3, n_min=10, n_max=100, p=0.1, **kwargs):
        """Store the generation parameters, then defer to the base Dataset constructor."""
        self.n_samples = n_samples
        self.n_colors = n_colors
        self.n_min = n_min
        self.n_max = n_max
        self.p = p
        # NOTE(review): attributes are set before super().__init__, presumably because
        # the base constructor calls read() -- confirm against the Spektral Dataset docs.
        super().__init__(**kwargs)

    def read(self):
        """Generate and return a list of n_samples random Graph objects."""
        def make_graph():
            """Build one random graph with one-hot color features and a one-hot label."""
            n = np.random.randint(self.n_min, self.n_max)  # number of nodes, upper bound exclusive
            colors = np.random.randint(0, self.n_colors, size=n)
            x = np.zeros((n, self.n_colors))
            x[np.arange(n), colors] = 1  # node features: one-hot color per node
            # Random boolean matrix; the diagonal is included, so self-loops can occur.
            a = np.random.rand(n, n) <= self.p
            a = np.maximum(a, a.T).astype(int)  # symmetrize -> undirected graph
            a = sp.csr_matrix(a)  # edges as a sparse adjacency matrix
            y = np.zeros((self.n_colors,))
            color_counts = x.sum(0)
            y[np.argmax(color_counts)] = 1  # label: one-hot of the most frequent color
            return Graph(x=x, a=a, y=y)
        return [make_graph() for _ in range(self.n_samples)]  # list of Graph objects
data = MyDataset(1000, transforms=NormalizeAdj())

In [None]:
# train/valid/test split
idxs = np.random.permutation(len(data))
split_valid, split_test = int(0.8 * len(data)), int(0.9 * len(data))
idx_train, idx_valid, idx_test = np.split(idxs, [split_valid, split_test])
data_train = data[idx_train]
data_valid = data[idx_valid]
data_test = data[idx_test]

In [None]:
# data loaders
loader_train = DisjointLoader(data_train, batch_size=batch_size, epochs=epochs)
loader_valid = DisjointLoader(data_valid, batch_size=batch_size)
loader_test = DisjointLoader(data_test, batch_size=batch_size)

In [None]:
# build model
class Net(Model):
    """Three GCS convolutions, global average pooling and a softmax classifier."""

    def __init__(self):
        super().__init__()
        self.conv1 = GCSConv(32, activation="relu")
        self.conv2 = GCSConv(32, activation="relu")
        self.conv3 = GCSConv(32, activation="relu")
        self.global_pool = GlobalAvgPool()
        self.dense = Dense(data.n_labels, activation="softmax")

    def call(self, inputs):
        """Map a batch ``(x, a, i)`` to per-graph class probabilities."""
        node_feats, adjacency, graph_idx = inputs
        # Apply the convolutions in order, always with the same adjacency matrix.
        for conv in (self.conv1, self.conv2, self.conv3):
            node_feats = conv([node_feats, adjacency])
        pooled = self.global_pool([node_feats, graph_idx])
        return self.dense(pooled)


model = Net()
optimizer = Adam(learning_rate=learning_rate)
loss_fn = CategoricalCrossentropy()

In [None]:
# fit model
@tf.function(input_signature=loader_train.tf_signature(), experimental_relax_shapes=True)
def train_step(inputs, target):
    """Perform one gradient update on a single batch.

    Args:
        inputs: batch inputs as yielded by ``loader_train``.
        target: batch targets as yielded by ``loader_train``.

    Returns:
        Tuple ``(loss, acc)``: the batch loss (data term plus ``model.losses``)
        and the mean categorical accuracy of the batch.
    """
    with tf.GradientTape() as tape:
        y_hat = model(inputs, training=True)
        batch_loss = loss_fn(target, y_hat) + sum(model.losses)
    # Read the variable list only after the forward pass has run.
    variables = model.trainable_variables
    grads = tape.gradient(batch_loss, variables)
    optimizer.apply_gradients(zip(grads, variables))
    batch_acc = tf.reduce_mean(categorical_accuracy(target, y_hat))
    return batch_loss, batch_acc

def evaluate(loader):
    """Evaluate ``model`` over one epoch's worth of batches from ``loader``.

    Args:
        loader: an iterator exposing ``steps_per_epoch`` and yielding ``(inputs, target)``.

    Returns:
        Array ``[loss, accuracy]`` averaged over the batches, each batch weighted
        by ``len(target)``; ``None`` if ``steps_per_epoch`` is not positive.
    """
    per_batch = []
    for _ in range(loader.steps_per_epoch):
        inputs, target = next(loader)
        pred = model(inputs, training=False)
        batch_loss = loss_fn(target, pred)
        batch_acc = tf.reduce_mean(categorical_accuracy(target, pred))
        per_batch.append((batch_loss, batch_acc, len(target)))
    if not per_batch:
        return None
    per_batch = np.array(per_batch)
    # Last column holds the batch sizes used as averaging weights.
    return np.average(per_batch[:, :-1], 0, weights=per_batch[:, -1])

# Training loop with manual early stopping on the validation loss.
epoch = step = 0
best_val_loss = np.inf
best_weights = None
# Separate countdown so the configured `patience` is never consumed: it is restored
# to `patience` on every improvement and the cell can be re-run safely.
current_patience = patience
results = []  # (loss, acc) of every batch in the current epoch
for batch in loader_train:
    step += 1
    loss, acc = train_step(*batch)
    results.append((loss, acc))
    # End of an epoch: validate, report, and update the early-stopping state.
    if step == loader_train.steps_per_epoch:
        step = 0
        epoch += 1
        val_loss, val_acc = evaluate(loader_valid)
        print("Ep. {} - Loss: {:.3f} - Acc: {:.3f} - Val loss: {:.3f} - Val acc: {:.3f}".format(epoch, *np.mean(results, 0), val_loss, val_acc))
        # check if loss improved for early stopping
        if val_loss < best_val_loss:
            best_val_loss = val_loss
            current_patience = patience  # improvement: reset the countdown
            print("New best val_loss {:.3f}".format(val_loss))
            best_weights = model.get_weights()
        else:
            current_patience -= 1
            if current_patience == 0:
                print("Early stopping (best val_loss: {})".format(best_val_loss))
                break
        results = []
# Load the best model; best_weights stays None if no epoch completed.
if best_weights is not None:
    model.set_weights(best_weights)
test_loss, test_acc = evaluate(loader_test)
print("Done. Test loss: {:.4f}. Test acc: {:.2f}".format(test_loss, test_acc))

  np.random.shuffle(a)


Ep. 1 - Loss: 1.021 - Acc: 0.480 - Val loss: 0.846 - Val acc: 0.590
New best val_loss 0.846
Ep. 2 - Loss: 0.623 - Acc: 0.757 - Val loss: 0.495 - Val acc: 0.830
New best val_loss 0.495
Ep. 3 - Loss: 0.443 - Acc: 0.800 - Val loss: 0.400 - Val acc: 0.870
New best val_loss 0.400
Ep. 4 - Loss: 0.295 - Acc: 0.890 - Val loss: 0.292 - Val acc: 0.920
New best val_loss 0.292
Ep. 5 - Loss: 0.300 - Acc: 0.875 - Val loss: 0.547 - Val acc: 0.780
Ep. 6 - Loss: 0.300 - Acc: 0.870 - Val loss: 0.316 - Val acc: 0.880
Ep. 7 - Loss: 0.237 - Acc: 0.906 - Val loss: 0.259 - Val acc: 0.930
New best val_loss 0.259
Ep. 8 - Loss: 0.270 - Acc: 0.882 - Val loss: 0.469 - Val acc: 0.770
Ep. 9 - Loss: 0.270 - Acc: 0.891 - Val loss: 0.312 - Val acc: 0.850
Ep. 10 - Loss: 0.270 - Acc: 0.882 - Val loss: 0.293 - Val acc: 0.870
Ep. 11 - Loss: 0.272 - Acc: 0.895 - Val loss: 0.220 - Val acc: 0.940
New best val_loss 0.220
Ep. 12 - Loss: 0.264 - Acc: 0.887 - Val loss: 0.504 - Val acc: 0.750
Ep. 13 - Loss: 0.288 - Acc: 0.885 - V

# OGB Mol HIV Clf (Edge Attributes)

In [None]:
# config
learning_rate = 1e-3  # learning rate
epochs = 2  # number of training epochs
batch_size = 32  # batch size

In [None]:
# load data
dataset_name = "ogbg-molhiv"
ogb_dataset = GraphPropPredDataset(name=dataset_name)
dataset = OGB(ogb_dataset)

Downloading http://snap.stanford.edu/ogb/data/graphproppred/csv_mol_download/hiv.zip


Downloaded 0.00 GB: 100%|██████████| 3/3 [00:00<00:00,  6.49it/s]


Extracting dataset/hiv.zip
Loading necessary files...
This might take a while.
Processing graphs...


100%|██████████| 41127/41127 [00:00<00:00, 76829.68it/s]


Saving...


In [None]:
# params
num_feats = dataset.n_node_features  # dimension of node features
num_edges = dataset.n_edge_features  # dimension of edge features
num_labels = dataset.n_labels  # dimension of the target

In [None]:
# train/test split
idx = ogb_dataset.get_idx_split()
idx_train, idx_valid, idx_test = idx["train"], idx["valid"], idx["test"]
dataset_train = dataset[idx_train]
dataset_valid = dataset[idx_valid]
dataset_test = dataset[idx_test]

In [None]:
# loaders
loader_train = DisjointLoader(dataset_train, batch_size=batch_size, epochs=epochs)
loader_test = DisjointLoader(dataset_test, batch_size=batch_size, epochs=1)

In [None]:
# build model
# Functional-API model for disjoint-mode batches with edge attributes.
Nodes_input = Input(shape=(num_feats,))  # node features
Adj_input = Input(shape=(None,), sparse=True)  # sparse adjacency matrix
Edges_input = Input(shape=(num_edges,))  # edge features
Ind_input = Input(shape=(), dtype=tf.int64)  # graph index of every node
X = ECCConv(32, activation="relu")([Nodes_input, Adj_input, Edges_input])
# The second layer consumes the first layer's output; feeding Nodes_input again
# would discard the first convolution entirely.
X = ECCConv(32, activation="relu")([X, Adj_input, Edges_input])
X = GlobalSumPool()([X, Ind_input])  # one vector per graph
output = Dense(num_labels, activation="sigmoid")(X)
# model
model = Model(inputs=[Nodes_input, Adj_input, Edges_input, Ind_input], outputs=output)
optimizer = Adam(learning_rate)
loss_fn = BinaryCrossentropy()

In [None]:
# fit model
@tf.function(input_signature=loader_train.tf_signature(), experimental_relax_shapes=True)
def train_step(inputs, target):
    """Perform one gradient update on a single batch and return the batch loss.

    Args:
        inputs: batch inputs as yielded by ``loader_train``.
        target: batch targets as yielded by ``loader_train``.

    Returns:
        The batch loss (data term plus ``model.losses``).
    """
    with tf.GradientTape() as tape:
        y_hat = model(inputs, training=True)
        batch_loss = loss_fn(target, y_hat) + sum(model.losses)
    variables = model.trainable_variables
    grads = tape.gradient(batch_loss, variables)
    optimizer.apply_gradients(zip(grads, variables))
    return batch_loss

# Training loop: accumulate the batch losses and print their mean once per epoch.
step = loss = 0
for batch in loader_train:
    step += 1
    loss += train_step(*batch)
    # An epoch ends after steps_per_epoch batches: report and reset the accumulators.
    if step == loader_train.steps_per_epoch:
        step = 0
        print("Loss: {}".format(loss / loader_train.steps_per_epoch))
        loss = 0

  np.random.shuffle(a)


Loss: 0.7644172310829163
Loss: 0.29914745688438416


In [None]:
# evaluate model
print("Testing model")
evaluator = Evaluator(name=dataset_name)
# Collect the targets and predictions of every test batch, then stack them row-wise.
batch_targets, batch_scores = [], []
for inputs, target in loader_test:
    scores = model(inputs, training=False)
    batch_targets.append(target)
    batch_scores.append(scores.numpy())
y_true = np.vstack(batch_targets)
y_pred = np.vstack(batch_scores)
model_loss = loss_fn(y_true, y_pred)
ogb_score = evaluator.eval({"y_true": y_true, "y_pred": y_pred})
print("Done. Test loss: {:.4f}. ROC-AUC: {:.2f}".format(model_loss, ogb_score["rocauc"]))

Testing model


  np.random.shuffle(a)


Done. Test loss: 0.4085. ROC-AUC: 0.28


# QM9 Reg With ECC (Custom Training Loop)

In [None]:
# config
learning_rate = 1e-3  # learning rate
epochs = 20  # number of training epochs
batch_size = 32  # batch size

In [None]:
# load data
dataset = QM9(amount=1000)

Downloading data from https://deepchemdata.s3-us-west-1.amazonaws.com/datasets/gdb9.tar.gz
Loading QM9 dataset.
Reading SDF


100%|██████████████████████████████████████| 1000/1000 [00:01<00:00, 604.60it/s]


In [None]:
# params
num_node_feats = dataset.n_node_features  # dimension of node features
num_edge_feats = dataset.n_edge_features  # dimension of edge features
num_labels = dataset.n_labels  # dimension of the target

In [None]:
# train/test split
idxs = np.random.permutation(len(dataset))
split = int(0.9 * len(dataset))
idx_train, idx_test = np.split(idxs, [split])
dataset_train, dataset_test = dataset[idx_train], dataset[idx_test]

In [None]:
# loaders
loader_train = DisjointLoader(dataset_train, batch_size=batch_size, epochs=epochs)
loader_test = DisjointLoader(dataset_test, batch_size=batch_size, epochs=1)

In [None]:
# build model
class Net(Model):
    """Two edge-conditioned convolutions, global sum pooling and a linear output layer."""

    def __init__(self):
        super().__init__()
        self.conv1 = ECCConv(32, activation="relu")
        self.conv2 = ECCConv(32, activation="relu")
        self.global_pool = GlobalSumPool()
        self.dense = Dense(num_labels)

    def call(self, inputs):
        """Map a batch ``(x, a, e, i)`` to per-graph regression outputs."""
        node_feats, adjacency, edge_feats, graph_idx = inputs
        for conv in (self.conv1, self.conv2):
            node_feats = conv([node_feats, adjacency, edge_feats])
        pooled = self.global_pool([node_feats, graph_idx])
        return self.dense(pooled)


model = Net()
optimizer = Adam(learning_rate)
loss_fn = MeanSquaredError()

In [None]:
# fit model
@tf.function(input_signature=loader_train.tf_signature(), experimental_relax_shapes=True)
def train_step(inputs, target):
    """Perform one gradient update on a single batch and return the batch loss.

    Args:
        inputs: batch inputs as yielded by ``loader_train``.
        target: batch targets as yielded by ``loader_train``.

    Returns:
        The batch loss (data term plus ``model.losses``).
    """
    with tf.GradientTape() as tape:
        y_hat = model(inputs, training=True)
        batch_loss = loss_fn(target, y_hat) + sum(model.losses)
    variables = model.trainable_variables
    grads = tape.gradient(batch_loss, variables)
    optimizer.apply_gradients(zip(grads, variables))
    return batch_loss
# Training loop: accumulate the batch losses and print their mean once per epoch.
step = loss = 0
for batch in loader_train:
    step += 1
    loss += train_step(*batch)
    # An epoch ends after steps_per_epoch batches: report and reset the accumulators.
    if step == loader_train.steps_per_epoch:
        step = 0
        print("Loss: {}".format(loss / loader_train.steps_per_epoch))
        loss = 0

  np.random.shuffle(a)
  return py_builtins.overload_of(f)(*args)


Loss: 22115854.0
Loss: 22055086.0
Loss: 21920856.0
Loss: 21822152.0
Loss: 21800426.0
Loss: 21794390.0
Loss: 21791298.0
Loss: 21789060.0
Loss: 21786364.0
Loss: 21784574.0
Loss: 21781888.0
Loss: 21779386.0
Loss: 21776378.0
Loss: 21773476.0
Loss: 21770846.0
Loss: 21766544.0
Loss: 21764332.0
Loss: 21759198.0
Loss: 21756552.0
Loss: 21750520.0


In [None]:
# evaluate model
print("Testing model")
# Sum the per-batch losses over the single test epoch, then divide by the batch count.
loss = sum(
    loss_fn(target, model(inputs, training=False))
    for inputs, target in loader_test
)
loss = loss / loader_test.steps_per_epoch
print("Done. Test loss: {}".format(loss))

Testing model
Done. Test loss: 10660.388671875




# QM9 Reg With ECC (Batch Mode)

In [None]:
# config
learning_rate = 1e-3  # learning rate
epochs = 10  # number of training epochs
batch_size = 32  # batch size

In [None]:
# Load data
dataset = QM9(amount=1000)

Loading QM9 dataset.
Reading SDF


100%|██████████████████████████████████████| 1000/1000 [00:01<00:00, 893.16it/s]


In [None]:
# params
num_node_feats = dataset.n_node_features  # dimension of node features
num_edge_feats = dataset.n_edge_features  # dimension of edge features
num_labels = dataset.n_labels  # dimension of the target

In [None]:
# train/test split
# Random 90/10 split. The index arrays must carry the same names that the
# indexing line reads; otherwise stale idx_train/idx_test left over from an
# earlier cell would be used and this permutation would be ignored.
idxs = np.random.permutation(len(dataset))
split = int(0.9 * len(dataset))
idx_train, idx_test = np.split(idxs, [split])
dataset_train, dataset_test = dataset[idx_train], dataset[idx_test]

In [None]:
# build model
class Net(Model):
    """Batch-mode ECC network: masking, two ECC layers, global sum pooling, linear output."""

    def __init__(self):
        super().__init__()
        self.masking = GraphMasking()
        self.conv1 = ECCConv(32, activation="relu")
        self.conv2 = ECCConv(32, activation="relu")
        self.global_pool = GlobalSumPool()
        self.dense = Dense(num_labels)

    def call(self, inputs):
        """Map a batch ``(x, a, e)`` to per-graph regression outputs."""
        node_feats, adjacency, edge_feats = inputs
        node_feats = self.masking(node_feats)
        for conv in (self.conv1, self.conv2):
            node_feats = conv([node_feats, adjacency, edge_feats])
        pooled = self.global_pool(node_feats)
        return self.dense(pooled)


model = Net()
optimizer = Adam(learning_rate)
model.compile(optimizer=optimizer, loss="mse")

In [None]:
# fit model
loader_train = BatchLoader(dataset_train, batch_size=batch_size, mask=True)
model.fit(loader_train.load(), steps_per_epoch=loader_train.steps_per_epoch, epochs=epochs)

  np.random.shuffle(a)


Epoch 1/10


  return py_builtins.overload_of(f)(*args)


Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.src.callbacks.History at 0x7a03c009d360>

In [None]:
# evaluate model
print("Testing model")
loader_test = BatchLoader(dataset_test, batch_size=batch_size, mask=True)
# Evaluate on the loader created just above (the name `loader_te` is never defined
# in this notebook and only resolves through leftover kernel state).
loss = model.evaluate(loader_test.load(), steps=loader_test.steps_per_epoch)
print("Done. Test loss: {}".format(loss))

Testing model
Done. Test loss: 12110.3447265625


# TUDataset Clf With GIN

In [None]:
# config
learning_rate = 1e-3  # learning rate
channels = 128  # hidden units
layers = 3  # GIN layers
epochs = 10  # number of training epochs
batch_size = 32  # batch size

In [None]:
# load data
dataset = TUDataset("PROTEINS", clean=True)

Downloading PROTEINS dataset (clean).


100%|████████████████████████████████████████| 433k/433k [00:00<00:00, 5.18MB/s]


Successfully loaded PROTEINS.




In [None]:
# params
num_feats = dataset.n_node_features  # dimension of node features
num_labels = dataset.n_labels  # dimension of the target

In [None]:
# train/test split
idxs = np.random.permutation(len(dataset))
split = int(0.9 * len(dataset))
idx_train, idx_test = np.split(idxs, [split])
dataset_train, dataset_test = dataset[idx_train], dataset[idx_test]

In [None]:
# loaders
loader_train = DisjointLoader(dataset_train, batch_size=batch_size, epochs=epochs)
loader_test = DisjointLoader(dataset_test, batch_size=batch_size, epochs=1)

In [None]:
# build model
class GIN0(Model):
    """Stack of GIN layers (epsilon fixed to 0), average pooling and an MLP classifier.

    Args:
        channels: width of every GIN layer, of its MLP hidden layers and of the dense layer.
        n_layers: total number of GIN layers (the first one plus ``n_layers - 1`` more).
    """

    def __init__(self, channels, n_layers):
        super().__init__()
        self.conv1 = GINConv(channels, epsilon=0, mlp_hidden=[channels, channels])
        self.convs = [
            GINConv(channels, epsilon=0, mlp_hidden=[channels, channels])
            for _ in range(1, n_layers)
        ]
        self.pool = GlobalAvgPool()
        self.dense1 = Dense(channels, activation="relu")
        self.dropout = Dropout(0.5)
        self.dense2 = Dense(num_labels, activation="softmax")

    def call(self, inputs):
        """Map a batch ``(x, a, i)`` to per-graph class probabilities."""
        node_feats, adjacency, graph_idx = inputs
        for conv in [self.conv1, *self.convs]:
            node_feats = conv([node_feats, adjacency])
        hidden = self.pool([node_feats, graph_idx])
        hidden = self.dropout(self.dense1(hidden))
        return self.dense2(hidden)


model = GIN0(channels, layers)
optimizer = Adam(learning_rate)
loss_fn = CategoricalCrossentropy()

In [None]:
# fit model
@tf.function(input_signature=loader_train.tf_signature(), experimental_relax_shapes=True)
def train_step(inputs, target):
    """Perform one gradient update on a single batch.

    Args:
        inputs: batch inputs as yielded by ``loader_train``.
        target: batch targets as yielded by ``loader_train``.

    Returns:
        Tuple ``(loss, acc)``: the batch loss (data term plus ``model.losses``)
        and the mean categorical accuracy of the batch.
    """
    with tf.GradientTape() as tape:
        y_hat = model(inputs, training=True)
        batch_loss = loss_fn(target, y_hat) + sum(model.losses)
    # Read the variable list only after the forward pass has run.
    variables = model.trainable_variables
    grads = tape.gradient(batch_loss, variables)
    optimizer.apply_gradients(zip(grads, variables))
    batch_acc = tf.reduce_mean(categorical_accuracy(target, y_hat))
    return batch_loss, batch_acc

# Training loop: loader_train yields the batches of all epochs back to back,
# so epoch boundaries are detected by counting steps.
epoch = step = 0
results = []  # (loss, acc) of every batch in the current epoch
for batch in loader_train:
    step += 1
    loss, acc = train_step(*batch)
    results.append((loss, acc))
    # End of an epoch: print the unweighted mean of the batch metrics and reset.
    if step == loader_train.steps_per_epoch:
        step = 0
        epoch += 1
        print("Ep. {} - Loss: {}. Acc: {}".format(epoch, *np.mean(results, 0)))
        results = []

  np.random.shuffle(a)


Ep. 1 - Loss: 0.7328217625617981. Acc: 0.640281617641449
Ep. 2 - Loss: 0.6843549609184265. Acc: 0.6753090620040894
Ep. 3 - Loss: 0.6694830656051636. Acc: 0.6709306836128235
Ep. 4 - Loss: 0.5705536603927612. Acc: 0.7260473966598511
Ep. 5 - Loss: 0.6184412240982056. Acc: 0.7099072337150574
Ep. 6 - Loss: 0.6134802103042603. Acc: 0.7088770866394043
Ep. 7 - Loss: 0.591161847114563. Acc: 0.7127403616905212
Ep. 8 - Loss: 0.6021036505699158. Acc: 0.7177197337150574
Ep. 9 - Loss: 0.5704256296157837. Acc: 0.7261332273483276
Ep. 10 - Loss: 0.5586482286453247. Acc: 0.7172905206680298


In [None]:
# evaluate model
# Collect (loss, accuracy) for every test batch, then report the unweighted batch mean.
results = []
for inputs, target in loader_test:
    probs = model(inputs, training=False)
    batch_loss = loss_fn(target, probs)
    batch_acc = tf.reduce_mean(categorical_accuracy(target, probs))
    results.append((batch_loss, batch_acc))
print("Done. Test loss: {}. Test acc: {}".format(*np.mean(results, 0)))

Done. Test loss: 1.0085724592208862. Test acc: 0.5703125


# TUDataset Clf With MinCut Pooling

In [None]:
# config
learning_rate = 1e-3  # learning rate
epochs = 10  # number of training epochs
batch_size = 32  # batch size

In [None]:
# load data
dataset = TUDataset("PROTEINS", clean=True)



Successfully loaded PROTEINS.




In [None]:
# params
max_num_nodes = max(graph.n_nodes for graph in dataset)
num_node_feats = dataset.n_node_features  # dimension of node features
num_edge_feats = dataset.n_edge_features  # dimension of edge features
num_labels = dataset.n_labels  # dimension of the target

In [None]:
# train/test split
idxs = np.random.permutation(len(dataset))
split_valid, split_test = int(0.8 * len(dataset)), int(0.9 * len(dataset))
idx_train, idx_valid, idx_test = np.split(idxs, [split_valid, split_test])
dataset_train = dataset[idx_train]
dataset_valid = dataset[idx_valid]
dataset_test = dataset[idx_test]

In [None]:
# loaders
loader_train = BatchLoader(dataset_train, batch_size=batch_size, mask=True)
loader_valid = BatchLoader(dataset_valid, batch_size=batch_size, mask=True)
loader_test = BatchLoader(dataset_test, batch_size=batch_size, mask=True)

In [None]:
# build model
class Net(Model):
    """Batch-mode classifier: GCS conv -> MinCut pooling -> GCS conv -> global sum pool -> softmax."""

    def __init__(self):
        super().__init__()
        self.mask = GraphMasking()
        self.conv1 = GCSConv(32, activation="relu")
        # Coarsen every graph to half the size of the largest graph in the dataset.
        self.pool = MinCutPool(max_num_nodes // 2)
        self.conv2 = GCSConv(32, activation="relu")
        self.global_pool = GlobalSumPool()
        # The model is compiled with the string loss "categorical_crossentropy", which
        # expects probabilities, so the output layer needs a softmax (as in the other
        # classifiers of this notebook) instead of emitting raw logits.
        self.dense1 = Dense(num_labels, activation="softmax")

    def call(self, inputs):
        """Map a batch ``(x, a)`` to per-graph class probabilities."""
        x, a = inputs
        x = self.mask(x)
        x = self.conv1([x, a])
        x_pool, a_pool = self.pool([x, a])
        x_pool = self.conv2([x_pool, a_pool])
        output = self.global_pool(x_pool)
        output = self.dense1(output)
        return output


model = Net()
opt = Adam(learning_rate=learning_rate)
model.compile(optimizer=opt, loss="categorical_crossentropy", metrics=["acc"])

In [None]:
# fit model
# Keras consumes the generators returned by .load(), not the loader objects
# themselves, so the validation loader is unwrapped the same way as the training one.
# NOTE(review): patience=10 equals the epoch budget configured for this section, so
# early stopping cannot trigger unless `epochs` is raised.
model.fit(
    loader_train.load(),
    steps_per_epoch=loader_train.steps_per_epoch,
    epochs=epochs,
    validation_data=loader_valid.load(),
    validation_steps=loader_valid.steps_per_epoch,
    callbacks=[EarlyStopping(patience=10, restore_best_weights=True)]
    )

  np.random.shuffle(a)


Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.src.callbacks.History at 0x7a03c1337190>

In [None]:
# evaluate model
print("Testing model")
loss, acc = model.evaluate(loader_test.load(), steps=loader_test.steps_per_epoch)
print("Done. Test loss: {}. Test acc: {}".format(loss, acc))

Testing model
Done. Test loss: 0.9282967448234558. Test acc: 0.7244898080825806
