In [12]:
import spektral
import tensorflow as tf
import numpy as np

print(tf.__version__)

2.6.0


Load and pre-process graph representation data.

In [13]:
adj, features, labels, train_mask, val_mask, test_mask = spektral.datasets.citation.load_data(dataset_name='cora')

Loading cora dataset
Pre-processing node features


In [14]:
features = features.todense()
# The adjacency matric doesn't come with self edges so we need to add the identity matrix 
adj = adj.todense() + np.eye(adj.shape[0])
features = features.astype('float32')
adj = adj.astype('float32')
labels = labels.astype('float32')

print(features.shape[0], 'nodes')
print(features.shape[1], 'features in every node')
print(labels.shape[1], 'classes')

print(np.sum(train_mask), 'training nodes')
print(np.sum(val_mask), 'validation nodes')
print(np.sum(test_mask), 'test nodes')

2708 nodes
1433 features in every node
7 classes
140 training nodes
500 validation nodes
1000 test nodes


In [26]:
def masked_softmax_cross_entropy(logits, labels, mask):
    '''Applies loss function taking into account the mask to
       only take into account relevant nodes.
       Returns cross entropy loss over the masked nodes of the graph.'''
    loss = tf.nn.softmax_cross_entropy_with_logits(labels=labels, logits=logits)
    mask = tf.cast(mask, dtype=tf.float32)
    # Divide mask by its average value - that will enable us to take the product of the mask with the loss
    mask /= tf.reduce_mean(mask)  # Is this step used to normalise?
    loss *= mask
    # return average across all positions
    return tf.reduce_mean(loss)


def masked_accuracy(logits, labels, mask):
    '''Returns accuracy over the masked nodes of the graph.'''
    correct_prediction = tf.equal(tf.argmax(logits, 1), tf.argmax(labels, 1))
    accuracy_all = tf.cast(correct_prediction, tf.float32)
    mask = tf.cast(mask, dtype=tf.float32)
    mask /= tf.reduce_mean(mask)
    accuracy_all *= mask
    return tf.reduce_mean(accuracy_all)

In [27]:
def gnn(fts, adj, transform, activation):
    '''
    Define a Graph Neural Network layer.
    fts: node feature matrix
    adj: adjacency matrix
    transform: some transformation that we wish to apply to each node
    activation: activation function
    '''
    # Transform each node 
    seq_fts = transform(fts)
    # Once we have the features we want to aggregate we multiply by the adjacency matrix
    ret_fts = tf.matmul(adj, seq_fts)
    return activation(ret_fts)

In [39]:
def train_cora(fts, adj, gnn_fn, units, epochs, lr):
    '''Define simple 2 layer GNN.
       gnn_fn: gnn model function
       units: how many units we want our NN to compute in each node - 
              how many dimentions in our latent features.'''
    lyr_1 = tf.keras.layers.Dense(units)
    # Computes classification of nodes
    lyr_2 = tf.keras.layers.Dense(7)
    
    def cora_gnn(fts, adj):
        ''' Define the GNN that is used to solve this problem
            on a specific set of features and adjacencies.'''
        # Computes hidden features in every node
        hidden = gnn_fn(fts, adj, lyr_1, tf.nn.relu)
        logits = gnn_fn(hidden, adj, lyr_2, tf.identity)  # We don't need any further transformation so we use the identity matrix as activation
        return logits
    
    optimizer = tf.keras.optimizers.Adam(learning_rate=lr)
    
    best_accuracy = 0.0
    for ep in range(epochs+1):
        # Use tape to keep track of gradients 
        with tf.GradientTape() as t:
            logits = cora_gnn(fts, adj)
            loss = masked_softmax_cross_entropy(logits, labels, train_mask)
            
        # Look at variables that gradient tape is watching
        variables = t.watched_variables()  # Get variables
        grads = t.gradient(loss, variables)  # Get gradients

        optimizer.apply_gradients(zip(grads, variables))  # Apply gradients
        
        # Track val and test accuracy
        logits = cora_gnn(fts, adj)  # Take logits after gradients have been updated
        val_accuracy = masked_accuracy(logits, labels, val_mask)
        test_accuracy = masked_accuracy(logits, labels, test_mask)
        
        if val_accuracy > best_accuracy:
            best_accuracy = val_accuracy
            print('Epoch', ep, '| Training Loss:', loss.numpy(), '|Val accuracy:',
                  val_accuracy.numpy(), '|Test accuracy:', test_accuracy.numpy())
            
    return cora_gnn

In [29]:
hidden_features = 32
epochs = 200
learning_rate = 0.01

train_cora(features, adj, gnn, hidden_features, epochs, learning_rate)

Epoch 0 | Training Loss: 1.969773 |Val accuracy: 0.19399999 |Test accuracy: 0.19899999
Epoch 1 | Training Loss: 1.9085302 |Val accuracy: 0.25199997 |Test accuracy: 0.24199998
Epoch 2 | Training Loss: 1.5839084 |Val accuracy: 0.26 |Test accuracy: 0.268
Epoch 3 | Training Loss: 1.4373523 |Val accuracy: 0.71199995 |Test accuracy: 0.738
Epoch 4 | Training Loss: 1.1906141 |Val accuracy: 0.72999996 |Test accuracy: 0.749
Epoch 5 | Training Loss: 1.1051586 |Val accuracy: 0.73399997 |Test accuracy: 0.746
Epoch 6 | Training Loss: 1.0439419 |Val accuracy: 0.73999995 |Test accuracy: 0.747
Epoch 14 | Training Loss: 0.4841191 |Val accuracy: 0.74799997 |Test accuracy: 0.7569999
Epoch 15 | Training Loss: 0.44554722 |Val accuracy: 0.75399995 |Test accuracy: 0.76599985
Epoch 16 | Training Loss: 0.41409317 |Val accuracy: 0.762 |Test accuracy: 0.76999986
Epoch 17 | Training Loss: 0.38092667 |Val accuracy: 0.77199996 |Test accuracy: 0.76799995
Epoch 21 | Training Loss: 0.27847943 |Val accuracy: 0.7720001 |

In [30]:
hidden_features = 32
epochs = 200
learning_rate = 0.01

# Is it useful to use the graph?
# Test by changing the adjacency matrix with the identity matrix (standard point-wise MLP model)
train_cora(features, tf.eye(adj.shape[0]), gnn, hidden_features, epochs, learning_rate)

Epoch 0 | Training Loss: 1.9450082 |Val accuracy: 0.16599998 |Test accuracy: 0.20699999
Epoch 1 | Training Loss: 1.9298187 |Val accuracy: 0.21999998 |Test accuracy: 0.272
Epoch 2 | Training Loss: 1.9091934 |Val accuracy: 0.294 |Test accuracy: 0.36200002
Epoch 3 | Training Loss: 1.8837721 |Val accuracy: 0.364 |Test accuracy: 0.406
Epoch 4 | Training Loss: 1.8545489 |Val accuracy: 0.36999997 |Test accuracy: 0.419
Epoch 5 | Training Loss: 1.8226416 |Val accuracy: 0.38799998 |Test accuracy: 0.42799997
Epoch 6 | Training Loss: 1.787902 |Val accuracy: 0.39399996 |Test accuracy: 0.425
Epoch 22 | Training Loss: 0.95110416 |Val accuracy: 0.41799998 |Test accuracy: 0.442
Epoch 23 | Training Loss: 0.8916823 |Val accuracy: 0.42399997 |Test accuracy: 0.45199996
Epoch 24 | Training Loss: 0.8331007 |Val accuracy: 0.42999998 |Test accuracy: 0.45799997
Epoch 25 | Training Loss: 0.77563894 |Val accuracy: 0.436 |Test accuracy: 0.45899996
Epoch 26 | Training Loss: 0.719587 |Val accuracy: 0.444 |Test accur

In [31]:
hidden_features = 32
epochs = 200
learning_rate = 0.01

# Since the graph is useful we can explore mean pooling
# Calculate the degree matrix
deg = tf.reduce_sum(adj, axis=-1)  # Compute the degree matrix as the degree of each node and then spread across the diagonal
train_cora(features, adj/deg, gnn, hidden_features, epochs, learning_rate)  # This will give us a normalised propagation rule which should deal with any exploding signal

Epoch 0 | Training Loss: 1.9451618 |Val accuracy: 0.121999994 |Test accuracy: 0.134
Epoch 3 | Training Loss: 1.8901854 |Val accuracy: 0.12799999 |Test accuracy: 0.147
Epoch 4 | Training Loss: 1.8675568 |Val accuracy: 0.14199999 |Test accuracy: 0.171
Epoch 5 | Training Loss: 1.8443671 |Val accuracy: 0.17799999 |Test accuracy: 0.19999999
Epoch 6 | Training Loss: 1.8197027 |Val accuracy: 0.218 |Test accuracy: 0.234
Epoch 7 | Training Loss: 1.7928916 |Val accuracy: 0.25599998 |Test accuracy: 0.26900002
Epoch 8 | Training Loss: 1.7637992 |Val accuracy: 0.304 |Test accuracy: 0.30900002
Epoch 9 | Training Loss: 1.7324904 |Val accuracy: 0.36399996 |Test accuracy: 0.37199998
Epoch 10 | Training Loss: 1.6993543 |Val accuracy: 0.41600004 |Test accuracy: 0.429
Epoch 11 | Training Loss: 1.6652437 |Val accuracy: 0.486 |Test accuracy: 0.49199998
Epoch 12 | Training Loss: 1.6295828 |Val accuracy: 0.544 |Test accuracy: 0.553
Epoch 13 | Training Loss: 1.592213 |Val accuracy: 0.59 |Test accuracy: 0.605
E

In [32]:
norm_deg = tf.linalg.diag(1.0 / tf.sqrt(deg))
norm_adj = tf.matmul(norm_deg, tf.matmul(adj, norm_deg))
train_cora(features, norm_adj, gnn, hidden_features, epochs, learning_rate)

Epoch 0 | Training Loss: 1.9457494 |Val accuracy: 0.252 |Test accuracy: 0.22399999
Epoch 1 | Training Loss: 1.936319 |Val accuracy: 0.258 |Test accuracy: 0.248
Epoch 6 | Training Loss: 1.8478409 |Val accuracy: 0.27199998 |Test accuracy: 0.27699998
Epoch 7 | Training Loss: 1.8239292 |Val accuracy: 0.318 |Test accuracy: 0.306
Epoch 8 | Training Loss: 1.7981963 |Val accuracy: 0.358 |Test accuracy: 0.34600002
Epoch 9 | Training Loss: 1.77068 |Val accuracy: 0.408 |Test accuracy: 0.377
Epoch 10 | Training Loss: 1.7411994 |Val accuracy: 0.446 |Test accuracy: 0.41799998
Epoch 11 | Training Loss: 1.7096944 |Val accuracy: 0.464 |Test accuracy: 0.46099997
Epoch 12 | Training Loss: 1.6762741 |Val accuracy: 0.516 |Test accuracy: 0.501
Epoch 13 | Training Loss: 1.6409322 |Val accuracy: 0.566 |Test accuracy: 0.559
Epoch 14 | Training Loss: 1.6037468 |Val accuracy: 0.618 |Test accuracy: 0.60999995
Epoch 15 | Training Loss: 1.5648412 |Val accuracy: 0.648 |Test accuracy: 0.64599997
Epoch 16 | Training L

In [40]:
model = train_cora(features, norm_adj, gnn, hidden_features, epochs, learning_rate)

Epoch 0 | Training Loss: 1.9456286 |Val accuracy: 0.19799998 |Test accuracy: 0.226
Epoch 1 | Training Loss: 1.9365743 |Val accuracy: 0.26 |Test accuracy: 0.273
Epoch 2 | Training Loss: 1.9250264 |Val accuracy: 0.266 |Test accuracy: 0.26099998
Epoch 3 | Training Loss: 1.9087073 |Val accuracy: 0.284 |Test accuracy: 0.27299997
Epoch 4 | Training Loss: 1.8908659 |Val accuracy: 0.322 |Test accuracy: 0.325
Epoch 5 | Training Loss: 1.8715485 |Val accuracy: 0.35599998 |Test accuracy: 0.38000003
Epoch 6 | Training Loss: 1.850498 |Val accuracy: 0.38599998 |Test accuracy: 0.41400003
Epoch 7 | Training Loss: 1.8279786 |Val accuracy: 0.42199996 |Test accuracy: 0.442
Epoch 8 | Training Loss: 1.803438 |Val accuracy: 0.45 |Test accuracy: 0.47
Epoch 9 | Training Loss: 1.7769442 |Val accuracy: 0.50200003 |Test accuracy: 0.49499997
Epoch 10 | Training Loss: 1.7488081 |Val accuracy: 0.524 |Test accuracy: 0.519
Epoch 11 | Training Loss: 1.7188131 |Val accuracy: 0.562 |Test accuracy: 0.555
Epoch 12 | Traini