<a href="https://colab.research.google.com/github/ariahosseini/DeepML/blob/main/016_TensorFlow_Proj_Sixteen_VariationalAutoEncoder_GNN_Spektral_EdgeLevel.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

Reference paper: Kipf & Welling, "Variational Graph Auto-Encoders" (2016), https://arxiv.org/pdf/1611.07308.pdf

In [None]:
# install
# !pip install spektral

In [None]:
import numpy as np
import scipy.sparse as sp
# sklearn
from sklearn.metrics import roc_auc_score, average_precision_score
# tensorflow
import tensorflow as tf
from tensorflow.keras.layers import Dropout, Input
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.regularizers import l2
# spektral
from spektral.datasets import citation
from spektral.layers import GCNConv
from spektral.utils.sparse import sp_matrix_to_sp_tensor

In [None]:
# physical_devices = tf.config.list_physical_devices("GPU")
# if len(physical_devices) > 0:
#     tf.config.experimental.set_memory_growth(physical_devices[0], True)

## Utils

In [None]:
def sparse_to_tuple(sparse_mx):
    """Unpack a SciPy sparse matrix into ``(coords, values, shape)``.

    The matrix is read in COO form; it is converted only when it is not
    already a COO matrix.

    Returns
    -------
    coords : ndarray of shape (nnz, 2)
        One ``[row, col]`` pair per stored entry.
    values : ndarray
        The stored entries, in the same order as `coords`.
    shape : tuple
        Shape of the matrix.
    """
    coo = sparse_mx if sp.isspmatrix_coo(sparse_mx) else sparse_mx.tocoo()
    index_pairs = np.array((coo.row, coo.col)).T
    return index_pairs, coo.data, coo.shape

In [None]:
def _as_pair_set(edge_list):
    """Return the edges as a set of orientation-independent ``(min, max)`` tuples."""
    pairs = np.asarray(edge_list, dtype=np.int64).reshape(-1, 2).tolist()
    return {(min(i, j), max(i, j)) for i, j in pairs}


def _sample_negative_edges(num_samples, num_nodes, true_pairs):
    """Rejection-sample `num_samples` distinct node pairs that are not graph edges.

    Parameters
    ----------
    num_samples : int
        How many negative pairs to draw.
    num_nodes : int
        Number of nodes; indices are drawn from ``[0, num_nodes)`` with ``np.random``.
    true_pairs : set of (int, int)
        Every existing edge as a ``(min, max)`` tuple.

    Returns
    -------
    list of [int, int]
        Sampled pairs; no pair appears twice in either orientation.

    Raises
    ------
    ValueError
        If fewer than `num_samples` unconnected node pairs exist (the sampling
        loop could otherwise never terminate).
    """
    num_free_pairs = num_nodes * (num_nodes - 1) // 2 - len(true_pairs)
    if num_samples > num_free_pairs:
        raise ValueError(
            f"cannot sample {num_samples} negative edges: "
            f"only {num_free_pairs} unconnected node pairs exist"
        )
    sampled = []
    seen = set()  # (min, max) keys of the pairs accepted so far
    while len(sampled) < num_samples:
        idx_i = np.random.randint(0, num_nodes)
        idx_j = np.random.randint(0, num_nodes)
        if idx_i == idx_j:
            continue
        key = (min(idx_i, idx_j), max(idx_i, idx_j))
        if key in true_pairs or key in seen:
            continue
        seen.add(key)
        sampled.append([idx_i, idx_j])
    return sampled


def mask_test_edges(adj): # func to build test set with 10% positive links
    """Split the edges of an undirected graph into train / validation / test sets.

    10% of the edges go to the test set and 5% to the validation set; for each
    of these sets an equally sized list of negative samples (node pairs that
    are NOT connected in the full graph) is drawn. Randomness comes from the
    global ``np.random`` state, so seed it beforehand for reproducible splits.

    Parameters
    ----------
    adj : scipy.sparse matrix
        Square adjacency matrix, expected to be symmetric. Diagonal entries
        are ignored; the input itself is not modified.

    Returns
    -------
    adj_train : scipy.sparse.csr_matrix
        Symmetric adjacency matrix containing only the training edges.
    train_edges, val_edges, test_edges : ndarray of shape (k, 2)
        Positive edges; each undirected edge is listed once (upper triangle).
    val_edges_false, test_edges_false : list of [int, int]
        Negative samples for validation / test.
        (Returned in the order adj_train, train_edges, val_edges,
        val_edges_false, test_edges, test_edges_false.)

    Raises
    ------
    ValueError
        If the graph does not have enough unconnected node pairs to draw the
        requested number of negative samples.
    """
    adj = (adj - sp.dia_matrix((adj.diagonal()[np.newaxis, :], [0]), shape=adj.shape)).tocsr() # remove diagonal elements
    adj.eliminate_zeros()
    assert adj.diagonal().sum() == 0 # check that diag is zero (without densifying the matrix)

    upper = sp.triu(adj).tocoo() # every undirected edge exactly once
    edges = np.vstack((upper.row, upper.col)).transpose()
    full = adj.tocoo() # both orientations of every edge
    edges_all = np.vstack((full.row, full.col)).transpose()

    num_test = int(np.floor(edges.shape[0] / 10.))
    num_val = int(np.floor(edges.shape[0] / 20.))
    all_edge_idx = list(range(edges.shape[0]))
    np.random.shuffle(all_edge_idx)
    # explicit integer arrays so that empty splits (tiny graphs) still index correctly
    val_edge_idx = np.asarray(all_edge_idx[:num_val], dtype=np.int64)
    test_edge_idx = np.asarray(all_edge_idx[num_val:(num_val + num_test)], dtype=np.int64)
    test_edges = edges[test_edge_idx]
    val_edges = edges[val_edge_idx]
    train_edges = np.delete(edges, np.concatenate([test_edge_idx, val_edge_idx]), axis=0)

    # Negatives are rejected against ALL true edges (train, val and test), so a
    # held-out test edge can never be handed out as a validation negative.
    true_pairs = _as_pair_set(edges_all)
    num_nodes = adj.shape[0]
    test_edges_false = _sample_negative_edges(len(test_edges), num_nodes, true_pairs)
    val_edges_false = _sample_negative_edges(len(val_edges), num_nodes, true_pairs)

    # sanity checks on the split
    train_pairs = _as_pair_set(train_edges)
    val_pairs = _as_pair_set(val_edges)
    test_pairs = _as_pair_set(test_edges)
    assert true_pairs.isdisjoint(_as_pair_set(test_edges_false))
    assert true_pairs.isdisjoint(_as_pair_set(val_edges_false))
    assert val_pairs.isdisjoint(train_pairs)
    assert test_pairs.isdisjoint(train_pairs)
    assert val_pairs.isdisjoint(test_pairs)

    data = np.ones(train_edges.shape[0])
    adj_train = sp.csr_matrix((data, (train_edges[:, 0], train_edges[:, 1])), shape=adj.shape) # re-build adj matrix
    adj_train = adj_train + adj_train.T
    return adj_train, train_edges, val_edges, val_edges_false, test_edges, test_edges_false # note: these edge lists only contain single direction of edge!

In [None]:
def get_roc_score(edges_pos, edges_neg, adj_rec):
    """Score a reconstructed adjacency matrix on positive and negative edges.

    Parameters
    ----------
    edges_pos, edges_neg : iterable of index pairs
        Node pairs that are true edges / non-edges; each item is indexed as
        ``e[0]``, ``e[1]``.
    adj_rec : 2-D array-like
        Predicted score for every node pair, indexed as ``adj_rec[i, j]``.

    Returns
    -------
    (roc_score, ap_score)
        ROC-AUC and average precision, with positives labelled 1 and
        negatives labelled 0.
    """
    scores_pos = [adj_rec[e[0], e[1]] for e in edges_pos]
    scores_neg = [adj_rec[e[0], e[1]] for e in edges_neg]
    scores = np.hstack([scores_pos, scores_neg])
    labels = np.hstack([np.ones(len(scores_pos)), np.zeros(len(scores_neg))])
    return roc_auc_score(labels, scores), average_precision_score(labels, scores)

## AutoEncoder

In [None]:
# load data: the Cora citation dataset; work with its first graph
dataset = citation.Cora()
graph = dataset[0]
nodes = graph.x # node feature matrix

Downloading cora dataset.


  self._set_arrayXarray(i, j, x)


In [None]:
# target graph to reconstruct: the adjacency of the full graph plus self-loops,
# densified and flattened into a 1-D vector (one entry per ordered node pair)
adj_label = graph.a + sp.eye(graph.a.shape[0], dtype=np.float32)
adj_label = adj_label.toarray().reshape([-1])

In [None]:
# randomly hold out edges for the validation/test sets; adj_train keeps only the remaining
# training edges (the returned train-edge list is discarded). The split uses np.random.
adj_train, _, val_edges, val_edges_false, test_edges, test_edges_false = mask_test_edges(graph.a)

In [None]:
# normalize the training adjacency matrix with GCNConv.preprocess and convert it to a sparse tensor
adj_norm = GCNConv.preprocess(adj_train)
adj_norm = sp_matrix_to_sp_tensor(adj_norm)

In [None]:
# class weight for positive entries (needed because edges are heavily outnumbered by non-edges):
# (N*N - sum(adj_train)) / sum(adj_train), with N the number of rows of adj_train
pos_weight = float(adj_train.shape[0] * adj_train.shape[0] - adj_train.sum()) / adj_train.sum()

In [None]:
# params
hidden_dim1, hidden_dim2 = 32, 16 # units in the first / second GCN layer
dropout = 0.0                     # dropout rate (0.0 disables dropout)
l2_reg = 0e-5                     # L2 regularization rate (note: 0e-5 evaluates to 0.0, i.e. no regularization)
learning_rate = 1e-2              # learning rate
epochs = 20                      # max number of training epochs
val_epochs = 2                   # evaluate on the validation set every `val_epochs` epochs
num_nodes = dataset.n_nodes               # number of nodes in the graph
num_feats = dataset.n_node_features       # original size of node features

In [None]:
# GNN architecture: two GCN layers as encoder, inner product of the embeddings as decoder
nodes_input = Input(shape=(num_feats,))
adj_input = Input((num_nodes,), sparse=True)
gc = GCNConv(hidden_dim1, activation="relu", kernel_regularizer=l2(l2_reg))([nodes_input, adj_input])
gc = Dropout(dropout)(gc)
z = GCNConv(hidden_dim2, activation=None, kernel_regularizer=l2(l2_reg))([gc, adj_input]) # node embeddings (no activation)
output = tf.matmul(z, tf.transpose(z)) # z @ z^T: one logit per ordered node pair
adj_rec = tf.keras.layers.Activation('sigmoid')(output) # logits -> edge probabilities (used for evaluation)
output = tf.reshape(output, [-1]) # flattened logits (used by the loss)
model = Model(inputs=[nodes_input, adj_input], outputs=[output, adj_rec, z]) # build model
optimizer = Adam(learning_rate=learning_rate)

In [None]:
# train & validate
# Loss target built from the TRAINING adjacency only (plus self-loops): the held-out
# validation/test edges must not appear in the objective, otherwise the link-prediction
# scores are inflated by label leakage. This also matches pos_weight, which is derived
# from adj_train. Cast to float32 so the labels match the dtype of the logits.
adj_label_train = (adj_train + sp.eye(adj_train.shape[0], dtype=np.float32)).toarray().reshape([-1]).astype(np.float32)

@tf.function
def train():
    """Run one full-batch optimisation step and return the scalar training loss.

    The loss is the mean weighted cross-entropy between the flattened edge
    logits and `adj_label_train`, plus the model's regularisation losses.
    """
    with tf.GradientTape() as tape:
        predictions, _, _ = model([nodes, adj_norm], training=True)
        loss = tf.reduce_mean(tf.nn.weighted_cross_entropy_with_logits(logits=predictions, labels=adj_label_train, pos_weight=pos_weight))
        loss += sum(model.losses)
    gradients = tape.gradient(loss, model.trainable_variables)
    optimizer.apply_gradients(zip(gradients, model.trainable_variables))
    return loss
# train
best_val_roc = 0
for epoch in range(1, epochs + 1): # inclusive upper bound: run the full `epochs` epochs
    loss = train()
    print(f"epoch: {epoch:d} -- loss: {loss:.3f}")
    if epoch%val_epochs==0:
        _, adj_rec, _ = model([nodes, adj_norm])
        adj_rec = adj_rec.numpy()
        val_roc, _ = get_roc_score(val_edges, val_edges_false, adj_rec)
        if val_roc <= best_val_roc:
            break # early stopping: validation AUC did not improve
        else:
            best_val_roc = val_roc
            acc = np.mean(np.round(adj_rec) == graph.a.toarray())
            print(f"Val AUC: {best_val_roc*100:.1f}, Accuracy: {acc*100:.1f}")

epoch: 1 -- loss: 0.869
epoch: 2 -- loss: 0.869
Val AUC: 98.2, Accuracy: 57.4
epoch: 3 -- loss: 0.868
epoch: 4 -- loss: 0.867
Val AUC: 98.3, Accuracy: 57.5
epoch: 5 -- loss: 0.866
epoch: 6 -- loss: 0.865
Val AUC: 98.4, Accuracy: 57.5
epoch: 7 -- loss: 0.865
epoch: 8 -- loss: 0.864
Val AUC: 98.5, Accuracy: 57.5
epoch: 9 -- loss: 0.863
epoch: 10 -- loss: 0.863
Val AUC: 98.5, Accuracy: 57.5
epoch: 11 -- loss: 0.862
epoch: 12 -- loss: 0.861
Val AUC: 98.6, Accuracy: 57.5
epoch: 13 -- loss: 0.861
epoch: 14 -- loss: 0.860
Val AUC: 98.6, Accuracy: 57.5
epoch: 15 -- loss: 0.860
epoch: 16 -- loss: 0.859
Val AUC: 98.6, Accuracy: 57.5
epoch: 17 -- loss: 0.859
epoch: 18 -- loss: 0.858
Val AUC: 98.7, Accuracy: 57.5
epoch: 19 -- loss: 0.858


In [None]:
# test: score the held-out test edges (and their negative samples) with the current model
_, adj_rec, node_emb = model([nodes, adj_norm])
adj_rec = adj_rec.numpy()
roc_score, ap_score = get_roc_score(test_edges, test_edges_false, adj_rec)
print(f"AUC: {roc_score*100:.1f}, AP: {ap_score*100:.1f}")
# NOTE: this accuracy compares EVERY entry of the rounded reconstruction with adj_label,
# not only the held-out test edges
test_acc = np.mean(np.round(adj_rec.ravel()) == adj_label)
print(f"Test accuracy: {test_acc*100:.1f}")

AUC: 98.3, AP: 97.9
Test accuracy: 57.5


## AutoEncoder with Wasserstein Loss

In [None]:
import tensorflow as tf
from tensorflow.keras.layers import Input, Dense, Conv2D, Flatten, Reshape, Conv2DTranspose
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam
import numpy as np


In [None]:
# Encoder
def build_encoder(input_shape, latent_dim):
    """Build the convolutional encoder.

    Two stride-2 3x3 convolutions (32 then 64 filters, ReLU, 'same' padding)
    followed by a flatten and a linear dense projection to `latent_dim` units.

    Returns a Keras ``Model`` named "encoder" mapping an input of shape
    `input_shape` to the latent vector.
    """
    inputs = Input(shape=input_shape)
    features = inputs
    for num_filters in (32, 64):
        features = Conv2D(num_filters, kernel_size=3, strides=2, activation='relu', padding='same')(features)
    flat = Flatten()(features)
    latent = Dense(latent_dim, activation=None)(flat)  # Latent space
    return Model(inputs, latent, name="encoder")

# Decoder
def build_decoder(output_shape, latent_dim):
    """Build the transposed-convolution decoder.

    A dense layer is reshaped to a ``(H/4, W/4, 64)`` feature map, upsampled
    by two stride-2 transposed convolutions back to ``(H, W)``, and mapped to
    ``output_shape[-1]`` channels with a sigmoid.

    Parameters
    ----------
    output_shape : tuple (height, width, channels)
        Shape of the images to generate. Height and width must be divisible
        by 4 (two 2x upsampling stages). For 28x28 images the base feature
        map is 7x7, as in the previously hard-coded version.
    latent_dim : int
        Size of the latent input vector.

    Returns
    -------
    Keras ``Model`` named "decoder".

    Raises
    ------
    ValueError
        If the height or width of `output_shape` is not divisible by 4.
    """
    height, width = output_shape[0], output_shape[1]
    if height % 4 or width % 4:
        raise ValueError(
            f"output height and width must be divisible by 4, got {height}x{width}"
        )
    base_h, base_w = height // 4, width // 4  # spatial size before the two 2x upsamplings

    latent_inputs = Input(shape=(latent_dim,))
    x = Dense(units=base_h * base_w * 64, activation='relu')(latent_inputs)
    x = Reshape((base_h, base_w, 64))(x)
    x = Conv2DTranspose(64, kernel_size=3, strides=2, activation='relu', padding='same')(x)
    x = Conv2DTranspose(32, kernel_size=3, strides=2, activation='relu', padding='same')(x)
    outputs = Conv2DTranspose(output_shape[-1], kernel_size=3, activation='sigmoid', padding='same')(x)
    return Model(latent_inputs, outputs, name="decoder")


In [None]:
def build_critic(input_shape):
    """Build the critic network.

    Two stride-2 3x3 convolutions (32 then 64 filters, ReLU, 'same' padding),
    a flatten, and a single linear unit producing an unbounded score.

    Returns a Keras ``Model`` named "critic".
    """
    inputs = Input(shape=input_shape)
    features = inputs
    for num_filters in (32, 64):
        features = Conv2D(num_filters, kernel_size=3, strides=2, activation='relu', padding='same')(features)
    flat = Flatten()(features)
    score = Dense(1, activation=None)(flat)  # Output score
    return Model(inputs, score, name="critic")


In [None]:
def wasserstein_loss(y_true, y_pred):
    """Return the mean of the element-wise product ``y_true * y_pred``."""
    signed_scores = y_true * y_pred
    return tf.reduce_mean(signed_scores)


In [None]:
def gradient_penalty(critic, real_samples, fake_samples):
    """Gradient penalty on random interpolations between real and fake samples.

    For each sample a coefficient ``alpha ~ U(0, 1)`` mixes the real and the
    fake sample; the penalty is the mean of ``(||grad critic(x_hat)|| - 1)^2``
    over those interpolated points.

    Parameters
    ----------
    critic : callable
        Maps a batch of samples to one score per sample.
    real_samples, fake_samples : 4-D arrays/tensors of identical shape
        Batches laid out as (batch, dim1, dim2, dim3); the gradient norm is
        taken over axes 1-3.

    Returns
    -------
    Scalar tensor with the penalty value.
    """
    # Use the dynamic batch size: the static `.shape[0]` is None when the batch
    # dimension is unknown at trace time (e.g. batches coming from tf.data).
    batch_size = tf.shape(real_samples)[0]
    alpha = tf.random.uniform(shape=tf.stack([batch_size, 1, 1, 1]), minval=0.0, maxval=1.0)
    interpolates = alpha * real_samples + (1 - alpha) * fake_samples

    with tf.GradientTape() as tape:
        tape.watch(interpolates)
        predictions = critic(interpolates)

    gradients = tape.gradient(predictions, interpolates)
    # Small epsilon inside the sqrt: its derivative is infinite at 0, which would
    # turn the critic update into NaNs whenever a gradient vanishes.
    gradient_norm = tf.sqrt(tf.reduce_sum(tf.square(gradients), axis=[1, 2, 3]) + 1e-12)
    return tf.reduce_mean((gradient_norm - 1.0) ** 2)


In [None]:
@tf.function
def train_step(encoder, decoder, critic, optimizer_ae, optimizer_critic, real_samples, lambda_gp=10):
    """Run one autoencoder update followed by one critic update.

    1. The autoencoder (encoder + decoder) is updated on the mean squared
       reconstruction error of `real_samples`.
    2. The critic is updated on
       ``mean(critic(reconstruction)) - mean(critic(real)) + lambda_gp * gradient_penalty``,
       using the reconstructions computed before the autoencoder update.

    NOTE(review): the critic's output never enters the autoencoder loss, so the
    critic is trained but does not influence the encoder/decoder.

    Returns
    -------
    (ae_loss, critic_loss) : scalar tensors
    """
    # --- autoencoder step ---
    ae_variables = encoder.trainable_variables + decoder.trainable_variables
    with tf.GradientTape() as ae_tape:
        reconstructions = decoder(encoder(real_samples))
        ae_loss = tf.reduce_mean(tf.square(real_samples - reconstructions))
    ae_grads = ae_tape.gradient(ae_loss, ae_variables)
    optimizer_ae.apply_gradients(zip(ae_grads, ae_variables))

    # --- critic step ---
    with tf.GradientTape() as critic_tape:
        scores_fake = critic(reconstructions)
        scores_real = critic(real_samples)
        penalty = gradient_penalty(critic, real_samples, reconstructions)
        critic_loss = tf.reduce_mean(scores_fake) - tf.reduce_mean(scores_real) + lambda_gp * penalty
    critic_variables = critic.trainable_variables
    critic_grads = critic_tape.gradient(critic_loss, critic_variables)
    optimizer_critic.apply_gradients(zip(critic_grads, critic_variables))

    return ae_loss, critic_loss


In [None]:
# Hyperparameters
latent_dim = 16
input_shape = (28, 28, 1)  # Example for grayscale images
output_shape = input_shape
batch_size = 64
epochs = 100
lambda_gp = 10

# Data: MNIST training images scaled to [0, 1], with a trailing channel axis
(x_train, _), (_, _) = tf.keras.datasets.mnist.load_data()
x_train = (x_train.astype("float32") / 255.0)[..., np.newaxis]

# Models
encoder = build_encoder(input_shape, latent_dim)
decoder = build_decoder(output_shape, latent_dim)
critic = build_critic(output_shape)

# Optimizers (same settings for the autoencoder and the critic)
optimizer_ae = Adam(learning_rate=0.0001, beta_1=0.5)
optimizer_critic = Adam(learning_rate=0.0001, beta_1=0.5)

# Training: sequential mini-batches; the losses of the last batch are reported per epoch
num_train = len(x_train)
for epoch in range(epochs):
    for start in range(0, num_train, batch_size):
        real_samples = x_train[start:start + batch_size]
        ae_loss, critic_loss = train_step(encoder, decoder, critic, optimizer_ae, optimizer_critic, real_samples, lambda_gp)

    print(f"Epoch {epoch + 1}/{epochs}, AE Loss: {ae_loss.numpy()}, Critic Loss: {critic_loss.numpy()}")


In [None]:
def generate_samples(decoder, num_samples=10, latent_dim=16):
    """Decode `num_samples` latent vectors drawn from a standard normal.

    Parameters
    ----------
    decoder : object with a ``predict`` method
        Called with an array of shape (num_samples, latent_dim).
    num_samples : int
        Number of latent vectors to draw.
    latent_dim : int
        Dimensionality of each latent vector.

    Returns
    -------
    Whatever ``decoder.predict`` returns for the sampled latent vectors.
    """
    latent_noise = np.random.normal(size=(num_samples, latent_dim))
    return decoder.predict(latent_noise)


## Variational Auto Encoder

In [None]:
# load data: the Cora citation dataset; work with its first graph
dataset = citation.Cora()
graph = dataset[0]
nodes = graph.x # node feature matrix

  self._set_arrayXarray(i, j, x)


In [None]:
# target graph to reconstruct: the adjacency of the full graph plus self-loops,
# densified and flattened into a 1-D vector (one entry per ordered node pair)
adj_label = graph.a + sp.eye(graph.a.shape[0], dtype=np.float32)
adj_label = adj_label.toarray().reshape([-1])

In [None]:
# randomly hold out edges for the validation/test sets; adj_train keeps only the remaining
# training edges (the returned train-edge list is discarded). The split uses np.random.
adj_train, _, val_edges, val_edges_false, test_edges, test_edges_false = mask_test_edges(graph.a)

In [None]:
# normalize the training adjacency matrix with GCNConv.preprocess and convert it to a sparse tensor
adj_norm = GCNConv.preprocess(adj_train)
adj_norm = sp_matrix_to_sp_tensor(adj_norm)

In [None]:
# class weights (needed because edges are heavily outnumbered by non-edges), with N the
# number of rows of adj_train and S = sum(adj_train):
#   pos_weight = (N*N - S) / S          -> weight of positive entries in the loss
#   norm       = N*N / (2 * (N*N - S))  -> scale factor applied to the reconstruction loss
pos_weight = float(adj_train.shape[0] * adj_train.shape[0] - adj_train.sum()) / adj_train.sum()
norm = adj_train.shape[0] * adj_train.shape[0] / float((adj_train.shape[0] * adj_train.shape[0] - adj_train.sum()) * 2)

In [None]:
# params
hidden_dim1, hidden_dim2 = 32, 16 # units in the first GCN layer / size of the latent embedding
dropout = 0.0                     # dropout rate (0.0 disables dropout)
l2_reg = 0e-5                     # L2 regularization rate (note: 0e-5 evaluates to 0.0, i.e. no regularization)
learning_rate = 1e-2              # learning rate
epochs = 20                       # max number of training epochs
val_epochs = 2                    # evaluate on the validation set every `val_epochs` epochs
num_nodes = dataset.n_nodes               # number of nodes in the graph
num_feats = dataset.n_node_features       # original size of node features

In [None]:
# GNN architecture: shared GCN layer, two GCN heads (mean / log-std of the latent
# embedding) and an inner-product decoder
def sample_z(args):
    """Reparameterisation trick: z = mean + eps * exp(log_std), with eps ~ N(0, I)."""
    mean, log_std = args
    return mean + tf.random.normal(tf.shape(mean)) * tf.exp(log_std)

nodes_input = Input(shape=(num_feats,))
adj_input = Input((num_nodes,), sparse=True)
gc = GCNConv(hidden_dim1, activation="relu", kernel_regularizer=l2(l2_reg))([nodes_input, adj_input])
gc = Dropout(dropout)(gc)
z_mean = GCNConv(hidden_dim2, activation=None, kernel_regularizer=l2(l2_reg))([gc, adj_input])
z_log_std = GCNConv(hidden_dim2, activation=None, kernel_regularizer=l2(l2_reg))([gc, adj_input])
# Sample inside a layer so fresh noise is drawn on EVERY forward pass. A bare
# tf.random.normal([...]) call here would run once while the model is being built
# and be baked into the graph as a constant.
z = tf.keras.layers.Lambda(sample_z, name="z_sample")([z_mean, z_log_std])
output = tf.matmul(z, tf.transpose(z)) # stochastic logits, used by the training loss
output = tf.reshape(output, [-1])
output_det = tf.matmul(z_mean, tf.transpose(z_mean)) # this is not used for training and we make it deterministic
adj_rec = tf.keras.layers.Activation('sigmoid')(output_det) # edge probabilities for evaluation
model = Model(inputs=[nodes_input, adj_input], outputs=[output, adj_rec, z_mean, z_log_std]) # build model
optimizer = Adam(learning_rate=learning_rate)

In [None]:
# train
# Loss target built from the TRAINING adjacency only (plus self-loops): the held-out
# validation/test edges must not appear in the objective, otherwise the link-prediction
# scores are inflated by label leakage. This also matches pos_weight and norm, which are
# derived from adj_train. Cast to float32 so the labels match the dtype of the logits.
adj_label_train = (adj_train + sp.eye(adj_train.shape[0], dtype=np.float32)).toarray().reshape([-1]).astype(np.float32)

@tf.function
def train():
    """Run one full-batch optimisation step and return the scalar training loss.

    loss = norm * weighted cross-entropy(logits, adj_label_train)
           - kl_loss + regularisation losses,
    where `kl_loss` holds the negated, scaled KL divergence between the
    approximate posterior N(z_mean, exp(z_log_std)^2) and N(0, I); it is
    therefore subtracted from the loss.
    """
    with tf.GradientTape() as tape:
        predictions, _, model_z_mean, model_z_log_std = model([nodes, adj_norm], training=True)
        rec_loss = norm*tf.reduce_mean(tf.nn.weighted_cross_entropy_with_logits(logits=predictions, labels=adj_label_train, pos_weight=pos_weight)) # reconstruction loss
        kl_loss = (0.5 / num_nodes) * tf.reduce_mean(tf.reduce_sum(1 + 2 * model_z_log_std - tf.square(model_z_mean) - tf.square(tf.exp(model_z_log_std)), 1)) # latent loss
        loss = rec_loss - kl_loss + sum(model.losses) # total loss
    gradients = tape.gradient(loss, model.trainable_variables)
    optimizer.apply_gradients(zip(gradients, model.trainable_variables))
    return loss

best_val_roc = 0
for epoch in range(1, epochs + 1): # inclusive upper bound: run the full `epochs` epochs
    loss = train()
    print(f"epoch: {epoch:d} -- loss: {loss:.3f}")
    if epoch%val_epochs==0:
        _, adj_rec, _, _ = model([nodes, adj_norm])
        adj_rec = adj_rec.numpy()
        val_roc, _ = get_roc_score(val_edges, val_edges_false, adj_rec)
        if val_roc <= best_val_roc:
            break # early stopping: validation AUC did not improve
        else:
            best_val_roc = val_roc
            acc = np.mean(np.round(adj_rec) == graph.a.toarray())
            print(f"Val AUC: {best_val_roc*100:.1f}, Accuracy: {acc*100:.1f}")

epoch: 1 -- loss: 1.863
epoch: 2 -- loss: 1.452
Val AUC: 70.5, Accuracy: 0.1
epoch: 3 -- loss: 1.249
epoch: 4 -- loss: 1.092
Val AUC: 72.4, Accuracy: 0.1
epoch: 5 -- loss: 0.960
epoch: 6 -- loss: 0.863
Val AUC: 76.9, Accuracy: 0.5
epoch: 7 -- loss: 0.795
epoch: 8 -- loss: 0.752
Val AUC: 79.2, Accuracy: 2.4
epoch: 9 -- loss: 0.725
epoch: 10 -- loss: 0.704
Val AUC: 83.8, Accuracy: 11.4
epoch: 11 -- loss: 0.679
epoch: 12 -- loss: 0.654
Val AUC: 84.6, Accuracy: 33.9
epoch: 13 -- loss: 0.627
epoch: 14 -- loss: 0.597


In [None]:
# test: score the held-out test edges (and their negative samples) with the current model;
# the second model output is the sigmoid of z_mean @ z_mean^T, the third is z_mean
_, adj_rec, node_emb, _ = model([nodes, adj_norm])
adj_rec = adj_rec.numpy()
roc_score, ap_score = get_roc_score(test_edges, test_edges_false, adj_rec)
print(f"AUC: {roc_score*100:.1f}, AP: {ap_score*100:.1f}")
# NOTE: this accuracy compares EVERY entry of the rounded reconstruction with adj_label,
# not only the held-out test edges
test_acc = np.mean(np.round(adj_rec.ravel()) == adj_label)
print(f"Test accuracy: {test_acc*100:.1f}")

AUC: 85.0, AP: 83.3
Test accuracy: 45.7
