# Exploring GCN

Graph classification using GCNs implemented in the Spektral framework.

In [1]:
import pandas as pd
import numpy as np
import matplotlib as mlp
import matplotlib.pyplot as plt
import seaborn as sns
import statsmodels.api as sm
import pickle

In [2]:
from sklearn.model_selection import train_test_split
import tensorflow as tf
import spektral as sp

# Restore preprocessed datasets

In [3]:
# Reload `np_timeseries` from IPython's %store database; it must have been
# persisted beforehand with `%store np_timeseries` (presumably by the
# preprocessing notebook — confirm).
%store -r np_timeseries

In [4]:
# Cleaned patient table; the first CSV column is used as the index.
patients_csv = 'patients-cleaned.csv'
df_patients = pd.read_csv(patients_csv, index_col=0)

In [5]:
# Node features: the time-series array restored via %store.
X = np_timeseries
# Labels: the `target` column of the patient table as a NumPy array.
y = df_patients['target'].values

# Features and labels come from two different sources, so verify that they
# describe the same number of subjects before anything is split or trained.
assert X.shape[0] == y.shape[0], (
    f'Sample count mismatch: X has {X.shape[0]} subjects, y has {y.shape[0]}'
)

In [6]:
# Sanity check: feature tensor shape followed by label vector shape.
print(X.shape, y.shape, sep='\n')

(190, 90, 400)
(190,)


In [10]:
# Dataset dimensions, unpacked from the 3-D feature tensor X.
samples, regions, F = X.shape   # subjects (190), regions (90), time points per region (400)
N = regions                     # Alias used as the node count in the model inputs.
n_classes = 2                   # Control / patient.

In [11]:
# Disabled cell: builds a per-subject Granger-causality matrix (lag 1) between
# pairs of region time series and pickles the result.
# NOTE(review): only entries with j >= i are written into the np.empty array, so
# the lower triangle would hold uninitialised values if this were re-enabled.
# NOTE(review): an entry is set to 0 when p_value < 0.05 and to 1 otherwise —
# confirm this is the intended edge convention.
# gc = np.empty((samples, regions, regions))

# # Granger matrix.
# for p in range(samples):
#     for i in range(regions):
#         ts1 = np_timeseries[p,i,:]
#         for j in range(i, regions):
#             ts2 = np_timeseries[p,j,:]
#             gc_test = sm.tsa.stattools.grangercausalitytests(np.vstack((ts1, ts2)).T,1, verbose=False)
#             p_value = gc_test[1][0]['ssr_ftest'][1]

#             gc[p, i, j] = 0 if p_value < 0.05 else 1

# with open('granger-lambda=1-400-nopreprocessing.pickle', 'wb') as f:
#     pickle.dump(gc, f)

In [64]:
# Disabled cell: for every subject, computes np.cov and np.corrcoef of that
# subject's time-series matrix, stacks the results and pickles both arrays.
# The correlation file written here has the same name as the pickle that is
# loaded later in the notebook.
# corr = []
# cov = []

# # Covariance matrix.
# for p in range(samples):
#     ts = np_timeseries[p,:,:]
#     cov.append(np.cov(ts))
#     corr.append(np.corrcoef(ts))

# corr = np.asarray(corr)
# cov = np.asarray(cov)

# with open('covariance-400-nopreprocessing.pickle', 'wb') as f:
#     pickle.dump(cov, f)

# with open('correlation-400-nopreprocessing.pickle', 'wb') as f:
#     pickle.dump(corr, f)

In [69]:
# Alternative graph matrices (Granger, covariance) are kept here for reference
# but are not loaded at the moment.
# with open('granger-lambda=1-400-nopreprocessing.pickle', 'rb') as f:
#     gs = pickle.load(f)

# with open('covariance-400-nopreprocessing.pickle', 'rb') as f:
#     cov = pickle.load(f)

# Load the cached correlation matrices.
# pickle can execute arbitrary code on load — only open files you produced yourself.
corr_cache = 'correlation-400-nopreprocessing.pickle'
with open(corr_cache, 'rb') as cache_file:
    corr = pickle.load(cache_file)

In [72]:
# Binarise the correlation matrices into adjacency matrices: entries below
# `thresh` become 0 (no edge), everything else becomes 1 (edge).
# `thresh` used to be set to 0.5 while the mask hard-coded 0.2; it is now bound
# to the value that was actually applied, so the resulting A is unchanged.
thresh = 0.2

A = np.where(corr < thresh, 0, 1)

In [73]:
# Fraction of non-zero adjacency entries over all subjects.
edge_count = np.sum(A)
possible_edges = samples * regions * regions
print(f'Graph density: {edge_count / possible_edges:.2f}')

Graph density: 0.36


## Training and testing datasets

In [92]:
# Hold out 20% of the subjects for testing; adjacency, features and labels are
# split together so they stay aligned. The seed is fixed for reproducibility.
(A_train, A_test,
 x_train, x_test,
 y_train, y_test) = train_test_split(
    A, X, y,
    test_size=0.2,
    shuffle=True,
    random_state=42,
)


In [96]:
# Placeholder for augmenting the training data with Gaussian noise.
# At the moment this only prints the shape of the first training sample.
for sample in x_train[:1]:
    print(sample.shape)

(90, 400)


In [83]:
# Training hyper-parameters (taken from `example-spectral`).
batch_size = 4          # samples per gradient step
epochs = 200            # number of passes over the training set
learning_rate = 1e-3    # Adam step size
l2_reg = 1e-4           # L2 regularisation rate

In [88]:
# Graph-attention classifier: two GraphAttention layers (8 channels each),
# a global attention pooling layer and a single sigmoid output unit.
X_in = tf.keras.layers.Input(shape=(N, F))   # node features
A_in = tf.keras.layers.Input(shape=(N, N))   # adjacency matrix

# One L2 regulariser instance shared by both attention layers.
l2 = tf.keras.regularizers.L2(l2=l2_reg)

gat_hidden = sp.layers.GraphAttention(
    8,
    activation=tf.nn.relu,
    kernel_regularizer=l2,
)([X_in, A_in])
gat_out = sp.layers.GraphAttention(
    8,
    activation=tf.nn.relu,
    kernel_regularizer=l2,
)([gat_hidden, A_in])
graph_embedding = sp.layers.GlobalAttentionPool(128)(gat_out)

output = tf.keras.layers.Dense(1, activation=tf.nn.sigmoid)(graph_embedding)

In [89]:
# Assemble and compile the graph-attention model.
model = tf.keras.models.Model(inputs=[X_in, A_in], outputs=output)
# `lr` is a deprecated alias; `learning_rate` is the supported keyword.
optimizer = tf.keras.optimizers.Adam(learning_rate=learning_rate)
# Binary cross-entropy matches the single sigmoid output unit.
loss = tf.keras.losses.BinaryCrossentropy()

model.compile(optimizer=optimizer, loss=loss, metrics=['acc'])
model.summary()

Model: "functional_20"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_25 (InputLayer)           [(None, 90, 400)]    0                                            
__________________________________________________________________________________________________
input_26 (InputLayer)           [(None, 90, 90)]     0                                            
__________________________________________________________________________________________________
graph_attention_10 (GraphAttent (None, 90, 8)        3224        input_25[0][0]                   
                                                                 input_26[0][0]                   
__________________________________________________________________________________________________
graph_attention_11 (GraphAttent (None, 90, 8)        88          graph_attention_10[0]

In [90]:
# Early stopping is currently disabled; the line is kept for reference.
#stopping_cb = tf.keras.callbacks.EarlyStopping(patience=es_patience, restore_best_weights=True)

# Multiply the learning rate by 0.1 when val_loss has stopped improving
# (patience of 50 epochs).
lr_cb = tf.keras.callbacks.ReduceLROnPlateau(
    patience=50,
    factor=0.1,
    monitor='val_loss',
)

In [91]:
# Fit the graph-attention model; 10% of the training data is set aside for
# validation, which feeds the learning-rate callback.
fit_options = dict(
    batch_size=batch_size,
    epochs=epochs,
    validation_split=0.1,
    callbacks=[lr_cb],
)
model.fit([x_train, A_train], y_train, **fit_options)

Epoch 1/200
Epoch 2/200
Epoch 3/200
Epoch 4/200
Epoch 5/200
Epoch 6/200
Epoch 7/200
Epoch 8/200
Epoch 9/200
Epoch 10/200
Epoch 11/200
Epoch 12/200
Epoch 13/200
Epoch 14/200
Epoch 15/200
Epoch 16/200
Epoch 17/200
Epoch 18/200
Epoch 19/200
Epoch 20/200
Epoch 21/200
Epoch 22/200
Epoch 23/200
Epoch 24/200
Epoch 25/200
Epoch 26/200
Epoch 27/200
Epoch 28/200
Epoch 29/200
Epoch 30/200
Epoch 31/200
Epoch 32/200
Epoch 33/200
Epoch 34/200
Epoch 35/200
Epoch 36/200
Epoch 37/200
Epoch 38/200
Epoch 39/200
Epoch 40/200
Epoch 41/200
Epoch 42/200
Epoch 43/200
Epoch 44/200
Epoch 45/200
Epoch 46/200
Epoch 47/200
Epoch 48/200
Epoch 49/200
Epoch 50/200
Epoch 51/200
Epoch 52/200
Epoch 53/200
Epoch 54/200
Epoch 55/200
Epoch 56/200
Epoch 57/200
Epoch 58/200
Epoch 59/200
Epoch 60/200
Epoch 61/200
Epoch 62/200
Epoch 63/200
Epoch 64/200
Epoch 65/200
Epoch 66/200
Epoch 67/200
Epoch 68/200
Epoch 69/200
Epoch 70/200
Epoch 71/200
Epoch 72/200
Epoch 73/200
Epoch 74/200
Epoch 75/200
Epoch 76/200
Epoch 77/200
Epoch 78

<tensorflow.python.keras.callbacks.History at 0x1ae49821fc8>

## Only graph, no node features

In [36]:
# Hyper-parameters for the GraphConv model.
l2_reg = 0.001                               # L2 regularisation rate
l2 = tf.keras.regularizers.L2(l2=l2_reg)     # regulariser built from that rate
batch_size = 4                               # samples per gradient step
epochs = 200                                 # number of training epochs
patience = 10                                # early-stopping patience (no callback in the visible cells reads it)

In [37]:
# GraphConv classifier: three stacked GraphConv layers (8 channels, ELU),
# sum pooling over the nodes and a single sigmoid output unit.
# NOTE(review): the section heading says "no node features", but the model
# still takes the node-feature input X_in — confirm which is intended.
# NOTE(review): Spektral's GraphConv appears to expect a pre-normalised
# adjacency matrix; check whether A needs preprocessing first.
X_in = tf.keras.layers.Input(shape=(N, F))
A_in = tf.keras.layers.Input(shape=(N, N))

x = X_in
for layer_idx in range(3):
    x = sp.layers.GraphConv(8, activation="elu", kernel_regularizer=l2)([x, A_in])

x = sp.layers.GlobalSumPool()(x)
outputs = tf.keras.layers.Dense(1, activation="sigmoid")(x)

In [38]:
# Assemble and compile the GraphConv model.
# `learning_rate` is not redefined in this section; it is reused from the
# earlier hyper-parameter cell.
model = tf.keras.models.Model(inputs=[X_in, A_in], outputs=outputs)
# `lr` is a deprecated alias; `learning_rate` is the supported keyword.
optimizer = tf.keras.optimizers.Adam(learning_rate=learning_rate)
loss = tf.keras.losses.BinaryCrossentropy()

model.compile(optimizer=optimizer, loss=loss, metrics=['acc'])
model.summary()

Model: "functional_10"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_15 (InputLayer)           [(None, 90, 400)]    0                                            
__________________________________________________________________________________________________
input_16 (InputLayer)           [(None, 90, 90)]     0                                            
__________________________________________________________________________________________________
graph_conv_8 (GraphConv)        (None, 90, 8)        3208        input_15[0][0]                   
                                                                 input_16[0][0]                   
__________________________________________________________________________________________________
graph_conv_9 (GraphConv)        (None, 90, 8)        72          graph_conv_8[0][0]   

In [39]:
# Fit the GraphConv model; 10% of the training data is set aside for validation.
fit_options = dict(
    batch_size=batch_size,
    epochs=epochs,
    validation_split=0.1,
)
model.fit([x_train, A_train], y_train, **fit_options)

Epoch 1/200
Epoch 2/200
Epoch 3/200
Epoch 4/200
Epoch 5/200
Epoch 6/200
Epoch 7/200
Epoch 8/200
Epoch 9/200
Epoch 10/200
Epoch 11/200
Epoch 12/200
Epoch 13/200
Epoch 14/200
Epoch 15/200
Epoch 16/200
Epoch 17/200
Epoch 18/200
Epoch 19/200
Epoch 20/200
Epoch 21/200
Epoch 22/200
Epoch 23/200
Epoch 24/200
Epoch 25/200
Epoch 26/200
Epoch 27/200
Epoch 28/200
Epoch 29/200
Epoch 30/200
Epoch 31/200
Epoch 32/200
Epoch 33/200
Epoch 34/200
Epoch 35/200
Epoch 36/200
Epoch 37/200
Epoch 38/200
Epoch 39/200
Epoch 40/200
Epoch 41/200
Epoch 42/200
Epoch 43/200
Epoch 44/200
Epoch 45/200
Epoch 46/200
Epoch 47/200
Epoch 48/200
Epoch 49/200
Epoch 50/200
Epoch 51/200
Epoch 52/200
Epoch 53/200
Epoch 54/200
Epoch 55/200
Epoch 56/200
Epoch 57/200
Epoch 58/200
Epoch 59/200
Epoch 60/200
Epoch 61/200
Epoch 62/200
Epoch 63/200
Epoch 64/200
Epoch 65/200
Epoch 66/200
Epoch 67/200
Epoch 68/200
Epoch 69/200
Epoch 70/200
Epoch 71/200
Epoch 72/200
Epoch 73/200
Epoch 74/200
Epoch 75/200
Epoch 76/200
Epoch 77/200
Epoch 78

<tensorflow.python.keras.callbacks.History at 0x1ae5aa00f08>