In [1]:
######################################################################
# Semi-Supervised Classification of Graph Nodes using Exponential Decay
# L42: Assessment 2
# Jan Ondras (jo356), Trinity College
######################################################################
# Baseline MLP training and validation, Cora dataset
#############################################################################################################
# Load data 
#############################################################################################################

import keras
from sklearn.metrics import accuracy_score
from keras.models import Sequential
from keras.layers import Dense, Dropout, Activation
from keras.optimizers import SGD, Adam
from keras.callbacks import EarlyStopping
import numpy as np
import networkx as nx
from matplotlib import pyplot as plt
import time
import os
from gcn.utils import *

dataset_type = 'cora'

# Load data
adj, features, y_train, y_val, y_test, train_mask, val_mask, test_mask = load_data(dataset_type)

if dataset_type == 'cora':
    N_classes = 7
    N_features = 1433
    N_nodes = 2708
    N_edges = 5278 #5429 - incorrect in paper !
    if N_classes != y_train.shape[1] or N_features != features.shape[1] or N_nodes != features.shape[0] or N_edges != np.sum(adj.todense())/2:
        raise ValueError("Dataset dimensions differ from expected!")
else:
    raise ValueError("Dataset not supported!")
print dataset_type, "dataset:", "#classes =", N_classes, ", #features =", N_features, ", #nodes =", N_nodes, ", #edges =", N_edges

X_train = features[train_mask].toarray()
X_val = features[val_mask].toarray()
X_test = features[test_mask].toarray()
y_train = y_train[train_mask]
y_val = y_val[val_mask]
y_test = y_test[test_mask]

print "Feature set shapes (train, valid, test):", X_train.shape, X_val.shape, X_test.shape
print "Labels shapes (train, valid, test):", y_train.shape, y_val.shape, y_test.shape

Using TensorFlow backend.


cora dataset: #classes = 7 , #features = 1433 , #nodes = 2708 , #edges = 5278
Feature set shapes (train, valid, test): (140, 1433) (500, 1433) (1000, 1433)
Labels shapes (train, valid, test): (140, 7) (500, 7) (1000, 7)


In [None]:
# Visualise the whole network: build a networkx graph from the sparse adjacency
# matrix and draw it on a large canvas (no explicit node positions are passed).
G = nx.from_scipy_sparse_matrix(adj)
plt.figure(figsize=(15, 15))
draw_options = dict(node_size=5)  # small markers
nx.draw(G, **draw_options)
plt.show()

In [2]:
#############################################################################################################
# Baseline MLP
#############################################################################################################
# Tune #hidden layers and #hidden units (same for each layer)
# Dropout fixed

epochs = 10000
train_batch_size = len(X_train)
val_batch_size = len(X_val)
test_batch_size = len(X_test)

N_runs = 100
dropout = 0.5

N_hl_range = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10] # range of numbers of hidden layers
N_hu_range = [10, 15, 20, 25, 30, 35, 40, 45, 50, 55, 60, 65, 70, 75, 80] # range of numbers of units per hidden layer
print "Validation over ", len(N_hl_range) * len(N_hu_range), "=", len(N_hl_range), "x", len(N_hu_range), "parameter settings"
vals = np.zeros((len(N_hl_range), len(N_hu_range)))
vals_std = np.zeros((len(N_hl_range), len(N_hu_range)))

for a, N_hl in enumerate(N_hl_range):
    for b, N_hu in enumerate(N_hu_range):
        st = time.time()
        # Create model
        model = Sequential()
        model.add(Dense(N_hu, activation='relu', kernel_initializer='he_uniform', input_dim=N_features))
        model.add(Dropout(dropout))
        for i in range(1, N_hl):
            model.add(Dense(N_hu, activation='relu', kernel_initializer='he_uniform'))
            model.add(Dropout(dropout))
        model.add(Dense(N_classes, activation='softmax'))

        model.compile(loss='categorical_crossentropy', optimizer=Adam(), metrics=['accuracy'])
        #print model.summary()
        early_stop = EarlyStopping(monitor='val_acc', patience=10, verbose=0) # stop after 10 epochs without improvement in val_acc

        vals_actual = []
        for i in range(N_runs):
            model.fit(X_train, y_train, epochs=epochs, batch_size=train_batch_size, 
                       validation_data = (X_val, y_val), verbose=0, callbacks=[early_stop])

            vals_actual.append( model.evaluate(X_val, y_val, batch_size=val_batch_size, verbose=0)[1] )

        vals[a][b] = np.mean(vals_actual)
        vals_std[a][b] = np.std(vals_actual)
        print "Time taken: ", time.time()-st, (time.time()-st)/60. 
# Total ~ 1.5 hod

Validation over  150 = 10 x 15 parameter settings
Time taken:  14.9907910824 0.249849418799
Time taken:  16.9219071865 0.28203531901
Time taken:  12.910943985 0.215182880561
Time taken:  13.5918500423 0.226533651352
Time taken:  13.7651000023 0.229419668516
Time taken:  15.1787371635 0.252981499831
Time taken:  14.5438978672 0.24239881436
Time taken:  14.503319025 0.241722500324
Time taken:  14.9355580807 0.248926432927
Time taken:  15.2085719109 0.253476715088
Time taken:  14.7952589989 0.246590264638
Time taken:  15.8221640587 0.263703465462
Time taken:  16.4285390377 0.273809631666
Time taken:  15.6358048916 0.260597248872
Time taken:  16.9731218815 0.28288586537
Time taken:  17.7909829617 0.296520316601
Time taken:  21.5377390385 0.358964852492
Time taken:  17.8638181686 0.297732019424
Time taken:  18.6987950802 0.311650983493
Time taken:  19.3455328941 0.322426116467
Time taken:  19.4145600796 0.323579068979
Time taken:  20.3973379135 0.339958449205
Time taken:  21.9887621403 0.36

In [3]:
#############################################################################################################
# Save the validation results together with the settings that produced them.
# IDs already used:
#   0 - zeroth trial, smaller # of hidden sizes
#   1 - first ok trial, more # of hidden sizes
ID = 1

# Build the output path once so the existence check and the write cannot diverge.
save_path = './../../../Dataset/baseline_' + str(ID) + '.npz'

# Never overwrite an existing results file: pick an unused ID instead.
if os.path.exists(save_path):
    raise IOError("Results file " + save_path + " already exists - set an ID that is not in use!")
np.savez(save_path, vals=vals, vals_std=vals_std,
         N_hl_range=N_hl_range, N_hu_range=N_hu_range, N_runs=N_runs, dropout=dropout, epochs=epochs)