# Edge prediction problem
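- Goal: build a GCN model that predicts categories on the Cora graph. Note: despite the title, the code below classifies the *nodes* of the graph (one 7-class label per node); it does not predict edges.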
- Reference : https://github.com/dragen1860/TensorFlow-2.x-Tutorials/tree/master/20-GCN
- Tensorflow 2.0 version

In [1]:
import os
import sys
import numpy as np
import pickle as pkl
import scipy.sparse as sp
import networkx as nx
import pandas as pd

from numba import cuda

In [2]:
# Remove pandas' display limits so frames are shown without truncation:
# no cap on rows or columns, and a very large cap on the width of a cell.
pd.set_option('display.max_rows', None)
pd.set_option('display.max_columns', None)
pd.set_option('display.max_colwidth', 99999)

In [3]:
# Root directory of the tutorial checkout; the dataset files are read from <repo>/data/.
# NOTE(review): this is an absolute, machine-specific path — adjust it before running elsewhere.
repo = '/data/private/talkhello_dev_12307/work/homer/tf_20_tutorial/20-GCN/'
# Suffixes of the pickled dataset files (ind.<dataset>.<name>).
# `names` and `objects` are assigned again at the top of the loading cell below.
names = ['x', 'y', 'tx', 'ty', 'allx', 'ally', 'graph']
# Will hold the unpickled objects, in the same order as `names`.
objects = []

# Loading cora data

In [4]:
def parse_index_file(filename):
    """Read a text file that holds one integer per line.

    Args:
        filename: path of the index file.

    Returns:
        list[int]: the integers in file order (surrounding whitespace is stripped).

    Raises:
        ValueError: if a line cannot be parsed as an integer.
    """
    # The context manager guarantees the handle is closed, even if parsing fails.
    with open(filename) as handle:
        return [int(line.strip()) for line in handle]

In [5]:
# Suffixes of the seven pickled Cora files, in the order they are unpacked below.
names = ['x', 'y', 'tx', 'ty', 'allx', 'ally', 'graph']
objects = []

# File name template: data/ind.<dataset>.<name>
path = os.path.join(repo, "data/ind.{}.{}")
for i in range(len(names)):
    with open(path.format('cora', names[i]), 'rb') as f:
        # On Python 3 the pickles are decoded with latin1.
        # NOTE(review): pickle.load can execute arbitrary code — only load files from a trusted source.
        if sys.version_info > (3, 0):
            objects.append(pkl.load(f, encoding='latin1'))
        else:
            objects.append(pkl.load(f))

x, y, tx, ty, allx, ally, graph = tuple(objects)

# Indices of the test instances as listed in the index file, plus a sorted copy of them.
test_idx_path = os.path.join(repo, "data/ind.{}.test.index")
test_idx_reorder = parse_index_file(test_idx_path.format('cora'))
test_idx_range = np.sort(test_idx_reorder)

In [6]:
# allx: feature matrix of the train set (sparse)
print(allx.shape)

# ally: labels. Summing each row and then summing the row totals counts the labelled rows
# (the Korean text printed below means "number of data points that have a label").
print('Label을 가진 데이터의 갯수 :', sum(ally.sum(axis=1)))

(1708, 1433)
Label을 가진 데이터의 갯수 : 1708


In [7]:
# Features
# Stack the allx/ally rows on top of the tx/ty rows; LIL format allows the row assignment below.
features = sp.vstack((allx, tx)).tolil()
labels = np.vstack((ally, ty))

# Only the test rows are permuted: the rows found at the sorted test indices are
# written to the positions listed (in file order) in test_idx_reorder.
features[test_idx_reorder, :] = features[test_idx_range, :]
labels[test_idx_reorder, :] = labels[test_idx_range, :]

# The Korean label printed below means "train set + test set".
print('학습셋 + 테스트셋 :', features.shape, labels.shape)

학습셋 + 테스트셋 : (2708, 1433) (2708, 7)


In [8]:
# Adj: `graph` is a dictionary (node -> list of neighbours), not a matrix,
# so it is turned into a networkx graph first and then into a sparse adjacency matrix.
adj = nx.adjacency_matrix(nx.from_dict_of_lists(graph))
print('Adj shape :', adj.shape)

Adj shape : (2708, 2708)


Sparse labels -> we use a semi-supervised learning setting.
- Num of train set : 140
- Num of validation set : 500
- Num of test set : 1000

In [9]:
# Row indices of the three splits.
idx_test = test_idx_range.tolist()  # sorted test indices, 1708 ~
idx_train = range(len(y))  # the first len(y) rows, 0 ~ 139
idx_val = range(len(y), len(y)+500)  # the following 500 rows, 140 ~ 639

In [10]:
def sample_mask(idx, l):
    """Build a boolean mask of length ``l`` that is True at the positions in ``idx``.

    Args:
        idx: index or sequence of indices (list, range, array) to mark.
        l: length of the mask.

    Returns:
        numpy.ndarray: 1-D array of dtype bool.
    """
    # The builtin `bool` is used as dtype: the `np.bool` alias was removed from NumPy.
    mask = np.zeros(l, dtype=bool)
    mask[idx] = True
    return mask

In [11]:
# One boolean mask per split, each as long as the number of label rows.
train_mask = sample_mask(idx_train, labels.shape[0])  # 0 ~ 139
val_mask = sample_mask(idx_val, labels.shape[0])  # 140 ~ 639
test_mask = sample_mask(idx_test, labels.shape[0])  # 1708 ~

In [12]:
# Label matrices with the same shape as `labels`, initially all zeros.
y_train = np.zeros(labels.shape)
y_val = np.zeros(labels.shape)
y_test = np.zeros(labels.shape)

# Copy the label rows of each split; rows outside the split's mask stay zero.
y_train[train_mask, :] = labels[train_mask, :]
y_val[val_mask, :] = labels[val_mask, :]
y_test[test_mask, :] = labels[test_mask, :]

In [13]:
# Print the shapes of the adjacency matrix, features, label matrices and masks.
print('adj:', adj.shape)
print('features:', features.shape)
print('y:', y_train.shape, y_val.shape, y_test.shape)
print('mask:', train_mask.shape, val_mask.shape, test_mask.shape)

adj: (2708, 2708)
features: (2708, 1433)
y: (2708, 7) (2708, 7) (2708, 7)
mask: (2708,) (2708,) (2708,)


# Row normalization of feature matrix

In [14]:
def sparse_to_tuple(sparse_mx):
    """Convert a SciPy sparse matrix, or a list of them, to ``(coords, values, shape)``.

    Args:
        sparse_mx: a sparse matrix, or a list of sparse matrices.

    Returns:
        For a single matrix: a tuple ``(coords, values, shape)`` where ``coords`` is an
        (nnz, 2) array of (row, col) pairs, ``values`` the stored data and ``shape`` the
        matrix shape. For a list: the same list object, whose entries have been replaced
        in place by such tuples.
    """
    def _as_triplet(matrix):
        # Work on the COOrdinate representation; convert only when necessary.
        coo = matrix if sp.isspmatrix_coo(matrix) else matrix.tocoo()
        indices = np.vstack((coo.row, coo.col)).transpose()
        return indices, coo.data, coo.shape

    if not isinstance(sparse_mx, list):
        return _as_triplet(sparse_mx)

    # List input: overwrite each entry with its tuple form and hand the list back.
    for position, matrix in enumerate(sparse_mx):
        sparse_mx[position] = _as_triplet(matrix)
    return sparse_mx

- $C_{ii} = \sum_{j}X_{ij}$
- $C^{-1}X$

In [15]:
# Sum of every feature row (the C_ii of the formula above).
rowsum = np.array(features.sum(1))
# Element-wise reciprocal of the row sums, flattened to 1-D.
r_inv = np.power(rowsum, -1).flatten()
# A row that sums to 0 yields inf; use 0 instead so that such rows stay all-zero.
r_inv[np.isinf(r_inv)] = 0.
# Diagonal matrix C^-1.
r_mat_inv = sp.diags(r_inv)
# C^-1 X: scale every row by the reciprocal of its sum.
features = r_mat_inv.dot(features)
# Convert to the (coords, values, shape) tuple form.
# NOTE(review): `features` is rebound to a tuple here, so this cell cannot simply be
# re-run without first re-running the cells that build `features`.
features = sparse_to_tuple(features)

In [17]:
# Inspect the three parts of the tuple form: coordinates, values and dense shape.
print('features coordinates::', features[0].shape)
print('features data::', features[1].shape)
print('features shape::', features[2])

features coordinates:: (49216, 2)
features data:: (49216,)
features shape:: (2708, 1433)


# Adjacency matrix

$$\tilde{D}^{-\frac{1}{2}} \tilde{A} \tilde{D}^{-\frac{1}{2}}$$
- $\tilde{A}=A+I_N$ : the adjacency matrix of the undirected graph  $\mathcal{G}$ with added self-connections.
- $\tilde{D}_{ii} = \sum_{j}\tilde{A}_{ij}$ 

In [18]:
def normalize_adj(adj):
    """Symmetrically normalize an adjacency matrix: D^-0.5 * A * D^-0.5.

    Args:
        adj: adjacency matrix (anything accepted by ``scipy.sparse.coo_matrix``).

    Returns:
        scipy.sparse.coo_matrix: the normalized matrix. It is computed as
        ``(A D^-0.5)^T D^-0.5``; rows whose sum is 0 get a scaling factor of 0.
    """
    coo = sp.coo_matrix(adj)
    # Row sums, i.e. the diagonal of the degree matrix D.
    degree = np.array(coo.sum(1))
    # D^-0.5 as a flat vector; a zero degree gives inf, which is replaced by 0.
    inv_sqrt_degree = np.power(degree, -0.5).flatten()
    inv_sqrt_degree[np.isinf(inv_sqrt_degree)] = 0.
    scaling = sp.diags(inv_sqrt_degree)
    # Scale the columns, transpose, then scale the columns again.
    scaled_columns = coo.dot(scaling)
    return scaled_columns.transpose().dot(scaling).tocoo()

def preprocess_adj(adj):
    """Prepare an adjacency matrix for the simple GCN model.

    Adds self-connections (A + I), normalizes the result with ``normalize_adj`` and
    returns it in the tuple representation produced by ``sparse_to_tuple``.
    """
    with_self_loops = adj + sp.eye(adj.shape[0])
    return sparse_to_tuple(normalize_adj(with_self_loops))

In [19]:
# Single-element list holding the preprocessed adjacency matrix in tuple form.
support = [preprocess_adj(adj)]

# GCN model

In [20]:
import tensorflow as tf
from tensorflow.keras import layers
from tensorflow import keras
from tensorflow.keras import optimizers

In [21]:
def sparse_dropout(x, keep_prob, noise_shape):
    """Dropout for a sparse tensor.

    Args:
        x: sparse tensor whose stored values are randomly dropped.
        keep_prob: value added to the uniform noise; the kept values are scaled by 1/keep_prob.
        noise_shape: shape of the random tensor (one draw per stored value of ``x``).

    Returns:
        The sparse tensor with the retained values, multiplied by ``1/keep_prob``.
    """
    # keep_prob + U[0, 1) floors to 1 (keep) or 0 (drop).
    keep_scores = keep_prob + tf.random.uniform(noise_shape)
    keep_mask = tf.cast(tf.floor(keep_scores), dtype=tf.bool)
    retained = tf.sparse.retain(x, keep_mask)
    return retained * (1./keep_prob)

In [22]:
def dot(x, y, sparse=False):
    """Matrix product of ``x`` and ``y``.

    Uses ``tf.sparse.sparse_dense_matmul`` when ``sparse`` is truthy and
    ``tf.matmul`` otherwise.
    """
    if not sparse:
        return tf.matmul(x, y)
    return tf.sparse.sparse_dense_matmul(x, y)

In [23]:
def masked_softmax_cross_entropy(preds, labels, mask):
    """Softmax cross-entropy averaged with per-row mask weights.

    Args:
        preds: logits.
        labels: target distribution, one row per logit row.
        mask: per-row mask; it is cast to float32 and divided by its own mean.

    Returns:
        Scalar tensor: mean over all rows of (row loss * rescaled mask).
    """
    per_row_loss = tf.nn.softmax_cross_entropy_with_logits(logits=preds, labels=labels)
    weights = tf.cast(mask, dtype=tf.float32)
    # Dividing by the mean scales up the selected rows, so the mean over all rows
    # equals the mean over the selected rows only.
    weights = weights / tf.reduce_mean(weights)
    return tf.reduce_mean(per_row_loss * weights)

In [24]:
def masked_accuracy(preds, labels, mask):
    """Accuracy averaged with per-row mask weights.

    Args:
        preds: logits (argmax over axis 1 is the predicted class).
        labels: targets (argmax over axis 1 is the true class).
        mask: per-row mask; it is cast to float32 and divided by its own mean.

    Returns:
        Scalar tensor: mean over all rows of (row correctness * rescaled mask).
    """
    hits = tf.equal(tf.argmax(preds, 1), tf.argmax(labels, 1))
    hits_as_float = tf.cast(hits, tf.float32)
    weights = tf.cast(mask, dtype=tf.float32)
    # Dividing by the mean scales up the selected rows.
    weights = weights / tf.reduce_mean(weights)
    return tf.reduce_mean(hits_as_float * weights)

In [25]:
class GraphConvolution(layers.Layer):
    """Graph convolution layer.

    Computes ``activation(support[0] @ (x @ W) [+ b])``. Dropout is applied to the
    input first unless ``training`` is ``False``.

    Args:
        input_dim: number of input feature columns.
        output_dim: number of output columns.
        num_features_nonzero: noise shape passed to ``sparse_dropout`` (used for sparse inputs).
        dropout: dropout rate; sparse inputs keep values with probability ``1 - dropout``.
        sparse_inputs: whether ``x`` is a sparse tensor.
        activation: callable applied to the output.
        bias: whether to add a trainable bias vector.
        featureless: if true, ``x`` is ignored and the weight matrix is used directly.
        **kwargs: forwarded to ``layers.Layer``.
    """

    def __init__(self, input_dim, output_dim, num_features_nonzero, dropout=0., sparse_inputs=False,
                 activation=tf.nn.relu, bias=False, featureless=False, **kwargs):
        super(GraphConvolution, self).__init__(**kwargs)

        self.dropout = dropout
        self.activation = activation
        self.sparse_inputs = sparse_inputs
        self.featureless = featureless
        # Keep the on/off flag separate from the variable: testing the truth value of
        # a multi-element variable (`if self.bias:`) fails once the bias exists.
        self.use_bias = bool(bias)
        self.bias = bias
        self.num_features_nonzero = num_features_nonzero

        # One weight matrix per support matrix; this layer uses a single support.
        self.weights_ = []
        for i in range(1):
            # add_weight replaces the deprecated add_variable; keyword arguments are used
            # so the call does not depend on the positional order of the parameters.
            w = self.add_weight(name='weights' + str(i), shape=[input_dim, output_dim])
            self.weights_.append(w)
        if self.use_bias:
            self.bias = self.add_weight(name='bias', shape=[output_dim])

    def call(self, inputs, training=None):
        """Apply the layer to ``inputs = (x, support)``.

        ``support`` is a sequence of sparse tensors; ``training=None`` is treated like
        training mode (only an explicit ``False`` disables dropout).
        """
        x, support = inputs

        if training is not False:
            if self.sparse_inputs:
                x = sparse_dropout(x, 1-self.dropout, self.num_features_nonzero)
            else:
                x = tf.nn.dropout(x, self.dropout)

        supports = list()
        for i, weight in enumerate(self.weights_):
            if not self.featureless:
                pre_sup = dot(x, weight, sparse=self.sparse_inputs)
            else:
                pre_sup = weight
            # Do not rebind `support` here: it is the input list that is being indexed.
            supports.append(dot(support[i], pre_sup, sparse=True))

        output = tf.add_n(supports)

        # bias
        if self.use_bias:
            output += self.bias

        return self.activation(output)

In [26]:
class GCN(keras.Model):
    """Two-layer graph convolutional network that returns its own masked loss and accuracy.

    Args:
        input_dim: number of input feature columns.
        output_dim: number of output classes (width of the logits).
        num_features_nonzero: noise shape passed on to the layers for sparse dropout.
        hidden_dim: width of the hidden layer (default 16).
        dropout: dropout rate given to both layers (default 0.5).
        weight_decay: L2 coefficient applied to the first layer's variables (default 5e-4).
        **kwargs: forwarded to ``keras.Model``.
    """

    def __init__(self, input_dim, output_dim, num_features_nonzero,
                 hidden_dim=16, dropout=0.5, weight_decay=5e-4, **kwargs):
        super(GCN, self).__init__(**kwargs)

        self.input_dim = input_dim
        self.output_dim = output_dim
        self.num_features_nonzero = num_features_nonzero
        self.hidden_dim = hidden_dim
        self.dropout = dropout
        self.weight_decay = weight_decay
        self.layers_ = []

        print('input dim:', input_dim)
        print('output dim:', output_dim)
        print('num_features_nonzero:', num_features_nonzero)

        self._build()

        for p in self.trainable_variables:
            print(p.name, p.shape)

    def _build(self):
        """Create the two graph convolution layers and store them in ``self.layers_``."""
        # Layer 1: sparse input features -> hidden representation, ReLU.
        self.layers_.append(GraphConvolution(input_dim=self.input_dim,
                                             output_dim=self.hidden_dim,
                                             num_features_nonzero=self.num_features_nonzero,
                                             activation=tf.nn.relu,
                                             dropout=self.dropout,
                                             sparse_inputs=True))

        # Layer 2: hidden representation -> class logits, identity activation.
        self.layers_.append(GraphConvolution(input_dim=self.hidden_dim,
                                             output_dim=self.output_dim,
                                             num_features_nonzero=self.num_features_nonzero,
                                             activation=lambda x: x,
                                             dropout=self.dropout))

    def call(self, inputs, training=None):
        """Run the network on ``inputs = (x, label, mask, support)``.

        Returns:
            (loss, acc): the masked cross-entropy plus the weight-decay term of the
            first layer, and the masked accuracy.
        """
        x, label, mask, support = inputs

        # Iterate the explicitly built list so the order is the one set up in _build.
        hidden = x
        for layer in self.layers_:
            hidden = layer((hidden, support), training)
        output = hidden

        # Weight decay loss (first layer only)
        loss = tf.zeros([])
        for var in self.layers_[0].trainable_variables:
            loss += self.weight_decay * tf.nn.l2_loss(var)

        # Cross entropy error
        loss += masked_softmax_cross_entropy(output, label, mask)
        acc = masked_accuracy(output, label, mask)

        return loss, acc

# Training

In [27]:
# Adjacency matrix: display the preprocessed support list
# (one (coords, values, shape) tuple).
support

[(array([[   0,    0],
         [ 633,    0],
         [1862,    0],
         ...,
         [1473, 2707],
         [2706, 2707],
         [2707, 2707]], dtype=int32),
  array([0.25     , 0.25     , 0.2236068, ..., 0.2      , 0.2      ,
         0.2      ]),
  (2708, 2708))]

In [28]:
# Features: display the row-normalized features in (coords, values, shape) form.
features

(array([[   0, 1274],
        [   0, 1247],
        [   0, 1194],
        ...,
        [2707,  329],
        [2707,  186],
        [2707,   19]], dtype=int32),
 array([0.11111111, 0.11111111, 0.11111111, ..., 0.07692308, 0.07692308,
        0.07692308], dtype=float32),
 (2708, 1433))

In [29]:
# Build the model: input width is the feature count (features[2][1]), output width is the
# number of label columns, and the dropout noise shape is the shape of the stored feature values.
model = GCN(input_dim=features[2][1], output_dim=y_train.shape[1], num_features_nonzero=features[1].shape)

input dim: 1433
output dim: 7
num_features_nonzero: (49216,)
Instructions for updating:
Please use `layer.add_weight` method instead.
weights0:0 (1433, 16)
weights0:0 (16, 7)


In [30]:
# Convert the NumPy labels and masks to tensors.
# NOTE(review): the mask names, `features` and `support` are rebound here, so this cell
# cannot simply be re-run without re-running the cells that created them.
train_label = tf.convert_to_tensor(y_train)
train_mask = tf.convert_to_tensor(train_mask)
val_label = tf.convert_to_tensor(y_val)
val_mask = tf.convert_to_tensor(val_mask)
test_label = tf.convert_to_tensor(y_test)
test_mask = tf.convert_to_tensor(test_mask)
# Build sparse tensors from the (coords, values, shape) tuples; the support is cast to float32.
features = tf.SparseTensor(*features)
support = [tf.cast(tf.SparseTensor(*support[0]), dtype=tf.float32)]
# NOTE(review): the two names below are not used by the cells visible after this one.
num_features_nonzero = features.values.shape
dropout = 0.5

In [31]:
# Adam with a learning rate of 0.01. `learning_rate` is the documented argument name;
# `lr` is only a deprecated alias.
optimizer = optimizers.Adam(learning_rate=1e-2)

In [32]:
# Number of full-graph training steps run by the loop below.
epochs = 200

In [33]:
for epoch in range(epochs):
    # Forward pass on the training mask, recorded by the tape. `training` is left at its
    # default (None), which the layers treat as training mode, so dropout is active.
    with tf.GradientTape() as t:
        loss, acc = model((features, train_label, train_mask, support))
    # One optimizer step on all trainable variables.
    grads = t.gradient(loss, model.trainable_variables)
    optimizer.apply_gradients(zip(grads, model.trainable_variables))

    # Validation accuracy with dropout disabled.
    _, val_acc = model((features, val_label, val_mask, support), training=False)

    # Report every 20th epoch: epoch, training loss, training accuracy, validation accuracy.
    if epoch % 20 == 0:
        print(epoch, float(loss), float(acc), '\tval, d:', float(val_acc))

# Final evaluation on the test mask, with dropout disabled.
test_loss, test_acc = model((features, test_label, test_mask, support), training=False)

print('\ntest:', float(test_loss), float(test_acc))



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

0 1.9549707174301147 0.11428571492433548 	val, d: 0.1759999841451645
20 1.7731964588165283 0.5999999642372131 	val, d: 0.3739999830722809
40 1.5097137689590454 0.7285714149475098 	val, d: 0.5600000023841858
60 1.2669298648834229 0.8285714387893677 	val, d: 0.6840000152587891
80 1.0696214437484741 0.8928571343421936 	val, d: 0.7559998631477356
100 0.9236013889312744 0.9285714030265808 	val, d: 0.7739998698234558
120 0.8594609498977661 0.9285714626312256 	val, d: 0.7899999618530273
140 0.7561487555503845 0.9428571462631226 	val, d: 0.7820000052452087
160 0.6878491640090942 0.9500000476837158 	val, d: 0.7919999361038208
180 0.6463509202003479 0.9714285135269165 	val, d: 0.7899999618530273

tes

In [35]:
# Close the CUDA context through numba — presumably to release the GPU memory held by
# this kernel once training is done (TODO confirm).
cuda.close()