In [4]:
from collections import namedtuple
from networkx import read_edgelist, set_node_attributes
from pandas import read_csv, Series
from numpy import array

In [5]:
DataSet = namedtuple(
    'DataSet',
    field_names=['X_train', 'y_train', 'X_test', 'y_test', 'network']
)

In [8]:
def load_karate_club():
    network = read_edgelist(
        'karate.edgelist',
        nodetype=int)

    attributes = read_csv(
        'karate.attributes.csv',
        index_col=['node'])

    for attribute in attributes.columns.values:
        set_node_attributes(
            network,
            values=Series(
                attributes[attribute],
                index=attributes.index).to_dict(),
            name=attribute
        )

    X_train, y_train = map(array, zip(*[
        ([node], data['role'] == 'Administrator')
        for node, data in network.nodes(data=True)
        if data['role'] in {'Administrator', 'Instructor'}
    ]))
    X_test, y_test = map(array, zip(*[
        ([node], data['community'] == 'Administrator')
        for node, data in network.nodes(data=True)
        if data['role'] == 'Member'
    ]))
    
    return DataSet(
        X_train, y_train,
        X_test, y_test,
        network)

In [11]:
from networkx import to_numpy_matrix, degree_centrality, betweenness_centrality, shortest_path_length
import mxnet.ndarray as nd

zkc = load_karate_club()

A = to_numpy_matrix(zkc.network)
A = nd.array(A)

X_train = zkc.X_train.flatten()
y_train = zkc.y_train
X_test = zkc.X_test.flatten()
y_test = zkc.y_test

In [12]:
from mxnet.gluon import HybridBlock
from mxnet.gluon.nn import Activation
import mxnet.ndarray as nd

In [13]:
class SpectralRule(HybridBlock):
    def __init__(self,
                 A, in_units, out_units,
                 activation, **kwargs):
        super().__init__(**kwargs)
        I = nd.eye(*A.shape)
        A_hat = A.copy() + I
        D = nd.sum(A_hat, axis=0)
        D_inv = D**-0.5
        D_inv = nd.diag(D_inv)
        A_hat = D_inv * A_hat * D_inv
        
        self.in_units, self.out_units = in_units, out_units
        
        with self.name_scope():
            self.A_hat = self.params.get_constant('A_hat', A_hat)
            self.W = self.params.get(
                'W', shape=(self.in_units, self.out_units)
            )
            if activation == 'ident':
                self.activation = lambda X: X
            else:
                self.activation = Activation(activation)
    def hybrid_forward(self, F, X, A_hat, W):
        aggregate = F.dot(A_hat, X)
        propagate = self.activation(
            F.dot(aggregate, W))
        return propagate

In [14]:
class LogisticRegressor(HybridBlock):
    def __init__(self, in_units, **kwargs):
        super().__init__(**kwargs)
        with self.name_scope():
            self.w = self.params.get(
                'w', shape=(1, in_units)
            )

            self.b = self.params.get(
                'b', shape=(1, 1)
            )

    def hybrid_forward(self, F, X, w, b):
        # Change shape of b to comply with MXnet addition API
        b = F.broadcast_axis(b, axis=(0,1), size=(34, 1))
        y = F.dot(X, w, transpose_b=True) + b

        return F.sigmoid(y)

In [19]:
from mxnet.gluon.nn import HybridSequential, Activation
from mxnet.ndarray import array
from mxnet.initializer import One, Uniform, Xavier
from mxnet.gluon.loss import SigmoidBinaryCrossEntropyLoss

def build_features(A, X):
    hidden_layer_specs = [(4, 'tanh'), (2, 'tanh')] # Format: (units in layer, activation function)
    in_units = in_units=X.shape[1]
  
    features = HybridSequential()
    with features.name_scope():
        for i, (layer_size, activation_func) in enumerate(hidden_layer_specs):
            layer = SpectralRule(
                A, in_units=in_units, out_units=layer_size, 
                activation=activation_func)
            features.add(layer)

            in_units = layer_size
    return features, in_units

def build_model(A, X):
    model = HybridSequential()
    hidden_layer_specs = [(4, 'tanh'), (2, 'tanh')]
    in_units = in_units=X.shape[1]

    with model.name_scope():
        features, out_units = build_features(A, X)
        model.add(features)

        classifier = LogisticRegressor(out_units)
        model.add(classifier)

    model.hybridize()
    model.initialize(Uniform(1))

    return model, features

from mxnet.gluon.nn import HybridSequential, Activation
from mxnet.ndarray import array
from mxnet.initializer import One, Uniform, Xavier
from mxnet.gluon.loss import SigmoidBinaryCrossEntropyLoss

def build_features(A, X):
    hidden_layer_specs = [(4, 'tanh'), (2, 'tanh')] # Format: (units in layer, activation function)
    in_units = in_units=X.shape[1]
  
    features = HybridSequential()
    with features.name_scope():
        for i, (layer_size, activation_func) in enumerate(hidden_layer_specs):
            layer = SpectralRule(
                A, in_units=in_units, out_units=layer_size, 
                activation=activation_func)
            features.add(layer)

            in_units = layer_size
    return features, in_units

def build_model(A, X):
    model = HybridSequential()
    hidden_layer_specs = [(4, 'tanh'), (2, 'tanh')]
    in_units = in_units=X.shape[1]

    with model.name_scope():
        features, out_units = build_features(A, X)
        model.add(features)

        classifier = LogisticRegressor(out_units)
        model.add(classifier)

    model.hybridize()
    model.initialize(Uniform(1))

    return model, features

from mxnet.gluon.nn import HybridSequential, Activation
from mxnet.ndarray import array
from mxnet.initializer import One, Uniform, Xavier
from mxnet.gluon.loss import SigmoidBinaryCrossEntropyLoss

def build_features(A, X):
    hidden_layer_specs = [(4, 'tanh'), (2, 'tanh')] # Format: (units in layer, activation function)
    in_units = in_units=X.shape[1]
  
    features = HybridSequential()
    with features.name_scope():
        for i, (layer_size, activation_func) in enumerate(hidden_layer_specs):
            layer = SpectralRule(
                A, in_units=in_units, out_units=layer_size, 
                activation=activation_func)
            features.add(layer)

            in_units = layer_size
    return features, in_units

def build_model(A, X):
    model = HybridSequential()
    hidden_layer_specs = [(4, 'tanh'), (2, 'tanh')]
    in_units = in_units=X.shape[1]

    with model.name_scope():
        features, out_units = build_features(A, X)
        model.add(features)

        classifier = LogisticRegressor(out_units)
        model.add(classifier)

    model.hybridize()
    model.initialize(Uniform(1))

    return model, features

In [20]:
X_1 = I = nd.eye(*A.shape)
model_1, features_1 = build_model(A, X_1)
model_1(X_1)


[[0.5106775 ]
 [0.5095114 ]
 [0.51096225]
 [0.51542187]
 [0.523025  ]
 [0.512526  ]
 [0.511652  ]
 [0.50208426]
 [0.5050116 ]
 [0.51883554]
 [0.5406651 ]
 [0.49158806]
 [0.5110878 ]
 [0.4936631 ]
 [0.5026252 ]
 [0.48504698]
 [0.5104648 ]
 [0.51193994]
 [0.5220448 ]
 [0.50951165]
 [0.50140274]
 [0.5098576 ]
 [0.53033787]
 [0.51043373]
 [0.50124794]
 [0.52045095]
 [0.50941336]
 [0.49507195]
 [0.5021959 ]
 [0.5068235 ]
 [0.5085803 ]
 [0.51637703]
 [0.50986254]
 [0.5153003 ]]
<NDArray 34x1 @cpu(0)>

In [23]:
X_2 = nd.zeros((A.shape[0], 2))
node_distance_instructor = shortest_path_length(zkc.network, target=33)
node_distance_administrator = shortest_path_length(zkc.network, target=0)

for node in zkc.network.nodes():
    X_2[node][0] = node_distance_administrator[node]
    X_2[node][1] = node_distance_instructor[node]

In [24]:
X_2 = nd.concat(X_1, X_2)
model_2, features_2 = build_model(A, X_2)
model_2(X_2)


[[0.30647328]
 [0.30417648]
 [0.3039074 ]
 [0.3026266 ]
 [0.2761228 ]
 [0.2871463 ]
 [0.2901096 ]
 [0.28990412]
 [0.29719502]
 [0.29228204]
 [0.23410328]
 [0.27259356]
 [0.29702023]
 [0.28627372]
 [0.28598285]
 [0.2841425 ]
 [0.29827604]
 [0.29667538]
 [0.27046144]
 [0.30377817]
 [0.28733426]
 [0.3051519 ]
 [0.28729206]
 [0.30674195]
 [0.278353  ]
 [0.28005856]
 [0.27564093]
 [0.29431537]
 [0.27612218]
 [0.2979673 ]
 [0.29401767]
 [0.29911888]
 [0.2852555 ]
 [0.29837865]]
<NDArray 34x1 @cpu(0)>

In [25]:

%time
from mxnet import autograd
from mxnet.gluon import Trainer
from mxnet.ndarray import sum as ndsum
import numpy as np

def train(model, features, X, X_train, y_train, epochs):
    cross_entropy = SigmoidBinaryCrossEntropyLoss(from_sigmoid=True)
    trainer = Trainer(model.collect_params(), 'sgd', {'learning_rate': 0.001, 'momentum': 1})

    feature_representations = [features(X).asnumpy()]

    for e in range(1, epochs + 1):
        cum_loss = 0
        cum_preds = []

        for i, x in enumerate(X_train):
            y = array(y_train)[i]
            with autograd.record():
                preds = model(X)[x]
                loss = cross_entropy(preds, y)
            loss.backward()
            trainer.step(1)

            cum_loss += loss.asscalar()
            cum_preds += [preds.asscalar()]

        feature_representations.append(features(X).asnumpy())
            
        if (e % (epochs//10)) == 0:
            print(f"Epoch {e}/{epochs} -- Loss: {cum_loss: .4f}")
            print(cum_preds)
    return feature_representations

def predict(model, X, nodes):
    preds = model(X)[nodes].asnumpy().flatten()
    return np.where(preds >= 0.5, 1, 0)

Wall time: 0 ns


In [26]:
from sklearn.metrics import classification_report

feature_representations_1 = train(model_1, features_1, X_1, X_train, y_train, epochs=5000)
y_pred_1 = predict(model_1, X_1, X_test)
print(classification_report(y_test, y_pred_1))


Epoch 500/5000 -- Loss:  0.0001
[0.9999902, 4.623498e-05]
Epoch 1000/5000 -- Loss:  0.0000
[1.0, 8.9681595e-10]
Epoch 1500/5000 -- Loss:  0.0000
[1.0, 1.752139e-14]
Epoch 2000/5000 -- Loss:  0.0000
[1.0, 3.4294267e-19]
Epoch 2500/5000 -- Loss:  0.0000
[1.0, 6.712246e-24]
Epoch 3000/5000 -- Loss:  0.0000
[1.0, 1.3137748e-28]
Epoch 3500/5000 -- Loss:  0.0000
[1.0, 2.5714253e-33]
Epoch 4000/5000 -- Loss:  0.0000
[1.0, 5.0596415e-38]
Epoch 4500/5000 -- Loss:  0.0000
[1.0, 0.0]
Epoch 5000/5000 -- Loss:  0.0000
[1.0, 0.0]
              precision    recall  f1-score   support

       False       0.62      0.50      0.55        16
        True       0.58      0.69      0.63        16

    accuracy                           0.59        32
   macro avg       0.60      0.59      0.59        32
weighted avg       0.60      0.59      0.59        32



In [27]:
feature_representations_2= train(model_2, features_2, X_2, X_train, y_train, epochs=250)
y_pred_2 = predict(model_2, X_2, X_test)
print(classification_report(y_test, y_pred_2))

Epoch 25/250 -- Loss:  1.4545
[0.35830984, 0.34828502]
Epoch 50/250 -- Loss:  1.3412
[0.5004881, 0.47743362]
Epoch 75/250 -- Loss:  1.3454
[0.6495548, 0.59905267]
Epoch 100/250 -- Loss:  1.2656
[0.7196256, 0.608015]
Epoch 125/250 -- Loss:  0.9153
[0.7071581, 0.43381798]
Epoch 150/250 -- Loss:  0.6043
[0.65691596, 0.16817342]
Epoch 175/250 -- Loss:  0.4856
[0.6474464, 0.0495758]
Epoch 200/250 -- Loss:  0.3495
[0.7186822, 0.018995365]
Epoch 225/250 -- Loss:  0.1905
[0.8350884, 0.010186707]
Epoch 250/250 -- Loss:  0.0813
[0.92820424, 0.006742709]
              precision    recall  f1-score   support

       False       0.54      0.94      0.68        16
        True       0.75      0.19      0.30        16

    accuracy                           0.56        32
   macro avg       0.64      0.56      0.49        32
weighted avg       0.64      0.56      0.49        32

