In [1]:
import config
import argparse
import networkx as nx
import numpy as np
from pyvis.network import Network
import matplotlib.pyplot as plt
from networkx.algorithms.community.modularity_max import greedy_modularity_communities

In [2]:
G = nx.karate_club_graph()

In [3]:
G.number_of_nodes(), G.number_of_edges()

(34, 78)

In [11]:
graphvis = Network(notebook=True)
graphvis.from_nx(G)
graphvis.show('karate.html')

In [9]:
nx.to_numpy_matrix(G)

matrix([[0., 4., 5., ..., 2., 0., 0.],
        [4., 0., 6., ..., 0., 0., 0.],
        [5., 6., 0., ..., 0., 2., 0.],
        ...,
        [2., 0., 0., ..., 0., 4., 4.],
        [0., 0., 2., ..., 4., 0., 5.],
        [0., 0., 0., ..., 4., 5., 0.]])

In [5]:
def renormalization(G):
    A = nx.to_numpy_matrix(G)
    I = np.eye(len(A))
    A_tilde = A + I
    D_tilde = np.zeros(A.shape, int)
    np.fill_diagonal(D_tilde, np.sum(A_tilde, axis=1).flatten())
    D_tilde = np.linalg.inv(D_tilde)
    D_tilde = np.power(D_tilde, 0.5)
    return D_tilde @ A_tilde @ D_tilde

In [7]:
A_hat = renormalization(G)
print(A_hat)

[[0.02325581 0.11136921 0.13076645 ... 0.06502561 0.         0.        ]
 [0.11136921 0.03333333 0.18786729 ... 0.         0.         0.        ]
 [0.13076645 0.18786729 0.02941176 ... 0.         0.0549235  0.        ]
 ...
 [0.06502561 0.         0.         ... 0.04545455 0.13655775 0.12182898]
 [0.         0.         0.0549235  ... 0.13655775 0.02564103 0.11437725]
 [0.         0.         0.         ... 0.12182898 0.11437725 0.02040816]]


In [10]:
features = np.eye(G.number_of_nodes())

In [None]:
def relu(x):
    return np.piecewise(x, [x <= 0, x > 0], [0, x])
    
    
def relu_derivative(x):
    return (x > 0) * 1
    
    
def softmax(x):
    return np.exp(x) / np.sum(np.exp(x), axis=0, keepdims=True)
    

def cross_ent(pred, labels):
    return -np.log(pred)[np.arange(pred.shape[0]), np.argmax(labels, axis=1)]


def glorot_init(in_dim, out_dim):
    sd = np.sqrt(6.0 / (in_dim + out_dim))
    return np.random.uniform(-sd, sd, size=(in_dim, out_dim))

In [None]:
class GradientDescent(object):
    def __init__(self, parameters, learning_rate):
        self.parameters = parameters
        self.learning_rate = learning_rate
        
        
    def zero_gradients(self):
        for layer in self.parameters:
            layer.zero_gradients()
    
    
    def step(self):
        for layer in self.parameters:
            layer.weights -= self.learning_rate * layer.gradients

In [None]:
class GCLayer(object):
    def __init__(self, input_dim, output_dim):
        self.input_dim = input_dim
        self.output_dim = output_dim
        self.weights = glorot_init(np.zeros(input_dim, output_dim))
        self.gradients = None
        self.activations = None
        
        
    def __call__(self, G, x):
        # Transpose b/c need to store this for backward pass
        self._X = (G @ x).T

        # Rever the original order of AXW to W(GX).T to save having to compute both
        self._H = relu(self.weights @ self._X)

        # One last transpose so that the dimensions are correct for the forward pass
        return self._H.T
    
    
    def backward(self, G, x):
        self.gradients = x
        return relu_derivative(np.transpose(self.weights) @ G @ x)
    
    
    def zero_gradients(self):
        self.gradients = None

In [None]:
class GCN(object):
    def __init__(self, graph):
        self.G = graph
        self.num_features = 10
        self.embedding = np.array(self.graph.shape[0], self.num_features)
        self.l0 = GCLayer(self.num_features, 32)
        self.l1 = GCLayer(32, 2)
        self.parameters = [self.l0, self.l1]
        
    
    def __call__(self, x):
        return softmax(self.l1(self.l0(self.G, x)))
    
    
    def backward(self, x):
        self.l0.backward(self.G, self.l1.backward(self.G, x))

In [None]:
class Linear(object):
    def __init__(self, input_dim, output_dim):
        self.input_dim = input_dim
        self.output_dim = output_dim
        self.W = np.zeros((self.input_dim, self.output_dim))
        self.b = np.zeros((self.output_dim, 1))
        self.gradient = None


    def __call__(self, x):
        return self.W @ x + self.b

In [None]:
def train(model, loss, epochs, dataset, opt):
    
    for e in range(epochs):
        epoch_loss = 0
        for x, y in dataset:
            epoch_loss += loss(model(x), y)
            
        model.backward()
        opt.step()
        opt.zero_gradients()
        
        epoch_loss /= len(dataset)
        print(f'epoch {e} loss: {epoch_loss}')

In [None]:
model = GCN(A_hat)
opt = GradientDescent(model.parameters, lr)
train(model, cross_ent, epochs, features, opt)

For one iteration of gradient descent:

Compute global gradient as a chain of local gradients using chain rul of calculus

For last layer $l$:

$$
\delta^{(l)}=\frac{\partial}{\partial z^{(l)}}\mathcal{L}
$$

Think of $\delta^{(l)}$ as serving as the same function as $x$ during the forward pass.

For layer i in $l-1$ to $2$:

$$
\begin{align}
    \nabla_{w^{(i)}}\mathcal{L}&=\delta^{(i)}a^{(i-1)^{\top}}\\\\
    \nabla_{b^{(i)}}\mathcal{L}&=\delta^{(i)}\\\\  
    \delta^{i-1} &= W^{i^{\top}}\delta^{i}\odot\frac{\partial}{\partial z}f(z^{(i-1)})
\end{align}
$$

For each layer i, apply gradient descent:

$$
\begin{align}
    W^{(i)} &= W^{(i)} - \alpha\mathbb{E}[\nabla_{W^{(i)}}]\\
    b^{(i)} &= b^{(i)} - \alpha\mathbb{E}[\nabla_{b^{(i)}}]
\end{align}
$$

Here the expected gradient is taken over the batch that generated these gradients.