In [None]:
import config
import argparse
import networkx as nx
import numpy as np
from pyvis.network import Network
import matplotlib.pyplot as plt
from networkx.algorithms.community.modularity_max import greedy_modularity_communities

### Graph data

In [None]:
# Load the karate club example graph that ships with networkx; every later
# cell in this notebook works on this graph object.
G = nx.karate_club_graph()

In [None]:
# Show the graph size as a (node count, edge count) tuple.
G.number_of_nodes(), G.number_of_edges()

In [None]:
# Copy the networkx graph into a pyvis Network created in notebook mode and
# render it to 'karate.html'.
graphvis = Network(notebook=True)
graphvis.from_nx(G)
graphvis.show('karate.html')

In [None]:
# Dense adjacency matrix of the graph. `nx.to_numpy_matrix` was removed in
# NetworkX 3.0; `nx.to_numpy_array` returns the same values as a plain ndarray.
nx.to_numpy_array(G)

In [None]:
def renormalization(G):
    """Return the renormalized adjacency matrix D~^(-1/2) (A + I) D~^(-1/2).

    inputs:
    G (nx.Graph or np.ndarray) Graph to normalize. A networkx graph is
                               converted with `nx.to_numpy_array`; a square
                               NumPy adjacency matrix is used as given.

    outputs:
    A_hat (np.ndarray) N x N float array, where N is the number of nodes.

    Self-loops are added first (A~ = A + I), so for non-negative edge weights
    every degree is at least 1 and the inverse square root is well defined.
    """
    if isinstance(G, np.ndarray):
        A = np.asarray(G, dtype=float)
    else:
        # to_numpy_matrix was removed in NetworkX 3.0; to_numpy_array is the
        # supported replacement and returns a plain ndarray.
        A = nx.to_numpy_array(G)

    A_tilde = A + np.eye(A.shape[0])

    # Keep the degrees as floats: an int-typed degree matrix would silently
    # truncate fractional (weighted) degrees.
    degrees = A_tilde.sum(axis=1)
    d_inv_sqrt = 1.0 / np.sqrt(degrees)

    # Scaling rows and columns by the vector is the same as multiplying by the
    # diagonal matrix on both sides, without building or inverting it.
    return A_tilde * d_inv_sqrt[:, None] * d_inv_sqrt[None, :]

In [None]:
# Compute the renormalized adjacency matrix once; it is handed to the GCN
# constructor in the final cell of the notebook.
A_hat = renormalization(G)
print(A_hat)

### Generate labels from communities

In [None]:
# Detect communities with greedy modularity maximisation and use the index of
# each node's community as its class label.
communities = greedy_modularity_communities(G)
colors = np.zeros(G.number_of_nodes())
classes = set()

for community_id, members in enumerate(communities):
    # The node ids in `members` are used directly as positions in `colors`.
    member_ids = list(members)
    colors[member_ids] = community_id
    classes.add(community_id)

# One-hot encode: row k of the identity matrix is the label vector of class k.
class_ids = colors.astype(int)
labels = np.eye(len(classes))[class_ids]

### Helper functions

In [None]:
def softmax(x):
    """Column-wise softmax: normalizes along axis 0.

    inputs:
    x (np.ndarray) Scores; each column (or the whole vector for 1-D input)
                   is normalized independently.

    outputs:
    (np.ndarray) Same shape as x, non-negative, summing to 1 along axis 0.
    """
    x = np.asarray(x, dtype=float)
    if x.size == 0:
        return x

    # Subtracting the per-column maximum leaves the result unchanged but keeps
    # np.exp from overflowing to inf (and the ratio from becoming NaN).
    shifted = x - np.max(x, axis=0, keepdims=True)
    exps = np.exp(shifted)
    return exps / np.sum(exps, axis=0, keepdims=True)

def glorot_init(in_dim, out_dim):
    """Draw an (in_dim, out_dim) matrix from the Glorot uniform distribution.

    Entries are sampled uniformly from [-limit, limit) with
    limit = sqrt(6 / (in_dim + out_dim)), using NumPy's global random state.
    """
    fan_total = in_dim + out_dim
    limit = np.sqrt(6.0 / fan_total)
    shape = (in_dim, out_dim)
    return np.random.uniform(low=-limit, high=limit, size=shape)

### Gradient Descent

For each iteration of gradient descent, compute the gradients with **backpropagation**: the global gradient is assembled as a chain of local gradients using the chain rule of calculus.

Starting from the last layer $l$:

$$
\delta^{(l)}=\frac{\partial}{\partial z^{(l)}}\mathcal{L}
$$

Think of the **error signal** $\delta^{(l)}$ as playing the same role in the backward pass that the input $x$ plays in the forward pass.

For layer $i$ in $l-1$ to $2$:

$$
\begin{align*}
    \nabla_{W^{(i)}}\mathcal{L}&=\delta^{(i)}a^{(i-1)^{\top}}&&\quad a^{(i-1)}\text{ is the incoming activation to layer } i \text{ from layer }i-1\\\\
    \nabla_{b^{(i)}}\mathcal{L}&=\delta^{(i)}&&\quad\text{shift by error signal}\\\\  
    \delta^{(i)} &= W^{(i+1)^{\top}}\delta^{(i+1)}\odot\frac{\partial}{\partial z}f(z^{(i)})&&\quad W^{(i+1)^{\top}} \text{ propagates the error signal backwards as a linear combination scaled by the derivative}
\end{align*}
$$

Think about the convexity of the latent space as discretized across dimensions of $z\in\mathbb{R}^{N}$. Each row of a hidden layer corresponds to a neuron, which corresponds to a position in the activation vector for a given input vector. As $\frac{\partial}{\partial z}f(z)\rightarrow 0$, for the particular combination of activations given to this neuron from the previous layer, the derivative indicates how this specific neuron is changing - which measures the convexity of this particular dimension in latent space $\mathbb{R}^{N}$.

As $\frac{\partial}{\partial z}f(z)\rightarrow 0$, the neuron reacts less to the incoming linear combination, which means that for this combination of features the latent space is locally flat along this neuron's dimension, as it is at a local minimum. So as backpropagation progresses across training, the gradients flowing through the neurons shrink as the parameters converge towards local minima.

For each layer $i$, apply gradient descent:

$$
\begin{align}
    W^{(i)} &= W^{(i)} - \alpha\mathbb{E}[\nabla_{W^{(i)}}\mathcal{L}]\\
    b^{(i)} &= b^{(i)} - \alpha\mathbb{E}[\nabla_{b^{(i)}}\mathcal{L}]
\end{align}
$$

$$
\mathbb{E}[\nabla_{W}\mathcal{L}]=\frac{1}{B}\sum^{B}_{j=1}\nabla_{W}\mathcal{L}_{j} \text{ is the mean gradient over the batch of } B \text{ samples, where } \mathcal{L}_{j} \text{ is the loss of sample } j \text{; the same holds for the bias}
$$

In [None]:
class GradientDescent(object):
    '''
    Plain gradient descent over a list of layers.

    Every layer in `parameters` is expected to expose the arrays W, b, W_grad
    and b_grad. Weights and biases are updated in place.
    '''
    def __init__(self, parameters, learning_rate):
        self.parameters, self.learning_rate = parameters, learning_rate

    def zero_gradients(self):
        '''Replace each layer's gradient accumulators with fresh zero arrays.'''
        for layer in self.parameters:
            layer.W_grad = np.zeros(np.shape(layer.W))
            layer.b_grad = np.zeros(np.shape(layer.b))

    def step(self):
        '''Move each parameter against its accumulated gradient.'''
        lr = self.learning_rate
        for layer in self.parameters:
            layer.W -= lr * layer.W_grad
            layer.b -= lr * layer.b_grad

### Graph Convolutional Layer

$$
\text{ReLU}(\hat{A}XW^{1}+b^{1})
$$

In [None]:
class GCLayer(object):
    '''
    Graph convolutional layer computing a = ReLU(W (G x)^T + b).

    Shapes below use n = number of nodes, f = input_dim, h = output_dim.

    attributes:
    W, b            Weights (h x f) and bias (h x 1).
    W_grad, b_grad  Gradient accumulators shaped like W and b. backward()
                    adds to them and never resets them.
    '''
    def __init__(self, input_dim, output_dim):
        self.input_dim = input_dim
        self.output_dim = output_dim
        # The forward pass applies W as W @ X with X of shape (f, n), so W has
        # to be (h, f). The Glorot limit is symmetric in its two arguments, so
        # swapping them changes only the shape, not the distribution.
        self.W = glorot_init(output_dim, input_dim)
        self.W_grad = np.zeros(self.W.shape)
        self.b = np.ones((output_dim, 1))
        self.b_grad = np.zeros(self.b.shape)


    def relu(self, x):
        '''Element-wise max(x, 0).'''
        # np.piecewise(x, ..., [0, x]) treats the array x as a constant to
        # assign and raises as soon as the input has mixed signs.
        return np.maximum(x, 0)


    def relu_derivative(self, x):
        '''Element-wise derivative of ReLU: 1 where x > 0, otherwise 0.'''
        return (x > 0) * 1


    def __call__(self, G, x):
        '''
        Forward pass.

        inputs:
        G (np.ndarray) Normalized adjacency matrix of a static graph.
                       Dimensions: N x N where N is the number of nodes.
        x (np.ndarray) Embedding matrix
                       Dimensions: N x F where F is the number of features.

        outputs:
        (np.ndarray)   Activations, N x H, one row per node.
        '''
        # np.asarray keeps the arithmetic element-wise even if G is np.matrix.
        self.i = x # (n,f)
        self.X = np.asarray(G @ x).T # (n,n) x (n,f) -> (n,f).T -> (f,n)
        self.z = self.W @ self.X + self.b # (h,f) x (f,n) + (h,1) -> (h,n)
        self.a = self.relu(self.z) # (h,n), where n is number of samples/nodes

        # transpose so can multiply by adjacency matrix in next layer
        return self.a.T # (n,h)


    def backward(self, G, error):
        '''
        Backward pass; must be called after a forward pass.

        inputs:
        G (np.ndarray)     The matrix that was used in the forward pass (n,n).
        error (np.ndarray) Gradient of the loss with respect to this layer's
                           activations self.a, shape (h,n).

        outputs:
        (np.ndarray)       Gradient of the loss with respect to the input x,
                           shape (n,f).
        '''
        error = np.asarray(error)
        # Gate the incoming signal with this layer's own ReLU derivative.
        delta = error * self.relu_derivative(self.z) # (h,n)

        # z = W @ X + b, so the weight gradient pairs delta with the
        # aggregated features self.X rather than the raw input self.i.
        self.W_grad += delta @ self.X.T # (h,n) x (n,f) -> (h,f)
        # b is broadcast over nodes in the forward pass, so its gradient is
        # the sum over nodes.
        self.b_grad += delta.sum(axis=1, keepdims=True) # (h,1)

        # dL/dX = W.T @ delta is (f,n); X = (G @ x).T, hence dL/dx = G.T @ (dL/dX).T
        return np.asarray(G).T @ (self.W.T @ delta).T # (n,f)
    

### Linear Layer

In [None]:
class Linear(object):
    '''
    Fully connected layer with ReLU activation: a = ReLU(W x + b).

    Shapes below use f = input_dim, h = output_dim, n = number of samples.

    attributes:
    W, b            Weights (h x f, initialised to zero) and bias (h x 1,
                    initialised to one).
    W_grad, b_grad  Gradient accumulators shaped like W and b. backward()
                    adds to them and never resets them.
    '''
    def __init__(self, input_dim, output_dim):
        self.input_dim = input_dim
        self.output_dim = output_dim
        # The forward pass computes W @ x with x of shape (f, n), so W has to
        # be (h, f) = (output_dim, input_dim).
        self.W = np.zeros((self.output_dim, self.input_dim))
        self.W_grad = np.zeros(self.W.shape)
        self.b = np.ones((self.output_dim, 1))
        self.b_grad = np.zeros(self.b.shape)


    def relu(self, x):
        '''Element-wise max(x, 0).'''
        # np.piecewise(x, ..., [0, x]) treats the array x as a constant to
        # assign and raises as soon as the input has mixed signs.
        return np.maximum(x, 0)


    def relu_derivative(self, x):
        '''Element-wise derivative of ReLU: 1 where x > 0, otherwise 0.'''
        return (x > 0) * 1


    def __call__(self, x):
        '''
        Forward pass.

        inputs:
        x (np.ndarray) Inputs to this layer, shape (f,n)

        outputs:
        a (np.ndarray) Output activations, shape (h,n)
        '''
        # np.asarray keeps later products element-wise even for np.matrix input.
        self.x = np.asarray(x) # (f,n)
        self.z = self.W @ self.x + self.b # (h,f) x (f,n) + (h,1)
        self.a = self.relu(self.z)   # (h,n)
        return self.a # (h,n)


    def backward(self, error):
        '''
        Backward pass; must be called after a forward pass.

        inputs:
        error (np.ndarray) Gradient of the loss with respect to this layer's
                           output activations, shape (W.out_dim, batch_size)

        outputs:
        (np.ndarray)       Gradient of the loss with respect to the input x,
                           shape (W.in_dim, batch_size)
        '''
        error = np.asarray(error)
        # Gate the incoming signal with this layer's own ReLU derivative.
        delta = error * self.relu_derivative(self.z) # (h,n)

        self.W_grad += delta @ self.x.T # (h,n) x (n,f) -> (h,f)
        # b is broadcast over samples in the forward pass, so its gradient is
        # the sum over samples.
        self.b_grad += delta.sum(axis=1, keepdims=True) # (h,1)
        return self.W.T @ delta # (f,h) x (h,n) -> (f,n)
        

### Graph Convolutional Network

In [None]:
class GCN(object):
    '''
    Two-layer network: a graph convolutional layer followed by a linear
    layer, with a softmax over the classes.

    inputs:
    graph (np.ndarray) Normalized adjacency matrix, N x N.
    hidden_dim (int)   Width of the graph convolutional layer (default 16).
    num_classes (int)  Number of output classes (default 2).
    '''
    def __init__(self, graph, hidden_dim=16, num_classes=2):
        # Plain ndarray so every product downstream is element-wise/matmul as
        # written, even if an np.matrix is passed in.
        self.G = np.asarray(graph)
        self.nodes = self.G.shape[0]
        self.embedding = np.eye(self.nodes)
        # The first layer takes one input feature per node, so the features
        # passed to __call__ must have `nodes` columns.
        self.l0 = GCLayer(self.nodes, hidden_dim)
        self.l1 = Linear(hidden_dim, num_classes)
        self.parameters = [self.l0, self.l1]


    def __call__(self, x):
        '''
        Forward pass.

        inputs:
        x (np.ndarray) Node features, one row per node.

        outputs:
        (np.ndarray)   Class probabilities, shape (nodes, classes); each row
                       sums to 1.
        '''
        hidden = self.l0(self.G, x)   # (n, hidden)
        # l0 returns one row per node, l1 expects one column per sample.
        scores = self.l1(hidden.T)    # (classes, n)
        # softmax normalizes along axis 0 (the classes here); transpose so the
        # result lines up row-for-row with one-hot labels.
        return softmax(scores).T      # (n, classes)


    def backward(self, x):
        '''
        Backward pass; accumulates gradients in both layers.

        inputs:
        x (np.ndarray) Gradient of the loss with respect to the activations
                       fed into the softmax, shaped like the output of
                       __call__: (nodes, classes).
        '''
        error = np.asarray(x).T  # (classes, n), the layout l1 works in
        self.l0.backward(self.G, self.l1.backward(error))

### Loss function

In [None]:
def cross_ent(pred, labels, eps=1e-12):
    """Per-sample cross-entropy between predicted probabilities and one-hot labels.

    inputs:
    pred (np.ndarray)   Predicted probabilities, shape (samples, classes).
    labels (np.ndarray) One-hot labels, shape (samples, classes).
    eps (float)         Lower bound applied to the selected probabilities so
                        that a predicted probability of 0 gives a large finite
                        loss instead of inf.

    outputs:
    (np.ndarray) Loss per sample, shape (samples,).
    """
    pred = np.asarray(pred, dtype=float)
    true_class = np.argmax(labels, axis=1)
    # Pick the probability assigned to the true class first, so the log is
    # taken of one value per sample rather than of the whole matrix.
    picked = pred[np.arange(pred.shape[0]), true_class]
    return -np.log(np.clip(picked, eps, None))

### Training loop

In [None]:
def train(model, loss, epochs, features, labels, opt):
    """Run full-batch gradient descent and print the mean loss of every epoch.

    inputs:
    model               Callable returning predictions of shape
                        (samples, classes) and exposing backward(error).
    loss                Callable (pred, labels) -> per-sample losses.
    epochs (int)        Number of passes over the data.
    features            Input passed to the model unchanged.
    labels (np.ndarray) One-hot labels, shape (samples, classes).
    opt                 Optimizer exposing step() and zero_gradients().

    The error signal handed to model.backward is (pred - labels) / samples,
    i.e. the gradient of the mean softmax cross-entropy with respect to the
    activations fed into the softmax. It is only correct when the model ends
    in a softmax and `loss` is cross-entropy.
    """
    labels = np.asarray(labels)
    num_samples = labels.shape[0]

    for e in range(epochs):
        preds = np.asarray(model(features))
        epoch_loss = loss(preds, labels).mean()

        # backward() needs the output error; dividing by the sample count
        # makes the accumulated gradients the mean over the batch.
        model.backward((preds - labels) / num_samples)
        opt.step()
        opt.zero_gradients()

        print(f'epoch {e} loss: {epoch_loss}')

### Hyperparameters

In [None]:
epochs = 1  # number of passes made by the training loop
lr = 0.1    # learning rate handed to the GradientDescent optimizer

In [None]:
# Seed NumPy's global RNG so the randomly initialised weights, and therefore
# the printed losses, are the same on every Restart & Run All.
np.random.seed(0)

# One-hot node identities serve as the input features.
features = np.eye(G.number_of_nodes())
model = GCN(A_hat)

# `labels` has one column per detected community, which need not match the
# width of the output layer GCN builds by default. Resize the output layer so
# the loss can index every class.
num_classes = labels.shape[1]
if model.l1.output_dim != num_classes:
    model.l1 = Linear(model.l1.input_dim, num_classes)
    model.parameters = [model.l0, model.l1]

opt = GradientDescent(model.parameters, lr)
train(model, cross_ent, epochs, features, labels, opt)