### TODO
1. [GCN Backprop](https://github.com/dmlc/dgl/issues/4021)
2. Visualize loss
3. [Embeddings](https://beta.openai.com/docs/guides/embeddings/what-are-embeddings)

In [1]:
import sys
import config
import argparse
import random
import networkx as nx
import numpy as np
from collections import defaultdict
from pyvis.network import Network
import matplotlib.pyplot as plt
from networkx.algorithms.community.modularity_max import greedy_modularity_communities

In [2]:
seed = 100
hidden_dim = 16
hidden_dim2 = 20
np.random.seed(seed)

### Graph data

In [3]:
G = nx.karate_club_graph()

In [4]:
G.number_of_nodes(), G.number_of_edges()

(34, 78)

### Generate labels from communities

In [5]:
communities = greedy_modularity_communities(G)
colors = np.zeros(G.number_of_nodes())
classes = set()

for i, c in enumerate(communities):
    colors[list(c)] = i
    classes.add(i)
    
num_classes = len(classes)
labels = (np.eye(len(classes))[colors.astype(int)]).T

classes, samples = labels.shape
print(f'classes: {classes}\t samples: {samples}')

classes: 3	 samples: 34


### Color nodes

In [6]:
def random_color():
    return '#%02X%02X%02X' % (random.randint(0, 255), random.randint(0, 255), random.randint(0, 255))

# uncomment for random colors
# color_map = {cls: random_color() for cls in classes}
color_map = {0: '#46FB47', 1: '#B9E6B5', 2: '#9F9EBF'}

colored_graph = Network(width='100%', notebook=True)

for node in G.nodes():
    colored_graph.add_node(node, color=color_map[int(colors[node])])
    
for edge in G.edges():
    colored_graph.add_edge(int(edge[0]), int(edge[1]))
    
colored_graph.show('colored_graph.html')

Local cdn resources have problems on chrome/safari when used in jupyter-notebook. 


#### Renormalization trick

$A$ is the adjacency matrix, $I$ is the identity matrix, and $N$ is the cardinality of the set of nodes in the graph.

$$
\begin{align}
    \tilde{A} &= A + I_{N}\\
       \tilde{\mathcal{D}}_{ii} &= \sum_{i}\tilde{A}_{ij}\\
    \hat{\mathcal{A}}&=\tilde{\mathcal{D}}^{-\frac{1}{2}}\tilde{\mathcal{A}}\tilde{\mathcal{D}}^{-\frac{1}{2}}
\end{align}
$$

In [7]:
def renormalization(G):
    A = np.asarray(nx.to_numpy_matrix(G))
    I = np.eye(len(A))
    A_tilde = A + I
    D_tilde = np.zeros(A.shape, int)
    np.fill_diagonal(D_tilde, np.sum(A_tilde, axis=1).flatten())
    D_tilde = np.linalg.inv(D_tilde)
    D_tilde = np.power(D_tilde, 0.5)
    return D_tilde @ A_tilde @ D_tilde

In [8]:
nx.to_numpy_matrix(G)

matrix([[0., 4., 5., ..., 2., 0., 0.],
        [4., 0., 6., ..., 0., 0., 0.],
        [5., 6., 0., ..., 0., 2., 0.],
        ...,
        [2., 0., 0., ..., 0., 4., 4.],
        [0., 0., 2., ..., 4., 0., 5.],
        [0., 0., 0., ..., 4., 5., 0.]])

In [9]:
# Must pre-process offline
A_hat = renormalization(G)
print(A_hat)

[[0.02325581 0.11136921 0.13076645 ... 0.06502561 0.         0.        ]
 [0.11136921 0.03333333 0.18786729 ... 0.         0.         0.        ]
 [0.13076645 0.18786729 0.02941176 ... 0.         0.0549235  0.        ]
 ...
 [0.06502561 0.         0.         ... 0.04545455 0.13655775 0.12182898]
 [0.         0.         0.0549235  ... 0.13655775 0.02564103 0.11437725]
 [0.         0.         0.         ... 0.12182898 0.11437725 0.02040816]]


### Helper functions

In [10]:
def init(model, scheme):
    for i, layer in enumerate(model.parameters):
        model.parameters[i].W = scheme(*layer.W.shape)
        model.parameters[i].b = scheme(*layer.b.shape)
        
    return model

def glorot_init(in_dim, out_dim):
    sd = np.sqrt(6.0 / (in_dim + out_dim))
    return np.random.uniform(-sd, sd, size=(in_dim, out_dim))

In [11]:
class GradientDescent(object):
    def __init__(self, parameters, learning_rate):
        self.parameters = parameters
        self.learning_rate = learning_rate
        
        
    def zero_gradients(self):
        for layer in self.parameters:
            layer.dW = np.zeros(layer.W.shape)
            layer.db = np.zeros(layer.b.shape)
    
    
    def step(self):
        for i, layer in enumerate(self.parameters):
            # TODO: Replace with assertion
            if np.any(np.isnan(layer.db)):
                print(f'nans layer {i}')
                
            layer.W -= self.learning_rate * layer.dW
            layer.b -= self.learning_rate * layer.db

### Activation functions

In [12]:
def relu(x):
    return np.maximum(0, x)

def relu_(x):
    return (x > 0).astype(int)

def softmax(x, axis=0):
    return np.exp(x)/np.sum(np.exp(x), axis=axis, keepdims=True)

### Graph Convolutional Layer

$$
\text{ReLU}(\hat{A}XW^{1}+b^{1})
$$

In [13]:
class GCLayer(object):
    def __init__(self, input_dim, output_dim, name=''):
        self.name = name
        self.input_dim = input_dim
        self.output_dim = output_dim
        self.W = glorot_init(output_dim, input_dim)
        self.dW = np.zeros(self.W.shape)
        self.b = np.ones((output_dim, 1))
        self.db = np.zeros(self.b.shape)
            
    '''
    inputs:
    G (nx.Graph)   Normalized Laplacian matrix for a static graph.
                   Dimensions: N x N where N is the number of nodes.
    x (np.ndarray) Embedding matrix
                   Dimensions: N x F where F is the number of features.
    '''
    def __call__(self, G, x, activation=None):
        if not activation:
            activation = lambda x: x
            
        # (nodes x nodes), (nodes x features), so need to transpose
        # before taking linear combination
        self.z = x # (n,f)
        
        # need to apply the activations along feature/hidden dimension
        # since x is (n,f), transpose to apply activations, then transpose back
        # to dot with the adjacency matrix
        self.a = activation(x.T).T
        
        # (n,n) x (n,f) -> (n,f).T -> (f,n) so can left-multiply weight with features
        # this is purely stylistic preference.
        X = (G @ self.a).T
        
        #print(f'({self.name}) W.shape: {self.W.shape}\t X.shape: {X.shape}')
        
        # transpose so can multiply by adjacency matrix in next layer, (n,h)
        return (self.W @ X + self.b).T # (h,f) x (f,n) + (h,1) -> (h,n). Broadcast bias vector.
    
    
    def backward(self, error, derivative=None):
        if not derivative:
            derivative = lambda x: x
            
        #print(f'({self.name}) W.T.shape: {self.W.T.shape}\t error.shape: {error.shape}\t a.shape: {self.a.shape}')
        
        #self.dW = error @ self.a.T # (h,n) x (n,f) -> (h,f) which matches W.shape
        self.dW = error @ self.a
        self.db = np.sum(error, axis=1, keepdims=True) # (h,n)
        
        return self.W.T @ error * derivative(self.z).T

    

### Linear Layer

In [14]:
class Linear(object):
    def __init__(self, input_dim, output_dim, name=''):
        self.name = name
        self.input_dim = input_dim
        self.output_dim = output_dim
        self.W = glorot_init(output_dim, input_dim)
        self.dW = np.zeros(self.W.shape)
        self.b = np.ones((self.output_dim, 1))
        self.db = np.zeros(self.b.shape)    

    '''
    inputs:
    x (np.ndarray) Inputs to this layer
    
    outputs:
    a (np.ndarray) Output activations
    '''
    def __call__(self, x, activation=None):
        if not activation:
            activation = lambda x: x
            
        self.z = x # (f,n)
        self.a = activation(x)
        #print(f'({self.name}) W.shape: {self.W.shape}\t a.shape: {self.a.shape}')
        return self.W @ self.a + self.b # (h,f) x (f,n) + (h,1) -> (h,n). Broadcast bias vector.
    
    
    '''
    inputs:
    error (np.ndarray) Error signal of shape (W.out_dim, batch_size) from subsequent layer
    
    outputs:
    
    '''
    def backward(self, error, derivative=None):
        if not derivative:
            derivative = lambda x: x
            
        batch_size = error.shape[1]
        
        #print(f'({self.name}) W.T.shape: {self.W.T.shape}\t error.shape: {error.shape}\t a.T.shape: {self.a.T.shape}\t z.shape: {self.z.shape}')
        
        self.dW = error @ self.a.T # (h,n) x (n,f)        
        self.db = np.sum(error, axis=1, keepdims=True) # (h,n)
        
        return self.W.T @ error * derivative(self.z) # (f,h) x (h,n) * (h,n)
        

### Graph Convolutional Network

In [15]:
class GCN(object):
    def __init__(self, graph, num_classes):
        self.G = graph
        self.nodes = self.G.shape[0]
        self.embedding = np.eye(self.nodes)
        self.l0 = GCLayer(self.nodes, hidden_dim, name='l0')
        self.l1 = GCLayer(hidden_dim, hidden_dim2, name='l1')
        self.l2 = Linear(hidden_dim2, num_classes, name='l2')
        self.parameters = [self.l0, self.l1, self.l2]
        
    
    def __call__(self, x):
        a0 = self.l0(self.G, x, activation=relu)
        a1 = self.l1(self.G, a0, activation=relu).T # transpose b/c Linear layer expects (f,n)
        a2 = self.l2(a1)
        return softmax(a2)
    
    
    def backward(self, x):
        # Transpose errors from (34,3) -> (3,34) because linear weights are (16,34), but transposed for BP
        # so computation must be (labels, batch_size) x (batch_size, hidden_dim) to get error @ x.T
        d2 = self.l2.backward(x, derivative=relu_)
        d1 = self.l1.backward(d2, derivative=relu_)
        self.l0.backward(d1, derivative=None)
        

### Forward pass

#### Layer 1
$$
\begin{align}
    &\text{ReLU}
    \Biggl(
    \
        \underset{\mathcal{W}^{(1)}\ \in\ \mathbb{R}^{16\times34}}{
        \begin{bmatrix}
            w_{1,1} & \ldots & w_{1,34}\\
            \vdots & \ddots & \vdots\\
            w_{16,1} & \ldots & w_{16,34}
        \end{bmatrix}}
        \Biggl(
        \
            \underset{\hat{\mathcal{A}}\ \in\ \mathbb{R}^{34\times34}}{
            \begin{bmatrix}
                \alpha_{1,1} & \ldots & \alpha_{1,34}\\
                \vdots & \ddots & \vdots\\
                \alpha_{34,1} & \ldots & \alpha_{34,34}
            \end{bmatrix}}
            \ 
            \underset{\mathcal{X}\ \in\ \mathbb{R}^{34\times34}}{
            \begin{bmatrix}
                x_{1,1} & \ldots & x_{1,34}\\
                \vdots & \ddots & \vdots\\
                x_{34,1} & \ldots & x_{34,34}
            \end{bmatrix}}
        \
        \Biggl)^{\top}
        +
        \underset{\mathcal{b}^{(1)}\ \in\ \mathbb{R}^{16\times34}}{
        \begin{bmatrix}
            b_{1,1} & \ldots & b_{1,34}\\
            \vdots & \ddots & \vdots\\
            b_{16,1} & \ldots & b_{16,34}
        \end{bmatrix}}
    \
    \Biggr)\\
    =\
    &\text{ReLU}
    \Biggl(
    \
        \underset{\mathcal{Z}^{(1)}\ \in\ \mathbb{R}^{16\times34}}{
        \begin{bmatrix}
            z_{1,1} & \ldots & z_{1,34}\\
            \vdots & \ddots & \vdots\\
            z_{16,1} & \ldots & z_{16,34}
        \end{bmatrix}}
    \
    \Biggr)\\
    =
    &\quad\quad\quad\underset{\mathcal{A}^{(2)}\ \in\ \mathbb{R}^{16\times34}}{
    \begin{bmatrix}
        a_{1,1} & \ldots & a_{1,34}\\
        \vdots & \ddots & \vdots\\
        a_{16,1} & \ldots & a_{16,34}
    \end{bmatrix}}
    \rightarrow
    \Biggl(
    \
        \underset{\mathcal{A}^{(2)}\ \in\ \mathbb{R}^{16\times34}}{
        \begin{bmatrix}
            a_{1,1} & \ldots & a_{1,34}\\
            \vdots & \ddots & \vdots\\
            a_{16,1} & \ldots & a_{16,34}
        \end{bmatrix}}
    \
    \Biggl)^{\top}
\end{align}
$$

#### Layer 2
$$
\begin{align}
    &\text{ReLU}
    \Biggl(
    \
        \underset{\mathcal{W}^{(2)}\ \in\ \mathbb{R}^{16\times16}}{
        \begin{bmatrix}
            w_{1,1} & \ldots & w_{1,16}\\
            \vdots & \ddots & \vdots\\
            w_{16,1} & \ldots & w_{16,16}
        \end{bmatrix}}
        \
        \Biggl(
        \
            \underset{\hat{\mathcal{A}}\ \in\ \mathbb{R}^{34\times34}}{
            \begin{bmatrix}
                \alpha_{1,1} & \ldots & \alpha_{1,34}\\
                \vdots & \ddots & \vdots\\
                \alpha_{34,1} & \ldots & \alpha_{34,34}
            \end{bmatrix}}
            \
            \underset{\mathcal{A}^{(2)^{{\top}}}\ \in\ \mathbb{R}^{34\times16}}{
            \begin{bmatrix}
                a_{1,1} & \ldots & a_{1,16}\\
                \vdots & \ddots & \vdots\\
                a_{34,1} & \ldots & a_{34,16}
            \end{bmatrix}}
        \
        \Biggl)^{\top}
        +
        \underset{\mathcal{b}^{(2)}\ \in\ \mathbb{R}^{16\times34}}{
        \begin{bmatrix}
            b_{1,1} & \ldots & b_{1,34}\\
            \vdots & \ddots & \vdots\\
            b_{16,1} & \ldots & b_{16,34}
        \end{bmatrix}}
    \
    \Biggr)\\
    =\
    &\text{ReLU}
    \Biggl(
    \
        \underset{\mathcal{Z}^{(2)}\ \in\ \mathbb{R}^{16\times34}}{
        \begin{bmatrix}
            z_{1,1} & \ldots & z_{1,34}\\
            \vdots & \ddots & \vdots\\
            z_{16,1} & \ldots & z_{16,34}
        \end{bmatrix}}
    \
    \Biggr)\\
    =
    &\quad\quad\quad\underset{\mathcal{A}^{(3)}\ \in\ \mathbb{R}^{16\times34}}{
    \begin{bmatrix}
        a_{1,1} & \ldots & a_{1,34}\\
        \vdots & \ddots & \vdots\\
        a_{16,1} & \ldots & a_{16,34}
    \end{bmatrix}}
    \rightarrow
    \Biggl(
    \
        \underset{\mathcal{A}^{(3)}\ \in\ \mathbb{R}^{16\times34}}{
        \begin{bmatrix}
            a_{1,1} & \ldots & a_{1,34}\\
            \vdots & \ddots & \vdots\\
            a_{16,1} & \ldots & a_{16,34}
        \end{bmatrix}}
    \Biggl)^{\top}
\end{align}
$$


#### Layer 3
$$
\begin{align}
    &\text{Softmax}
    \Biggl(
    \
        \underset{\mathcal{W}^{(3)}\ \in\ \mathbb{R}^{3\times16}}{
        \begin{bmatrix}
            w_{1,1} & \ldots & w_{1,16}\\
            w_{2,1} & \ldots & w_{2,16}\\
            w_{3,1} & \ldots & w_{3,16}
        \end{bmatrix}}
        \
        \Biggl(
        \
            \underset{\hat{\mathcal{A}}\ \in\ \mathbb{R}^{34\times34}}{
            \begin{bmatrix}
                \alpha_{1,1} & \ldots & \alpha_{1,34}\\
                \vdots & \ddots & \vdots\\
                \alpha_{34,1} & \ldots & \alpha_{34,34}
            \end{bmatrix}}
            \ 
            \underset{\mathcal{A}^{(3)^{\top}}\ \in\ \mathbb{R}^{34\times16}}{
            \begin{bmatrix}
                a_{1,1} & \ldots & a_{1,16}\\
                \vdots & \ddots & \vdots\\
                a_{34,1} & \ldots & a_{34,16}
            \end{bmatrix}}
        \Biggl)^{\top}
        +
        \underset{\mathcal{b}^{(3)}\ \in\ \mathbb{R}^{3\times34}}{
        \begin{bmatrix}
            b_{1,1} & \ldots & b_{1,34}\\
            b_{2,1} & \ldots & b_{2,34}\\
            b_{3,1} & \ldots & b_{3,34}
        \end{bmatrix}}
    \
    \Biggr)\\
    =\
    &\text{Softmax}
    \Biggl(
    \
        \underset{\mathcal{Z}^{(3)}\ \in\ \mathbb{R}^{3\times34}}{
        \begin{bmatrix}
            z_{1,1} & \ldots & z_{1,34}\\
            z_{2,1} & \ldots & z_{2,34}\\
            z_{3,1} & \ldots & z_{3,34}
        \end{bmatrix}}
    \
    \Biggr)\\
    =
    &\quad\quad\quad\quad\underset{\mathcal{A}^{(4)}\ \in\ \mathbb{R}^{3\times34}}{
    \begin{bmatrix}
        a_{1,1} & \ldots & a_{1,34}\\
        a_{2,1} & \ldots & a_{2,34}\\
        a_{3,1} & \ldots & a_{3,34}
    \end{bmatrix}}
\end{align}
$$


### Loss function

Why we use cross entropy loss for classification when doing MLE:
https://en.wikipedia.org/wiki/Cross_entropy#Relation_to_maximum_likelihood

In [16]:
def cross_ent(predictions, targets):
    N = predictions.shape[1] # (3,34), so index 1 for samples
    targets_ = np.squeeze(np.asarray(targets))
    predictions_ = np.squeeze(np.asarray(predictions))
    ce = -np.sum(targets_*np.log(predictions_))/N
    return ce

### Backpropagation

#### Cross entropy loss

$$
\begin{align}
    \delta^{(4)}=&\quad\frac{\partial}{\partial z^{(3)}}\ \frac{1}{2} \Big\lVert Y-H_{\mathcal{W},\mathcal{b}}(\mathcal{X})\Big\rVert^{2}\\
    =&\quad\mathcal{A}^{(4)}-Y\\
    =&\underset{\mathcal{A}^{(4)}\ \in\ \mathbb{R}^{3\times34}}{
    \begin{bmatrix}
        a_{1,1} & \ldots & a_{1,34}\\
        a_{2,1} & \ldots & a_{2,34}\\
        a_{3,1} & \ldots & a_{3,34}
    \end{bmatrix}}
    -
    \underset{\mathcal{Y}^{(4)}\ \in\ \mathbb{R}^{3\times34}}{
    \begin{bmatrix}
        y_{1,1} & \ldots & y_{1,34}\\
        y_{2,1} & \ldots & y_{2,34}\\
        y_{3,1} & \ldots & y_{3,34}
    \end{bmatrix}}\\
    =&\underset{\mathcal{\delta}^{(4)}\ \in\ \mathbb{R}^{3\times34}}{
    \begin{bmatrix}
        d_{1,1} & \ldots & d_{1,34}\\
        d_{2,1} & \ldots & d_{2,34}\\
        d_{3,1} & \ldots & d_{3,34}
    \end{bmatrix}}
\end{align}
$$

#### Layer 3
$$
\begin{align}
    \nabla W^{(3)} =& \delta^{(4)}A^{(3)^{\top}}\\
    =& 
    \underset{\mathcal{\delta}^{(4)}\ \in\ \mathbb{R}^{3\times34}}{
    \begin{bmatrix}
        d_{1,1} & \ldots & d_{1,34}\\
        d_{2,1} & \ldots & d_{2,34}\\
        d_{3,1} & \ldots & d_{3,34}
    \end{bmatrix}}
    \
    \underset{\mathcal{A}^{(3)^{\top}}\ \in\ \mathbb{R}^{34\times16}}{
    \begin{bmatrix}
        a_{1,1} & \ldots & a_{1,16}\\
        \vdots & \ddots & \vdots\\
        a_{34,1} & \ldots & a_{34,16}
    \end{bmatrix}}\\
    =&
    \underset{\nabla\mathcal{W}^{(3)}\ \in\ \mathbb{R}^{3\times16}}{
    \begin{bmatrix}
        w_{1,1} & \ldots & w_{1,16}\\
        w_{2,1} & \ldots & w_{2,16}\\
        w_{3,1} & \ldots & w_{3,16}
    \end{bmatrix}}\\
    \nabla b^{(3)}=&\delta^{(4)}\\
    =&\underset{\mathcal{\delta}^{(4)}\ \in\ \mathbb{R}^{3\times34}}{
    \begin{bmatrix}
        d_{1,1} & \ldots & d_{1,34}\\
        d_{2,1} & \ldots & d_{2,34}\\
        d_{3,1} & \ldots & d_{3,34}
    \end{bmatrix}}\\
    \delta^{(3)} =&\mathcal{W}^{(3)^{\top}}\delta^{(4)}\odot\frac{\partial}{\partial z^{(2)}}\text{ReLU}(z^{(2)})\\
    =&
    \underset{\mathcal{W}^{(3)^{\top}}\ \in\ \mathbb{R}^{16\times3}}{
    \begin{bmatrix}
        w_{1,1} & w_{1,2} & w_{1,3}\\
        \vdots & \ddots & \vdots\\
        w_{16,1} & w_{16,2} & w_{16,3}
    \end{bmatrix}}
    \
    \underset{\mathcal{\delta}^{(4)}\ \in\ \mathbb{R}^{3\times34}}{
    \begin{bmatrix}
        d_{1,1} & \ldots & d_{1,34}\\
        d_{2,1} & \ldots & d_{2,34}\\
        d_{3,1} & \ldots & d_{3,34}
    \end{bmatrix}}
    \odot
    \frac{\partial}{\partial z^{(2)}}
    \text{ReLU}
    \Biggl(
    \ 
        \underset{\mathcal{Z}^{(2)}\ \in\ \mathbb{R}^{16\times34}}{
        \begin{bmatrix}
            z_{1,1} & \ldots & z_{1,34}\\
            \vdots & \ddots & \vdots\\
            z_{16,1} & \ldots & z_{16,34}
        \end{bmatrix}}
    \ \Biggl)\\
    =&
    \underset{\mathcal{\delta}^{(3)}\ \in\ \mathbb{R}^{16\times34}}{
    \begin{bmatrix}
        d_{1,1} & \ldots & d_{1,34}\\
        \vdots & \ddots & \vdots\\
        d_{16,1} & \ldots & d_{16,34}
    \end{bmatrix}}
\end{align}
$$

#### Layer 2
$$
\begin{align}
    \nabla W^{(2)} =& \delta^{(3)}A^{(2)^{\top}}\\
    =&
    \underset{\mathcal{\delta}^{(3)}\ \in\ \mathbb{R}^{16\times34}}{
    \begin{bmatrix}
        d_{1,1} & \ldots & d_{1,34}\\
        \vdots & \ddots & \vdots\\
        d_{16,1} & \ldots & d_{16,34}
    \end{bmatrix}}
    \
    \underset{\mathcal{A}^{(2)^{{\top}}}\ \in\ \mathbb{R}^{34\times16}}{
            \begin{bmatrix}
                a_{1,1} & \ldots & a_{1,16}\\
                \vdots & \ddots & \vdots\\
                a_{34,1} & \ldots & a_{34,16}
            \end{bmatrix}}\\
    \nabla b^{(2)} =& \delta^{(3)}\\
    =&
    \underset{\mathcal{\delta}^{(3)}\ \in\ \mathbb{R}^{16\times34}}{
    \begin{bmatrix}
        d_{1,1} & \ldots & d_{1,34}\\
        \vdots & \ddots & \vdots\\
        d_{16,1} & \ldots & d_{16,34}
    \end{bmatrix}}\\
    \delta^{(2)} =&\mathcal{W}^{(2)^{\top}}\delta^{(3)}\odot\frac{\partial}{\partial z^{(1)}}\text{ReLU}(z^{(1)})\\
    =&
    \underset{\mathcal{W}^{(2)^{\top}}\ \in\ \mathbb{R}^{16\times16}}{
    \begin{bmatrix}
        w_{1,1} & \ldots & w_{1,16}\\
        \vdots & \ddots & \vdots\\
        w_{16,1} & \ldots & w_{16,16}
    \end{bmatrix}}
    \
    \underset{\mathcal{\delta}^{(3)}\ \in\ \mathbb{R}^{16\times34}}{
    \begin{bmatrix}
        d_{1,1} & \ldots & d_{1,34}\\
        \vdots & \ddots & \vdots\\
        d_{16,1} & \ldots & d_{16,34}
    \end{bmatrix}}
    \
    \odot\frac{\partial}{\partial z^{(1)}}\text{ReLU}
    \Biggl(
    \
        \underset{\mathcal{Z}^{(1)}\ \in\ \mathbb{R}^{16\times34}}{
        \begin{bmatrix}
            z_{1,1} & \ldots & z_{1,34}\\
            \vdots & \ddots & \vdots\\
            z_{16,1} & \ldots & z_{16,34}
        \end{bmatrix}}
    \
    \Biggl)\\
    =&
    \underset{\mathcal{\delta}^{(2)}\ \in\ \mathbb{R}^{16\times34}}{
    \begin{bmatrix}
        d_{1,1} & \ldots & d_{1,34}\\
        \vdots & \ddots & \vdots\\
        d_{16,1} & \ldots & d_{16,34}
    \end{bmatrix}}
\end{align}
$$

#### Layer 1
$$
\begin{align}
    \nabla W^{(1)} =& \delta^{(2)}\mathcal{X}^{\top}\\
    =&
    \underset{\mathcal{\delta}^{(2)}\ \in\ \mathbb{R}^{16\times34}}{
    \begin{bmatrix}
        d_{1,1} & \ldots & d_{1,34}\\
        \vdots & \ddots & \vdots\\
        d_{16,1} & \ldots & d_{16,34}
    \end{bmatrix}}
    \ 
    \underset{\mathcal{X^{\top}}\ \in\ \mathbb{R}^{34\times34}}{
            \begin{bmatrix}
                x_{1,1} & \ldots & x_{1,34}\\
                \vdots & \ddots & \vdots\\
                x_{34,1} & \ldots & x_{34,34}
            \end{bmatrix}}\\
    \nabla b^{(1)} =& \delta^{(2)}\\
    =& \underset{\mathcal{\delta}^{(2)}\ \in\ \mathbb{R}^{16\times34}}{
    \begin{bmatrix}
        d_{1,1} & \ldots & d_{1,34}\\
        \vdots & \ddots & \vdots\\
        d_{16,1} & \ldots & d_{16,34}
    \end{bmatrix}}
\end{align}
$$

### Training loop

In [17]:
def train(model, loss, epochs, features, labels, opt):
    for e in range(epochs):
        output = model(features)
        loss_val = loss(output, labels)
        deriv_loss = output - labels
        
#         try:
        model.backward(deriv_loss)
        opt.step()
        opt.zero_gradients()
#         except:
#             print(f'error at epoch {e}')
#             sys.exit(0)
        
        if e % 100 == 0:
            print(f'(epoch {e}) loss: {loss_val}')

### Hyperparameters

In [18]:
epochs = 10000
lr = 0.001

In [19]:
features = np.eye(G.number_of_nodes())
model = GCN(A_hat, num_classes)
opt = GradientDescent(model.parameters, lr)
train(model, cross_ent, epochs, features, labels, opt)

loss: 2.567373113054218
epoch 0 loss: 2.567373113054218
loss: 1.5426103777974707
loss: 1.3519693398754289
loss: 1.223366740692084
loss: 1.127410506947819
loss: 1.0664283685789915
loss: 1.0364800052928775
loss: 1.0244247847866381
loss: 1.0198563145420791
loss: 1.0178217509658367
loss: 1.0165535727921744
loss: 1.0155052588310678
loss: 1.0145151519110935
loss: 1.0135366387473557
loss: 1.012545152076279
loss: 1.0115633969578737
loss: 1.0105824553202192
loss: 1.009608943674775
loss: 1.008634963421475
loss: 1.0076627824518964
loss: 1.0066921368323707
loss: 1.005731248285342
loss: 1.004769689814484
loss: 1.0038086486726547
loss: 1.0028474356625807
loss: 1.001887360378827
loss: 1.0009282663305354
loss: 0.9999699896179225
loss: 0.9990123696285794
loss: 0.9980552493988192
loss: 0.9970984748879634
loss: 0.9961418944159204
loss: 0.9951853583166096
loss: 0.994228718731259
loss: 0.9932718294823559
loss: 0.9923145459926862
loss: 0.9913557472936674
loss: 0.9903965329739907
loss: 0.9894366595573667
los

loss: 0.47243198713893575
loss: 0.4707840806282802
loss: 0.46913319265219844
loss: 0.46750447072685747
loss: 0.4658779336742775
loss: 0.4642525828408585
loss: 0.4626483721186596
loss: 0.4610387059332496
loss: 0.4594507774055386
loss: 0.45786481037288473
loss: 0.4562794803234239
loss: 0.45470923368418115
loss: 0.45313843510169804
loss: 0.4515834844928766
loss: 0.4500284175290331
loss: 0.4484892025538442
loss: 0.44694971335146255
loss: 0.44542585968869935
loss: 0.44390835409965457
loss: 0.4423900358429136
loss: 0.4408881290264388
loss: 0.43938556208701646
loss: 0.4378987652074679
loss: 0.4364109775178938
loss: 0.4349387514715237
loss: 0.4334670434273005
loss: 0.43200920347304167
loss: 0.4305496747974809
loss: 0.4291007470454695
loss: 0.4276446973122763
loss: 0.4262061961173832
loss: 0.4247699083280031
loss: 0.4233578104056692
loss: 0.42193008828194856
loss: 0.42051882692418247
loss: 0.41910946990784254
loss: 0.41771282950669314
loss: 0.416332863453738
loss: 0.4149410263058747
loss: 0.413

loss: 0.10355899401907796
loss: 0.10328813904405798
loss: 0.10301814485919836
loss: 0.10274900961691831
loss: 0.10248073086454795
loss: 0.10221330574251777
loss: 0.10194673111623828
loss: 0.10168100366473304
loss: 0.10140732468891418
loss: 0.10114555355878325
loss: 0.10088385968879776
loss: 0.10062250007685003
loss: 0.10036164130860555
loss: 0.10010139079735503
loss: 0.09984181746027747
loss: 0.0995745176681802
loss: 0.09931858361750308
loss: 0.0990601218162088
loss: 0.09880271160552413
loss: 0.098546251589613
loss: 0.09828252775568468
loss: 0.09802985847499347
loss: 0.0977682438822164
loss: 0.09750945127479496
loss: 0.09725278560214849
loss: 0.0969970320041859
loss: 0.09674274326356784
loss: 0.0964897421910066
loss: 0.09623790074779866
loss: 0.09598712497665318
loss: 0.09573734488575726
loss: 0.09548850756131919
loss: 0.09524057241538077
loss: 0.0949935078644883
loss: 0.09474728897907371
loss: 0.09450189579932257
loss: 0.09425731211413231
loss: 0.09401077888813876
loss: 0.093765716018

loss: 0.040145815681940644
loss: 0.04005165986596296
loss: 0.03995770583030961
loss: 0.0398606200427781
loss: 0.0397678760580724
loss: 0.039675186353901264
loss: 0.03958259565855099
loss: 0.03949013879094504
loss: 0.03939784269556069
loss: 0.03930572811699387
loss: 0.039213810950946996
loss: 0.0391221033193851
loss: 0.039027422233215044
loss: 0.03893689279900112
loss: 0.03884642820013029
loss: 0.03875606915925891
loss: 0.038665847440151786
loss: 0.038575787676759885
loss: 0.03848590887806527
loss: 0.03839622564334675
loss: 0.038303692942941
loss: 0.038215130711887424
loss: 0.038126633508066436
loss: 0.03803824053118872
loss: 0.03794998244859685
loss: 0.037861883114304905
loss: 0.03777396098939273
loss: 0.037686230293612344
loss: 0.03759870192686364
loss: 0.037508469687563196
loss: 0.03742203272335446
loss: 0.037335670047787387
loss: 0.037249417292839405
loss: 0.037163302388260135
loss: 0.03707734710429156
loss: 0.036991568326092523
loss: 0.03690597908797593
loss: 0.03681780019859929
lo

loss: 0.016530359998101176
loss: 0.016501190098289157
loss: 0.016472090129337823
loss: 0.01644306072814005
loss: 0.016414102413713074
loss: 0.016385215602727238
loss: 0.016356400623147648
loss: 0.016327657726178803
loss: 0.01629898709669185
loss: 0.016270388862307167
loss: 0.01624158035531238
loss: 0.01621318190973325
loss: 0.016184848377103662
loss: 0.016156580740766823
loss: 0.01612837982644044
loss: 0.01610024632164327
loss: 0.016072180793072155
loss: 0.016044183702048243
loss: 0.016016255418177656
loss: 0.01598839623138312
loss: 0.015960606362465022
loss: 0.015932885972344855
loss: 0.015905235170137505
loss: 0.015877392840397016
loss: 0.015849932047478577
loss: 0.015822534002959764
loss: 0.015795199581754404
loss: 0.015767929518725066
loss: 0.015740724425668474
loss: 0.01571358480654311
loss: 0.01568651107103607
loss: 0.015659503546590483
loss: 0.01563256248902474
loss: 0.015605688091876738
loss: 0.015578880494604005
loss: 0.01555213292541445
loss: 0.015525452317201881
loss: 0.0154

loss: 0.008003086340400752
loss: 0.007993703795644382
loss: 0.007984339271238101
loss: 0.007974992723689263
loss: 0.00796566410941414
loss: 0.007956353384757229
loss: 0.007947060506009438
loss: 0.007937785429424074
loss: 0.007928528111231795
loss: 0.007919288507654328
loss: 0.007910066574916727
loss: 0.007900862269258731
loss: 0.007891675546945112
loss: 0.007882506364274995
loss: 0.007873354677590483
loss: 0.007864220443284452
loss: 0.00785510361780755
loss: 0.007846004157674766
loss: 0.007836922019471204
loss: 0.007827857159857421
loss: 0.007818809535574211
loss: 0.007809779103447042
loss: 0.0078007658203900305
loss: 0.007791769643409309
loss: 0.007782790529606552
loss: 0.007773828436181631
loss: 0.007764883320435358
loss: 0.007755955139771868
loss: 0.007747043851700706
loss: 0.007738159955286601
loss: 0.007729284895366608
loss: 0.007720426370229413
loss: 0.007711584356562358
loss: 0.0077027588297288955
loss: 0.007693949763871264
loss: 0.007685157132006711
loss: 0.007676380906118374
l

loss: 0.005211529091832322
loss: 0.005206964094845378
loss: 0.005202400783890747
loss: 0.0051978407600631795
loss: 0.005193296879809121
loss: 0.00518875359551809
loss: 0.005184214618789672
loss: 0.005179693804151568
loss: 0.005175168407726262
loss: 0.005170652375965591
loss: 0.005166149372480035
loss: 0.005161644761691899
loss: 0.005157148445427349
loss: 0.005152657470432856
loss: 0.005148183341848418
loss: 0.005143705733426013
loss: 0.005139236280991914
loss: 0.005134780764883037
loss: 0.005130324521705721
loss: 0.005125873691642772
loss: 0.00512142995377974
loss: 0.005117001744270927
loss: 0.005112571168516135
loss: 0.005108147538592169
loss: 0.005103738765422019
loss: 0.005099328163448197
loss: 0.0050949239572664505
loss: 0.005090526707166298
loss: 0.005086143759847083
loss: 0.005081759474996915
loss: 0.005077381009907715
loss: 0.005073009839245658
loss: 0.005068652817865414
loss: 0.005064294503599472
loss: 0.005059941859637222
loss: 0.005055596550859009
loss: 0.005051266392059717
l

loss: 0.0038011620509223317
loss: 0.0037985196307472177
loss: 0.0037958802662073307
loss: 0.003793249272675616
loss: 0.0037906159120508977
loss: 0.003787959745341758
loss: 0.0037853333026347026
loss: 0.0037827151813520053
loss: 0.0037800946916089558
loss: 0.00377745134160206
loss: 0.0037748377142776206
loss: 0.0037722323599010854
loss: 0.003769624636265981
loss: 0.0037670199193372547
loss: 0.003764392076620774
loss: 0.003761799428318165
loss: 0.003759204409451361
loss: 0.0037566123719134257
loss: 0.003753997156882831
loss: 0.003751417110133495
loss: 0.003748834692081695
loss: 0.0037462552308195275
loss: 0.003743678726406734
loss: 0.003741083985661729
loss: 0.0037385141091071973
loss: 0.0037359471642932954
loss: 0.003733383151366441
loss: 0.0037308008300178906
loss: 0.003728243391785988
loss: 0.003725688861085402
loss: 0.0037231372381420083
loss: 0.003720593679606617
loss: 0.0037180211222824972
loss: 0.0037154789479067244
loss: 0.0037129396566439256
loss: 0.0037104083825438526
loss: 0.0

loss: 0.0029041262789761147
loss: 0.002902481703264326
loss: 0.002900842057934919
loss: 0.002899200384194266
loss: 0.0028975281209153584
loss: 0.002895890290805343
loss: 0.002894257365483105
loss: 0.0028926224118057658
loss: 0.0028909568981974917
epoch 3400 loss: 0.0028909568981974917
loss: 0.002889325758141433
loss: 0.0028876960716498813
loss: 0.0028860712657101228
loss: 0.002884444438064944
loss: 0.0028827869851602694
loss: 0.002881163954356708
loss: 0.00287954236790425
loss: 0.002877925637521036
loss: 0.002876274842396477
loss: 0.0028746584121826374
loss: 0.002873043414323853
loss: 0.0028714298522479146
loss: 0.0028698211225501916
loss: 0.0028681782780036326
loss: 0.002866569847927452
loss: 0.0028649628413443705
loss: 0.002863360642661474
loss: 0.0028617564357293263
loss: 0.002860121507585948
loss: 0.00285852103533926
loss: 0.002856921977426704
loss: 0.0028553277032483636
loss: 0.0028536993151225985
loss: 0.002852105322291315
loss: 0.002850512731884173
loss: 0.002848921547322896
los

loss: 0.0023527980416821787
loss: 0.0023516358352238887
loss: 0.0023505095093129666
loss: 0.0023493814968774947
loss: 0.002348254311603951
loss: 0.0023470961962944147
loss: 0.0023459738836913904
epoch 3800 loss: 0.0023459738836913904
loss: 0.0023448498848588186
loss: 0.0023437267055810705
loss: 0.0023426068088665125
loss: 0.002341453485217651
loss: 0.00234033348942713
loss: 0.0023392143054215588
loss: 0.0023380983885495383
loss: 0.0023369808000469642
loss: 0.0023358322855312162
loss: 0.002334719531401606
loss: 0.002333605101052406
loss: 0.002332491481302653
loss: 0.002331378675030201
loss: 0.0023302373867893906
loss: 0.0023291269207934002
loss: 0.0023280172574934857
loss: 0.002326910833762036
loss: 0.002325802747997514
loss: 0.002324663747734221
loss: 0.002323558029022399
loss: 0.002322455534287123
loss: 0.002321351377453334
loss: 0.0023202163570790217
loss: 0.002319116968494895
loss: 0.0023180159134479133
loss: 0.0023169156522597767
loss: 0.0023158161878220162
loss: 0.0023146882763994

loss: 0.0019428725968596424
loss: 0.0019420708443704184
loss: 0.0019412806171462318
loss: 0.001940447305043108
loss: 0.0019396603484925121
loss: 0.0019388590239463998
loss: 0.0019380600757356878
loss: 0.0019372598440090861
loss: 0.0019364424585580542
loss: 0.001935645519008669
loss: 0.0019348472930124673
loss: 0.0019340624368410424
loss: 0.0019332668379660213
loss: 0.0019324394641217
loss: 0.001931656587721135
loss: 0.0019308629876591008
loss: 0.0019300681105747835
loss: 0.0019292737881113039
loss: 0.0019284641497168294
loss: 0.0019276712701033075
loss: 0.0019268789379263587
loss: 0.0019261017701585443
loss: 0.001925310221344448
loss: 0.0019244905659456953
loss: 0.001923713537898944
loss: 0.001922923976437163
loss: 0.0019221367572779396
loss: 0.0019213611101580523
loss: 0.0019205424075599202
loss: 0.001919757173314406
loss: 0.001918983478284816
loss: 0.0019181972285130494
loss: 0.0019174133153098008
loss: 0.0019166281428725878
loss: 0.001915825894954586
loss: 0.0019150439628188387
loss

loss: 0.00166396023694365
loss: 0.001663352722061628
loss: 0.0016627592816273085
loss: 0.0016621522605997899
loss: 0.0016615456234018704
loss: 0.0016609407883936421
loss: 0.0016603067670520832
loss: 0.0016597140444040031
loss: 0.0016591092043662821
loss: 0.0016585061573324317
loss: 0.0016579020561141651
loss: 0.00165731060960612
loss: 0.0016566803145342643
loss: 0.0016560776261671474
loss: 0.0016554875585679896
loss: 0.0016548867834893463
loss: 0.0016542849594759167
loss: 0.0016536835143703237
loss: 0.0016530544012557885
loss: 0.0016524664630697041
loss: 0.0016518678375092716
loss: 0.0016512681617856942
loss: 0.0016506811078108314
loss: 0.0016500833405122498
loss: 0.0016494564989292457
loss: 0.0016488708175137947
loss: 0.0016482744442965728
loss: 0.0016476770260371576
loss: 0.0016470799814463062
loss: 0.0016464847094897423
loss: 0.0016459006418282711
loss: 0.0016452764471330784
loss: 0.0016446811725032932
loss: 0.0016440876612698163
loss: 0.00164350532376003
loss: 0.0016429108955924156

loss: 0.0014353587954871383
loss: 0.0014348943307259554
loss: 0.0014344048658539923
loss: 0.0014339424567601086
loss: 0.0014334904240754013
loss: 0.0014330282938440353
loss: 0.0014325652758195636
loss: 0.0014321025074182413
loss: 0.0014316159065545138
loss: 0.0014311653005715844
loss: 0.0014307046111236696
loss: 0.001430243034251101
loss: 0.0014297817044051993
loss: 0.0014293217414952056
loss: 0.0014288608937204454
loss: 0.001428375137979351
loss: 0.0014279275595303815
loss: 0.00142746766292396
loss: 0.001427009127765973
loss: 0.0014265497081805392
loss: 0.0014260905340690436
loss: 0.0014256076112572635
loss: 0.0014251605521791303
loss: 0.0014247034357551364
loss: 0.00142424543540349
loss: 0.0014237876780909809
loss: 0.0014233312763771413
loss: 0.0014228739933402508
loss: 0.0014224030908832463
loss: 0.001421947632150127
loss: 0.0014214912907863874
loss: 0.001421036299634963
loss: 0.0014205804276430922
loss: 0.0014201247972702444
loss: 0.0014196566810342265
loss: 0.0014192017468917374
l

loss: 0.001268350571673095
loss: 0.0012679803731401277
loss: 0.0012676112725350719
loss: 0.0012672186781416382
loss: 0.001266860571143463
loss: 0.001266492144304755
loss: 0.001266122951836017
loss: 0.0012657539295067915
loss: 0.0012653860019009608
loss: 0.0012649946144450742
loss: 0.0012646273644877297
loss: 0.0012642593472005446
loss: 0.0012639027215122667
loss: 0.0012635348654117505
loss: 0.0012631681009278764
loss: 0.0012627779098347183
loss: 0.0012624118210540851
loss: 0.001262044967002783
loss: 0.0012616791984120372
loss: 0.0012613126662427158
loss: 0.0012609472196944528
loss: 0.0012605810112218062
loss: 0.0012602149701635478
loss: 0.001259827410207701
loss: 0.00125947230373519
loss: 0.0012591078516559248
loss: 0.001258743555434921
loss: 0.0012583784993708844
loss: 0.0012580136102141263
loss: 0.0012576498036753322
loss: 0.001257262666599048
loss: 0.0012569097811781073
loss: 0.0012565466348203875
loss: 0.0012561827306187536
loss: 0.001255818992739766
loss: 0.001255456334351465
loss

loss: 0.0011234838953447343
loss: 0.001123167197754955
loss: 0.0011228720884934472
loss: 0.0011225770855378136
loss: 0.0011222814232847115
loss: 0.0011219866419484854
loss: 0.0011216912027595316
loss: 0.0011213966447293094
loss: 0.0011211021951680396
loss: 0.0011207867161120147
loss: 0.0011204927756476783
loss: 0.001120198176657133
loss: 0.0011199044547622991
loss: 0.0011196108390234918
loss: 0.0011193165669568443
loss: 0.0011190231724582528
epoch 5900 loss: 0.0011190231724582528
loss: 0.001118729123050782
loss: 0.0011184156219384766
loss: 0.001118122840746637
loss: 0.0011178294025892845
loss: 0.0011175368379895384
loss: 0.001117243617892274
loss: 0.0011169512716317628
loss: 0.0011166590314123349
loss: 0.0011163661378523779
loss: 0.0011160538328268586
loss: 0.0011157614429122564
loss: 0.0011154699228421773
loss: 0.0011151785065558317
loss: 0.0011148864363709986
loss: 0.0011145952365538881
loss: 0.001114303384277251
loss: 0.001114012402620514
loss: 0.0011137012844209587
loss: 0.00111341

loss: 0.0010148001625746059
loss: 0.001014554695706428
loss: 0.001014309964859772
loss: 0.0010140653114673515
loss: 0.0010138200837959874
loss: 0.0010135755925884324
loss: 0.001013331179951929
loss: 0.0010130861948247267
loss: 0.0010128419465441165
loss: 0.0010125793318569727
loss: 0.0010123354809283124
loss: 0.0010120910565979925
loss: 0.0010118473660020958
loss: 0.001011603753071728
loss: 0.0010113595685409327
loss: 0.001011116118148753
epoch 6300 loss: 0.001011116118148753
loss: 0.0010108727465004018
loss: 0.001010610395774613
loss: 0.0010103674256521737
loss: 0.0010101245318978843
loss: 0.0010098817149125027
loss: 0.0010096383272188989
loss: 0.001009395670971627
loss: 0.001009153092588147
loss: 0.0010089099452581499
loss: 0.0010086491569626973
loss: 0.0010084070564268855
loss: 0.0010081643854671731
loss: 0.0010079224430927704
loss: 0.0010076805773224465
loss: 0.0010074387885436512
loss: 0.0010071964317235104
loss: 0.0010069548040591652
loss: 0.0010067132544429448
loss: 0.0010064528

loss: 0.0009172796798393488
loss: 0.00091707606718921
loss: 0.0009168725150777441
loss: 0.0009166690237756808
loss: 0.0009164650403166547
loss: 0.0009162616762970989
loss: 0.000916058373822151
loss: 0.0009158545804668905
loss: 0.0009156514067046785
loss: 0.0009154317295897189
loss: 0.0009152283272859359
loss: 0.0009150255418721641
loss: 0.0009148228167505564
loss: 0.0009146201521884226
loss: 0.0009144169972006856
loss: 0.0009142144593874251
loss: 0.0009140119828588865
loss: 0.0009138090171690884
loss: 0.0009135901397235338
loss: 0.0009133880512722514
loss: 0.0009131854724070867
loss: 0.0009129835082061799
loss: 0.0009127816040675465
loss: 0.0009125797602548417
loss: 0.0009123774277522314
loss: 0.000912175710187234
loss: 0.0009119740536630569
loss: 0.0009117719097006558
loss: 0.0009115538891932536
loss: 0.0009113526192211364
loss: 0.0009111508605591838
loss: 0.0009109497143246421
loss: 0.0009107486279027086
loss: 0.0009105476015536891
loss: 0.0009103460882222736
loss: 0.0009101451875819

loss: 0.0008411523303735082
loss: 0.0008409628429542081
loss: 0.0008407892005423574
loss: 0.0008406156070492959
loss: 0.0008404420626661131
loss: 0.0008402680865360104
loss: 0.0008400946449664403
loss: 0.000839921253022205
loss: 0.0008397479108838068
loss: 0.0008395741382805137
loss: 0.0008394009003106537
loss: 0.0008392277126310355
loss: 0.0008390545754120622
loss: 0.0008388659112145867
loss: 0.0008386930496531966
loss: 0.0008385202368718465
loss: 0.0008383469937930471
loss: 0.0008381742831419904
loss: 0.000838001621783468
loss: 0.0008378290098969474
loss: 0.0008376559689879255
loss: 0.0008374834605791438
loss: 0.0008373110021241546
loss: 0.0008371385937923567
loss: 0.0008369506980371925
loss: 0.0008367785639526695
loss: 0.0008366064783209132
loss: 0.0008364344413280688
loss: 0.000836261975802653
loss: 0.000836090040625363
loss: 0.0008359181545885109
loss: 0.0008357463178680025
loss: 0.0008355740538727741
loss: 0.0008354023202865676
loss: 0.0008352306364879938
loss: 0.0008350590026431

loss: 0.0007701249746119821
loss: 0.0007699733991976795
loss: 0.0007698088419436436
loss: 0.0007696575499787757
loss: 0.000769506325940035
loss: 0.0007693551696383144
loss: 0.000769212223510545
loss: 0.0007690611144330546
loss: 0.0007689100736054241
loss: 0.0007687591008328725
loss: 0.0007686081959226801
loss: 0.0007684573586840424
loss: 0.0007683065889282445
loss: 0.0007681428850590617
loss: 0.0007680005159493265
loss: 0.0007678500008737528
loss: 0.0007676995518679206
loss: 0.0007675491687611877
loss: 0.0007673988513846461
loss: 0.0007672485995711022
loss: 0.0007671065351756885
loss: 0.0007669433531769915
loss: 0.0007667933527676261
loss: 0.0007666434165651742
loss: 0.0007664935444194615
loss: 0.0007663437361818629
loss: 0.0007661939917051847
loss: 0.0007660524172930778
loss: 0.0007659027130480988
loss: 0.0007657530731816147
loss: 0.0007656034975389266
loss: 0.0007654410560228701
loss: 0.0007652917493508851
loss: 0.0007651505973246384
loss: 0.0007650013280546746
loss: 0.00076485212161

loss: 0.0007138776668756501
loss: 0.0007137476981657877
loss: 0.0007136177653714193
loss: 0.0007134878685637172
loss: 0.000713358007812854
loss: 0.0007132281831878975
loss: 0.0007130983947569164
loss: 0.0007129686425869343
loss: 0.0007128389267440046
loss: 0.0007126975455426152
loss: 0.0007125756850389545
loss: 0.0007124461199150183
loss: 0.0007123165908895605
loss: 0.0007121870980293612
loss: 0.0007120576413999909
loss: 0.0007119282210661946
loss: 0.0007117988370915683
loss: 0.0007116578080605463
loss: 0.0007115286161438753
loss: 0.0007114071171766372
loss: 0.000711277919088842
loss: 0.0007111487571992519
loss: 0.0007110196315712352
loss: 0.0007108905422671258
loss: 0.0007107614893484379
loss: 0.0007106208116943685
loss: 0.0007104919505653466
loss: 0.0007103631246540271
loss: 0.000710234334033269
loss: 0.0007101055787749422
loss: 0.000709976858949815
loss: 0.0007098481746276144
loss: 0.0007097271778319504
loss: 0.0007095868470101335
loss: 0.0007094583125029354
loss: 0.0007093298132693

loss: 0.0006610035465134496
loss: 0.0006608906365190546
loss: 0.0006607777560160054
loss: 0.0006606649050559289
loss: 0.0006605520836898293
loss: 0.000660439291967872
loss: 0.0006603265299395801
loss: 0.0006602137976536732
loss: 0.0006600905142684073
loss: 0.0006599852518075946
loss: 0.0006598726408833704
loss: 0.0006597600595380564
loss: 0.0006596475078208882
loss: 0.0006595349857802705
epoch 8400 loss: 0.0006595349857802705
loss: 0.0006594224934640533
loss: 0.0006593100309192612
loss: 0.0006591975981922965
loss: 0.000659074634372652
loss: 0.0006589696656569192
loss: 0.0006588573543394275
loss: 0.0006587450726690978
loss: 0.0006586328206928847
loss: 0.0006585205984569438
loss: 0.0006584084060068904
loss: 0.0006582962433875789
loss: 0.0006581735679609094
loss: 0.000658061566747236
loss: 0.0006579495944082557
loss: 0.0006578376509978889
loss: 0.0006577257365692569
loss: 0.0006576211471153491
loss: 0.0006575092218401727
loss: 0.0006573973264290535
loss: 0.0006572749362785243
loss: 0.0006

loss: 0.0006174973107981134
loss: 0.0006173974887718826
loss: 0.0006172976928091737
loss: 0.0006171882528156892
loss: 0.0006170955929553582
loss: 0.000616995900283284
epoch 8800 loss: 0.000616995900283284
loss: 0.0006168962335136039
loss: 0.0006167965926734819
loss: 0.000616696977789827
loss: 0.0006165973888890329
loss: 0.0006164978259970887
loss: 0.0006163982891396345
loss: 0.0006162987783418338
loss: 0.0006161896439559462
loss: 0.0006160972620209253
loss: 0.0006159978543460469
loss: 0.000615898472566602
loss: 0.0006157991167086714
loss: 0.0006156997867978129
loss: 0.0006156004828592888
loss: 0.000615501204917933
loss: 0.0006154019529982244
loss: 0.0006152930962007341
loss: 0.0006152009662396218
loss: 0.000615101817189001
loss: 0.0006150028409538755
loss: 0.0006149038894555282
loss: 0.0006148049627287887
loss: 0.0006147060608079475
loss: 0.0006146071837269397
loss: 0.0006145083315191029
loss: 0.0006144095042174097
loss: 0.0006143010912899186
loss: 0.0006142024020644404
loss: 0.0006141

loss: 0.0005766797379397629
loss: 0.000576591560876603
loss: 0.0005765034066244539
loss: 0.0005764152751932393
loss: 0.0005763271665926342
loss: 0.0005762390808321647
loss: 0.0005761510179211897
loss: 0.0005760629778688607
loss: 0.0005759749606840606
loss: 0.0005758936436482611
loss: 0.0005757968204123624
loss: 0.0005757088921230153
loss: 0.0005756209865317353
loss: 0.0005755331036486763
loss: 0.0005754452434836146
loss: 0.000575357406046313
loss: 0.0005752695913462225
loss: 0.0005751817993926067
loss: 0.0005750940301946658
loss: 0.0005750129531447482
loss: 0.0005749163970602922
loss: 0.0005748287162970919
loss: 0.000574741058122101
loss: 0.0005746534225455818
loss: 0.0005745658095774685
loss: 0.0005744782192276029
loss: 0.0005743906515056162
loss: 0.0005743031064208709
loss: 0.0005742155839826594
loss: 0.0005741347457255674
loss: 0.0005740384554867282
loss: 0.0005739510210319436
loss: 0.0005738636090582454
loss: 0.0005737762195760011
loss: 0.000573688852595302
loss: 0.0005736015081260

loss: 0.0005428243443730864
loss: 0.0005427452918524388
loss: 0.0005426726486245925
loss: 0.0005425854939249612
loss: 0.0005425065188615787
loss: 0.0005424275641865925
loss: 0.0005423486298989259
loss: 0.0005422697159974397
loss: 0.0005421908224809715
loss: 0.0005421119493482176
loss: 0.0005420330965979299
loss: 0.0005419542642286309
loss: 0.0005418818326799929
loss: 0.0005418029885499474
loss: 0.0005417160941694696
loss: 0.000541637359483715
loss: 0.0005415586450152455
loss: 0.0005414799507638242
loss: 0.0005414012767290868
loss: 0.0005413226229106221
loss: 0.00054124398930797
loss: 0.000541171748755823
loss: 0.0005410931031854867
loss: 0.000541014478310337
loss: 0.0005409358741250559
loss: 0.0005408572906244205
loss: 0.0005407787278030701
loss: 0.0005406921372371402
loss: 0.0005406136831968935
loss: 0.0005405352491938862
loss: 0.000540463200324557
loss: 0.0005403847541225778
loss: 0.0005403063284384555
loss: 0.000540227923267926
loss: 0.0005401495386067445
loss: 0.0005400711744506494