In [30]:
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [31]:
from wb import WeightsAndBiases
from sklearn.preprocessing import LabelBinarizer
from random import sample, choice
from fingerprint_vect import GraphFingerprint
from collections import defaultdict
from autograd import grad
from autograd.scipy.misc import logsumexp

import autograd.numpy as np
import networkx as nx
import math

In [32]:
def make_random_graph(nodes, n_edges, features_dict):
    """
    Makes a randomly connected graph. 
    """
    
    G = nx.Graph()
    for n in nodes:
        G.add_node(n, features=features_dict[n])
    
    for i in range(n_edges):
        u, v = sample(G.nodes(), 2)
        G.add_edge(u, v)
        
    return G

In [33]:
# features_dict will look like this:
# {0: array([1, 0, 0, 0, 0, 0, 0, 0, 0, 0]),
#  1: array([0, 1, 0, 0, 0, 0, 0, 0, 0, 0]),
#  2: array([0, 0, 1, 0, 0, 0, 0, 0, 0, 0]),
#  3: array([0, 0, 0, 1, 0, 0, 0, 0, 0, 0]),
#  4: array([0, 0, 0, 0, 1, 0, 0, 0, 0, 0]),
#  5: array([0, 0, 0, 0, 0, 1, 0, 0, 0, 0]),
#  6: array([0, 0, 0, 0, 0, 0, 1, 0, 0, 0]),
#  7: array([0, 0, 0, 0, 0, 0, 0, 1, 0, 0]),
#  8: array([0, 0, 0, 0, 0, 0, 0, 0, 1, 0]),
#  9: array([0, 0, 0, 0, 0, 0, 0, 0, 0, 1])}

all_nodes = [i for i in range(10)]    
lb = LabelBinarizer()
features_dict = {i:lb.fit_transform(all_nodes)[i] for i in all_nodes}

G = make_random_graph(sample(all_nodes, 6), 5, features_dict)
G.edges(data=True)
# G.nodes(data=True)

[(0, 7, {}), (1, 9, {}), (2, 9, {}), (2, 7, {})]

In [34]:
wb = WeightsAndBiases(n_layers=2, shapes=(10, 20, 10))
wb

{0: {'biases': array([[-0.01282643,  0.05126014,  0.12478795, -0.04467987, -0.23225055,
           0.05331752, -0.00878325,  0.02578893, -0.12787258,  0.02081464,
          -0.09019882,  0.13098511,  0.05393008, -0.06729331,  0.09554677,
           0.02914731,  0.01288287, -0.04030879,  0.01075938,  0.04975741]]),
  'nbr_weights': array([[-0.03632008,  0.0040548 ,  0.00716803,  0.01196963,  0.20626901,
           0.0655909 , -0.05097722, -0.10702418,  0.05852709, -0.0010574 ,
           0.09959072,  0.03393232, -0.00762132,  0.15056993, -0.05630408,
           0.03298202,  0.01378147,  0.08816432, -0.12404468,  0.06024068],
         [-0.08993807, -0.1001855 , -0.0840236 , -0.18067154, -0.04968924,
           0.04929193,  0.08026344,  0.16582503, -0.24756252,  0.0354438 ,
           0.00573073, -0.04362545,  0.01296043,  0.03062234, -0.16566107,
           0.07478288, -0.00085774, -0.09147302, -0.14658804, -0.06035202],
         [ 0.05898323,  0.13520439, -0.09526349,  0.02845017,  0.03

In [35]:
# def score(G):
#     """
#     The regressable score for each graph will be the sum of the 
#     (square root of each node + the sum of its neighbors.)
#     """
#     sum_score = 0
#     for n, d in G.nodes(data=True):
#         sum_score += math.sqrt(n)
        
#         for nbr in G.neighbors(n):
#             sum_score += nbr ** (1/3)
#     return sum_score

def score(G):
    """
    The regressable score for each graph is the number of nodes
    in the graph.
    """
    return len(G.nodes())

score(G)

6

In [36]:
G.nodes(data=True)[0][1]['features'].shape

(10,)

In [37]:
def softmax(X, axis=0):
    """
    The softmax function normalizes everything to between 0 and 1.
    """
    return np.exp(X - logsumexp(X, axis=axis, keepdims=True))

# test softmax:
X = np.random.random((1,10))
softmax(X, axis=1)

array([[ 0.07224867,  0.1477318 ,  0.12377   ,  0.0666501 ,  0.12325732,
         0.09957421,  0.13729153,  0.080011  ,  0.05878282,  0.09068256]])

In [38]:
def relu(X):
    """
    The ReLU - Rectified Linear Unit.
    """
    return X * (X > 0)


# test relu:
X = np.random.normal(0, 1, size=(5, 1))
print(X)
print('')
print(relu(X)) 

[[ 0.34396448]
 [-0.51544308]
 [ 0.12502438]
 [-0.35634757]
 [-0.77549162]]

[[ 0.34396448]
 [-0.        ]
 [ 0.12502438]
 [-0.        ]
 [-0.        ]]


In [39]:
# Make 1000 random graphs.
syngraphs = []
for i in range(100):
    n_nodes = choice([i for i in range(2, 10)])
    n_edges = choice([i for i in range(1, n_nodes**2)])
    
    G = make_random_graph(sample(all_nodes, n_nodes), n_edges, features_dict)
    syngraphs.append(G)
    
len(syngraphs)

100

In [40]:
# Write a function that computes the feature matrix, and writes the
# indices to the nodes of each graph.
def stacked_node_activations(graphs):
    """
    Note: this function should only be called for computing the
    stacked node activations after initializing the graphs.
    
    Inputs:
    =======
    - graphs: (list) a list of graphs on which to stack their
              feature vectors.
    """
    features = []
    curr_idx = 0
    for g in graphs:
        for n, d in g.nodes(data=True):
            features.append(d['features'])
            g.node[n]['idx'] = curr_idx
            curr_idx += 1
    return np.vstack(features)

# test stacked_node_activations
layers = dict()
layers[0] = stacked_node_activations(syngraphs)
layers[1] = stacked_node_activations(syngraphs)
# layers[1]

In [41]:
# Write a function that gets the indices of each node's neighbors.
def neighbor_indices(G, n):
    """
    Inputs:
    =======
    - G: the graph to which the node belongs to.
    - n: the node inside the graph G.
    
    Returns:
    ========
    - indices: (list) a list of indices, which should (but is not
               guaranteed to) correspond to a row in a large 
               stacked matrix of features.
    """
    indices = []
    for n in G.neighbors(n):
        indices.append(G.node[n]['idx'])
    return indices


# test neighbor_indices
nbr_idxs = neighbor_indices(syngraphs[0], syngraphs[0].nodes()[0])
nbr_idxs

[1]

In [42]:
# Write a function that sums each of the neighbors' activations for a
# given node in a given graph.
def neighbor_activations(G, n, activations_dict, layer):
    """
    Inputs:
    =======
    - G: the graph to which the node belongs to.
    - n: the node inside the graph G
    - activations_dict: a dictionary that stores the node activations 
                        at each layer.
    - layer: the layer at which to compute neighbor activations.
    """
    nbr_indices = neighbor_indices(G, n)
    return np.sum(activations_dict[layer][nbr_indices], axis=0)

neighbor_activations(syngraphs[0], syngraphs[0].nodes()[0], layers, 0)

array([0, 0, 1, 0, 0, 0, 0, 0, 0, 0])

In [43]:
# Write a function that stacks each of the nodes' neighbors
# activations together into a large feature matrix.

def stacked_neighbor_activations(graphs, activations_dict, layer):
    """
    Inputs:
    =======
    - graphs: (list) a list of NetworkX graphs.
    - activations_dict: (dict) a dictionary where keys are the layer
                        number and values are the node activations.
    
    Returns:
    ========
    - a stacked numpy array of neighbor activations
    """
    nbr_activations = []
    for g in graphs:
        for n in g.nodes():
            nbr_acts = neighbor_activations(g, n, activations_dict, layer)
            nbr_activations.append(nbr_acts)
    return np.vstack(nbr_activations)

stacked_neighbor_activations(syngraphs, layers, 1)

array([[0, 0, 1, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 1],
       [0, 1, 0, ..., 0, 1, 1],
       ..., 
       [0, 0, 0, ..., 0, 1, 0],
       [0, 0, 0, ..., 0, 1, 0],
       [0, 1, 0, ..., 1, 0, 0]])

In [44]:
# Write a function that computes the next layers' activations.

def activation(activations_dict, wb, layer, graphs):
    """
    Inputs:
    =======
    - activations_dict: (dict) a dictionary where keys are the layer
                        number and values are the node activations.
    - wb: (wb.WeightsAndBiases) the WB class storing the weights and
          biases.
    - layer: (int) the layer for which to compute the activations.    
    
    Returns:
    ========
    - a stacked numpy array of activations, which can be assigned to
      the activations_dict's next layer if desired (actually it
      should be).
    """
    
    self_acts = activations_dict[layer]
    self_acts = np.dot(self_acts, wb[layer]['self_weights'])

    nbr_acts = stacked_neighbor_activations(graphs, activations_dict, layer)
    # print('nbr_dtype: {0}....... wb_dtype: {1}'.format(nbr_acts.dtype, wb[layer]['nbr_weights'].dtype))
    # print('nbr_act type: {0}'.format(type(nbr_acts)))
    # print('nbr_acts:')
    # print(nbr_acts)
    # print('nbr_weights:')
    # print(wb[layer]['nbr_weights'])
    nbr_acts = np.dot(nbr_acts, wb[layer]['nbr_weights'])
    # print(nbr_acts)
    
    biases = wb[layer]['biases']
    # print('result_activation dtype: {0}'.format((self_acts + nbr_acts + biases).dtype))
    # print('result')
    # print(self_acts + nbr_acts + biases)
    return relu(self_acts + nbr_acts + biases)

print(activation(layers, wb, 0, syngraphs))
# print(activation(layers, wb, 1, syngraphs))
# print(activation(layers, wb, 2, syngraphs))

[[ 0.08077149  0.26617627 -0.         ..., -0.         -0.          0.06247178]
 [-0.         -0.          0.17111185 ...,  0.1100053   0.10972534
   0.06213238]
 [-0.          0.06887595 -0.         ..., -0.         -0.          0.42113589]
 ..., 
 [ 0.09517687  0.33361817  0.02114029 ...,  0.01350959 -0.          0.3693808 ]
 [ 0.03914639  0.07409041 -0.         ..., -0.          0.01219503
   0.35051585]
 [-0.         -0.         -0.         ..., -0.         -0.          0.07927655]]


In [45]:
act = np.dot(stacked_neighbor_activations(syngraphs, layers, 0), wb[0]['nbr_weights']) + wb[0]['biases']
act.shape

(561, 20)

In [46]:
# Write a function that gets the indices of all of the nodes in the
# graph.
def graph_indices(g):
    """
    Returns the row indices of each of the nodes in the graphs.
    """
    return [d['idx'] for _, d in g.nodes(data=True)]

In [47]:
# Write a function that makes the fingerprint used for predictions.
def fingerprint(activations_dict, graphs):
    """
    Computes the final layer fingerprint for each graph.
    
    Inputs:
    =======
    - activations_dict: (dict) a dictionary where keys are the layer
                        number and values are the node activations.
    - graphs: a list of graphs for which to compute the fingerprints.
    
    Returns:
    ========
    - a stacked numpy array of fingerprints, of length len(graphs).
    """
    top_layer = max(activations_dict.keys())
    fingerprints = []
    for g in graphs:
        idxs = graph_indices(g)
        fp = np.sum(activations_dict[top_layer][idxs], axis=0)
        fingerprints.append(fp)
    
    return relu(np.vstack(fingerprints))

# test fingerprint function
fingerprint(layers, syngraphs)

array([[0, 0, 1, 0, 0, 0, 0, 0, 0, 1],
       [1, 1, 1, 1, 1, 1, 1, 0, 1, 1],
       [1, 0, 0, 0, 0, 0, 1, 0, 0, 1],
       [0, 1, 0, 1, 0, 1, 0, 0, 0, 0],
       [0, 0, 0, 0, 1, 1, 0, 1, 0, 0],
       [1, 1, 1, 1, 0, 1, 1, 0, 1, 1],
       [1, 0, 1, 1, 1, 1, 1, 1, 1, 1],
       [1, 0, 1, 1, 1, 0, 1, 1, 0, 0],
       [1, 0, 0, 1, 0, 0, 0, 0, 0, 1],
       [1, 0, 1, 0, 0, 0, 0, 1, 1, 0],
       [1, 1, 1, 0, 1, 1, 1, 0, 1, 1],
       [0, 1, 0, 1, 0, 0, 0, 0, 0, 1],
       [1, 0, 0, 0, 1, 0, 0, 0, 0, 0],
       [1, 1, 1, 1, 0, 1, 0, 1, 1, 1],
       [1, 1, 1, 1, 1, 1, 1, 1, 1, 0],
       [1, 1, 1, 1, 1, 1, 1, 1, 0, 1],
       [0, 0, 0, 0, 1, 1, 0, 0, 0, 0],
       [0, 1, 1, 1, 1, 0, 0, 0, 1, 0],
       [1, 0, 1, 0, 1, 0, 1, 1, 1, 0],
       [0, 0, 1, 0, 1, 1, 1, 0, 1, 1],
       [1, 1, 1, 1, 0, 0, 1, 1, 1, 0],
       [0, 1, 1, 0, 0, 0, 1, 0, 0, 0],
       [0, 0, 0, 0, 1, 1, 0, 0, 0, 1],
       [1, 0, 0, 1, 1, 0, 0, 0, 0, 0],
       [1, 0, 0, 1, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 1, 0,

In [48]:
# Write a function that makes the forward pass predictions.
def predict(wb_vect, wb_unflattener, activations_dict, graphs):
    """
    Makes predictions.
    
    Change this function for each new learning problem.
    
    Inputs:
    =======
    - wb_vect: (WeightsAndBiases.vect)
    - wb_unfalttener (WeightsAndBiases.unflattener)
    - activations_dict: (dict) a dictionary where keys are the layer
                        number and values are the node activations.
    - graphs: a list of graphs for which to compute the fingerprints.
    
    Returns:
    ========
    - a numpy array of predictions, of length len(graphs).
    """
    
    wb = wb_unflattener(wb_vect)
    for k in sorted(wb.keys()):
        activations_dict[k + 1] = activation(activations_dict, wb, k, graphs)
        # print(activations_dict[k])
    
    top_layer = max(wb.keys())
    
    fps = fingerprint(layers, graphs)
    
    return np.dot(fps, wb[top_layer]['linweights'])

predict(*wb.flattened(), layers, syngraphs)

array([[-0.03521828],
       [-0.50638317],
       [-0.08300308],
       [-0.04979961],
       [-0.07955716],
       [-0.26432701],
       [-1.43592535],
       [-0.16698255],
       [-0.09445827],
       [-0.1593711 ],
       [-0.32426517],
       [-0.06252155],
       [-0.03428062],
       [-1.31626076],
       [-0.50621963],
       [-0.96950534],
       [-0.0397043 ],
       [-0.17819871],
       [-0.25704755],
       [-0.15071348],
       [-0.16114747],
       [-0.06814167],
       [-0.07055512],
       [-0.08307516],
       [-0.03690267],
       [-0.06135548],
       [-0.08243147],
       [-0.48955387],
       [-0.07655244],
       [-0.3211851 ],
       [-0.74178985],
       [-0.33583623],
       [-0.23711611],
       [-0.06503832],
       [-0.13641978],
       [-0.37008644],
       [-0.14108591],
       [-0.03261727],
       [-0.20669762],
       [-0.07996231],
       [-0.06615539],
       [-0.03427786],
       [-0.28221307],
       [-0.09265613],
       [-0.18684824],
       [-0

In [49]:
# Write a function that computes the training loss.
def train_loss(wb_vect, wb_unflattener, activations_dict, graphs):
    """
    Computes the training loss as mean squared error.
    
    Inputs:
    =======
    - wb_vect: (WeightsAndBiases.vect)
    - wb_unfalttener (WeightsAndBiases.unflattener)
    - activations_dict: (dict) a dictionary where keys are the layer
                        number and values are the node activations.
    - graphs: a list of graphs for which to compute the fingerprints.

    Returns:
    ========
    - mean squared error.
    """
    
    scores = np.array([score(g) for g in graphs]).reshape((len(graphs), 1))
    # print(scores)
    preds = predict(wb_vect, wb_unflattener, activations_dict, graphs)
    # print(preds)
    return np.sum(np.abs(preds - scores)) / len(scores)

train_loss(wb.vect, wb.unflattener, layers, syngraphs)

5.9175226935169913

In [50]:
gradfunc = grad(train_loss, argnum=0)
gradfunc(wb.vect, wb.unflattener, layers, syngraphs)

array([-0.34161722,  0.21893978,  0.00538399, ..., -0.87374541,
       -1.01269378, -0.06060852])

In [52]:
def sgd(grad, wb_vect, wb_unflattener, activations_dict, graphs, callback=None, num_iters=200, step_size=0.1, mass=0.9):
    """
    Stochastic gradient descent with momentum.
    """
    velocity = np.zeros(len(wb_vect))
    for i in range(num_iters):
        print(i)
        g = grad(wb_vect, wb_unflattener, activations_dict, graphs)

        velocity = mass * velocity - (1.0 - mass) * g
        wb_vect += step_size * velocity
        # print(wb_vect)
        print(train_loss(wb_vect, wb_unflattener, activations_dict, graphs))
    return wb_vect, wb_unflattener

wb_vect, wb_unflattener = sgd(gradfunc, wb.vect, wb.unflattener, layers, syngraphs, num_iters=200, step_size=0.001)

0
5.85073881171
1
5.84457159019
2
5.83582307169
3
5.82478564025
4
5.81177303917
5
5.79686245053
6
5.78028589435
7
5.76231856991
8
5.74324312263
9
5.72311197145
10
5.70196153215
11
5.67982872567
12
5.65671583035
13
5.63275326223
14
5.60800721181
15
5.58237205945
16
5.55577868215
17
5.52810898695
18
5.49950475366
19
5.47006171089
20
5.44004945322
21
5.40929787154
22
5.37768854822
23
5.3449782691
24
5.31100221178
25
5.27561676606
26
5.23907629225
27
5.2011369265
28
5.16169450978
29
5.12043310163
30
5.07717107834
31
5.03171420892
32
4.98391111933
33
4.9335082699
34
4.88022602775
35
4.82373864795
36
4.76371707327
37
4.69985876357
38
4.6316825337
39
4.55882356348
40
4.48080836423
41
4.39713529383
42
4.30722642362
43
4.21023091831
44
4.11810236746
45
4.03334256909
46
3.95787460109
47
3.9068731395
48
3.87649742704
49
3.8522175501
50
3.82833776435
51
3.80486714752
52
3.78170948231
53
3.77239569646
54
3.7701615418
55
3.7695829148
56
3.7722154011
57
3.77391571285
58
3.77805107965
59
3.78203512024

In [23]:
train_loss(wb_vect, wb.unflattener, layers, syngraphs)

4.7804210300155514

In [24]:
wb.unflattener(wb.vect)[2]['linweights']

array([[ 0.13523469],
       [ 0.0693613 ],
       [ 0.07628269],
       [ 0.03652902],
       [ 0.13936013],
       [ 0.02577675],
       [-0.02201426],
       [ 0.07041358],
       [-0.00626544],
       [ 0.00346891]])

In [25]:
scores = [score(g) for g in syngraphs]

In [26]:
preds = predict(wb_vect, wb.unflattener, layers, syngraphs)

In [27]:
[i for i in zip(scores, preds)]

[(9, array([ 0.19499841])),
 (8, array([ 0.9013011])),
 (5, array([ 0.29641369])),
 (9, array([ 2.61036359])),
 (2, array([ 0.00708888])),
 (5, array([ 0.37208714])),
 (5, array([ 0.37169495])),
 (2, array([ 0.01057891])),
 (2, array([ 0.00726456])),
 (3, array([ 0.02077415])),
 (6, array([ 0.25659966])),
 (2, array([ 0.01021909])),
 (9, array([ 3.24096215])),
 (6, array([ 0.94539905])),
 (5, array([ 0.47199414])),
 (5, array([ 0.15118387])),
 (4, array([ 0.23316094])),
 (3, array([ 0.07847319])),
 (3, array([ 0.01973731])),
 (7, array([ 0.49615244])),
 (2, array([ 0.00857779])),
 (5, array([ 0.10338652])),
 (6, array([ 0.06584597])),
 (6, array([ 0.69345539])),
 (5, array([ 0.12810961])),
 (6, array([ 0.14935139])),
 (2, array([ 0.0101271])),
 (7, array([ 0.11523734])),
 (8, array([ 0.0522055])),
 (5, array([ 0.4975073])),
 (2, array([ 0.00612571])),
 (6, array([ 0.75824486])),
 (5, array([ 0.23375768])),
 (8, array([ 2.0248615])),
 (7, array([ 1.08374248])),
 (5, array([ 0.31255017])

In [28]:
new_graphs = [make_random_graph(sample(all_nodes, 4), 5, features_dict) for i in range(100)]
# predict(wb_vect, wb.unflattener, layers, new_graphs)
new_graphs[0].nodes(data=True)

stacked_node_activations(new_graphs)

array([[0, 0, 0, ..., 0, 1, 0],
       [1, 0, 0, ..., 0, 0, 0],
       [0, 0, 1, ..., 0, 0, 0],
       ..., 
       [0, 0, 1, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0]])

In [29]:
predict(wb_vect, wb.unflattener, layers, new_graphs)

ValueError: operands could not be broadcast together with shapes (534,20) (400,20) 