In [490]:
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [491]:
import math
import random
from itertools import combinations
from random import choice, sample

import autograd.numpy as np
import networkx as nx

from fingerprint import GraphFingerprint
from wb import WeightsAndBiases

In [492]:
# Keys 0, 1 and 2 all map to a width of 10 (presumably one entry per layer,
# including the input layer — confirm against WeightsAndBiases).
shapes = {layer: 10 for layer in range(3)}
wb = WeightsAndBiases(2, shapes)
# wb[0]

In [493]:
# Generate n synthetic graphs that have a random configuration of nodes which have fixed feature vectors.
# - nodes are 'A' through 'G'
# - select random set of nodes to add to the graph.
# - choose a number of edges to add, and add them in randomly to the graph.

def rnd(size=10, p=0.2):
    """
    Draw a random binary feature vector.

    Every entry is an independent single-trial binomial (Bernoulli) draw.

    Parameters
    ----------
    size : int, optional
        Length of the returned vector. Defaults to 10.
    p : float, optional
        Probability that any single entry equals 1. Defaults to 0.2.

    Returns
    -------
    array of int, shape (size,)
        Vector whose entries are each 0 or 1.
    """
    return np.random.binomial(1, p, size=size)

# Seed both random number generators so that re-running this cell regenerates
# exactly the same feature vectors and graphs: `rnd` draws from numpy's RNG,
# while `choice` / `sample` draw from the standard-library `random` module.
SEED = 42
random.seed(SEED)
np.random.seed(SEED)

all_letters = ['A', 'B', 'C', 'D', 'E', 'F', 'G']
# Allowed graph sizes: from 2 nodes up to one node per available letter.
num_nodes = list(range(2, len(all_letters) + 1))
# One fixed feature vector per letter, shared by every graph containing that letter.
features = {letter: rnd() for letter in all_letters}


# Make the synthetic graphs.
syngraphs = []  # the synthetic graphs
num_graphs = 1000  # the total number of synthetic graphs to make
for i in range(num_graphs):
    # add in nodes: a random subset of the letters, of random size
    letters = sample(all_letters, choice(num_nodes))
    G = nx.Graph()
    for letter in letters:
        G.add_node(letter, features=features[letter])

    # add in edges: choose between 1 and n*(n-1)/2 (the complete graph) of the
    # possible node pairs, uniformly at random without replacement.
    n_nodes = len(G.nodes())
    max_edges = n_nodes * (n_nodes - 1) // 2
    num_edges = choice(range(1, max_edges + 1))
    # Sort the nodes first so the candidate pair list (and hence the seeded
    # sample) does not depend on the graph's internal node ordering.
    candidate_pairs = list(combinations(sorted(G.nodes()), 2))
    G.add_edges_from(sample(candidate_pairs, num_edges))
    syngraphs.append(G)

In [494]:
# Sanity check: one graph is appended per loop iteration, so this should equal num_graphs.
len(syngraphs)

1000

In [495]:
# Spot-check the number of nodes in one arbitrary graph (index 10).
len(syngraphs[10].nodes())

2

In [496]:
# Feature vector of the first node of the first graph (despite the name, this is
# the node's `features` array, not a node). nodes(data=True) gives (node, attrs)
# pairs; integer-indexing it presumably relies on it being a plain list
# (networkx 1.x behaviour) — TODO confirm against the installed version.
node2 = syngraphs[0].nodes(data=True)[0][1]['features']
node2

array([1, 0, 1, 0, 1, 0, 0, 0, 0, 0])

In [497]:
# Multiply one node's feature vector by wb[1]['nbr_weights'] — presumably the
# layer-1 neighbour weight matrix — to eyeball that the shapes line up.
np.dot(node2, wb[1]['nbr_weights'])

array([ 2.71645819,  1.6917751 ,  1.19386692,  1.17987785,  0.92159857,
        1.50600502,  1.64027674,  0.88589575,  1.54787033,  2.18219285])

In [498]:
# Fingerprint every synthetic graph under the current weights and biases,
# storing the results one row per graph.
fingerprints = np.zeros((len(syngraphs), 10))
for i, g in enumerate(syngraphs):
    # NOTE: `gfp` is left as a plain global on purpose; a later cell reuses the last one.
    gfp = GraphFingerprint(g, 2, wb)
    fingerprints[i] = gfp.compute_fingerprint(wb)
np.array(fingerprints)

array([[ 0.01757677,  0.01269351,  0.01299234, ...,  0.01322189,
         0.86815186,  0.01236146],
       [ 0.09705448,  0.07974722,  0.1025951 , ...,  0.08458763,
         0.18605949,  0.07895852],
       [ 0.05437263,  0.04081959,  0.04376174, ...,  0.04282226,
         0.57945043,  0.03949851],
       ..., 
       [ 0.0522878 ,  0.03931991,  0.03690793, ...,  0.03914757,
         0.62546902,  0.0368816 ],
       [ 0.09969262,  0.09359214,  0.10342073, ...,  0.09546503,
         0.12106917,  0.09351647],
       [ 0.09520435,  0.08828335,  0.09807973, ...,  0.0922179 ,
         0.15471339,  0.0881083 ]])

In [499]:
import pandas as pd
# Features: one fingerprint row per graph. Target: that graph's node count.
X = pd.DataFrame(np.array(fingerprints))
Y = np.array([g.number_of_nodes() for g in syngraphs])
Y

array([7, 6, 6, 7, 2, 3, 6, 6, 7, 3, 2, 6, 6, 5, 3, 5, 5, 7, 5, 3, 5, 5, 4,
       6, 7, 6, 7, 2, 2, 7, 2, 5, 5, 3, 6, 5, 5, 5, 2, 6, 4, 6, 3, 6, 7, 3,
       2, 6, 6, 3, 5, 5, 6, 3, 2, 2, 2, 5, 6, 7, 5, 6, 6, 5, 7, 2, 2, 6, 2,
       7, 4, 5, 6, 5, 3, 6, 5, 5, 4, 7, 3, 3, 2, 2, 6, 3, 3, 3, 3, 2, 4, 3,
       4, 2, 6, 3, 7, 2, 4, 7, 2, 7, 3, 3, 2, 5, 3, 6, 3, 7, 2, 2, 5, 4, 6,
       7, 5, 4, 2, 2, 6, 2, 3, 5, 3, 7, 6, 4, 6, 6, 5, 4, 3, 7, 2, 2, 2, 7,
       7, 2, 7, 3, 4, 5, 5, 4, 2, 5, 5, 3, 5, 2, 7, 7, 2, 6, 5, 3, 2, 5, 6,
       4, 6, 4, 4, 3, 7, 6, 5, 6, 5, 7, 7, 5, 6, 3, 6, 4, 5, 7, 5, 6, 7, 5,
       6, 7, 6, 4, 2, 2, 5, 4, 6, 2, 2, 3, 2, 7, 4, 5, 4, 4, 3, 5, 7, 2, 4,
       6, 3, 2, 3, 6, 2, 7, 6, 5, 2, 6, 2, 7, 6, 6, 3, 2, 5, 4, 6, 7, 3, 4,
       7, 5, 5, 7, 2, 5, 3, 5, 7, 7, 5, 3, 4, 4, 7, 6, 2, 4, 2, 3, 4, 3, 4,
       4, 4, 4, 3, 5, 5, 6, 2, 2, 6, 2, 4, 2, 3, 2, 2, 3, 4, 3, 7, 3, 5, 4,
       6, 5, 6, 3, 2, 3, 5, 2, 7, 6, 6, 6, 6, 5, 3, 6, 2, 6, 4, 4, 4, 4, 5,
       2, 2,

In [500]:
# A simple test - the weights are random, so given the random weights, what is the prediction accuracy using
# random forest?

from sklearn.ensemble import RandomForestRegressor

# `sklearn.cross_validation` was deprecated in scikit-learn 0.18 and removed in
# 0.20; `sklearn.model_selection` is its replacement. Prefer the new module and
# fall back to the old one so the notebook runs on either version.
try:
    from sklearn.model_selection import train_test_split, ShuffleSplit, cross_val_score
    # New API: the dataset size is supplied later, when the splitter is applied to X.
    cv = ShuffleSplit(n_splits=10)
except ImportError:
    from sklearn.cross_validation import train_test_split, ShuffleSplit, cross_val_score
    # Old API: the splitter is bound to the dataset size up front.
    cv = ShuffleSplit(n=len(X), n_iter=10)

# Hold out part of the graphs for evaluation (default split proportions).
X_train, X_test, Y_train, Y_test = train_test_split(X, Y)
print(X_train.shape)
print(Y_train.shape)
print(X_test.shape)
print(Y_test.shape)

(750, 10)
(750,)
(250, 10)
(250,)


In [501]:
# Fit a random forest with default hyper-parameters on the training split and
# predict (real-valued) node counts for the held-out graphs.
rfr = RandomForestRegressor()
rfr.fit(X_train, Y_train)
# Alternative kept for reference: round predictions to the nearest integer count.
# preds = np.rint(rfr.predict(X_test))
preds = rfr.predict(X_test)

from sklearn.metrics import mean_squared_error as mse

print(preds)
# mean_squared_error's signature is (y_true, y_pred): ground truth goes first.
# (The value is the same either way, since squared error is symmetric.)
mse(Y_test, preds)

[ 6.1  3.3  2.   2.   5.   5.1  3.   6.5  6.2  2.   3.   3.   2.   3.   3.3
  6.9  4.   4.7  4.1  6.4  3.   6.1  3.   6.   6.6  6.2  5.   4.9  2.   5.6
  4.   3.   2.   3.   7.   2.   2.   7.   5.7  4.7  5.3  7.   4.   6.6  7.
  6.2  4.   7.   6.   3.2  3.8  6.   4.9  7.   4.1  5.6  4.   7.   5.   7.
  5.   4.5  7.   3.   7.   2.   4.7  3.   2.   6.1  4.8  3.   3.   6.9  2.
  6.4  5.2  4.9  5.1  3.   3.   2.   5.2  4.   4.   5.8  6.2  6.2  4.5  6.3
  6.   3.   5.2  6.8  6.9  6.3  2.   4.   3.9  6.3  3.1  5.   4.   2.   3.
  4.   5.   3.   6.2  4.   5.1  4.   6.   5.4  3.   3.   6.   5.   5.7  2.
  4.5  4.   3.8  4.   7.   4.   3.   6.5  4.   4.   4.   5.8  6.   3.8  5.
  4.   5.   3.   2.   5.   2.   3.2  2.   5.   2.   2.   5.   5.7  7.   3.
  5.   5.8  2.   4.3  5.   4.   5.3  7.   3.   6.   4.6  5.   2.   5.   5.8
  4.1  2.   2.   5.1  6.   3.8  3.   2.   3.   4.9  5.   4.   2.   5.   6.4
  3.   4.   4.5  5.7  4.   5.   5.1  7.   5.3  3.1  5.8  4.   2.   5.   6.7
  5.   5.2  4.4  2.

0.13668

In [502]:
from numpy.random import permutation
# Baseline: how does the model's error compare with "predicting" a random
# permutation of the true targets, i.e. guessing from the label distribution alone?
# Arguments follow mean_squared_error's (y_true, y_pred) order.
mse(Y_test, permutation(Y_test))

5.4960000000000004

# Optimization with Autograd

Here, I try using autograd to compute the gradient of the training loss with respect to the weights and biases, which is what optimizing them requires.

In [503]:
def predict(wb, graph_fp, linweights):
    """
    Predict a graph's target value from its fingerprint.

    The fingerprint is computed by `graph_fp` from the weights and biases in
    `wb`, and is then projected through the linear read-out `linweights` with
    a dot product.
    """
    fingerprint = graph_fp.compute_fingerprint(wb)
    prediction = np.dot(fingerprint, linweights)
    return prediction


# Random linear read-out weights: a (10, 1) column of uniform [0, 1) draws.
linweights = np.random.random((10, 1))
# Smoke test. NOTE: `gfp` is the fingerprint object left over from the last
# iteration of the fingerprint loop in an earlier cell, i.e. the last synthetic graph.
predict(wb, gfp, linweights)

array([[ 0.63750228]])

In [504]:
def train_loss(wb):
    """
    Sum of squared prediction errors over all synthetic graphs.

    For every graph in the global `syngraphs`, a fingerprint object is built
    with `wb`, a prediction is made through `predict` (using the global
    `linweights`), and the squared difference from the matching entry of the
    global `Y` is added to the running total.
    """
    total = 0
    for idx, graph in enumerate(syngraphs):
        graph_fp = GraphFingerprint(graph, 2, wb)
        residual = Y[idx] - predict(wb, graph_fp, linweights)
        total = total + residual ** 2
    return total

# Evaluate the loss once with the current weights and biases as a sanity check.
train_loss(wb)

array([[ 16716.84676636]])

In [505]:
from autograd import grad

# Differentiate the training loss with respect to its only argument, `wb`.
grad_func = grad(train_loss)
# This call fails with TypeError ("'NoDerivativeNode' object does not support indexing");
# presumably autograd cannot trace indexing into the wrapped `wb` object — TODO confirm.
grad_func(wb)

TypeError: 'NoDerivativeNode' object does not support indexing

In [487]:
class Class(object):
    """Minimal stand-in object whose bound method is handed to `grad`."""

    def __init__(self, arg):
        super(Class, self).__init__()
        self.arg = arg

    def function(self, value, other_thing):
        """Return value['k']['v'] squared plus value['y'] cubed; `other_thing` is unused."""
        squared_term = value['k']['v'] ** 2
        cubed_term = value['y'] ** 3
        return squared_term + cubed_term
        
        
# def function(value):
#     return value ** 2

In [489]:
# Minimal reproduction of the failure above: differentiate a bound method whose
# first argument is a (nested) dict rather than an array.
c = Class(3)

from collections import OrderedDict
value = OrderedDict({'k':{'v':3.0}, 'y':2.0})
# grad differentiates with respect to the first positional argument (`value`).
gradfunc = grad(c.function)
# Raises TypeError: 'NoDerivativeNode' object is not subscriptable (see output below).
gradfunc(value, 'string')

TypeError: 'NoDerivativeNode' object is not subscriptable