In [1]:
%load_ext autoreload
%autoreload 2
%matplotlib inline

In [2]:
from fingerprint import GraphFingerprint
from wb import WeightsAndBiases
from itertools import combinations
from random import choice, sample
from numpy.random import permutation
from sklearn.ensemble import RandomForestRegressor
from sklearn.cross_validation import train_test_split, ShuffleSplit, cross_val_score
from autograd import grad
from time import time

import autograd.numpy as np
import networkx as nx
import math
import matplotlib.pyplot as plt
from numba import jit


In [3]:
shapes = dict()
shapes[0] = 10
shapes[1] = 10
shapes[2] = 10
wb = WeightsAndBiases(2, shapes)
# wb[0]

In [4]:
def rnd():
    return np.random.binomial(1, 0.2, size=10)

all_letters = ['A', 'B', 'C', 'D', 'E', 'F', 'G']


def make_nodes_with_features():
    features = dict()
    for letter in all_letters:
        features[letter] = rnd()

    return features

node_features = make_nodes_with_features()


def make_synthetic_graphs(num_graphs, features):
    num_nodes = [i for i in range(2, len(all_letters) + 1)]

    # Make the synthetic graphs.
    syngraphs = []  # the synthetic graphs
    for i in range(num_graphs):
        # add in nodes
        n_nodes = choice(num_nodes)
        letters = sample(all_letters, n_nodes)
        G = nx.Graph()
        for letter in letters:
            G.add_node(letter, features=features[letter])

        # add in edges
        n_nodes = len(G.nodes())
        num_edges = choice(range(1, int(n_nodes**2 / 2 - n_nodes / 2 + 1)))
        edges = sample([i for i in combinations(G.nodes(), 2)], num_edges)
        for u, v in edges:
            G.add_edge(u, v)
        syngraphs.append(G)
    return syngraphs

In [5]:
syngraphs = make_synthetic_graphs(1000, node_features)

In [6]:
len(syngraphs)

1000

Set up a learning scenario where...

In [7]:
fingerprints = np.zeros((len(syngraphs), 10))

for i, g in enumerate(syngraphs):
    gfp = GraphFingerprint(g, 2, shapes)
    fp = gfp.compute_fingerprint(wb.vect, wb.unflattener)
    fingerprints[i] = fp

In [8]:
import pandas as pd
X = pd.DataFrame(np.array(fingerprints))
Y = [len(g.nodes()) for g in syngraphs]
Y

[5,
 2,
 3,
 5,
 2,
 6,
 6,
 3,
 2,
 4,
 4,
 5,
 6,
 4,
 5,
 5,
 3,
 6,
 2,
 5,
 4,
 4,
 7,
 6,
 3,
 7,
 6,
 2,
 3,
 2,
 6,
 2,
 7,
 4,
 4,
 3,
 4,
 5,
 7,
 4,
 7,
 5,
 2,
 3,
 5,
 7,
 5,
 7,
 2,
 4,
 7,
 2,
 3,
 5,
 4,
 6,
 7,
 7,
 3,
 4,
 5,
 3,
 4,
 6,
 5,
 3,
 6,
 6,
 4,
 4,
 6,
 2,
 2,
 6,
 2,
 2,
 7,
 6,
 2,
 6,
 6,
 2,
 2,
 4,
 6,
 6,
 2,
 4,
 3,
 6,
 2,
 6,
 4,
 2,
 3,
 2,
 6,
 7,
 4,
 4,
 7,
 6,
 4,
 4,
 2,
 5,
 6,
 2,
 7,
 6,
 4,
 5,
 6,
 3,
 7,
 7,
 3,
 6,
 5,
 4,
 2,
 6,
 6,
 5,
 5,
 5,
 7,
 5,
 7,
 2,
 3,
 5,
 3,
 4,
 2,
 3,
 6,
 4,
 6,
 5,
 3,
 4,
 4,
 5,
 6,
 2,
 3,
 3,
 5,
 5,
 7,
 7,
 2,
 3,
 5,
 2,
 6,
 7,
 7,
 3,
 7,
 6,
 3,
 3,
 5,
 6,
 3,
 3,
 7,
 5,
 5,
 6,
 6,
 7,
 4,
 2,
 2,
 4,
 7,
 5,
 6,
 5,
 7,
 7,
 2,
 6,
 4,
 3,
 7,
 4,
 2,
 7,
 7,
 2,
 5,
 6,
 6,
 4,
 2,
 5,
 2,
 4,
 4,
 6,
 6,
 3,
 6,
 6,
 7,
 2,
 3,
 7,
 5,
 3,
 3,
 3,
 5,
 2,
 2,
 3,
 3,
 6,
 5,
 6,
 4,
 7,
 7,
 3,
 4,
 3,
 3,
 3,
 2,
 7,
 5,
 6,
 5,
 3,
 7,
 6,
 2,
 7,
 4,
 6,
 3,
 3,
 4,
 3,
 5,
 5,


In [9]:
# A simple test - the weights are random, so given the random weights, what is the prediction accuracy using
# random forest?

cv = ShuffleSplit(n=len(X), n_iter=10)

X_train, X_test, Y_train, Y_test = train_test_split(X, Y)

In [10]:
rfr = RandomForestRegressor()
rfr.fit(X_train, Y_train)
# preds = np.rint(rfr.predict(X_test))
preds = rfr.predict(X_test)

from sklearn.metrics import mean_squared_error as mse

print(preds)
mse(preds, Y_test)

[ 5.9  3.   3.   2.   5.9  4.   5.   6.2  2.   6.1  3.   5.4  6.   2.9  6.9
  2.   5.3  3.   7.   7.   4.   2.   2.   6.6  4.   2.   5.   2.   2.   4.6
  7.   3.   5.   5.2  3.9  5.1  6.6  7.   2.   3.   7.   6.6  5.   5.6  6.
  5.9  6.4  4.1  6.   6.7  4.   2.   2.   6.   6.5  7.   5.3  7.   3.   3.
  2.   4.   5.   6.   6.   3.   5.   5.2  3.   4.   5.1  3.1  3.   3.1  2.
  6.9  4.2  3.   5.2  6.7  5.   6.   6.3  2.   3.   4.   4.   5.   7.   5.8
  4.   3.   6.9  2.9  3.   3.   5.   5.   6.9  2.   3.   2.   3.   3.   4.
  5.2  3.   2.   4.   3.   2.   6.9  5.9  7.   4.   5.2  4.   3.   5.6  6.
  3.   5.   2.   6.7  6.   4.2  2.   6.8  6.3  2.   4.   4.8  2.9  4.   6.8
  3.   4.   6.5  2.   7.   3.   6.   3.   3.   6.1  2.   2.   3.   6.   3.
  3.   5.1  4.   3.   3.   7.   5.   5.2  5.1  6.   4.8  3.   6.7  6.2  3.
  6.   5.2  6.8  2.   5.   5.6  5.   5.5  6.   5.6  2.   5.   6.   5.1  5.8
  3.   6.   6.   4.   2.   4.   3.9  2.   4.   2.   5.   2.   7.   7.   3.
  3.   3.   5.   7. 

0.10468000000000001

In [11]:
# How does this compare with randomly shuffled data?
mse(permutation(Y_test), Y_test)

6.032

In [12]:
[i for i in zip(Y_test, preds)]

[(7, 5.9000000000000004),
 (3, 3.0),
 (3, 3.0),
 (2, 2.0),
 (7, 5.9000000000000004),
 (4, 4.0),
 (5, 5.0),
 (6, 6.2000000000000002),
 (2, 2.0),
 (6, 6.0999999999999996),
 (3, 3.0),
 (7, 5.4000000000000004),
 (6, 6.0),
 (3, 2.8999999999999999),
 (7, 6.9000000000000004),
 (2, 2.0),
 (6, 5.2999999999999998),
 (3, 3.0),
 (7, 7.0),
 (7, 7.0),
 (4, 4.0),
 (2, 2.0),
 (2, 2.0),
 (7, 6.5999999999999996),
 (4, 4.0),
 (2, 2.0),
 (5, 5.0),
 (2, 2.0),
 (2, 2.0),
 (5, 4.5999999999999996),
 (7, 7.0),
 (3, 3.0),
 (5, 5.0),
 (5, 5.2000000000000002),
 (4, 3.8999999999999999),
 (5, 5.0999999999999996),
 (7, 6.5999999999999996),
 (7, 7.0),
 (2, 2.0),
 (3, 3.0),
 (7, 7.0),
 (7, 6.5999999999999996),
 (5, 5.0),
 (6, 5.5999999999999996),
 (6, 6.0),
 (6, 5.9000000000000004),
 (6, 6.4000000000000004),
 (4, 4.0999999999999996),
 (6, 6.0),
 (7, 6.7000000000000002),
 (4, 4.0),
 (2, 2.0),
 (2, 2.0),
 (6, 6.0),
 (7, 6.5),
 (7, 7.0),
 (5, 5.2999999999999998),
 (7, 7.0),
 (3, 3.0),
 (3, 3.0),
 (2, 2.0),
 (4, 4.0),
 (5

# Optimization with Autograd

Here, I try using autograd to do the optimizations required.

In [13]:
def predict(wb_vect, wb_unflattener, graph_fp):#, linweights):
    """
    Given the weights and biases for each layer, make a prediction for the graph.
    """
    fp = graph_fp.compute_fingerprint(wb_vect, wb_unflattener)
    wb = wb_unflattener(wb_vect)
    top_layer = max(wb.keys())
    linweights = wb[top_layer]['linweights']
    return np.dot(fp, linweights)

predict(wb.vect, wb.unflattener, gfp)

array([[ 0.57630778]])

In [14]:
@jit
def train_loss(wb_vect, wb_unflattener):
    """
    Training loss function - should take in a vector.
    """
    sum_loss = 0
    for i, g in enumerate(syngraphs):
        gfp = GraphFingerprint(g, 2, shapes)
        pred = predict(wb_vect, wb_unflattener, gfp)
        loss = len(g.nodes()) - predict(wb_vect, wb_unflattener, gfp)
        sum_loss = sum_loss + loss ** 2
    
    return sum_loss / len(syngraphs)

train_loss(wb.vect, wb.unflattener)

array([[ 18.46031647]])

In [15]:
def sgd(grad, wb_vect, wb_unflattener, callback=None, num_iters=200, step_size=0.1, mass=0.9):
    """
    Stochastic gradient descent with momentum.
    """
    velocity = np.zeros(len(wb_vect))
    for i in range(num_iters):
        print(i)
        g = grad(wb_vect, wb_unflattener)
        # if callback: callback(x, i, g)
        velocity = mass * velocity - (1.0 - mass) * g
        wb_vect += step_size * velocity
        print(train_loss(wb_vect, wb_unflattener))
    return wb_vect

In [16]:
train_loss(wb.vect, wb.unflattener)

array([[ 18.46031647]])

In [18]:
grad_func = grad(train_loss)

In [38]:
sgd(grad_func, wb.vect, wb.unflattener, num_iters=200)

0
[[ 0.69018149]]
1
[[ 0.68951395]]
2
[[ 0.68859367]]
3
[[ 0.68747845]]
4
[[ 0.68622442]]
5
[[ 0.68488338]]
6
[[ 0.68350076]]
7
[[ 0.68211447]]
8
[[ 0.68075421]]
9
[[ 0.6794415]]
10
[[ 0.67819007]]
11
[[ 0.67700668]]
12
[[ 0.67589212]]
13
[[ 0.67484237]]
14


TypeError: bad operand type for unary -: 'list'

In [None]:
trained_weights = wb.unflattener(wb.vect)[2]['linweights']
trained_weights

In [None]:
gfp = GraphFingerprint(syngraphs[0], 2, shapes)
gfp.layers[0].nodes(data=True)[4][1]['features'] @ trained_weights

In [39]:
test_graphs = make_synthetic_graphs(100, node_features)

test_fingerprints = np.zeros((len(test_graphs), 10))
# test_fingerprints
for i, g in enumerate(test_graphs):
    gfp = GraphFingerprint(g, 2, shapes)
    fp = gfp.compute_fingerprint(wb.vect, wb.unflattener)
    test_fingerprints[i] = fp

# test_fingerprints

In [40]:
preds = []
for i, g in enumerate(test_graphs):
    gfp = GraphFingerprint(g, 2, shapes)
#     fp = gfp.compute_fingerprint(wb.vect, wb.unflattener)
    preds.append(predict(wb.vect, wb.unflattener, gfp)[0])
# preds[0]

In [41]:
Y_test = [len(g) for g in syngraphs]

[i for i in zip(Y_test, preds)]

[(5, array([ 5.00303973])),
 (2, array([ 4.21978438])),
 (3, array([ 4.73538044])),
 (5, array([ 5.19453785])),
 (2, array([ 5.19341974])),
 (6, array([ 5.41108512])),
 (6, array([ 5.84051064])),
 (3, array([ 5.77599583])),
 (2, array([ 2.52383006])),
 (4, array([ 5.20779674])),
 (4, array([ 4.70890314])),
 (5, array([ 5.73078582])),
 (6, array([ 2.54403173])),
 (4, array([ 4.06355651])),
 (5, array([ 5.41559779])),
 (5, array([ 4.21306957])),
 (3, array([ 2.58145359])),
 (6, array([ 2.57563647])),
 (2, array([ 4.25690491])),
 (5, array([ 5.83894758])),
 (4, array([ 3.05971026])),
 (4, array([ 5.77488627])),
 (7, array([ 5.84094605])),
 (6, array([ 5.20779674])),
 (3, array([ 4.5804738])),
 (7, array([ 2.53785793])),
 (6, array([ 5.11027739])),
 (2, array([ 3.64983927])),
 (3, array([ 3.64681621])),
 (2, array([ 5.51839126])),
 (6, array([ 4.37628934])),
 (2, array([ 5.60817102])),
 (7, array([ 5.84094605])),
 (4, array([ 5.00505932])),
 (4, array([ 5.60817102])),
 (3, array([ 5.195891

In [None]:
plt.scatter(preds, n_nodes, alpha=0.3)
plt.xlabel('predictions')
plt.ylabel('actual')
plt.title('number of nodes')

In [None]:
class Class(object):
    """docstring for ClassName"""
    def __init__(self, arg):
        super(Class, self).__init__()
        self.arg = arg
        
    def __iter__():
        pass
        
    def function(self, value, other_thing):
        return value['k']['v']['x'] ** 2 + value['y'] ** 3
    
    def function2(self, value):
        return np.sum(np.dot(value['arr1'], value['arr2'])) + 1
        
        
# def function(value):
#     return value ** 2

In [None]:
c = Class(np.random.random((10,10)))

from collections import OrderedDict
value = dict({'k':{'v':{'x':3.0}}, 'y':2.0})
gradfunc = grad(c.function)
gradfunc(value, 'string')

In [None]:
def fun2(value):
    return np.sum(np.dot(value['arr1'], value['arr2']))

value = {'arr1':np.random.random((10,10)), 'arr2':np.random.random((10,10))}
gradfunc = grad(fun2)(value)
gradfunc

In [None]:
value = {'arr1':np.random.random((10,10)), 'arr2':np.random.random((10,10))}
# value
gradfunc = grad(c.function2)
gradfunc(value)
# np.dot(c.arg, value['arr1'])# , c.arg)
# c.function2(value)

In [None]:
np.dot(value['arr1'], value['arr2'])