In [25]:
# Dependencies


import numpy as np
import networkx as nx
from sklearn.base import BaseEstimator, ClassifierMixin
from datasets import load_dataset
from sklearn.model_selection import cross_val_score, ShuffleSplit

from graph import processDataset, transform
from hdc import hdv, bind, bundle, ItemMemory, hdvW, hdvA, cosine_similarity

In [2]:
# encodeGraph -> graphHD (graph, vertices, dimensions)


def encodeGraph(graph, vertices, dimensions):
    """Encode a graph as one hypervector built from its bound edges.

    Every node of ``graph`` that has no entry in ``vertices`` yet is given a
    new ``hdv(dimensions)`` vector. Each edge is then encoded as
    ``bind([v_a, v_b])`` of its two endpoint vectors, and all edge encodings
    are handed to ``bundle``.

    Args:
        graph: object exposing ``.nodes`` and ``.edges``; each edge must be
            indexable with its endpoints at positions 0 and 1.
        vertices: mapping node -> hypervector. Mutated in place, so vectors
            created here are reused by later calls that share the mapping.
        dimensions: forwarded unchanged to ``hdv``.

    Returns:
        Whatever ``bundle`` returns for the list of edge encodings.
    """
    # Lazily filtered so the membership test runs right before each insert.
    unseen_nodes = (n for n in graph.nodes if n not in vertices)
    for n in unseen_nodes:
        vertices[n] = hdv(dimensions)

    bound_edges = [bind([vertices[e[0]], vertices[e[1]]]) for e in graph.edges]
    return bundle(bound_edges)

In [3]:
# encodeGraphW -> vertices with hdvW and edges (graph, vertices, base)


def encodeGraphW(graph, vertices, base):
    """Encode a graph from its bound edges, using ``hdvW`` node vectors.

    A node missing from ``vertices`` receives ``hdvW(base, float(node))``, so
    every node must be convertible with ``float``. Each edge is encoded as
    ``bind([v_a, v_b])`` and the edge encodings are passed to ``bundle``.

    Args:
        graph: object exposing ``.nodes`` and ``.edges``; each edge must be
            indexable with its endpoints at positions 0 and 1.
        vertices: mapping node -> hypervector. Mutated in place.
        base: first argument forwarded to ``hdvW`` for every new node.

    Returns:
        Whatever ``bundle`` returns for the list of edge encodings.
    """
    # Lazily filtered so the membership test runs right before each insert.
    unseen_nodes = (n for n in graph.nodes if n not in vertices)
    for n in unseen_nodes:
        vertices[n] = hdvW(base, float(n))

    bound_edges = [bind([vertices[e[0]], vertices[e[1]]]) for e in graph.edges]
    return bundle(bound_edges)

In [4]:
# encodeGraphV -> vertices with hdv and no edges (graph, vertices, base)


def encodeGraphV(graph, vertices, base):
    """Encode a graph by bundling plain ``hdv`` node vectors (edges ignored).

    This is the ``hdv`` counterpart of ``encodeGraphVW``. The previous body
    called ``hdvW`` and was therefore an exact duplicate of ``encodeGraphVW``,
    so both experiments measured the same encoder.

    Args:
        graph: object exposing ``.nodes``.
        vertices: mapping node -> hypervector. Mutated in place: every node
            not present yet receives a new ``hdv`` vector.
        base: kept for signature compatibility with the other encoders. Only
            its length is used, as the dimensionality passed to ``hdv``.
            Assumes ``base`` is a sized hypervector such as the result of
            ``hdv(dimensions)`` -- TODO confirm against ``hdc.hdv``.

    Returns:
        Whatever ``bundle`` returns for the node vectors, in ``graph.nodes``
        order.
    """
    dimensions = len(base)
    node_vectors = []

    for node in graph.nodes:
        if node not in vertices:
            vertices[node] = hdv(dimensions)
        node_vectors.append(vertices[node])

    return bundle(node_vectors)

In [5]:
# encodeGraphVW -> vertices with hdvW and no edges (graph, vertices, base)


def encodeGraphVW(graph, vertices, base):
    """Encode a graph by bundling ``hdvW`` node vectors (edges ignored).

    Args:
        graph: object exposing ``.nodes``; every node must be convertible
            with ``float``.
        vertices: mapping node -> hypervector. Mutated in place: a node not
            present yet receives ``hdvW(base, float(node))``.
        base: first argument forwarded to ``hdvW`` for every new node.

    Returns:
        Whatever ``bundle`` returns for the node vectors, in ``graph.nodes``
        order.
    """

    def vector_for(node):
        # Create the node's vector on first sight, then always read it back.
        if node not in vertices:
            vertices[node] = hdvW(base, float(node))
        return vertices[node]

    return bundle([vector_for(node) for node in graph.nodes])

In [32]:
class GraphEstimator(BaseEstimator, ClassifierMixin):
    """Scikit-learn style classifier over hypervector graph encodings.

    Parameters
    ----------
    encoder : callable
        Called as ``encoder(graph, vertices)``. It must return the graph's
        hypervector and may add entries to the ``vertices`` mapping.
    alpha, digits :
        Forwarded unchanged to ``transform(X, alpha, digits)`` in both
        ``fit`` and ``predict``.
    step : int
        Maximum number of encoded training graphs of one class that are
        bundled into a single stored class vector.

    Attributes
    ----------
    memory : ItemMemory
        Holds the class vectors added during ``fit``.
    vertices : dict
        Node -> hypervector mapping shared by every ``encoder`` call.
    """

    def __init__(self, encoder, alpha=0.45, digits=4, step=20):
        self.encoder = encoder
        self.alpha = alpha
        self.digits = digits
        self.step = step
        # Learned state. fit() rebuilds both; they are created here as well so
        # the attributes exist on an unfitted instance, as before.
        self.memory = ItemMemory([])
        self.vertices = dict()

    def fit(self, X, y):
        """Encode the training graphs and store bundled class vectors.

        Returns ``self``.
        """
        # Start from a clean state: without this, fitting the same instance
        # twice keeps the class vectors of the previous training set in memory.
        self.memory = ItemMemory([])
        self.vertices = dict()

        graphs = transform(X, self.alpha, self.digits)

        encoded_by_class = {label: [] for label in set(y)}
        # zip pairs graphs and labels by position without relying on y[i],
        # which is label-based for some containers (e.g. a pandas Series).
        for graph, label in zip(graphs, y):
            encoded_by_class[label].append(self.encoder(graph, self.vertices))

        for label, encoded in encoded_by_class.items():
            # One stored vector per chunk of at most `step` graphs, so a class
            # may own several entries in memory under the same name.
            for start in range(0, len(encoded), self.step):
                chunk_vector = bundle(encoded[start : start + self.step])
                self.memory.addVector(str(label), chunk_vector)

        return self

    def predict(self, X):
        """Return a list with one predicted integer label per sample in ``X``.

        Labels are stored as ``str(label)`` during ``fit`` and converted back
        with ``int``, so the training labels must be integer-like. Encoding a
        query graph may add unseen nodes to ``self.vertices``.
        """
        predictions = []

        for graph in transform(X, self.alpha, self.digits):
            query_vector = self.encoder(graph, self.vertices)
            # Element 0 of the cleanup result is read as the stored name.
            best_match = self.memory.cleanup(query_vector)
            predictions.append(int(best_match[0]))

        return predictions

In [7]:
# Fetch the benchmark graph datasets with `datasets.load_dataset` and keep one
# split of each: AIDS is read from its "full" split, the others from "train".
# Only MUTAG is consumed by the cells shown here; the remaining names are kept
# available for swapping the dataset passed to processDataset.
MUTAG = load_dataset("graphs-datasets/MUTAG")["train"]
PROTEINS = load_dataset("graphs-datasets/PROTEINS")["train"]
AIDS = load_dataset("graphs-datasets/AIDS")["full"]
IMDB = load_dataset("graphs-datasets/IMDB-BINARY")["train"]

In [8]:
# Split the selected dataset into the samples and their class labels.
graphs, labels = processDataset(MUTAG)

In [33]:
from functools import partial

# Evaluation protocol.
FOLDS = 10
REPS = 10

# Hyper-parameters: ALPHA and DIGITS are handed to GraphEstimator (which passes
# them on to transform), STEP is its chunk size, DIMENSIONS is passed to hdv.
ALPHA = 0.85
DIGITS = 8
DIMENSIONS = 10000
STEP = 50

CV = FOLDS

# One partially-applied encoder per experiment. Each `base` encoder gets its
# own freshly drawn hdv(DIMENSIONS) vector, created in list order.
encoders = [partial(encodeGraph, dimensions=DIMENSIONS)]
encoders += [
    partial(base_encoder, base=hdv(DIMENSIONS))
    for base_encoder in (encodeGraphV, encodeGraphW, encodeGraphVW)
]


def main():
    """Score every encoder in ``encoders`` with repeated shuffle-split CV.

    For each encoder a ``GraphEstimator`` is evaluated ``REPS`` times on the
    module-level ``graphs`` / ``labels``. Every repetition uses a new,
    unseeded ``ShuffleSplit`` with ``FOLDS`` splits, so results differ from
    run to run. The mean score of each repetition is printed, followed by the
    average of those means for the encoder.
    """
    for encoder in encoders:
        clf = GraphEstimator(encoder, alpha=ALPHA, digits=DIGITS, step=STEP)
        rep_means = []

        for rep in range(REPS):
            # n_splits is tied to FOLDS explicitly instead of relying on the
            # ShuffleSplit default happening to match it.
            splitter = ShuffleSplit(n_splits=FOLDS)  # random_state=0
            scores = cross_val_score(
                clf,
                graphs,
                labels,
                n_jobs=-1,
                cv=splitter,
                verbose=0,
                error_score="raise",
            )
            rep_mean = scores.mean()
            rep_means.append(rep_mean)
            print(rep, "->", "%.5f" % rep_mean)

        print("S => %.5f" % (sum(rep_means) / REPS))


main()

0 -> 0.81579
1 -> 0.84211
2 -> 0.77895
3 -> 0.75263
4 -> 0.76842
5 -> 0.80526
6 -> 0.80526
7 -> 0.86316
8 -> 0.86316
9 -> 0.81579
S => 0.81105
