In [20]:
# Dependencies


import numpy as np
import networkx as nx
from sklearn.base import BaseEstimator, ClassifierMixin
from datasets import load_dataset
from sklearn.model_selection import cross_val_score, ShuffleSplit

from graph import processDataset, transform
from hdc import hdv, bind, bundle, ItemMemory, hdvW, hdvA

In [11]:
# encodeGraph -> graphHD (graph, vertices, dimensions)


def encodeGraph(graph, vertices, dimensions):
    for node in graph.nodes:
        if node not in vertices:
            vertices[node] = hdv(dimensions)

    Edges = []

    for edge in graph.edges:
        v1 = vertices[edge[0]]
        v2 = vertices[edge[1]]
        E = bind([v1, v2])
        Edges.append(E)

    Graph = bundle(Edges)

    return Graph

In [12]:
# encodeGraphW -> vertices with hdw and edges (graph, vertices, base)


def encodeGraphW(graph, vertices, dimensions, base):
    for node in graph.nodes:
        if node not in vertices:
            vertices[node] = hdvW(base, float(node))

    Edges = []

    for edge in graph.edges:
        v1 = vertices[edge[0]]
        v2 = vertices[edge[1]]
        E = bind([v1, v2])
        Edges.append(E)

    Graph = bundle(Edges)

    return Graph

In [13]:
# encodeGraphV -> vertices with hdv and no edges (graph, vertices, base)


def encodeGraphV(graph, vertices, dimensions, base):
    Vertices = []

    for node in graph.nodes:
        if node not in vertices:
            vertices[node] = hdvW(base, float(node))
        Vertices.append(vertices[node])

    Graph = bundle(Vertices)

    return Graph

In [14]:
# encodeGraphVW -> vertices with hdw and no edges (graph, vertices, base)


def encodeGraphVW(graph, vertices, dimensions, base):
    Vertices = []

    for node in graph.nodes:
        if node not in vertices:
            vertices[node] = hdvW(base, float(node))
        Vertices.append(vertices[node])

    Graph = bundle(Vertices)

    return Graph

In [15]:
encodeGraph = encodeGraph

In [16]:
class GraphEstimator(BaseEstimator, ClassifierMixin):
    def __init__(self, alpha=0.45, digits=4, dimensions=10000, step=20):
        self.alpha = alpha
        self.digits = digits
        self.step = step
        self.memory = ItemMemory([])

        # self.dimensions = dimensions
        # self.B = hdv(dimensions)

    def fit(self, X, y):
        self.vertices = dict()
        self.labels = list(set(y))
        dictLabels = dict()

        graphs = transform(X, self.alpha, self.digits)

        for label in self.labels:
            dictLabels[label] = []

        for i in range(len(graphs)):
            Graph = encodeGraph(graphs[i], self.vertices, self.dimensions, self.B)
            dictLabels[y[i]].append(Graph)

        for key, value in dictLabels.items():
            for i in range(0, len(value), self.step):
                H = bundle(value[i : i + self.step])
                self.memory.addVector(str(key), H)

        return self

    def predict(self, X):
        p = []
        s = []

        graphs = transform(X, self.alpha, self.digits)

        for testGraph in graphs:
            queryVector = encodeGraph(testGraph, self.vertices, self.dimensions, self.B)
            cleanVector = self.memory.cleanup(queryVector)

            p.append(int(cleanVector[0]))
            # s.append(cosine_similarity(queryVector, cleanVector[1]))

        # print("%.5f" % round(np.mean(s), 5), "0:", p.count(0), "1:", p.count(1))
        return p

In [17]:
MUTAG = load_dataset("graphs-datasets/MUTAG")["train"]
PROTEINS = load_dataset("graphs-datasets/PROTEINS")["train"]
AIDS = load_dataset("graphs-datasets/AIDS")["full"]
IMDB = load_dataset("graphs-datasets/IMDB-BINARY")["train"]

In [18]:
(graphs, labels) = processDataset(MUTAG)

In [19]:
FOLDS, REPS = 10, 10
ALPHA, DIGITS, DIMENSIONS, STEP = 0.4, 4, 10000, 50
CV = FOLDS


def main():
    clf = GraphEstimator(ALPHA, DIGITS, DIMENSIONS, STEP)
    sum = 0
    for i in range(REPS):
        CV = ShuffleSplit(n_splits=5, test_size=0.25)  # random_state=0
        scores = cross_val_score(
            clf, graphs, labels, n_jobs=-1, cv=CV, verbose=0, error_score="raise"
        )
        sum += scores.mean()
        print(i, "->", "%.5f" % scores.mean())
        del scores

    print("S => %.5f" % (sum / REPS))


main()

TypeError: encodeGraph() takes 3 positional arguments but 4 were given