In [59]:
# Dependencies


import numpy as np
import functools as ft
import networkx as nx
from sklearn.base import BaseEstimator, ClassifierMixin
from datasets import load_dataset
from sklearn.model_selection import cross_val_score

In [60]:
# Basic arithmetic operations


def hdv(d):
    """Draw a random bipolar hypervector.

    Parameters
    ----------
    d : int or tuple of int
        Output shape handed to ``np.random.choice`` (an int gives a 1-D vector).

    Returns
    -------
    numpy.ndarray
        Array whose entries are sampled from ``{-1, 1}`` using NumPy's global
        random state.
    """
    alphabet = [-1, 1]
    return np.random.choice(alphabet, size=d)


def bind(xs):
    """Bind hypervectors by multiplying them together, left to right.

    Parameters
    ----------
    xs : iterable
        Operands supporting ``*`` (element-wise for NumPy arrays).  Must contain
        at least one item; an empty iterable makes ``functools.reduce`` raise
        ``TypeError``.

    Returns
    -------
    The running product of all operands (the single operand itself when only
    one is given).
    """

    def _multiply(product, operand):
        return product * operand

    return ft.reduce(_multiply, xs)


def bundle(xs):
    """Bundle hypervectors by adding them together, left to right.

    Parameters
    ----------
    xs : iterable
        Operands supporting ``+`` (element-wise for NumPy arrays).  Must contain
        at least one item; an empty iterable makes ``functools.reduce`` raise
        ``TypeError``.

    Returns
    -------
    The running sum of all operands (the single operand itself when only one
    is given).
    """

    def _accumulate(total, term):
        return total + term

    return ft.reduce(_accumulate, xs)


def similarity(A, B):
    """Return the dot product of ``A`` and ``B`` divided by the length of ``A``.

    For two bipolar (+1/-1) vectors of equal length this lies in ``[-1, 1]``.
    """
    overlap = np.dot(A, B)
    dimension = len(A)
    return overlap / dimension


def cosine_similarity(A, B):
    """Cosine of the angle between vectors ``A`` and ``B``.

    Returns
    -------
    ``dot(A, B) / (||A|| * ||B||)``, or the integer ``0`` when either vector has
    zero Euclidean norm (the cosine is undefined there).
    """
    inner = np.dot(A, B)
    length_A, length_B = np.linalg.norm(A), np.linalg.norm(B)

    # Only divide when both vectors actually have a direction.
    if length_A != 0 and length_B != 0:
        return inner / (length_A * length_B)

    return 0

In [61]:
# Memory


class ItemMemory:
    """Store of labelled hypervectors with nearest-neighbour lookup.

    Attributes are documented inline; entries are ``(label, vector)`` tuples kept
    in insertion order.
    """

    def __init__(self, vectors=None):
        """Create a memory.

        Parameters
        ----------
        vectors : list of (label, vector) tuples, optional
            Initial entries.  A list passed here is stored as-is (not copied),
            so later ``addVector`` calls append to that same list.  When omitted
            each instance gets its own new empty list.
        """
        # A literal ``[]`` default is evaluated once at definition time and would
        # be shared, and mutated, by every memory created without arguments.
        self.vectors = [] if vectors is None else vectors

    def addVector(self, label, V):
        """Append the pair ``(label, V)`` to the memory."""
        self.vectors.append((label, V))

    def cleanup(self, V):
        """Return the stored ``(label, vector)`` entry most similar to ``V``.

        Similarity is measured with ``cosine_similarity``; on ties the entry
        inserted first wins (``max`` keeps the first maximum).

        Raises
        ------
        ValueError
            If the memory holds no entries.
        """
        if len(self.vectors) == 0:
            # max() would also raise ValueError here, but with a message that
            # says nothing about the memory being empty.
            raise ValueError("cannot clean up against an empty item memory")

        return max(self.vectors, key=lambda entry: cosine_similarity(V, entry[1]))

In [62]:
# Stochastic arithmetic operations


def weightedAverage(A, B, p, q):
    """Randomly mix two vectors element by element.

    Every output position independently takes the value of ``A`` with
    probability ``p`` and the value of ``B`` with probability ``q``.

    Parameters
    ----------
    A, B : array-like of int
        Source vectors.  If their lengths differ, the result has the length of
        the shorter one.
    p, q : float
        Selection probabilities for ``A`` and ``B``; both must be non-negative
        and they must sum to 1.

    Returns
    -------
    numpy.ndarray
        1-D array of dtype ``np.int_``.

    Raises
    ------
    ValueError
        If ``p`` or ``q`` is negative (or NaN), or they do not sum to 1.
    """
    # Written as a negated conjunction so that NaN inputs are rejected too.
    if not (p >= 0 and q >= 0 and abs((p + q) - 1.0) <= 1e-8):
        raise ValueError("p and q must be non-negative and sum to 1")

    A = np.asarray(A)
    B = np.asarray(B)
    n = min(len(A), len(B))

    # One vectorised uniform draw per position instead of one
    # np.random.choice call per element from a Python-level loop.
    take_A = np.random.random(n) < p / (p + q)

    return np.where(take_A, A[:n], B[:n]).astype(np.int_)


def hdvA(B, a):
    """Return a noisy copy of ``B`` obtained by randomly flipping element signs.

    Each element is kept with probability ``(a + 1) / 2`` and replaced by its
    negation with probability ``(1 - a) / 2`` (via ``weightedAverage``), so the
    keep-minus-flip probability equals ``a``.

    Parameters
    ----------
    B : numpy.ndarray
        Base vector; must support unary minus.
    a : float
        Target agreement level; values outside ``[-1, 1]`` yield an invalid
        probability pair.
    """
    keep_probability = (a + 1) / 2
    flip_probability = (1 - a) / 2
    return weightedAverage(B, -B, keep_probability, flip_probability)


def hdvW(B, w):
    """Return ``B`` with its trailing part negated.

    The first ``round(w * len(B))`` elements are copied unchanged and the
    remaining elements are multiplied by -1, so ``w = 1`` reproduces ``B`` and
    ``w = 0`` yields ``-B``.

    Parameters
    ----------
    B : numpy.ndarray
        Base vector.
    w : float
        Fraction of leading elements to keep.

    Returns
    -------
    numpy.ndarray
        New array; ``B`` itself is not modified.
    """
    pivot = round(w * len(B))
    return np.concatenate((B[:pivot], -1 * B[pivot:]))

In [63]:
# # Basic tests

# A, B = hdv(10000), hdv(10000)
# C = weightedAverage(A, B, 0.90, 0.1)
# # print(A)
# # print(B)
# # print(C)
# # print(cosine_similarity(A, B))
# # print(cosine_similarity(B, C))

# a = 0.75

# Ba = hdvA(B, a)
# Bb = hdvA(B, 0.6)
# print(cosine_similarity(B, Ba))
# # print(cosine_similarity(Ba, Bb))
# print((cosine_similarity(B, Ba) + 1) / 2)
# print((similarity(B, Ba) + 1) / 2)

# Bw = hdvW(B, 0.1)
# print(B)
# print(Bw)
# print(cosine_similarity(B, Bw))
# print((cosine_similarity(B, Bw) + 1) / 2)

# Bw1 = hdvW(B, 0.0248)
# Bw2 = hdvW(B, 0.015)
# print(cosine_similarity(Bw1, Bw2))

In [64]:
# transform(graphs, 0.45, 5)[0].nodes

In [125]:
def processDataset(dataset):
    """Turn dataset records into networkx graphs and a parallel list of labels.

    Parameters
    ----------
    dataset : iterable of mappings
        Each record is read through two keys: ``"edge_index"``, whose items
        ``[0]`` and ``[1]`` are paired up element by element to form the edges,
        and ``"y"``, whose first item is taken as the label.

    Returns
    -------
    tuple
        ``(graphs, labels)`` with one ``nx.Graph`` and one label per record, in
        dataset order.
    """
    # NOTE(review): graphs are built from edges only, so a node that appears in
    # no edge would not be present in the result; confirm that is intended.
    graphs, labels = [], []

    for record in dataset:
        endpoints = record["edge_index"]
        network = nx.Graph()
        network.add_edges_from(zip(endpoints[0], endpoints[1]))

        graphs.append(network)
        labels.append(record["y"][0])

    return (graphs, labels)


def transform(X, alpha, digits):
    """Relabel every node of every graph with its rounded PageRank score.

    Parameters
    ----------
    X : iterable of networkx graphs
    alpha : float
        Passed to ``nx.pagerank`` as its second positional argument.
    digits : int
        Number of decimals kept by ``round`` before the score is turned into a
        string label.

    Returns
    -------
    list
        One relabelled graph per input graph, in input order.
    """
    # NOTE(review): nodes whose rounded scores coincide receive the same label;
    # presumably relabel_nodes merges them into one node; confirm.
    relabelled = []

    for original in X:
        scores = nx.pagerank(original, alpha)
        mapping = {node: str(round(score, digits)) for node, score in scores.items()}
        relabelled.append(nx.relabel_nodes(original, mapping))

    return relabelled


def encodeGraph(graph, vertices, dimensions, base):
    """Encode a relabelled graph as a single hypervector.

    Every node label is parsed as a float ``w`` and mapped to the level vector
    ``hdvW(base, w)``.  Each edge is the binding of its two endpoint vectors and
    the graph is the bundle of all its edge vectors.

    Parameters
    ----------
    graph : networkx graph
        Graph whose node labels can be converted with ``float`` (as produced by
        ``transform``).
    vertices : dict
        Unused; kept so existing callers keep working.
    dimensions : int
        Unused; the output length follows ``len(base)``.
    base : numpy.ndarray
        Base hypervector from which the level vectors are derived.

    Returns
    -------
    numpy.ndarray
        The graph hypervector.  A graph without edges yields the all-zero vector
        of length ``len(base)`` (the bundle of nothing) instead of failing
        inside ``bundle``.
    """
    # A level vector depends only on the node label, so build it once per
    # distinct label rather than once per incident edge.
    levels = {}

    def level(label):
        if label not in levels:
            levels[label] = hdvW(base, float(label))
        return levels[label]

    Edges = [bind([level(edge[0]), level(edge[1])]) for edge in graph.edges]

    if not Edges:
        return np.zeros(len(base), dtype=np.int_)

    return bundle(Edges)

In [127]:
class GraphHD(BaseEstimator, ClassifierMixin):
    """Hyperdimensional graph classifier with a scikit-learn estimator interface.

    Training relabels the nodes of every graph by rounded PageRank score
    (``transform``), encodes each graph as a hypervector (``encodeGraph``) and
    bundles the hypervectors of each class, ``step`` graphs at a time, into
    prototypes stored in an ``ItemMemory``.  Prediction returns the label of the
    prototype with the highest cosine similarity to the query graph.

    Parameters
    ----------
    alpha : float
        Forwarded to ``transform`` (PageRank parameter).
    digits : int
        Decimals kept when rounding PageRank scores into node labels.
    dimensions : int
        Length of the base hypervector.
    step : int
        Maximum number of training graphs bundled into one class prototype.
    """

    def __init__(self, alpha=0.45, digits=4, dimensions=10000, step=20):
        self.alpha = alpha
        self.digits = digits
        self.dimensions = dimensions
        self.step = step
        self.base = hdv(dimensions)
        self.memory = ItemMemory([])

    def fit(self, X, y):
        """Build the class prototypes from graphs ``X`` and their labels ``y``.

        Any prototypes from an earlier ``fit`` on this instance are discarded.
        Returns ``self``.
        """
        # Start from an empty memory: otherwise refitting the same instance
        # would keep (and predict with) the prototypes of the previous fit.
        self.memory = ItemMemory([])
        self.vertices = dict()

        # set_params(dimensions=...) bypasses __init__, so make sure the base
        # vector still has the configured length.
        if len(self.base) != self.dimensions:
            self.base = hdv(self.dimensions)

        self.labels = list(set(y))
        dictLabels = {label: [] for label in self.labels}

        graphs = transform(X, self.alpha, self.digits)

        # Pair graphs and labels by position; this also works when ``y`` does
        # not support positional ``y[i]`` indexing.
        for graph, label in zip(graphs, y):
            Graph = encodeGraph(graph, self.vertices, self.dimensions, self.base)
            dictLabels[label].append(Graph)

        for key, value in dictLabels.items():
            print("fit", key, len(value))
            for i in range(0, len(value), self.step):
                H = bundle(value[i : i + self.step])
                # Store the label itself so predict() can hand it back unchanged
                # (a str()/int() round trip only works for integer labels).
                self.memory.addVector(key, H)

        return self

    def predict(self, X):
        """Return a list with the predicted label of every graph in ``X``.

        Also prints the mean similarity between the queries and their matched
        prototypes, and how many predictions equal 0 and 1.
        """
        p = []
        s = []

        graphs = transform(X, self.alpha, self.digits)

        for testGraph in graphs:
            queryVector = encodeGraph(
                testGraph, self.vertices, self.dimensions, self.base
            )
            label, prototype = self.memory.cleanup(queryVector)

            p.append(label)
            s.append(cosine_similarity(queryVector, prototype))

        print("%.5f" % round(np.mean(s), 5), "0:", p.count(0), "1:", p.count(1))
        return p

In [122]:
# Load the two graph-classification datasets by name.  In the cells visible
# here only PROTEINS is used afterwards.
MUTAG = load_dataset("graphs-datasets/MUTAG")
PROTEINS = load_dataset("graphs-datasets/PROTEINS")

In [121]:
# Convert the PROTEINS "train" split into networkx graphs plus their labels.
# `graphs` and `labels` are read as globals by main() below.
(graphs, labels) = processDataset(PROTEINS["train"])

In [126]:
# Experiment configuration.
# FOLDS: value passed as `cv` to cross_val_score; REPS: how many times the
# whole cross-validation is repeated and averaged in main().
FOLDS, REPS = 10, 3
# Passed positionally to GraphHD(alpha, digits, dimensions, step).
ALPHA, DIGITS, DIMENSIONS, STEP = 0.55, 3, 10000, 50


def main():
    """Cross-validate GraphHD ``REPS`` times and print per-run and overall means.

    Reads the module-level ``graphs``/``labels`` and the configuration constants
    ``FOLDS``, ``REPS``, ``ALPHA``, ``DIGITS``, ``DIMENSIONS`` and ``STEP``.
    For every repetition it prints the array of fold scores and that run's mean,
    then finally the mean over all repetitions.
    """
    clf = GraphHD(ALPHA, DIGITS, DIMENSIONS, STEP)

    # NOTE(review): the output recorded for this cell shows identical fold
    # scores for repetitions 0 and 1, so repeating may add no information
    # unless the splits are shuffled; confirm.
    total = 0  # deliberately not named `sum`, which would shadow the builtin
    for i in range(REPS):
        scores = cross_val_score(clf, graphs, labels, n_jobs=1, cv=FOLDS, verbose=0)
        run_mean = scores.mean()
        print(scores)
        total += run_mean
        print(i, "->", "%.8f" % run_mean)

    print("S => %.5f" % (total / REPS))


# Run the experiment; the output is printed by main() and by GraphHD itself.
main()

0 596
1 405
0.99504 0: 59 1: 53
0 596
1 405
0.99470 0: 50 1: 62
0 596
1 405
0.99584 0: 71 1: 41
0 597
1 405
0.99575 0: 81 1: 30
0 597
1 405
0.99380 0: 76 1: 35
0 597
1 405
0.99499 0: 57 1: 54
0 597
1 405
0.99442 0: 65 1: 46
0 597
1 405
0.99572 0: 74 1: 37
0 597
1 405
0.99521 0: 62 1: 49
0 597
1 405
0.99450 0: 65 1: 46
[0.69642857 0.61607143 0.76785714 0.72072072 0.71171171 0.68468468
 0.72072072 0.65765766 0.65765766 0.7027027 ]
0 -> 0.69362130
0 596
1 405
0.99504 0: 59 1: 53
0 596
1 405
0.99470 0: 50 1: 62
0 596
1 405
0.99584 0: 71 1: 41
0 597
1 405
0.99575 0: 81 1: 30
0 597
1 405
0.99380 0: 76 1: 35
0 597
1 405
0.99499 0: 57 1: 54
0 597
1 405
0.99442 0: 65 1: 46
0 597
1 405
0.99572 0: 74 1: 37
0 597
1 405
0.99521 0: 62 1: 49
0 597
1 405
0.99450 0: 65 1: 46
[0.69642857 0.61607143 0.76785714 0.72072072 0.71171171 0.68468468
 0.72072072 0.65765766 0.65765766 0.7027027 ]
1 -> 0.69362130
0 596
1 405
0.99504 0: 59 1: 53
0 596
1 405
0.99470 0: 50 1: 62
0 596
1 405
0.99584 0: 71 1: 41
0 597
