In [17]:
import numpy as np
import networkx as nx
import functools as ft
import math
import random
from datasets import load_dataset

In [2]:
MUTAG = load_dataset("graphs-datasets/MUTAG")

In [18]:
# Params
DIMENSIONS = 10000
DIGITS = 7
ALPHA = 0.85

In [19]:
graphs = []

for i in range(150):
    graph = MUTAG["train"][i]
    egdes = zip(graph["edge_index"][0], graph["edge_index"][1])
    G = nx.Graph()
    G.add_edges_from(egdes)
    graphs.append((G, graph["y"][0]))

In [20]:
def generateRandomVector(d):
    return np.random.randint(2, size=d)


def similarity(A, B):
    if A.size != B.size:
        raise Exception("A and B have different dimensions.")

    count = 0
    for i in range(0, (A.size - 1)):
        if A[i] == B[i]:
            count += 1

    return count / A.size


def mult(A, B):
    return np.bitwise_xor(A, B)


def applyThreshold(V, n=2):
    if n % 2 == 1:
        threshold = lambda x: 1 if x > n / 2 else 0
    else:
        threshold = (
            lambda x: 1
            if x > n / 2
            else (1 if x == n / 2 and bool(random.getrandbits(1)) else 0)
        )

    vectorized_threshold = np.vectorize(threshold)
    return vectorized_threshold(V)


def arithmeticSumVectors(vectors):
    sum = ft.reduce(lambda x, y: x + y, vectors)

    return applyThreshold(sum, len(vectors))


def comparison(A, B):
    dividend = sum(A * B)
    divisor = math.sqrt(sum(A**2) * sum(B**2))

    return dividend / divisor


def cosine_similarity(A, B):
    dot_product = np.dot(A, B)
    norm_A = np.linalg.norm(A)
    norm_B = np.linalg.norm(B)

    return dot_product / (norm_A * norm_B)


class ItemMemory:
    def __init__(self, vectors=[]):
        self.vectors = vectors

    def addVector(self, label, V):
        self.vectors.append((label, V))

    def count(self):
        return len(self.vectors)

    def cleanup(self, V):
        return max(self.vectors, key=lambda x: cosine_similarity(V, x[1]))

In [21]:
def encodeGraph(graph):
    gpr = nx.pagerank(graph[0], ALPHA)

    for pr in nx.pagerank(graph[0], ALPHA).values():
        key = str(round(pr, DIGITS))
        vertices[key] = generateRandomVector(DIMENSIONS)

    Edges = []

    for edge in graph[0].edges:
        keyV1 = str(round(gpr[edge[0]], DIGITS))
        keyV2 = str(round(gpr[edge[1]], DIGITS))
        v1 = vertices[keyV1]
        v2 = vertices[keyV2]
        E = mult(v1, v2)
        Edges.append(E)

    Graph = arithmeticSumVectors(Edges)

    # c = 0
    # for e in Edges:
    #     c += cosine_similarity(Graph, e)

    #     print("comparison Graph with E", c / len(Edges))

    return Graph

In [22]:
memory = ItemMemory()
vertices = dict()
labels = [0, 1]

for label in labels:
    Graphs = []

    for graph in filter(lambda x: x[1] == label, graphs):
        Graphs.append(encodeGraph(graph))

    print(len(Graphs))
    Label = arithmeticSumVectors(Graphs)

    # c = 0
    # for g in Graphs:
    #     sim = similarity(Label, g)
    #     print(sim)
    #     c += sim

    # print("comparison Label with G", c / len(Graphs))

    memory.addVector(str(label), Label)

51
99


In [23]:
testGraphs = []

for i in range(38):
    graph = MUTAG["train"][i + 150]
    egdes = zip(graph["edge_index"][0], graph["edge_index"][1])
    G = nx.Graph()
    G.add_edges_from(egdes)
    testGraphs.append((G, graph["y"][0]))

count = 0
for testGraph in testGraphs:
    queryVector = encodeGraph(testGraph)
    cleanVector = memory.cleanup(queryVector)
    # print(comparison(queryVector, cleanVector[1]))
    # print(comparison(queryVector, Labels[0]))
    # print(comparison(queryVector, Labels[1]))

    print("predicted", cleanVector[0], "expected", testGraph[1])

    if int(cleanVector[0]) == testGraph[1]:
        count += 1

    # print(int(cleanVector[0]) == testGraph[1])
print(count / len(testGraphs))

predicted 0 expected 1
predicted 0 expected 1
predicted 0 expected 1
predicted 0 expected 0
predicted 0 expected 0
predicted 0 expected 0
predicted 0 expected 1
predicted 0 expected 1
predicted 0 expected 1
predicted 0 expected 0
predicted 0 expected 1
predicted 0 expected 1
predicted 0 expected 1
predicted 0 expected 1
predicted 0 expected 1
predicted 0 expected 1
predicted 0 expected 1
predicted 0 expected 0
predicted 0 expected 1
predicted 0 expected 1
predicted 0 expected 1
predicted 0 expected 1
predicted 0 expected 1
predicted 0 expected 1
predicted 0 expected 0
predicted 0 expected 1
predicted 0 expected 1
predicted 0 expected 1
predicted 0 expected 0
predicted 0 expected 1
predicted 0 expected 0
predicted 0 expected 0
predicted 0 expected 1
predicted 0 expected 1
predicted 0 expected 0
predicted 0 expected 0
predicted 0 expected 1
predicted 0 expected 0
0.3157894736842105


In [24]:
Av = generateRandomVector(DIMENSIONS)
Bv = generateRandomVector(DIMENSIONS)
print(comparison(Av, Bv))
print(cosine_similarity(Av, Bv))
print(similarity(Av, Bv))

0.4916220512677644
0.4916220512677644
0.4964


In [29]:
from sklearn.base import BaseEstimator, ClassifierMixin


class GraphHD(BaseEstimator, ClassifierMixin):
    def __init__(self, dimensions=10000, digits=7, alpha=0.85):
        self.dimensions = dimensions
        self.digits = digits
        self.alpha = alpha

    def fit(self, X, y):
        self.memory = ItemMemory()
        self.vertices = dict()
        self.labels = list(set(y))

        for label in self.labels:
            Graphs = []

            for graph in filter(lambda x: x[1] == label, X):
                Graphs.append(encodeGraph(graph))

            Label = arithmeticSumVectors(Graphs)

            self.memory.addVector(str(label), Label)

        return self

    def predict(self, X):
        predictions = []

        for testGraph in X:
            queryVector = encodeGraph(testGraph)
            cleanVector = self.memory.cleanup(queryVector)

            predictions.append(int(cleanVector[0]))

        print(predictions)
        return predictions

In [30]:
from sklearn.model_selection import cross_val_score

clf = GraphHD()
scores = cross_val_score(clf, graphs, [x[1] for x in graphs], cv=10)
print(scores)
print(scores.mean())

[0.33333333 0.33333333 0.33333333 0.33333333 0.33333333 0.33333333
 0.33333333 0.33333333 0.33333333 0.4       ]
0.33999999999999997
