In [1]:
import torch
import numpy as np
import networkx as nx
from sklearn.base import BaseEstimator, ClassifierMixin
from sklearn.model_selection import cross_val_score, ShuffleSplit
from datasets import load_dataset
from graph import process_dataset, centrality
from IPython.display import clear_output
from functools import partial

import sys

# Make the parent directory importable before the `thdc` import below.
# NOTE(review): `graph` is imported earlier in this cell, before this path is added;
# if `graph` also lives in the parent directory, that import would fail on a fresh
# kernel -- confirm where it is located.
sys.path.append("../")

from thdc import hdv, bind, bundle, hdvsc, ItemMemory

# Create tensors on the GPU by default when CUDA is available, otherwise on the CPU.
torch.set_default_device("cuda" if torch.cuda.is_available() else "cpu")

In [2]:
# encode_graphe -> graphHD (graph, vertices, dimensions)
# Default hypervector size used by encode_graphe when no explicit size is passed.
DIMENSIONS = 10000


def encode_graphe(graph, vertices, dimensions=None):
    """Encode a graph from its edges using per-node hypervectors.

    Every node of ``graph`` that is not yet a key of ``vertices`` gets a new
    vector from ``hdv(dimensions)``. Each edge is then encoded as
    ``bind([vertices[u], vertices[v]])`` and all edge encodings are combined
    with ``bundle``.

    Parameters
    ----------
    graph
        Object exposing ``nodes`` and ``edges`` iterables; the first two
        items of each edge are used as its endpoints.
    vertices : dict
        Node -> hypervector mapping. It is updated IN PLACE with vectors for
        nodes not seen before, so passing the same dict across calls keeps
        node vectors consistent between graphs.
    dimensions : int, optional
        Size passed to ``hdv`` for newly created node vectors. When omitted,
        the module-level ``DIMENSIONS`` is read at call time; pass it
        explicitly to stay independent of later cells that rebind that global.

    Returns
    -------
    Whatever ``bundle`` returns for the list of bound edge vectors.
    """
    if dimensions is None:
        # Resolved per call so the default keeps following the notebook global.
        dimensions = DIMENSIONS

    for node in graph.nodes:
        if node not in vertices:
            vertices[node] = hdv(dimensions)

    # Index the endpoints explicitly instead of unpacking: edge tuples may
    # carry extra items beyond the two endpoints.
    edge_vectors = [
        bind([vertices[edge[0]], vertices[edge[1]]]) for edge in graph.edges
    ]

    return bundle(edge_vectors)

In [12]:
# encode_graphvc -> (graph, vs)
# NOTE(review): this rebinds the DIMENSIONS that the encode_graphe cell set to 10000.
# encode_graphe reads the global when it is called, so once this cell has run it
# would create 2000-sized node vectors -- confirm that is intended.
DIMENSIONS = 2000
# Number of entries requested from hdvsc; encode_graphvc scales node values by len(vs).
N = 100

# Lookup table indexed by encode_graphvc.
# Presumably N hypervectors of size DIMENSIONS; `side` and `iter` are hdvsc options
# defined in thdc -- TODO confirm their meaning there.
vs = hdvsc(N, DIMENSIONS, side=10, iter=4)

In [13]:
def encode_graphvc(graph, levels=None):
    """Encode a graph by bundling one table entry per node value.

    Each node of ``graph`` is converted with ``float(node)``, scaled by the
    table length and rounded to pick an entry of the table; the selected
    entries are combined with ``bundle``.

    Parameters
    ----------
    graph
        Object exposing a ``nodes`` iterable whose items are convertible to
        ``float`` (the recorded output shows values between 0 and 1).
    levels : sequence, optional
        Table to index. Defaults to the module-level ``vs``.

    Returns
    -------
    Whatever ``bundle`` returns for the list of selected entries.

    Notes
    -----
    The computed index is clamped to ``[0, len(table) - 1]``. Without the
    clamp a node value of 1.0 (or anything that rounds up to the table
    length) raised ``IndexError``, and a negative value silently wrapped
    around to the end of the table.
    """
    table = vs if levels is None else levels
    size = len(table)
    last = size - 1

    return bundle(
        [table[min(max(round(float(n) * size), 0), last)] for n in graph.nodes]
    )

In [5]:
class GraphClassifier(BaseEstimator, ClassifierMixin):
    """scikit-learn style classifier that stores encoded graphs in an ItemMemory.

    ``fit`` encodes every training graph with ``encode_graphvc`` and adds it
    to ``self.memory`` under its label; ``predict`` encodes each query the
    same way and takes the first element of ``self.memory.cleanup(...)`` as
    the predicted label.
    """

    def __init__(self):
        self.memory = ItemMemory()
        # Not read by fit/predict in this class; kept so existing code that
        # accesses the attribute keeps working.
        self.vertices = dict()

    def fit(self, X, y):
        """Store one encoded graph per training sample.

        Parameters
        ----------
        X : sequence
            Graphs accepted by ``encode_graphvc``.
        y : sequence
            Labels, aligned with ``X``.

        Returns
        -------
        self

        Raises
        ------
        ValueError
            If ``X`` and ``y`` have different lengths.
        """
        if len(X) != len(y):
            raise ValueError(
                f"X and y must have the same length, got {len(X)} and {len(y)}"
            )

        # Start from an empty memory so that refitting the same instance does
        # not mix in samples from a previous fit.
        self.memory = ItemMemory()

        for graph, label in zip(X, y):
            self.memory.add_vector(label, encode_graphvc(graph))
        return self

    def predict(self, X):
        """Return a list with one predicted label per graph in ``X``."""
        return [self.memory.cleanup(encode_graphvc(query))[0] for query in X]

In [6]:
# Load the "train" split of graphs-datasets/MUTAG; it is passed to process_dataset later on.
DATASET = load_dataset("graphs-datasets/MUTAG")["train"]

In [21]:
# process_dataset (from the local `graph` module) returns a (graphs, labels) pair;
# these are the X and y later passed to cross_val_score.
(graphs, labels) = process_dataset(DATASET)

In [22]:
# Alternative ranking function, kept for reference:
# graphs = centrality(graphs, rank=partial(nx.pagerank, alpha=0.85))
# NOTE(review): this rebinds `graphs` to the output of centrality(graphs, ...), so
# re-running this cell feeds already-processed graphs back in. Re-run the
# process_dataset cell first to start again from the raw graphs.
graphs = centrality(graphs, rank=partial(nx.laplacian_centrality))

# Inspect the first processed graph: iterating it prints one value per line
# (floats in the recorded output).
for n in graphs[0]:
    print(n)

0.10769230769230786
0.10769230769230809
0.12307692307692379
0.12307692307692335
0.2153846153846154
0.20000000000000012
0.2153846153846155
0.12307692307692346
0.1230769230769239
0.1384615384615382
0.12307692307692303
0.12307692307692324
0.1999999999999999
0.16923076923076938
0.06153846153846173


In [9]:
# FOLDS: number of shuffle splits per repetition; REPS: number of repetitions.
FOLDS, REPS = 10, 3


def main(seed=None):
    """Run REPS rounds of FOLDS-split shuffle cross-validation and print accuracies.

    For every repetition a new ``GraphClassifier`` is scored with
    ``cross_val_score`` on the module-level ``graphs`` / ``labels`` using
    ``ShuffleSplit(n_splits=FOLDS)``. The mean score of each repetition is
    printed as ``Acc => ...`` and the mean over repetitions as
    ``Avg Acc => ...``.

    Parameters
    ----------
    seed : int, optional
        When given, repetition ``i`` uses ``random_state=seed + i`` for its
        splits, so the run is reproducible while each repetition still gets
        different splits. With the default ``None`` the splits are unseeded,
        as before.

    Returns
    -------
    None
    """
    rep_means = []
    for rep in range(REPS):
        clf = GraphClassifier()
        splitter = ShuffleSplit(
            n_splits=FOLDS,
            random_state=None if seed is None else seed + rep,
        )
        scores = cross_val_score(
            clf,
            graphs,
            labels,
            cv=splitter,
            n_jobs=1,
            verbose=4,
        )
        rep_mean = scores.mean()
        print("Acc =>", rep_mean)
        rep_means.append(rep_mean)

    # The builtin sum is usable here because no local shadows it.
    print("Avg Acc =>", sum(rep_means) / REPS)


# Run the evaluation; cross_val_score is called with verbose=4, which produces
# the per-split "[CV] END ..." lines in the output.
main()

[CV] END ................................ score: (test=0.789) total time=   0.9s
[CV] END ................................ score: (test=0.737) total time=   0.4s
[CV] END ................................ score: (test=0.684) total time=   0.4s
[CV] END ................................ score: (test=0.684) total time=   0.4s
[CV] END ................................ score: (test=0.789) total time=   0.4s
[CV] END ................................ score: (test=0.895) total time=   0.4s
[CV] END ................................ score: (test=0.842) total time=   0.4s
[CV] END ................................ score: (test=0.947) total time=   0.4s
[CV] END ................................ score: (test=0.842) total time=   0.4s
[CV] END ................................ score: (test=0.842) total time=   0.4s
Acc => 0.8052631578947368
[CV] END ................................ score: (test=0.842) total time=   0.4s
[CV] END ................................ score: (test=0.895) total time=   0.4s
[C