In [None]:
# Dependencies


import numpy as np
import networkx as nx
from sklearn.base import BaseEstimator, ClassifierMixin
from datasets import load_dataset
from sklearn.model_selection import cross_val_score, ShuffleSplit
from graph import process_dataset, transform, centrality
from IPython.display import clear_output
import sklearn
from sklearn.metrics import accuracy_score
from functools import partial
import sys
import time


sys.path.append("../")

import thdc

from hdc import (
    hdv,
    bind,
    bundle,
    sbundle,
    ItemMemory,
    hdvw,
    hdva,
    cosim,
    hdvsc,
    zero,
    hdvs,
    pm,
    permute,
)

import torch

# Tensors created without an explicit `device=` are allocated on the GPU when
# CUDA is available and on the CPU otherwise.
torch.set_default_device("cuda" if torch.cuda.is_available() else "cpu")

In [None]:
# "train" split of the MUTAG graph dataset, loaded through the `datasets` library.
DATASET = load_dataset("graphs-datasets/MUTAG")["train"]

In [None]:
# Experiment configuration.
# FOLDS: presumably the intended number of cross-validation splits — TODO confirm.
# REPS: how many times main() repeats the cross-validation run before averaging.
FOLDS, REPS = 10, 3
# DIMENSIONS: width of every hypervector (and side length of the permutation matrix).
# STEP: how many encoded graphs of one class are bundled into a single prototype.
DIMENSIONS, STEP = 10000, 20

In [None]:
# Table of 30 random hypervectors (one per row) with entries drawn from {0, 1},
# stored as float64. The device is chosen explicitly instead of calling
# `.cuda()` unconditionally, so the notebook also runs on CPU-only machines.
tensor = torch.randint(
    0,
    2,
    (30, DIMENSIONS),
    dtype=torch.float64,
    device="cuda" if torch.cuda.is_available() else "cpu",
)

In [None]:
# Turn the {0, 1} entries into bipolar {-1, +1} values, in place.
# Re-running this cell is harmless: after the first run no zeros remain.
tensor[tensor == 0] = -1

In [None]:
# Sanity check: bundle (element-wise sum) rows 0 and 2 of the hypervector table.
# The result is kept in `bundled_rows` rather than `sum`, so the builtin `sum`
# is not shadowed for the rest of the kernel session.
print(tensor)
# index_select needs the index tensor on the same device as its input.
indices = torch.tensor([0, 2], device=tensor.device)
bundled_rows = torch.sum(torch.index_select(tensor, 0, indices), dim=0)
print(bundled_rows)

In [None]:
# FOLDS, REPS, DIMENSIONS and STEP are defined once in the configuration cell
# near the top of the notebook; they are deliberately not repeated here so the
# two copies cannot drift apart.

# HVs = hdvs(30, DIMENSIONS)
# Permutation matrix as a NumPy array (torch.from_numpy below requires an ndarray).
P = pm(DIMENSIONS)

tvs = tensor
# Keep the permutation matrix on the same device and with the same dtype as the
# hypervector table: `torch.matmul(G, tp)` in encode() needs both operands to
# agree, and an unconditional `.cuda()` would fail on CPU-only machines.
tp = torch.from_numpy(P).to(device=tensor.device, dtype=tensor.dtype)

memory = ItemMemory()

# Graph objects and their class labels extracted from the raw dataset.
(graphs, labels) = process_dataset(DATASET)

In [None]:
# torch.matmul(tensor[0], tp)

In [None]:
def encode(graph, vectors):
    """Encode a graph as one hypervector by folding its BFS layers.

    The graph is traversed layer by layer with ``nx.bfs_layers`` starting at
    node ``0``. The rows of ``vectors`` selected by the first layer's node ids
    are summed to seed the accumulator. For every later layer the accumulator
    is multiplied by the notebook-global permutation matrix ``tp`` and the sum
    of that layer's rows is added.

    Args:
        graph: networkx graph. It must contain node ``0`` (the BFS source) and
            its node ids are used directly as row indices into ``vectors``.
        vectors: 2-D table of node hypervectors, one row per node id. Anything
            ``torch.as_tensor`` accepts (torch tensor, NumPy array, nested
            sequence). Passing a tensor that already lives on ``tp``'s device
            with ``tp``'s dtype avoids a conversion on every call.

    Returns:
        A 1-D tensor with one entry per column of ``vectors``.
    """
    # Use the table the caller passed in (it used to be silently ignored in
    # favour of the global `tensor`). Align device and dtype with `tp` so the
    # matmul below is well-defined.
    node_vectors = torch.as_tensor(vectors, dtype=tp.dtype, device=tp.device)

    graph_hv = None
    for layer in nx.bfs_layers(graph, 0):
        # index_select needs the index tensor on the same device as its input.
        indices = torch.tensor(layer, device=node_vectors.device)
        layer_sum = torch.index_select(node_vectors, 0, indices).sum(dim=0)
        if graph_hv is None:
            graph_hv = layer_sum
        else:
            # Permute what has been accumulated so far, then add the new layer.
            graph_hv = torch.matmul(graph_hv, tp) + layer_sum
    return graph_hv


# Smoke test: encode the first 20 graphs to check that encode() runs end to end.
# The resulting hypervectors are discarded.
for graph in graphs[:20]:
    encode(graph, tensor)

In [None]:
class GraphClassifier(BaseEstimator, ClassifierMixin):
    """Prototype-based graph classifier built on hypervector encodings.

    ``fit`` encodes every training graph with ``encode``, groups the encodings
    by label, bundles them in chunks of ``step`` and stores each bundle in an
    ``ItemMemory`` under the stringified label. ``predict`` encodes a query
    graph and asks the memory for the stored entry it cleans up to.

    Parameters:
        step: number of encoded graphs of one class bundled into a single
            prototype vector.
    """

    def __init__(self, step=20):
        self.step = step
        self.memory = ItemMemory()
        self.vectors = hdvs(100, DIMENSIONS)

    def fit(self, X, y):
        """Build class prototypes from graphs ``X`` and their labels ``y``.

        Returns ``self`` so calls can be chained, as scikit-learn expects.
        """
        # Start from an empty memory on every fit; otherwise refitting the same
        # instance would keep the prototypes learned by earlier fits.
        self.memory = ItemMemory()

        classes = {label: [] for label in set(y)}
        for graph, label in zip(X, y):
            classes[label].append(encode(graph, self.vectors))

        for key, value in classes.items():
            # One prototype per chunk of `step` encodings; all chunks of a class
            # are registered under the same stringified label.
            for i in range(0, len(value), self.step):
                H = bundle(value[i : i + self.step])
                self.memory.add_vector(str(key), H)

        return self

    def predict(self, X):
        """Return a list with the predicted integer label of each graph in ``X``.

        Labels are stored as strings in the memory and converted back with
        ``int``, so the training labels must be integer-like.
        """
        p = []
        for query in X:
            # cleanup() yields a 3-tuple; only its first element (the label) is used.
            (label, _, _) = self.memory.cleanup(encode(query, self.vectors))
            p.append(int(label))

        return p

In [None]:
def main():
    """Run REPS rounds of shuffle-split cross-validation and report averages.

    Each round scores a ``GraphClassifier`` on ``graphs``/``labels`` with
    ``cross_val_score`` over ``FOLDS`` random splits. The running total of the
    per-round mean accuracy is printed after every round; at the end the mean
    accuracy and the mean wall-clock time per round are printed.
    """
    clf = GraphClassifier(step=STEP)
    # Named `total_accuracy` rather than `sum` so the builtin is not shadowed.
    total_accuracy = 0
    start_time = time.time()
    for _ in range(REPS):
        scores = cross_val_score(
            clf,
            graphs,
            labels,
            # Tie the split count to the FOLDS constant instead of relying on
            # ShuffleSplit's implicit default.
            cv=ShuffleSplit(n_splits=FOLDS),
            # NOTE(review): n_jobs=-1 spreads the splits over worker processes;
            # confirm this behaves well when the tensors live on a GPU.
            n_jobs=-1,
            verbose=4,
        )
        total_accuracy += scores.mean()
        print(total_accuracy)
    end_time = time.time()
    print(
        "  Acc => %.5f" % (total_accuracy / REPS),
        "T => %.5f" % ((end_time - start_time) / REPS),
    )


main()