In [1]:
# Dependencies


import numpy as np
import networkx as nx
from sklearn.base import BaseEstimator, ClassifierMixin
from datasets import load_dataset
from sklearn.model_selection import cross_val_score, ShuffleSplit
from graph import process_dataset, transform, centrality
from IPython.display import clear_output
import sklearn
from sklearn.metrics import accuracy_score
from functools import partial
import sys
import time


sys.path.append("../")

import thdc

from hdc import (
    hdv,
    bind,
    bundle,
    sbundle,
    ItemMemory,
    hdvw,
    hdva,
    cosim,
    hdvsc,
    zero,
    hdvs,
    pm,
    permute,
)

import torch

torch.set_default_device("cuda" if torch.cuda.is_available() else "cpu")
torch.cuda.empty_cache()

In [2]:
DATASET = load_dataset("graphs-datasets/MUTAG")["train"]

In [3]:
# tensor = torch.randint(0, 2, (30, DIMENSIONS), dtype=torch.float64).cuda()

# tensor[tensor == 0] = -1
# tensor
# print(tensor)
# indices = torch.tensor([0, 2])
# sum = torch.sum(torch.index_select(tensor, 0, indices), dim=0)
# print(sum)

In [4]:
FOLDS, REPS = 10, 1
DIMENSIONS, STEP = 1000, 20

# HVs = hdvs(30, DIMENSIONS)
P = pm(DIMENSIONS)

# tvs = tensor
tp = torch.from_numpy(P).cuda()

memory = thdc.ItemMemory()

(graphs, labels) = process_dataset(DATASET)

In [5]:
# torch.matmul(tensor[0], tp)

In [6]:
def encode(graph, vectors, mat, start):
    G = None
    for vs in nx.bfs_layers(graph, start):
        indices = torch.tensor(vs)
        if G is None:
            G = torch.sum(torch.index_select(vectors, 0, indices), dim=0)
        else:
            G = torch.sum(
                torch.cat(
                    [
                        torch.matmul(G, mat)[None, :],
                        torch.index_select(vectors, 0, indices),
                    ],
                    0,
                ),
                dim=0,
            )
    return G


# for graph in graphs[:20]:
#     encode(graph, tensor)

In [7]:
class GraphClassifier(BaseEstimator, ClassifierMixin):
    def __init__(self, step=20):
        self.step = step
        self.memory = thdc.ItemMemory()
        self.vectors = torch.randint(0, 2, (30, DIMENSIONS), dtype=torch.float64).cuda()
        self.vectors[self.vectors == 0] = -1
        self.mat = torch.from_numpy(pm(DIMENSIONS)).cuda()

    def fit(self, X, y):
        for i in range(len(X)):
            self.memory.add_vector(
                str(y[i]), encode(X[i], self.vectors, self.mat, start=0)
            )
            self.memory.add_vector(
                str(y[i]), encode(X[i], self.vectors, self.mat, start=5)
            )

        return self

    def predict(self, X):
        p = []
        for query in X:
            (label, _, _) = self.memory.cleanup(
                encode(query, self.vectors, self.mat, 0)
            )
            p.append(int(label))

        return p

In [8]:
def main():
    clf = GraphClassifier(step=STEP)
    sum = 0
    start_time = time.time()
    for i in range(REPS):
        scores = cross_val_score(
            clf,
            graphs,
            labels,
            cv=ShuffleSplit(),
            n_jobs=-1,
            verbose=4,
            error_score="raise",
        )
        sum += scores.mean()
        print(sum)
        del scores
    end_time = time.time()
    print(
        "  Acc => %.5f" % (sum / REPS),
        "T => %.5f" % ((end_time - start_time) / REPS),
    )


main()

[Parallel(n_jobs=-1)]: Using backend LokyBackend with 24 concurrent workers.


TypeError: encode() missing 1 required positional argument: 'start'