In [1]:
# Dependencies

import torch
import networkx as nx
from sklearn.base import BaseEstimator, ClassifierMixin
from datasets import load_dataset
from sklearn.model_selection import cross_val_score, ShuffleSplit, cross_val_predict
from graph import process_dataset
from sklearn.metrics import confusion_matrix, accuracy_score
import sys
import random


sys.path.append("../")

import thdc
from hdc import pm


torch.set_default_device("cuda" if torch.cuda.is_available() else "cpu")

In [2]:
DATASET = load_dataset("graphs-datasets/MUTAG")["train"]
# DATASET = load_dataset("graphs-datasets/AIDS")["full"]
# DATASET = load_dataset("graphs-datasets/PROTEINS")["train"]

In [3]:
def create_map(length):
    m = {}
    a = random.sample(range(length), length)
    for i in range(length):
        m[i] = a[i]

    return m

In [4]:
DIMENSIONS = 2000
(graphs, labels) = process_dataset(DATASET)
VECTORS = torch.randint(0, 2, (30, DIMENSIONS), dtype=torch.float64).cuda()
VECTORS[VECTORS == 0] = -1

MAT = torch.from_numpy(pm(DIMENSIONS)).cuda()

In [23]:
def encode(graph, vectors, mat):
    # g = graph
    g = nx.relabel_nodes(graph, create_map(len(list(graph))))
    # g = nx.relabel_nodes(graph, create_map(30))
    G = None
    for n in random.sample(list(g), 4):
        # for vs in nx.bfs_layers(g, [0]):
        for vs in nx.dfs_preorder_nodes(g, n):
            # for vs in nx.bfs_layers(g, [list(g)[0]]):
            # for vs in nx.bfs_layers(g, [n]):
            # for vs in nx.bfs_layers(g, [list(g)]):
            if G is None:
                G = torch.sum(
                    torch.index_select(vectors, 0, torch.tensor(vs)),
                    dim=0,
                )
            else:
                G = torch.sum(
                    torch.cat(
                        [
                            torch.matmul(G, mat)[None, :],
                            torch.index_select(vectors, 0, torch.tensor(vs)),
                        ],
                        0,
                    ),
                    dim=0,
                )
        yield G

In [6]:
class GraphClassifier(BaseEstimator, ClassifierMixin):
    def __init__(self):
        self.memory = thdc.ItemMemory()

    def fit(self, X, y):
        for i in range(len(X)):
            for x in encode(X[i], VECTORS, MAT):
                self.memory.add_vector(y[i], x)
        return self

    def predict(self, X):
        p = []
        for query in X:
            max_score = 0

            for x in encode(query, VECTORS, MAT):
                # p.append(self.memory.cleanup(x)[0])
                vs = self.memory.cleanup_all(x, 5)
                s = sum(1 * v[2] ** 4 if v[0] == 1 else -1 * v[2] ** 4 for v in vs)
                if abs(s) > max_score:
                    max_score = s

            p.append(1 if max_score >= 0 else 0)

        return p

In [24]:
FOLDS, REPS = 10, 3


def main():
    sum = 0
    for _ in range(REPS):
        clf = GraphClassifier()
        scores = cross_val_score(
            clf,
            graphs,
            labels,
            cv=ShuffleSplit(n_splits=FOLDS),
            n_jobs=1,
            verbose=4,
        )
        print("Acc =>", scores.mean())
        sum += scores.mean()
    print("Avg Acc =>", sum / REPS)


def conf():
    clf = GraphClassifier()
    y_pred = cross_val_predict(clf, graphs, labels, cv=FOLDS, n_jobs=1)
    print(confusion_matrix(labels, y_pred))
    print(accuracy_score(labels, y_pred))


main()
# conf()

[CV] END ................................ score: (test=0.789) total time=   1.3s
[CV] END ................................ score: (test=0.789) total time=   1.3s
[CV] END ................................ score: (test=0.789) total time=   1.3s
[CV] END ................................ score: (test=0.895) total time=   1.3s
[CV] END ................................ score: (test=0.684) total time=   1.3s
[CV] END ................................ score: (test=0.579) total time=   1.3s
[CV] END ................................ score: (test=0.789) total time=   1.3s
[CV] END ................................ score: (test=0.737) total time=   1.3s
[CV] END ................................ score: (test=0.632) total time=   1.3s
[CV] END ................................ score: (test=0.737) total time=   1.3s
Acc => 0.7421052631578947
[CV] END ................................ score: (test=0.842) total time=   1.3s
[CV] END ................................ score: (test=0.842) total time=   1.3s
[C