In [1]:
import torch
import pandas as pd
import numpy as np

from sklearn.base import BaseEstimator, ClassifierMixin
from sklearn.model_selection import cross_val_score

import sys

sys.path.append(".././papers/")

from thdc import ItemMemory

In [2]:
# Load the MNIST training CSV and keep a small random subset of its rows.
N_SAMPLES = 1000  # number of rows kept for the experiment
SHUFFLE_SEED = 0  # fixed seed so the same subset is selected on every run

mnist_raw = pd.read_csv("../datasets/mnist_train.csv").to_numpy()

# Pick the subset through a seeded permutation of row indices: the selection
# is reproducible after a kernel restart and the full array is never
# shuffled in place.
subset_rows = np.random.default_rng(SHUFFLE_SEED).permutation(len(mnist_raw))[:N_SAMPLES]
mnist_subset = mnist_raw[subset_rows]

labels = mnist_subset[:, 0]  # column 0 is used as the class label
data = torch.from_numpy(mnist_subset[:, 1:])  # remaining columns are the per-pixel features

In [3]:
# Random bipolar (+1 / -1) codebooks consumed by `encode`.
DIMENSIONS = 10000  # length of every codebook row
N_LEVELS = 256  # rows of `vals`; `encode` indexes them by pixel value
N_PIXELS = 784  # rows of `board`; `encode` indexes them by pixel position

# Use the GPU when one is present, otherwise fall back to the CPU so the
# notebook still runs end to end on machines without CUDA.
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"

# Seed the generator so the codebooks (and every score derived from them)
# are reproducible across kernel restarts.
CODEBOOK_SEED = 0
torch.manual_seed(CODEBOOK_SEED)

# randint yields 0/1; `* 2 - 1` maps that to -1/+1.
vals = torch.randint(0, 2, (N_LEVELS, DIMENSIONS), dtype=torch.float, device=DEVICE) * 2 - 1

board = torch.randint(0, 2, (N_PIXELS, DIMENSIONS), dtype=torch.float, device=DEVICE) * 2 - 1

In [4]:
def encode(image):
    """Encode one image as a single vector built from the global codebooks.

    Every strictly positive pixel contributes the elementwise product of
    ``vals[pixel_value]`` and ``board[pixel_position]``; the contributions of
    all such pixels are summed. Pixels that are zero (or negative) are skipped.

    Parameters
    ----------
    image : tensor, numpy array or sequence of numbers
        Pixel values. Multi-dimensional input is flattened, so the pixel
        position is the flat (row-major) index. Positive values must be valid
        row indices of ``vals`` and positions valid row indices of ``board``.

    Returns
    -------
    torch.Tensor
        1-D tensor with the same length as a codebook row. It is all zeros
        when the image has no positive pixel.
    """
    # Accept arrays/lists too and keep the index tensors on the codebook device.
    image = torch.as_tensor(image, device=vals.device).reshape(-1)

    # A single mask drives both the values and the positions, so the two
    # index tensors always have the same length.
    foreground = image > 0
    positions = foreground.nonzero().reshape(-1)
    # Cast to int64: a uint8 tensor would be interpreted as a mask and a
    # float tensor is rejected as an index.
    levels = image[foreground].long()

    # Pair each pixel's value row with its position row, then bundle by summing.
    bound = vals[levels] * board[positions]
    return bound.sum(dim=0)

In [10]:
import random


class MnistClassifier(BaseEstimator, ClassifierMixin):
    """Classifier that stores summed image encodings in an ``ItemMemory``.

    Training samples are encoded with the module-level ``encode`` function,
    grouped by label, and summed in chunks of at most ``step`` encodings.
    Each chunk sum is stored in the memory under its label. Prediction encodes
    a query and returns the first element of ``memory.cleanup(...)``
    (presumably the best-matching label; confirm against ``ItemMemory``).

    Parameters
    ----------
    step : int
        Maximum number of encoded samples of one class summed into a single
        stored vector. Must be at least 1.
    """

    def __init__(self, step):
        self.memory = ItemMemory()
        self.step = step

    def fit(self, X, y):
        """Rebuild the memory from the training samples.

        Parameters
        ----------
        X : indexable collection of samples accepted by ``encode``
        y : indexable collection of hashable labels, aligned with ``X``

        Returns
        -------
        MnistClassifier
            ``self``, to allow chaining.

        Raises
        ------
        ValueError
            If ``step`` is smaller than 1.
        """
        if self.step < 1:
            # A negative step would make the chunk loop below empty and leave
            # the memory silently untrained.
            raise ValueError(f"step must be a positive integer, got {self.step!r}")

        # Start from an empty memory so that calling fit() again replaces the
        # previous prototypes instead of adding to them.
        self.memory = ItemMemory()

        encoded_by_label = {label: [] for label in set(y)}
        for i in range(len(X)):
            encoded_by_label[y[i]].append(encode(X[i]))

        for label, encodings in encoded_by_label.items():
            for start in range(0, len(encodings), self.step):
                chunk = encodings[start : start + self.step]
                prototype = torch.stack(chunk, dim=0).sum(dim=0)
                self.memory.add_vector(label, prototype)

        return self

    def predict(self, X):
        """Return one predicted label per sample in ``X`` as a list."""
        return [self.memory.cleanup(encode(query))[0] for query in X]

In [11]:
def main():
    """Cross-validate ``MnistClassifier`` on ``data``/``labels`` and print the mean score."""
    fold_scores = cross_val_score(
        MnistClassifier(step=1000),
        data,
        labels,
        cv=3,
        n_jobs=1,
        verbose=4,
    )
    print(fold_scores.mean())


# Run the evaluation when the cell is executed.
main()