In [1]:
import torch
import pandas as pd
import numpy as np

from sklearn.base import BaseEstimator, ClassifierMixin
from sklearn.model_selection import cross_val_score

import sys

sys.path.append(".././papers/")

from thdc import ItemMemory

In [2]:
# Load the MNIST training CSV, shuffle its rows and keep a fixed-size subset.
# NOTE(review): read_csv treats the first line as a header row; confirm the
# CSV really has one, otherwise the first sample is silently dropped.
SHUFFLE_SEED = 0     # fixed so the sampled subset is the same on every run
N_SAMPLES = 10000    # number of shuffled rows kept for the experiment

mnist_rows = np.array(pd.read_csv("../datasets/mnist_train.csv"))

# Generator.shuffle permutes the rows (first axis) in place.
np.random.default_rng(SHUFFLE_SEED).shuffle(mnist_rows)

mnist_subset = torch.from_numpy(mnist_rows[:N_SAMPLES])

# Column 0 is used as the label; the remaining columns are the pixel values.
labels = mnist_subset[:, 0]
data = mnist_subset[:, 1:]

In [3]:
# Sizes of the two random codebooks used by the encoder.
HV_DIM = 10000        # length of every hypervector
N_INTENSITIES = 256   # rows in `vals`; presumably one per 8-bit pixel intensity
N_PIXELS = 784        # rows in `board`; presumably one per pixel of a flattened 28x28 image

CODEBOOK_SEED = 0     # fixed so the codebooks are identical on every run

# Use the GPU when one is present, but keep the notebook runnable without it.
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"

torch.manual_seed(CODEBOOK_SEED)

# randint yields 0/1; `* 2 - 1` maps that onto the bipolar values -1/+1.
vals = torch.randint(0, 2, (N_INTENSITIES, HV_DIM), dtype=torch.float, device=DEVICE) * 2 - 1

board = torch.randint(0, 2, (N_PIXELS, HV_DIM), dtype=torch.float, device=DEVICE) * 2 - 1

In [4]:
def encode(image):
    """Encode one flattened image as a single bundled hypervector.

    Every pixel with a positive intensity is bound (element-wise product)
    to the codebook row for its position, ``board[index]``, and the row for
    its intensity, ``vals[value]``; the bound vectors are then summed.

    Args:
        image: 1-D integer tensor of pixel intensities. Each intensity is
            used directly as a row index into the module-level ``vals`` and
            each pixel position as a row index into ``board``.

    Returns:
        A tensor as wide as one row of ``vals``/``board``: the sum of the
        bound vectors, or all zeros when no pixel is active.
    """
    # One mask drives both lookups, so the selected intensities and the
    # selected positions always line up one-to-one.
    active = image > 0
    intensities = image[active]
    positions = active.nonzero().reshape(-1)

    # Binding two vectors is their element-wise product; there is no need to
    # stack them into an intermediate (k, 2, D) tensor and reduce it.
    bound = vals[intensities] * board[positions]

    return bound.sum(dim=0)

In [5]:
class MnistClassifier(BaseEstimator, ClassifierMixin):
    """scikit-learn style classifier backed by an ``ItemMemory``.

    Training stores one encoded vector per sample under its label;
    prediction encodes each query and asks the memory to clean it up.
    """

    def __init__(self):
        # Kept so the attribute exists before the first call to ``fit``.
        self.memory = ItemMemory()

    def fit(self, X, y):
        """Store the encoding of every training sample under its label.

        Args:
            X: sequence of images, each accepted by ``encode``.
            y: sequence of labels, one per entry of ``X``.

        Returns:
            ``self``, as scikit-learn expects from ``fit``.

        Raises:
            ValueError: if ``X`` and ``y`` have different lengths.
        """
        if len(X) != len(y):
            raise ValueError(
                f"X and y have inconsistent lengths: {len(X)} != {len(y)}"
            )

        # Start from an empty memory so that refitting the same instance
        # does not keep vectors from an earlier training set.
        self.memory = ItemMemory()

        # Iterate positionally; `y[i]` would be a label-based lookup on
        # containers such as a pandas Series.
        for sample, label in zip(X, y):
            self.memory.add_vector(label, encode(sample))
        return self

    def predict(self, X):
        """Return one predicted label per query image in ``X``.

        Returns:
            A list holding, for each query, element 0 of what
            ``ItemMemory.cleanup`` returns for the encoded query.
        """
        # NOTE(review): element 0 of the cleanup result is presumably the
        # label of the best match - confirm against thdc.ItemMemory.
        return [self.memory.cleanup(encode(query))[0] for query in X]

In [6]:
def main():
    """Run 3-fold cross-validation on the loaded subset and print the mean score."""
    classifier = MnistClassifier()
    fold_scores = cross_val_score(
        classifier,
        data,
        labels,
        cv=3,
        n_jobs=1,
        verbose=4,
    )
    mean_score = fold_scores.mean()
    print(mean_score)


main()

[CV] END ................................ score: (test=0.669) total time=37.0min
[CV] END ................................ score: (test=0.693) total time=35.8min
[CV] END ................................ score: (test=0.693) total time=35.8min
0.6853015835476768
