In [159]:
import torch
import numpy as np
import time
import os
from utils import prepare_data, encode_and_save,load
from model import BModel, GModel
import argparse
import torchvision
from encoder import RandomFourierEncoder

In [160]:
# Sanity check: display whether PyTorch can see a CUDA device from this kernel.
torch.cuda.is_available()

True

In [207]:
# Experiment configuration. It is kept in an argparse.Namespace because the
# project helpers called later (encode_and_save, load) take an `args` object.
args = argparse.Namespace(
    dataset="mnist",            # mnist | fmnist | cifar | isolet | ucihar
    model="rff-gvsa",           # rff-hdc linear-hdc rff-gvsa
    dim=10000,                  # passed to the encoder as output_dim
    gamma=0.3,                  # passed to the encoder as gamma
    gorder=16,                  # passed to the encoder as gorder
    lr=0.01,
    epoch=10,
    seed=42,
    data_dir='./encoded_data',  # root folder for encoded data
    raw_data_dir="./dataset",   # root folder for the raw dataset
)

# Seed PyTorch and NumPy with the same value so the run is repeatable.
for seed_rng in (torch.manual_seed, np.random.seed):
    seed_rng(args.seed)

In [208]:
# Any model whose name contains "hdc" is run with gorder forced to 2,
# regardless of the configured value.
if "hdc" in args.model:
    args.gorder = 2
    print("Use binary HDC with random fourier features, ignoring gorder, set to 2.")

# Remember the un-suffixed root folder the first time this cell runs. Rebuilding
# the path from that root keeps the cell idempotent: re-running it (for example
# after changing gamma or dim) yields a sibling folder instead of nesting the
# run folder inside the previous one.
if not hasattr(args, "data_root"):
    args.data_root = args.data_dir
args.data_dir = (
    f"{args.data_root}/{args.dataset}_{args.model}"
    f"_order{args.gorder}_gamma{args.gamma}_dim{args.dim}"
)

In [209]:
# Create the folder for this run's encoded data, including missing parents.
# exist_ok=True tolerates an existing directory but still raises
# FileExistsError when the path is occupied by something that is not a
# directory; the previous try/except reported that case as "already exists"
# and carried on.
already_present = os.path.isdir(args.data_dir)
os.makedirs(args.data_dir, exist_ok=True)
if already_present:
    print("Encoded data folder already exists")

In [210]:
# Encode the dataset selected in `args` with the project helper from utils.
# Presumably the results are written under args.data_dir (item_mem.pt is read
# back from there later) — confirm against utils.encode_and_save.
# NOTE(review): this call runs unconditionally; the recorded log shows roughly
# 78 s for the first 51200 training images, so a cache guard may be worthwhile.
encode_and_save(args)

# of channels of data 1
# of training samples and test samples 60000 10000
Encoding with random fourier features encoder.
the threshold to discretize fourier features to group elements tensor([-1.5343, -1.1505, -0.8872, -0.6746, -0.4888, -0.3187, -0.1573,  0.0000,
         0.1573,  0.3187,  0.4888,  0.6746,  0.8872,  1.1505,  1.5343])
Encoded pixels to hypervectors with size:  torch.Size([256, 10000])
Encoding training data...
Start encoding data
12800 images encoded. Total time elapse = 20.28523015975952
25600 images encoded. Total time elapse = 40.03036856651306
38400 images encoded. Total time elapse = 58.91110110282898
51200 images encoded. Total time elapse = 77.51575827598572
Finish encoding data
Encoding test data...
Start encoding data
Finish encoding data


In [211]:
# Prefer the first CUDA device when one is visible; otherwise fall back to CPU.
device_name = "cuda:0" if torch.cuda.is_available() else "cpu"
device = torch.device(device_name)

In [212]:
# The raw dataset is opened only to read the shape of one sample, which gives
# the flattened input size handed to the encoder.
transform = torchvision.transforms.Compose(
    [
        torchvision.transforms.ToTensor(),
    ]
)
# Use the configured raw-data root instead of repeating the literal path, so
# changing args.raw_data_dir is honoured here as well.
# NOTE(review): MNIST is still hard-coded although args.dataset is
# configurable; the input size is only right for datasets with the same
# per-sample shape — confirm before switching datasets.
train_dataset = torchvision.datasets.MNIST(
    root=args.raw_data_dir, train=True, transform=transform, download=True
)
# Number of elements in one transformed sample, kept as a 0-d integer tensor
# (the same type the previous torch.prod expression produced).
input_dim = torch.tensor(train_dataset[0][0].numel())

# Load the previously encoded train/test hypervectors and their labels.
train_hd, y_train, test_hd, y_test = load(args)

Loading encoded training data...
Loading encoded test data...
Size of encoded training data torch.Size([60000, 10000]) and test data torch.Size([10000, 10000])


In [213]:
# Build an encoder from the notebook configuration, then replace its item
# memory with the tensor stored in this run's data folder.
encoder_kwargs = dict(
    input_dim=input_dim,
    gamma=args.gamma,
    gorder=args.gorder,
    output_dim=args.dim,
)
encoder = RandomFourierEncoder(**encoder_kwargs)

# NOTE(review): torch.load unpickles the file; only load item_mem.pt files
# produced locally by this project.
item_mem_path = f"{args.data_dir}/item_mem.pt"
encoder.item_mem = torch.load(item_mem_path)

In [214]:
# Display the resolved folder that holds this run's encoded data.
args.data_dir

'./encoded_data/mnist_rff-gvsa_order16_gamma0.3_dim10000'

In [215]:
# Compute one centroid per class by passing that class's encoded training
# samples to encoder.group_bundle (defined in encoder.py, not shown here).
# The class labels are read from y_train rather than a hard-coded range(10),
# so the cell works for label sets of any size; for labels 0..9 the resulting
# dictionary has the same keys in the same order as before.
print("Calculating centroids for each class...")
class_labels = sorted(set(y_train.tolist()))
centroids = {}
for c_label in class_labels:
    print(c_label)
    centroids[c_label] = encoder.group_bundle(train_hd[y_train == c_label])
    print(centroids[c_label])

Calculating centroids for each class...
0
tensor([14.,  1.,  5.,  ...,  9.,  1., 15.])
1
tensor([ 8., 11., 13.,  ...,  9.,  5.,  7.])
2
tensor([ 4.,  5.,  7.,  ..., 11.,  4.,  5.])
3
tensor([ 4.,  7.,  9.,  ..., 11.,  5.,  5.])
4
tensor([ 6.,  7.,  8.,  ..., 16.,  9.,  8.])
5
tensor([ 3.,  5.,  9.,  ..., 10.,  4.,  4.])
6
tensor([ 3.,  6.,  9.,  ..., 12.,  4.,  4.])
7
tensor([ 5.,  5.,  5.,  ..., 11.,  5.,  6.])
8
tensor([ 6.,  8., 11.,  ..., 11.,  5.,  6.])
9
tensor([ 6.,  6.,  7.,  ..., 14.,  8.,  8.])


In [216]:
# Nearest-centroid evaluation: every encoded test sample is assigned the label
# of the centroid with the highest encoder.similarity score.
class_labels = list(centroids)
correct = 0
total = 0
for i, sample in enumerate(test_hd):
    # float() turns each score into a plain Python number, so NumPy is not
    # asked to build an array from a list of tensors.
    scores = np.array(
        [float(encoder.similarity(sample, centroids[label])) for label in class_labels]
    )
    # np.argmax returns a position in `scores`; translate it back to the
    # dictionary key so the prediction does not rely on the keys being 0..N-1
    # in insertion order.
    predicted_class = class_labels[int(np.argmax(scores))]
    if predicted_class == y_test[i].item():
        correct += 1
    total += 1
accuracy = 100 * correct / total
print(f"Accuracy: {accuracy:.4f}%")

Accuracy: 82.0600%


In [202]:
# Spot check: similarity between one class-0 training sample (index 5 within
# that class) and the class-0 centroid.
# NOTE(review): the recorded execution count of this cell (202) is lower than
# that of the centroid cell, so the displayed value may be stale — re-run it
# in order before relying on it.
encoder.similarity(train_hd[y_train == 0][5], centroids[0])

tensor(0.1607)

In [206]:
# Nearest-centroid evaluation using encoder.similarity_manhatten: every encoded
# test sample is assigned the label of the centroid with the smallest value.
# NOTE(review): the recorded result of 11.35 % matches the share of digit-1
# images in the MNIST test split (1135 of 10000, to be confirmed), which would
# mean nearly every sample gets the same prediction. Check whether
# similarity_manhatten returns a distance (argmin is right) or a similarity
# (argmax would be needed), and re-run this cell after the centroid cell, since
# its recorded execution count (206) predates the current configuration.
class_labels = list(centroids)
correct = 0
total = 0
for i, sample in enumerate(test_hd):
    # float() turns each value into a plain Python number, so NumPy is not
    # asked to build an array from a list of tensors.
    distances_arr = np.array(
        [float(encoder.similarity_manhatten(sample, centroids[label])) for label in class_labels]
    )
    # np.argmin returns a position in `distances_arr`; translate it back to
    # the dictionary key so the prediction does not rely on the keys being
    # 0..N-1 in insertion order.
    predicted_class = class_labels[int(np.argmin(distances_arr))]
    if predicted_class == y_test[i].item():
        correct += 1
    total += 1
accuracy2 = 100 * correct / total
print(f"Accuracy: {accuracy2:.4f}%")

Accuracy: 11.3500%
