In [21]:
import time
import random
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
import torchvision.transforms.functional as TF
import torchvision
import sklearn.metrics
from tqdm import tqdm

import hdc
import hdc.functional as HDF
import matplotlib.pyplot as plt

In [3]:
device = "cuda" if torch.cuda.is_available() else "cpu"
device = torch.device(device)
print("Using {} device".format(device))

Using cuda device


In [4]:
DIMENSIONS = 10000
IMG_SIZE = 28
NUM_LEVELS = 1000
BATCH_SIZE = 12
LEARNING_RATE = 0.005

In [5]:
transform = torchvision.transforms.Compose([
    torchvision.transforms.ToTensor(),
])

In [6]:
train_ds = torchvision.datasets.MNIST("data", train=True, transform=transform, download=True)
train_ld = torch.utils.data.DataLoader(train_ds, batch_size=BATCH_SIZE, shuffle=True)
test_ds = torchvision.datasets.MNIST("data", train=False, transform=transform, download=True)
test_ld = torch.utils.data.DataLoader(test_ds, batch_size=BATCH_SIZE, shuffle=False)
len(train_ds), len(test_ds)

Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz to data/MNIST/raw/train-images-idx3-ubyte.gz


9913344it [00:00, 57191637.11it/s]                             


Extracting data/MNIST/raw/train-images-idx3-ubyte.gz to data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz to data/MNIST/raw/train-labels-idx1-ubyte.gz


29696it [00:00, 17134963.76it/s]         


Extracting data/MNIST/raw/train-labels-idx1-ubyte.gz to data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz to data/MNIST/raw/t10k-images-idx3-ubyte.gz


1649664it [00:00, 49937154.94it/s]         


Extracting data/MNIST/raw/t10k-images-idx3-ubyte.gz to data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz to data/MNIST/raw/t10k-labels-idx1-ubyte.gz


5120it [00:00, 15538955.48it/s]         

Extracting data/MNIST/raw/t10k-labels-idx1-ubyte.gz to data/MNIST/raw






(60000, 10000)

In [7]:
NUM_CLASSES = len(train_ds.classes)
POS_HV = HDF.random_hv(IMG_SIZE * IMG_SIZE, DIMENSIONS, dtype=torch.float, device=device)
LUM_HV = HDF.level_hv(NUM_LEVELS, DIMENSIONS, dtype=torch.float, device=device)

In [14]:
class Model(nn.Module):
    def __init__(self, size, train_embedding=False):
        super(Model, self).__init__()
        
        self.size = size

        self.pos_embed = hdc.embeddings.Random(size * size, DIMENSIONS)
        self.pos_embed.weight.requires_grad = train_embedding
        
        self.lum_embed = hdc.embeddings.Level(NUM_LEVELS, DIMENSIONS)
        self.lum_embed.weight.requires_grad = train_embedding
        
        self.classify = nn.Linear(DIMENSIONS, NUM_CLASSES)
        self.classify.weight.data.fill_(0.0)
        self.classify.bias.data.fill_(0.0)
        
    def encode(self, x):
        batch_size = x.size(0)
        x = x.reshape(batch_size, self.size * self.size)

        luminocities = self.lum_embed(x)
        
        sample_hv = HDF.bind(self.pos_embed.weight, luminocities)
        sample_hv = torch.sum(sample_hv, dim=-2)

        return HDF.soft_quantize(sample_hv)  # cap between -1 and +1

    def forward(self, x):
        enc = self.encode(x)
        logit = self.classify(enc)
        return logit

In [15]:
model = Model(IMG_SIZE)
model = model.to(device)

In [16]:
start_time = time.time()

with torch.no_grad():
    for samples, labels in tqdm(train_ld, desc="Train"):
        samples = samples.to(device)
        labels = labels.to(device)

        samples_hv = model.encode(samples)
        model.classify.weight.data[labels] += samples_hv * LEARNING_RATE

    model.classify.weight.data = F.normalize(model.classify.weight.data)
    
end_time = time.time()
duration = end_time - start_time
print(f"Training took {duration:.3f}s for {len(train_ds)} items")

Train: 100%|██████████| 5000/5000 [00:30<00:00, 162.66it/s]

Training took 30.744s for 60000 items





In [17]:
pred_labels = []
true_labels = []

start_time = time.time()
with torch.no_grad():
    for samples, labels in tqdm(test_ld, desc="Test"):
        samples = samples.to(device)

        outputs = model(samples)
        predictions = torch.argmax(outputs, dim=-1)

        pred_labels.append(predictions)
        true_labels.append(labels)
        
end_time = time.time()
duration = end_time - start_time
print(f"Testing took {duration:.2f}s for {len(test_ds)} items")

pred_labels = torch.cat(pred_labels).cpu()
true_labels = torch.cat(true_labels).cpu()

accuracy = sklearn.metrics.accuracy_score(pred_labels, true_labels)
print(f"Testing accuracy of {(accuracy * 100):.3f}%")

Test: 100%|██████████| 834/834 [00:06<00:00, 136.92it/s]

Testing took 6.10s for 10000 items
Testing accuracy of 82.580%





## Gradient-based model

In [18]:
model = Model(IMG_SIZE)
model = model.to(device)
loss_fn = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr=LEARNING_RATE)

In [19]:
start_time = time.time()

for samples, labels in tqdm(train_ld, desc="Train"):
    samples = samples.to(device)
    labels = labels.to(device)
    
    # zero the parameter gradients
    optimizer.zero_grad()

    outputs = model(samples)
    
    loss = loss_fn(outputs, labels)
    loss.backward()
    optimizer.step()
    
end_time = time.time()
duration = end_time - start_time
print(f"Training took {duration:.3f}s for {len(train_ds)} items")

Train: 100%|██████████| 5000/5000 [00:32<00:00, 153.80it/s]

Training took 32.526s for 60000 items





In [20]:
pred_labels = []
true_labels = []

start_time = time.time()
with torch.no_grad():
    for samples, labels in tqdm(test_ld, desc="Test"):
        samples = samples.to(device)

        outputs = model(samples)
        predictions = torch.argmax(outputs, dim=-1)

        pred_labels.append(predictions)
        true_labels.append(labels)
        
end_time = time.time()
duration = end_time - start_time
print(f"Testing took {duration:.2f}s for {len(test_ds)} items")

pred_labels = torch.cat(pred_labels).cpu()
true_labels = torch.cat(true_labels).cpu()

accuracy = sklearn.metrics.accuracy_score(pred_labels, true_labels)
print(f"Testing accuracy of {(accuracy * 100):.3f}%")

Test: 100%|██████████| 834/834 [00:05<00:00, 160.05it/s]


Testing took 5.22s for 10000 items
Testing accuracy of 89.610%


## Mixed HDC and gradient-based

In [59]:
model = Model(IMG_SIZE)
model = model.to(device)
loss_fn = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr=LEARNING_RATE)

In [60]:
start_time = time.time()

cache = torch.zeros(NUM_CLASSES, DIMENSIONS, device=device, dtype=torch.float)
dirty_bit = torch.tensor([False] * NUM_CLASSES, dtype=torch.bool, device=device)
for samples, labels in tqdm(train_ld, desc="Train"):
    samples = samples.to(device)
    labels = labels.to(device)
    
    if random.random() > 0.9:
        # zero the parameter gradients
        optimizer.zero_grad()

        enc = model.encode(samples)
        for l in range(labels.size(0)):
            enc[l] = HDF.bundle(enc[l], cache[labels[l]])
            cache[labels[l]] = 0
            dirty_bit[labels[l]] = False
            
        outputs = model.classify(enc)

        loss = loss_fn(outputs, labels)
        loss.backward()
        optimizer.step()
    else:
        with torch.no_grad():
            samples_hv = model.encode(samples)
            for l in range(samples_hv.size(0)):
                cache[labels[l]] += samples_hv[l]
                dirty_bit[labels[l]] = True
    
# Apply all accumulated samples                      
# zero the parameter gradients
optimizer.zero_grad()

outputs = model.classify(cache[dirty_bit])
labels = torch.arange(0, NUM_CLASSES, device=device, dtype=torch.long)
labels = labels[dirty_bit]

loss = loss_fn(outputs, labels)
loss.backward()
optimizer.step()

end_time = time.time()
duration = end_time - start_time
print(f"Training took {duration:.3f}s for {len(train_ds)} items")

Train: 100%|██████████| 5000/5000 [00:54<00:00, 91.55it/s] 

Training took 54.624s for 60000 items





In [61]:
pred_labels = []
true_labels = []

start_time = time.time()
with torch.no_grad():
    for samples, labels in tqdm(test_ld, desc="Test"):
        samples = samples.to(device)

        outputs = model(samples)
        predictions = torch.argmax(outputs, dim=-1)

        pred_labels.append(predictions)
        true_labels.append(labels)
        
end_time = time.time()
duration = end_time - start_time
print(f"Testing took {duration:.2f}s for {len(test_ds)} items")

pred_labels = torch.cat(pred_labels).cpu()
true_labels = torch.cat(true_labels).cpu()

accuracy = sklearn.metrics.accuracy_score(pred_labels, true_labels)
print(f"Testing accuracy of {(accuracy * 100):.3f}%")

Test: 100%|██████████| 834/834 [00:05<00:00, 160.80it/s]

Testing took 5.19s for 10000 items
Testing accuracy of 84.010%



