In [1]:
import os
import sys

# Put the lsh_lib and mongoose_slide directories (resolved against the current
# working directory) on the module search path so they can be imported below.
cur_dir = os.getcwd()
sys.path.extend(
    os.path.join(cur_dir, package_dir)
    for package_dir in ("lsh_lib", "mongoose_slide")
)

In [2]:
import time
import matplotlib.pyplot as plt
import numpy as np

import torch
import torch.optim as optim
import torch.nn as nn
import torchvision
import torchvision.transforms as transforms
from torch.utils.data import DataLoader
from torchvision.datasets import CIFAR100

In [3]:
from clsh import pyLSH
from mongoose_slide.slide_lib.lsh import LSH
# from mongoose_slide.slide_lib.simHash import SimHash
from mongoose_slide.slide_lib.projectionHash import RandomProjection

from src.lsh_layer import LSHLayer
from src.models.simple_mlp import SimpleMLP
from src.models.two_layer_lsh import TwoLayerLSH
from src.utils import train, train_lsh, eval

In [4]:
# Enable IPython's autoreload extension so that edits to external Python
# modules are picked up by the notebook without re-importing them by hand.
# Mode 2 asks autoreload to reload all modules before running each cell
# (per the IPython autoreload documentation).
# see https://stackoverflow.com/questions/1907993/autoreload-of-modules-in-ipython
%load_ext autoreload
%autoreload 2

In [5]:
# Run on the first CUDA GPU when PyTorch reports one, otherwise on the CPU.
use_cuda = torch.cuda.is_available()
device = torch.device("cuda:0") if use_cuda else torch.device("cpu")
print(device)

cuda:0


## Loading CIFAR100 dataset

In [6]:
# Pre-processing applied to every image: convert to a tensor, then normalize
# each of the three channels with mean 0.5 and std 0.5.
transform = transforms.Compose(
    [
        transforms.ToTensor(),
        transforms.Normalize(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5)),
    ]
)

# CIFAR100 train and test splits, stored under ./data (download requested).
trainset = torchvision.datasets.CIFAR100(
    root="./data", train=True, download=True, transform=transform
)
testset = torchvision.datasets.CIFAR100(
    root="./data", train=False, download=True, transform=transform
)

# Batched loaders over both splits; only the training loader shuffles.
loader_options = dict(batch_size=4, num_workers=2)
trainloader = torch.utils.data.DataLoader(trainset, shuffle=True, **loader_options)
testloader = torch.utils.data.DataLoader(testset, shuffle=False, **loader_options)

# Class names as exposed by the dataset object
classes = trainset.classes

# Report split sizes followed by the class list.
print(f"size of train dataset: {len(trainset)}")
print(f"size of test dataset: {len(testset)}")

print(classes)

Files already downloaded and verified
Files already downloaded and verified
size of train dataset: 50000
size of test dataset: 10000
['apple', 'aquarium_fish', 'baby', 'bear', 'beaver', 'bed', 'bee', 'beetle', 'bicycle', 'bottle', 'bowl', 'boy', 'bridge', 'bus', 'butterfly', 'camel', 'can', 'castle', 'caterpillar', 'cattle', 'chair', 'chimpanzee', 'clock', 'cloud', 'cockroach', 'couch', 'crab', 'crocodile', 'cup', 'dinosaur', 'dolphin', 'elephant', 'flatfish', 'forest', 'fox', 'girl', 'hamster', 'house', 'kangaroo', 'keyboard', 'lamp', 'lawn_mower', 'leopard', 'lion', 'lizard', 'lobster', 'man', 'maple_tree', 'motorcycle', 'mountain', 'mouse', 'mushroom', 'oak_tree', 'orange', 'orchid', 'otter', 'palm_tree', 'pear', 'pickup_truck', 'pine_tree', 'plain', 'plate', 'poppy', 'porcupine', 'possum', 'rabbit', 'raccoon', 'ray', 'road', 'rocket', 'rose', 'sea', 'seal', 'shark', 'shrew', 'skunk', 'skyscraper', 'snail', 'snake', 'spider', 'squirrel', 'streetcar', 'sunflower', 'sweet_pepper', 'ta

In [7]:
# Sanity check on the first training sample: print the image tensor's shape
# and its value range after the transform.
x, y = trainset[0]
print(x.shape)
print(x.min(), x.max())

torch.Size([3, 32, 32])
tensor(-0.9922) tensor(1.)


## Baseline MLP on CIFAR100

In [8]:
# Hyperparameters for the baseline MLP
num_epochs = 2
input_size = 3 * 32 * 32  # flattened size of one [3, 32, 32] sample
hidden_size = 10000
num_classes = len(classes)

# Build the baseline network and move it to the selected device.
model = SimpleMLP(input_size, hidden_size, num_classes, flatten_first=True).to(device)

# Cross-entropy loss, optimized with plain SGD over the baseline's parameters.
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr=0.01)

In [9]:
# Train the baseline MLP for num_epochs epochs; `train` returns the model,
# which replaces the untrained instance. The recorded run reports per-epoch
# train/test loss and accuracy.
model = train(
    model,
    trainloader,
    criterion,
    optimizer,
    testloader,
    num_epochs=num_epochs,
)

Epoch time: 58.39 seconds
Epoch [1/2] | Train Loss: 3.1657 | Train Acc: 24.06% | Test Loss: 3.4835 | Test Acc: 19.52

Epoch time: 57.20 seconds
Epoch [2/2] | Train Loss: 2.8788 | Train Acc: 30.31% | Test Loss: 3.4682 | Test Acc: 20.66

Total training time: 160.32 seconds


## LSH MLP on CIFAR100

In [13]:
# Hyperparameters for the LSH-based network (same sizes as the baseline MLP)
input_size = 3 * 32 * 32  # flattened size of one [3, 32, 32] sample
num_epochs = 2
hidden_size = 10000
num_classes = len(classes)

# LSH settings forwarded to TwoLayerLSH.
# NOTE(review): presumably K is the number of hash bits per table and L the
# number of hash tables -- confirm against TwoLayerLSH / LSHLayer.
K = 14
L = 50
threads = 1

# Build the LSH network and move it to the selected device.
model_lsh = TwoLayerLSH(input_size, hidden_size, num_classes, K, L, threads, flatten_first=True)
model_lsh = model_lsh.to(device)

criterion = nn.CrossEntropyLoss()
# The optimizer must be built from model_lsh's parameters. Building it from
# `model.parameters()` (the baseline MLP) would make every SGD step update the
# baseline network and leave the LSH network's weights untouched.
optimizer = optim.SGD(model_lsh.parameters(), lr=0.01)

cuda:0
cuda:0


In [14]:
# Train the LSH network for num_epochs epochs; `train_lsh` returns the model,
# which replaces the untrained instance.
# NOTE(review): the recorded run of this cell raised
# "AttributeError: 'Linear' object has no attribute 'last_neurons'". The cause
# is inside train_lsh / TwoLayerLSH, which are not shown here -- investigate
# before relying on this cell.
model_lsh = train_lsh(
    model_lsh,
    trainloader,
    criterion,
    optimizer,
    testloader,
    num_epochs=num_epochs,
)

AttributeError: 'Linear' object has no attribute 'last_neurons'