In [16]:
# Import libraries
import torch
print('torch ver: ', torch.__version__)
from torchvision import datasets, transforms

transform = transforms.Compose([transforms.ToTensor(), transforms.Normalize((0.5,), (0.5,)),]) 
trainset = datasets.FashionMNIST('~/.pytorch/F_MNIST_data/', download=True, train=True, transform=transform)
testset = datasets.FashionMNIST('~/.pytorch/F_MNIST_data/', download=True, train=False, transform=transform)
image, label = trainset[0] 
print("image shape: ", image.shape) # torch.Size([1, 28, 28])
print("# labels: ", len(testset.classes))
print(testset.class_to_idx)

torch ver:  2.0.0
image shape:  torch.Size([1, 28, 28])
# labels:  10
{'T-shirt/top': 0, 'Trouser': 1, 'Pullover': 2, 'Dress': 3, 'Coat': 4, 'Sandal': 5, 'Shirt': 6, 'Sneaker': 7, 'Bag': 8, 'Ankle boot': 9}


In [17]:
# Final sizes are 50000, 10000, 10000
trainset, valset = torch.utils.data.random_split(trainset, [50000, 10000])
print(f'Train set size: {len(trainset)}, Validation set size: {len(valset)}, Test set size: {len(testset)}')
# Shuffle the data at the start of each epoch (only useful for training set)
batchsize = 128 # was 32
train_loader = torch.utils.data.DataLoader(trainset, batch_size=batchsize, shuffle=True)
val_loader = torch.utils.data.DataLoader(valset, batch_size=batchsize, shuffle=False)
test_loader = torch.utils.data.DataLoader(testset, batch_size=batchsize, shuffle=False)

Train set size: 50000, Validation set size: 10000, Test set size: 10000


In [18]:
_K = 10
dedup_train_loader = torch.load(f'./loaders/fmnist/dedup_train@k={_K}.pt')

In [19]:
import time
from utils.engine import *
from utils.models import myMLP

# set seed
torch.manual_seed(42)
# shared hparams
device = 'mps'
epochs = 10
lr = 1e-3
loss_fn = torch.nn.CrossEntropyLoss()

def driver(loader, mode):
    print(f"Training {mode} model...")
    model = myMLP().to(device)
    optimizer = torch.optim.Adam(model.parameters(), lr=lr)
    trained_model = train(model, loader, optimizer,
                loss_fn, device=device, epochs=epochs)
    
    return trained_model

# 'dense' or 'dedup'
_MODE = 'dense' 
start = time.time()
dense_model = driver(train_loader, mode=_MODE)
end = time.time()
dense_time = end - start

_MODE = 'dedup'
start = time.time()
dedup_model =driver(dedup_train_loader, mode=_MODE)
end = time.time()
dedup_time = end - start

print(f"Dense training time: {dense_time}")
print(f"Dedup training time: {dedup_time}")

Training dense model...
Epoch 0: loss=0.5893656015396118
Epoch 1: loss=0.2995685935020447
Epoch 2: loss=0.3366810381412506
Epoch 3: loss=0.3461603820323944
Epoch 4: loss=0.45827823877334595
Epoch 5: loss=0.3244563639163971
Epoch 6: loss=0.37949681282043457
Epoch 7: loss=0.19144146144390106
Epoch 8: loss=0.14109563827514648
Epoch 9: loss=0.21466879546642303
Training dedup model...
Epoch 0: loss=0.5624696612358093
Epoch 1: loss=0.3399209976196289
Epoch 2: loss=0.5185234546661377
Epoch 3: loss=0.603087306022644
Epoch 4: loss=0.27909615635871887
Epoch 5: loss=0.540800929069519
Epoch 6: loss=0.321259468793869
Epoch 7: loss=0.4345487952232361
Epoch 8: loss=0.3049241006374359
Epoch 9: loss=0.3929382264614105
Dense training time: 38.63566780090332
Dedup training time: 7.133240222930908


In [22]:
dense_acc = evaluate(dense_model, test_loader, device=device)
dedup_acc = evaluate(dedup_model, test_loader, device=device)
print(f"Accuracy on test set: {dense_acc:.2f} (original), {dedup_acc:.2f} (deduplicated)")
print(f'Training Speedup = {dense_time / dedup_time:.2f}x')

Accuracy on test set: 0.88 (original), 0.86 (deduplicated)
Training Speedup = 5.42x


In [21]:
''' TRIAL 1
Accuracy on test set: 0.88 (original), 0.85 (deduplicated)
Dense training time: 38.458457708358765
Dedup training time: 8.423368215560913
'''

''' TRIAL 2
Accuracy on test set: 0.88 (original), 0.87 (deduplicated)
Dense training time: 38.01654291152954
Dedup training time: 7.568618059158325
'''


' TRIAL 2\nAccuracy on test set: 0.88 (original), 0.87 (deduplicated)\nDense training time: 38.01654291152954\nDedup training time: 7.568618059158325\n'