In [1]:
# Google Colab Only
try:
    import google.colab  # noqa: F401

    # specify the version of DataEval (==X.XX.X) for versions other than the latest
    %pip install -q dataeval
except Exception:
    pass

In [2]:
import numpy as np
import torch
from torch.utils.data import Subset

from dataeval.utils.data.datasets import MNIST

In [3]:
# Both splits are loaded with the same options: float32 pixel data with
# `unit_interval=True`, in channels-first layout, stored under ./data/.
mnist_options = {
    "root": "./data/",
    "download": True,
    "unit_interval": True,
    "dtype": np.float32,
    "channels": "channels_first",
}

# Training split and held-out test split differ only in the `train` flag.
training_dataset = MNIST(train=True, **mnist_options)
testing_dataset = MNIST(train=False, **mnist_options)

Files already downloaded and verified


Files already downloaded and verified


In [4]:
# Sanity-check what was loaded: number of samples and the shape of the first image.
first_image = training_dataset[0][0]
print(f"Training dataset size: {len(training_dataset)}")
print(f"Training image shape: {first_image.shape}")


Training dataset size: 54210
Training image shape: (1, 28, 28)


In [5]:
from dataeval.utils.torch.models import Autoencoder
from dataeval.utils.torch.trainer import AETrainer

In [6]:
# Seed torch's global RNG so that repeated Restart & Run All executions start
# from the same random state; the loss assertions further down depend on the
# outcome of training. NOTE(review): some CUDA kernels can still be
# non-deterministic, so GPU runs may not match bit-for-bit.
torch.manual_seed(0)

# Run on the GPU when torch can see one, otherwise fall back to the CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"

# Single-channel autoencoder, matching the (1, 28, 28) image shape printed above.
model = Autoencoder(channels=1)
trainer = AETrainer(model, device=device, batch_size=32)

In [7]:
# Train on only the first 6000 training images, for 10 epochs.
first_6000 = range(6000)
training_subset = Subset(training_dataset, first_6000)
training_loss = trainer.train(training_subset, epochs=10)

# Show the last entry of the returned losses
# (presumably the final epoch's loss — confirm against the AETrainer docs).
print(training_loss[-1])

0.11228326819044479


In [8]:
# Evaluate the trained autoencoder on the full held-out test split and show
# the resulting loss; it is compared against the training loss in the next cell.
eval_loss = trainer.eval(testing_dataset)
print(eval_loss)

0.11469037569124639


In [9]:
### TEST ASSERTION CELL ###
# The final training loss must be within 10% of the evaluation loss,
# measured relative to the evaluation loss.
relative_gap = training_loss[-1] / eval_loss - 1
assert abs(relative_gap) < 0.1

In [10]:
# Encode the same 6000-image subset that was used for training; the shape of
# the result is printed and asserted in the following cells.
embeddings = trainer.encode(training_subset)

In [11]:
# Show the shape of the encoder output for the whole subset.
print(f"Embedded image shape: {embeddings.shape}")

Embedded image shape: torch.Size([6000, 64, 6, 6])


In [12]:
### TEST ASSERTION CELL ###
# One 64x6x6 embedding for each of the 6000 images in the training subset.
expected_shape = (6000, 64, 6, 6)
assert embeddings.shape == expected_shape