In [1]:
# Google Colab Only
# Install DataEval only when this notebook is running inside Google Colab.
try:
    # Imported purely as a Colab probe; the module itself is never used.
    import google.colab  # noqa: F401

    # specify the version of DataEval (==X.XX.X) for versions other than the latest
    %pip install -q dataeval[torch]
except Exception:
    # Best-effort: any failure (e.g. google.colab is not importable) skips the install.
    pass

In [2]:
import numpy as np
import torch

from dataeval.utils.torch.datasets import MNIST

In [3]:
# Keyword arguments shared by the training and testing splits, so both are
# loaded with the same root directory, dtype, and channel layout.
mnist_options = dict(
    root="./data/",
    download=True,
    unit_interval=True,
    dtype=np.float32,
    channels="channels_first",
)

# Training split, limited to 6000 samples via `size`.
training_dataset = MNIST(train=True, size=6000, **mnist_options)

# Testing split; no `size` is passed, so the loader's default applies.
testing_dataset = MNIST(train=False, **mnist_options)

Files already downloaded and verified


Files already downloaded and verified


In [4]:
# Sanity check: report the array shapes of the training images and labels.
training_data_shape = training_dataset.data.shape
training_labels_shape = training_dataset.targets.shape

print("Training data size:", training_data_shape)
print("Training labels size:", training_labels_shape)

Training data size: (6000, 1, 28, 28)
Training labels size: (6000,)


In [5]:
from dataeval.utils.torch.models import AriaAutoencoder
from dataeval.utils.torch.trainer import AETrainer

In [6]:
# Seed PyTorch's global RNG before the model is constructed so that a fresh
# Restart & Run All starts from the same random state on every run.
# NOTE: some CUDA kernels are nondeterministic, so GPU runs may still differ slightly.
SEED = 0
torch.manual_seed(SEED)

# Use the GPU when PyTorch reports one as available; otherwise fall back to the CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"

# Autoencoder configured for single-channel input (channels=1).
model = AriaAutoencoder(channels=1)

# The trainer receives the model, the device string, and a batch size of 32.
trainer = AETrainer(model, device=device, batch_size=32)

In [7]:
# Wrap the training images in a tensor; only the images (not the labels) are
# handed to the trainer.
training_subset = torch.tensor(training_dataset.data)

# Train for 10 epochs and keep whatever sequence of losses the trainer returns.
training_loss = trainer.train(
    training_subset,
    epochs=10,
)

# Report the last entry of the returned losses (presumably the final epoch's loss).
final_training_loss = training_loss[-1]
print(final_training_loss)

0.113695649549048


In [8]:
# Evaluate the trainer's model on the testing split (loaded with train=False)
# and print the value returned by `eval`.
# NOTE(review): `train` was given a raw image tensor, whereas `eval` is given the
# dataset object itself; presumably the trainer accepts both forms - confirm.
eval_loss = trainer.eval(testing_dataset)
print(eval_loss)

0.11415031253223351


In [9]:
# Pass the same image tensor used for training through the trainer's `encode`
# to obtain embeddings (presumably the encoder half of the autoencoder - confirm).
embeddings = trainer.encode(training_subset)

In [10]:
# Report the shape of the encoded training images.
embedding_shape = embeddings.shape
print("Embedded image shape:", embedding_shape)

Embedded image shape: torch.Size([6000, 64, 6, 6])
