In [8]:
import auto_compyute as ac
import auto_compyute.nn.functional as F
from auto_compyute import nn

ac.backends.set_random_seed(0)
device = "cuda" if ac.backends.gpu_available() else "cpu"

In [9]:
import pandas as pd

# download the datasets
# train_url = "https://pjreddie.com/media/files/mnist_train.csv"
train_data = pd.read_csv("../data/mnist_train.csv")
train_tensor = ac.tensor(train_data.to_numpy())

# test_url = "https://pjreddie.com/media/files/mnist_test.csv"
test_data = pd.read_csv("../data/mnist_test.csv")
test_tensor = ac.tensor(test_data.to_numpy())

In [10]:
# split the data into train, val, test
rand_idx = ac.randperm(len(train_tensor))
n_train_samples = int(0.8 * len(train_tensor))
train_idx, val_idx = rand_idx[:n_train_samples], rand_idx[n_train_samples:]
train, val = train_tensor[train_idx], train_tensor[val_idx]

# split features from targets
X_train = train[:, 1:]
X_val = val[:, 1:]
X_test = test_tensor[:, 1:]

# reshape the data into an image format (B, 784) -> (B, 1, 28, 28)
X_train = X_train.view(X_train.shape[0], 1 , 28, -1).float()
X_val = X_val.view(X_val.shape[0], 1, 28, -1).float()
X_test = X_test.view(X_test.shape[0], 1, 28, -1).float()

# scaling
def scale(x: ac.Tensor) -> ac.Tensor:
    mean_px = x.mean().float()
    std_px = x.std().float()
    return (x - mean_px) / (std_px)

X_train = scale(X_train)
X_val = scale(X_val)
X_test = scale(X_test)

print(f'{X_train.shape=}')
print(f'{X_val.shape=}')
print(f'{X_test.shape=}')

X_train.shape=(47999, 1, 28, 28)
X_val.shape=(12000, 1, 28, 28)
X_test.shape=(9999, 1, 28, 28)


In [11]:
class VAE(nn.Module):
    def __init__(self, in_dim, hidden_dim, latent_dim) -> None:
        super().__init__()

        # encoder
        self.encoder = nn.Sequential(
            nn.Linear(in_dim, hidden_dim),
            nn.LeakyReLU(0.2),
            nn.Linear(hidden_dim, latent_dim),
            nn.LeakyReLU(0.2)
        )

        self.mean = nn.Linear(latent_dim, 2)
        self.logvar = nn.Linear(latent_dim, 2)

        # decoder
        self.decoder = nn.Sequential(
            nn.Linear(2, latent_dim),
            nn.LeakyReLU(0.2),
            nn.Linear(latent_dim, hidden_dim),
            nn.LeakyReLU(0.2),
            nn.Linear(hidden_dim, in_dim),
            nn.Sigmoid()
        )

    def encode(self, x):
        x = self.encoder(x)
        return self.mean(x), self.logvar(x)

    def reparameterization(self, mean, var):
        epsilon = ac.randn(*var.shape, dtype=var.dtype, device=var.device)     
        z = mean + var * epsilon
        return z

    def decode(self, x):
        return self.decoder(x)

    def forward(self, x):
        mean, logvar = self.encode(x)
        z = self.reparameterization(mean, logvar)
        x_hat = self.decode(z)
        return x_hat, mean, logvar

model = VAE(784, 256, 128)
model.to(device)

In [12]:
batch_size = 32
train_dl = nn.Dataloader((X_train,), batch_size=batch_size, device=device, drop_remaining=True)
optimizer = nn.optimizers.Adam(model.parameters(), learning_rate=1e-3)

In [None]:
def loss_function(x, x_hat, mean, log_var):
    reproduction_loss = F.bce_loss(x_hat, x, reduction='sum')
    KLD = - 0.5 * (1 + log_var - mean.pow(2) - log_var.exp()).sum()

    return reproduction_loss + KLD

In [None]:
def train(model, optimizer, epochs, device):
    model.train()
    for epoch in range(epochs):
        overall_loss = 0
        for batch_idx, (x,) in enumerate(train_dl()):
            x = x.view(batch_size, -1).to(device)

            optimizer.zero_grad()

            x_hat, mean, log_var = model(x)
            loss = loss_function(x, x_hat, mean, log_var)
            
            overall_loss += loss.item()
            
            loss.backward()
            optimizer.step()

        print("\tEpoch", epoch + 1, "\tAverage Loss: ", overall_loss/(batch_idx*batch_size))
    return overall_loss

train(model, optimizer, epochs=50, device=device)