In [1]:
import os
import torch
import numpy as np
from torch import optim, nn, utils, Tensor
from torchvision.transforms import ToTensor
from torch.utils.data import Dataset,DataLoader
import lightning as L

In [45]:
np.random.seed(0)
dim = 50
actual_dim = 25
latent_dim = 50
encoder = nn.Sequential(nn.Linear(50,25),nn.GELU(),nn.Linear(25,latent_dim))
decoder = nn.Sequential(nn.Linear(latent_dim, 50), nn.GELU(), nn.Linear(50, 50))

class AutoEncoder(L.LightningModule):
    def __init__(self, encoder, decoder):
        super().__init__()
        self.encoder = encoder
        self.decoder = decoder

    def training_step(self, batch, batch_idx):
        # training_step defines the train loop.
        # it is independent of forward
        z = self.encoder(batch)
        x_hat = self.decoder(z)
        loss = nn.functional.mse_loss(x_hat, batch)
        # Logging to TensorBoard (if installed) by default
        self.log("train_loss", loss)
        return loss

    def configure_optimizers(self):
        optimizer = optim.Adam(self.parameters(), lr=1e-4)
        return optimizer
AE = AutoEncoder(encoder,decoder)

In [47]:
class VectorDataset(Dataset):
    def __init__(self, n, idxs, dim=dim, actual_dim=actual_dim):
        self.x = torch.zeros(n,dim)+10
        self.x[:,idxs] = 5#torch.randn(n,actual_dim)

    def __len__(self):
        return len(self.x)

    def __getitem__(self, idx):
        return self.x[idx]

In [49]:
idxs = np.random.choice(dim,actual_dim)
train_ds,test_ds = VectorDataset(10000,idxs), VectorDataset(100,idxs)
train_dl = DataLoader(train_ds, batch_size=64, shuffle=True, num_workers=0)

In [51]:
trainer = L.Trainer(limit_train_batches=100, max_epochs=50)
trainer.fit(model=AE, train_dataloaders=train_dl)

GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name    | Type       | Params
---------------------------------------
0 | encoder | Sequential | 2.6 K 
1 | decoder | Sequential | 5.1 K 
---------------------------------------
7.7 K     Trainable params
0         Non-trainable params
7.7 K     Total params
0.031     Total estimated model params size (MB)


Training: 0it [00:00, ?it/s]

`Trainer.fit` stopped: `max_epochs=50` reached.


In [55]:
AE.eval()
with torch.no_grad():
    xs = train_ds[:5]
    print(xs[0])
    print(AE.decoder(AE.encoder(xs))[0])


tensor([ 5.,  5., 10.,  5., 10., 10.,  5., 10., 10.,  5., 10., 10.,  5.,  5.,
        10., 10., 10.,  5., 10.,  5., 10.,  5., 10.,  5.,  5.,  5., 10., 10.,
        10., 10., 10., 10., 10., 10., 10., 10.,  5.,  5.,  5.,  5., 10., 10.,
        10., 10.,  5., 10.,  5.,  5., 10., 10.])
tensor([ 5.0000,  5.0000, 10.0000,  5.0000, 10.0000, 10.0000,  5.0000, 10.0000,
        10.0000,  5.0000, 10.0000, 10.0000,  5.0000,  5.0000, 10.0000, 10.0000,
        10.0000,  5.0000, 10.0000,  5.0000, 10.0000,  5.0000, 10.0000,  5.0000,
         5.0000,  5.0000, 10.0000, 10.0000, 10.0000, 10.0000, 10.0000, 10.0000,
        10.0000, 10.0000, 10.0000, 10.0000,  5.0000,  5.0000,  5.0000,  5.0000,
        10.0000, 10.0000, 10.0000, 10.0000,  5.0000, 10.0000,  5.0000,  5.0000,
        10.0000, 10.0000])
