In [5]:
import torch
import torchvision
from torch import nn
from torch.utils.data import DataLoader, TensorDataset
from torchvision import transforms
from torchvision.datasets import MNIST

## First, we test the model on the MNIST dataset

In [2]:
# Preprocessing applied to every MNIST image: convert it to a tensor, then
# normalise the single channel with mean 0.5 and std 0.5.
img_transform = transforms.Compose(
    [
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.5], std=[0.5]),
    ]
)

# MNIST is fetched into ./mnist_data on first use and read from there afterwards.
dataset = MNIST(
    root='./mnist_data',
    transform=img_transform,
    download=True,
)

Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz to ./mnist_data/MNIST/raw/train-images-idx3-ubyte.gz


  0%|          | 0/9912422 [00:00<?, ?it/s]

Extracting ./mnist_data/MNIST/raw/train-images-idx3-ubyte.gz to ./mnist_data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz to ./mnist_data/MNIST/raw/train-labels-idx1-ubyte.gz


  0%|          | 0/28881 [00:00<?, ?it/s]

Extracting ./mnist_data/MNIST/raw/train-labels-idx1-ubyte.gz to ./mnist_data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz to ./mnist_data/MNIST/raw/t10k-images-idx3-ubyte.gz


  0%|          | 0/1648877 [00:00<?, ?it/s]

Extracting ./mnist_data/MNIST/raw/t10k-images-idx3-ubyte.gz to ./mnist_data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz to ./mnist_data/MNIST/raw/t10k-labels-idx1-ubyte.gz


  0%|          | 0/4542 [00:00<?, ?it/s]

Extracting ./mnist_data/MNIST/raw/t10k-labels-idx1-ubyte.gz to ./mnist_data/MNIST/raw



In [16]:
class LAutoencoder(nn.Module):
    """Fully connected autoencoder with a narrow bottleneck.

    The encoder maps ``input_dim`` features through 128 -> 64 -> 12 hidden
    units down to ``latent_dim`` values. The decoder mirrors that stack and
    ends in ``Tanh``, so every reconstructed value lies in (-1, 1).

    Args:
        input_dim: Number of features in each flattened sample. Defaults to
            512; pass ``28 * 28`` for flattened 28x28 images.
        latent_dim: Width of the bottleneck. Defaults to 2.

    Raises:
        ValueError: If ``input_dim`` or ``latent_dim`` is smaller than 1.
    """

    def __init__(self, input_dim: int = 512, latent_dim: int = 2):
        super().__init__()
        if input_dim < 1 or latent_dim < 1:
            raise ValueError(
                f'input_dim and latent_dim must be >= 1, got {input_dim} and {latent_dim}'
            )
        self.input_dim = input_dim
        self.latent_dim = latent_dim

        self.encoder = nn.Sequential(
            nn.Linear(input_dim, 128),
            nn.ReLU(True),
            nn.Linear(128, 64),
            nn.ReLU(True),
            nn.Linear(64, 12),
            nn.ReLU(True),
            nn.Linear(12, latent_dim),
        )
        self.decoder = nn.Sequential(
            nn.Linear(latent_dim, 12),
            nn.ReLU(True),
            nn.Linear(12, 64),
            nn.ReLU(True),
            nn.Linear(64, 128),
            nn.ReLU(True),
            nn.Linear(128, input_dim),
            nn.Tanh(),  # bounds the reconstruction to (-1, 1)
        )

    def forward(self, x):
        """Encode ``x`` and decode it again.

        Args:
            x: Tensor whose last dimension has ``input_dim`` entries.

        Returns:
            A ``(reconstruction, latent)`` tuple: the decoder output (same
            shape as ``x``) and the bottleneck code (last dimension
            ``latent_dim``).
        """
        latent = self.encoder(x)
        x = self.decoder(latent)
        return x, latent

In [4]:
# Training hyper-parameters for the MNIST sanity check.
num_epochs = 10
batch_size = 128
learning_rate = 2e-3
mnist_dim = 28 * 28  # each image is flattened to one row of this width below

# Train on the GPU when one is present, otherwise fall back to the CPU
# instead of failing on `.cuda()`.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
dataloader = DataLoader(dataset, batch_size=batch_size, shuffle=True)

model = LAutoencoder()
# LAutoencoder() is sized for 512 features per sample, while a flattened MNIST
# image has 784. Replace the outermost linear layers so the network accepts
# (and reconstructs) MNIST rows; the hidden layers are left untouched.
if model.encoder[0].in_features != mnist_dim:
    model.encoder[0] = nn.Linear(mnist_dim, model.encoder[0].out_features)
    model.decoder[-2] = nn.Linear(model.decoder[-2].in_features, mnist_dim)
model.to(device)

criterion = nn.MSELoss()
optimizer = torch.optim.AdamW(model.parameters(), lr=learning_rate)

for epoch in range(num_epochs):
    for img, batch_labels in dataloader:
        # Flatten (batch, 1, 28, 28) images to (batch, 784) rows.
        img = img.view(img.size(0), -1).to(device)

        output, latent = model(img)
        loss = criterion(output, img)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

    # The reported value is the loss of the last mini-batch of the epoch.
    print(f'epoch [{epoch + 1}/{num_epochs}], loss:{loss.item()}')

epoch [1/10], loss:0.20798112452030182
epoch [2/10], loss:0.18242456018924713
epoch [3/10], loss:0.16543562710285187
epoch [4/10], loss:0.15606893599033356
epoch [5/10], loss:0.17223048210144043
epoch [6/10], loss:0.15534907579421997
epoch [7/10], loss:0.1619858294725418
epoch [8/10], loss:0.1725073605775833
epoch [9/10], loss:0.1606210172176361
epoch [10/10], loss:0.16098162531852722


## Let's try our latents dataset :)

In [7]:
import os

# Root of the thesis data tree. Defaults to the original location; set the
# THESIS_DATA_ROOT environment variable to run the notebook on another machine.
data_root = os.environ.get('THESIS_DATA_ROOT', '/home/robert/data/diploma-thesis')

# HDF5 file read below through its 'z' dataset.
data_path = f'{data_root}/datasets/stylegan3/tpsi_1/latents/sample_z.h5'
# Pickled predictions used as the labels of the latents.
labels_path = f'{data_root}/predictions/stylegan3/tpsi_1/resnet34_eyeglasses.pkl'

In [11]:
import pickle, h5py

# SECURITY: pickle.load can execute arbitrary code stored in the file, so only
# point labels_path at prediction files you produced yourself.
with open(labels_path, 'rb') as f:
    labels = torch.as_tensor(pickle.load(f), dtype=torch.float32)

# f['z'][:] reads the whole 'z' dataset into memory before the file is closed.
with h5py.File(data_path, 'r') as f:
    data = torch.as_tensor(f['z'][:], dtype=torch.float32)

# Fail early with a readable message if latents and labels are misaligned.
if data.size(0) != labels.size(0):
    raise ValueError(
        f'latents and labels disagree on sample count: {data.size(0)} vs {labels.size(0)}'
    )

dataset = TensorDataset(data, labels)

In [17]:
# Training hyper-parameters for the latent-vector experiment.
num_epochs = 10
batch_size = 128
learning_rate = 2e-3

# Train on the GPU when one is present, otherwise fall back to the CPU
# instead of failing on `.cuda()`.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
dataloader = DataLoader(dataset, batch_size=batch_size, shuffle=True)

model = LAutoencoder()
model.to(device)
criterion = nn.MSELoss()
optimizer = torch.optim.AdamW(model.parameters(), lr=learning_rate)

# NOTE(review): the decoder ends in Tanh, so reconstructions are confined to
# (-1, 1); confirm that this range is adequate for the latents being fitted.
for epoch in range(num_epochs):
    # Dedicated loop names keep the notebook-level `data` and `labels` tensors
    # from being overwritten by the last mini-batch.
    for img, batch_labels in dataloader:
        img = img.view(img.size(0), -1).to(device)

        output, latent = model(img)
        loss = criterion(output, img)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

    # The reported value is the loss of the last mini-batch of the epoch.
    print(f'epoch [{epoch + 1}/{num_epochs}], loss:{loss.item()}')

epoch [1/10], loss:0.9959666728973389
epoch [2/10], loss:0.9972504377365112
epoch [3/10], loss:0.9981735348701477
epoch [4/10], loss:0.9962769746780396
epoch [5/10], loss:0.991364598274231
epoch [6/10], loss:0.9933990240097046
epoch [7/10], loss:0.9971369504928589
epoch [8/10], loss:0.9857909679412842
epoch [9/10], loss:0.9908149242401123
epoch [10/10], loss:0.9904171228408813


In [28]:
# Mean over every value of the first tensor stored in the dataset.
dataset.tensors[0].mean()

tensor(-5.0598e-05)

In [29]:
# Standard deviation over every value of the first tensor stored in the dataset.
dataset.tensors[0].std()

tensor(1.0000)