In [1]:
import torch
import torchvision
from torch import nn
from torch.utils.data import DataLoader, TensorDataset
from torchvision import transforms
from torchvision.datasets import MNIST

## First we test the model on MNIST dataset

In [2]:
img_transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize([0.5], [0.5])
])

dataset = MNIST('./mnist_data', transform=img_transform, download=True)

Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz to ./mnist_data/MNIST/raw/train-images-idx3-ubyte.gz


  0%|          | 0/9912422 [00:00<?, ?it/s]

Extracting ./mnist_data/MNIST/raw/train-images-idx3-ubyte.gz to ./mnist_data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz to ./mnist_data/MNIST/raw/train-labels-idx1-ubyte.gz


  0%|          | 0/28881 [00:00<?, ?it/s]

Extracting ./mnist_data/MNIST/raw/train-labels-idx1-ubyte.gz to ./mnist_data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz to ./mnist_data/MNIST/raw/t10k-images-idx3-ubyte.gz


  0%|          | 0/1648877 [00:00<?, ?it/s]

Extracting ./mnist_data/MNIST/raw/t10k-images-idx3-ubyte.gz to ./mnist_data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz to ./mnist_data/MNIST/raw/t10k-labels-idx1-ubyte.gz


  0%|          | 0/4542 [00:00<?, ?it/s]

Extracting ./mnist_data/MNIST/raw/t10k-labels-idx1-ubyte.gz to ./mnist_data/MNIST/raw



In [2]:
class LAutoencoder(nn.Module):
    def __init__(self):
        super().__init__()
        self.encoder = nn.Sequential(
            nn.Linear(512,1024),
            nn.Tanh(),
            nn.Linear(1024, 2048),
            nn.ReLU(True), 
            nn.Linear(2048, 2048), 
            nn.ReLU(True), 
            nn.Linear(2048, 2048))
        self.decoder = nn.Sequential(
            nn.Linear(2048, 2048),
            nn.ReLU(True),
            nn.Linear(2048, 2048),
            nn.ReLU(True),
            nn.Linear(2048, 1024),
            nn.Tanh(), 
            nn.Linear(1024, 512),
            nn.Tanh() )            
        
    def forward(self, x):
        latent = self.encoder(x)
        x = self.decoder(latent)
        return x,latent

In [None]:
num_epochs = 10
batch_size = 128
learning_rate = 2e-3
dataloader = DataLoader(dataset, batch_size=batch_size, shuffle=True)

model = LAutoencoder()
model.cuda()
criterion = nn.MSELoss()
optimizer = torch.optim.AdamW(
    model.parameters(), lr=learning_rate)

for epoch in range(num_epochs):
    for data in dataloader:
        img, labels = data
        img = img.view(img.size(0), -1).cuda()  
               
        output,latent = model(img)
        loss = criterion(output, img)
       
        loss.backward()
        optimizer.step()
        optimizer.zero_grad()
    
    print(f'epoch [{epoch + 1}/{num_epochs}], loss:{loss.data.item()}')

## Lets try ours latents dataset :)

In [3]:
data_path = '/home/robert/data/diploma-thesis/datasets/stylegan3/tpsi_1/latents/sample_z.h5'
labels_path = '/home/robert/data/diploma-thesis/predictions/stylegan3/tpsi_1/resnet34_eyeglasses.pkl'

In [4]:
import pickle, h5py

labels = None
with open(labels_path,'rb') as f:
    labels = torch.Tensor(pickle.load(f))

data = None
with h5py.File(data_path, 'r') as f:
    data = torch.Tensor(f['z'][:])

dataset = TensorDataset(data,labels)

In [5]:
num_epochs = 10
batch_size = 128
learning_rate = 2e-3
dataloader = DataLoader(dataset, batch_size=batch_size, shuffle=True)

model = LAutoencoder()
model.cuda()
criterion = nn.MSELoss()
optimizer = torch.optim.AdamW(
    model.parameters(), lr=learning_rate)

for epoch in range(num_epochs):
    for data in dataloader:
        img, labels = data
        img = img.view(img.size(0), -1).cuda()  
               
        output,latent = model(img)
        loss = criterion(output, img)
       
        loss.backward()
        optimizer.step()
        optimizer.zero_grad()
    
    print(f'epoch [{epoch + 1}/{num_epochs}], loss:{loss.data.item()}')

RuntimeError: mat1 and mat2 shapes cannot be multiplied (128x2024 and 2048x2048)

In [28]:
dataset[:][0].mean()

tensor(-5.0598e-05)

In [29]:
dataset[:][0].std()

tensor(1.0000)

In [33]:
torchvision.models.list_models()

['alexnet',
 'convnext_base',
 'convnext_large',
 'convnext_small',
 'convnext_tiny',
 'deeplabv3_mobilenet_v3_large',
 'deeplabv3_resnet101',
 'deeplabv3_resnet50',
 'densenet121',
 'densenet161',
 'densenet169',
 'densenet201',
 'efficientnet_b0',
 'efficientnet_b1',
 'efficientnet_b2',
 'efficientnet_b3',
 'efficientnet_b4',
 'efficientnet_b5',
 'efficientnet_b6',
 'efficientnet_b7',
 'efficientnet_v2_l',
 'efficientnet_v2_m',
 'efficientnet_v2_s',
 'fasterrcnn_mobilenet_v3_large_320_fpn',
 'fasterrcnn_mobilenet_v3_large_fpn',
 'fasterrcnn_resnet50_fpn',
 'fasterrcnn_resnet50_fpn_v2',
 'fcn_resnet101',
 'fcn_resnet50',
 'fcos_resnet50_fpn',
 'googlenet',
 'inception_v3',
 'keypointrcnn_resnet50_fpn',
 'lraspp_mobilenet_v3_large',
 'maskrcnn_resnet50_fpn',
 'maskrcnn_resnet50_fpn_v2',
 'maxvit_t',
 'mc3_18',
 'mnasnet0_5',
 'mnasnet0_75',
 'mnasnet1_0',
 'mnasnet1_3',
 'mobilenet_v2',
 'mobilenet_v3_large',
 'mobilenet_v3_small',
 'mvit_v1_b',
 'mvit_v2_s',
 'quantized_googlenet',
 '