In [None]:
import os
import time
import torch
import warnings
import numpy as np
import torchvision
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
import matplotlib.pyplot as plt
import torchvision.transforms as transforms

from tqdm import tqdm
from torchvision import datasets
from torchsummary import summary

# Silence every Python warning for the rest of the session.
# NOTE(review): this also hides deprecation / numerical warnings raised by the
# libraries used below; consider narrowing the filter while debugging.
warnings.filterwarnings("ignore")
# Ask PyTorch to release cached GPU memory (relevant when the notebook is
# re-run inside an already running kernel).
torch.cuda.empty_cache()

In [None]:
# Seed the random number generators used below so that runs are reproducible.

# Export PYTHONHASHSEED through the process environment.
# NOTE(review): Python reads this variable at interpreter start-up, so setting it
# here presumably only affects child processes, not the running kernel — confirm.
os.environ['PYTHONHASHSEED'] = str(1)
# Seed NumPy's global random number generator (random sampling, shuffling, ...).
# NOTE(review): NumPy is seeded with 1 while PyTorch below is seeded with 0.
np.random.seed(1)

# Seed PyTorch's default random number generator.
torch.manual_seed(0)
# Seed the random number generator of the current CUDA device.
torch.cuda.manual_seed(0)
# Seed the random number generators of all CUDA devices.
torch.cuda.manual_seed_all(0)
# Disable the cuDNN auto-tuner that benchmarks and picks algorithms per input shape.
torch.backends.cudnn.benchmark = False
# Request deterministic cuDNN algorithms.
torch.backends.cudnn.deterministic = True

In [None]:
# Resize every image to 112x112 and convert it to a torch.FloatTensor.
# NOTE(review): only ToTensor is applied, so no mean/std normalization happens here;
# per the torchvision docs ToTensor scales 8-bit image data to [0, 1] — confirm
# this is the value range the model is meant to reconstruct.
transform = transforms.Compose([
    transforms.Resize(size=(112,112)),
    transforms.ToTensor()])

In [None]:
# Training and validation datasets read from the 'train' and 'val' folders
# (relative to the working directory); both use the same preprocessing pipeline.
data_train = datasets.ImageFolder(root='train', transform=transform)
data_val = datasets.ImageFolder(root='val', transform=transform)

In [None]:
# Mini-batch loaders (32 images per batch). Training batches are shuffled and the
# last incomplete batch is dropped; validation keeps dataset order and all samples.
train_loader = torch.utils.data.DataLoader(
    data_train,
    batch_size=32,
    shuffle=True,
    drop_last=True,
)
valid_loader = torch.utils.data.DataLoader(
    data_val,
    batch_size=32,
    shuffle=False,
)

In [None]:
# Select the device used for training: the GPU when CUDA is available, otherwise the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
print("Dispositivo sendo usado: ", device)

In [None]:
class Encoder(nn.Module):
    """Convolutional encoder that maps a batch of images to flat latent vectors."""

    def __init__(self,
                 num_input_channels : int,
                 base_channel_size : int,
                 latent_dim : int,
                 act_fn : object = nn.GELU,
                 input_size : object = 112):
        """
        Inputs:
            - num_input_channels : Number of input channels of the image (3 for RGB images)
            - base_channel_size : Number of channels we use in the first convolutional layers. Deeper layers use twice as many.
            - latent_dim : Dimensionality of latent representation z
            - act_fn : Activation function used throughout the encoder network
            - input_size : Spatial size of the input images, either an int (square images)
                           or a (height, width) pair. The default of 112 yields the
                           14x14 feature map (2*196*c_hid features) used so far.
        Raises:
            - ValueError : if a side of input_size is smaller than 1
        """
        super().__init__()
        c_hid = base_channel_size

        if isinstance(input_size, int):
            height = width = input_size
        else:
            height, width = input_size
        if height < 1 or width < 1:
            raise ValueError(f"input_size must be positive, got {(height, width)}")

        # Size of the feature map that reaches the final linear layer
        feat_h = self._downsampled(height)
        feat_w = self._downsampled(width)

        self.net = nn.Sequential(
            nn.Conv2d(num_input_channels, c_hid, kernel_size=3, padding=1, stride=2), # H x W => ceil(H/2) x ceil(W/2) (112 => 56)
            act_fn(),
            nn.Conv2d(c_hid, c_hid, kernel_size=3, padding=1),
            act_fn(),
            nn.Conv2d(c_hid, 2*c_hid, kernel_size=3, padding=1, stride=2), # halves the spatial size again (56 => 28)
            act_fn(),
            nn.Conv2d(2*c_hid, 2*c_hid, kernel_size=3, padding=1),
            act_fn(),
            nn.Conv2d(2*c_hid, 2*c_hid, kernel_size=3, padding=1, stride=2), # halves the spatial size again (28 => 14)
            act_fn(),
            nn.Flatten(), # Image grid to single feature vector
            nn.Linear(2*feat_h*feat_w*c_hid, latent_dim)
        )

    @staticmethod
    def _downsampled(side, num_stride2_convs=3):
        """Return the side length left after the stride-2 convolutions of `net`."""
        for _ in range(num_stride2_convs):
            # kernel 3 / padding 1 / stride 2 maps n to floor((n - 1) / 2) + 1 == ceil(n / 2)
            side = (side + 1) // 2
        return side

    def forward(self, x):
        """Encode a batch of images of shape (batch, channels, height, width) into (batch, latent_dim)."""
        return self.net(x)

In [None]:
class Decoder(nn.Module):
    """Convolutional decoder that maps latent vectors back to images."""

    def __init__(self,
                 num_input_channels : int,
                 base_channel_size : int,
                 latent_dim : int,
                 act_fn : object = nn.GELU,
                 output_size : object = 112):
        """
        Inputs:
            - num_input_channels : Number of channels of the image to reconstruct (3 for RGB images)
            - base_channel_size : Number of channels we use in the last convolutional layers. Early layers use twice as many.
            - latent_dim : Dimensionality of latent representation z
            - act_fn : Activation function used throughout the decoder network
            - output_size : Spatial size of the reconstructed images, either an int (square
                            images) or a (height, width) pair. Each side must be a positive
                            multiple of 8 because the three transposed convolutions each
                            double the feature map. The default of 112 starts from the
                            14x14 feature map (2*196*c_hid features) used so far.
        Raises:
            - ValueError : if a side of output_size is not a positive multiple of 8
        """
        super().__init__()
        c_hid = base_channel_size

        if isinstance(output_size, int):
            height = width = output_size
        else:
            height, width = output_size
        if height < 8 or width < 8 or height % 8 != 0 or width % 8 != 0:
            raise ValueError(
                f"output_size sides must be positive multiples of 8, got {(height, width)}")

        # Spatial size of the feature map the latent vector is unfolded into
        feat_h, feat_w = height // 8, width // 8
        self._feature_hw = (feat_h, feat_w)

        self.linear = nn.Sequential(
            nn.Linear(latent_dim, 2*feat_h*feat_w*c_hid),
            act_fn()
        )
        self.net = nn.Sequential(
            nn.ConvTranspose2d(2*c_hid, 2*c_hid, kernel_size=3, output_padding=1, padding=1, stride=2), # doubles the spatial size (14 => 28)
            act_fn(),
            nn.Conv2d(2*c_hid, 2*c_hid, kernel_size=3, padding=1),
            act_fn(),
            nn.ConvTranspose2d(2*c_hid, c_hid, kernel_size=3, output_padding=1, padding=1, stride=2), # doubles the spatial size (28 => 56)
            act_fn(),
            nn.Conv2d(c_hid, c_hid, kernel_size=3, padding=1),
            act_fn(),
            nn.ConvTranspose2d(c_hid, num_input_channels, kernel_size=3, output_padding=1, padding=1, stride=2), # doubles the spatial size (56 => 112)
            # Tanh bounds the reconstruction to [-1, 1].
            # NOTE(review): the preprocessing in this notebook applies only ToTensor (no
            # Normalize), so the inputs are presumably in [0, 1] — confirm the intended range.
            nn.Tanh()
        )

    def forward(self, x):
        """Decode latent vectors of shape (batch, latent_dim) into images of shape (batch, channels, height, width)."""
        x = self.linear(x)
        feat_h, feat_w = self._feature_hw
        # Unfold the flat feature vector back into a (2*c_hid, feat_h, feat_w) feature map
        x = x.reshape(x.shape[0], -1, feat_h, feat_w)
        x = self.net(x)
        return x

In [None]:
class Autoencoder(nn.Module):
    """Encoder/decoder pair that reconstructs its own input."""

    def __init__(self,
                 base_channel_size: int = 112,
                 latent_dim: int = 768,
                 encoder_class : object = Encoder,
                 decoder_class : object = Decoder,
                 num_input_channels: int = 3,
                 width: int = 112,
                 height: int = 112):
        """
        Inputs:
            - base_channel_size : Base number of channels handed to the encoder and the decoder
            - latent_dim : Dimensionality of the latent representation z
            - encoder_class : Class instantiated as the encoder
            - decoder_class : Class instantiated as the decoder
            - num_input_channels : Number of channels of the images
            - width, height : Accepted for API compatibility; this constructor does not
                              use them and does not forward them to the encoder/decoder
        """
        super().__init__()
        # The encoder and the decoder are built from the same three positional arguments
        shared_args = (num_input_channels, base_channel_size, latent_dim)
        self.encoder = encoder_class(*shared_args)
        self.decoder = decoder_class(*shared_args)

    def forward(self, x):
        """
        Encode the image batch x and return the decoder's reconstruction of it
        """
        return self.decoder(self.encoder(x))

In [None]:
# Build the autoencoder with its default hyper-parameters.
model = Autoencoder()
# Move the model to the selected device; left as the cell's last expression so the
# notebook also displays the module structure.
model.to(device)

In [None]:
# Training / evaluation pipeline: one full pass over a data loader
def pipeline_train(model, loader, device, optimizer, base):
    """Run one pass over `loader`, updating the weights only when base == "train".

    Inputs:
        - model : Module whose output for a batch x is compared against x itself
        - loader : Iterable of batches; element 0 of each batch is the image tensor
                   (the loss reduction expects 4 dimensions: batch plus three more)
        - device : Device the image tensor is moved to before the forward pass
        - optimizer : Optimizer stepped after every batch in training mode
        - base : "train" to back-propagate and update the weights; any other value
                 (e.g. "val") only evaluates the loss

    Returns:
        (base_loss, model, optimizer), where base_loss is the sum over all batches of
        the per-batch loss (squared error summed per sample, averaged over the batch).
        Divide it by len(loader) to get the mean batch loss.
    """
    is_training = base == "train"
    base_loss = 0.0

    # Autograd is only needed when the weights are updated; disabling it during
    # evaluation avoids building (and keeping in memory) graphs that are never used.
    with torch.set_grad_enabled(is_training):
        for data in loader:

            # Transfer Data to GPU if available
            x = data[0].to(device)

            if is_training:
                # Clear the gradients
                optimizer.zero_grad()

            # Forward Pass
            x_hat = model(x)

            # Squared error summed over each sample, then averaged over the batch
            loss = F.mse_loss(x_hat, x, reduction="none")
            loss = loss.sum(dim=[1, 2, 3]).mean(dim=[0])

            if is_training:
                # Calculate gradients
                loss.backward()
                # Update Weights
                optimizer.step()

            # Accumulate the batch loss
            base_loss += loss.item()

    return base_loss, model, optimizer

In [None]:
# Train the model

# Declaring Optimizer
optimizer = torch.optim.Adam(model.parameters(), lr=1e-5)
# Disabled alternative kept for reference: Adam with lr=0.01 combined with
# optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', factor=0.2,
# patience=20, min_lr=5e-5), which lowers the learning rate when the validation
# loss has not improved for `patience` epochs (stepped with scheduler.step(val_loss)).

# Training with Validation
epochs = 200

# Wall-clock timestamps around the loop; the elapsed training time is end - start
start = time.time()
for e in tqdm(range(epochs)):

    model.train()
    train_loss, model, optimizer = pipeline_train(model, train_loader, device, optimizer,
                                                  "train")

    # pipeline_train returns the sum of the batch losses
    loss_train_mean = train_loss / len(train_loader)

    model.eval()     # Optional when not using Model Specific layer
    # Validation never back-propagates, so skip building autograd graphs
    with torch.no_grad():
        val_loss, model, optimizer = pipeline_train(model, valid_loader, device, optimizer,
                                                    "val")
    #scheduler.step(val_loss)
    loss_val_mean = val_loss / len(valid_loader)

    print(f'Epoch {e} \t Training Loss: {loss_train_mean} \t Validation Loss: {loss_val_mean}')

    # Overwrite the checkpoint after every epoch; 'loss' holds the summed
    # (not averaged) validation loss of this epoch
    torch.save({
            'epoch': e,
            'model_state_dict': model.state_dict(),
            'optimizer_state_dict': optimizer.state_dict(),
            'loss': val_loss,
            }, "autoencoder.ckpt")

end = time.time()

In [None]:
def get_train_images(num):
    """Stack the first `num` images of `data_train` into one batch tensor.

    Element 0 of each dataset item is taken as the image; the result has a new
    leading batch dimension of size `num`.
    """
    images = []
    for idx in range(num):
        images.append(data_train[idx][0])
    return torch.stack(images, dim=0)

In [None]:
# Take the first training image as a batch of size one for a qualitative check.
x = get_train_images(1)

In [None]:
# Reconstruct the sample batch; this is inference only, so do not track gradients.
with torch.no_grad():
    xhat = model(x.to(device))

In [None]:
# Show the original image. imshow expects (H, W, C), so move the channel axis last;
# a plain `.T` reverses all axes to (W, H, C) and would display the image transposed.
plt.imshow(data_train[0][0].permute(1, 2, 0).numpy())

In [None]:
# First reconstruction of the batch with its first axis moved last (ready for imshow).
y = xhat[0].permute(1, 2, 0)
y.shape

In [None]:
# Display the reconstruction: detach it from the autograd graph, copy it to host
# memory and convert it to a NumPy array.
# NOTE(review): the decoder ends in Tanh, so values may fall outside [0, 1];
# presumably imshow clips them — confirm.
plt.imshow(y.detach().cpu().numpy())

In [None]:
# Training with Validation (continuation: 100 more epochs, numbered 200-299,
# reusing the model and optimizer state left by the previous training cell)
epochs = 200

# Wall-clock timestamps around the loop; the elapsed training time is end - start
start = time.time()
for e in tqdm(range(epochs, epochs+100)):

    model.train()
    train_loss, model, optimizer = pipeline_train(model, train_loader, device, optimizer,
                                                  "train")

    # pipeline_train returns the sum of the batch losses
    loss_train_mean = train_loss / len(train_loader)

    model.eval()     # Optional when not using Model Specific layer
    # Validation never back-propagates, so skip building autograd graphs
    with torch.no_grad():
        val_loss, model, optimizer = pipeline_train(model, valid_loader, device, optimizer,
                                                    "val")
    #scheduler.step(val_loss)
    loss_val_mean = val_loss / len(valid_loader)

    print(f'Epoch {e} \t Training Loss: {loss_train_mean} \t Validation Loss: {loss_val_mean}')

    # Overwrite the checkpoint after every epoch; 'loss' holds the summed
    # (not averaged) validation loss of this epoch
    torch.save({
            'epoch': e,
            'model_state_dict': model.state_dict(),
            'optimizer_state_dict': optimizer.state_dict(),
            'loss': val_loss,
            }, "autoencoder.ckpt")

end = time.time()

In [None]:
# Inspect the latent representation the encoder produces for the sample batch.
model.encoder(x.to(device))