# Convolutional Autoencoder with Fashion Images
In this notebook, we will be building a Convolutional Neural Network (CNN) for an Autoencoder (AE)
using the Fashion MNIST dataset.
The dataset consists of 60k and 10k training and test sets respectively. Each image is grayscale of size  28x28, associated with a label from 10 classes.

#### import required libraries

In [1]:
import torch
import torchvision
from torch import nn
from torchsummary import summary
from torch.utils.data import DataLoader
from torchvision import transforms
from torchvision.datasets import FashionMNIST
from torchvision.utils import save_image
import os
from matplotlib import pyplot as plt
%matplotlib inline

#### Create directory to save images

In [2]:
if not os.path.exists('./AE_IMAGES'):
    os.mkdir('./AE_IMAGES')

#### function to count number of parameters

In [3]:
def get_n_params(model):
    np=0
    for p in list(model.parameters()):
        np += p.nelement()
    return np

#### Convert vector to image

In [4]:
def to_img(x):
    x = 0.5 * (x + 1)
    x = x.clamp(0, 1) 
    x = x.view(x.size(0), 1, 28, 28)
    return x

#### Load and transform training and test data

In [5]:
batch_size = 128

img_transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])

train_dataset = FashionMNIST(root='./data', download=True,
                            train=True, transform=img_transform)

test_dataset = FashionMNIST(root='./data', 
                           train=False, transform=img_transform)

train_loader = torch.utils.data.DataLoader(dataset=train_dataset, 
                                           batch_size=batch_size, 
                                           shuffle=True)

test_loader = torch.utils.data.DataLoader(dataset=test_dataset, 
                                          batch_size=1000, 
                                          shuffle=False)

In [6]:
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

#### Define model architecture and reconstruction loss

In [7]:
class Autoencoder(nn.Module):
    def __init__(self):
        super(Autoencoder, self).__init__()
        self.encoder = nn.Sequential(
            nn.Conv2d(in_channels=1, out_channels=16, kernel_size=3, stride=3, padding=1),
            nn.LeakyReLU(),
            nn.AvgPool2d(kernel_size=2, stride=2, padding=0),
            nn.Conv2d(in_channels=16, out_channels=8, kernel_size=3, stride=2, padding=1),
            nn.LeakyReLU(),
            nn.AvgPool2d(kernel_size=2, stride=1, padding=0)
            
        )
        self.decoder = nn.Sequential(
            nn.ConvTranspose2d(in_channels=8, out_channels=16, kernel_size=3, stride=2, padding=0),
            nn.LeakyReLU(),
            nn.ConvTranspose2d(in_channels=16, out_channels=8, kernel_size=5, stride=3, padding=1),
            nn.LeakyReLU(),   
            nn.ConvTranspose2d(in_channels=8, out_channels=1, kernel_size=2, stride=2, padding=1),   
            nn.Tanh()
        )
        
        
    def forward(self, x):
        x = self.encoder(x)
        x = self.decoder(x)
        return x
    
model = Autoencoder().to(device)
criterion = nn.MSELoss()

#### Print model architecture

In [8]:
model = Autoencoder()
print(model)

Autoencoder(
  (encoder): Sequential(
    (0): Conv2d(1, 16, kernel_size=(3, 3), stride=(3, 3), padding=(1, 1))
    (1): LeakyReLU(negative_slope=0.01)
    (2): AvgPool2d(kernel_size=2, stride=2, padding=0)
    (3): Conv2d(16, 8, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))
    (4): LeakyReLU(negative_slope=0.01)
    (5): AvgPool2d(kernel_size=2, stride=1, padding=0)
  )
  (decoder): Sequential(
    (0): ConvTranspose2d(8, 16, kernel_size=(3, 3), stride=(2, 2))
    (1): LeakyReLU(negative_slope=0.01)
    (2): ConvTranspose2d(16, 8, kernel_size=(5, 5), stride=(3, 3), padding=(1, 1))
    (3): LeakyReLU(negative_slope=0.01)
    (4): ConvTranspose2d(8, 1, kernel_size=(2, 2), stride=(2, 2), padding=(1, 1))
    (5): Tanh()
  )
)


#### Print model parameters

In [9]:
### Input Parameters
params = list(model.parameters())
print(len(params))

for i in range(len(params)):
    print(params[i].size())

10
torch.Size([16, 1, 3, 3])
torch.Size([16])
torch.Size([8, 16, 3, 3])
torch.Size([8])
torch.Size([8, 16, 3, 3])
torch.Size([16])
torch.Size([16, 8, 5, 5])
torch.Size([8])
torch.Size([8, 1, 2, 2])
torch.Size([1])


In [10]:
### Model Total Parameters
print('Number of parameters: {}'.format(get_n_params(model)))

Number of parameters: 5729


In [11]:
### Model Summary
summary(model, input_size=(1,28,28))

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1           [-1, 16, 10, 10]             160
         LeakyReLU-2           [-1, 16, 10, 10]               0
         AvgPool2d-3             [-1, 16, 5, 5]               0
            Conv2d-4              [-1, 8, 3, 3]           1,160
         LeakyReLU-5              [-1, 8, 3, 3]               0
         AvgPool2d-6              [-1, 8, 2, 2]               0
   ConvTranspose2d-7             [-1, 16, 5, 5]           1,168
         LeakyReLU-8             [-1, 16, 5, 5]               0
   ConvTranspose2d-9            [-1, 8, 15, 15]           3,208
        LeakyReLU-10            [-1, 8, 15, 15]               0
  ConvTranspose2d-11            [-1, 1, 28, 28]              33
             Tanh-12            [-1, 1, 28, 28]               0
Total params: 5,729
Trainable params: 5,729
Non-trainable params: 0
-----------------------------------

#### Configure the optimiser

In [12]:
learning_rate = 1e-3
l2_regularization = 1e-5

optimizer = torch.optim.Adam(
    model.parameters(),
    lr=learning_rate,
    weight_decay=l2_regularization
)

#### Train autoencoder (AE)

In [13]:
num_epochs = 20

for epoch in range(num_epochs):
    for data in train_loader:
        img, _ = data
        img.requires_grad_()
        # ===================forward=====================
        output = model(img)  
        loss = criterion(output, img.data)
        # ===================backward====================
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
    # ===================log========================
    print(f'epoch [{epoch + 1}/{num_epochs}], loss:{loss.item():.4f}')  
    
    if epoch % 1 == 0:
        pic = to_img(output.cpu().data)
        save_image(pic, './AE_IMAGES/{}.png'.format(epoch+1))

epoch [1/20], loss:0.1660
epoch [2/20], loss:0.1318
epoch [3/20], loss:0.1153
epoch [4/20], loss:0.1140
epoch [5/20], loss:0.0991
epoch [6/20], loss:0.1000
epoch [7/20], loss:0.1039
epoch [8/20], loss:0.1039
epoch [9/20], loss:0.0876
epoch [10/20], loss:0.0920
epoch [11/20], loss:0.0946
epoch [12/20], loss:0.0947
epoch [13/20], loss:0.0857
epoch [14/20], loss:0.0814
epoch [15/20], loss:0.0939
epoch [16/20], loss:0.0849
epoch [17/20], loss:0.0879
epoch [18/20], loss:0.0858
epoch [19/20], loss:0.0826
epoch [20/20], loss:0.0880
